1: /* $OpenBSD: fnmatch.c,v 1.15 2011/02/10 21:31:59 stsp Exp $ */
2:
3: /* Copyright (c) 2011, VMware, Inc.
4: * All rights reserved.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions are met:
8: * * Redistributions of source code must retain the above copyright
9: * notice, this list of conditions and the following disclaimer.
10: * * Redistributions in binary form must reproduce the above copyright
11: * notice, this list of conditions and the following disclaimer in the
12: * documentation and/or other materials provided with the distribution.
13: * * Neither the name of the VMware, Inc. nor the names of its contributors
14: * may be used to endorse or promote products derived from this software
15: * without specific prior written permission.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
21: * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27: */
28:
29: /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
30: *
31: * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
32: * as described in;
33: * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
34: *
35: * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
36: * from chapter 2. "Shell Command Language"
37: * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
38: * where; 1. A bracket expression starting with an unquoted <circumflex> '^'
39: * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.'
40: * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading
41: * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
42: * a valid bracket expression is treated as an ordinary character; 4. a differing
43: * number of consecutive slashes within pattern and string will NOT match;
44: * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
45: *
46: * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
47: * from chapter 9, "Regular Expressions"
48: * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
49: * with no support for collating symbols, equivalence class expressions or
50: * character class expressions. A partial range expression with a leading
51: * hyphen following a valid range expression will match only the ordinary
52: * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters
53: * 'a' through 'm', a <hyphen> '-', or a 'z').
54: *
55: * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
56: * path segment of string, and FNM_CASEFOLD to ignore alpha case.
57: *
58: * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
59: * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
60: * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
61: * nonalpha characters within a range.
62: *
63: * XXX comments below indicate porting required for multi-byte character sets
64: * and non-POSIX locale collation orders; requires mbr* APIs to track shift
65: * state of pattern and string (rewinding pattern and string repeatedly).
66: *
67: * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
68: * UTF-8, SHIFT-JIS, etc). Any implementation allowing '\' as an alternate
69: * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
70: */
71:
72: #include <config.h>
73:
74: #ifndef HAVE_FNMATCH
75:
76: #include <sys/types.h>
77:
78: #include <stdio.h>
79: #include <ctype.h>
80: #ifdef HAVE_STRING_H
81: # include <string.h>
82: #endif /* HAVE_STRING_H */
83: #ifdef HAVE_STRINGS_H
84: # include <strings.h>
85: #endif /* HAVE_STRINGS_H */
86: #include <limits.h>
87:
88: #include "missing.h"
89: #include "compat/charclass.h"
90: #include "compat/fnmatch.h"
91:
92: #define RANGE_MATCH 1
93: #define RANGE_NOMATCH 0
94: #define RANGE_ERROR (-1)
95:
96: static int
97: classmatch(const char *pattern, char test, int foldcase, const char **ep)
98: {
99: const char * const mismatch = pattern;
100: const char *colon;
101: struct cclass *cc;
102: int rval = RANGE_NOMATCH;
103: size_t len;
104:
105: if (pattern[0] != '[' || pattern[1] != ':') {
106: *ep = mismatch;
107: return RANGE_ERROR;
108: }
109: pattern += 2;
110:
111: if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
112: *ep = mismatch;
113: return RANGE_ERROR;
114: }
115: *ep = colon + 2;
116: len = (size_t)(colon - pattern);
117:
118: if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
119: pattern = "lower:]";
120: for (cc = cclasses; cc->name != NULL; cc++) {
121: if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
122: if (cc->isctype((unsigned char)test))
123: rval = RANGE_MATCH;
124: break;
125: }
126: }
127: if (cc->name == NULL) {
128: /* invalid character class, treat as normal text */
129: *ep = mismatch;
130: rval = RANGE_ERROR;
131: }
132: return rval;
133: }
134:
135: /* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled.
136: * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over,
137: * however the "\/" sequence is advanced to '/'.
138: *
139: * Both pattern and string are **char to support pointer increment of arbitrary
140: * multibyte characters for the given locale, in a later iteration of this code
141: */
142: static int fnmatch_ch(const char **pattern, const char **string, int flags)
143: {
144: const char * const mismatch = *pattern;
145: const int nocase = !!(flags & FNM_CASEFOLD);
146: const int escape = !(flags & FNM_NOESCAPE);
147: const int slash = !!(flags & FNM_PATHNAME);
148: int result = FNM_NOMATCH;
149: const char *startch;
150: int negate;
151:
152: if (**pattern == '[')
153: {
154: ++*pattern;
155:
156: /* Handle negation, either leading ! or ^ operators (never both) */
157: negate = ((**pattern == '!') || (**pattern == '^'));
158: if (negate)
159: ++*pattern;
160:
161: /* ']' is an ordinary character at the start of the range pattern */
162: if (**pattern == ']')
163: goto leadingclosebrace;
164:
165: while (**pattern)
166: {
167: if (**pattern == ']') {
168: ++*pattern;
169: /* XXX: Fix for MBCS character width */
170: ++*string;
171: return (result ^ negate);
172: }
173:
174: if (escape && (**pattern == '\\')) {
175: ++*pattern;
176:
177: /* Patterns must be terminated with ']', not EOS */
178: if (!**pattern)
179: break;
180: }
181:
182: /* Patterns must be terminated with ']' not '/' */
183: if (slash && (**pattern == '/'))
184: break;
185:
186: /* Match character classes. */
187: if (classmatch(*pattern, **string, nocase, pattern)
188: == RANGE_MATCH) {
189: result = 0;
190: continue;
191: }
192:
193: leadingclosebrace:
194: /* Look at only well-formed range patterns;
195: * "x-]" is not allowed unless escaped ("x-\]")
196: * XXX: Fix for locale/MBCS character width
197: */
198: if (((*pattern)[1] == '-') && ((*pattern)[2] != ']'))
199: {
200: startch = *pattern;
201: *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
202:
203: /* NOT a properly balanced [expr] pattern, EOS terminated
204: * or ranges containing a slash in FNM_PATHNAME mode pattern
205: * fall out to to the rewind and test '[' literal code path
206: */
207: if (!**pattern || (slash && (**pattern == '/')))
208: break;
209:
210: /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
211: if ((**string >= *startch) && (**string <= **pattern))
212: result = 0;
213: else if (nocase && (isupper((unsigned char)**string) ||
214: isupper((unsigned char)*startch) ||
215: isupper((unsigned char)**pattern))
216: && (tolower((unsigned char)**string) >= tolower((unsigned char)*startch))
217: && (tolower((unsigned char)**string) <= tolower((unsigned char)**pattern)))
218: result = 0;
219:
220: ++*pattern;
221: continue;
222: }
223:
224: /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
225: if ((**string == **pattern))
226: result = 0;
227: else if (nocase && (isupper((unsigned char)**string) ||
228: isupper((unsigned char)**pattern))
229: && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
230: result = 0;
231:
232: ++*pattern;
233: }
234:
235: /* NOT a properly balanced [expr] pattern; Rewind
236: * and reset result to test '[' literal
237: */
238: *pattern = mismatch;
239: result = FNM_NOMATCH;
240: }
241: else if (**pattern == '?') {
242: /* Optimize '?' match before unescaping **pattern */
243: if (!**string || (slash && (**string == '/')))
244: return FNM_NOMATCH;
245: result = 0;
246: goto fnmatch_ch_success;
247: }
248: else if (escape && (**pattern == '\\') && (*pattern)[1]) {
249: ++*pattern;
250: }
251:
252: /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
253: if (**string == **pattern)
254: result = 0;
255: else if (nocase && (isupper((unsigned char)**string) || isupper((unsigned char)**pattern))
256: && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
257: result = 0;
258:
259: /* Refuse to advance over trailing slash or nulls
260: */
261: if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/'))))
262: return result;
263:
264: fnmatch_ch_success:
265: ++*pattern;
266: ++*string;
267: return result;
268: }
269:
270: int rpl_fnmatch(const char *pattern, const char *string, int flags)
271: {
272: static const char dummystring[2] = {' ', 0};
273: const int escape = !(flags & FNM_NOESCAPE);
274: const int slash = !!(flags & FNM_PATHNAME);
275: const int leading_dir = !!(flags & FNM_LEADING_DIR);
276: const char *strendseg;
277: const char *dummyptr;
278: const char *matchptr;
279: int wild;
280: /* For '*' wild processing only; surpress 'used before initialization'
281: * warnings with dummy initialization values;
282: */
283: const char *strstartseg = NULL;
284: const char *mismatch = NULL;
285: int matchlen = 0;
286:
287: if (strlen(pattern) > PATH_MAX || strlen(string) > PATH_MAX)
288: return FNM_NOMATCH;
289:
290: if (*pattern == '*')
291: goto firstsegment;
292:
293: while (*pattern && *string)
294: {
295: /* Pre-decode "\/" which has no special significance, and
296: * match balanced slashes, starting a new segment pattern
297: */
298: if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
299: ++pattern;
300: if (slash && (*pattern == '/') && (*string == '/')) {
301: ++pattern;
302: ++string;
303: }
304:
305: firstsegment:
306: /* At the beginning of each segment, validate leading period behavior.
307: */
308: if ((flags & FNM_PERIOD) && (*string == '.'))
309: {
310: if (*pattern == '.')
311: ++pattern;
312: else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
313: pattern += 2;
314: else
315: return FNM_NOMATCH;
316: ++string;
317: }
318:
319: /* Determine the end of string segment
320: *
321: * Presumes '/' character is unique, not composite in any MBCS encoding
322: */
323: if (slash) {
324: strendseg = strchr(string, '/');
325: if (!strendseg)
326: strendseg = strchr(string, '\0');
327: }
328: else {
329: strendseg = strchr(string, '\0');
330: }
331:
332: /* Allow pattern '*' to be consumed even with no remaining string to match
333: */
334: while (*pattern)
335: {
336: if ((string > strendseg)
337: || ((string == strendseg) && (*pattern != '*')))
338: break;
339:
340: if (slash && ((*pattern == '/')
341: || (escape && (*pattern == '\\')
342: && (pattern[1] == '/'))))
343: break;
344:
345: /* Reduce groups of '*' and '?' to n '?' matches
346: * followed by one '*' test for simplicity
347: */
348: for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern)
349: {
350: if (*pattern == '*') {
351: wild = 1;
352: }
353: else if (string < strendseg) { /* && (*pattern == '?') */
354: /* XXX: Advance 1 char for MBCS locale */
355: ++string;
356: }
357: else { /* (string >= strendseg) && (*pattern == '?') */
358: return FNM_NOMATCH;
359: }
360: }
361:
362: if (wild)
363: {
364: strstartseg = string;
365: mismatch = pattern;
366:
367: /* Count fixed (non '*') char matches remaining in pattern
368: * excluding '/' (or "\/") and '*'
369: */
370: for (matchptr = pattern, matchlen = 0; 1; ++matchlen)
371: {
372: if ((*matchptr == '\0')
373: || (slash && ((*matchptr == '/')
374: || (escape && (*matchptr == '\\')
375: && (matchptr[1] == '/')))))
376: {
377: /* Compare precisely this many trailing string chars,
378: * the resulting match needs no wildcard loop
379: */
380: /* XXX: Adjust for MBCS */
381: if (string + matchlen > strendseg)
382: return FNM_NOMATCH;
383:
384: string = strendseg - matchlen;
385: wild = 0;
386: break;
387: }
388:
389: if (*matchptr == '*')
390: {
391: /* Ensure at least this many trailing string chars remain
392: * for the first comparison
393: */
394: /* XXX: Adjust for MBCS */
395: if (string + matchlen > strendseg)
396: return FNM_NOMATCH;
397:
398: /* Begin first wild comparison at the current position */
399: break;
400: }
401:
402: /* Skip forward in pattern by a single character match
403: * Use a dummy fnmatch_ch() test to count one "[range]" escape
404: */
405: /* XXX: Adjust for MBCS */
406: if (escape && (*matchptr == '\\') && matchptr[1]) {
407: matchptr += 2;
408: }
409: else if (*matchptr == '[') {
410: dummyptr = dummystring;
411: fnmatch_ch(&matchptr, &dummyptr, flags);
412: }
413: else {
414: ++matchptr;
415: }
416: }
417: }
418:
419: /* Incrementally match string against the pattern
420: */
421: while (*pattern && (string < strendseg))
422: {
423: /* Success; begin a new wild pattern search
424: */
425: if (*pattern == '*')
426: break;
427:
428: if (slash && ((*string == '/')
429: || (*pattern == '/')
430: || (escape && (*pattern == '\\')
431: && (pattern[1] == '/'))))
432: break;
433:
434: /* Compare ch's (the pattern is advanced over "\/" to the '/',
435: * but slashes will mismatch, and are not consumed)
436: */
437: if (!fnmatch_ch(&pattern, &string, flags))
438: continue;
439:
440: /* Failed to match, loop against next char offset of string segment
441: * until not enough string chars remain to match the fixed pattern
442: */
443: if (wild) {
444: /* XXX: Advance 1 char for MBCS locale */
445: string = ++strstartseg;
446: if (string + matchlen > strendseg)
447: return FNM_NOMATCH;
448:
449: pattern = mismatch;
450: continue;
451: }
452: else
453: return FNM_NOMATCH;
454: }
455: }
456:
457: if (*string && !((slash || leading_dir) && (*string == '/')))
458: return FNM_NOMATCH;
459:
460: if (*pattern && !(slash && ((*pattern == '/')
461: || (escape && (*pattern == '\\')
462: && (pattern[1] == '/')))))
463: return FNM_NOMATCH;
464:
465: if (leading_dir && !*pattern && *string == '/')
466: return 0;
467: }
468:
469: /* Where both pattern and string are at EOS, declare success
470: */
471: if (!*string && !*pattern)
472: return 0;
473:
474: /* pattern didn't match to the end of string */
475: return FNM_NOMATCH;
476: }
477: #endif /* HAVE_FNMATCH */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>