File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sudo / compat / fnmatch.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 10:46:11 2013 UTC (10 years, 11 months ago) by misho
Branches: sudo, MAIN
CVS tags: v1_8_8p0, v1_8_8, v1_8_7p0, v1_8_7, v1_8_10p3_0, v1_8_10p3, HEAD
1.8.7

    1: /*	$OpenBSD: fnmatch.c,v 1.15 2011/02/10 21:31:59 stsp Exp $	*/
    2: 
    3: /* Copyright (c) 2011, VMware, Inc.
    4:  * All rights reserved.
    5:  * 
    6:  * Redistribution and use in source and binary forms, with or without
    7:  * modification, are permitted provided that the following conditions are met:
    8:  *     * Redistributions of source code must retain the above copyright
    9:  *       notice, this list of conditions and the following disclaimer.
   10:  *     * Redistributions in binary form must reproduce the above copyright
   11:  *       notice, this list of conditions and the following disclaimer in the
   12:  *       documentation and/or other materials provided with the distribution.
   13:  *     * Neither the name of the VMware, Inc. nor the names of its contributors
   14:  *       may be used to endorse or promote products derived from this software
   15:  *       without specific prior written permission.
   16:  * 
   17:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   18:  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20:  * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
   21:  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   22:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   23:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   24:  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27:  */
   28: 
   29: /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
   30:  *
   31:  * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
   32:  * as described in;
   33:  *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
   34:  *
   35:  * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
   36:  * from chapter 2. "Shell Command Language"
   37:  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
   38:  * where; 1. A bracket expression starting with an unquoted <circumflex> '^' 
   39:  * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' 
   40:  * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading 
   41:  * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
   42:  * a valid bracket expression is treated as an ordinary character; 4. a differing
   43:  * number of consecutive slashes within pattern and string will NOT match;
   44:  * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
   45:  *
   46:  * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
   47:  * from chapter 9, "Regular Expressions"
   48:  *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
   49:  * with no support for collating symbols, equivalence class expressions or 
   50:  * character class expressions.  A partial range expression with a leading 
   51:  * hyphen following a valid range expression will match only the ordinary
   52:  * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters 
   53:  * 'a' through 'm', a <hyphen> '-', or a 'z').
   54:  *
   55:  * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
   56:  * path segment of string, and FNM_CASEFOLD to ignore alpha case.
   57:  *
   58:  * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
   59:  * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
   60:  * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
   61:  * nonalpha characters within a range.
   62:  *
   63:  * XXX comments below indicate porting required for multi-byte character sets
   64:  * and non-POSIX locale collation orders; requires mbr* APIs to track shift
   65:  * state of pattern and string (rewinding pattern and string repeatedly).
   66:  *
   67:  * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
   68:  * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
   69:  * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
   70:  */
   71: 
   72: #include <config.h>
   73: 
   74: #ifndef HAVE_FNMATCH
   75: 
   76: #include <sys/types.h>
   77: 
   78: #include <stdio.h>
   79: #include <ctype.h>
   80: #ifdef HAVE_STRING_H
   81: # include <string.h>
   82: #endif /* HAVE_STRING_H */
   83: #ifdef HAVE_STRINGS_H
   84: # include <strings.h>
   85: #endif /* HAVE_STRINGS_H */
   86: #include <limits.h>
   87: 
   88: #include "missing.h"
   89: #include "compat/charclass.h"
   90: #include "compat/fnmatch.h"
   91: 
   92: #define	RANGE_MATCH	1
   93: #define	RANGE_NOMATCH	0
   94: #define	RANGE_ERROR	(-1)
   95: 
   96: static int
   97: classmatch(const char *pattern, char test, int foldcase, const char **ep)
   98: {
   99: 	const char * const mismatch = pattern;
  100: 	const char *colon;
  101: 	struct cclass *cc;
  102: 	int rval = RANGE_NOMATCH;
  103: 	size_t len;
  104: 
  105: 	if (pattern[0] != '[' || pattern[1] != ':') {
  106: 		*ep = mismatch;
  107: 		return RANGE_ERROR;
  108: 	}
  109: 	pattern += 2;
  110: 
  111: 	if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
  112: 		*ep = mismatch;
  113: 		return RANGE_ERROR;
  114: 	}
  115: 	*ep = colon + 2;
  116: 	len = (size_t)(colon - pattern);
  117: 
  118: 	if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
  119: 		pattern = "lower:]";
  120: 	for (cc = cclasses; cc->name != NULL; cc++) {
  121: 		if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
  122: 			if (cc->isctype((unsigned char)test))
  123: 				rval = RANGE_MATCH;
  124: 			break;
  125: 		}
  126: 	}
  127: 	if (cc->name == NULL) {
  128: 		/* invalid character class, treat as normal text */
  129: 		*ep = mismatch;
  130: 		rval = RANGE_ERROR;
  131: 	}
  132: 	return rval;
  133: }
  134: 
  135: /* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
  136:  * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, 
  137:  * however the "\/" sequence is advanced to '/'.
  138:  *
  139:  * Both pattern and string are **char to support pointer increment of arbitrary
  140:  * multibyte characters for the given locale, in a later iteration of this code
  141:  */
  142: static int fnmatch_ch(const char **pattern, const char **string, int flags)
  143: {
  144:     const char * const mismatch = *pattern;
  145:     const int nocase = !!(flags & FNM_CASEFOLD);
  146:     const int escape = !(flags & FNM_NOESCAPE);
  147:     const int slash = !!(flags & FNM_PATHNAME);
  148:     int result = FNM_NOMATCH;
  149:     const char *startch;
  150:     int negate;
  151: 
  152:     if (**pattern == '[')
  153:     {
  154:         ++*pattern;
  155: 
  156:         /* Handle negation, either leading ! or ^ operators (never both) */
  157:         negate = ((**pattern == '!') || (**pattern == '^'));
  158:         if (negate)
  159:             ++*pattern;
  160: 
  161:         /* ']' is an ordinary character at the start of the range pattern */
  162:         if (**pattern == ']')
  163:             goto leadingclosebrace;
  164: 
  165:         while (**pattern)
  166:         {
  167:             if (**pattern == ']') {
  168:                 ++*pattern;
  169:                 /* XXX: Fix for MBCS character width */
  170:                 ++*string;
  171:                 return (result ^ negate);
  172:             }
  173: 
  174:             if (escape && (**pattern == '\\')) {
  175:                 ++*pattern;
  176: 
  177:                 /* Patterns must be terminated with ']', not EOS */
  178:                 if (!**pattern)
  179:                     break;
  180:             }
  181: 
  182:             /* Patterns must be terminated with ']' not '/' */
  183:             if (slash && (**pattern == '/'))
  184:                 break;
  185: 
  186:             /* Match character classes. */
  187:             if (classmatch(*pattern, **string, nocase, pattern)
  188:                 == RANGE_MATCH) {
  189:                 result = 0;
  190:                 continue;
  191:             }
  192: 
  193: leadingclosebrace:
  194:             /* Look at only well-formed range patterns; 
  195:              * "x-]" is not allowed unless escaped ("x-\]")
  196:              * XXX: Fix for locale/MBCS character width
  197:              */
  198:             if (((*pattern)[1] == '-') && ((*pattern)[2] != ']'))
  199:             {
  200:                 startch = *pattern;
  201:                 *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
  202: 
  203:                 /* NOT a properly balanced [expr] pattern, EOS terminated 
  204:                  * or ranges containing a slash in FNM_PATHNAME mode pattern
  205:                  * fall out to to the rewind and test '[' literal code path
  206:                  */
  207:                 if (!**pattern || (slash && (**pattern == '/')))
  208:                     break;
  209: 
  210:                 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
  211:                 if ((**string >= *startch) && (**string <= **pattern))
  212:                     result = 0;
  213:                 else if (nocase && (isupper((unsigned char)**string) ||
  214: 				    isupper((unsigned char)*startch) ||
  215: 				    isupper((unsigned char)**pattern))
  216:                             && (tolower((unsigned char)**string) >= tolower((unsigned char)*startch)) 
  217:                             && (tolower((unsigned char)**string) <= tolower((unsigned char)**pattern)))
  218:                     result = 0;
  219: 
  220:                 ++*pattern;
  221:                 continue;
  222:             }
  223: 
  224:             /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
  225:             if ((**string == **pattern))
  226:                 result = 0;
  227:             else if (nocase && (isupper((unsigned char)**string) ||
  228: 				isupper((unsigned char)**pattern))
  229:                             && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
  230:                 result = 0;
  231: 
  232:             ++*pattern;
  233:         }
  234: 
  235:         /* NOT a properly balanced [expr] pattern; Rewind
  236:          * and reset result to test '[' literal
  237:          */
  238:         *pattern = mismatch;
  239:         result = FNM_NOMATCH;
  240:     }
  241:     else if (**pattern == '?') {
  242:         /* Optimize '?' match before unescaping **pattern */
  243:         if (!**string || (slash && (**string == '/')))
  244:             return FNM_NOMATCH;
  245:         result = 0;
  246:         goto fnmatch_ch_success;
  247:     }
  248:     else if (escape && (**pattern == '\\') && (*pattern)[1]) {
  249:         ++*pattern;
  250:     }
  251: 
  252:     /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
  253:     if (**string == **pattern)
  254:         result = 0;
  255:     else if (nocase && (isupper((unsigned char)**string) || isupper((unsigned char)**pattern))
  256:                     && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
  257:         result = 0;
  258: 
  259:     /* Refuse to advance over trailing slash or nulls
  260:      */
  261:     if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/'))))
  262:         return result;
  263: 
  264: fnmatch_ch_success:
  265:     ++*pattern;
  266:     ++*string;
  267:     return result;
  268: }
  269: 
  270: int rpl_fnmatch(const char *pattern, const char *string, int flags)
  271: {
  272:     static const char dummystring[2] = {' ', 0};
  273:     const int escape = !(flags & FNM_NOESCAPE);
  274:     const int slash = !!(flags & FNM_PATHNAME);
  275:     const int leading_dir = !!(flags & FNM_LEADING_DIR);
  276:     const char *strendseg;
  277:     const char *dummyptr;
  278:     const char *matchptr;
  279:     int wild;
  280:     /* For '*' wild processing only; surpress 'used before initialization'
  281:      * warnings with dummy initialization values;
  282:      */
  283:     const char *strstartseg = NULL;
  284:     const char *mismatch = NULL;
  285:     int matchlen = 0;
  286: 
  287:     if (strlen(pattern) > PATH_MAX || strlen(string) > PATH_MAX)
  288: 	return FNM_NOMATCH;
  289: 
  290:     if (*pattern == '*')
  291:         goto firstsegment;
  292: 
  293:     while (*pattern && *string)
  294:     {
  295:         /* Pre-decode "\/" which has no special significance, and
  296:          * match balanced slashes, starting a new segment pattern
  297:          */
  298:         if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
  299:             ++pattern;
  300:         if (slash && (*pattern == '/') && (*string == '/')) {
  301:             ++pattern;
  302:             ++string;
  303:         }            
  304: 
  305: firstsegment:
  306:         /* At the beginning of each segment, validate leading period behavior.
  307:          */
  308:         if ((flags & FNM_PERIOD) && (*string == '.'))
  309:         {
  310:             if (*pattern == '.')
  311:                 ++pattern;
  312:             else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
  313:                 pattern += 2;
  314:             else
  315:                 return FNM_NOMATCH;
  316:             ++string;
  317:         }
  318: 
  319:         /* Determine the end of string segment
  320:          *
  321:          * Presumes '/' character is unique, not composite in any MBCS encoding
  322:          */
  323:         if (slash) {
  324:             strendseg = strchr(string, '/');
  325:             if (!strendseg)
  326:                 strendseg = strchr(string, '\0');
  327:         }
  328:         else {
  329:             strendseg = strchr(string, '\0');
  330:         }
  331: 
  332:         /* Allow pattern '*' to be consumed even with no remaining string to match
  333:          */
  334:         while (*pattern)
  335:         {
  336:             if ((string > strendseg)
  337:                 || ((string == strendseg) && (*pattern != '*')))
  338:                 break;
  339: 
  340:             if (slash && ((*pattern == '/')
  341:                            || (escape && (*pattern == '\\')
  342:                                       && (pattern[1] == '/'))))
  343:                 break;
  344: 
  345:             /* Reduce groups of '*' and '?' to n '?' matches
  346:              * followed by one '*' test for simplicity
  347:              */
  348:             for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern)
  349:             {
  350:                 if (*pattern == '*') {
  351:                     wild = 1;
  352:                 }
  353:                 else if (string < strendseg) {  /* && (*pattern == '?') */
  354:                     /* XXX: Advance 1 char for MBCS locale */
  355:                     ++string;
  356:                 }
  357:                 else {  /* (string >= strendseg) && (*pattern == '?') */
  358:                     return FNM_NOMATCH;
  359:                 }
  360:             }
  361: 
  362:             if (wild)
  363:             {
  364:                 strstartseg = string;
  365:                 mismatch = pattern;
  366: 
  367:                 /* Count fixed (non '*') char matches remaining in pattern
  368:                  * excluding '/' (or "\/") and '*'
  369:                  */
  370:                 for (matchptr = pattern, matchlen = 0; 1; ++matchlen)
  371:                 {
  372:                     if ((*matchptr == '\0') 
  373:                         || (slash && ((*matchptr == '/')
  374:                                       || (escape && (*matchptr == '\\')
  375:                                                  && (matchptr[1] == '/')))))
  376:                     {
  377:                         /* Compare precisely this many trailing string chars,
  378:                          * the resulting match needs no wildcard loop
  379:                          */
  380:                         /* XXX: Adjust for MBCS */
  381:                         if (string + matchlen > strendseg)
  382:                             return FNM_NOMATCH;
  383: 
  384:                         string = strendseg - matchlen;
  385:                         wild = 0;
  386:                         break;
  387:                     }
  388: 
  389:                     if (*matchptr == '*')
  390:                     {
  391:                         /* Ensure at least this many trailing string chars remain
  392:                          * for the first comparison
  393:                          */
  394:                         /* XXX: Adjust for MBCS */
  395:                         if (string + matchlen > strendseg)
  396:                             return FNM_NOMATCH;
  397: 
  398:                         /* Begin first wild comparison at the current position */
  399:                         break;
  400:                     }
  401: 
  402:                     /* Skip forward in pattern by a single character match
  403:                      * Use a dummy fnmatch_ch() test to count one "[range]" escape
  404:                      */ 
  405:                     /* XXX: Adjust for MBCS */
  406:                     if (escape && (*matchptr == '\\') && matchptr[1]) {
  407:                         matchptr += 2;
  408:                     }
  409:                     else if (*matchptr == '[') {
  410:                         dummyptr = dummystring;
  411:                         fnmatch_ch(&matchptr, &dummyptr, flags);
  412:                     }
  413:                     else {
  414:                         ++matchptr;
  415:                     }
  416:                 }
  417:             }
  418: 
  419:             /* Incrementally match string against the pattern
  420:              */
  421:             while (*pattern && (string < strendseg))
  422:             {
  423:                 /* Success; begin a new wild pattern search
  424:                  */
  425:                 if (*pattern == '*')
  426:                     break;
  427: 
  428:                 if (slash && ((*string == '/')
  429:                               || (*pattern == '/')
  430:                               || (escape && (*pattern == '\\')
  431:                                          && (pattern[1] == '/'))))
  432:                     break;
  433: 
  434:                 /* Compare ch's (the pattern is advanced over "\/" to the '/',
  435:                  * but slashes will mismatch, and are not consumed)
  436:                  */
  437:                 if (!fnmatch_ch(&pattern, &string, flags))
  438:                     continue;
  439: 
  440:                 /* Failed to match, loop against next char offset of string segment 
  441:                  * until not enough string chars remain to match the fixed pattern
  442:                  */
  443:                 if (wild) {
  444:                     /* XXX: Advance 1 char for MBCS locale */
  445:                     string = ++strstartseg;
  446:                     if (string + matchlen > strendseg)
  447:                         return FNM_NOMATCH;
  448: 
  449:                     pattern = mismatch;
  450:                     continue;
  451:                 }
  452:                 else
  453:                     return FNM_NOMATCH;
  454:             }
  455:         }
  456: 
  457:         if (*string && !((slash || leading_dir) && (*string == '/')))
  458:             return FNM_NOMATCH;
  459: 
  460:         if (*pattern && !(slash && ((*pattern == '/')
  461:                                     || (escape && (*pattern == '\\')
  462:                                                && (pattern[1] == '/')))))
  463:             return FNM_NOMATCH;
  464: 
  465:         if (leading_dir && !*pattern && *string == '/')
  466:             return 0;
  467:     }
  468: 
  469:     /* Where both pattern and string are at EOS, declare success
  470:      */
  471:     if (!*string && !*pattern)
  472:         return 0;
  473: 
  474:     /* pattern didn't match to the end of string */
  475:     return FNM_NOMATCH;
  476: }
  477: #endif /* HAVE_FNMATCH */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>