File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sudo / compat / fnmatch.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 12:26:49 2012 UTC (12 years, 1 month ago) by misho
Branches: sudo, MAIN
CVS tags: v1_8_5p1, HEAD
sudo 1.8.5p1

    1: /*	$OpenBSD: fnmatch.c,v 1.15 2011/02/10 21:31:59 stsp Exp $	*/
    2: 
    3: /* Copyright (c) 2011, VMware, Inc.
    4:  * All rights reserved.
    5:  * 
    6:  * Redistribution and use in source and binary forms, with or without
    7:  * modification, are permitted provided that the following conditions are met:
    8:  *     * Redistributions of source code must retain the above copyright
    9:  *       notice, this list of conditions and the following disclaimer.
   10:  *     * Redistributions in binary form must reproduce the above copyright
   11:  *       notice, this list of conditions and the following disclaimer in the
   12:  *       documentation and/or other materials provided with the distribution.
   13:  *     * Neither the name of the VMware, Inc. nor the names of its contributors
   14:  *       may be used to endorse or promote products derived from this software
   15:  *       without specific prior written permission.
   16:  * 
   17:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   18:  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20:  * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
   21:  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   22:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   23:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   24:  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27:  */
   28: 
   29: /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
   30:  *
   31:  * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
   32:  * as described in;
   33:  *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
   34:  *
   35:  * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
   36:  * from chapter 2. "Shell Command Language"
   37:  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
   38:  * where; 1. A bracket expression starting with an unquoted <circumflex> '^' 
   39:  * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' 
   40:  * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading 
   41:  * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
   42:  * a valid bracket expression is treated as an ordinary character; 4. a differing
   43:  * number of consecutive slashes within pattern and string will NOT match;
   44:  * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
   45:  *
   46:  * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
   47:  * from chapter 9, "Regular Expressions"
   48:  *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
   49:  * with no support for collating symbols, equivalence class expressions or 
   50:  * character class expressions.  A partial range expression with a leading 
   51:  * hyphen following a valid range expression will match only the ordinary
   52:  * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters 
   53:  * 'a' through 'm', a <hyphen> '-', or a 'z').
   54:  *
   55:  * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
   56:  * path segment of string, and FNM_CASEFOLD to ignore alpha case.
   57:  *
   58:  * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
   59:  * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
   60:  * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
   61:  * nonalpha characters within a range.
   62:  *
   63:  * XXX comments below indicate porting required for multi-byte character sets
   64:  * and non-POSIX locale collation orders; requires mbr* APIs to track shift
   65:  * state of pattern and string (rewinding pattern and string repeatedly).
   66:  *
   67:  * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
   68:  * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
   69:  * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
   70:  */
   71: 
   72: #include <config.h>
   73: 
   74: #include <sys/types.h>
   75: 
   76: #include <stdio.h>
   77: #include <ctype.h>
   78: #ifdef HAVE_STRING_H
   79: # include <string.h>
   80: #endif /* HAVE_STRING_H */
   81: #ifdef HAVE_STRINGS_H
   82: # include <strings.h>
   83: #endif /* HAVE_STRINGS_H */
   84: #include <limits.h>
   85: 
   86: #include "missing.h"
   87: #include "compat/charclass.h"
   88: #include "compat/fnmatch.h"
   89: 
   90: #define	RANGE_MATCH	1
   91: #define	RANGE_NOMATCH	0
   92: #define	RANGE_ERROR	(-1)
   93: 
   94: static int
   95: classmatch(const char *pattern, char test, int foldcase, const char **ep)
   96: {
   97: 	const char * const mismatch = pattern;
   98: 	const char *colon;
   99: 	struct cclass *cc;
  100: 	int rval = RANGE_NOMATCH;
  101: 	size_t len;
  102: 
  103: 	if (pattern[0] != '[' || pattern[1] != ':') {
  104: 		*ep = mismatch;
  105: 		return RANGE_ERROR;
  106: 	}
  107: 	pattern += 2;
  108: 
  109: 	if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
  110: 		*ep = mismatch;
  111: 		return RANGE_ERROR;
  112: 	}
  113: 	*ep = colon + 2;
  114: 	len = (size_t)(colon - pattern);
  115: 
  116: 	if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
  117: 		pattern = "lower:]";
  118: 	for (cc = cclasses; cc->name != NULL; cc++) {
  119: 		if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
  120: 			if (cc->isctype((unsigned char)test))
  121: 				rval = RANGE_MATCH;
  122: 			break;
  123: 		}
  124: 	}
  125: 	if (cc->name == NULL) {
  126: 		/* invalid character class, treat as normal text */
  127: 		*ep = mismatch;
  128: 		rval = RANGE_ERROR;
  129: 	}
  130: 	return rval;
  131: }
  132: 
  133: /* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
  134:  * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, 
  135:  * however the "\/" sequence is advanced to '/'.
  136:  *
  137:  * Both pattern and string are **char to support pointer increment of arbitrary
  138:  * multibyte characters for the given locale, in a later iteration of this code
  139:  */
  140: static int fnmatch_ch(const char **pattern, const char **string, int flags)
  141: {
  142:     const char * const mismatch = *pattern;
  143:     const int nocase = !!(flags & FNM_CASEFOLD);
  144:     const int escape = !(flags & FNM_NOESCAPE);
  145:     const int slash = !!(flags & FNM_PATHNAME);
  146:     int result = FNM_NOMATCH;
  147:     const char *startch;
  148:     int negate;
  149: 
  150:     if (**pattern == '[')
  151:     {
  152:         ++*pattern;
  153: 
  154:         /* Handle negation, either leading ! or ^ operators (never both) */
  155:         negate = ((**pattern == '!') || (**pattern == '^'));
  156:         if (negate)
  157:             ++*pattern;
  158: 
  159:         /* ']' is an ordinary character at the start of the range pattern */
  160:         if (**pattern == ']')
  161:             goto leadingclosebrace;
  162: 
  163:         while (**pattern)
  164:         {
  165:             if (**pattern == ']') {
  166:                 ++*pattern;
  167:                 /* XXX: Fix for MBCS character width */
  168:                 ++*string;
  169:                 return (result ^ negate);
  170:             }
  171: 
  172:             if (escape && (**pattern == '\\')) {
  173:                 ++*pattern;
  174: 
  175:                 /* Patterns must be terminated with ']', not EOS */
  176:                 if (!**pattern)
  177:                     break;
  178:             }
  179: 
  180:             /* Patterns must be terminated with ']' not '/' */
  181:             if (slash && (**pattern == '/'))
  182:                 break;
  183: 
  184:             /* Match character classes. */
  185:             if (classmatch(*pattern, **string, nocase, pattern)
  186:                 == RANGE_MATCH) {
  187:                 result = 0;
  188:                 continue;
  189:             }
  190: 
  191: leadingclosebrace:
  192:             /* Look at only well-formed range patterns; 
  193:              * "x-]" is not allowed unless escaped ("x-\]")
  194:              * XXX: Fix for locale/MBCS character width
  195:              */
  196:             if (((*pattern)[1] == '-') && ((*pattern)[2] != ']'))
  197:             {
  198:                 startch = *pattern;
  199:                 *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
  200: 
  201:                 /* NOT a properly balanced [expr] pattern, EOS terminated 
  202:                  * or ranges containing a slash in FNM_PATHNAME mode pattern
  203:                  * fall out to to the rewind and test '[' literal code path
  204:                  */
  205:                 if (!**pattern || (slash && (**pattern == '/')))
  206:                     break;
  207: 
  208:                 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
  209:                 if ((**string >= *startch) && (**string <= **pattern))
  210:                     result = 0;
  211:                 else if (nocase && (isupper((unsigned char)**string) ||
  212: 				    isupper((unsigned char)*startch) ||
  213: 				    isupper((unsigned char)**pattern))
  214:                             && (tolower((unsigned char)**string) >= tolower((unsigned char)*startch)) 
  215:                             && (tolower((unsigned char)**string) <= tolower((unsigned char)**pattern)))
  216:                     result = 0;
  217: 
  218:                 ++*pattern;
  219:                 continue;
  220:             }
  221: 
  222:             /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
  223:             if ((**string == **pattern))
  224:                 result = 0;
  225:             else if (nocase && (isupper((unsigned char)**string) ||
  226: 				isupper((unsigned char)**pattern))
  227:                             && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
  228:                 result = 0;
  229: 
  230:             ++*pattern;
  231:         }
  232: 
  233:         /* NOT a properly balanced [expr] pattern; Rewind
  234:          * and reset result to test '[' literal
  235:          */
  236:         *pattern = mismatch;
  237:         result = FNM_NOMATCH;
  238:     }
  239:     else if (**pattern == '?') {
  240:         /* Optimize '?' match before unescaping **pattern */
  241:         if (!**string || (slash && (**string == '/')))
  242:             return FNM_NOMATCH;
  243:         result = 0;
  244:         goto fnmatch_ch_success;
  245:     }
  246:     else if (escape && (**pattern == '\\') && (*pattern)[1]) {
  247:         ++*pattern;
  248:     }
  249: 
  250:     /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
  251:     if (**string == **pattern)
  252:         result = 0;
  253:     else if (nocase && (isupper((unsigned char)**string) || isupper((unsigned char)**pattern))
  254:                     && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
  255:         result = 0;
  256: 
  257:     /* Refuse to advance over trailing slash or nulls
  258:      */
  259:     if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/'))))
  260:         return result;
  261: 
  262: fnmatch_ch_success:
  263:     ++*pattern;
  264:     ++*string;
  265:     return result;
  266: }
  267: 
/*
 * Replacement fnmatch(): match string against the shell pattern.
 *
 *   pattern  the pattern to match with
 *   string   the string to test
 *   flags    FNM_NOESCAPE, FNM_PATHNAME, FNM_PERIOD and FNM_LEADING_DIR are
 *            examined here; flags is also passed through to fnmatch_ch()
 *
 * Returns 0 when string matches pattern and FNM_NOMATCH otherwise.  A
 * pattern or string longer than PATH_MAX is reported as FNM_NOMATCH.
 *
 * The string is processed one segment at a time: with FNM_PATHNAME a
 * segment ends at the next '/', otherwise at EOS.  Within a segment, runs of
 * '*' and '?' are reduced to a count of skipped characters plus one wildcard
 * search, and the remaining elements are compared via fnmatch_ch().
 */
int rpl_fnmatch(const char *pattern, const char *string, int flags)
{
    /* One-character scratch subject used when fnmatch_ch() is called only to
     * step over a "[...]" element while counting fixed pattern characters.
     */
    static const char dummystring[2] = {' ', 0};
    const int escape = !(flags & FNM_NOESCAPE);
    const int slash = !!(flags & FNM_PATHNAME);
    const int leading_dir = !!(flags & FNM_LEADING_DIR);
    const char *strendseg;
    const char *dummyptr;
    const char *matchptr;
    int wild;
    /* For '*' wild processing only; suppress 'used before initialization'
     * warnings with dummy initialization values;
     */
    const char *strstartseg = NULL;
    const char *mismatch = NULL;
    int matchlen = 0;

    /* Over-long inputs are rejected outright rather than matched */
    if (strlen(pattern) > PATH_MAX || strlen(string) > PATH_MAX)
	return FNM_NOMATCH;

    /* A leading '*' must be processed even when string is empty, so enter
     * the loop body directly instead of testing the loop condition.
     */
    if (*pattern == '*')
        goto firstsegment;

    while (*pattern && *string)
    {
        /* Pre-decode "\/" which has no special significance, and
         * match balanced slashes, starting a new segment pattern
         */
        if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
            ++pattern;
        if (slash && (*pattern == '/') && (*string == '/')) {
            ++pattern;
            ++string;
        }

firstsegment:
        /* At the beginning of each segment, validate leading period behavior.
         * With FNM_PERIOD a leading '.' in string is only accepted against
         * an explicit "." or "\." in the pattern.
         */
        if ((flags & FNM_PERIOD) && (*string == '.'))
        {
            if (*pattern == '.')
                ++pattern;
            else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
                pattern += 2;
            else
                return FNM_NOMATCH;
            ++string;
        }

        /* Determine the end of string segment
         *
         * Presumes '/' character is unique, not composite in any MBCS encoding
         */
        if (slash) {
            strendseg = strchr(string, '/');
            if (!strendseg)
                strendseg = strchr(string, '\0');
        }
        else {
            strendseg = strchr(string, '\0');
        }

        /* Allow pattern '*' to be consumed even with no remaining string to match
         */
        while (*pattern)
        {
            /* Stop once the segment is exhausted, unless a '*' remains */
            if ((string > strendseg)
                || ((string == strendseg) && (*pattern != '*')))
                break;

            /* A pattern '/' (or "\/") ends the pattern segment */
            if (slash && ((*pattern == '/')
                           || (escape && (*pattern == '\\')
                                      && (pattern[1] == '/'))))
                break;

            /* Reduce groups of '*' and '?' to n '?' matches
             * followed by one '*' test for simplicity
             */
            for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern)
            {
                if (*pattern == '*') {
                    wild = 1;
                }
                else if (string < strendseg) {  /* && (*pattern == '?') */
                    /* XXX: Advance 1 char for MBCS locale */
                    ++string;
                }
                else {  /* (string >= strendseg) && (*pattern == '?') */
                    return FNM_NOMATCH;
                }
            }

            if (wild)
            {
                /* Remember where this wildcard search began so a failed
                 * comparison can retry one string character further on.
                 */
                strstartseg = string;
                mismatch = pattern;

                /* Count fixed (non '*') char matches remaining in pattern
                 * excluding '/' (or "\/") and '*'
                 */
                for (matchptr = pattern, matchlen = 0; 1; ++matchlen)
                {
                    if ((*matchptr == '\0')
                        || (slash && ((*matchptr == '/')
                                      || (escape && (*matchptr == '\\')
                                                 && (matchptr[1] == '/')))))
                    {
                        /* Compare precisely this many trailing string chars,
                         * the resulting match needs no wildcard loop
                         */
                        /* XXX: Adjust for MBCS */
                        if (string + matchlen > strendseg)
                            return FNM_NOMATCH;

                        string = strendseg - matchlen;
                        wild = 0;
                        break;
                    }

                    if (*matchptr == '*')
                    {
                        /* Ensure at least this many trailing string chars remain
                         * for the first comparison
                         */
                        /* XXX: Adjust for MBCS */
                        if (string + matchlen > strendseg)
                            return FNM_NOMATCH;

                        /* Begin first wild comparison at the current position */
                        break;
                    }

                    /* Skip forward in pattern by a single character match
                     * Use a dummy fnmatch_ch() test to count one "[range]" escape
                     */
                    /* XXX: Adjust for MBCS */
                    if (escape && (*matchptr == '\\') && matchptr[1]) {
                        matchptr += 2;
                    }
                    else if (*matchptr == '[') {
                        /* Only the advance of matchptr matters here; the
                         * match result is deliberately ignored.
                         */
                        dummyptr = dummystring;
                        fnmatch_ch(&matchptr, &dummyptr, flags);
                    }
                    else {
                        ++matchptr;
                    }
                }
            }

            /* Incrementally match string against the pattern
             */
            while (*pattern && (string < strendseg))
            {
                /* Success; begin a new wild pattern search
                 */
                if (*pattern == '*')
                    break;

                if (slash && ((*string == '/')
                              || (*pattern == '/')
                              || (escape && (*pattern == '\\')
                                         && (pattern[1] == '/'))))
                    break;

                /* Compare ch's (the pattern is advanced over "\/" to the '/',
                 * but slashes will mismatch, and are not consumed)
                 */
                if (!fnmatch_ch(&pattern, &string, flags))
                    continue;

                /* Failed to match, loop against next char offset of string segment
                 * until not enough string chars remain to match the fixed pattern
                 */
                if (wild) {
                    /* XXX: Advance 1 char for MBCS locale */
                    string = ++strstartseg;
                    if (string + matchlen > strendseg)
                        return FNM_NOMATCH;

                    pattern = mismatch;
                    continue;
                }
                else
                    return FNM_NOMATCH;
            }
        }

        /* Leftover string is acceptable only at a '/' segment boundary
         * (FNM_PATHNAME) or a trailing path (FNM_LEADING_DIR).
         */
        if (*string && !((slash || leading_dir) && (*string == '/')))
            return FNM_NOMATCH;

        /* Leftover pattern is acceptable only at a '/' (or "\/") boundary */
        if (*pattern && !(slash && ((*pattern == '/')
                                    || (escape && (*pattern == '\\')
                                               && (pattern[1] == '/')))))
            return FNM_NOMATCH;

        /* FNM_LEADING_DIR: pattern is exhausted and string continues with '/' */
        if (leading_dir && !*pattern && *string == '/')
            return 0;
    }

    /* Where both pattern and string are at EOS, declare success
     */
    if (!*string && !*pattern)
        return 0;

    /* pattern didn't match to the end of string */
    return FNM_NOMATCH;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>