Annotation of embedaddon/sudo/compat/fnmatch.c, revision 1.1.1.3

1.1.1.2   misho       1: /*     $OpenBSD: fnmatch.c,v 1.15 2011/02/10 21:31:59 stsp Exp $       */
                      2: 
                      3: /* Copyright (c) 2011, VMware, Inc.
                      4:  * All rights reserved.
                      5:  * 
1.1       misho       6:  * Redistribution and use in source and binary forms, with or without
1.1.1.2   misho       7:  * modification, are permitted provided that the following conditions are met:
                      8:  *     * Redistributions of source code must retain the above copyright
                      9:  *       notice, this list of conditions and the following disclaimer.
                     10:  *     * Redistributions in binary form must reproduce the above copyright
                     11:  *       notice, this list of conditions and the following disclaimer in the
                     12:  *       documentation and/or other materials provided with the distribution.
                     13:  *     * Neither the name of the VMware, Inc. nor the names of its contributors
                     14:  *       may be used to endorse or promote products derived from this software
                     15:  *       without specific prior written permission.
                     16:  * 
                     17:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     18:  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1.1       misho      19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.1.1.2   misho      20:  * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
                     21:  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
                     22:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
                     23:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
                     24:  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     25:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     26:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1.1       misho      27:  */
                     28: 
1.1.1.2   misho      29: /* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
                     30:  *
                     31:  * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
                     32:  * as described in;
                     33:  *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
                     34:  *
                     35:  * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
                     36:  * from chapter 2. "Shell Command Language"
                     37:  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
                     38:  * where; 1. A bracket expression starting with an unquoted <circumflex> '^' 
                     39:  * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' 
                     40:  * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading 
                     41:  * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
                     42:  * a valid bracket expression is treated as an ordinary character; 4. a differing
                     43:  * number of consecutive slashes within pattern and string will NOT match;
                     44:  * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
                     45:  *
                     46:  * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
                     47:  * from chapter 9, "Regular Expressions"
                     48:  *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
                     49:  * with no support for collating symbols, equivalence class expressions or 
                     50:  * character class expressions.  A partial range expression with a leading 
                     51:  * hyphen following a valid range expression will match only the ordinary
                     52:  * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters 
                     53:  * 'a' through 'm', a <hyphen> '-', or a 'z').
                     54:  *
                     55:  * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
                     56:  * path segment of string, and FNM_CASEFOLD to ignore alpha case.
                     57:  *
                     58:  * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
                     59:  * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
                     60:  * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
                     61:  * nonalpha characters within a range.
                     62:  *
                     63:  * XXX comments below indicate porting required for multi-byte character sets
                     64:  * and non-POSIX locale collation orders; requires mbr* APIs to track shift
                     65:  * state of pattern and string (rewinding pattern and string repeatedly).
                     66:  *
                     67:  * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
                     68:  * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
                     69:  * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
1.1       misho      70:  */
                     71: 
                     72: #include <config.h>
                     73: 
1.1.1.3 ! misho      74: #ifndef HAVE_FNMATCH
        !            75: 
1.1       misho      76: #include <sys/types.h>
                     77: 
                     78: #include <stdio.h>
                     79: #include <ctype.h>
                     80: #ifdef HAVE_STRING_H
                     81: # include <string.h>
                     82: #endif /* HAVE_STRING_H */
                     83: #ifdef HAVE_STRINGS_H
                     84: # include <strings.h>
                     85: #endif /* HAVE_STRINGS_H */
1.1.1.2   misho      86: #include <limits.h>
1.1       misho      87: 
                     88: #include "missing.h"
                     89: #include "compat/charclass.h"
                     90: #include "compat/fnmatch.h"
                     91: 
                     92: #define        RANGE_MATCH     1
                     93: #define        RANGE_NOMATCH   0
                     94: #define        RANGE_ERROR     (-1)
                     95: 
                     96: static int
1.1.1.2   misho      97: classmatch(const char *pattern, char test, int foldcase, const char **ep)
1.1       misho      98: {
1.1.1.2   misho      99:        const char * const mismatch = pattern;
1.1       misho     100:        const char *colon;
1.1.1.2   misho     101:        struct cclass *cc;
1.1       misho     102:        int rval = RANGE_NOMATCH;
1.1.1.2   misho     103:        size_t len;
                    104: 
                    105:        if (pattern[0] != '[' || pattern[1] != ':') {
                    106:                *ep = mismatch;
                    107:                return RANGE_ERROR;
                    108:        }
                    109:        pattern += 2;
1.1       misho     110: 
                    111:        if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
1.1.1.2   misho     112:                *ep = mismatch;
1.1       misho     113:                return RANGE_ERROR;
                    114:        }
                    115:        *ep = colon + 2;
                    116:        len = (size_t)(colon - pattern);
                    117: 
                    118:        if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
                    119:                pattern = "lower:]";
                    120:        for (cc = cclasses; cc->name != NULL; cc++) {
                    121:                if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
1.1.1.2   misho     122:                        if (cc->isctype((unsigned char)test))
1.1       misho     123:                                rval = RANGE_MATCH;
                    124:                        break;
                    125:                }
                    126:        }
                    127:        if (cc->name == NULL) {
1.1.1.2   misho     128:                /* invalid character class, treat as normal text */
                    129:                *ep = mismatch;
1.1       misho     130:                rval = RANGE_ERROR;
                    131:        }
                    132:        return rval;
                    133: }
1.1.1.2   misho     134: 
                    135: /* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
                    136:  * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, 
                    137:  * however the "\/" sequence is advanced to '/'.
                    138:  *
                    139:  * Both pattern and string are **char to support pointer increment of arbitrary
                    140:  * multibyte characters for the given locale, in a later iteration of this code
                    141:  */
                    142: static int fnmatch_ch(const char **pattern, const char **string, int flags)
                    143: {
                    144:     const char * const mismatch = *pattern;
                    145:     const int nocase = !!(flags & FNM_CASEFOLD);
                    146:     const int escape = !(flags & FNM_NOESCAPE);
                    147:     const int slash = !!(flags & FNM_PATHNAME);
                    148:     int result = FNM_NOMATCH;
                    149:     const char *startch;
                    150:     int negate;
                    151: 
                    152:     if (**pattern == '[')
                    153:     {
                    154:         ++*pattern;
                    155: 
                    156:         /* Handle negation, either leading ! or ^ operators (never both) */
                    157:         negate = ((**pattern == '!') || (**pattern == '^'));
                    158:         if (negate)
                    159:             ++*pattern;
                    160: 
                    161:         /* ']' is an ordinary character at the start of the range pattern */
                    162:         if (**pattern == ']')
                    163:             goto leadingclosebrace;
                    164: 
                    165:         while (**pattern)
                    166:         {
                    167:             if (**pattern == ']') {
                    168:                 ++*pattern;
                    169:                 /* XXX: Fix for MBCS character width */
                    170:                 ++*string;
                    171:                 return (result ^ negate);
                    172:             }
                    173: 
                    174:             if (escape && (**pattern == '\\')) {
                    175:                 ++*pattern;
                    176: 
                    177:                 /* Patterns must be terminated with ']', not EOS */
                    178:                 if (!**pattern)
                    179:                     break;
                    180:             }
                    181: 
                    182:             /* Patterns must be terminated with ']' not '/' */
                    183:             if (slash && (**pattern == '/'))
                    184:                 break;
                    185: 
                    186:             /* Match character classes. */
                    187:             if (classmatch(*pattern, **string, nocase, pattern)
                    188:                 == RANGE_MATCH) {
                    189:                 result = 0;
                    190:                 continue;
                    191:             }
                    192: 
                    193: leadingclosebrace:
                    194:             /* Look at only well-formed range patterns; 
                    195:              * "x-]" is not allowed unless escaped ("x-\]")
                    196:              * XXX: Fix for locale/MBCS character width
                    197:              */
                    198:             if (((*pattern)[1] == '-') && ((*pattern)[2] != ']'))
                    199:             {
                    200:                 startch = *pattern;
                    201:                 *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
                    202: 
                    203:                 /* NOT a properly balanced [expr] pattern, EOS terminated 
                    204:                  * or ranges containing a slash in FNM_PATHNAME mode pattern
                    205:                  * fall out to to the rewind and test '[' literal code path
                    206:                  */
                    207:                 if (!**pattern || (slash && (**pattern == '/')))
                    208:                     break;
                    209: 
                    210:                 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
                    211:                 if ((**string >= *startch) && (**string <= **pattern))
                    212:                     result = 0;
                    213:                 else if (nocase && (isupper((unsigned char)**string) ||
                    214:                                    isupper((unsigned char)*startch) ||
                    215:                                    isupper((unsigned char)**pattern))
                    216:                             && (tolower((unsigned char)**string) >= tolower((unsigned char)*startch)) 
                    217:                             && (tolower((unsigned char)**string) <= tolower((unsigned char)**pattern)))
                    218:                     result = 0;
                    219: 
                    220:                 ++*pattern;
                    221:                 continue;
                    222:             }
                    223: 
                    224:             /* XXX: handle locale/MBCS comparison, advance by MBCS char width */
                    225:             if ((**string == **pattern))
                    226:                 result = 0;
                    227:             else if (nocase && (isupper((unsigned char)**string) ||
                    228:                                isupper((unsigned char)**pattern))
                    229:                             && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
                    230:                 result = 0;
                    231: 
                    232:             ++*pattern;
                    233:         }
                    234: 
                    235:         /* NOT a properly balanced [expr] pattern; Rewind
                    236:          * and reset result to test '[' literal
                    237:          */
                    238:         *pattern = mismatch;
                    239:         result = FNM_NOMATCH;
                    240:     }
                    241:     else if (**pattern == '?') {
                    242:         /* Optimize '?' match before unescaping **pattern */
                    243:         if (!**string || (slash && (**string == '/')))
                    244:             return FNM_NOMATCH;
                    245:         result = 0;
                    246:         goto fnmatch_ch_success;
                    247:     }
                    248:     else if (escape && (**pattern == '\\') && (*pattern)[1]) {
                    249:         ++*pattern;
                    250:     }
                    251: 
                    252:     /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
                    253:     if (**string == **pattern)
                    254:         result = 0;
                    255:     else if (nocase && (isupper((unsigned char)**string) || isupper((unsigned char)**pattern))
                    256:                     && (tolower((unsigned char)**string) == tolower((unsigned char)**pattern)))
                    257:         result = 0;
                    258: 
                    259:     /* Refuse to advance over trailing slash or nulls
                    260:      */
                    261:     if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/'))))
                    262:         return result;
                    263: 
                    264: fnmatch_ch_success:
                    265:     ++*pattern;
                    266:     ++*string;
                    267:     return result;
                    268: }
                    269: 
                    270: int rpl_fnmatch(const char *pattern, const char *string, int flags)
                    271: {
                    272:     static const char dummystring[2] = {' ', 0};
                    273:     const int escape = !(flags & FNM_NOESCAPE);
                    274:     const int slash = !!(flags & FNM_PATHNAME);
                    275:     const int leading_dir = !!(flags & FNM_LEADING_DIR);
                    276:     const char *strendseg;
                    277:     const char *dummyptr;
                    278:     const char *matchptr;
                    279:     int wild;
                    280:     /* For '*' wild processing only; surpress 'used before initialization'
                    281:      * warnings with dummy initialization values;
                    282:      */
                    283:     const char *strstartseg = NULL;
                    284:     const char *mismatch = NULL;
                    285:     int matchlen = 0;
                    286: 
                    287:     if (strlen(pattern) > PATH_MAX || strlen(string) > PATH_MAX)
                    288:        return FNM_NOMATCH;
                    289: 
                    290:     if (*pattern == '*')
                    291:         goto firstsegment;
                    292: 
                    293:     while (*pattern && *string)
                    294:     {
                    295:         /* Pre-decode "\/" which has no special significance, and
                    296:          * match balanced slashes, starting a new segment pattern
                    297:          */
                    298:         if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
                    299:             ++pattern;
                    300:         if (slash && (*pattern == '/') && (*string == '/')) {
                    301:             ++pattern;
                    302:             ++string;
                    303:         }            
                    304: 
                    305: firstsegment:
                    306:         /* At the beginning of each segment, validate leading period behavior.
                    307:          */
                    308:         if ((flags & FNM_PERIOD) && (*string == '.'))
                    309:         {
                    310:             if (*pattern == '.')
                    311:                 ++pattern;
                    312:             else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
                    313:                 pattern += 2;
                    314:             else
                    315:                 return FNM_NOMATCH;
                    316:             ++string;
                    317:         }
                    318: 
                    319:         /* Determine the end of string segment
                    320:          *
                    321:          * Presumes '/' character is unique, not composite in any MBCS encoding
                    322:          */
                    323:         if (slash) {
                    324:             strendseg = strchr(string, '/');
                    325:             if (!strendseg)
                    326:                 strendseg = strchr(string, '\0');
                    327:         }
                    328:         else {
                    329:             strendseg = strchr(string, '\0');
                    330:         }
                    331: 
                    332:         /* Allow pattern '*' to be consumed even with no remaining string to match
                    333:          */
                    334:         while (*pattern)
                    335:         {
                    336:             if ((string > strendseg)
                    337:                 || ((string == strendseg) && (*pattern != '*')))
                    338:                 break;
                    339: 
                    340:             if (slash && ((*pattern == '/')
                    341:                            || (escape && (*pattern == '\\')
                    342:                                       && (pattern[1] == '/'))))
                    343:                 break;
                    344: 
                    345:             /* Reduce groups of '*' and '?' to n '?' matches
                    346:              * followed by one '*' test for simplicity
                    347:              */
                    348:             for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern)
                    349:             {
                    350:                 if (*pattern == '*') {
                    351:                     wild = 1;
                    352:                 }
                    353:                 else if (string < strendseg) {  /* && (*pattern == '?') */
                    354:                     /* XXX: Advance 1 char for MBCS locale */
                    355:                     ++string;
                    356:                 }
                    357:                 else {  /* (string >= strendseg) && (*pattern == '?') */
                    358:                     return FNM_NOMATCH;
                    359:                 }
                    360:             }
                    361: 
                    362:             if (wild)
                    363:             {
                    364:                 strstartseg = string;
                    365:                 mismatch = pattern;
                    366: 
                    367:                 /* Count fixed (non '*') char matches remaining in pattern
                    368:                  * excluding '/' (or "\/") and '*'
                    369:                  */
                    370:                 for (matchptr = pattern, matchlen = 0; 1; ++matchlen)
                    371:                 {
                    372:                     if ((*matchptr == '\0') 
                    373:                         || (slash && ((*matchptr == '/')
                    374:                                       || (escape && (*matchptr == '\\')
                    375:                                                  && (matchptr[1] == '/')))))
                    376:                     {
                    377:                         /* Compare precisely this many trailing string chars,
                    378:                          * the resulting match needs no wildcard loop
                    379:                          */
                    380:                         /* XXX: Adjust for MBCS */
                    381:                         if (string + matchlen > strendseg)
                    382:                             return FNM_NOMATCH;
                    383: 
                    384:                         string = strendseg - matchlen;
                    385:                         wild = 0;
                    386:                         break;
                    387:                     }
                    388: 
                    389:                     if (*matchptr == '*')
                    390:                     {
                    391:                         /* Ensure at least this many trailing string chars remain
                    392:                          * for the first comparison
                    393:                          */
                    394:                         /* XXX: Adjust for MBCS */
                    395:                         if (string + matchlen > strendseg)
                    396:                             return FNM_NOMATCH;
                    397: 
                    398:                         /* Begin first wild comparison at the current position */
                    399:                         break;
                    400:                     }
                    401: 
                    402:                     /* Skip forward in pattern by a single character match
                    403:                      * Use a dummy fnmatch_ch() test to count one "[range]" escape
                    404:                      */ 
                    405:                     /* XXX: Adjust for MBCS */
                    406:                     if (escape && (*matchptr == '\\') && matchptr[1]) {
                    407:                         matchptr += 2;
                    408:                     }
                    409:                     else if (*matchptr == '[') {
                    410:                         dummyptr = dummystring;
                    411:                         fnmatch_ch(&matchptr, &dummyptr, flags);
                    412:                     }
                    413:                     else {
                    414:                         ++matchptr;
                    415:                     }
                    416:                 }
                    417:             }
                    418: 
                    419:             /* Incrementally match string against the pattern
                    420:              */
                    421:             while (*pattern && (string < strendseg))
                    422:             {
                    423:                 /* Success; begin a new wild pattern search
                    424:                  */
                    425:                 if (*pattern == '*')
                    426:                     break;
                    427: 
                    428:                 if (slash && ((*string == '/')
                    429:                               || (*pattern == '/')
                    430:                               || (escape && (*pattern == '\\')
                    431:                                          && (pattern[1] == '/'))))
                    432:                     break;
                    433: 
                    434:                 /* Compare ch's (the pattern is advanced over "\/" to the '/',
                    435:                  * but slashes will mismatch, and are not consumed)
                    436:                  */
                    437:                 if (!fnmatch_ch(&pattern, &string, flags))
                    438:                     continue;
                    439: 
                    440:                 /* Failed to match, loop against next char offset of string segment 
                    441:                  * until not enough string chars remain to match the fixed pattern
                    442:                  */
                    443:                 if (wild) {
                    444:                     /* XXX: Advance 1 char for MBCS locale */
                    445:                     string = ++strstartseg;
                    446:                     if (string + matchlen > strendseg)
                    447:                         return FNM_NOMATCH;
                    448: 
                    449:                     pattern = mismatch;
                    450:                     continue;
                    451:                 }
                    452:                 else
                    453:                     return FNM_NOMATCH;
                    454:             }
                    455:         }
                    456: 
                    457:         if (*string && !((slash || leading_dir) && (*string == '/')))
                    458:             return FNM_NOMATCH;
                    459: 
                    460:         if (*pattern && !(slash && ((*pattern == '/')
                    461:                                     || (escape && (*pattern == '\\')
                    462:                                                && (pattern[1] == '/')))))
                    463:             return FNM_NOMATCH;
                    464: 
                    465:         if (leading_dir && !*pattern && *string == '/')
                    466:             return 0;
                    467:     }
                    468: 
                    469:     /* Where both pattern and string are at EOS, declare success
                    470:      */
                    471:     if (!*string && !*pattern)
                    472:         return 0;
                    473: 
                    474:     /* pattern didn't match to the end of string */
                    475:     return FNM_NOMATCH;
                    476: }
1.1.1.3 ! misho     477: #endif /* HAVE_FNMATCH */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>