Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_get.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2008 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains some convenience functions for extracting substrings
                     42: from the subject string after a regex match has succeeded. The original idea
                     43: for these functions came from Scott Wimer. */
                     44: 
                     45: 
                     46: #include "config.h"
                     47: 
                     48: #include "pcre_internal.h"
                     49: 
                     50: 
                     51: /*************************************************
                     52: *           Find number for named string         *
                     53: *************************************************/
                     54: 
                     55: /* This function is used by the get_first_set() function below, as well
                     56: as being generally available. It assumes that names are unique.
                     57: 
                     58: Arguments:
                     59:   code        the compiled regex
                     60:   stringname  the name whose number is required
                     61: 
                     62: Returns:      the number of the named parentheses, or a negative number
                     63:                 (PCRE_ERROR_NOSUBSTRING) if not found
                     64: */
                     65: 
                     66: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                     67: pcre_get_stringnumber(const pcre *code, const char *stringname)
                     68: {
                     69: int rc;
                     70: int entrysize;
                     71: int top, bot;
                     72: uschar *nametable;
                     73: 
                     74: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
                     75:   return rc;
                     76: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
                     77: 
                     78: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
                     79:   return rc;
                     80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
                     81:   return rc;
                     82: 
                     83: bot = 0;
                     84: while (top > bot)
                     85:   {
                     86:   int mid = (top + bot) / 2;
                     87:   uschar *entry = nametable + entrysize*mid;
                     88:   int c = strcmp(stringname, (char *)(entry + 2));
                     89:   if (c == 0) return (entry[0] << 8) + entry[1];
                     90:   if (c > 0) bot = mid + 1; else top = mid;
                     91:   }
                     92: 
                     93: return PCRE_ERROR_NOSUBSTRING;
                     94: }
                     95: 
                     96: 
                     97: 
                     98: /*************************************************
                     99: *     Find (multiple) entries for named string   *
                    100: *************************************************/
                    101: 
                    102: /* This is used by the get_first_set() function below, as well as being
                    103: generally available. It is used when duplicated names are permitted.
                    104: 
                    105: Arguments:
                    106:   code        the compiled regex
                    107:   stringname  the name whose entries required
                    108:   firstptr    where to put the pointer to the first entry
                    109:   lastptr     where to put the pointer to the last entry
                    110: 
                    111: Returns:      the length of each entry, or a negative number
                    112:                 (PCRE_ERROR_NOSUBSTRING) if not found
                    113: */
                    114: 
                    115: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    116: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
                    117:   char **firstptr, char **lastptr)
                    118: {
                    119: int rc;
                    120: int entrysize;
                    121: int top, bot;
                    122: uschar *nametable, *lastentry;
                    123: 
                    124: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
                    125:   return rc;
                    126: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
                    127: 
                    128: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
                    129:   return rc;
                    130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
                    131:   return rc;
                    132: 
                    133: lastentry = nametable + entrysize * (top - 1);
                    134: bot = 0;
                    135: while (top > bot)
                    136:   {
                    137:   int mid = (top + bot) / 2;
                    138:   uschar *entry = nametable + entrysize*mid;
                    139:   int c = strcmp(stringname, (char *)(entry + 2));
                    140:   if (c == 0)
                    141:     {
                    142:     uschar *first = entry;
                    143:     uschar *last = entry;
                    144:     while (first > nametable)
                    145:       {
                    146:       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
                    147:       first -= entrysize;
                    148:       }
                    149:     while (last < lastentry)
                    150:       {
                    151:       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
                    152:       last += entrysize;
                    153:       }
                    154:     *firstptr = (char *)first;
                    155:     *lastptr = (char *)last;
                    156:     return entrysize;
                    157:     }
                    158:   if (c > 0) bot = mid + 1; else top = mid;
                    159:   }
                    160: 
                    161: return PCRE_ERROR_NOSUBSTRING;
                    162: }
                    163: 
                    164: 
                    165: 
                    166: /*************************************************
                    167: *    Find first set of multiple named strings    *
                    168: *************************************************/
                    169: 
                    170: /* This function allows for duplicate names in the table of named substrings.
                    171: It returns the number of the first one that was set in a pattern match.
                    172: 
                    173: Arguments:
                    174:   code         the compiled regex
                    175:   stringname   the name of the capturing substring
                    176:   ovector      the vector of matched substrings
                    177: 
                    178: Returns:       the number of the first that is set,
                    179:                or the number of the last one if none are set,
                    180:                or a negative number on error
                    181: */
                    182: 
                    183: static int
                    184: get_first_set(const pcre *code, const char *stringname, int *ovector)
                    185: {
                    186: const real_pcre *re = (const real_pcre *)code;
                    187: int entrysize;
                    188: char *first, *last;
                    189: uschar *entry;
                    190: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
                    191:   return pcre_get_stringnumber(code, stringname);
                    192: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
                    193: if (entrysize <= 0) return entrysize;
                    194: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
                    195:   {
                    196:   int n = (entry[0] << 8) + entry[1];
                    197:   if (ovector[n*2] >= 0) return n;
                    198:   }
                    199: return (first[0] << 8) + first[1];
                    200: }
                    201: 
                    202: 
                    203: 
                    204: 
                    205: /*************************************************
                    206: *      Copy captured string to given buffer      *
                    207: *************************************************/
                    208: 
                    209: /* This function copies a single captured substring into a given buffer.
                    210: Note that we use memcpy() rather than strncpy() in case there are binary zeros
                    211: in the string.
                    212: 
                    213: Arguments:
                    214:   subject        the subject string that was matched
                    215:   ovector        pointer to the offsets table
                    216:   stringcount    the number of substrings that were captured
                    217:                    (i.e. the yield of the pcre_exec call, unless
                    218:                    that was zero, in which case it should be 1/3
                    219:                    of the offset table size)
                    220:   stringnumber   the number of the required substring
                    221:   buffer         where to put the substring
                    222:   size           the size of the buffer
                    223: 
                    224: Returns:         if successful:
                    225:                    the length of the copied string, not including the zero
                    226:                    that is put on the end; can be zero
                    227:                  if not successful:
                    228:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
                    229:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
                    230: */
                    231: 
                    232: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    233: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
                    234:   int stringnumber, char *buffer, int size)
                    235: {
                    236: int yield;
                    237: if (stringnumber < 0 || stringnumber >= stringcount)
                    238:   return PCRE_ERROR_NOSUBSTRING;
                    239: stringnumber *= 2;
                    240: yield = ovector[stringnumber+1] - ovector[stringnumber];
                    241: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
                    242: memcpy(buffer, subject + ovector[stringnumber], yield);
                    243: buffer[yield] = 0;
                    244: return yield;
                    245: }
                    246: 
                    247: 
                    248: 
                    249: /*************************************************
                    250: *   Copy named captured string to given buffer   *
                    251: *************************************************/
                    252: 
                    253: /* This function copies a single captured substring into a given buffer,
                    254: identifying it by name. If the regex permits duplicate names, the first
                    255: substring that is set is chosen.
                    256: 
                    257: Arguments:
                    258:   code           the compiled regex
                    259:   subject        the subject string that was matched
                    260:   ovector        pointer to the offsets table
                    261:   stringcount    the number of substrings that were captured
                    262:                    (i.e. the yield of the pcre_exec call, unless
                    263:                    that was zero, in which case it should be 1/3
                    264:                    of the offset table size)
                    265:   stringname     the name of the required substring
                    266:   buffer         where to put the substring
                    267:   size           the size of the buffer
                    268: 
                    269: Returns:         if successful:
                    270:                    the length of the copied string, not including the zero
                    271:                    that is put on the end; can be zero
                    272:                  if not successful:
                    273:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
                    274:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
                    275: */
                    276: 
                    277: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    278: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
                    279:   int stringcount, const char *stringname, char *buffer, int size)
                    280: {
                    281: int n = get_first_set(code, stringname, ovector);
                    282: if (n <= 0) return n;
                    283: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
                    284: }
                    285: 
                    286: 
                    287: 
                    288: /*************************************************
                    289: *      Copy all captured strings to new store    *
                    290: *************************************************/
                    291: 
                    292: /* This function gets one chunk of store and builds a list of pointers and all
                    293: of the captured substrings in it. A NULL pointer is put on the end of the list.
                    294: 
                    295: Arguments:
                    296:   subject        the subject string that was matched
                    297:   ovector        pointer to the offsets table
                    298:   stringcount    the number of substrings that were captured
                    299:                    (i.e. the yield of the pcre_exec call, unless
                    300:                    that was zero, in which case it should be 1/3
                    301:                    of the offset table size)
                    302:   listptr        set to point to the list of pointers
                    303: 
                    304: Returns:         if successful: 0
                    305:                  if not successful:
                    306:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
                    307: */
                    308: 
                    309: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    310: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
                    311:   const char ***listptr)
                    312: {
                    313: int i;
                    314: int size = sizeof(char *);
                    315: int double_count = stringcount * 2;
                    316: char **stringlist;
                    317: char *p;
                    318: 
                    319: for (i = 0; i < double_count; i += 2)
                    320:   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
                    321: 
                    322: stringlist = (char **)(pcre_malloc)(size);
                    323: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
                    324: 
                    325: *listptr = (const char **)stringlist;
                    326: p = (char *)(stringlist + stringcount + 1);
                    327: 
                    328: for (i = 0; i < double_count; i += 2)
                    329:   {
                    330:   int len = ovector[i+1] - ovector[i];
                    331:   memcpy(p, subject + ovector[i], len);
                    332:   *stringlist++ = p;
                    333:   p += len;
                    334:   *p++ = 0;
                    335:   }
                    336: 
                    337: *stringlist = NULL;
                    338: return 0;
                    339: }
                    340: 
                    341: 
                    342: 
                    343: /*************************************************
                    344: *   Free store obtained by get_substring_list    *
                    345: *************************************************/
                    346: 
                    347: /* This function exists for the benefit of people calling PCRE from non-C
                    348: programs that can call its functions, but not free() or (pcre_free)() directly.
                    349: 
                    350: Argument:   the result of a previous pcre_get_substring_list()
                    351: Returns:    nothing
                    352: */
                    353: 
                    354: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
                    355: pcre_free_substring_list(const char **pointer)
                    356: {
                    357: (pcre_free)((void *)pointer);
                    358: }
                    359: 
                    360: 
                    361: 
                    362: /*************************************************
                    363: *      Copy captured string to new store         *
                    364: *************************************************/
                    365: 
                    366: /* This function copies a single captured substring into a piece of new
                    367: store
                    368: 
                    369: Arguments:
                    370:   subject        the subject string that was matched
                    371:   ovector        pointer to the offsets table
                    372:   stringcount    the number of substrings that were captured
                    373:                    (i.e. the yield of the pcre_exec call, unless
                    374:                    that was zero, in which case it should be 1/3
                    375:                    of the offset table size)
                    376:   stringnumber   the number of the required substring
                    377:   stringptr      where to put a pointer to the substring
                    378: 
                    379: Returns:         if successful:
                    380:                    the length of the string, not including the zero that
                    381:                    is put on the end; can be zero
                    382:                  if not successful:
                    383:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
                    384:                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
                    385: */
                    386: 
                    387: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    388: pcre_get_substring(const char *subject, int *ovector, int stringcount,
                    389:   int stringnumber, const char **stringptr)
                    390: {
                    391: int yield;
                    392: char *substring;
                    393: if (stringnumber < 0 || stringnumber >= stringcount)
                    394:   return PCRE_ERROR_NOSUBSTRING;
                    395: stringnumber *= 2;
                    396: yield = ovector[stringnumber+1] - ovector[stringnumber];
                    397: substring = (char *)(pcre_malloc)(yield + 1);
                    398: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
                    399: memcpy(substring, subject + ovector[stringnumber], yield);
                    400: substring[yield] = 0;
                    401: *stringptr = substring;
                    402: return yield;
                    403: }
                    404: 
                    405: 
                    406: 
                    407: /*************************************************
                    408: *   Copy named captured string to new store      *
                    409: *************************************************/
                    410: 
                    411: /* This function copies a single captured substring, identified by name, into
                    412: new store. If the regex permits duplicate names, the first substring that is
                    413: set is chosen.
                    414: 
                    415: Arguments:
                    416:   code           the compiled regex
                    417:   subject        the subject string that was matched
                    418:   ovector        pointer to the offsets table
                    419:   stringcount    the number of substrings that were captured
                    420:                    (i.e. the yield of the pcre_exec call, unless
                    421:                    that was zero, in which case it should be 1/3
                    422:                    of the offset table size)
                    423:   stringname     the name of the required substring
                    424:   stringptr      where to put the pointer
                    425: 
                    426: Returns:         if successful:
                    427:                    the length of the copied string, not including the zero
                    428:                    that is put on the end; can be zero
                    429:                  if not successful:
                    430:                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
                    431:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
                    432: */
                    433: 
                    434: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    435: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
                    436:   int stringcount, const char *stringname, const char **stringptr)
                    437: {
                    438: int n = get_first_set(code, stringname, ovector);
                    439: if (n <= 0) return n;
                    440: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
                    441: }
                    442: 
                    443: 
                    444: 
                    445: 
                    446: /*************************************************
                    447: *       Free store obtained by get_substring     *
                    448: *************************************************/
                    449: 
                    450: /* This function exists for the benefit of people calling PCRE from non-C
                    451: programs that can call its functions, but not free() or (pcre_free)() directly.
                    452: 
                    453: Argument:   the result of a previous pcre_get_substring()
                    454: Returns:    nothing
                    455: */
                    456: 
                    457: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
                    458: pcre_free_substring(const char *pointer)
                    459: {
                    460: (pcre_free)((void *)pointer);
                    461: }
                    462: 
                    463: /* End of pcre_get.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>