Annotation of embedaddon/pcre/pcre_get.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2008 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains some convenience functions for extracting substrings
                     42: from the subject string after a regex match has succeeded. The original idea
                     43: for these functions came from Scott Wimer. */
                     44: 
                     45: 
                     46: #ifdef HAVE_CONFIG_H
                     47: #include "config.h"
                     48: #endif
                     49: 
                     50: #include "pcre_internal.h"
                     51: 
                     52: 
                     53: /*************************************************
                     54: *           Find number for named string         *
                     55: *************************************************/
                     56: 
                     57: /* This function is used by the get_first_set() function below, as well
                     58: as being generally available. It assumes that names are unique.
                     59: 
                     60: Arguments:
                     61:   code        the compiled regex
                     62:   stringname  the name whose number is required
                     63: 
                     64: Returns:      the number of the named parentheses, or a negative number
                     65:                 (PCRE_ERROR_NOSUBSTRING) if not found
                     66: */
                     67: 
                     68: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                     69: pcre_get_stringnumber(const pcre *code, const char *stringname)
                     70: {
                     71: int rc;
                     72: int entrysize;
                     73: int top, bot;
                     74: uschar *nametable;
                     75: 
                     76: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
                     77:   return rc;
                     78: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
                     79: 
                     80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
                     81:   return rc;
                     82: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
                     83:   return rc;
                     84: 
                     85: bot = 0;
                     86: while (top > bot)
                     87:   {
                     88:   int mid = (top + bot) / 2;
                     89:   uschar *entry = nametable + entrysize*mid;
                     90:   int c = strcmp(stringname, (char *)(entry + 2));
                     91:   if (c == 0) return (entry[0] << 8) + entry[1];
                     92:   if (c > 0) bot = mid + 1; else top = mid;
                     93:   }
                     94: 
                     95: return PCRE_ERROR_NOSUBSTRING;
                     96: }
                     97: 
                     98: 
                     99: 
                    100: /*************************************************
                    101: *     Find (multiple) entries for named string   *
                    102: *************************************************/
                    103: 
                    104: /* This is used by the get_first_set() function below, as well as being
                    105: generally available. It is used when duplicated names are permitted.
                    106: 
                    107: Arguments:
                    108:   code        the compiled regex
                    109:   stringname  the name whose entries required
                    110:   firstptr    where to put the pointer to the first entry
                    111:   lastptr     where to put the pointer to the last entry
                    112: 
                    113: Returns:      the length of each entry, or a negative number
                    114:                 (PCRE_ERROR_NOSUBSTRING) if not found
                    115: */
                    116: 
                    117: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    118: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
                    119:   char **firstptr, char **lastptr)
                    120: {
                    121: int rc;
                    122: int entrysize;
                    123: int top, bot;
                    124: uschar *nametable, *lastentry;
                    125: 
                    126: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
                    127:   return rc;
                    128: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
                    129: 
                    130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
                    131:   return rc;
                    132: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
                    133:   return rc;
                    134: 
                    135: lastentry = nametable + entrysize * (top - 1);
                    136: bot = 0;
                    137: while (top > bot)
                    138:   {
                    139:   int mid = (top + bot) / 2;
                    140:   uschar *entry = nametable + entrysize*mid;
                    141:   int c = strcmp(stringname, (char *)(entry + 2));
                    142:   if (c == 0)
                    143:     {
                    144:     uschar *first = entry;
                    145:     uschar *last = entry;
                    146:     while (first > nametable)
                    147:       {
                    148:       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
                    149:       first -= entrysize;
                    150:       }
                    151:     while (last < lastentry)
                    152:       {
                    153:       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
                    154:       last += entrysize;
                    155:       }
                    156:     *firstptr = (char *)first;
                    157:     *lastptr = (char *)last;
                    158:     return entrysize;
                    159:     }
                    160:   if (c > 0) bot = mid + 1; else top = mid;
                    161:   }
                    162: 
                    163: return PCRE_ERROR_NOSUBSTRING;
                    164: }
                    165: 
                    166: 
                    167: 
                    168: /*************************************************
                    169: *    Find first set of multiple named strings    *
                    170: *************************************************/
                    171: 
                    172: /* This function allows for duplicate names in the table of named substrings.
                    173: It returns the number of the first one that was set in a pattern match.
                    174: 
                    175: Arguments:
                    176:   code         the compiled regex
                    177:   stringname   the name of the capturing substring
                    178:   ovector      the vector of matched substrings
                    179: 
                    180: Returns:       the number of the first that is set,
                    181:                or the number of the last one if none are set,
                    182:                or a negative number on error
                    183: */
                    184: 
                    185: static int
                    186: get_first_set(const pcre *code, const char *stringname, int *ovector)
                    187: {
                    188: const real_pcre *re = (const real_pcre *)code;
                    189: int entrysize;
                    190: char *first, *last;
                    191: uschar *entry;
                    192: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
                    193:   return pcre_get_stringnumber(code, stringname);
                    194: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
                    195: if (entrysize <= 0) return entrysize;
                    196: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
                    197:   {
                    198:   int n = (entry[0] << 8) + entry[1];
                    199:   if (ovector[n*2] >= 0) return n;
                    200:   }
                    201: return (first[0] << 8) + first[1];
                    202: }
                    203: 
                    204: 
                    205: 
                    206: 
                    207: /*************************************************
                    208: *      Copy captured string to given buffer      *
                    209: *************************************************/
                    210: 
                    211: /* This function copies a single captured substring into a given buffer.
                    212: Note that we use memcpy() rather than strncpy() in case there are binary zeros
                    213: in the string.
                    214: 
                    215: Arguments:
                    216:   subject        the subject string that was matched
                    217:   ovector        pointer to the offsets table
                    218:   stringcount    the number of substrings that were captured
                    219:                    (i.e. the yield of the pcre_exec call, unless
                    220:                    that was zero, in which case it should be 1/3
                    221:                    of the offset table size)
                    222:   stringnumber   the number of the required substring
                    223:   buffer         where to put the substring
                    224:   size           the size of the buffer
                    225: 
                    226: Returns:         if successful:
                    227:                    the length of the copied string, not including the zero
                    228:                    that is put on the end; can be zero
                    229:                  if not successful:
                    230:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
                    231:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
                    232: */
                    233: 
                    234: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    235: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
                    236:   int stringnumber, char *buffer, int size)
                    237: {
                    238: int yield;
                    239: if (stringnumber < 0 || stringnumber >= stringcount)
                    240:   return PCRE_ERROR_NOSUBSTRING;
                    241: stringnumber *= 2;
                    242: yield = ovector[stringnumber+1] - ovector[stringnumber];
                    243: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
                    244: memcpy(buffer, subject + ovector[stringnumber], yield);
                    245: buffer[yield] = 0;
                    246: return yield;
                    247: }
                    248: 
                    249: 
                    250: 
                    251: /*************************************************
                    252: *   Copy named captured string to given buffer   *
                    253: *************************************************/
                    254: 
                    255: /* This function copies a single captured substring into a given buffer,
                    256: identifying it by name. If the regex permits duplicate names, the first
                    257: substring that is set is chosen.
                    258: 
                    259: Arguments:
                    260:   code           the compiled regex
                    261:   subject        the subject string that was matched
                    262:   ovector        pointer to the offsets table
                    263:   stringcount    the number of substrings that were captured
                    264:                    (i.e. the yield of the pcre_exec call, unless
                    265:                    that was zero, in which case it should be 1/3
                    266:                    of the offset table size)
                    267:   stringname     the name of the required substring
                    268:   buffer         where to put the substring
                    269:   size           the size of the buffer
                    270: 
                    271: Returns:         if successful:
                    272:                    the length of the copied string, not including the zero
                    273:                    that is put on the end; can be zero
                    274:                  if not successful:
                    275:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
                    276:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
                    277: */
                    278: 
                    279: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    280: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
                    281:   int stringcount, const char *stringname, char *buffer, int size)
                    282: {
                    283: int n = get_first_set(code, stringname, ovector);
                    284: if (n <= 0) return n;
                    285: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
                    286: }
                    287: 
                    288: 
                    289: 
                    290: /*************************************************
                    291: *      Copy all captured strings to new store    *
                    292: *************************************************/
                    293: 
                    294: /* This function gets one chunk of store and builds a list of pointers and all
                    295: of the captured substrings in it. A NULL pointer is put on the end of the list.
                    296: 
                    297: Arguments:
                    298:   subject        the subject string that was matched
                    299:   ovector        pointer to the offsets table
                    300:   stringcount    the number of substrings that were captured
                    301:                    (i.e. the yield of the pcre_exec call, unless
                    302:                    that was zero, in which case it should be 1/3
                    303:                    of the offset table size)
                    304:   listptr        set to point to the list of pointers
                    305: 
                    306: Returns:         if successful: 0
                    307:                  if not successful:
                    308:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
                    309: */
                    310: 
                    311: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    312: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
                    313:   const char ***listptr)
                    314: {
                    315: int i;
                    316: int size = sizeof(char *);
                    317: int double_count = stringcount * 2;
                    318: char **stringlist;
                    319: char *p;
                    320: 
                    321: for (i = 0; i < double_count; i += 2)
                    322:   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
                    323: 
                    324: stringlist = (char **)(pcre_malloc)(size);
                    325: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
                    326: 
                    327: *listptr = (const char **)stringlist;
                    328: p = (char *)(stringlist + stringcount + 1);
                    329: 
                    330: for (i = 0; i < double_count; i += 2)
                    331:   {
                    332:   int len = ovector[i+1] - ovector[i];
                    333:   memcpy(p, subject + ovector[i], len);
                    334:   *stringlist++ = p;
                    335:   p += len;
                    336:   *p++ = 0;
                    337:   }
                    338: 
                    339: *stringlist = NULL;
                    340: return 0;
                    341: }
                    342: 
                    343: 
                    344: 
                    345: /*************************************************
                    346: *   Free store obtained by get_substring_list    *
                    347: *************************************************/
                    348: 
                    349: /* This function exists for the benefit of people calling PCRE from non-C
                    350: programs that can call its functions, but not free() or (pcre_free)() directly.
                    351: 
                    352: Argument:   the result of a previous pcre_get_substring_list()
                    353: Returns:    nothing
                    354: */
                    355: 
                    356: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
                    357: pcre_free_substring_list(const char **pointer)
                    358: {
                    359: (pcre_free)((void *)pointer);
                    360: }
                    361: 
                    362: 
                    363: 
                    364: /*************************************************
                    365: *      Copy captured string to new store         *
                    366: *************************************************/
                    367: 
                    368: /* This function copies a single captured substring into a piece of new
                    369: store
                    370: 
                    371: Arguments:
                    372:   subject        the subject string that was matched
                    373:   ovector        pointer to the offsets table
                    374:   stringcount    the number of substrings that were captured
                    375:                    (i.e. the yield of the pcre_exec call, unless
                    376:                    that was zero, in which case it should be 1/3
                    377:                    of the offset table size)
                    378:   stringnumber   the number of the required substring
                    379:   stringptr      where to put a pointer to the substring
                    380: 
                    381: Returns:         if successful:
                    382:                    the length of the string, not including the zero that
                    383:                    is put on the end; can be zero
                    384:                  if not successful:
                    385:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
                    386:                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
                    387: */
                    388: 
                    389: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    390: pcre_get_substring(const char *subject, int *ovector, int stringcount,
                    391:   int stringnumber, const char **stringptr)
                    392: {
                    393: int yield;
                    394: char *substring;
                    395: if (stringnumber < 0 || stringnumber >= stringcount)
                    396:   return PCRE_ERROR_NOSUBSTRING;
                    397: stringnumber *= 2;
                    398: yield = ovector[stringnumber+1] - ovector[stringnumber];
                    399: substring = (char *)(pcre_malloc)(yield + 1);
                    400: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
                    401: memcpy(substring, subject + ovector[stringnumber], yield);
                    402: substring[yield] = 0;
                    403: *stringptr = substring;
                    404: return yield;
                    405: }
                    406: 
                    407: 
                    408: 
                    409: /*************************************************
                    410: *   Copy named captured string to new store      *
                    411: *************************************************/
                    412: 
                    413: /* This function copies a single captured substring, identified by name, into
                    414: new store. If the regex permits duplicate names, the first substring that is
                    415: set is chosen.
                    416: 
                    417: Arguments:
                    418:   code           the compiled regex
                    419:   subject        the subject string that was matched
                    420:   ovector        pointer to the offsets table
                    421:   stringcount    the number of substrings that were captured
                    422:                    (i.e. the yield of the pcre_exec call, unless
                    423:                    that was zero, in which case it should be 1/3
                    424:                    of the offset table size)
                    425:   stringname     the name of the required substring
                    426:   stringptr      where to put the pointer
                    427: 
                    428: Returns:         if successful:
                    429:                    the length of the copied string, not including the zero
                    430:                    that is put on the end; can be zero
                    431:                  if not successful:
                    432:                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
                    433:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
                    434: */
                    435: 
                    436: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
                    437: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
                    438:   int stringcount, const char *stringname, const char **stringptr)
                    439: {
                    440: int n = get_first_set(code, stringname, ovector);
                    441: if (n <= 0) return n;
                    442: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
                    443: }
                    444: 
                    445: 
                    446: 
                    447: 
                    448: /*************************************************
                    449: *       Free store obtained by get_substring     *
                    450: *************************************************/
                    451: 
                    452: /* This function exists for the benefit of people calling PCRE from non-C
                    453: programs that can call its functions, but not free() or (pcre_free)() directly.
                    454: 
                    455: Argument:   the result of a previous pcre_get_substring()
                    456: Returns:    nothing
                    457: */
                    458: 
                    459: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
                    460: pcre_free_substring(const char *pointer)
                    461: {
                    462: (pcre_free)((void *)pointer);
                    463: }
                    464: 
                    465: /* End of pcre_get.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>