Annotation of embedaddon/pcre/pcre_get.c, revision 1.1

1.1     ! misho       1: /*************************************************
        !             2: *      Perl-Compatible Regular Expressions       *
        !             3: *************************************************/
        !             4: 
        !             5: /* PCRE is a library of functions to support regular expressions whose syntax
        !             6: and semantics are as close as possible to those of the Perl 5 language.
        !             7: 
        !             8:                        Written by Philip Hazel
        !             9:            Copyright (c) 1997-2008 University of Cambridge
        !            10: 
        !            11: -----------------------------------------------------------------------------
        !            12: Redistribution and use in source and binary forms, with or without
        !            13: modification, are permitted provided that the following conditions are met:
        !            14: 
        !            15:     * Redistributions of source code must retain the above copyright notice,
        !            16:       this list of conditions and the following disclaimer.
        !            17: 
        !            18:     * Redistributions in binary form must reproduce the above copyright
        !            19:       notice, this list of conditions and the following disclaimer in the
        !            20:       documentation and/or other materials provided with the distribution.
        !            21: 
        !            22:     * Neither the name of the University of Cambridge nor the names of its
        !            23:       contributors may be used to endorse or promote products derived from
        !            24:       this software without specific prior written permission.
        !            25: 
        !            26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
        !            27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
        !            30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
        !            31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
        !            32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
        !            33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
        !            34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
        !            35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
        !            36: POSSIBILITY OF SUCH DAMAGE.
        !            37: -----------------------------------------------------------------------------
        !            38: */
        !            39: 
        !            40: 
        !            41: /* This module contains some convenience functions for extracting substrings
        !            42: from the subject string after a regex match has succeeded. The original idea
        !            43: for these functions came from Scott Wimer. */
        !            44: 
        !            45: 
        !            46: #ifdef HAVE_CONFIG_H
        !            47: #include "config.h"
        !            48: #endif
        !            49: 
        !            50: #include "pcre_internal.h"
        !            51: 
        !            52: 
        !            53: /*************************************************
        !            54: *           Find number for named string         *
        !            55: *************************************************/
        !            56: 
        !            57: /* This function is used by the get_first_set() function below, as well
        !            58: as being generally available. It assumes that names are unique.
        !            59: 
        !            60: Arguments:
        !            61:   code        the compiled regex
        !            62:   stringname  the name whose number is required
        !            63: 
        !            64: Returns:      the number of the named parentheses, or a negative number
        !            65:                 (PCRE_ERROR_NOSUBSTRING) if not found
        !            66: */
        !            67: 
        !            68: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !            69: pcre_get_stringnumber(const pcre *code, const char *stringname)
        !            70: {
        !            71: int rc;
        !            72: int entrysize;
        !            73: int top, bot;
        !            74: uschar *nametable;
        !            75: 
        !            76: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
        !            77:   return rc;
        !            78: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
        !            79: 
        !            80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
        !            81:   return rc;
        !            82: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
        !            83:   return rc;
        !            84: 
        !            85: bot = 0;
        !            86: while (top > bot)
        !            87:   {
        !            88:   int mid = (top + bot) / 2;
        !            89:   uschar *entry = nametable + entrysize*mid;
        !            90:   int c = strcmp(stringname, (char *)(entry + 2));
        !            91:   if (c == 0) return (entry[0] << 8) + entry[1];
        !            92:   if (c > 0) bot = mid + 1; else top = mid;
        !            93:   }
        !            94: 
        !            95: return PCRE_ERROR_NOSUBSTRING;
        !            96: }
        !            97: 
        !            98: 
        !            99: 
        !           100: /*************************************************
        !           101: *     Find (multiple) entries for named string   *
        !           102: *************************************************/
        !           103: 
        !           104: /* This is used by the get_first_set() function below, as well as being
        !           105: generally available. It is used when duplicated names are permitted.
        !           106: 
        !           107: Arguments:
        !           108:   code        the compiled regex
        !           109:   stringname  the name whose entries are required
        !           110:   firstptr    where to put the pointer to the first entry
        !           111:   lastptr     where to put the pointer to the last entry
        !           112: 
        !           113: Returns:      the length of each entry, or a negative number
        !           114:                 (PCRE_ERROR_NOSUBSTRING) if not found
        !           115: */
        !           116: 
        !           117: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !           118: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
        !           119:   char **firstptr, char **lastptr)
        !           120: {
        !           121: int rc;
        !           122: int entrysize;
        !           123: int top, bot;
        !           124: uschar *nametable, *lastentry;
        !           125: 
        !           126: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
        !           127:   return rc;
        !           128: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
        !           129: 
        !           130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
        !           131:   return rc;
        !           132: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
        !           133:   return rc;
        !           134: 
        !           135: lastentry = nametable + entrysize * (top - 1);
        !           136: bot = 0;
        !           137: while (top > bot)
        !           138:   {
        !           139:   int mid = (top + bot) / 2;
        !           140:   uschar *entry = nametable + entrysize*mid;
        !           141:   int c = strcmp(stringname, (char *)(entry + 2));
        !           142:   if (c == 0)
        !           143:     {
        !           144:     uschar *first = entry;
        !           145:     uschar *last = entry;
        !           146:     while (first > nametable)
        !           147:       {
        !           148:       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
        !           149:       first -= entrysize;
        !           150:       }
        !           151:     while (last < lastentry)
        !           152:       {
        !           153:       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
        !           154:       last += entrysize;
        !           155:       }
        !           156:     *firstptr = (char *)first;
        !           157:     *lastptr = (char *)last;
        !           158:     return entrysize;
        !           159:     }
        !           160:   if (c > 0) bot = mid + 1; else top = mid;
        !           161:   }
        !           162: 
        !           163: return PCRE_ERROR_NOSUBSTRING;
        !           164: }
        !           165: 
        !           166: 
        !           167: 
        !           168: /*************************************************
        !           169: *    Find first set of multiple named strings    *
        !           170: *************************************************/
        !           171: 
        !           172: /* This function allows for duplicate names in the table of named substrings.
        !           173: It returns the number of the first one that was set in a pattern match.
        !           174: 
        !           175: Arguments:
        !           176:   code         the compiled regex
        !           177:   stringname   the name of the capturing substring
        !           178:   ovector      the vector of matched substrings
        !           179: 
        !           180: Returns:       the number of the first that is set,
        !           181:                or the number of the first one in the table if none are set,
        !           182:                or a negative number on error
        !           183: */
        !           184: 
        !           185: static int
        !           186: get_first_set(const pcre *code, const char *stringname, int *ovector)
        !           187: {
        !           188: const real_pcre *re = (const real_pcre *)code;
        !           189: int entrysize;
        !           190: char *first, *last;
        !           191: uschar *entry;
        !           192: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
        !           193:   return pcre_get_stringnumber(code, stringname);
        !           194: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
        !           195: if (entrysize <= 0) return entrysize;
        !           196: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
        !           197:   {
        !           198:   int n = (entry[0] << 8) + entry[1];
        !           199:   if (ovector[n*2] >= 0) return n;
        !           200:   }
        !           201: return (first[0] << 8) + first[1];
        !           202: }
        !           203: 
        !           204: 
        !           205: 
        !           206: 
        !           207: /*************************************************
        !           208: *      Copy captured string to given buffer      *
        !           209: *************************************************/
        !           210: 
        !           211: /* This function copies a single captured substring into a given buffer.
        !           212: Note that we use memcpy() rather than strncpy() in case there are binary zeros
        !           213: in the string.
        !           214: 
        !           215: Arguments:
        !           216:   subject        the subject string that was matched
        !           217:   ovector        pointer to the offsets table
        !           218:   stringcount    the number of substrings that were captured
        !           219:                    (i.e. the yield of the pcre_exec call, unless
        !           220:                    that was zero, in which case it should be 1/3
        !           221:                    of the offset table size)
        !           222:   stringnumber   the number of the required substring
        !           223:   buffer         where to put the substring
        !           224:   size           the size of the buffer
        !           225: 
        !           226: Returns:         if successful:
        !           227:                    the length of the copied string, not including the zero
        !           228:                    that is put on the end; can be zero
        !           229:                  if not successful:
        !           230:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
        !           231:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
        !           232: */
        !           233: 
        !           234: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !           235: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
        !           236:   int stringnumber, char *buffer, int size)
        !           237: {
        !           238: int yield;
        !           239: if (stringnumber < 0 || stringnumber >= stringcount)
        !           240:   return PCRE_ERROR_NOSUBSTRING;
        !           241: stringnumber *= 2;
        !           242: yield = ovector[stringnumber+1] - ovector[stringnumber];
        !           243: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
        !           244: memcpy(buffer, subject + ovector[stringnumber], yield);
        !           245: buffer[yield] = 0;
        !           246: return yield;
        !           247: }
        !           248: 
        !           249: 
        !           250: 
        !           251: /*************************************************
        !           252: *   Copy named captured string to given buffer   *
        !           253: *************************************************/
        !           254: 
        !           255: /* This function copies a single captured substring into a given buffer,
        !           256: identifying it by name. If the regex permits duplicate names, the first
        !           257: substring that is set is chosen.
        !           258: 
        !           259: Arguments:
        !           260:   code           the compiled regex
        !           261:   subject        the subject string that was matched
        !           262:   ovector        pointer to the offsets table
        !           263:   stringcount    the number of substrings that were captured
        !           264:                    (i.e. the yield of the pcre_exec call, unless
        !           265:                    that was zero, in which case it should be 1/3
        !           266:                    of the offset table size)
        !           267:   stringname     the name of the required substring
        !           268:   buffer         where to put the substring
        !           269:   size           the size of the buffer
        !           270: 
        !           271: Returns:         if successful:
        !           272:                    the length of the copied string, not including the zero
        !           273:                    that is put on the end; can be zero
        !           274:                  if not successful:
        !           275:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
        !           276:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
        !           277: */
        !           278: 
        !           279: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !           280: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
        !           281:   int stringcount, const char *stringname, char *buffer, int size)
        !           282: {
        !           283: int n = get_first_set(code, stringname, ovector);
        !           284: if (n <= 0) return n;
        !           285: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
        !           286: }
        !           287: 
        !           288: 
        !           289: 
        !           290: /*************************************************
        !           291: *      Copy all captured strings to new store    *
        !           292: *************************************************/
        !           293: 
        !           294: /* This function gets one chunk of store and builds a list of pointers and all
        !           295: of the captured substrings in it. A NULL pointer is put on the end of the list.
        !           296: 
        !           297: Arguments:
        !           298:   subject        the subject string that was matched
        !           299:   ovector        pointer to the offsets table
        !           300:   stringcount    the number of substrings that were captured
        !           301:                    (i.e. the yield of the pcre_exec call, unless
        !           302:                    that was zero, in which case it should be 1/3
        !           303:                    of the offset table size)
        !           304:   listptr        set to point to the list of pointers
        !           305: 
        !           306: Returns:         if successful: 0
        !           307:                  if not successful:
        !           308:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
        !           309: */
        !           310: 
        !           311: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !           312: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
        !           313:   const char ***listptr)
        !           314: {
        !           315: int i;
        !           316: int size = sizeof(char *);
        !           317: int double_count = stringcount * 2;
        !           318: char **stringlist;
        !           319: char *p;
        !           320: 
        !           321: for (i = 0; i < double_count; i += 2)
        !           322:   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
        !           323: 
        !           324: stringlist = (char **)(pcre_malloc)(size);
        !           325: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
        !           326: 
        !           327: *listptr = (const char **)stringlist;
        !           328: p = (char *)(stringlist + stringcount + 1);
        !           329: 
        !           330: for (i = 0; i < double_count; i += 2)
        !           331:   {
        !           332:   int len = ovector[i+1] - ovector[i];
        !           333:   memcpy(p, subject + ovector[i], len);
        !           334:   *stringlist++ = p;
        !           335:   p += len;
        !           336:   *p++ = 0;
        !           337:   }
        !           338: 
        !           339: *stringlist = NULL;
        !           340: return 0;
        !           341: }
        !           342: 
        !           343: 
        !           344: 
        !           345: /*************************************************
        !           346: *   Free store obtained by get_substring_list    *
        !           347: *************************************************/
        !           348: 
        !           349: /* This function exists for the benefit of people calling PCRE from non-C
        !           350: programs that can call its functions, but not free() or (pcre_free)() directly.
        !           351: 
        !           352: Argument:   the result of a previous pcre_get_substring_list()
        !           353: Returns:    nothing
        !           354: */
        !           355: 
        !           356: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
        !           357: pcre_free_substring_list(const char **pointer)
        !           358: {
        !           359: (pcre_free)((void *)pointer);
        !           360: }
        !           361: 
        !           362: 
        !           363: 
        !           364: /*************************************************
        !           365: *      Copy captured string to new store         *
        !           366: *************************************************/
        !           367: 
        !           368: /* This function copies a single captured substring into a piece of new
        !           369: store.
        !           370: 
        !           371: Arguments:
        !           372:   subject        the subject string that was matched
        !           373:   ovector        pointer to the offsets table
        !           374:   stringcount    the number of substrings that were captured
        !           375:                    (i.e. the yield of the pcre_exec call, unless
        !           376:                    that was zero, in which case it should be 1/3
        !           377:                    of the offset table size)
        !           378:   stringnumber   the number of the required substring
        !           379:   stringptr      where to put a pointer to the substring
        !           380: 
        !           381: Returns:         if successful:
        !           382:                    the length of the string, not including the zero that
        !           383:                    is put on the end; can be zero
        !           384:                  if not successful:
        !           385:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
        !           386:                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
        !           387: */
        !           388: 
        !           389: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !           390: pcre_get_substring(const char *subject, int *ovector, int stringcount,
        !           391:   int stringnumber, const char **stringptr)
        !           392: {
        !           393: int yield;
        !           394: char *substring;
        !           395: if (stringnumber < 0 || stringnumber >= stringcount)
        !           396:   return PCRE_ERROR_NOSUBSTRING;
        !           397: stringnumber *= 2;
        !           398: yield = ovector[stringnumber+1] - ovector[stringnumber];
        !           399: substring = (char *)(pcre_malloc)(yield + 1);
        !           400: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
        !           401: memcpy(substring, subject + ovector[stringnumber], yield);
        !           402: substring[yield] = 0;
        !           403: *stringptr = substring;
        !           404: return yield;
        !           405: }
        !           406: 
        !           407: 
        !           408: 
        !           409: /*************************************************
        !           410: *   Copy named captured string to new store      *
        !           411: *************************************************/
        !           412: 
        !           413: /* This function copies a single captured substring, identified by name, into
        !           414: new store. If the regex permits duplicate names, the first substring that is
        !           415: set is chosen.
        !           416: 
        !           417: Arguments:
        !           418:   code           the compiled regex
        !           419:   subject        the subject string that was matched
        !           420:   ovector        pointer to the offsets table
        !           421:   stringcount    the number of substrings that were captured
        !           422:                    (i.e. the yield of the pcre_exec call, unless
        !           423:                    that was zero, in which case it should be 1/3
        !           424:                    of the offset table size)
        !           425:   stringname     the name of the required substring
        !           426:   stringptr      where to put the pointer
        !           427: 
        !           428: Returns:         if successful:
        !           429:                    the length of the copied string, not including the zero
        !           430:                    that is put on the end; can be zero
        !           431:                  if not successful:
        !           432:                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
        !           433:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
        !           434: */
        !           435: 
        !           436: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
        !           437: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
        !           438:   int stringcount, const char *stringname, const char **stringptr)
        !           439: {
        !           440: int n = get_first_set(code, stringname, ovector);
        !           441: if (n <= 0) return n;
        !           442: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
        !           443: }
        !           444: 
        !           445: 
        !           446: 
        !           447: 
        !           448: /*************************************************
        !           449: *       Free store obtained by get_substring     *
        !           450: *************************************************/
        !           451: 
        !           452: /* This function exists for the benefit of people calling PCRE from non-C
        !           453: programs that can call its functions, but not free() or (pcre_free)() directly.
        !           454: 
        !           455: Argument:   the result of a previous pcre_get_substring()
        !           456: Returns:    nothing
        !           457: */
        !           458: 
        !           459: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
        !           460: pcre_free_substring(const char *pointer)
        !           461: {
        !           462: (pcre_free)((void *)pointer);
        !           463: }
        !           464: 
        !           465: /* End of pcre_get.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>