File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_get.c
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:05:51 2012 UTC (12 years, 4 months ago) by misho
CVS tags: MAIN, HEAD
Initial revision

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: /* PCRE is a library of functions to support regular expressions whose syntax
    6: and semantics are as close as possible to those of the Perl 5 language.
    7: 
    8:                        Written by Philip Hazel
    9:            Copyright (c) 1997-2008 University of Cambridge
   10: 
   11: -----------------------------------------------------------------------------
   12: Redistribution and use in source and binary forms, with or without
   13: modification, are permitted provided that the following conditions are met:
   14: 
   15:     * Redistributions of source code must retain the above copyright notice,
   16:       this list of conditions and the following disclaimer.
   17: 
   18:     * Redistributions in binary form must reproduce the above copyright
   19:       notice, this list of conditions and the following disclaimer in the
   20:       documentation and/or other materials provided with the distribution.
   21: 
   22:     * Neither the name of the University of Cambridge nor the names of its
   23:       contributors may be used to endorse or promote products derived from
   24:       this software without specific prior written permission.
   25: 
   26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36: POSSIBILITY OF SUCH DAMAGE.
   37: -----------------------------------------------------------------------------
   38: */
   39: 
   40: 
   41: /* This module contains some convenience functions for extracting substrings
   42: from the subject string after a regex match has succeeded. The original idea
   43: for these functions came from Scott Wimer. */
   44: 
   45: 
   46: #ifdef HAVE_CONFIG_H
   47: #include "config.h"
   48: #endif
   49: 
   50: #include "pcre_internal.h"
   51: 
   52: 
   53: /*************************************************
   54: *           Find number for named string         *
   55: *************************************************/
   56: 
   57: /* This function is used by the get_first_set() function below, as well
   58: as being generally available. It assumes that names are unique.
   59: 
   60: Arguments:
   61:   code        the compiled regex
   62:   stringname  the name whose number is required
   63: 
   64: Returns:      the number of the named parentheses, or a negative number
   65:                 (PCRE_ERROR_NOSUBSTRING) if not found
   66: */
   67: 
   68: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   69: pcre_get_stringnumber(const pcre *code, const char *stringname)
   70: {
   71: int rc;
   72: int entrysize;
   73: int top, bot;
   74: uschar *nametable;
   75: 
   76: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
   77:   return rc;
   78: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
   79: 
   80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
   81:   return rc;
   82: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
   83:   return rc;
   84: 
   85: bot = 0;
   86: while (top > bot)
   87:   {
   88:   int mid = (top + bot) / 2;
   89:   uschar *entry = nametable + entrysize*mid;
   90:   int c = strcmp(stringname, (char *)(entry + 2));
   91:   if (c == 0) return (entry[0] << 8) + entry[1];
   92:   if (c > 0) bot = mid + 1; else top = mid;
   93:   }
   94: 
   95: return PCRE_ERROR_NOSUBSTRING;
   96: }
   97: 
   98: 
   99: 
  100: /*************************************************
  101: *     Find (multiple) entries for named string   *
  102: *************************************************/
  103: 
  104: /* This is used by the get_first_set() function below, as well as being
  105: generally available. It is used when duplicated names are permitted.
  106: 
  107: Arguments:
  108:   code        the compiled regex
  109:   stringname  the name whose entries required
  110:   firstptr    where to put the pointer to the first entry
  111:   lastptr     where to put the pointer to the last entry
  112: 
  113: Returns:      the length of each entry, or a negative number
  114:                 (PCRE_ERROR_NOSUBSTRING) if not found
  115: */
  116: 
  117: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  118: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  119:   char **firstptr, char **lastptr)
  120: {
  121: int rc;
  122: int entrysize;
  123: int top, bot;
  124: uschar *nametable, *lastentry;
  125: 
  126: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  127:   return rc;
  128: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
  129: 
  130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  131:   return rc;
  132: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  133:   return rc;
  134: 
  135: lastentry = nametable + entrysize * (top - 1);
  136: bot = 0;
  137: while (top > bot)
  138:   {
  139:   int mid = (top + bot) / 2;
  140:   uschar *entry = nametable + entrysize*mid;
  141:   int c = strcmp(stringname, (char *)(entry + 2));
  142:   if (c == 0)
  143:     {
  144:     uschar *first = entry;
  145:     uschar *last = entry;
  146:     while (first > nametable)
  147:       {
  148:       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
  149:       first -= entrysize;
  150:       }
  151:     while (last < lastentry)
  152:       {
  153:       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
  154:       last += entrysize;
  155:       }
  156:     *firstptr = (char *)first;
  157:     *lastptr = (char *)last;
  158:     return entrysize;
  159:     }
  160:   if (c > 0) bot = mid + 1; else top = mid;
  161:   }
  162: 
  163: return PCRE_ERROR_NOSUBSTRING;
  164: }
  165: 
  166: 
  167: 
  168: /*************************************************
  169: *    Find first set of multiple named strings    *
  170: *************************************************/
  171: 
  172: /* This function allows for duplicate names in the table of named substrings.
  173: It returns the number of the first one that was set in a pattern match.
  174: 
  175: Arguments:
  176:   code         the compiled regex
  177:   stringname   the name of the capturing substring
  178:   ovector      the vector of matched substrings
  179: 
  180: Returns:       the number of the first that is set,
  181:                or the number of the last one if none are set,
  182:                or a negative number on error
  183: */
  184: 
  185: static int
  186: get_first_set(const pcre *code, const char *stringname, int *ovector)
  187: {
  188: const real_pcre *re = (const real_pcre *)code;
  189: int entrysize;
  190: char *first, *last;
  191: uschar *entry;
  192: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
  193:   return pcre_get_stringnumber(code, stringname);
  194: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
  195: if (entrysize <= 0) return entrysize;
  196: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
  197:   {
  198:   int n = (entry[0] << 8) + entry[1];
  199:   if (ovector[n*2] >= 0) return n;
  200:   }
  201: return (first[0] << 8) + first[1];
  202: }
  203: 
  204: 
  205: 
  206: 
  207: /*************************************************
  208: *      Copy captured string to given buffer      *
  209: *************************************************/
  210: 
  211: /* This function copies a single captured substring into a given buffer.
  212: Note that we use memcpy() rather than strncpy() in case there are binary zeros
  213: in the string.
  214: 
  215: Arguments:
  216:   subject        the subject string that was matched
  217:   ovector        pointer to the offsets table
  218:   stringcount    the number of substrings that were captured
  219:                    (i.e. the yield of the pcre_exec call, unless
  220:                    that was zero, in which case it should be 1/3
  221:                    of the offset table size)
  222:   stringnumber   the number of the required substring
  223:   buffer         where to put the substring
  224:   size           the size of the buffer
  225: 
  226: Returns:         if successful:
  227:                    the length of the copied string, not including the zero
  228:                    that is put on the end; can be zero
  229:                  if not successful:
  230:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
  231:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  232: */
  233: 
  234: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  235: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
  236:   int stringnumber, char *buffer, int size)
  237: {
  238: int yield;
  239: if (stringnumber < 0 || stringnumber >= stringcount)
  240:   return PCRE_ERROR_NOSUBSTRING;
  241: stringnumber *= 2;
  242: yield = ovector[stringnumber+1] - ovector[stringnumber];
  243: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
  244: memcpy(buffer, subject + ovector[stringnumber], yield);
  245: buffer[yield] = 0;
  246: return yield;
  247: }
  248: 
  249: 
  250: 
  251: /*************************************************
  252: *   Copy named captured string to given buffer   *
  253: *************************************************/
  254: 
  255: /* This function copies a single captured substring into a given buffer,
  256: identifying it by name. If the regex permits duplicate names, the first
  257: substring that is set is chosen.
  258: 
  259: Arguments:
  260:   code           the compiled regex
  261:   subject        the subject string that was matched
  262:   ovector        pointer to the offsets table
  263:   stringcount    the number of substrings that were captured
  264:                    (i.e. the yield of the pcre_exec call, unless
  265:                    that was zero, in which case it should be 1/3
  266:                    of the offset table size)
  267:   stringname     the name of the required substring
  268:   buffer         where to put the substring
  269:   size           the size of the buffer
  270: 
  271: Returns:         if successful:
  272:                    the length of the copied string, not including the zero
  273:                    that is put on the end; can be zero
  274:                  if not successful:
  275:                    PCRE_ERROR_NOMEMORY (-6) buffer too small
  276:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  277: */
  278: 
  279: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  280: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
  281:   int stringcount, const char *stringname, char *buffer, int size)
  282: {
  283: int n = get_first_set(code, stringname, ovector);
  284: if (n <= 0) return n;
  285: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
  286: }
  287: 
  288: 
  289: 
  290: /*************************************************
  291: *      Copy all captured strings to new store    *
  292: *************************************************/
  293: 
  294: /* This function gets one chunk of store and builds a list of pointers and all
  295: of the captured substrings in it. A NULL pointer is put on the end of the list.
  296: 
  297: Arguments:
  298:   subject        the subject string that was matched
  299:   ovector        pointer to the offsets table
  300:   stringcount    the number of substrings that were captured
  301:                    (i.e. the yield of the pcre_exec call, unless
  302:                    that was zero, in which case it should be 1/3
  303:                    of the offset table size)
  304:   listptr        set to point to the list of pointers
  305: 
  306: Returns:         if successful: 0
  307:                  if not successful:
  308:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
  309: */
  310: 
  311: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  312: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
  313:   const char ***listptr)
  314: {
  315: int i;
  316: int size = sizeof(char *);
  317: int double_count = stringcount * 2;
  318: char **stringlist;
  319: char *p;
  320: 
  321: for (i = 0; i < double_count; i += 2)
  322:   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
  323: 
  324: stringlist = (char **)(pcre_malloc)(size);
  325: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
  326: 
  327: *listptr = (const char **)stringlist;
  328: p = (char *)(stringlist + stringcount + 1);
  329: 
  330: for (i = 0; i < double_count; i += 2)
  331:   {
  332:   int len = ovector[i+1] - ovector[i];
  333:   memcpy(p, subject + ovector[i], len);
  334:   *stringlist++ = p;
  335:   p += len;
  336:   *p++ = 0;
  337:   }
  338: 
  339: *stringlist = NULL;
  340: return 0;
  341: }
  342: 
  343: 
  344: 
  345: /*************************************************
  346: *   Free store obtained by get_substring_list    *
  347: *************************************************/
  348: 
  349: /* This function exists for the benefit of people calling PCRE from non-C
  350: programs that can call its functions, but not free() or (pcre_free)() directly.
  351: 
  352: Argument:   the result of a previous pcre_get_substring_list()
  353: Returns:    nothing
  354: */
  355: 
  356: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  357: pcre_free_substring_list(const char **pointer)
  358: {
  359: (pcre_free)((void *)pointer);
  360: }
  361: 
  362: 
  363: 
  364: /*************************************************
  365: *      Copy captured string to new store         *
  366: *************************************************/
  367: 
  368: /* This function copies a single captured substring into a piece of new
  369: store
  370: 
  371: Arguments:
  372:   subject        the subject string that was matched
  373:   ovector        pointer to the offsets table
  374:   stringcount    the number of substrings that were captured
  375:                    (i.e. the yield of the pcre_exec call, unless
  376:                    that was zero, in which case it should be 1/3
  377:                    of the offset table size)
  378:   stringnumber   the number of the required substring
  379:   stringptr      where to put a pointer to the substring
  380: 
  381: Returns:         if successful:
  382:                    the length of the string, not including the zero that
  383:                    is put on the end; can be zero
  384:                  if not successful:
  385:                    PCRE_ERROR_NOMEMORY (-6) failed to get store
  386:                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
  387: */
  388: 
  389: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  390: pcre_get_substring(const char *subject, int *ovector, int stringcount,
  391:   int stringnumber, const char **stringptr)
  392: {
  393: int yield;
  394: char *substring;
  395: if (stringnumber < 0 || stringnumber >= stringcount)
  396:   return PCRE_ERROR_NOSUBSTRING;
  397: stringnumber *= 2;
  398: yield = ovector[stringnumber+1] - ovector[stringnumber];
  399: substring = (char *)(pcre_malloc)(yield + 1);
  400: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
  401: memcpy(substring, subject + ovector[stringnumber], yield);
  402: substring[yield] = 0;
  403: *stringptr = substring;
  404: return yield;
  405: }
  406: 
  407: 
  408: 
  409: /*************************************************
  410: *   Copy named captured string to new store      *
  411: *************************************************/
  412: 
  413: /* This function copies a single captured substring, identified by name, into
  414: new store. If the regex permits duplicate names, the first substring that is
  415: set is chosen.
  416: 
  417: Arguments:
  418:   code           the compiled regex
  419:   subject        the subject string that was matched
  420:   ovector        pointer to the offsets table
  421:   stringcount    the number of substrings that were captured
  422:                    (i.e. the yield of the pcre_exec call, unless
  423:                    that was zero, in which case it should be 1/3
  424:                    of the offset table size)
  425:   stringname     the name of the required substring
  426:   stringptr      where to put the pointer
  427: 
  428: Returns:         if successful:
  429:                    the length of the copied string, not including the zero
  430:                    that is put on the end; can be zero
  431:                  if not successful:
  432:                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
  433:                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  434: */
  435: 
  436: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  437: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
  438:   int stringcount, const char *stringname, const char **stringptr)
  439: {
  440: int n = get_first_set(code, stringname, ovector);
  441: if (n <= 0) return n;
  442: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
  443: }
  444: 
  445: 
  446: 
  447: 
  448: /*************************************************
  449: *       Free store obtained by get_substring     *
  450: *************************************************/
  451: 
  452: /* This function exists for the benefit of people calling PCRE from non-C
  453: programs that can call its functions, but not free() or (pcre_free)() directly.
  454: 
  455: Argument:   the result of a previous pcre_get_substring()
  456: Returns:    nothing
  457: */
  458: 
  459: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  460: pcre_free_substring(const char *pointer)
  461: {
  462: (pcre_free)((void *)pointer);
  463: }
  464: 
  465: /* End of pcre_get.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>