Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_get.c, revision 1.1
1.1 ! misho 1: /*************************************************
! 2: * Perl-Compatible Regular Expressions *
! 3: *************************************************/
! 4:
! 5: /* PCRE is a library of functions to support regular expressions whose syntax
! 6: and semantics are as close as possible to those of the Perl 5 language.
! 7:
! 8: Written by Philip Hazel
! 9: Copyright (c) 1997-2008 University of Cambridge
! 10:
! 11: -----------------------------------------------------------------------------
! 12: Redistribution and use in source and binary forms, with or without
! 13: modification, are permitted provided that the following conditions are met:
! 14:
! 15: * Redistributions of source code must retain the above copyright notice,
! 16: this list of conditions and the following disclaimer.
! 17:
! 18: * Redistributions in binary form must reproduce the above copyright
! 19: notice, this list of conditions and the following disclaimer in the
! 20: documentation and/or other materials provided with the distribution.
! 21:
! 22: * Neither the name of the University of Cambridge nor the names of its
! 23: contributors may be used to endorse or promote products derived from
! 24: this software without specific prior written permission.
! 25:
! 26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
! 27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
! 30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 36: POSSIBILITY OF SUCH DAMAGE.
! 37: -----------------------------------------------------------------------------
! 38: */
! 39:
! 40:
! 41: /* This module contains some convenience functions for extracting substrings
! 42: from the subject string after a regex match has succeeded. The original idea
! 43: for these functions came from Scott Wimer. */
! 44:
! 45:
! 46: #include "config.h"
! 47:
! 48: #include "pcre_internal.h"
! 49:
! 50:
! 51: /*************************************************
! 52: * Find number for named string *
! 53: *************************************************/
! 54:
! 55: /* This function is used by the get_first_set() function below, as well
! 56: as being generally available. It assumes that names are unique.
! 57:
! 58: Arguments:
! 59: code the compiled regex
! 60: stringname the name whose number is required
! 61:
! 62: Returns: the number of the named parentheses, or a negative number
! 63: (PCRE_ERROR_NOSUBSTRING) if not found
! 64: */
! 65:
! 66: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 67: pcre_get_stringnumber(const pcre *code, const char *stringname)
! 68: {
! 69: int rc;
! 70: int entrysize;
! 71: int top, bot;
! 72: uschar *nametable;
! 73:
! 74: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
! 75: return rc;
! 76: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
! 77:
! 78: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
! 79: return rc;
! 80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
! 81: return rc;
! 82:
! 83: bot = 0;
! 84: while (top > bot)
! 85: {
! 86: int mid = (top + bot) / 2;
! 87: uschar *entry = nametable + entrysize*mid;
! 88: int c = strcmp(stringname, (char *)(entry + 2));
! 89: if (c == 0) return (entry[0] << 8) + entry[1];
! 90: if (c > 0) bot = mid + 1; else top = mid;
! 91: }
! 92:
! 93: return PCRE_ERROR_NOSUBSTRING;
! 94: }
! 95:
! 96:
! 97:
! 98: /*************************************************
! 99: * Find (multiple) entries for named string *
! 100: *************************************************/
! 101:
! 102: /* This is used by the get_first_set() function below, as well as being
! 103: generally available. It is used when duplicated names are permitted.
! 104:
! 105: Arguments:
! 106: code the compiled regex
! 107: stringname the name whose entries required
! 108: firstptr where to put the pointer to the first entry
! 109: lastptr where to put the pointer to the last entry
! 110:
! 111: Returns: the length of each entry, or a negative number
! 112: (PCRE_ERROR_NOSUBSTRING) if not found
! 113: */
! 114:
! 115: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 116: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
! 117: char **firstptr, char **lastptr)
! 118: {
! 119: int rc;
! 120: int entrysize;
! 121: int top, bot;
! 122: uschar *nametable, *lastentry;
! 123:
! 124: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
! 125: return rc;
! 126: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
! 127:
! 128: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
! 129: return rc;
! 130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
! 131: return rc;
! 132:
! 133: lastentry = nametable + entrysize * (top - 1);
! 134: bot = 0;
! 135: while (top > bot)
! 136: {
! 137: int mid = (top + bot) / 2;
! 138: uschar *entry = nametable + entrysize*mid;
! 139: int c = strcmp(stringname, (char *)(entry + 2));
! 140: if (c == 0)
! 141: {
! 142: uschar *first = entry;
! 143: uschar *last = entry;
! 144: while (first > nametable)
! 145: {
! 146: if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
! 147: first -= entrysize;
! 148: }
! 149: while (last < lastentry)
! 150: {
! 151: if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
! 152: last += entrysize;
! 153: }
! 154: *firstptr = (char *)first;
! 155: *lastptr = (char *)last;
! 156: return entrysize;
! 157: }
! 158: if (c > 0) bot = mid + 1; else top = mid;
! 159: }
! 160:
! 161: return PCRE_ERROR_NOSUBSTRING;
! 162: }
! 163:
! 164:
! 165:
! 166: /*************************************************
! 167: * Find first set of multiple named strings *
! 168: *************************************************/
! 169:
! 170: /* This function allows for duplicate names in the table of named substrings.
! 171: It returns the number of the first one that was set in a pattern match.
! 172:
! 173: Arguments:
! 174: code the compiled regex
! 175: stringname the name of the capturing substring
! 176: ovector the vector of matched substrings
! 177:
! 178: Returns: the number of the first that is set,
! 179: or the number of the last one if none are set,
! 180: or a negative number on error
! 181: */
! 182:
! 183: static int
! 184: get_first_set(const pcre *code, const char *stringname, int *ovector)
! 185: {
! 186: const real_pcre *re = (const real_pcre *)code;
! 187: int entrysize;
! 188: char *first, *last;
! 189: uschar *entry;
! 190: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
! 191: return pcre_get_stringnumber(code, stringname);
! 192: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
! 193: if (entrysize <= 0) return entrysize;
! 194: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
! 195: {
! 196: int n = (entry[0] << 8) + entry[1];
! 197: if (ovector[n*2] >= 0) return n;
! 198: }
! 199: return (first[0] << 8) + first[1];
! 200: }
! 201:
! 202:
! 203:
! 204:
! 205: /*************************************************
! 206: * Copy captured string to given buffer *
! 207: *************************************************/
! 208:
! 209: /* This function copies a single captured substring into a given buffer.
! 210: Note that we use memcpy() rather than strncpy() in case there are binary zeros
! 211: in the string.
! 212:
! 213: Arguments:
! 214: subject the subject string that was matched
! 215: ovector pointer to the offsets table
! 216: stringcount the number of substrings that were captured
! 217: (i.e. the yield of the pcre_exec call, unless
! 218: that was zero, in which case it should be 1/3
! 219: of the offset table size)
! 220: stringnumber the number of the required substring
! 221: buffer where to put the substring
! 222: size the size of the buffer
! 223:
! 224: Returns: if successful:
! 225: the length of the copied string, not including the zero
! 226: that is put on the end; can be zero
! 227: if not successful:
! 228: PCRE_ERROR_NOMEMORY (-6) buffer too small
! 229: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
! 230: */
! 231:
! 232: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 233: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
! 234: int stringnumber, char *buffer, int size)
! 235: {
! 236: int yield;
! 237: if (stringnumber < 0 || stringnumber >= stringcount)
! 238: return PCRE_ERROR_NOSUBSTRING;
! 239: stringnumber *= 2;
! 240: yield = ovector[stringnumber+1] - ovector[stringnumber];
! 241: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
! 242: memcpy(buffer, subject + ovector[stringnumber], yield);
! 243: buffer[yield] = 0;
! 244: return yield;
! 245: }
! 246:
! 247:
! 248:
! 249: /*************************************************
! 250: * Copy named captured string to given buffer *
! 251: *************************************************/
! 252:
! 253: /* This function copies a single captured substring into a given buffer,
! 254: identifying it by name. If the regex permits duplicate names, the first
! 255: substring that is set is chosen.
! 256:
! 257: Arguments:
! 258: code the compiled regex
! 259: subject the subject string that was matched
! 260: ovector pointer to the offsets table
! 261: stringcount the number of substrings that were captured
! 262: (i.e. the yield of the pcre_exec call, unless
! 263: that was zero, in which case it should be 1/3
! 264: of the offset table size)
! 265: stringname the name of the required substring
! 266: buffer where to put the substring
! 267: size the size of the buffer
! 268:
! 269: Returns: if successful:
! 270: the length of the copied string, not including the zero
! 271: that is put on the end; can be zero
! 272: if not successful:
! 273: PCRE_ERROR_NOMEMORY (-6) buffer too small
! 274: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
! 275: */
! 276:
! 277: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 278: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
! 279: int stringcount, const char *stringname, char *buffer, int size)
! 280: {
! 281: int n = get_first_set(code, stringname, ovector);
! 282: if (n <= 0) return n;
! 283: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
! 284: }
! 285:
! 286:
! 287:
! 288: /*************************************************
! 289: * Copy all captured strings to new store *
! 290: *************************************************/
! 291:
! 292: /* This function gets one chunk of store and builds a list of pointers and all
! 293: of the captured substrings in it. A NULL pointer is put on the end of the list.
! 294:
! 295: Arguments:
! 296: subject the subject string that was matched
! 297: ovector pointer to the offsets table
! 298: stringcount the number of substrings that were captured
! 299: (i.e. the yield of the pcre_exec call, unless
! 300: that was zero, in which case it should be 1/3
! 301: of the offset table size)
! 302: listptr set to point to the list of pointers
! 303:
! 304: Returns: if successful: 0
! 305: if not successful:
! 306: PCRE_ERROR_NOMEMORY (-6) failed to get store
! 307: */
! 308:
! 309: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 310: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
! 311: const char ***listptr)
! 312: {
! 313: int i;
! 314: int size = sizeof(char *);
! 315: int double_count = stringcount * 2;
! 316: char **stringlist;
! 317: char *p;
! 318:
! 319: for (i = 0; i < double_count; i += 2)
! 320: size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
! 321:
! 322: stringlist = (char **)(pcre_malloc)(size);
! 323: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
! 324:
! 325: *listptr = (const char **)stringlist;
! 326: p = (char *)(stringlist + stringcount + 1);
! 327:
! 328: for (i = 0; i < double_count; i += 2)
! 329: {
! 330: int len = ovector[i+1] - ovector[i];
! 331: memcpy(p, subject + ovector[i], len);
! 332: *stringlist++ = p;
! 333: p += len;
! 334: *p++ = 0;
! 335: }
! 336:
! 337: *stringlist = NULL;
! 338: return 0;
! 339: }
! 340:
! 341:
! 342:
! 343: /*************************************************
! 344: * Free store obtained by get_substring_list *
! 345: *************************************************/
! 346:
! 347: /* This function exists for the benefit of people calling PCRE from non-C
! 348: programs that can call its functions, but not free() or (pcre_free)() directly.
! 349:
! 350: Argument: the result of a previous pcre_get_substring_list()
! 351: Returns: nothing
! 352: */
! 353:
! 354: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
! 355: pcre_free_substring_list(const char **pointer)
! 356: {
! 357: (pcre_free)((void *)pointer);
! 358: }
! 359:
! 360:
! 361:
! 362: /*************************************************
! 363: * Copy captured string to new store *
! 364: *************************************************/
! 365:
! 366: /* This function copies a single captured substring into a piece of new
! 367: store
! 368:
! 369: Arguments:
! 370: subject the subject string that was matched
! 371: ovector pointer to the offsets table
! 372: stringcount the number of substrings that were captured
! 373: (i.e. the yield of the pcre_exec call, unless
! 374: that was zero, in which case it should be 1/3
! 375: of the offset table size)
! 376: stringnumber the number of the required substring
! 377: stringptr where to put a pointer to the substring
! 378:
! 379: Returns: if successful:
! 380: the length of the string, not including the zero that
! 381: is put on the end; can be zero
! 382: if not successful:
! 383: PCRE_ERROR_NOMEMORY (-6) failed to get store
! 384: PCRE_ERROR_NOSUBSTRING (-7) substring not present
! 385: */
! 386:
! 387: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 388: pcre_get_substring(const char *subject, int *ovector, int stringcount,
! 389: int stringnumber, const char **stringptr)
! 390: {
! 391: int yield;
! 392: char *substring;
! 393: if (stringnumber < 0 || stringnumber >= stringcount)
! 394: return PCRE_ERROR_NOSUBSTRING;
! 395: stringnumber *= 2;
! 396: yield = ovector[stringnumber+1] - ovector[stringnumber];
! 397: substring = (char *)(pcre_malloc)(yield + 1);
! 398: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
! 399: memcpy(substring, subject + ovector[stringnumber], yield);
! 400: substring[yield] = 0;
! 401: *stringptr = substring;
! 402: return yield;
! 403: }
! 404:
! 405:
! 406:
! 407: /*************************************************
! 408: * Copy named captured string to new store *
! 409: *************************************************/
! 410:
! 411: /* This function copies a single captured substring, identified by name, into
! 412: new store. If the regex permits duplicate names, the first substring that is
! 413: set is chosen.
! 414:
! 415: Arguments:
! 416: code the compiled regex
! 417: subject the subject string that was matched
! 418: ovector pointer to the offsets table
! 419: stringcount the number of substrings that were captured
! 420: (i.e. the yield of the pcre_exec call, unless
! 421: that was zero, in which case it should be 1/3
! 422: of the offset table size)
! 423: stringname the name of the required substring
! 424: stringptr where to put the pointer
! 425:
! 426: Returns: if successful:
! 427: the length of the copied string, not including the zero
! 428: that is put on the end; can be zero
! 429: if not successful:
! 430: PCRE_ERROR_NOMEMORY (-6) couldn't get memory
! 431: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
! 432: */
! 433:
! 434: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 435: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
! 436: int stringcount, const char *stringname, const char **stringptr)
! 437: {
! 438: int n = get_first_set(code, stringname, ovector);
! 439: if (n <= 0) return n;
! 440: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
! 441: }
! 442:
! 443:
! 444:
! 445:
! 446: /*************************************************
! 447: * Free store obtained by get_substring *
! 448: *************************************************/
! 449:
! 450: /* This function exists for the benefit of people calling PCRE from non-C
! 451: programs that can call its functions, but not free() or (pcre_free)() directly.
! 452:
! 453: Argument: the result of a previous pcre_get_substring()
! 454: Returns: nothing
! 455: */
! 456:
! 457: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
! 458: pcre_free_substring(const char *pointer)
! 459: {
! 460: (pcre_free)((void *)pointer);
! 461: }
! 462:
! 463: /* End of pcre_get.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>