Annotation of embedaddon/pcre/pcre_get.c, revision 1.1
1.1 ! misho 1: /*************************************************
! 2: * Perl-Compatible Regular Expressions *
! 3: *************************************************/
! 4:
! 5: /* PCRE is a library of functions to support regular expressions whose syntax
! 6: and semantics are as close as possible to those of the Perl 5 language.
! 7:
! 8: Written by Philip Hazel
! 9: Copyright (c) 1997-2008 University of Cambridge
! 10:
! 11: -----------------------------------------------------------------------------
! 12: Redistribution and use in source and binary forms, with or without
! 13: modification, are permitted provided that the following conditions are met:
! 14:
! 15: * Redistributions of source code must retain the above copyright notice,
! 16: this list of conditions and the following disclaimer.
! 17:
! 18: * Redistributions in binary form must reproduce the above copyright
! 19: notice, this list of conditions and the following disclaimer in the
! 20: documentation and/or other materials provided with the distribution.
! 21:
! 22: * Neither the name of the University of Cambridge nor the names of its
! 23: contributors may be used to endorse or promote products derived from
! 24: this software without specific prior written permission.
! 25:
! 26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
! 27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
! 30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 36: POSSIBILITY OF SUCH DAMAGE.
! 37: -----------------------------------------------------------------------------
! 38: */
! 39:
! 40:
! 41: /* This module contains some convenience functions for extracting substrings
! 42: from the subject string after a regex match has succeeded. The original idea
! 43: for these functions came from Scott Wimer. */
! 44:
! 45:
! 46: #ifdef HAVE_CONFIG_H
! 47: #include "config.h"
! 48: #endif
! 49:
! 50: #include "pcre_internal.h"
! 51:
! 52:
! 53: /*************************************************
! 54: * Find number for named string *
! 55: *************************************************/
! 56:
! 57: /* This function is used by the get_first_set() function below, as well
! 58: as being generally available. It assumes that names are unique.
! 59:
! 60: Arguments:
! 61: code the compiled regex
! 62: stringname the name whose number is required
! 63:
! 64: Returns: the number of the named parentheses, or a negative number
! 65: (PCRE_ERROR_NOSUBSTRING) if not found
! 66: */
! 67:
! 68: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 69: pcre_get_stringnumber(const pcre *code, const char *stringname)
! 70: {
! 71: int rc;
! 72: int entrysize;
! 73: int top, bot;
! 74: uschar *nametable;
! 75:
! 76: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
! 77: return rc;
! 78: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
! 79:
! 80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
! 81: return rc;
! 82: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
! 83: return rc;
! 84:
! 85: bot = 0;
! 86: while (top > bot)
! 87: {
! 88: int mid = (top + bot) / 2;
! 89: uschar *entry = nametable + entrysize*mid;
! 90: int c = strcmp(stringname, (char *)(entry + 2));
! 91: if (c == 0) return (entry[0] << 8) + entry[1];
! 92: if (c > 0) bot = mid + 1; else top = mid;
! 93: }
! 94:
! 95: return PCRE_ERROR_NOSUBSTRING;
! 96: }
! 97:
! 98:
! 99:
! 100: /*************************************************
! 101: * Find (multiple) entries for named string *
! 102: *************************************************/
! 103:
! 104: /* This is used by the get_first_set() function below, as well as being
! 105: generally available. It is used when duplicated names are permitted.
! 106:
! 107: Arguments:
! 108: code the compiled regex
! 109: stringname the name whose entries required
! 110: firstptr where to put the pointer to the first entry
! 111: lastptr where to put the pointer to the last entry
! 112:
! 113: Returns: the length of each entry, or a negative number
! 114: (PCRE_ERROR_NOSUBSTRING) if not found
! 115: */
! 116:
! 117: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 118: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
! 119: char **firstptr, char **lastptr)
! 120: {
! 121: int rc;
! 122: int entrysize;
! 123: int top, bot;
! 124: uschar *nametable, *lastentry;
! 125:
! 126: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
! 127: return rc;
! 128: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
! 129:
! 130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
! 131: return rc;
! 132: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
! 133: return rc;
! 134:
! 135: lastentry = nametable + entrysize * (top - 1);
! 136: bot = 0;
! 137: while (top > bot)
! 138: {
! 139: int mid = (top + bot) / 2;
! 140: uschar *entry = nametable + entrysize*mid;
! 141: int c = strcmp(stringname, (char *)(entry + 2));
! 142: if (c == 0)
! 143: {
! 144: uschar *first = entry;
! 145: uschar *last = entry;
! 146: while (first > nametable)
! 147: {
! 148: if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
! 149: first -= entrysize;
! 150: }
! 151: while (last < lastentry)
! 152: {
! 153: if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
! 154: last += entrysize;
! 155: }
! 156: *firstptr = (char *)first;
! 157: *lastptr = (char *)last;
! 158: return entrysize;
! 159: }
! 160: if (c > 0) bot = mid + 1; else top = mid;
! 161: }
! 162:
! 163: return PCRE_ERROR_NOSUBSTRING;
! 164: }
! 165:
! 166:
! 167:
! 168: /*************************************************
! 169: * Find first set of multiple named strings *
! 170: *************************************************/
! 171:
! 172: /* This function allows for duplicate names in the table of named substrings.
! 173: It returns the number of the first one that was set in a pattern match.
! 174:
! 175: Arguments:
! 176: code the compiled regex
! 177: stringname the name of the capturing substring
! 178: ovector the vector of matched substrings
! 179:
! 180: Returns: the number of the first that is set,
! 181: or the number of the last one if none are set,
! 182: or a negative number on error
! 183: */
! 184:
! 185: static int
! 186: get_first_set(const pcre *code, const char *stringname, int *ovector)
! 187: {
! 188: const real_pcre *re = (const real_pcre *)code;
! 189: int entrysize;
! 190: char *first, *last;
! 191: uschar *entry;
! 192: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
! 193: return pcre_get_stringnumber(code, stringname);
! 194: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
! 195: if (entrysize <= 0) return entrysize;
! 196: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
! 197: {
! 198: int n = (entry[0] << 8) + entry[1];
! 199: if (ovector[n*2] >= 0) return n;
! 200: }
! 201: return (first[0] << 8) + first[1];
! 202: }
! 203:
! 204:
! 205:
! 206:
! 207: /*************************************************
! 208: * Copy captured string to given buffer *
! 209: *************************************************/
! 210:
! 211: /* This function copies a single captured substring into a given buffer.
! 212: Note that we use memcpy() rather than strncpy() in case there are binary zeros
! 213: in the string.
! 214:
! 215: Arguments:
! 216: subject the subject string that was matched
! 217: ovector pointer to the offsets table
! 218: stringcount the number of substrings that were captured
! 219: (i.e. the yield of the pcre_exec call, unless
! 220: that was zero, in which case it should be 1/3
! 221: of the offset table size)
! 222: stringnumber the number of the required substring
! 223: buffer where to put the substring
! 224: size the size of the buffer
! 225:
! 226: Returns: if successful:
! 227: the length of the copied string, not including the zero
! 228: that is put on the end; can be zero
! 229: if not successful:
! 230: PCRE_ERROR_NOMEMORY (-6) buffer too small
! 231: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
! 232: */
! 233:
! 234: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 235: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
! 236: int stringnumber, char *buffer, int size)
! 237: {
! 238: int yield;
! 239: if (stringnumber < 0 || stringnumber >= stringcount)
! 240: return PCRE_ERROR_NOSUBSTRING;
! 241: stringnumber *= 2;
! 242: yield = ovector[stringnumber+1] - ovector[stringnumber];
! 243: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
! 244: memcpy(buffer, subject + ovector[stringnumber], yield);
! 245: buffer[yield] = 0;
! 246: return yield;
! 247: }
! 248:
! 249:
! 250:
! 251: /*************************************************
! 252: * Copy named captured string to given buffer *
! 253: *************************************************/
! 254:
! 255: /* This function copies a single captured substring into a given buffer,
! 256: identifying it by name. If the regex permits duplicate names, the first
! 257: substring that is set is chosen.
! 258:
! 259: Arguments:
! 260: code the compiled regex
! 261: subject the subject string that was matched
! 262: ovector pointer to the offsets table
! 263: stringcount the number of substrings that were captured
! 264: (i.e. the yield of the pcre_exec call, unless
! 265: that was zero, in which case it should be 1/3
! 266: of the offset table size)
! 267: stringname the name of the required substring
! 268: buffer where to put the substring
! 269: size the size of the buffer
! 270:
! 271: Returns: if successful:
! 272: the length of the copied string, not including the zero
! 273: that is put on the end; can be zero
! 274: if not successful:
! 275: PCRE_ERROR_NOMEMORY (-6) buffer too small
! 276: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
! 277: */
! 278:
! 279: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 280: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
! 281: int stringcount, const char *stringname, char *buffer, int size)
! 282: {
! 283: int n = get_first_set(code, stringname, ovector);
! 284: if (n <= 0) return n;
! 285: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
! 286: }
! 287:
! 288:
! 289:
! 290: /*************************************************
! 291: * Copy all captured strings to new store *
! 292: *************************************************/
! 293:
! 294: /* This function gets one chunk of store and builds a list of pointers and all
! 295: of the captured substrings in it. A NULL pointer is put on the end of the list.
! 296:
! 297: Arguments:
! 298: subject the subject string that was matched
! 299: ovector pointer to the offsets table
! 300: stringcount the number of substrings that were captured
! 301: (i.e. the yield of the pcre_exec call, unless
! 302: that was zero, in which case it should be 1/3
! 303: of the offset table size)
! 304: listptr set to point to the list of pointers
! 305:
! 306: Returns: if successful: 0
! 307: if not successful:
! 308: PCRE_ERROR_NOMEMORY (-6) failed to get store
! 309: */
! 310:
! 311: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 312: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
! 313: const char ***listptr)
! 314: {
! 315: int i;
! 316: int size = sizeof(char *);
! 317: int double_count = stringcount * 2;
! 318: char **stringlist;
! 319: char *p;
! 320:
! 321: for (i = 0; i < double_count; i += 2)
! 322: size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
! 323:
! 324: stringlist = (char **)(pcre_malloc)(size);
! 325: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
! 326:
! 327: *listptr = (const char **)stringlist;
! 328: p = (char *)(stringlist + stringcount + 1);
! 329:
! 330: for (i = 0; i < double_count; i += 2)
! 331: {
! 332: int len = ovector[i+1] - ovector[i];
! 333: memcpy(p, subject + ovector[i], len);
! 334: *stringlist++ = p;
! 335: p += len;
! 336: *p++ = 0;
! 337: }
! 338:
! 339: *stringlist = NULL;
! 340: return 0;
! 341: }
! 342:
! 343:
! 344:
! 345: /*************************************************
! 346: * Free store obtained by get_substring_list *
! 347: *************************************************/
! 348:
! 349: /* This function exists for the benefit of people calling PCRE from non-C
! 350: programs that can call its functions, but not free() or (pcre_free)() directly.
! 351:
! 352: Argument: the result of a previous pcre_get_substring_list()
! 353: Returns: nothing
! 354: */
! 355:
! 356: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
! 357: pcre_free_substring_list(const char **pointer)
! 358: {
! 359: (pcre_free)((void *)pointer);
! 360: }
! 361:
! 362:
! 363:
! 364: /*************************************************
! 365: * Copy captured string to new store *
! 366: *************************************************/
! 367:
! 368: /* This function copies a single captured substring into a piece of new
! 369: store
! 370:
! 371: Arguments:
! 372: subject the subject string that was matched
! 373: ovector pointer to the offsets table
! 374: stringcount the number of substrings that were captured
! 375: (i.e. the yield of the pcre_exec call, unless
! 376: that was zero, in which case it should be 1/3
! 377: of the offset table size)
! 378: stringnumber the number of the required substring
! 379: stringptr where to put a pointer to the substring
! 380:
! 381: Returns: if successful:
! 382: the length of the string, not including the zero that
! 383: is put on the end; can be zero
! 384: if not successful:
! 385: PCRE_ERROR_NOMEMORY (-6) failed to get store
! 386: PCRE_ERROR_NOSUBSTRING (-7) substring not present
! 387: */
! 388:
! 389: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 390: pcre_get_substring(const char *subject, int *ovector, int stringcount,
! 391: int stringnumber, const char **stringptr)
! 392: {
! 393: int yield;
! 394: char *substring;
! 395: if (stringnumber < 0 || stringnumber >= stringcount)
! 396: return PCRE_ERROR_NOSUBSTRING;
! 397: stringnumber *= 2;
! 398: yield = ovector[stringnumber+1] - ovector[stringnumber];
! 399: substring = (char *)(pcre_malloc)(yield + 1);
! 400: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
! 401: memcpy(substring, subject + ovector[stringnumber], yield);
! 402: substring[yield] = 0;
! 403: *stringptr = substring;
! 404: return yield;
! 405: }
! 406:
! 407:
! 408:
! 409: /*************************************************
! 410: * Copy named captured string to new store *
! 411: *************************************************/
! 412:
! 413: /* This function copies a single captured substring, identified by name, into
! 414: new store. If the regex permits duplicate names, the first substring that is
! 415: set is chosen.
! 416:
! 417: Arguments:
! 418: code the compiled regex
! 419: subject the subject string that was matched
! 420: ovector pointer to the offsets table
! 421: stringcount the number of substrings that were captured
! 422: (i.e. the yield of the pcre_exec call, unless
! 423: that was zero, in which case it should be 1/3
! 424: of the offset table size)
! 425: stringname the name of the required substring
! 426: stringptr where to put the pointer
! 427:
! 428: Returns: if successful:
! 429: the length of the copied string, not including the zero
! 430: that is put on the end; can be zero
! 431: if not successful:
! 432: PCRE_ERROR_NOMEMORY (-6) couldn't get memory
! 433: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
! 434: */
! 435:
! 436: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
! 437: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
! 438: int stringcount, const char *stringname, const char **stringptr)
! 439: {
! 440: int n = get_first_set(code, stringname, ovector);
! 441: if (n <= 0) return n;
! 442: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
! 443: }
! 444:
! 445:
! 446:
! 447:
! 448: /*************************************************
! 449: * Free store obtained by get_substring *
! 450: *************************************************/
! 451:
! 452: /* This function exists for the benefit of people calling PCRE from non-C
! 453: programs that can call its functions, but not free() or (pcre_free)() directly.
! 454:
! 455: Argument: the result of a previous pcre_get_substring()
! 456: Returns: nothing
! 457: */
! 458:
! 459: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
! 460: pcre_free_substring(const char *pointer)
! 461: {
! 462: (pcre_free)((void *)pointer);
! 463: }
! 464:
! 465: /* End of pcre_get.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>