1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2012 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains some convenience functions for extracting substrings
42: from the subject string after a regex match has succeeded. The original idea
43: for these functions came from Scott Wimer. */
44:
45:
46: #ifdef HAVE_CONFIG_H
47: #include "config.h"
48: #endif
49:
50: #include "pcre_internal.h"
51:
52:
53: /*************************************************
54: * Find number for named string *
55: *************************************************/
56:
57: /* This function is used by the get_first_set() function below, as well
58: as being generally available. It assumes that names are unique.
59:
60: Arguments:
61: code the compiled regex
62: stringname the name whose number is required
63:
64: Returns: the number of the named parentheses, or a negative number
65: (PCRE_ERROR_NOSUBSTRING) if not found
66: */
67:
68: #ifdef COMPILE_PCRE8
69: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70: pcre_get_stringnumber(const pcre *code, const char *stringname)
71: #else
72: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73: pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
74: #endif
75: {
76: int rc;
77: int entrysize;
78: int top, bot;
79: pcre_uchar *nametable;
80:
81: #ifdef COMPILE_PCRE8
82: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
83: return rc;
84: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
85:
86: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
87: return rc;
88: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
89: return rc;
90: #endif
91: #ifdef COMPILE_PCRE16
92: if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
93: return rc;
94: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
95:
96: if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
97: return rc;
98: if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
99: return rc;
100: #endif
101:
102: bot = 0;
103: while (top > bot)
104: {
105: int mid = (top + bot) / 2;
106: pcre_uchar *entry = nametable + entrysize*mid;
107: int c = STRCMP_UC_UC((pcre_uchar *)stringname,
108: (pcre_uchar *)(entry + IMM2_SIZE));
109: if (c == 0) return GET2(entry, 0);
110: if (c > 0) bot = mid + 1; else top = mid;
111: }
112:
113: return PCRE_ERROR_NOSUBSTRING;
114: }
115:
116:
117:
118: /*************************************************
119: * Find (multiple) entries for named string *
120: *************************************************/
121:
122: /* This is used by the get_first_set() function below, as well as being
123: generally available. It is used when duplicated names are permitted.
124:
125: Arguments:
126: code the compiled regex
127: stringname the name whose entries required
128: firstptr where to put the pointer to the first entry
129: lastptr where to put the pointer to the last entry
130:
131: Returns: the length of each entry, or a negative number
132: (PCRE_ERROR_NOSUBSTRING) if not found
133: */
134:
135: #ifdef COMPILE_PCRE8
136: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
137: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
138: char **firstptr, char **lastptr)
139: #else
140: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
141: pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
142: PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
143: #endif
144: {
145: int rc;
146: int entrysize;
147: int top, bot;
148: pcre_uchar *nametable, *lastentry;
149:
150: #ifdef COMPILE_PCRE8
151: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
152: return rc;
153: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
154:
155: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
156: return rc;
157: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
158: return rc;
159: #endif
160: #ifdef COMPILE_PCRE16
161: if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
162: return rc;
163: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
164:
165: if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
166: return rc;
167: if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
168: return rc;
169: #endif
170:
171: lastentry = nametable + entrysize * (top - 1);
172: bot = 0;
173: while (top > bot)
174: {
175: int mid = (top + bot) / 2;
176: pcre_uchar *entry = nametable + entrysize*mid;
177: int c = STRCMP_UC_UC((pcre_uchar *)stringname,
178: (pcre_uchar *)(entry + IMM2_SIZE));
179: if (c == 0)
180: {
181: pcre_uchar *first = entry;
182: pcre_uchar *last = entry;
183: while (first > nametable)
184: {
185: if (STRCMP_UC_UC((pcre_uchar *)stringname,
186: (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
187: first -= entrysize;
188: }
189: while (last < lastentry)
190: {
191: if (STRCMP_UC_UC((pcre_uchar *)stringname,
192: (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
193: last += entrysize;
194: }
195: #ifdef COMPILE_PCRE8
196: *firstptr = (char *)first;
197: *lastptr = (char *)last;
198: #else
199: *firstptr = (PCRE_UCHAR16 *)first;
200: *lastptr = (PCRE_UCHAR16 *)last;
201: #endif
202: return entrysize;
203: }
204: if (c > 0) bot = mid + 1; else top = mid;
205: }
206:
207: return PCRE_ERROR_NOSUBSTRING;
208: }
209:
210:
211:
212: /*************************************************
213: * Find first set of multiple named strings *
214: *************************************************/
215:
216: /* This function allows for duplicate names in the table of named substrings.
217: It returns the number of the first one that was set in a pattern match.
218:
219: Arguments:
220: code the compiled regex
221: stringname the name of the capturing substring
222: ovector the vector of matched substrings
223:
224: Returns: the number of the first that is set,
225: or the number of the last one if none are set,
226: or a negative number on error
227: */
228:
229: #ifdef COMPILE_PCRE8
230: static int
231: get_first_set(const pcre *code, const char *stringname, int *ovector)
232: #else
233: static int
234: get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
235: #endif
236: {
237: const REAL_PCRE *re = (const REAL_PCRE *)code;
238: int entrysize;
239: pcre_uchar *entry;
240: #ifdef COMPILE_PCRE8
241: char *first, *last;
242: #else
243: PCRE_UCHAR16 *first, *last;
244: #endif
245:
246: #ifdef COMPILE_PCRE8
247: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
248: return pcre_get_stringnumber(code, stringname);
249: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
250: #else
251: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
252: return pcre16_get_stringnumber(code, stringname);
253: entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
254: #endif
255: if (entrysize <= 0) return entrysize;
256: for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
257: {
258: int n = GET2(entry, 0);
259: if (ovector[n*2] >= 0) return n;
260: }
261: return GET2(entry, 0);
262: }
263:
264:
265:
266:
267: /*************************************************
268: * Copy captured string to given buffer *
269: *************************************************/
270:
271: /* This function copies a single captured substring into a given buffer.
272: Note that we use memcpy() rather than strncpy() in case there are binary zeros
273: in the string.
274:
275: Arguments:
276: subject the subject string that was matched
277: ovector pointer to the offsets table
278: stringcount the number of substrings that were captured
279: (i.e. the yield of the pcre_exec call, unless
280: that was zero, in which case it should be 1/3
281: of the offset table size)
282: stringnumber the number of the required substring
283: buffer where to put the substring
284: size the size of the buffer
285:
286: Returns: if successful:
287: the length of the copied string, not including the zero
288: that is put on the end; can be zero
289: if not successful:
290: PCRE_ERROR_NOMEMORY (-6) buffer too small
291: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
292: */
293:
294: #ifdef COMPILE_PCRE8
295: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
296: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
297: int stringnumber, char *buffer, int size)
298: #else
299: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
300: pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
301: int stringnumber, PCRE_UCHAR16 *buffer, int size)
302: #endif
303: {
304: int yield;
305: if (stringnumber < 0 || stringnumber >= stringcount)
306: return PCRE_ERROR_NOSUBSTRING;
307: stringnumber *= 2;
308: yield = ovector[stringnumber+1] - ovector[stringnumber];
309: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
310: memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
311: buffer[yield] = 0;
312: return yield;
313: }
314:
315:
316:
317: /*************************************************
318: * Copy named captured string to given buffer *
319: *************************************************/
320:
321: /* This function copies a single captured substring into a given buffer,
322: identifying it by name. If the regex permits duplicate names, the first
323: substring that is set is chosen.
324:
325: Arguments:
326: code the compiled regex
327: subject the subject string that was matched
328: ovector pointer to the offsets table
329: stringcount the number of substrings that were captured
330: (i.e. the yield of the pcre_exec call, unless
331: that was zero, in which case it should be 1/3
332: of the offset table size)
333: stringname the name of the required substring
334: buffer where to put the substring
335: size the size of the buffer
336:
337: Returns: if successful:
338: the length of the copied string, not including the zero
339: that is put on the end; can be zero
340: if not successful:
341: PCRE_ERROR_NOMEMORY (-6) buffer too small
342: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
343: */
344:
345: #ifdef COMPILE_PCRE8
346: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
347: pcre_copy_named_substring(const pcre *code, const char *subject,
348: int *ovector, int stringcount, const char *stringname,
349: char *buffer, int size)
350: #else
351: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
352: pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
353: int *ovector, int stringcount, PCRE_SPTR16 stringname,
354: PCRE_UCHAR16 *buffer, int size)
355: #endif
356: {
357: int n = get_first_set(code, stringname, ovector);
358: if (n <= 0) return n;
359: #ifdef COMPILE_PCRE8
360: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
361: #else
362: return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
363: #endif
364: }
365:
366:
367:
368: /*************************************************
369: * Copy all captured strings to new store *
370: *************************************************/
371:
372: /* This function gets one chunk of store and builds a list of pointers and all
373: of the captured substrings in it. A NULL pointer is put on the end of the list.
374:
375: Arguments:
376: subject the subject string that was matched
377: ovector pointer to the offsets table
378: stringcount the number of substrings that were captured
379: (i.e. the yield of the pcre_exec call, unless
380: that was zero, in which case it should be 1/3
381: of the offset table size)
382: listptr set to point to the list of pointers
383:
384: Returns: if successful: 0
385: if not successful:
386: PCRE_ERROR_NOMEMORY (-6) failed to get store
387: */
388:
389: #ifdef COMPILE_PCRE8
390: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
391: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
392: const char ***listptr)
393: #else
394: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
395: pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
396: PCRE_SPTR16 **listptr)
397: #endif
398: {
399: int i;
400: int size = sizeof(pcre_uchar *);
401: int double_count = stringcount * 2;
402: pcre_uchar **stringlist;
403: pcre_uchar *p;
404:
405: for (i = 0; i < double_count; i += 2)
406: size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
407:
408: stringlist = (pcre_uchar **)(PUBL(malloc))(size);
409: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
410:
411: #ifdef COMPILE_PCRE8
412: *listptr = (const char **)stringlist;
413: #else
414: *listptr = (PCRE_SPTR16 *)stringlist;
415: #endif
416: p = (pcre_uchar *)(stringlist + stringcount + 1);
417:
418: for (i = 0; i < double_count; i += 2)
419: {
420: int len = ovector[i+1] - ovector[i];
421: memcpy(p, subject + ovector[i], IN_UCHARS(len));
422: *stringlist++ = p;
423: p += len;
424: *p++ = 0;
425: }
426:
427: *stringlist = NULL;
428: return 0;
429: }
430:
431:
432:
433: /*************************************************
434: * Free store obtained by get_substring_list *
435: *************************************************/
436:
437: /* This function exists for the benefit of people calling PCRE from non-C
438: programs that can call its functions, but not free() or (PUBL(free))()
439: directly.
440:
441: Argument: the result of a previous pcre_get_substring_list()
442: Returns: nothing
443: */
444:
445: #ifdef COMPILE_PCRE8
446: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
447: pcre_free_substring_list(const char **pointer)
448: #else
449: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
450: pcre16_free_substring_list(PCRE_SPTR16 *pointer)
451: #endif
452: {
453: (PUBL(free))((void *)pointer);
454: }
455:
456:
457:
458: /*************************************************
459: * Copy captured string to new store *
460: *************************************************/
461:
462: /* This function copies a single captured substring into a piece of new
463: store
464:
465: Arguments:
466: subject the subject string that was matched
467: ovector pointer to the offsets table
468: stringcount the number of substrings that were captured
469: (i.e. the yield of the pcre_exec call, unless
470: that was zero, in which case it should be 1/3
471: of the offset table size)
472: stringnumber the number of the required substring
473: stringptr where to put a pointer to the substring
474:
475: Returns: if successful:
476: the length of the string, not including the zero that
477: is put on the end; can be zero
478: if not successful:
479: PCRE_ERROR_NOMEMORY (-6) failed to get store
480: PCRE_ERROR_NOSUBSTRING (-7) substring not present
481: */
482:
483: #ifdef COMPILE_PCRE8
484: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
485: pcre_get_substring(const char *subject, int *ovector, int stringcount,
486: int stringnumber, const char **stringptr)
487: #else
488: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
489: pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
490: int stringnumber, PCRE_SPTR16 *stringptr)
491: #endif
492: {
493: int yield;
494: pcre_uchar *substring;
495: if (stringnumber < 0 || stringnumber >= stringcount)
496: return PCRE_ERROR_NOSUBSTRING;
497: stringnumber *= 2;
498: yield = ovector[stringnumber+1] - ovector[stringnumber];
499: substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
500: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
501: memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
502: substring[yield] = 0;
503: #ifdef COMPILE_PCRE8
504: *stringptr = (const char *)substring;
505: #else
506: *stringptr = (PCRE_SPTR16)substring;
507: #endif
508: return yield;
509: }
510:
511:
512:
513: /*************************************************
514: * Copy named captured string to new store *
515: *************************************************/
516:
517: /* This function copies a single captured substring, identified by name, into
518: new store. If the regex permits duplicate names, the first substring that is
519: set is chosen.
520:
521: Arguments:
522: code the compiled regex
523: subject the subject string that was matched
524: ovector pointer to the offsets table
525: stringcount the number of substrings that were captured
526: (i.e. the yield of the pcre_exec call, unless
527: that was zero, in which case it should be 1/3
528: of the offset table size)
529: stringname the name of the required substring
530: stringptr where to put the pointer
531:
532: Returns: if successful:
533: the length of the copied string, not including the zero
534: that is put on the end; can be zero
535: if not successful:
536: PCRE_ERROR_NOMEMORY (-6) couldn't get memory
537: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
538: */
539:
540: #ifdef COMPILE_PCRE8
541: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
542: pcre_get_named_substring(const pcre *code, const char *subject,
543: int *ovector, int stringcount, const char *stringname,
544: const char **stringptr)
545: #else
546: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
547: pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
548: int *ovector, int stringcount, PCRE_SPTR16 stringname,
549: PCRE_SPTR16 *stringptr)
550: #endif
551: {
552: int n = get_first_set(code, stringname, ovector);
553: if (n <= 0) return n;
554: #ifdef COMPILE_PCRE8
555: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
556: #else
557: return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
558: #endif
559: }
560:
561:
562:
563:
564: /*************************************************
565: * Free store obtained by get_substring *
566: *************************************************/
567:
568: /* This function exists for the benefit of people calling PCRE from non-C
569: programs that can call its functions, but not free() or (PUBL(free))()
570: directly.
571:
572: Argument: the result of a previous pcre_get_substring()
573: Returns: nothing
574: */
575:
576: #ifdef COMPILE_PCRE8
577: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
578: pcre_free_substring(const char *pointer)
579: #else
580: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
581: pcre16_free_substring(PCRE_SPTR16 pointer)
582: #endif
583: {
584: (PUBL(free))((void *)pointer);
585: }
586:
587: /* End of pcre_get.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>