Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_get.c, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2008 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains some convenience functions for extracting substrings
42: from the subject string after a regex match has succeeded. The original idea
43: for these functions came from Scott Wimer. */
44:
45:
46: #include "config.h"
47:
48: #include "pcre_internal.h"
49:
50:
51: /*************************************************
52: * Find number for named string *
53: *************************************************/
54:
55: /* This function is used by the get_first_set() function below, as well
56: as being generally available. It assumes that names are unique.
57:
58: Arguments:
59: code the compiled regex
60: stringname the name whose number is required
61:
62: Returns: the number of the named parentheses, or a negative number
63: (PCRE_ERROR_NOSUBSTRING) if not found
64: */
65:
66: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
67: pcre_get_stringnumber(const pcre *code, const char *stringname)
68: {
69: int rc;
70: int entrysize;
71: int top, bot;
72: uschar *nametable;
73:
74: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
75: return rc;
76: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
77:
78: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
79: return rc;
80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
81: return rc;
82:
83: bot = 0;
84: while (top > bot)
85: {
86: int mid = (top + bot) / 2;
87: uschar *entry = nametable + entrysize*mid;
88: int c = strcmp(stringname, (char *)(entry + 2));
89: if (c == 0) return (entry[0] << 8) + entry[1];
90: if (c > 0) bot = mid + 1; else top = mid;
91: }
92:
93: return PCRE_ERROR_NOSUBSTRING;
94: }
95:
96:
97:
98: /*************************************************
99: * Find (multiple) entries for named string *
100: *************************************************/
101:
102: /* This is used by the get_first_set() function below, as well as being
103: generally available. It is used when duplicated names are permitted.
104:
105: Arguments:
106: code the compiled regex
107: stringname the name whose entries required
108: firstptr where to put the pointer to the first entry
109: lastptr where to put the pointer to the last entry
110:
111: Returns: the length of each entry, or a negative number
112: (PCRE_ERROR_NOSUBSTRING) if not found
113: */
114:
115: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
116: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
117: char **firstptr, char **lastptr)
118: {
119: int rc;
120: int entrysize;
121: int top, bot;
122: uschar *nametable, *lastentry;
123:
124: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
125: return rc;
126: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
127:
128: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
129: return rc;
130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
131: return rc;
132:
133: lastentry = nametable + entrysize * (top - 1);
134: bot = 0;
135: while (top > bot)
136: {
137: int mid = (top + bot) / 2;
138: uschar *entry = nametable + entrysize*mid;
139: int c = strcmp(stringname, (char *)(entry + 2));
140: if (c == 0)
141: {
142: uschar *first = entry;
143: uschar *last = entry;
144: while (first > nametable)
145: {
146: if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
147: first -= entrysize;
148: }
149: while (last < lastentry)
150: {
151: if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
152: last += entrysize;
153: }
154: *firstptr = (char *)first;
155: *lastptr = (char *)last;
156: return entrysize;
157: }
158: if (c > 0) bot = mid + 1; else top = mid;
159: }
160:
161: return PCRE_ERROR_NOSUBSTRING;
162: }
163:
164:
165:
166: /*************************************************
167: * Find first set of multiple named strings *
168: *************************************************/
169:
170: /* This function allows for duplicate names in the table of named substrings.
171: It returns the number of the first one that was set in a pattern match.
172:
173: Arguments:
174: code the compiled regex
175: stringname the name of the capturing substring
176: ovector the vector of matched substrings
177:
178: Returns: the number of the first that is set,
179: or the number of the last one if none are set,
180: or a negative number on error
181: */
182:
183: static int
184: get_first_set(const pcre *code, const char *stringname, int *ovector)
185: {
186: const real_pcre *re = (const real_pcre *)code;
187: int entrysize;
188: char *first, *last;
189: uschar *entry;
190: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
191: return pcre_get_stringnumber(code, stringname);
192: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
193: if (entrysize <= 0) return entrysize;
194: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
195: {
196: int n = (entry[0] << 8) + entry[1];
197: if (ovector[n*2] >= 0) return n;
198: }
199: return (first[0] << 8) + first[1];
200: }
201:
202:
203:
204:
205: /*************************************************
206: * Copy captured string to given buffer *
207: *************************************************/
208:
209: /* This function copies a single captured substring into a given buffer.
210: Note that we use memcpy() rather than strncpy() in case there are binary zeros
211: in the string.
212:
213: Arguments:
214: subject the subject string that was matched
215: ovector pointer to the offsets table
216: stringcount the number of substrings that were captured
217: (i.e. the yield of the pcre_exec call, unless
218: that was zero, in which case it should be 1/3
219: of the offset table size)
220: stringnumber the number of the required substring
221: buffer where to put the substring
222: size the size of the buffer
223:
224: Returns: if successful:
225: the length of the copied string, not including the zero
226: that is put on the end; can be zero
227: if not successful:
228: PCRE_ERROR_NOMEMORY (-6) buffer too small
229: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
230: */
231:
232: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
233: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
234: int stringnumber, char *buffer, int size)
235: {
236: int yield;
237: if (stringnumber < 0 || stringnumber >= stringcount)
238: return PCRE_ERROR_NOSUBSTRING;
239: stringnumber *= 2;
240: yield = ovector[stringnumber+1] - ovector[stringnumber];
241: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
242: memcpy(buffer, subject + ovector[stringnumber], yield);
243: buffer[yield] = 0;
244: return yield;
245: }
246:
247:
248:
249: /*************************************************
250: * Copy named captured string to given buffer *
251: *************************************************/
252:
253: /* This function copies a single captured substring into a given buffer,
254: identifying it by name. If the regex permits duplicate names, the first
255: substring that is set is chosen.
256:
257: Arguments:
258: code the compiled regex
259: subject the subject string that was matched
260: ovector pointer to the offsets table
261: stringcount the number of substrings that were captured
262: (i.e. the yield of the pcre_exec call, unless
263: that was zero, in which case it should be 1/3
264: of the offset table size)
265: stringname the name of the required substring
266: buffer where to put the substring
267: size the size of the buffer
268:
269: Returns: if successful:
270: the length of the copied string, not including the zero
271: that is put on the end; can be zero
272: if not successful:
273: PCRE_ERROR_NOMEMORY (-6) buffer too small
274: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
275: */
276:
277: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
278: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
279: int stringcount, const char *stringname, char *buffer, int size)
280: {
281: int n = get_first_set(code, stringname, ovector);
282: if (n <= 0) return n;
283: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
284: }
285:
286:
287:
288: /*************************************************
289: * Copy all captured strings to new store *
290: *************************************************/
291:
292: /* This function gets one chunk of store and builds a list of pointers and all
293: of the captured substrings in it. A NULL pointer is put on the end of the list.
294:
295: Arguments:
296: subject the subject string that was matched
297: ovector pointer to the offsets table
298: stringcount the number of substrings that were captured
299: (i.e. the yield of the pcre_exec call, unless
300: that was zero, in which case it should be 1/3
301: of the offset table size)
302: listptr set to point to the list of pointers
303:
304: Returns: if successful: 0
305: if not successful:
306: PCRE_ERROR_NOMEMORY (-6) failed to get store
307: */
308:
309: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
310: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
311: const char ***listptr)
312: {
313: int i;
314: int size = sizeof(char *);
315: int double_count = stringcount * 2;
316: char **stringlist;
317: char *p;
318:
319: for (i = 0; i < double_count; i += 2)
320: size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
321:
322: stringlist = (char **)(pcre_malloc)(size);
323: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
324:
325: *listptr = (const char **)stringlist;
326: p = (char *)(stringlist + stringcount + 1);
327:
328: for (i = 0; i < double_count; i += 2)
329: {
330: int len = ovector[i+1] - ovector[i];
331: memcpy(p, subject + ovector[i], len);
332: *stringlist++ = p;
333: p += len;
334: *p++ = 0;
335: }
336:
337: *stringlist = NULL;
338: return 0;
339: }
340:
341:
342:
343: /*************************************************
344: * Free store obtained by get_substring_list *
345: *************************************************/
346:
347: /* This function exists for the benefit of people calling PCRE from non-C
348: programs that can call its functions, but not free() or (pcre_free)() directly.
349:
350: Argument: the result of a previous pcre_get_substring_list()
351: Returns: nothing
352: */
353:
354: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
355: pcre_free_substring_list(const char **pointer)
356: {
357: (pcre_free)((void *)pointer);
358: }
359:
360:
361:
362: /*************************************************
363: * Copy captured string to new store *
364: *************************************************/
365:
366: /* This function copies a single captured substring into a piece of new
367: store
368:
369: Arguments:
370: subject the subject string that was matched
371: ovector pointer to the offsets table
372: stringcount the number of substrings that were captured
373: (i.e. the yield of the pcre_exec call, unless
374: that was zero, in which case it should be 1/3
375: of the offset table size)
376: stringnumber the number of the required substring
377: stringptr where to put a pointer to the substring
378:
379: Returns: if successful:
380: the length of the string, not including the zero that
381: is put on the end; can be zero
382: if not successful:
383: PCRE_ERROR_NOMEMORY (-6) failed to get store
384: PCRE_ERROR_NOSUBSTRING (-7) substring not present
385: */
386:
387: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
388: pcre_get_substring(const char *subject, int *ovector, int stringcount,
389: int stringnumber, const char **stringptr)
390: {
391: int yield;
392: char *substring;
393: if (stringnumber < 0 || stringnumber >= stringcount)
394: return PCRE_ERROR_NOSUBSTRING;
395: stringnumber *= 2;
396: yield = ovector[stringnumber+1] - ovector[stringnumber];
397: substring = (char *)(pcre_malloc)(yield + 1);
398: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
399: memcpy(substring, subject + ovector[stringnumber], yield);
400: substring[yield] = 0;
401: *stringptr = substring;
402: return yield;
403: }
404:
405:
406:
407: /*************************************************
408: * Copy named captured string to new store *
409: *************************************************/
410:
411: /* This function copies a single captured substring, identified by name, into
412: new store. If the regex permits duplicate names, the first substring that is
413: set is chosen.
414:
415: Arguments:
416: code the compiled regex
417: subject the subject string that was matched
418: ovector pointer to the offsets table
419: stringcount the number of substrings that were captured
420: (i.e. the yield of the pcre_exec call, unless
421: that was zero, in which case it should be 1/3
422: of the offset table size)
423: stringname the name of the required substring
424: stringptr where to put the pointer
425:
426: Returns: if successful:
427: the length of the copied string, not including the zero
428: that is put on the end; can be zero
429: if not successful:
430: PCRE_ERROR_NOMEMORY (-6) couldn't get memory
431: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
432: */
433:
434: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
435: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
436: int stringcount, const char *stringname, const char **stringptr)
437: {
438: int n = get_first_set(code, stringname, ovector);
439: if (n <= 0) return n;
440: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
441: }
442:
443:
444:
445:
446: /*************************************************
447: * Free store obtained by get_substring *
448: *************************************************/
449:
450: /* This function exists for the benefit of people calling PCRE from non-C
451: programs that can call its functions, but not free() or (pcre_free)() directly.
452:
453: Argument: the result of a previous pcre_get_substring()
454: Returns: nothing
455: */
456:
457: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
458: pcre_free_substring(const char *pointer)
459: {
460: (pcre_free)((void *)pointer);
461: }
462:
463: /* End of pcre_get.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>