Annotation of embedaddon/pcre/pcre_get.c, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2008 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains some convenience functions for extracting substrings
42: from the subject string after a regex match has succeeded. The original idea
43: for these functions came from Scott Wimer. */
44:
45:
46: #ifdef HAVE_CONFIG_H
47: #include "config.h"
48: #endif
49:
50: #include "pcre_internal.h"
51:
52:
53: /*************************************************
54: * Find number for named string *
55: *************************************************/
56:
57: /* This function is used by the get_first_set() function below, as well
58: as being generally available. It assumes that names are unique.
59:
60: Arguments:
61: code the compiled regex
62: stringname the name whose number is required
63:
64: Returns: the number of the named parentheses, or a negative number
65: (PCRE_ERROR_NOSUBSTRING) if not found
66: */
67:
68: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
69: pcre_get_stringnumber(const pcre *code, const char *stringname)
70: {
71: int rc;
72: int entrysize;
73: int top, bot;
74: uschar *nametable;
75:
76: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
77: return rc;
78: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
79:
80: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
81: return rc;
82: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
83: return rc;
84:
85: bot = 0;
86: while (top > bot)
87: {
88: int mid = (top + bot) / 2;
89: uschar *entry = nametable + entrysize*mid;
90: int c = strcmp(stringname, (char *)(entry + 2));
91: if (c == 0) return (entry[0] << 8) + entry[1];
92: if (c > 0) bot = mid + 1; else top = mid;
93: }
94:
95: return PCRE_ERROR_NOSUBSTRING;
96: }
97:
98:
99:
100: /*************************************************
101: * Find (multiple) entries for named string *
102: *************************************************/
103:
104: /* This is used by the get_first_set() function below, as well as being
105: generally available. It is used when duplicated names are permitted.
106:
107: Arguments:
108: code the compiled regex
109: stringname the name whose entries required
110: firstptr where to put the pointer to the first entry
111: lastptr where to put the pointer to the last entry
112:
113: Returns: the length of each entry, or a negative number
114: (PCRE_ERROR_NOSUBSTRING) if not found
115: */
116:
117: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
118: pcre_get_stringtable_entries(const pcre *code, const char *stringname,
119: char **firstptr, char **lastptr)
120: {
121: int rc;
122: int entrysize;
123: int top, bot;
124: uschar *nametable, *lastentry;
125:
126: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
127: return rc;
128: if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
129:
130: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
131: return rc;
132: if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
133: return rc;
134:
135: lastentry = nametable + entrysize * (top - 1);
136: bot = 0;
137: while (top > bot)
138: {
139: int mid = (top + bot) / 2;
140: uschar *entry = nametable + entrysize*mid;
141: int c = strcmp(stringname, (char *)(entry + 2));
142: if (c == 0)
143: {
144: uschar *first = entry;
145: uschar *last = entry;
146: while (first > nametable)
147: {
148: if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
149: first -= entrysize;
150: }
151: while (last < lastentry)
152: {
153: if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
154: last += entrysize;
155: }
156: *firstptr = (char *)first;
157: *lastptr = (char *)last;
158: return entrysize;
159: }
160: if (c > 0) bot = mid + 1; else top = mid;
161: }
162:
163: return PCRE_ERROR_NOSUBSTRING;
164: }
165:
166:
167:
168: /*************************************************
169: * Find first set of multiple named strings *
170: *************************************************/
171:
172: /* This function allows for duplicate names in the table of named substrings.
173: It returns the number of the first one that was set in a pattern match.
174:
175: Arguments:
176: code the compiled regex
177: stringname the name of the capturing substring
178: ovector the vector of matched substrings
179:
180: Returns: the number of the first that is set,
181: or the number of the last one if none are set,
182: or a negative number on error
183: */
184:
185: static int
186: get_first_set(const pcre *code, const char *stringname, int *ovector)
187: {
188: const real_pcre *re = (const real_pcre *)code;
189: int entrysize;
190: char *first, *last;
191: uschar *entry;
192: if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
193: return pcre_get_stringnumber(code, stringname);
194: entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
195: if (entrysize <= 0) return entrysize;
196: for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
197: {
198: int n = (entry[0] << 8) + entry[1];
199: if (ovector[n*2] >= 0) return n;
200: }
201: return (first[0] << 8) + first[1];
202: }
203:
204:
205:
206:
207: /*************************************************
208: * Copy captured string to given buffer *
209: *************************************************/
210:
211: /* This function copies a single captured substring into a given buffer.
212: Note that we use memcpy() rather than strncpy() in case there are binary zeros
213: in the string.
214:
215: Arguments:
216: subject the subject string that was matched
217: ovector pointer to the offsets table
218: stringcount the number of substrings that were captured
219: (i.e. the yield of the pcre_exec call, unless
220: that was zero, in which case it should be 1/3
221: of the offset table size)
222: stringnumber the number of the required substring
223: buffer where to put the substring
224: size the size of the buffer
225:
226: Returns: if successful:
227: the length of the copied string, not including the zero
228: that is put on the end; can be zero
229: if not successful:
230: PCRE_ERROR_NOMEMORY (-6) buffer too small
231: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
232: */
233:
234: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
235: pcre_copy_substring(const char *subject, int *ovector, int stringcount,
236: int stringnumber, char *buffer, int size)
237: {
238: int yield;
239: if (stringnumber < 0 || stringnumber >= stringcount)
240: return PCRE_ERROR_NOSUBSTRING;
241: stringnumber *= 2;
242: yield = ovector[stringnumber+1] - ovector[stringnumber];
243: if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
244: memcpy(buffer, subject + ovector[stringnumber], yield);
245: buffer[yield] = 0;
246: return yield;
247: }
248:
249:
250:
251: /*************************************************
252: * Copy named captured string to given buffer *
253: *************************************************/
254:
255: /* This function copies a single captured substring into a given buffer,
256: identifying it by name. If the regex permits duplicate names, the first
257: substring that is set is chosen.
258:
259: Arguments:
260: code the compiled regex
261: subject the subject string that was matched
262: ovector pointer to the offsets table
263: stringcount the number of substrings that were captured
264: (i.e. the yield of the pcre_exec call, unless
265: that was zero, in which case it should be 1/3
266: of the offset table size)
267: stringname the name of the required substring
268: buffer where to put the substring
269: size the size of the buffer
270:
271: Returns: if successful:
272: the length of the copied string, not including the zero
273: that is put on the end; can be zero
274: if not successful:
275: PCRE_ERROR_NOMEMORY (-6) buffer too small
276: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
277: */
278:
279: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
280: pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
281: int stringcount, const char *stringname, char *buffer, int size)
282: {
283: int n = get_first_set(code, stringname, ovector);
284: if (n <= 0) return n;
285: return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
286: }
287:
288:
289:
290: /*************************************************
291: * Copy all captured strings to new store *
292: *************************************************/
293:
294: /* This function gets one chunk of store and builds a list of pointers and all
295: of the captured substrings in it. A NULL pointer is put on the end of the list.
296:
297: Arguments:
298: subject the subject string that was matched
299: ovector pointer to the offsets table
300: stringcount the number of substrings that were captured
301: (i.e. the yield of the pcre_exec call, unless
302: that was zero, in which case it should be 1/3
303: of the offset table size)
304: listptr set to point to the list of pointers
305:
306: Returns: if successful: 0
307: if not successful:
308: PCRE_ERROR_NOMEMORY (-6) failed to get store
309: */
310:
311: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
312: pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
313: const char ***listptr)
314: {
315: int i;
316: int size = sizeof(char *);
317: int double_count = stringcount * 2;
318: char **stringlist;
319: char *p;
320:
321: for (i = 0; i < double_count; i += 2)
322: size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
323:
324: stringlist = (char **)(pcre_malloc)(size);
325: if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
326:
327: *listptr = (const char **)stringlist;
328: p = (char *)(stringlist + stringcount + 1);
329:
330: for (i = 0; i < double_count; i += 2)
331: {
332: int len = ovector[i+1] - ovector[i];
333: memcpy(p, subject + ovector[i], len);
334: *stringlist++ = p;
335: p += len;
336: *p++ = 0;
337: }
338:
339: *stringlist = NULL;
340: return 0;
341: }
342:
343:
344:
345: /*************************************************
346: * Free store obtained by get_substring_list *
347: *************************************************/
348:
349: /* This function exists for the benefit of people calling PCRE from non-C
350: programs that can call its functions, but not free() or (pcre_free)() directly.
351:
352: Argument: the result of a previous pcre_get_substring_list()
353: Returns: nothing
354: */
355:
356: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
357: pcre_free_substring_list(const char **pointer)
358: {
359: (pcre_free)((void *)pointer);
360: }
361:
362:
363:
364: /*************************************************
365: * Copy captured string to new store *
366: *************************************************/
367:
368: /* This function copies a single captured substring into a piece of new
369: store
370:
371: Arguments:
372: subject the subject string that was matched
373: ovector pointer to the offsets table
374: stringcount the number of substrings that were captured
375: (i.e. the yield of the pcre_exec call, unless
376: that was zero, in which case it should be 1/3
377: of the offset table size)
378: stringnumber the number of the required substring
379: stringptr where to put a pointer to the substring
380:
381: Returns: if successful:
382: the length of the string, not including the zero that
383: is put on the end; can be zero
384: if not successful:
385: PCRE_ERROR_NOMEMORY (-6) failed to get store
386: PCRE_ERROR_NOSUBSTRING (-7) substring not present
387: */
388:
389: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
390: pcre_get_substring(const char *subject, int *ovector, int stringcount,
391: int stringnumber, const char **stringptr)
392: {
393: int yield;
394: char *substring;
395: if (stringnumber < 0 || stringnumber >= stringcount)
396: return PCRE_ERROR_NOSUBSTRING;
397: stringnumber *= 2;
398: yield = ovector[stringnumber+1] - ovector[stringnumber];
399: substring = (char *)(pcre_malloc)(yield + 1);
400: if (substring == NULL) return PCRE_ERROR_NOMEMORY;
401: memcpy(substring, subject + ovector[stringnumber], yield);
402: substring[yield] = 0;
403: *stringptr = substring;
404: return yield;
405: }
406:
407:
408:
409: /*************************************************
410: * Copy named captured string to new store *
411: *************************************************/
412:
413: /* This function copies a single captured substring, identified by name, into
414: new store. If the regex permits duplicate names, the first substring that is
415: set is chosen.
416:
417: Arguments:
418: code the compiled regex
419: subject the subject string that was matched
420: ovector pointer to the offsets table
421: stringcount the number of substrings that were captured
422: (i.e. the yield of the pcre_exec call, unless
423: that was zero, in which case it should be 1/3
424: of the offset table size)
425: stringname the name of the required substring
426: stringptr where to put the pointer
427:
428: Returns: if successful:
429: the length of the copied string, not including the zero
430: that is put on the end; can be zero
431: if not successful:
432: PCRE_ERROR_NOMEMORY (-6) couldn't get memory
433: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
434: */
435:
436: PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
437: pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
438: int stringcount, const char *stringname, const char **stringptr)
439: {
440: int n = get_first_set(code, stringname, ovector);
441: if (n <= 0) return n;
442: return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
443: }
444:
445:
446:
447:
448: /*************************************************
449: * Free store obtained by get_substring *
450: *************************************************/
451:
452: /* This function exists for the benefit of people calling PCRE from non-C
453: programs that can call its functions, but not free() or (pcre_free)() directly.
454:
455: Argument: the result of a previous pcre_get_substring()
456: Returns: nothing
457: */
458:
459: PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
460: pcre_free_substring(const char *pointer)
461: {
462: (pcre_free)((void *)pointer);
463: }
464:
465: /* End of pcre_get.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>