Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_printint.src, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2010 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains a PCRE private debugging function for printing out the
42: internal form of a compiled regular expression, along with some supporting
43: local functions. This source file is used in two places:
44:
45: (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46: (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
47: compiles.
48:
49: (2) It is always #included by pcretest.c, which can be asked to print out a
50: compiled regex for debugging purposes. */
51:
52:
/* PRINTABLE(c) is non-zero when the character code c should be written as
itself, and zero when it should be written as a hexadecimal escape. The range
is fixed here instead of being taken from isprint(), whose answer can differ
from system to system (even without locales); the output must always be the
same for testing purposes. pcretest uses this macro as well as this file.
Note that the argument may be evaluated twice. */

#ifndef EBCDIC
#define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
#else
#define PRINTABLE(c) (!((c) < 64 || (c) >= 255))
#endif
63:
64: /* The table of operator names. */
65:
66: static const char *OP_names[] = { OP_NAME_LIST };
67:
68:
69:
70: /*************************************************
71: * Print single- or multi-byte character *
72: *************************************************/
73:
74: static int
75: print_char(FILE *f, uschar *ptr, BOOL utf8)
76: {
77: int c = *ptr;
78:
79: #ifndef SUPPORT_UTF8
80: utf8 = utf8; /* Avoid compiler warning */
81: if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
82: return 0;
83:
84: #else
85: if (!utf8 || (c & 0xc0) != 0xc0)
86: {
87: if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
88: return 0;
89: }
90: else
91: {
92: int i;
93: int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
94: int s = 6*a;
95: c = (c & _pcre_utf8_table3[a]) << s;
96: for (i = 1; i <= a; i++)
97: {
98: /* This is a check for malformed UTF-8; it should only occur if the sanity
99: check has been turned off. Rather than swallow random bytes, just stop if
100: we hit a bad one. Print it with \X instead of \x as an indication. */
101:
102: if ((ptr[i] & 0xc0) != 0x80)
103: {
104: fprintf(f, "\\X{%x}", c);
105: return i - 1;
106: }
107:
108: /* The byte is OK */
109:
110: s -= 6;
111: c |= (ptr[i] & 0x3f) << s;
112: }
113: if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
114: return a;
115: }
116: #endif
117: }
118:
119:
120:
121: /*************************************************
122: * Find Unicode property name *
123: *************************************************/
124:
/* Return the name of the Unicode property identified by ptype and pvalue.

With SUPPORT_UCP the property table is searched from its last entry towards
its first, and the name of the first entry whose type and value both match is
returned. "??" is returned if nothing matches, and always when SUPPORT_UCP is
not defined. */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int idx = _pcre_utt_size;

  while (--idx >= 0)
    {
    if (_pcre_utt[idx].type == ptype && _pcre_utt[idx].value == pvalue)
      {
      return _pcre_utt_names + _pcre_utt[idx].name_offset;
      }
    }
  return "??";
#else
  /* The arguments are unused in this configuration; the casts keep compilers
  from warning about that. */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
141:
142:
143:
144: /*************************************************
145: * Print compiled regex *
146: *************************************************/
147:
148: /* Make this function work for a regex with integers in either byte order.
149: However, we assume that what we are passed is a compiled regex. The
150: print_lengths flag controls whether offsets and lengths of items are printed.
151: They can be turned off from pcretest so that automatic tests on bytecode can be
152: written that do not depend on the value of LINK_SIZE. */
153:
/* Write a listing of the compiled pattern external_re to f, one item per
output line, stopping after the OP_END item has been printed. When
print_lengths is TRUE each line begins with the item's offset from the start
of the code, and bracket-type items also show the value stored after their
opcode (GET(code, 1)); when it is FALSE fixed filler text is written in those
positions instead. Nothing is returned. */
154: static void
155: pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
156: {
157: real_pcre *re = (real_pcre *)external_re;
158: uschar *codestart, *code;
159: BOOL utf8;
160:
161: unsigned int options = re->options;
162: int offset = re->name_table_offset;
163: int count = re->name_count;
164: int size = re->name_entry_size;
165:
/* A magic number that does not match is treated as meaning the header fields
are stored in the other byte order: the two low bytes of offset, count and
size are exchanged, and the four bytes of options are reversed. Only the local
copies are changed; the structure itself is not written to. */
166: if (re->magic_number != MAGIC_NUMBER)
167: {
168: offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
169: count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
170: size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
171: options = ((options << 24) & 0xff000000) |
172: ((options << 8) & 0x00ff0000) |
173: ((options >> 8) & 0x0000ff00) |
174: ((options >> 24) & 0x000000ff);
175: }
176:
/* The code is taken to start directly after the name table, i.e. offset bytes
into the structure plus count entries of size bytes each. */
177: code = codestart = (uschar *)re + offset + count * size;
178: utf8 = (options & PCRE_UTF8) != 0;
179:
/* One iteration per printed line. The loop is left only by the return in the
OP_END case. */
180: for(;;)
181: {
182: uschar *ccode;
183: int c;
/* extra is the number of bytes this item occupies beyond the fixed length
given by _pcre_OP_lengths; it is added when code is advanced at the bottom of
the loop. */
184: int extra = 0;
185:
186: if (print_lengths)
187: fprintf(f, "%3d ", (int)(code - codestart));
188: else
189: fprintf(f, " ");
190:
191: switch(*code)
192: {
193: /* ========================================================================== */
194: /* These cases are never obeyed. This is a fudge that causes a compile-
195: time error if the vectors OP_names or _pcre_OP_lengths, which are indexed
196: by opcode, are not the correct length. It seems to be the only way to do
197: such a check at compile time, as the sizeof() operator does not work in
198: the C preprocessor. We do this while compiling pcretest, because that
199: #includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this
200: when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
201: know the size of _pcre_OP_lengths. */
202:
/* If either size test fails the second label evaluates to OP_TABLE_LENGTH + 0
and so duplicates the first one, which is what produces the compile error. */
203: #ifdef COMPILING_PCRETEST
204: case OP_TABLE_LENGTH:
205: case OP_TABLE_LENGTH +
206: ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
207: (sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)):
208: break;
209: #endif
210: /* ========================================================================== */
211:
/* End of the pattern: print the name and a closing rule, then finish. */
212: case OP_END:
213: fprintf(f, " %s\n", OP_names[*code]);
214: fprintf(f, "------------------------------------------------------------------\n");
215: return;
216:
217: case OP_OPT:
218: fprintf(f, " %.2x %s", code[1], OP_names[*code]);
219: break;
220:
/* A run of consecutive OP_CHAR items is printed on a single line. Each step
moves past the opcode, the first byte of the character, and however many
additional bytes print_char() reports. The continue bypasses the generic
advance at the bottom of the loop because code has already been moved. */
221: case OP_CHAR:
222: fprintf(f, " ");
223: do
224: {
225: code++;
226: code += 1 + print_char(f, code, utf8);
227: }
228: while (*code == OP_CHAR);
229: fprintf(f, "\n");
230: continue;
231:
/* Same treatment as OP_CHAR, for runs of OP_CHARNC, with an "NC" marker. */
232: case OP_CHARNC:
233: fprintf(f, " NC ");
234: do
235: {
236: code++;
237: code += 1 + print_char(f, code, utf8);
238: }
239: while (*code == OP_CHARNC);
240: fprintf(f, "\n");
241: continue;
242:
/* These two items carry a 2-byte number after the link value; it is printed
after the name. */
243: case OP_CBRA:
244: case OP_SCBRA:
245: if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
246: else fprintf(f, " ");
247: fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
248: break;
249:
/* Items printed as an optional link value followed by the opcode name. */
250: case OP_BRA:
251: case OP_SBRA:
252: case OP_KETRMAX:
253: case OP_KETRMIN:
254: case OP_ALT:
255: case OP_KET:
256: case OP_ASSERT:
257: case OP_ASSERT_NOT:
258: case OP_ASSERTBACK:
259: case OP_ASSERTBACK_NOT:
260: case OP_ONCE:
261: case OP_COND:
262: case OP_SCOND:
263: case OP_REVERSE:
264: if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
265: else fprintf(f, " ");
266: fprintf(f, "%s", OP_names[*code]);
267: break;
268:
269: case OP_CLOSE:
270: fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
271: break;
272:
273: case OP_CREF:
274: case OP_NCREF:
275: fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
276: break;
277:
/* The 2-byte value RREF_ANY is printed as "any"; other values are printed as
numbers. */
278: case OP_RREF:
279: c = GET2(code, 1);
280: if (c == RREF_ANY)
281: fprintf(f, " Cond recurse any");
282: else
283: fprintf(f, " Cond recurse %d", c);
284: break;
285:
286: case OP_NRREF:
287: c = GET2(code, 1);
288: if (c == RREF_ANY)
289: fprintf(f, " Cond nrecurse any");
290: else
291: fprintf(f, " Cond nrecurse %d", c);
292: break;
293:
294: case OP_DEF:
295: fprintf(f, " Cond def");
296: break;
297:
/* Single-item repeats without a count. The subject of the repeat is printed
first, then the name of the repeat opcode. */
298: case OP_STAR:
299: case OP_MINSTAR:
300: case OP_POSSTAR:
301: case OP_PLUS:
302: case OP_MINPLUS:
303: case OP_POSPLUS:
304: case OP_QUERY:
305: case OP_MINQUERY:
306: case OP_POSQUERY:
307: case OP_TYPESTAR:
308: case OP_TYPEMINSTAR:
309: case OP_TYPEPOSSTAR:
310: case OP_TYPEPLUS:
311: case OP_TYPEMINPLUS:
312: case OP_TYPEPOSPLUS:
313: case OP_TYPEQUERY:
314: case OP_TYPEMINQUERY:
315: case OP_TYPEPOSQUERY:
316: fprintf(f, " ");
/* NOTE(review): this comparison presumably relies on the opcode numbering
placing every OP_TYPE* case above at or after OP_TYPESTAR and every character
repeat before it - confirm against the opcode definitions. For the type
repeats code[1] is itself an opcode whose name is printed; for the others a
character starts at code+1. */
317: if (*code >= OP_TYPESTAR)
318: {
319: fprintf(f, "%s", OP_names[code[1]]);
320: if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
321: {
/* A property item is followed by two further bytes (passed to get_ucpname as
type and value), so the item is two bytes longer. */
322: fprintf(f, " %s ", get_ucpname(code[2], code[3]));
323: extra = 2;
324: }
325: }
326: else extra = print_char(f, code+1, utf8);
327: fprintf(f, "%s", OP_names[*code]);
328: break;
329:
/* Counted character repeats: a 2-byte count at code+1 and the character at
code+3. Printed as {n} for OP_EXACT and {0,n} for the others, with a trailing
? or + for the MIN and POS forms. */
330: case OP_EXACT:
331: case OP_UPTO:
332: case OP_MINUPTO:
333: case OP_POSUPTO:
334: fprintf(f, " ");
335: extra = print_char(f, code+3, utf8);
336: fprintf(f, "{");
337: if (*code != OP_EXACT) fprintf(f, "0,");
338: fprintf(f, "%d}", GET2(code,1));
339: if (*code == OP_MINUPTO) fprintf(f, "?");
340: else if (*code == OP_POSUPTO) fprintf(f, "+");
341: break;
342:
/* Counted type repeats: as above, but code[3] is an opcode whose name is
printed, with two extra bytes when it is a property item. */
343: case OP_TYPEEXACT:
344: case OP_TYPEUPTO:
345: case OP_TYPEMINUPTO:
346: case OP_TYPEPOSUPTO:
347: fprintf(f, " %s", OP_names[code[3]]);
348: if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
349: {
350: fprintf(f, " %s ", get_ucpname(code[4], code[5]));
351: extra = 2;
352: }
353: fprintf(f, "{");
354: if (*code != OP_TYPEEXACT) fprintf(f, "0,");
355: fprintf(f, "%d}", GET2(code,1));
356: if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
357: else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
358: break;
359:
/* Negated single character, printed as [^x]. Only the one byte at code[1] is
examined here; print_char() is not used. */
360: case OP_NOT:
361: c = code[1];
362: if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
363: else fprintf(f, " [^\\x%02x]", c);
364: break;
365:
366: case OP_NOTSTAR:
367: case OP_NOTMINSTAR:
368: case OP_NOTPOSSTAR:
369: case OP_NOTPLUS:
370: case OP_NOTMINPLUS:
371: case OP_NOTPOSPLUS:
372: case OP_NOTQUERY:
373: case OP_NOTMINQUERY:
374: case OP_NOTPOSQUERY:
375: c = code[1];
376: if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
377: else fprintf(f, " [^\\x%02x]", c);
378: fprintf(f, "%s", OP_names[*code]);
379: break;
380:
/* Counted repeats of a negated character: count at code+1, byte at code[3]. */
381: case OP_NOTEXACT:
382: case OP_NOTUPTO:
383: case OP_NOTMINUPTO:
384: case OP_NOTPOSUPTO:
385: c = code[3];
386: if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
387: else fprintf(f, " [^\\x%02x]{", c);
388: if (*code != OP_NOTEXACT) fprintf(f, "0,");
389: fprintf(f, "%d}", GET2(code,1));
390: if (*code == OP_NOTMINUPTO) fprintf(f, "?");
391: else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
392: break;
393:
394: case OP_RECURSE:
395: if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
396: else fprintf(f, " ");
397: fprintf(f, "%s", OP_names[*code]);
398: break;
399:
/* A back reference may be followed by a repeat item. ccode is set to the byte
after the OP_REF item and control jumps into the class-handling block below to
share its repeat-printing code. */
400: case OP_REF:
401: fprintf(f, " \\%d", GET2(code,1));
402: ccode = code + _pcre_OP_lengths[*code];
403: goto CLASS_REF_REPEAT;
404:
405: case OP_CALLOUT:
406: fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
407: GET(code, 2 + LINK_SIZE));
408: break;
409:
410: case OP_PROP:
411: case OP_NOTPROP:
412: fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
413: break;
414:
415: /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
416: having this code always here, and it makes it less messy without all those
417: #ifdefs. */
418:
419: case OP_CLASS:
420: case OP_NCLASS:
421: case OP_XCLASS:
422: {
423: int i, min, max;
424: BOOL printmap;
425:
426: fprintf(f, " [");
427:
/* For OP_XCLASS the value stored after the opcode becomes the extra length
(NOTE(review): presumably the whole item length, with the table entry for
OP_XCLASS being zero - confirm). It is followed by a flag byte: XCL_MAP says
whether a bit map is present and XCL_NOT causes a leading ^ to be printed.
The other two class opcodes are followed directly by a bit map. */
428: if (*code == OP_XCLASS)
429: {
430: extra = GET(code, 1);
431: ccode = code + LINK_SIZE + 1;
432: printmap = (*ccode & XCL_MAP) != 0;
433: if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
434: }
435: else
436: {
437: printmap = TRUE;
438: ccode = code + 1;
439: }
440:
441: /* Print a bit map */
442:
/* Bit i of the 256-bit map is bit (i&7) of byte i/8. For each set bit, j is
advanced to the first clear bit after it (or 256) and then stepped back to the
last set bit of the run. The run is printed as its first character, then - if
it has more than one member - its last character, with a "-" between them only
when the run has more than two members. A literal '-' or ']' is preceded by a
backslash. Setting i = j resumes the scan after the run. */
443: if (printmap)
444: {
445: for (i = 0; i < 256; i++)
446: {
447: if ((ccode[i/8] & (1 << (i&7))) != 0)
448: {
449: int j;
450: for (j = i+1; j < 256; j++)
451: if ((ccode[j/8] & (1 << (j&7))) == 0) break;
452: if (i == '-' || i == ']') fprintf(f, "\\");
453: if (PRINTABLE(i)) fprintf(f, "%c", i);
454: else fprintf(f, "\\x%02x", i);
455: if (--j > i)
456: {
457: if (j != i + 1) fprintf(f, "-");
458: if (j == '-' || j == ']') fprintf(f, "\\");
459: if (PRINTABLE(j)) fprintf(f, "%c", j);
460: else fprintf(f, "\\x%02x", j);
461: }
462: i = j;
463: }
464: }
/* Step over the 32 bytes (256 bits) of the map. */
465: ccode += 32;
466: }
467:
468: /* For an XCLASS there is always some additional data */
469:
/* The additional data is a list of entries terminated by XCL_END. A property
entry is followed by two bytes (type and value); any other entry is followed
by one character, or by two characters when it is an XCL_RANGE. These
characters are always decoded as UTF-8 (TRUE is passed to print_char). */
470: if (*code == OP_XCLASS)
471: {
472: int ch;
473: while ((ch = *ccode++) != XCL_END)
474: {
475: if (ch == XCL_PROP)
476: {
477: int ptype = *ccode++;
478: int pvalue = *ccode++;
479: fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
480: }
481: else if (ch == XCL_NOTPROP)
482: {
483: int ptype = *ccode++;
484: int pvalue = *ccode++;
485: fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
486: }
487: else
488: {
489: ccode += 1 + print_char(f, ccode, TRUE);
490: if (ch == XCL_RANGE)
491: {
492: fprintf(f, "-");
493: ccode += 1 + print_char(f, ccode, TRUE);
494: }
495: }
496: }
497: }
498:
499: /* Indicate a non-UTF8 class which was created by negation */
500:
501: fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
502:
503: /* Handle repeats after a class or a back reference */
504:
/* At this point ccode addresses the byte following the class data or the back
reference. If that byte is a class-repeat opcode it is printed on the same
line and its length is added to extra, so that the advance at the bottom of
the loop steps over it as well. The OP_REF case jumps to this label. */
505: CLASS_REF_REPEAT:
506: switch(*ccode)
507: {
508: case OP_CRSTAR:
509: case OP_CRMINSTAR:
510: case OP_CRPLUS:
511: case OP_CRMINPLUS:
512: case OP_CRQUERY:
513: case OP_CRMINQUERY:
514: fprintf(f, "%s", OP_names[*ccode]);
515: extra += _pcre_OP_lengths[*ccode];
516: break;
517:
/* Range repeat: 2-byte minimum at ccode+1 and 2-byte maximum at ccode+3. A
maximum of zero is printed as an open-ended range. */
518: case OP_CRRANGE:
519: case OP_CRMINRANGE:
520: min = GET2(ccode,1);
521: max = GET2(ccode,3);
522: if (max == 0) fprintf(f, "{%d,}", min);
523: else fprintf(f, "{%d,%d}", min, max);
524: if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
525: extra += _pcre_OP_lengths[*ccode];
526: break;
527:
528: /* Do nothing if it's not a repeat; this code stops picky compilers
529: warning about the lack of a default code path. */
530:
531: default:
532: break;
533: }
534: }
535: break;
536:
/* The bytes from code + 2 are printed with %s, so they must be NUL-terminated.
NOTE(review): code[1] is added to the item length and is presumably the
length of that string - confirm against the compiler. */
537: case OP_MARK:
538: case OP_PRUNE_ARG:
539: case OP_SKIP_ARG:
540: fprintf(f, " %s %s", OP_names[*code], code + 2);
541: extra += code[1];
542: break;
543:
544: case OP_THEN:
545: if (print_lengths)
546: fprintf(f, " %s %d", OP_names[*code], GET(code, 1));
547: else
548: fprintf(f, " %s", OP_names[*code]);
549: break;
550:
/* Like OP_MARK, but a link value precedes the string: the byte added to extra
is at code[1+LINK_SIZE] and the string starts at code + 2 + LINK_SIZE. */
551: case OP_THEN_ARG:
552: if (print_lengths)
553: fprintf(f, " %s %d %s", OP_names[*code], GET(code, 1),
554: code + 2 + LINK_SIZE);
555: else
556: fprintf(f, " %s %s", OP_names[*code], code + 2 + LINK_SIZE);
557: extra += code[1+LINK_SIZE];
558: break;
559:
560: /* Anything else is just an item with no data*/
561:
562: default:
563: fprintf(f, " %s", OP_names[*code]);
564: break;
565: }
566:
/* Generic advance: the fixed length of the item just printed plus any extra
bytes recorded for it, then end the output line. */
567: code += _pcre_OP_lengths[*code] + extra;
568: fprintf(f, "\n");
569: }
570: }
571:
572: /* End of pcre_printint.src */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>