embedaddon/pcre/pcre_printint.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_printint.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:04 2014 UTC (9 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD

pcre 8.34

1: /************************************************* 2: * Perl-Compatible Regular Expressions * 3: *************************************************/ 4: 5: /* PCRE is a library of functions to support regular expressions whose syntax 6: and semantics are as close as possible to those of the Perl 5 language. 7: 8: Written by Philip Hazel 9: Copyright (c) 1997-2012 University of Cambridge 10: 11: ----------------------------------------------------------------------------- 12: Redistribution and use in source and binary forms, with or without 13: modification, are permitted provided that the following conditions are met: 14: 15: * Redistributions of source code must retain the above copyright notice, 16: this list of conditions and the following disclaimer. 17: 18: * Redistributions in binary form must reproduce the above copyright 19: notice, this list of conditions and the following disclaimer in the 20: documentation and/or other materials provided with the distribution. 21: 22: * Neither the name of the University of Cambridge nor the names of its 23: contributors may be used to endorse or promote products derived from 24: this software without specific prior written permission. 25: 26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36: POSSIBILITY OF SUCH DAMAGE. 37: ----------------------------------------------------------------------------- 38: */ 39: 40: 41: /* This module contains a PCRE private debugging function for printing out the 42: internal form of a compiled regular expression, along with some supporting 43: local functions. This source file is used in two places: 44: 45: (1) It is #included by pcre_compile.c when it is compiled in debugging mode 46: (PCRE_DEBUG defined in pcre_internal.h). It is not included in production 47: compiles. In this case PCRE_INCLUDED is defined. 48: 49: (2) It is also compiled separately and linked with pcretest.c, which can be 50: asked to print out a compiled regex for debugging purposes. */ 51: 52: #ifndef PCRE_INCLUDED 53: 54: #ifdef HAVE_CONFIG_H 55: #include "config.h" 56: #endif 57: 58: /* For pcretest program. */ 59: #define PRIV(name) name 60: 61: /* We have to include pcre_internal.h because we need the internal info for 62: displaying the results of pcre_study() and we also need to know about the 63: internal macros, structures, and other internal data values; pcretest has 64: "inside information" compared to a program that strictly follows the PCRE API. 65: 66: Although pcre_internal.h does itself include pcre.h, we explicitly include it 67: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set 68: appropriately for an application, not for building PCRE. */ 69: 70: #include "pcre.h" 71: #include "pcre_internal.h" 72: 73: /* These are the funtions that are contained within. It doesn't seem worth 74: having a separate .h file just for this. */ 75: 76: #endif /* PCRE_INCLUDED */ 77: 78: #ifdef PCRE_INCLUDED 79: static /* Keep the following function as private. */ 80: #endif 81: 82: #if defined COMPILE_PCRE8 83: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); 84: #elif defined COMPILE_PCRE16 85: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); 86: #elif defined COMPILE_PCRE32 87: void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths); 88: #endif 89: 90: /* Macro that decides whether a character should be output as a literal or in 91: hexadecimal. We don't use isprint() because that can vary from system to system 92: (even without the use of locales) and we want the output always to be the same, 93: for testing purposes. */ 94: 95: #ifdef EBCDIC 96: #define PRINTABLE(c) ((c) >= 64 && (c) < 255) 97: #else 98: #define PRINTABLE(c) ((c) >= 32 && (c) < 127) 99: #endif 100: 101: /* The table of operator names. */ 102: 103: static const char *priv_OP_names[] = { OP_NAME_LIST }; 104: 105: /* This table of operator lengths is not actually used by the working code, 106: but its size is needed for a check that ensures it is the correct size for the 107: number of opcodes (thus catching update omissions). */ 108: 109: static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS }; 110: 111: 112: 113: /************************************************* 114: * Print single- or multi-byte character * 115: *************************************************/ 116: 117: static unsigned int 118: print_char(FILE *f, pcre_uchar *ptr, BOOL utf) 119: { 120: pcre_uint32 c = *ptr; 121: 122: #ifndef SUPPORT_UTF 123: 124: (void)utf; /* Avoid compiler warning */ 125: if (PRINTABLE(c)) fprintf(f, "%c", (char)c); 126: else if (c <= 0x80) fprintf(f, "\\x%02x", c); 127: else fprintf(f, "\\x{%x}", c); 128: return 0; 129: 130: #else 131: 132: #if defined COMPILE_PCRE8 133: 134: if (!utf || (c & 0xc0) != 0xc0) 135: { 136: if (PRINTABLE(c)) fprintf(f, "%c", (char)c); 137: else if (c < 0x80) fprintf(f, "\\x%02x", c); 138: else fprintf(f, "\\x{%02x}", c); 139: return 0; 140: } 141: else 142: { 143: int i; 144: int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ 145: int s = 6*a; 146: c = (c & PRIV(utf8_table3)[a]) << s; 147: for (i = 1; i <= a; i++) 148: { 149: /* This is a check for malformed UTF-8; it should only occur if the sanity 150: check has been turned off. Rather than swallow random bytes, just stop if 151: we hit a bad one. Print it with \X instead of \x as an indication. */ 152: 153: if ((ptr[i] & 0xc0) != 0x80) 154: { 155: fprintf(f, "\\X{%x}", c); 156: return i - 1; 157: } 158: 159: /* The byte is OK */ 160: 161: s -= 6; 162: c |= (ptr[i] & 0x3f) << s; 163: } 164: fprintf(f, "\\x{%x}", c); 165: return a; 166: } 167: 168: #elif defined COMPILE_PCRE16 169: 170: if (!utf || (c & 0xfc00) != 0xd800) 171: { 172: if (PRINTABLE(c)) fprintf(f, "%c", (char)c); 173: else if (c <= 0x80) fprintf(f, "\\x%02x", c); 174: else fprintf(f, "\\x{%02x}", c); 175: return 0; 176: } 177: else 178: { 179: /* This is a check for malformed UTF-16; it should only occur if the sanity 180: check has been turned off. Rather than swallow a low surrogate, just stop if 181: we hit a bad one. Print it with \X instead of \x as an indication. */ 182: 183: if ((ptr[1] & 0xfc00) != 0xdc00) 184: { 185: fprintf(f, "\\X{%x}", c); 186: return 0; 187: } 188: 189: c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; 190: fprintf(f, "\\x{%x}", c); 191: return 1; 192: } 193: 194: #elif defined COMPILE_PCRE32 195: 196: if (!utf || (c & 0xfffff800u) != 0xd800u) 197: { 198: if (PRINTABLE(c)) fprintf(f, "%c", (char)c); 199: else if (c <= 0x80) fprintf(f, "\\x%02x", c); 200: else fprintf(f, "\\x{%x}", c); 201: return 0; 202: } 203: else 204: { 205: /* This is a check for malformed UTF-32; it should only occur if the sanity 206: check has been turned off. Rather than swallow a surrogate, just stop if 207: we hit one. Print it with \X instead of \x as an indication. */ 208: fprintf(f, "\\X{%x}", c); 209: return 0; 210: } 211: 212: #endif /* COMPILE_PCRE[8|16|32] */ 213: 214: #endif /* SUPPORT_UTF */ 215: } 216: 217: /************************************************* 218: * Print uchar string (regardless of utf) * 219: *************************************************/ 220: 221: static void 222: print_puchar(FILE *f, PCRE_PUCHAR ptr) 223: { 224: while (*ptr != '\0') 225: { 226: register pcre_uint32 c = *ptr++; 227: if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); 228: } 229: } 230: 231: /************************************************* 232: * Find Unicode property name * 233: *************************************************/ 234: 235: static const char * 236: get_ucpname(unsigned int ptype, unsigned int pvalue) 237: { 238: #ifdef SUPPORT_UCP 239: int i; 240: for (i = PRIV(utt_size) - 1; i >= 0; i--) 241: { 242: if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break; 243: } 244: return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??"; 245: #else 246: /* It gets harder and harder to shut off unwanted compiler warnings. */ 247: ptype = ptype * pvalue; 248: return (ptype == pvalue)? "??" : "??"; 249: #endif 250: } 251: 252: 253: /************************************************* 254: * Print Unicode property value * 255: *************************************************/ 256: 257: /* "Normal" properties can be printed from tables. The PT_CLIST property is a 258: pseudo-property that contains a pointer to a list of case-equivalent 259: characters. This is used only when UCP support is available and UTF mode is 260: selected. It should never occur otherwise, but just in case it does, have 261: something ready to print. */ 262: 263: static void 264: print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after) 265: { 266: if (code[1] != PT_CLIST) 267: { 268: fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1], 269: code[2]), after); 270: } 271: else 272: { 273: const char *not = (*code == OP_PROP)? "" : "not "; 274: #ifndef SUPPORT_UCP 275: fprintf(f, "%s%sclist %d%s", before, not, code[2], after); 276: #else 277: const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2]; 278: fprintf (f, "%s%sclist", before, not); 279: while (*p < NOTACHAR) fprintf(f, " %04x", *p++); 280: fprintf(f, "%s", after); 281: #endif 282: } 283: } 284: 285: 286: 287: 288: /************************************************* 289: * Print compiled regex * 290: *************************************************/ 291: 292: /* Make this function work for a regex with integers either byte order. 293: However, we assume that what we are passed is a compiled regex. The 294: print_lengths flag controls whether offsets and lengths of items are printed. 295: They can be turned off from pcretest so that automatic tests on bytecode can be 296: written that do not depend on the value of LINK_SIZE. */ 297: 298: #ifdef PCRE_INCLUDED 299: static /* Keep the following function as private. */ 300: #endif 301: #if defined COMPILE_PCRE8 302: void 303: pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) 304: #elif defined COMPILE_PCRE16 305: void 306: pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) 307: #elif defined COMPILE_PCRE32 308: void 309: pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths) 310: #endif 311: { 312: REAL_PCRE *re = (REAL_PCRE *)external_re; 313: pcre_uchar *codestart, *code; 314: BOOL utf; 315: 316: unsigned int options = re->options; 317: int offset = re->name_table_offset; 318: int count = re->name_count; 319: int size = re->name_entry_size; 320: 321: if (re->magic_number != MAGIC_NUMBER) 322: { 323: offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); 324: count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); 325: size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); 326: options = ((options << 24) & 0xff000000) | 327: ((options << 8) & 0x00ff0000) | 328: ((options >> 8) & 0x0000ff00) | 329: ((options >> 24) & 0x000000ff); 330: } 331: 332: code = codestart = (pcre_uchar *)re + offset + count * size; 333: /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ 334: utf = (options & PCRE_UTF8) != 0; 335: 336: for(;;) 337: { 338: pcre_uchar *ccode; 339: const char *flag = " "; 340: pcre_uint32 c; 341: unsigned int extra = 0; 342: 343: if (print_lengths) 344: fprintf(f, "%3d ", (int)(code - codestart)); 345: else 346: fprintf(f, " "); 347: 348: switch(*code) 349: { 350: /* ========================================================================== */ 351: /* These cases are never obeyed. This is a fudge that causes a compile- 352: time error if the vectors OP_names or OP_lengths, which are indexed 353: by opcode, are not the correct length. It seems to be the only way to do 354: such a check at compile time, as the sizeof() operator does not work in 355: the C preprocessor. */ 356: 357: case OP_TABLE_LENGTH: 358: case OP_TABLE_LENGTH + 359: ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && 360: (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)): 361: break; 362: /* ========================================================================== */ 363: 364: case OP_END: 365: fprintf(f, " %s\n", priv_OP_names[*code]); 366: fprintf(f, "------------------------------------------------------------------\n"); 367: return; 368: 369: case OP_CHAR: 370: fprintf(f, " "); 371: do 372: { 373: code++; 374: code += 1 + print_char(f, code, utf); 375: } 376: while (*code == OP_CHAR); 377: fprintf(f, "\n"); 378: continue; 379: 380: case OP_CHARI: 381: fprintf(f, " /i "); 382: do 383: { 384: code++; 385: code += 1 + print_char(f, code, utf); 386: } 387: while (*code == OP_CHARI); 388: fprintf(f, "\n"); 389: continue; 390: 391: case OP_CBRA: 392: case OP_CBRAPOS: 393: case OP_SCBRA: 394: case OP_SCBRAPOS: 395: if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); 396: else fprintf(f, " "); 397: fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE)); 398: break; 399: 400: case OP_BRA: 401: case OP_BRAPOS: 402: case OP_SBRA: 403: case OP_SBRAPOS: 404: case OP_KETRMAX: 405: case OP_KETRMIN: 406: case OP_KETRPOS: 407: case OP_ALT: 408: case OP_KET: 409: case OP_ASSERT: 410: case OP_ASSERT_NOT: 411: case OP_ASSERTBACK: 412: case OP_ASSERTBACK_NOT: 413: case OP_ONCE: 414: case OP_ONCE_NC: 415: case OP_COND: 416: case OP_SCOND: 417: case OP_REVERSE: 418: if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); 419: else fprintf(f, " "); 420: fprintf(f, "%s", priv_OP_names[*code]); 421: break; 422: 423: case OP_CLOSE: 424: fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1)); 425: break; 426: 427: case OP_CREF: 428: fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]); 429: break; 430: 431: case OP_DNCREF: 432: { 433: pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) + 434: IMM2_SIZE; 435: fprintf(f, " %s Cond ref <", flag); 436: print_puchar(f, entry); 437: fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); 438: } 439: break; 440: 441: case OP_RREF: 442: c = GET2(code, 1); 443: if (c == RREF_ANY) 444: fprintf(f, " Cond recurse any"); 445: else 446: fprintf(f, " Cond recurse %d", c); 447: break; 448: 449: case OP_DNRREF: 450: { 451: pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) + 452: IMM2_SIZE; 453: fprintf(f, " %s Cond recurse <", flag); 454: print_puchar(f, entry); 455: fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); 456: } 457: break; 458: 459: case OP_DEF: 460: fprintf(f, " Cond def"); 461: break; 462: 463: case OP_STARI: 464: case OP_MINSTARI: 465: case OP_POSSTARI: 466: case OP_PLUSI: 467: case OP_MINPLUSI: 468: case OP_POSPLUSI: 469: case OP_QUERYI: 470: case OP_MINQUERYI: 471: case OP_POSQUERYI: 472: flag = "/i"; 473: /* Fall through */ 474: case OP_STAR: 475: case OP_MINSTAR: 476: case OP_POSSTAR: 477: case OP_PLUS: 478: case OP_MINPLUS: 479: case OP_POSPLUS: 480: case OP_QUERY: 481: case OP_MINQUERY: 482: case OP_POSQUERY: 483: case OP_TYPESTAR: 484: case OP_TYPEMINSTAR: 485: case OP_TYPEPOSSTAR: 486: case OP_TYPEPLUS: 487: case OP_TYPEMINPLUS: 488: case OP_TYPEPOSPLUS: 489: case OP_TYPEQUERY: 490: case OP_TYPEMINQUERY: 491: case OP_TYPEPOSQUERY: 492: fprintf(f, " %s ", flag); 493: if (*code >= OP_TYPESTAR) 494: { 495: if (code[1] == OP_PROP || code[1] == OP_NOTPROP) 496: { 497: print_prop(f, code + 1, "", " "); 498: extra = 2; 499: } 500: else fprintf(f, "%s", priv_OP_names[code[1]]); 501: } 502: else extra = print_char(f, code+1, utf); 503: fprintf(f, "%s", priv_OP_names[*code]); 504: break; 505: 506: case OP_EXACTI: 507: case OP_UPTOI: 508: case OP_MINUPTOI: 509: case OP_POSUPTOI: 510: flag = "/i"; 511: /* Fall through */ 512: case OP_EXACT: 513: case OP_UPTO: 514: case OP_MINUPTO: 515: case OP_POSUPTO: 516: fprintf(f, " %s ", flag); 517: extra = print_char(f, code + 1 + IMM2_SIZE, utf); 518: fprintf(f, "{"); 519: if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); 520: fprintf(f, "%d}", GET2(code,1)); 521: if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); 522: else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); 523: break; 524: 525: case OP_TYPEEXACT: 526: case OP_TYPEUPTO: 527: case OP_TYPEMINUPTO: 528: case OP_TYPEPOSUPTO: 529: if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 530: { 531: print_prop(f, code + IMM2_SIZE + 1, " ", " "); 532: extra = 2; 533: } 534: else fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]); 535: fprintf(f, "{"); 536: if (*code != OP_TYPEEXACT) fprintf(f, "0,"); 537: fprintf(f, "%d}", GET2(code,1)); 538: if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); 539: else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); 540: break; 541: 542: case OP_NOTI: 543: flag = "/i"; 544: /* Fall through */ 545: case OP_NOT: 546: fprintf(f, " %s [^", flag); 547: extra = print_char(f, code + 1, utf); 548: fprintf(f, "]"); 549: break; 550: 551: case OP_NOTSTARI: 552: case OP_NOTMINSTARI: 553: case OP_NOTPOSSTARI: 554: case OP_NOTPLUSI: 555: case OP_NOTMINPLUSI: 556: case OP_NOTPOSPLUSI: 557: case OP_NOTQUERYI: 558: case OP_NOTMINQUERYI: 559: case OP_NOTPOSQUERYI: 560: flag = "/i"; 561: /* Fall through */ 562: 563: case OP_NOTSTAR: 564: case OP_NOTMINSTAR: 565: case OP_NOTPOSSTAR: 566: case OP_NOTPLUS: 567: case OP_NOTMINPLUS: 568: case OP_NOTPOSPLUS: 569: case OP_NOTQUERY: 570: case OP_NOTMINQUERY: 571: case OP_NOTPOSQUERY: 572: fprintf(f, " %s [^", flag); 573: extra = print_char(f, code + 1, utf); 574: fprintf(f, "]%s", priv_OP_names[*code]); 575: break; 576: 577: case OP_NOTEXACTI: 578: case OP_NOTUPTOI: 579: case OP_NOTMINUPTOI: 580: case OP_NOTPOSUPTOI: 581: flag = "/i"; 582: /* Fall through */ 583: 584: case OP_NOTEXACT: 585: case OP_NOTUPTO: 586: case OP_NOTMINUPTO: 587: case OP_NOTPOSUPTO: 588: fprintf(f, " %s [^", flag); 589: extra = print_char(f, code + 1 + IMM2_SIZE, utf); 590: fprintf(f, "]{"); 591: if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); 592: fprintf(f, "%d}", GET2(code,1)); 593: if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); 594: else 595: if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); 596: break; 597: 598: case OP_RECURSE: 599: if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); 600: else fprintf(f, " "); 601: fprintf(f, "%s", priv_OP_names[*code]); 602: break; 603: 604: case OP_REFI: 605: flag = "/i"; 606: /* Fall through */ 607: case OP_REF: 608: fprintf(f, " %s \\%d", flag, GET2(code,1)); 609: ccode = code + priv_OP_lengths[*code]; 610: goto CLASS_REF_REPEAT; 611: 612: case OP_DNREFI: 613: flag = "/i"; 614: /* Fall through */ 615: case OP_DNREF: 616: { 617: pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) + 618: IMM2_SIZE; 619: fprintf(f, " %s \\k<", flag); 620: print_puchar(f, entry); 621: fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); 622: } 623: ccode = code + priv_OP_lengths[*code]; 624: goto CLASS_REF_REPEAT; 625: 626: case OP_CALLOUT: 627: fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2), 628: GET(code, 2 + LINK_SIZE)); 629: break; 630: 631: case OP_PROP: 632: case OP_NOTPROP: 633: print_prop(f, code, " ", ""); 634: break; 635: 636: /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm 637: in having this code always here, and it makes it less messy without all 638: those #ifdefs. */ 639: 640: case OP_CLASS: 641: case OP_NCLASS: 642: case OP_XCLASS: 643: { 644: int i; 645: unsigned int min, max; 646: BOOL printmap; 647: pcre_uint8 *map; 648: 649: fprintf(f, " ["); 650: 651: if (*code == OP_XCLASS) 652: { 653: extra = GET(code, 1); 654: ccode = code + LINK_SIZE + 1; 655: printmap = (*ccode & XCL_MAP) != 0; 656: if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); 657: } 658: else 659: { 660: printmap = TRUE; 661: ccode = code + 1; 662: } 663: 664: /* Print a bit map */ 665: 666: if (printmap) 667: { 668: map = (pcre_uint8 *)ccode; 669: for (i = 0; i < 256; i++) 670: { 671: if ((map[i/8] & (1 << (i&7))) != 0) 672: { 673: int j; 674: for (j = i+1; j < 256; j++) 675: if ((map[j/8] & (1 << (j&7))) == 0) break; 676: if (i == '-' || i == ']') fprintf(f, "\\"); 677: if (PRINTABLE(i)) fprintf(f, "%c", i); 678: else fprintf(f, "\\x%02x", i); 679: if (--j > i) 680: { 681: if (j != i + 1) fprintf(f, "-"); 682: if (j == '-' || j == ']') fprintf(f, "\\"); 683: if (PRINTABLE(j)) fprintf(f, "%c", j); 684: else fprintf(f, "\\x%02x", j); 685: } 686: i = j; 687: } 688: } 689: ccode += 32 / sizeof(pcre_uchar); 690: } 691: 692: /* For an XCLASS there is always some additional data */ 693: 694: if (*code == OP_XCLASS) 695: { 696: pcre_uchar ch; 697: while ((ch = *ccode++) != XCL_END) 698: { 699: BOOL not = FALSE; 700: const char *notch = ""; 701: 702: switch(ch) 703: { 704: case XCL_NOTPROP: 705: not = TRUE; 706: notch = "^"; 707: /* Fall through */ 708: 709: case XCL_PROP: 710: { 711: unsigned int ptype = *ccode++; 712: unsigned int pvalue = *ccode++; 713: 714: switch(ptype) 715: { 716: case PT_PXGRAPH: 717: fprintf(f, "[:%sgraph:]", notch); 718: break; 719: 720: case PT_PXPRINT: 721: fprintf(f, "[:%sprint:]", notch); 722: break; 723: 724: case PT_PXPUNCT: 725: fprintf(f, "[:%spunct:]", notch); 726: break; 727: 728: default: 729: fprintf(f, "\\%c{%s}", (not? 'P':'p'), 730: get_ucpname(ptype, pvalue)); 731: break; 732: } 733: } 734: break; 735: 736: default: 737: ccode += 1 + print_char(f, ccode, utf); 738: if (ch == XCL_RANGE) 739: { 740: fprintf(f, "-"); 741: ccode += 1 + print_char(f, ccode, utf); 742: } 743: break; 744: } 745: } 746: } 747: 748: /* Indicate a non-UTF class which was created by negation */ 749: 750: fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); 751: 752: /* Handle repeats after a class or a back reference */ 753: 754: CLASS_REF_REPEAT: 755: switch(*ccode) 756: { 757: case OP_CRSTAR: 758: case OP_CRMINSTAR: 759: case OP_CRPLUS: 760: case OP_CRMINPLUS: 761: case OP_CRQUERY: 762: case OP_CRMINQUERY: 763: case OP_CRPOSSTAR: 764: case OP_CRPOSPLUS: 765: case OP_CRPOSQUERY: 766: fprintf(f, "%s", priv_OP_names[*ccode]); 767: extra += priv_OP_lengths[*ccode]; 768: break; 769: 770: case OP_CRRANGE: 771: case OP_CRMINRANGE: 772: case OP_CRPOSRANGE: 773: min = GET2(ccode,1); 774: max = GET2(ccode,1 + IMM2_SIZE); 775: if (max == 0) fprintf(f, "{%u,}", min); 776: else fprintf(f, "{%u,%u}", min, max); 777: if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); 778: else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+"); 779: extra += priv_OP_lengths[*ccode]; 780: break; 781: 782: /* Do nothing if it's not a repeat; this code stops picky compilers 783: warning about the lack of a default code path. */ 784: 785: default: 786: break; 787: } 788: } 789: break; 790: 791: case OP_MARK: 792: case OP_PRUNE_ARG: 793: case OP_SKIP_ARG: 794: case OP_THEN_ARG: 795: fprintf(f, " %s ", priv_OP_names[*code]); 796: print_puchar(f, code + 2); 797: extra += code[1]; 798: break; 799: 800: case OP_THEN: 801: fprintf(f, " %s", priv_OP_names[*code]); 802: break; 803: 804: case OP_CIRCM: 805: case OP_DOLLM: 806: flag = "/m"; 807: /* Fall through */ 808: 809: /* Anything else is just an item with no data, but possibly a flag. */ 810: 811: default: 812: fprintf(f, " %s %s", flag, priv_OP_names[*code]); 813: break; 814: } 815: 816: code += priv_OP_lengths[*code] + extra; 817: fprintf(f, "\n"); 818: } 819: } 820: 821: /* End of pcre_printint.src */