Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_printint.src, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2010 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains a PCRE private debugging function for printing out the
                     42: internal form of a compiled regular expression, along with some supporting
                     43: local functions. This source file is used in two places:
                     44: 
                     45: (1) It is #included by pcre_compile.c when it is compiled in debugging mode
                     46: (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
                     47: compiles.
                     48: 
                     49: (2) It is always #included by pcretest.c, which can be asked to print out a
                     50: compiled regex for debugging purposes. */
                     51: 
                     52: 
                     53: /* Macro that decides whether a character should be output as a literal or in
                     54: hexadecimal. We don't use isprint() because that can vary from system to system
                     55: (even without the use of locales) and we want the output always to be the same,
                     56: for testing purposes. This macro is used in pcretest as well as in this file. */
                     57: 
                     58: #ifdef EBCDIC
                     59: #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
                     60: #else
                     61: #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
                     62: #endif
                     63: 
                     64: /* The table of operator names. */
                     65: 
                     66: static const char *OP_names[] = { OP_NAME_LIST };
                     67: 
                     68: 
                     69: 
                     70: /*************************************************
                     71: *       Print single- or multi-byte character    *
                     72: *************************************************/
                     73: 
                     74: static int
                     75: print_char(FILE *f, uschar *ptr, BOOL utf8)
                     76: {
                     77: int c = *ptr;
                     78: 
                     79: #ifndef SUPPORT_UTF8
                     80: utf8 = utf8;  /* Avoid compiler warning */
                     81: if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
                     82: return 0;
                     83: 
                     84: #else
                     85: if (!utf8 || (c & 0xc0) != 0xc0)
                     86:   {
                     87:   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
                     88:   return 0;
                     89:   }
                     90: else
                     91:   {
                     92:   int i;
                     93:   int a = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
                     94:   int s = 6*a;
                     95:   c = (c & _pcre_utf8_table3[a]) << s;
                     96:   for (i = 1; i <= a; i++)
                     97:     {
                     98:     /* This is a check for malformed UTF-8; it should only occur if the sanity
                     99:     check has been turned off. Rather than swallow random bytes, just stop if
                    100:     we hit a bad one. Print it with \X instead of \x as an indication. */
                    101: 
                    102:     if ((ptr[i] & 0xc0) != 0x80)
                    103:       {
                    104:       fprintf(f, "\\X{%x}", c);
                    105:       return i - 1;
                    106:       }
                    107: 
                    108:     /* The byte is OK */
                    109: 
                    110:     s -= 6;
                    111:     c |= (ptr[i] & 0x3f) << s;
                    112:     }
                    113:   if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
                    114:   return a;
                    115:   }
                    116: #endif
                    117: }
                    118: 
                    119: 
                    120: 
                    121: /*************************************************
                    122: *          Find Unicode property name            *
                    123: *************************************************/
                    124: 
                    125: static const char *
                    126: get_ucpname(int ptype, int pvalue)
                    127: {
                    128: #ifdef SUPPORT_UCP
                    129: int i;
                    130: for (i = _pcre_utt_size - 1; i >= 0; i--)
                    131:   {
                    132:   if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
                    133:   }
                    134: return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
                    135: #else
                    136: /* It gets harder and harder to shut off unwanted compiler warnings. */
                    137: ptype = ptype * pvalue;
                    138: return (ptype == pvalue)? "??" : "??";
                    139: #endif
                    140: }
                    141: 
                    142: 
                    143: 
                    144: /*************************************************
                    145: *         Print compiled regex                   *
                    146: *************************************************/
                    147: 
                    148: /* Make this function work for a regex with integers in either byte order.
                    149: However, we assume that what we are passed is a compiled regex. The
                    150: print_lengths flag controls whether offsets and lengths of items are printed.
                    151: They can be turned off from pcretest so that automatic tests on bytecode can be
                    152: written that do not depend on the value of LINK_SIZE. */
                    153: 
                    154: static void
                    155: pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
                    156: {
                    157: real_pcre *re = (real_pcre *)external_re;
                    158: uschar *codestart, *code;
                    159: BOOL utf8;
                    160: 
                    161: unsigned int options = re->options;
                    162: int offset = re->name_table_offset;
                    163: int count = re->name_count;
                    164: int size = re->name_entry_size;
                    165: 
                    166: if (re->magic_number != MAGIC_NUMBER)
                    167:   {
                    168:   offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
                    169:   count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
                    170:   size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
                    171:   options = ((options << 24) & 0xff000000) |
                    172:             ((options <<  8) & 0x00ff0000) |
                    173:             ((options >>  8) & 0x0000ff00) |
                    174:             ((options >> 24) & 0x000000ff);
                    175:   }
                    176: 
                    177: code = codestart = (uschar *)re + offset + count * size;
                    178: utf8 = (options & PCRE_UTF8) != 0;
                    179: 
                    180: for(;;)
                    181:   {
                    182:   uschar *ccode;
                    183:   int c;
                    184:   int extra = 0;
                    185: 
                    186:   if (print_lengths)
                    187:     fprintf(f, "%3d ", (int)(code - codestart));
                    188:   else
                    189:     fprintf(f, "    ");
                    190: 
                    191:   switch(*code)
                    192:     {
                    193: /* ========================================================================== */
                    194:       /* These cases are never obeyed. This is a fudge that causes a compile-
                    195:       time error if the vectors OP_names or _pcre_OP_lengths, which are indexed
                    196:       by opcode, are not the correct length. It seems to be the only way to do
                    197:       such a check at compile time, as the sizeof() operator does not work in
                    198:       the C preprocessor. We do this while compiling pcretest, because that
                    199:       #includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this
                    200:       when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
                    201:       know the size of _pcre_OP_lengths. */
                    202: 
                    203: #ifdef COMPILING_PCRETEST
                    204:       case OP_TABLE_LENGTH:
                    205:       case OP_TABLE_LENGTH +
                    206:         ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
                    207:         (sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)):
                    208:       break;
                    209: #endif
                    210: /* ========================================================================== */
                    211: 
                    212:     case OP_END:
                    213:     fprintf(f, "    %s\n", OP_names[*code]);
                    214:     fprintf(f, "------------------------------------------------------------------\n");
                    215:     return;
                    216: 
                    217:     case OP_OPT:
                    218:     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
                    219:     break;
                    220: 
                    221:     case OP_CHAR:
                    222:     fprintf(f, "    ");
                    223:     do
                    224:       {
                    225:       code++;
                    226:       code += 1 + print_char(f, code, utf8);
                    227:       }
                    228:     while (*code == OP_CHAR);
                    229:     fprintf(f, "\n");
                    230:     continue;
                    231: 
                    232:     case OP_CHARNC:
                    233:     fprintf(f, " NC ");
                    234:     do
                    235:       {
                    236:       code++;
                    237:       code += 1 + print_char(f, code, utf8);
                    238:       }
                    239:     while (*code == OP_CHARNC);
                    240:     fprintf(f, "\n");
                    241:     continue;
                    242: 
                    243:     case OP_CBRA:
                    244:     case OP_SCBRA:
                    245:     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
                    246:       else fprintf(f, "    ");
                    247:     fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
                    248:     break;
                    249: 
                    250:     case OP_BRA:
                    251:     case OP_SBRA:
                    252:     case OP_KETRMAX:
                    253:     case OP_KETRMIN:
                    254:     case OP_ALT:
                    255:     case OP_KET:
                    256:     case OP_ASSERT:
                    257:     case OP_ASSERT_NOT:
                    258:     case OP_ASSERTBACK:
                    259:     case OP_ASSERTBACK_NOT:
                    260:     case OP_ONCE:
                    261:     case OP_COND:
                    262:     case OP_SCOND:
                    263:     case OP_REVERSE:
                    264:     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
                    265:       else fprintf(f, "    ");
                    266:     fprintf(f, "%s", OP_names[*code]);
                    267:     break;
                    268: 
                    269:     case OP_CLOSE:
                    270:     fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
                    271:     break;
                    272: 
                    273:     case OP_CREF:
                    274:     case OP_NCREF:
                    275:     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
                    276:     break;
                    277: 
                    278:     case OP_RREF:
                    279:     c = GET2(code, 1);
                    280:     if (c == RREF_ANY)
                    281:       fprintf(f, "    Cond recurse any");
                    282:     else
                    283:       fprintf(f, "    Cond recurse %d", c);
                    284:     break;
                    285: 
                    286:     case OP_NRREF:
                    287:     c = GET2(code, 1);
                    288:     if (c == RREF_ANY)
                    289:       fprintf(f, "    Cond nrecurse any");
                    290:     else
                    291:       fprintf(f, "    Cond nrecurse %d", c);
                    292:     break;
                    293: 
                    294:     case OP_DEF:
                    295:     fprintf(f, "    Cond def");
                    296:     break;
                    297: 
                    298:     case OP_STAR:
                    299:     case OP_MINSTAR:
                    300:     case OP_POSSTAR:
                    301:     case OP_PLUS:
                    302:     case OP_MINPLUS:
                    303:     case OP_POSPLUS:
                    304:     case OP_QUERY:
                    305:     case OP_MINQUERY:
                    306:     case OP_POSQUERY:
                    307:     case OP_TYPESTAR:
                    308:     case OP_TYPEMINSTAR:
                    309:     case OP_TYPEPOSSTAR:
                    310:     case OP_TYPEPLUS:
                    311:     case OP_TYPEMINPLUS:
                    312:     case OP_TYPEPOSPLUS:
                    313:     case OP_TYPEQUERY:
                    314:     case OP_TYPEMINQUERY:
                    315:     case OP_TYPEPOSQUERY:
                    316:     fprintf(f, "    ");
                    317:     if (*code >= OP_TYPESTAR)
                    318:       {
                    319:       fprintf(f, "%s", OP_names[code[1]]);
                    320:       if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
                    321:         {
                    322:         fprintf(f, " %s ", get_ucpname(code[2], code[3]));
                    323:         extra = 2;
                    324:         }
                    325:       }
                    326:     else extra = print_char(f, code+1, utf8);
                    327:     fprintf(f, "%s", OP_names[*code]);
                    328:     break;
                    329: 
                    330:     case OP_EXACT:
                    331:     case OP_UPTO:
                    332:     case OP_MINUPTO:
                    333:     case OP_POSUPTO:
                    334:     fprintf(f, "    ");
                    335:     extra = print_char(f, code+3, utf8);
                    336:     fprintf(f, "{");
                    337:     if (*code != OP_EXACT) fprintf(f, "0,");
                    338:     fprintf(f, "%d}", GET2(code,1));
                    339:     if (*code == OP_MINUPTO) fprintf(f, "?");
                    340:       else if (*code == OP_POSUPTO) fprintf(f, "+");
                    341:     break;
                    342: 
                    343:     case OP_TYPEEXACT:
                    344:     case OP_TYPEUPTO:
                    345:     case OP_TYPEMINUPTO:
                    346:     case OP_TYPEPOSUPTO:
                    347:     fprintf(f, "    %s", OP_names[code[3]]);
                    348:     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
                    349:       {
                    350:       fprintf(f, " %s ", get_ucpname(code[4], code[5]));
                    351:       extra = 2;
                    352:       }
                    353:     fprintf(f, "{");
                    354:     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
                    355:     fprintf(f, "%d}", GET2(code,1));
                    356:     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
                    357:       else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
                    358:     break;
                    359: 
                    360:     case OP_NOT:
                    361:     c = code[1];
                    362:     if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
                    363:       else fprintf(f, "    [^\\x%02x]", c);
                    364:     break;
                    365: 
                    366:     case OP_NOTSTAR:
                    367:     case OP_NOTMINSTAR:
                    368:     case OP_NOTPOSSTAR:
                    369:     case OP_NOTPLUS:
                    370:     case OP_NOTMINPLUS:
                    371:     case OP_NOTPOSPLUS:
                    372:     case OP_NOTQUERY:
                    373:     case OP_NOTMINQUERY:
                    374:     case OP_NOTPOSQUERY:
                    375:     c = code[1];
                    376:     if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
                    377:       else fprintf(f, "    [^\\x%02x]", c);
                    378:     fprintf(f, "%s", OP_names[*code]);
                    379:     break;
                    380: 
                    381:     case OP_NOTEXACT:
                    382:     case OP_NOTUPTO:
                    383:     case OP_NOTMINUPTO:
                    384:     case OP_NOTPOSUPTO:
                    385:     c = code[3];
                    386:     if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
                    387:       else fprintf(f, "    [^\\x%02x]{", c);
                    388:     if (*code != OP_NOTEXACT) fprintf(f, "0,");
                    389:     fprintf(f, "%d}", GET2(code,1));
                    390:     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
                    391:       else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
                    392:     break;
                    393: 
                    394:     case OP_RECURSE:
                    395:     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
                    396:       else fprintf(f, "    ");
                    397:     fprintf(f, "%s", OP_names[*code]);
                    398:     break;
                    399: 
                    400:     case OP_REF:
                    401:     fprintf(f, "    \\%d", GET2(code,1));
                    402:     ccode = code + _pcre_OP_lengths[*code];
                    403:     goto CLASS_REF_REPEAT;
                    404: 
                    405:     case OP_CALLOUT:
                    406:     fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
                    407:       GET(code, 2 + LINK_SIZE));
                    408:     break;
                    409: 
                    410:     case OP_PROP:
                    411:     case OP_NOTPROP:
                    412:     fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
                    413:     break;
                    414: 
                    415:     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
                    416:     having this code always here, and it makes it less messy without all those
                    417:     #ifdefs. */
                    418: 
                    419:     case OP_CLASS:
                    420:     case OP_NCLASS:
                    421:     case OP_XCLASS:
                    422:       {
                    423:       int i, min, max;
                    424:       BOOL printmap;
                    425: 
                    426:       fprintf(f, "    [");
                    427: 
                    428:       if (*code == OP_XCLASS)
                    429:         {
                    430:         extra = GET(code, 1);
                    431:         ccode = code + LINK_SIZE + 1;
                    432:         printmap = (*ccode & XCL_MAP) != 0;
                    433:         if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
                    434:         }
                    435:       else
                    436:         {
                    437:         printmap = TRUE;
                    438:         ccode = code + 1;
                    439:         }
                    440: 
                    441:       /* Print a bit map */
                    442: 
                    443:       if (printmap)
                    444:         {
                    445:         for (i = 0; i < 256; i++)
                    446:           {
                    447:           if ((ccode[i/8] & (1 << (i&7))) != 0)
                    448:             {
                    449:             int j;
                    450:             for (j = i+1; j < 256; j++)
                    451:               if ((ccode[j/8] & (1 << (j&7))) == 0) break;
                    452:             if (i == '-' || i == ']') fprintf(f, "\\");
                    453:             if (PRINTABLE(i)) fprintf(f, "%c", i);
                    454:               else fprintf(f, "\\x%02x", i);
                    455:             if (--j > i)
                    456:               {
                    457:               if (j != i + 1) fprintf(f, "-");
                    458:               if (j == '-' || j == ']') fprintf(f, "\\");
                    459:               if (PRINTABLE(j)) fprintf(f, "%c", j);
                    460:                 else fprintf(f, "\\x%02x", j);
                    461:               }
                    462:             i = j;
                    463:             }
                    464:           }
                    465:         ccode += 32;
                    466:         }
                    467: 
                    468:       /* For an XCLASS there is always some additional data */
                    469: 
                    470:       if (*code == OP_XCLASS)
                    471:         {
                    472:         int ch;
                    473:         while ((ch = *ccode++) != XCL_END)
                    474:           {
                    475:           if (ch == XCL_PROP)
                    476:             {
                    477:             int ptype = *ccode++;
                    478:             int pvalue = *ccode++;
                    479:             fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
                    480:             }
                    481:           else if (ch == XCL_NOTPROP)
                    482:             {
                    483:             int ptype = *ccode++;
                    484:             int pvalue = *ccode++;
                    485:             fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
                    486:             }
                    487:           else
                    488:             {
                    489:             ccode += 1 + print_char(f, ccode, TRUE);
                    490:             if (ch == XCL_RANGE)
                    491:               {
                    492:               fprintf(f, "-");
                    493:               ccode += 1 + print_char(f, ccode, TRUE);
                    494:               }
                    495:             }
                    496:           }
                    497:         }
                    498: 
                    499:       /* Indicate a non-UTF8 class which was created by negation */
                    500: 
                    501:       fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
                    502: 
                    503:       /* Handle repeats after a class or a back reference */
                    504: 
                    505:       CLASS_REF_REPEAT:
                    506:       switch(*ccode)
                    507:         {
                    508:         case OP_CRSTAR:
                    509:         case OP_CRMINSTAR:
                    510:         case OP_CRPLUS:
                    511:         case OP_CRMINPLUS:
                    512:         case OP_CRQUERY:
                    513:         case OP_CRMINQUERY:
                    514:         fprintf(f, "%s", OP_names[*ccode]);
                    515:         extra += _pcre_OP_lengths[*ccode];
                    516:         break;
                    517: 
                    518:         case OP_CRRANGE:
                    519:         case OP_CRMINRANGE:
                    520:         min = GET2(ccode,1);
                    521:         max = GET2(ccode,3);
                    522:         if (max == 0) fprintf(f, "{%d,}", min);
                    523:         else fprintf(f, "{%d,%d}", min, max);
                    524:         if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
                    525:         extra += _pcre_OP_lengths[*ccode];
                    526:         break;
                    527: 
                    528:         /* Do nothing if it's not a repeat; this code stops picky compilers
                    529:         warning about the lack of a default code path. */
                    530: 
                    531:         default:
                    532:         break;
                    533:         }
                    534:       }
                    535:     break;
                    536: 
                    537:     case OP_MARK:
                    538:     case OP_PRUNE_ARG:
                    539:     case OP_SKIP_ARG:
                    540:     fprintf(f, "    %s %s", OP_names[*code], code + 2);
                    541:     extra += code[1];
                    542:     break;
                    543: 
                    544:     case OP_THEN:
                    545:     if (print_lengths)
                    546:       fprintf(f, "    %s %d", OP_names[*code], GET(code, 1));
                    547:     else
                    548:       fprintf(f, "    %s", OP_names[*code]);
                    549:     break;
                    550: 
                    551:     case OP_THEN_ARG:
                    552:     if (print_lengths)
                    553:       fprintf(f, "    %s %d %s", OP_names[*code], GET(code, 1),
                    554:         code + 2 + LINK_SIZE);
                    555:     else
                    556:       fprintf(f, "    %s %s", OP_names[*code], code + 2 + LINK_SIZE);
                    557:     extra += code[1+LINK_SIZE];
                    558:     break;
                    559: 
                    560:     /* Anything else is just an item with no data*/
                    561: 
                    562:     default:
                    563:     fprintf(f, "    %s", OP_names[*code]);
                    564:     break;
                    565:     }
                    566: 
                    567:   code += _pcre_OP_lengths[*code] + extra;
                    568:   fprintf(f, "\n");
                    569:   }
                    570: }
                    571: 
                    572: /* End of pcre_printint.src */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>