File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_printint.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:04 2014 UTC (9 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: /* PCRE is a library of functions to support regular expressions whose syntax
    6: and semantics are as close as possible to those of the Perl 5 language.
    7: 
    8:                        Written by Philip Hazel
    9:            Copyright (c) 1997-2012 University of Cambridge
   10: 
   11: -----------------------------------------------------------------------------
   12: Redistribution and use in source and binary forms, with or without
   13: modification, are permitted provided that the following conditions are met:
   14: 
   15:     * Redistributions of source code must retain the above copyright notice,
   16:       this list of conditions and the following disclaimer.
   17: 
   18:     * Redistributions in binary form must reproduce the above copyright
   19:       notice, this list of conditions and the following disclaimer in the
   20:       documentation and/or other materials provided with the distribution.
   21: 
   22:     * Neither the name of the University of Cambridge nor the names of its
   23:       contributors may be used to endorse or promote products derived from
   24:       this software without specific prior written permission.
   25: 
   26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36: POSSIBILITY OF SUCH DAMAGE.
   37: -----------------------------------------------------------------------------
   38: */
   39: 
   40: 
   41: /* This module contains a PCRE private debugging function for printing out the
   42: internal form of a compiled regular expression, along with some supporting
   43: local functions. This source file is used in two places:
   44: 
   45: (1) It is #included by pcre_compile.c when it is compiled in debugging mode
   46: (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
   47: compiles. In this case PCRE_INCLUDED is defined.
   48: 
   49: (2) It is also compiled separately and linked with pcretest.c, which can be
   50: asked to print out a compiled regex for debugging purposes. */
   51: 
   52: #ifndef PCRE_INCLUDED
   53: 
   54: #ifdef HAVE_CONFIG_H
   55: #include "config.h"
   56: #endif
   57: 
   58: /* For pcretest program. */
   59: #define PRIV(name) name
   60: 
   61: /* We have to include pcre_internal.h because we need the internal info for
   62: displaying the results of pcre_study() and we also need to know about the
   63: internal macros, structures, and other internal data values; pcretest has
   64: "inside information" compared to a program that strictly follows the PCRE API.
   65: 
   66: Although pcre_internal.h does itself include pcre.h, we explicitly include it
   67: here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
   68: appropriately for an application, not for building PCRE. */
   69: 
   70: #include "pcre.h"
   71: #include "pcre_internal.h"
   72: 
   73: /* These are the functions that are contained within. It doesn't seem worth
   74: having a separate .h file just for this. */
   75: 
   76: #endif /* PCRE_INCLUDED */
   77: 
   78: #ifdef PCRE_INCLUDED
   79: static /* Keep the following function as private. */
   80: #endif
   81: 
   82: #if defined COMPILE_PCRE8
   83: void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
   84: #elif defined COMPILE_PCRE16
   85: void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
   86: #elif defined COMPILE_PCRE32
   87: void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
   88: #endif
   89: 
   90: /* Macro that decides whether a character should be output as a literal or in
   91: hexadecimal. We don't use isprint() because that can vary from system to system
   92: (even without the use of locales) and we want the output always to be the same,
   93: for testing purposes. */
   94: 
   95: #ifdef EBCDIC
   96: #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
   97: #else
   98: #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
   99: #endif
  100: 
  101: /* The table of operator names. */
  102: 
  103: static const char *priv_OP_names[] = { OP_NAME_LIST };
  104: 
  105: /* This table of operator lengths is not actually used by the working code,
  106: but its size is needed for a check that ensures it is the correct size for the
  107: number of opcodes (thus catching update omissions). */
  108: 
  109: static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
  110: 
  111: 
  112: 
  113: /*************************************************
  114: *       Print single- or multi-byte character    *
  115: *************************************************/
  116: 
  117: static unsigned int
  118: print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
  119: {
  120: pcre_uint32 c = *ptr;
  121: 
  122: #ifndef SUPPORT_UTF
  123: 
  124: (void)utf;  /* Avoid compiler warning */
  125: if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  126: else if (c <= 0x80) fprintf(f, "\\x%02x", c);
  127: else fprintf(f, "\\x{%x}", c);
  128: return 0;
  129: 
  130: #else
  131: 
  132: #if defined COMPILE_PCRE8
  133: 
  134: if (!utf || (c & 0xc0) != 0xc0)
  135:   {
  136:   if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  137:   else if (c < 0x80) fprintf(f, "\\x%02x", c);
  138:   else fprintf(f, "\\x{%02x}", c);
  139:   return 0;
  140:   }
  141: else
  142:   {
  143:   int i;
  144:   int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
  145:   int s = 6*a;
  146:   c = (c & PRIV(utf8_table3)[a]) << s;
  147:   for (i = 1; i <= a; i++)
  148:     {
  149:     /* This is a check for malformed UTF-8; it should only occur if the sanity
  150:     check has been turned off. Rather than swallow random bytes, just stop if
  151:     we hit a bad one. Print it with \X instead of \x as an indication. */
  152: 
  153:     if ((ptr[i] & 0xc0) != 0x80)
  154:       {
  155:       fprintf(f, "\\X{%x}", c);
  156:       return i - 1;
  157:       }
  158: 
  159:     /* The byte is OK */
  160: 
  161:     s -= 6;
  162:     c |= (ptr[i] & 0x3f) << s;
  163:     }
  164:   fprintf(f, "\\x{%x}", c);
  165:   return a;
  166:   }
  167: 
  168: #elif defined COMPILE_PCRE16
  169: 
  170: if (!utf || (c & 0xfc00) != 0xd800)
  171:   {
  172:   if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  173:   else if (c <= 0x80) fprintf(f, "\\x%02x", c);
  174:   else fprintf(f, "\\x{%02x}", c);
  175:   return 0;
  176:   }
  177: else
  178:   {
  179:   /* This is a check for malformed UTF-16; it should only occur if the sanity
  180:   check has been turned off. Rather than swallow a low surrogate, just stop if
  181:   we hit a bad one. Print it with \X instead of \x as an indication. */
  182: 
  183:   if ((ptr[1] & 0xfc00) != 0xdc00)
  184:     {
  185:     fprintf(f, "\\X{%x}", c);
  186:     return 0;
  187:     }
  188: 
  189:   c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
  190:   fprintf(f, "\\x{%x}", c);
  191:   return 1;
  192:   }
  193: 
  194: #elif defined COMPILE_PCRE32
  195: 
  196: if (!utf || (c & 0xfffff800u) != 0xd800u)
  197:   {
  198:   if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  199:   else if (c <= 0x80) fprintf(f, "\\x%02x", c);
  200:   else fprintf(f, "\\x{%x}", c);
  201:   return 0;
  202:   }
  203: else
  204:   {
  205:   /* This is a check for malformed UTF-32; it should only occur if the sanity
  206:   check has been turned off. Rather than swallow a surrogate, just stop if
  207:   we hit one. Print it with \X instead of \x as an indication. */
  208:   fprintf(f, "\\X{%x}", c);
  209:   return 0;
  210:   }
  211: 
  212: #endif /* COMPILE_PCRE[8|16|32] */
  213: 
  214: #endif /* SUPPORT_UTF */
  215: }
  216: 
  217: /*************************************************
  218: *  Print uchar string (regardless of utf)        *
  219: *************************************************/
  220: 
  221: static void
  222: print_puchar(FILE *f, PCRE_PUCHAR ptr)
  223: {
  224: while (*ptr != '\0')
  225:   {
  226:   register pcre_uint32 c = *ptr++;
  227:   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
  228:   }
  229: }
  230: 
  231: /*************************************************
  232: *          Find Unicode property name            *
  233: *************************************************/
  234: 
  235: static const char *
  236: get_ucpname(unsigned int ptype, unsigned int pvalue)
  237: {
  238: #ifdef SUPPORT_UCP
  239: int i;
  240: for (i = PRIV(utt_size) - 1; i >= 0; i--)
  241:   {
  242:   if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
  243:   }
  244: return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
  245: #else
  246: /* It gets harder and harder to shut off unwanted compiler warnings. */
  247: ptype = ptype * pvalue;
  248: return (ptype == pvalue)? "??" : "??";
  249: #endif
  250: }
  251: 
  252: 
  253: /*************************************************
  254: *       Print Unicode property value             *
  255: *************************************************/
  256: 
  257: /* "Normal" properties can be printed from tables. The PT_CLIST property is a
  258: pseudo-property that contains a pointer to a list of case-equivalent
  259: characters. This is used only when UCP support is available and UTF mode is
  260: selected. It should never occur otherwise, but just in case it does, have
  261: something ready to print. */
  262: 
  263: static void
  264: print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
  265: {
  266: if (code[1] != PT_CLIST)
  267:   {
  268:   fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1],
  269:     code[2]), after);
  270:   }
  271: else
  272:   {
  273:   const char *not = (*code == OP_PROP)? "" : "not ";
  274: #ifndef SUPPORT_UCP
  275:   fprintf(f, "%s%sclist %d%s", before, not, code[2], after);
  276: #else
  277:   const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2];
  278:   fprintf (f, "%s%sclist", before, not);
  279:   while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
  280:   fprintf(f, "%s", after);
  281: #endif
  282:   }
  283: }
  284: 
  285: 
  286: 
  287: 
  288: /*************************************************
  289: *         Print compiled regex                   *
  290: *************************************************/
  291: 
  292: /* Make this function work for a regex with integers either byte order.
  293: However, we assume that what we are passed is a compiled regex. The
  294: print_lengths flag controls whether offsets and lengths of items are printed.
  295: They can be turned off from pcretest so that automatic tests on bytecode can be
  296: written that do not depend on the value of LINK_SIZE. */
  297: 
  298: #ifdef PCRE_INCLUDED
  299: static /* Keep the following function as private. */
  300: #endif
  301: #if defined COMPILE_PCRE8
  302: void
  303: pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
  304: #elif defined COMPILE_PCRE16
  305: void
  306: pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
  307: #elif defined COMPILE_PCRE32
  308: void
  309: pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
  310: #endif
  311: {
  312: REAL_PCRE *re = (REAL_PCRE *)external_re;
  313: pcre_uchar *codestart, *code;
  314: BOOL utf;
  315: 
  316: unsigned int options = re->options;
  317: int offset = re->name_table_offset;
  318: int count = re->name_count;
  319: int size = re->name_entry_size;
  320: 
  321: if (re->magic_number != MAGIC_NUMBER)
  322:   {
  323:   offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
  324:   count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
  325:   size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
  326:   options = ((options << 24) & 0xff000000) |
  327:             ((options <<  8) & 0x00ff0000) |
  328:             ((options >>  8) & 0x0000ff00) |
  329:             ((options >> 24) & 0x000000ff);
  330:   }
  331: 
  332: code = codestart = (pcre_uchar *)re + offset + count * size;
  333: /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
  334: utf = (options & PCRE_UTF8) != 0;
  335: 
  336: for(;;)
  337:   {
  338:   pcre_uchar *ccode;
  339:   const char *flag = "  ";
  340:   pcre_uint32 c;
  341:   unsigned int extra = 0;
  342: 
  343:   if (print_lengths)
  344:     fprintf(f, "%3d ", (int)(code - codestart));
  345:   else
  346:     fprintf(f, "    ");
  347: 
  348:   switch(*code)
  349:     {
  350: /* ========================================================================== */
  351:       /* These cases are never obeyed. This is a fudge that causes a compile-
  352:       time error if the vectors OP_names or OP_lengths, which are indexed
  353:       by opcode, are not the correct length. It seems to be the only way to do
  354:       such a check at compile time, as the sizeof() operator does not work in
  355:       the C preprocessor. */
  356: 
  357:       case OP_TABLE_LENGTH:
  358:       case OP_TABLE_LENGTH +
  359:         ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
  360:         (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
  361:       break;
  362: /* ========================================================================== */
  363: 
  364:     case OP_END:
  365:     fprintf(f, "    %s\n", priv_OP_names[*code]);
  366:     fprintf(f, "------------------------------------------------------------------\n");
  367:     return;
  368: 
  369:     case OP_CHAR:
  370:     fprintf(f, "    ");
  371:     do
  372:       {
  373:       code++;
  374:       code += 1 + print_char(f, code, utf);
  375:       }
  376:     while (*code == OP_CHAR);
  377:     fprintf(f, "\n");
  378:     continue;
  379: 
  380:     case OP_CHARI:
  381:     fprintf(f, " /i ");
  382:     do
  383:       {
  384:       code++;
  385:       code += 1 + print_char(f, code, utf);
  386:       }
  387:     while (*code == OP_CHARI);
  388:     fprintf(f, "\n");
  389:     continue;
  390: 
  391:     case OP_CBRA:
  392:     case OP_CBRAPOS:
  393:     case OP_SCBRA:
  394:     case OP_SCBRAPOS:
  395:     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  396:       else fprintf(f, "    ");
  397:     fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
  398:     break;
  399: 
  400:     case OP_BRA:
  401:     case OP_BRAPOS:
  402:     case OP_SBRA:
  403:     case OP_SBRAPOS:
  404:     case OP_KETRMAX:
  405:     case OP_KETRMIN:
  406:     case OP_KETRPOS:
  407:     case OP_ALT:
  408:     case OP_KET:
  409:     case OP_ASSERT:
  410:     case OP_ASSERT_NOT:
  411:     case OP_ASSERTBACK:
  412:     case OP_ASSERTBACK_NOT:
  413:     case OP_ONCE:
  414:     case OP_ONCE_NC:
  415:     case OP_COND:
  416:     case OP_SCOND:
  417:     case OP_REVERSE:
  418:     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  419:       else fprintf(f, "    ");
  420:     fprintf(f, "%s", priv_OP_names[*code]);
  421:     break;
  422: 
  423:     case OP_CLOSE:
  424:     fprintf(f, "    %s %d", priv_OP_names[*code], GET2(code, 1));
  425:     break;
  426: 
  427:     case OP_CREF:
  428:     fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
  429:     break;
  430: 
  431:     case OP_DNCREF:
  432:       {
  433:       pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
  434:         IMM2_SIZE;
  435:       fprintf(f, " %s Cond ref <", flag);
  436:       print_puchar(f, entry);
  437:       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  438:       }
  439:     break;
  440: 
  441:     case OP_RREF:
  442:     c = GET2(code, 1);
  443:     if (c == RREF_ANY)
  444:       fprintf(f, "    Cond recurse any");
  445:     else
  446:       fprintf(f, "    Cond recurse %d", c);
  447:     break;
  448: 
  449:     case OP_DNRREF:
  450:       {
  451:       pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
  452:         IMM2_SIZE;
  453:       fprintf(f, " %s Cond recurse <", flag);
  454:       print_puchar(f, entry);
  455:       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  456:       }
  457:     break;
  458: 
  459:     case OP_DEF:
  460:     fprintf(f, "    Cond def");
  461:     break;
  462: 
  463:     case OP_STARI:
  464:     case OP_MINSTARI:
  465:     case OP_POSSTARI:
  466:     case OP_PLUSI:
  467:     case OP_MINPLUSI:
  468:     case OP_POSPLUSI:
  469:     case OP_QUERYI:
  470:     case OP_MINQUERYI:
  471:     case OP_POSQUERYI:
  472:     flag = "/i";
  473:     /* Fall through */
  474:     case OP_STAR:
  475:     case OP_MINSTAR:
  476:     case OP_POSSTAR:
  477:     case OP_PLUS:
  478:     case OP_MINPLUS:
  479:     case OP_POSPLUS:
  480:     case OP_QUERY:
  481:     case OP_MINQUERY:
  482:     case OP_POSQUERY:
  483:     case OP_TYPESTAR:
  484:     case OP_TYPEMINSTAR:
  485:     case OP_TYPEPOSSTAR:
  486:     case OP_TYPEPLUS:
  487:     case OP_TYPEMINPLUS:
  488:     case OP_TYPEPOSPLUS:
  489:     case OP_TYPEQUERY:
  490:     case OP_TYPEMINQUERY:
  491:     case OP_TYPEPOSQUERY:
  492:     fprintf(f, " %s ", flag);
  493:     if (*code >= OP_TYPESTAR)
  494:       {
  495:       if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
  496:         {
  497:         print_prop(f, code + 1, "", " ");
  498:         extra = 2;
  499:         }
  500:       else fprintf(f, "%s", priv_OP_names[code[1]]);
  501:       }
  502:     else extra = print_char(f, code+1, utf);
  503:     fprintf(f, "%s", priv_OP_names[*code]);
  504:     break;
  505: 
  506:     case OP_EXACTI:
  507:     case OP_UPTOI:
  508:     case OP_MINUPTOI:
  509:     case OP_POSUPTOI:
  510:     flag = "/i";
  511:     /* Fall through */
  512:     case OP_EXACT:
  513:     case OP_UPTO:
  514:     case OP_MINUPTO:
  515:     case OP_POSUPTO:
  516:     fprintf(f, " %s ", flag);
  517:     extra = print_char(f, code + 1 + IMM2_SIZE, utf);
  518:     fprintf(f, "{");
  519:     if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
  520:     fprintf(f, "%d}", GET2(code,1));
  521:     if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
  522:       else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
  523:     break;
  524: 
  525:     case OP_TYPEEXACT:
  526:     case OP_TYPEUPTO:
  527:     case OP_TYPEMINUPTO:
  528:     case OP_TYPEPOSUPTO:
  529:     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
  530:       {
  531:       print_prop(f, code + IMM2_SIZE + 1, "    ", " ");
  532:       extra = 2;
  533:       }
  534:     else fprintf(f, "    %s", priv_OP_names[code[1 + IMM2_SIZE]]);
  535:     fprintf(f, "{");
  536:     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
  537:     fprintf(f, "%d}", GET2(code,1));
  538:     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
  539:       else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
  540:     break;
  541: 
  542:     case OP_NOTI:
  543:     flag = "/i";
  544:     /* Fall through */
  545:     case OP_NOT:
  546:     fprintf(f, " %s [^", flag);
  547:     extra = print_char(f, code + 1, utf);
  548:     fprintf(f, "]");
  549:     break;
  550: 
  551:     case OP_NOTSTARI:
  552:     case OP_NOTMINSTARI:
  553:     case OP_NOTPOSSTARI:
  554:     case OP_NOTPLUSI:
  555:     case OP_NOTMINPLUSI:
  556:     case OP_NOTPOSPLUSI:
  557:     case OP_NOTQUERYI:
  558:     case OP_NOTMINQUERYI:
  559:     case OP_NOTPOSQUERYI:
  560:     flag = "/i";
  561:     /* Fall through */
  562: 
  563:     case OP_NOTSTAR:
  564:     case OP_NOTMINSTAR:
  565:     case OP_NOTPOSSTAR:
  566:     case OP_NOTPLUS:
  567:     case OP_NOTMINPLUS:
  568:     case OP_NOTPOSPLUS:
  569:     case OP_NOTQUERY:
  570:     case OP_NOTMINQUERY:
  571:     case OP_NOTPOSQUERY:
  572:     fprintf(f, " %s [^", flag);
  573:     extra = print_char(f, code + 1, utf);
  574:     fprintf(f, "]%s", priv_OP_names[*code]);
  575:     break;
  576: 
  577:     case OP_NOTEXACTI:
  578:     case OP_NOTUPTOI:
  579:     case OP_NOTMINUPTOI:
  580:     case OP_NOTPOSUPTOI:
  581:     flag = "/i";
  582:     /* Fall through */
  583: 
  584:     case OP_NOTEXACT:
  585:     case OP_NOTUPTO:
  586:     case OP_NOTMINUPTO:
  587:     case OP_NOTPOSUPTO:
  588:     fprintf(f, " %s [^", flag);
  589:     extra = print_char(f, code + 1 + IMM2_SIZE, utf);
  590:     fprintf(f, "]{");
  591:     if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
  592:     fprintf(f, "%d}", GET2(code,1));
  593:     if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
  594:       else
  595:     if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
  596:     break;
  597: 
  598:     case OP_RECURSE:
  599:     if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
  600:       else fprintf(f, "    ");
  601:     fprintf(f, "%s", priv_OP_names[*code]);
  602:     break;
  603: 
  604:     case OP_REFI:
  605:     flag = "/i";
  606:     /* Fall through */
  607:     case OP_REF:
  608:     fprintf(f, " %s \\%d", flag, GET2(code,1));
  609:     ccode = code + priv_OP_lengths[*code];
  610:     goto CLASS_REF_REPEAT;
  611: 
  612:     case OP_DNREFI:
  613:     flag = "/i";
  614:     /* Fall through */
  615:     case OP_DNREF:
  616:       {
  617:       pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
  618:         IMM2_SIZE;
  619:       fprintf(f, " %s \\k<", flag);
  620:       print_puchar(f, entry);
  621:       fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
  622:       }
  623:     ccode = code + priv_OP_lengths[*code];
  624:     goto CLASS_REF_REPEAT;
  625: 
  626:     case OP_CALLOUT:
  627:     fprintf(f, "    %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
  628:       GET(code, 2 + LINK_SIZE));
  629:     break;
  630: 
  631:     case OP_PROP:
  632:     case OP_NOTPROP:
  633:     print_prop(f, code, "    ", "");
  634:     break;
  635: 
  636:     /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
  637:     in having this code always here, and it makes it less messy without all
  638:     those #ifdefs. */
  639: 
  640:     case OP_CLASS:
  641:     case OP_NCLASS:
  642:     case OP_XCLASS:
  643:       {
  644:       int i;
  645:       unsigned int min, max;
  646:       BOOL printmap;
  647:       pcre_uint8 *map;
  648: 
  649:       fprintf(f, "    [");
  650: 
  651:       if (*code == OP_XCLASS)
  652:         {
  653:         extra = GET(code, 1);
  654:         ccode = code + LINK_SIZE + 1;
  655:         printmap = (*ccode & XCL_MAP) != 0;
  656:         if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
  657:         }
  658:       else
  659:         {
  660:         printmap = TRUE;
  661:         ccode = code + 1;
  662:         }
  663: 
  664:       /* Print a bit map */
  665: 
  666:       if (printmap)
  667:         {
  668:         map = (pcre_uint8 *)ccode;
  669:         for (i = 0; i < 256; i++)
  670:           {
  671:           if ((map[i/8] & (1 << (i&7))) != 0)
  672:             {
  673:             int j;
  674:             for (j = i+1; j < 256; j++)
  675:               if ((map[j/8] & (1 << (j&7))) == 0) break;
  676:             if (i == '-' || i == ']') fprintf(f, "\\");
  677:             if (PRINTABLE(i)) fprintf(f, "%c", i);
  678:               else fprintf(f, "\\x%02x", i);
  679:             if (--j > i)
  680:               {
  681:               if (j != i + 1) fprintf(f, "-");
  682:               if (j == '-' || j == ']') fprintf(f, "\\");
  683:               if (PRINTABLE(j)) fprintf(f, "%c", j);
  684:                 else fprintf(f, "\\x%02x", j);
  685:               }
  686:             i = j;
  687:             }
  688:           }
  689:         ccode += 32 / sizeof(pcre_uchar);
  690:         }
  691: 
  692:       /* For an XCLASS there is always some additional data */
  693: 
  694:       if (*code == OP_XCLASS)
  695:         {
  696:         pcre_uchar ch;
  697:         while ((ch = *ccode++) != XCL_END)
  698:           {
  699:           BOOL not = FALSE;
  700:           const char *notch = "";
  701: 
  702:           switch(ch)
  703:             {
  704:             case XCL_NOTPROP:
  705:             not = TRUE;
  706:             notch = "^";
  707:             /* Fall through */
  708: 
  709:             case XCL_PROP:
  710:               {
  711:               unsigned int ptype = *ccode++;
  712:               unsigned int pvalue = *ccode++;
  713: 
  714:               switch(ptype)
  715:                 {
  716:                 case PT_PXGRAPH:
  717:                 fprintf(f, "[:%sgraph:]", notch);
  718:                 break;
  719: 
  720:                 case PT_PXPRINT:
  721:                 fprintf(f, "[:%sprint:]", notch);
  722:                 break;
  723: 
  724:                 case PT_PXPUNCT:
  725:                 fprintf(f, "[:%spunct:]", notch);
  726:                 break;
  727: 
  728:                 default:
  729:                 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
  730:                   get_ucpname(ptype, pvalue));
  731:                 break;
  732:                 }
  733:               }
  734:             break;
  735: 
  736:             default:
  737:             ccode += 1 + print_char(f, ccode, utf);
  738:             if (ch == XCL_RANGE)
  739:               {
  740:               fprintf(f, "-");
  741:               ccode += 1 + print_char(f, ccode, utf);
  742:               }
  743:             break;
  744:             }
  745:           }
  746:         }
  747: 
  748:       /* Indicate a non-UTF class which was created by negation */
  749: 
  750:       fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
  751: 
  752:       /* Handle repeats after a class or a back reference */
  753: 
  754:       CLASS_REF_REPEAT:
  755:       switch(*ccode)
  756:         {
  757:         case OP_CRSTAR:
  758:         case OP_CRMINSTAR:
  759:         case OP_CRPLUS:
  760:         case OP_CRMINPLUS:
  761:         case OP_CRQUERY:
  762:         case OP_CRMINQUERY:
  763:         case OP_CRPOSSTAR:
  764:         case OP_CRPOSPLUS:
  765:         case OP_CRPOSQUERY:
  766:         fprintf(f, "%s", priv_OP_names[*ccode]);
  767:         extra += priv_OP_lengths[*ccode];
  768:         break;
  769: 
  770:         case OP_CRRANGE:
  771:         case OP_CRMINRANGE:
  772:         case OP_CRPOSRANGE:
  773:         min = GET2(ccode,1);
  774:         max = GET2(ccode,1 + IMM2_SIZE);
  775:         if (max == 0) fprintf(f, "{%u,}", min);
  776:         else fprintf(f, "{%u,%u}", min, max);
  777:         if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
  778:         else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
  779:         extra += priv_OP_lengths[*ccode];
  780:         break;
  781: 
  782:         /* Do nothing if it's not a repeat; this code stops picky compilers
  783:         warning about the lack of a default code path. */
  784: 
  785:         default:
  786:         break;
  787:         }
  788:       }
  789:     break;
  790: 
  791:     case OP_MARK:
  792:     case OP_PRUNE_ARG:
  793:     case OP_SKIP_ARG:
  794:     case OP_THEN_ARG:
  795:     fprintf(f, "    %s ", priv_OP_names[*code]);
  796:     print_puchar(f, code + 2);
  797:     extra += code[1];
  798:     break;
  799: 
  800:     case OP_THEN:
  801:     fprintf(f, "    %s", priv_OP_names[*code]);
  802:     break;
  803: 
  804:     case OP_CIRCM:
  805:     case OP_DOLLM:
  806:     flag = "/m";
  807:     /* Fall through */
  808: 
  809:     /* Anything else is just an item with no data, but possibly a flag. */
  810: 
  811:     default:
  812:     fprintf(f, " %s %s", flag, priv_OP_names[*code]);
  813:     break;
  814:     }
  815: 
  816:   code += priv_OP_lengths[*code] + extra;
  817:   fprintf(f, "\n");
  818:   }
  819: }
  820: 
  821: /* End of pcre_printint.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>