File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_internal.h
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:05:51 2012 UTC (12 years, 4 months ago) by misho
CVS tags: MAIN, HEAD
Initial revision

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: 
    6: /* PCRE is a library of functions to support regular expressions whose syntax
    7: and semantics are as close as possible to those of the Perl 5 language.
    8: 
    9:                        Written by Philip Hazel
   10:            Copyright (c) 1997-2011 University of Cambridge
   11: 
   12: -----------------------------------------------------------------------------
   13: Redistribution and use in source and binary forms, with or without
   14: modification, are permitted provided that the following conditions are met:
   15: 
   16:     * Redistributions of source code must retain the above copyright notice,
   17:       this list of conditions and the following disclaimer.
   18: 
   19:     * Redistributions in binary form must reproduce the above copyright
   20:       notice, this list of conditions and the following disclaimer in the
   21:       documentation and/or other materials provided with the distribution.
   22: 
   23:     * Neither the name of the University of Cambridge nor the names of its
   24:       contributors may be used to endorse or promote products derived from
   25:       this software without specific prior written permission.
   26: 
   27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37: POSSIBILITY OF SUCH DAMAGE.
   38: -----------------------------------------------------------------------------
   39: */
   40: 
   41: /* This header contains definitions that are shared between the different
   42: modules, but which are not relevant to the exported API. This includes some
   43: functions whose names all begin with "_pcre_". */
   44: 
   45: #ifndef PCRE_INTERNAL_H
   46: #define PCRE_INTERNAL_H
   47: 
   48: /* Define PCRE_DEBUG to get debugging output on stdout. */
   49: 
   50: #if 0
   51: #define PCRE_DEBUG
   52: #endif
   53: 
   54: /* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
   55: script prevents both being selected, but not everybody uses "configure". */
   56: 
   57: #if defined EBCDIC && defined SUPPORT_UTF8
   58: #error The use of both EBCDIC and SUPPORT_UTF8 is not supported.
   59: #endif
   60: 
   61: /* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
   62: "configure" script ensures this, but not everybody uses "configure". */
   63: 
   64: #if defined SUPPORT_UCP && !defined SUPPORT_UTF8
   65: #define SUPPORT_UTF8 1
   66: #endif
   67: 
   68: /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
   69: inline, and there are *still* stupid compilers about that don't like indented
   70: pre-processor statements, or at least there were when I first wrote this. After
   71: all, it had only been about 10 years then...
   72: 
   73: It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
   74: be absolutely sure we get our version. */
   75: 
   76: #undef DPRINTF
   77: #ifdef PCRE_DEBUG
   78: #define DPRINTF(p) printf p
   79: #else
   80: #define DPRINTF(p) /* Nothing */
   81: #endif
   82: 
   83: 
   84: /* Standard C headers plus the external interface definition. The only time
   85: setjmp and stdarg are used is when NO_RECURSE is set. */
   86: 
   87: #include <ctype.h>
   88: #include <limits.h>
   89: #include <stddef.h>
   90: #include <stdio.h>
   91: #include <stdlib.h>
   92: #include <string.h>
   93: 
   94: /* When compiling a DLL for Windows, the exported symbols have to be declared
   95: using some MS magic. I found some useful information on this web page:
   96: http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
   97: information there, using __declspec(dllexport) without "extern" we have a
   98: definition; with "extern" we have a declaration. The settings here override the
   99: setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
  100: which is all that is needed for applications (they just import the symbols). We
  101: use:
  102: 
  103:   PCRE_EXP_DECL       for declarations
  104:   PCRE_EXP_DEFN       for definitions of exported functions
  105:   PCRE_EXP_DATA_DEFN  for definitions of exported variables
  106: 
  107: The reason for the two DEFN macros is that in non-Windows environments, one
  108: does not want to have "extern" before variable definitions because it leads to
  109: compiler warnings. So we distinguish between functions and variables. In
  110: Windows, the two should always be the same.
  111: 
  112: The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
  113: which is an application, but needs to import this file in order to "peek" at
  114: internals, can #include pcre.h first to get an application's-eye view.
  115: 
  116: In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
  117: special-purpose environments) might want to stick other stuff in front of
  118: exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
  119: PCRE_EXP_DATA_DEFN only if they are not already set. */
  120: 
  121: #ifndef PCRE_EXP_DECL
  122: #  ifdef _WIN32
  123: #    ifndef PCRE_STATIC
  124: #      define PCRE_EXP_DECL       extern __declspec(dllexport)
  125: #      define PCRE_EXP_DEFN       __declspec(dllexport)
  126: #      define PCRE_EXP_DATA_DEFN  __declspec(dllexport)
  127: #    else
  128: #      define PCRE_EXP_DECL       extern
  129: #      define PCRE_EXP_DEFN
  130: #      define PCRE_EXP_DATA_DEFN
  131: #    endif
  132: #  else
  133: #    ifdef __cplusplus
  134: #      define PCRE_EXP_DECL       extern "C"
  135: #    else
  136: #      define PCRE_EXP_DECL       extern
  137: #    endif
  138: #    ifndef PCRE_EXP_DEFN
  139: #      define PCRE_EXP_DEFN       PCRE_EXP_DECL
  140: #    endif
  141: #    ifndef PCRE_EXP_DATA_DEFN
  142: #      define PCRE_EXP_DATA_DEFN
  143: #    endif
  144: #  endif
  145: #endif
  146: 
  147: /* When compiling with the MSVC compiler, it is sometimes necessary to include
  148: a "calling convention" before exported function names. (This is secondhand
  149: information; I know nothing about MSVC myself). For example, something like
  150: 
  151:   void __cdecl function(....)
  152: 
  153: might be needed. In order to make this easy, all the exported functions have
  154: PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
  155: set, we ensure here that it has no effect. */
  156: 
  157: #ifndef PCRE_CALL_CONVENTION
  158: #define PCRE_CALL_CONVENTION
  159: #endif
  160: 
  161: /* We need to have types that specify unsigned 16-bit and 32-bit integers. We
  162: cannot determine these outside the compilation (e.g. by running a program as
  163: part of "configure") because PCRE is often cross-compiled for use on other
  164: systems. Instead we make use of the maximum sizes that are available at
  165: preprocessor time in standard C environments. */
  166: 
  167: #if USHRT_MAX == 65535
  168:   typedef unsigned short pcre_uint16;
  169:   typedef short pcre_int16;
  170: #elif UINT_MAX == 65535
  171:   typedef unsigned int pcre_uint16;
  172:   typedef int pcre_int16;
  173: #else
  174:   #error Cannot determine a type for 16-bit unsigned integers
  175: #endif
  176: 
  177: #if UINT_MAX == 4294967295
  178:   typedef unsigned int pcre_uint32;
  179:   typedef int pcre_int32;
  180: #elif ULONG_MAX == 4294967295
  181:   typedef unsigned long int pcre_uint32;
  182:   typedef long int pcre_int32;
  183: #else
  184:   #error Cannot determine a type for 32-bit unsigned integers
  185: #endif
  186: 
  187: /* When checking for integer overflow in pcre_compile(), we need to handle
  188: large integers. If a 64-bit integer type is available, we can use that.
  189: Otherwise we have to cast to double, which of course requires floating point
  190: arithmetic. Handle this by defining a macro for the appropriate type. If
  191: stdint.h is available, include it; it may define INT64_MAX. Systems that do not
  192: have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
  193: by "configure". */
  194: 
  195: #if HAVE_STDINT_H
  196: #include <stdint.h>
  197: #elif HAVE_INTTYPES_H
  198: #include <inttypes.h>
  199: #endif
  200: 
  201: #if defined INT64_MAX || defined int64_t
  202: #define INT64_OR_DOUBLE int64_t
  203: #else
  204: #define INT64_OR_DOUBLE double
  205: #endif
  206: 
  207: /* All character handling must be done as unsigned characters. Otherwise there
  208: are problems with top-bit-set characters and functions such as isspace().
  209: However, we leave the interface to the outside world as char *, because that
  210: should make things easier for callers. We define a short type for unsigned char
  211: to save lots of typing. I tried "uchar", but it causes problems on Digital
  212: Unix, where it is defined in sys/types, so use "uschar" instead. */
  213: 
  214: typedef unsigned char uschar;
  215: 
  216: /* This is an unsigned int value that no character can ever have. UTF-8
  217: characters only go up to 0x7fffffff (though Unicode doesn't go beyond
  218: 0x0010ffff). */
  219: 
  220: #define NOTACHAR 0xffffffff
  221: 
  222: /* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
  223: "any" and "anycrlf" at present). The following macros are used to package up
  224: testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
  225: modules to indicate in which datablock the parameters exist, and what the
  226: start/end of string field names are. */
  227: 
  228: #define NLTYPE_FIXED    0     /* Newline is a fixed length string */
  229: #define NLTYPE_ANY      1     /* Newline is any Unicode line ending */
  230: #define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */
  231: 
  232: /* This macro checks for a newline at the given position */
  233: 
  234: #define IS_NEWLINE(p) \
  235:   ((NLBLOCK->nltype != NLTYPE_FIXED)? \
  236:     ((p) < NLBLOCK->PSEND && \
  237:      _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
  238:        utf8)) \
  239:     : \
  240:     ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
  241:      (p)[0] == NLBLOCK->nl[0] && \
  242:      (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
  243:     ) \
  244:   )
  245: 
  246: /* This macro checks for a newline immediately preceding the given position */
  247: 
  248: #define WAS_NEWLINE(p) \
  249:   ((NLBLOCK->nltype != NLTYPE_FIXED)? \
  250:     ((p) > NLBLOCK->PSSTART && \
  251:      _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
  252:        &(NLBLOCK->nllen), utf8)) \
  253:     : \
  254:     ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
  255:      (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
  256:      (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
  257:     ) \
  258:   )
  259: 
  260: /* When PCRE is compiled as a C++ library, the subject pointer can be replaced
  261: with a custom type. This makes it possible, for example, to allow pcre_exec()
  262: to process subject strings that are discontinuous by using a smart pointer
  263: class. It must always be possible to inspect all of the subject string in
  264: pcre_exec() because of the way it backtracks. Two macros are required in the
  265: normal case, for sign-unspecified and unsigned char pointers. The former is
  266: used for the external interface and appears in pcre.h, which is why its name
  267: must begin with PCRE_. */
  268: 
  269: #ifdef CUSTOM_SUBJECT_PTR
  270: #define PCRE_SPTR CUSTOM_SUBJECT_PTR
  271: #define USPTR CUSTOM_SUBJECT_PTR
  272: #else
  273: #define PCRE_SPTR const char *
  274: #define USPTR const unsigned char *
  275: #endif
  276: 
  277: 
  278: 
  279: /* Include the public PCRE header and the definitions of UCP character property
  280: values. */
  281: 
  282: #include "pcre.h"
  283: #include "ucp.h"
  284: 
  285: /* When compiling for use with the Virtual Pascal compiler, these functions
  286: need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
  287: option on the command line. */
  288: 
  289: #ifdef VPCOMPAT
  290: #define strlen(s)        _strlen(s)
  291: #define strncmp(s1,s2,m) _strncmp(s1,s2,m)
  292: #define memcmp(s,c,n)    _memcmp(s,c,n)
  293: #define memcpy(d,s,n)    _memcpy(d,s,n)
  294: #define memmove(d,s,n)   _memmove(d,s,n)
  295: #define memset(s,c,n)    _memset(s,c,n)
  296: #else  /* VPCOMPAT */
  297: 
  298: /* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
  299: define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
  300: is set. Otherwise, include an emulating function for those systems that have
  301: neither (there are some non-Unix environments where this is the case). */
  302: 
  303: #ifndef HAVE_MEMMOVE
  304: #undef  memmove        /* some systems may have a macro */
  305: #ifdef HAVE_BCOPY
  306: #define memmove(a, b, c) bcopy(b, a, c)
  307: #else  /* HAVE_BCOPY */
  308: static void *
  309: pcre_memmove(void *d, const void *s, size_t n)
  310: {
  311: size_t i;
  312: unsigned char *dest = (unsigned char *)d;
  313: const unsigned char *src = (const unsigned char *)s;
  314: if (dest > src)
  315:   {
  316:   dest += n;
  317:   src += n;
  318:   for (i = 0; i < n; ++i) *(--dest) = *(--src);
  319:   return (void *)dest;
  320:   }
  321: else
  322:   {
  323:   for (i = 0; i < n; ++i) *dest++ = *src++;
  324:   return (void *)(dest - n);
  325:   }
  326: }
  327: #define memmove(a, b, c) pcre_memmove(a, b, c)
  328: #endif   /* not HAVE_BCOPY */
  329: #endif   /* not HAVE_MEMMOVE */
  330: #endif   /* not VPCOMPAT */
  331: 
  332: 
  333: /* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
  334: in big-endian order) by default. These are used, for example, to link from the
  335: start of a subpattern to its alternatives and its end. The use of 2 bytes per
  336: offset limits the size of the compiled regex to around 64K, which is big enough
  337: for almost everybody. However, I received a request for an even bigger limit.
  338: For this reason, and also to make the code easier to maintain, the storing and
  339: loading of offsets from the byte string is now handled by the macros that are
  340: defined here.
  341: 
  342: The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
  343: the config.h file, but can be overridden by using -D on the command line. This
  344: is automated on Unix systems via the "configure" command. */
  345: 
  346: #if LINK_SIZE == 2
  347: 
  348: #define PUT(a,n,d)   \
  349:   (a[n] = (d) >> 8), \
  350:   (a[(n)+1] = (d) & 255)
  351: 
  352: #define GET(a,n) \
  353:   (((a)[n] << 8) | (a)[(n)+1])
  354: 
  355: #define MAX_PATTERN_SIZE (1 << 16)
  356: 
  357: 
  358: #elif LINK_SIZE == 3
  359: 
  360: #define PUT(a,n,d)       \
  361:   (a[n] = (d) >> 16),    \
  362:   (a[(n)+1] = (d) >> 8), \
  363:   (a[(n)+2] = (d) & 255)
  364: 
  365: #define GET(a,n) \
  366:   (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
  367: 
  368: #define MAX_PATTERN_SIZE (1 << 24)
  369: 
  370: 
  371: #elif LINK_SIZE == 4
  372: 
  373: #define PUT(a,n,d)        \
  374:   (a[n] = (d) >> 24),     \
  375:   (a[(n)+1] = (d) >> 16), \
  376:   (a[(n)+2] = (d) >> 8),  \
  377:   (a[(n)+3] = (d) & 255)
  378: 
  379: #define GET(a,n) \
  380:   (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
  381: 
  382: #define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */
  383: 
  384: 
  385: #else
  386: #error LINK_SIZE must be either 2, 3, or 4
  387: #endif
  388: 
  389: 
  390: /* Convenience macro defined in terms of the others */
  391: 
  392: #define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE
  393: 
  394: 
  395: /* PCRE uses some other 2-byte quantities that do not change when the size of
  396: offsets changes. There are used for repeat counts and for other things such as
  397: capturing parenthesis numbers in back references. */
  398: 
  399: #define PUT2(a,n,d)   \
  400:   a[n] = (d) >> 8; \
  401:   a[(n)+1] = (d) & 255
  402: 
  403: #define GET2(a,n) \
  404:   (((a)[n] << 8) | (a)[(n)+1])
  405: 
  406: #define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2
  407: 
  408: 
  409: /* When UTF-8 encoding is being used, a character is no longer just a single
  410: byte. The macros for character handling generate simple sequences when used in
  411: byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
  412: not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
  413: never be called in byte mode. To make sure they can never even appear when
  414: UTF-8 support is omitted, we don't even define them. */
  415: 
  416: #ifndef SUPPORT_UTF8
  417: #define GETCHAR(c, eptr) c = *eptr;
  418: #define GETCHARTEST(c, eptr) c = *eptr;
  419: #define GETCHARINC(c, eptr) c = *eptr++;
  420: #define GETCHARINCTEST(c, eptr) c = *eptr++;
  421: #define GETCHARLEN(c, eptr, len) c = *eptr;
  422: /* #define GETCHARLENTEST(c, eptr, len) */
  423: /* #define BACKCHAR(eptr) */
  424: 
  425: #else   /* SUPPORT_UTF8 */
  426: 
  427: /* These macros were originally written in the form of loops that used data
  428: from the tables whose names start with _pcre_utf8_table. They were rewritten by
  429: a user so as not to use loops, because in some environments this gives a
  430: significant performance advantage, and it seems never to do any harm. */
  431: 
  432: /* Base macro to pick up the remaining bytes of a UTF-8 character, not
  433: advancing the pointer. On entry c must hold the lead byte (eptr[0]); its bits
       select a 2- to 6-byte form, and the continuation bytes eptr[1] onwards are
       masked with 0x3f and merged into c. Nothing is validated and nothing is
       bounds-checked: up to eptr[5] may be read. The expansion is a brace block. */
  434: 
  435: #define GETUTF8(c, eptr) \
  436:     { \
  437:     if ((c & 0x20) == 0) \
  438:       c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
  439:     else if ((c & 0x10) == 0) \
  440:       c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
  441:     else if ((c & 0x08) == 0) \
  442:       c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
  443:       ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
  444:     else if ((c & 0x04) == 0) \
  445:       c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
  446:           ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
  447:           (eptr[4] & 0x3f); \
  448:     else \
  449:       c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
  450:           ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
  451:           ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
  452:     }
  453: 
  454: /* Get the next UTF-8 character, not advancing the pointer. This is called when
  455: we know we are in UTF-8 mode. The expansion is two statements (an assignment
       followed by an if) and supplies its own final semicolon, so put the call in
       braces when it forms the body of an if, else or loop. */
  456: 
  457: #define GETCHAR(c, eptr) \
  458:   c = *eptr; \
  459:   if (c >= 0xc0) GETUTF8(c, eptr);
  460: 
  461: /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
  462: pointer. The name utf8 is not a macro parameter: it is taken from the scope in
       which the macro is used. The expansion is two statements and supplies its own
       final semicolon, so put the call in braces when it forms the body of an if,
       else or loop. */
  463: 
  464: #define GETCHARTEST(c, eptr) \
  465:   c = *eptr; \
  466:   if (utf8 && c >= 0xc0) GETUTF8(c, eptr);
  467: 
  468: /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
  469: the pointer. On entry c must hold the lead byte and eptr must already point at
       the first continuation byte; on exit eptr has been moved past the last byte
       of the character (1 to 5 further bytes). Continuation bytes are masked with
       0x3f and merged into c; nothing is validated or bounds-checked. */
  470: 
  471: #define GETUTF8INC(c, eptr) \
  472:     { \
  473:     if ((c & 0x20) == 0) \
  474:       c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \
  475:     else if ((c & 0x10) == 0) \
  476:       { \
  477:       c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \
  478:       eptr += 2; \
  479:       } \
  480:     else if ((c & 0x08) == 0) \
  481:       { \
  482:       c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \
  483:           ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
  484:       eptr += 3; \
  485:       } \
  486:     else if ((c & 0x04) == 0) \
  487:       { \
  488:       c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \
  489:           ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \
  490:           (eptr[3] & 0x3f); \
  491:       eptr += 4; \
  492:       } \
  493:     else \
  494:       { \
  495:       c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \
  496:           ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \
  497:           ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \
  498:       eptr += 5; \
  499:       } \
  500:     }
  501: 
  502: /* Get the next UTF-8 character, advancing the pointer. This is called when we
  503: know we are in UTF-8 mode. The expansion is two statements (an assignment that
       steps eptr past the first byte, followed by an if) and supplies its own final
       semicolon, so put the call in braces when it forms the body of an if, else or
       loop. */
  504: 
  505: #define GETCHARINC(c, eptr) \
  506:   c = *eptr++; \
  507:   if (c >= 0xc0) GETUTF8INC(c, eptr);
  508: 
  509: /* Get the next character, testing for UTF-8 mode, and advancing the pointer.
  510: This is called when we don't know if we are in UTF-8 mode. The name utf8 is not
       a macro parameter: it is taken from the scope in which the macro is used. The
       expansion is two statements and supplies its own final semicolon, so put the
       call in braces when it forms the body of an if, else or loop. */
  511: 
  512: #define GETCHARINCTEST(c, eptr) \
  513:   c = *eptr++; \
  514:   if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr);
  515: 
  516: /* Base macro to pick up the remaining bytes of a UTF-8 character, not
  517: advancing the pointer, incrementing the length. On entry c must hold the lead
       byte (eptr[0]); len is increased by the number of continuation bytes (1 to 5)
       and is never reset here, so the caller chooses its starting value.
       Continuation bytes are masked with 0x3f and merged into c; nothing is
       validated or bounds-checked. */
  518: 
  519: #define GETUTF8LEN(c, eptr, len) \
  520:     { \
  521:     if ((c & 0x20) == 0) \
  522:       { \
  523:       c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
  524:       len++; \
  525:       } \
  526:     else if ((c & 0x10)  == 0) \
  527:       { \
  528:       c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
  529:       len += 2; \
  530:       } \
  531:     else if ((c & 0x08)  == 0) \
  532:       {\
  533:       c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
  534:           ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
  535:       len += 3; \
  536:       } \
  537:     else if ((c & 0x04)  == 0) \
  538:       { \
  539:       c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
  540:           ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
  541:           (eptr[4] & 0x3f); \
  542:       len += 4; \
  543:       } \
  544:     else \
  545:       {\
  546:       c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
  547:           ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
  548:           ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
  549:       len += 5; \
  550:       } \
  551:     }
  552: 
  553: /* Get the next UTF-8 character, not advancing the pointer, incrementing length
  554: if there are extra bytes. This is called when we know we are in UTF-8 mode. The
       expansion is two statements (an assignment followed by an if) and supplies
       its own final semicolon, so put the call in braces when it forms the body of
       an if, else or loop. */
  555: 
  556: #define GETCHARLEN(c, eptr, len) \
  557:   c = *eptr; \
  558:   if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
  559: 
  560: /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
  561: pointer, incrementing length if there are extra bytes. This is called when we
  562: do not know if we are in UTF-8 mode. The name utf8 is not a macro parameter: it
       is taken from the scope in which the macro is used. The expansion is two
       statements and supplies its own final semicolon, so put the call in braces
       when it forms the body of an if, else or loop. */
  563: 
  564: #define GETCHARLENTEST(c, eptr, len) \
  565:   c = *eptr; \
  566:   if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len);
  567: 
  568: /* If the pointer is not at the start of a character, move it back until
  569: it is. This is called only in UTF-8 mode - we don't put a test within the macro
  570: because almost all calls are already within a block of UTF-8 only code. The loop
       steps eptr back while the byte it points at has the form 10xxxxxx; it has no
       lower bound of its own. The expansion is a bare while statement without a
       terminating semicolon. */
  571: 
  572: #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
  573: 
  574: #endif  /* SUPPORT_UTF8 */
  575: 
  576: 
  577: /* In case there is no definition of offsetof() provided - though any proper
  578: Standard C system should have one. */
  579: 
  580: #ifndef offsetof
  581: #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
  582: #endif
  583: 
  584: 
  585: /* Private flags containing information about the compiled regex. They used to
  586: live at the top end of the options word, but that got almost full, so now they
  587: are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
  588: the restrictions on partial matching have been lifted. It remains for backwards
  589: compatibility. */
  590: 
  591: #define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
  592: #define PCRE_FIRSTSET      0x0002  /* first_byte is set */
  593: #define PCRE_REQCHSET      0x0004  /* req_byte is set */
  594: #define PCRE_STARTLINE     0x0008  /* start after \n for multiline */
  595: #define PCRE_JCHANGED      0x0010  /* j option used in regex */
  596: #define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */
  597: #define PCRE_HASTHEN       0x0040  /* pattern contains (*THEN) */
  598: 
  599: /* Flags for the "extra" block produced by pcre_study(). */
  600: 
  601: #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */
  602: #define PCRE_STUDY_MINLEN  0x0002  /* a minimum length field exists */
  603: 
  604: /* Masks for identifying the public options that are permitted at compile
  605: time, run time, or study time, respectively. */
  606: 
  607: #define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
  608:                            PCRE_NEWLINE_ANYCRLF)
  609: 
  610: #define PUBLIC_COMPILE_OPTIONS \
  611:   (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
  612:    PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
  613:    PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
  614:    PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
  615:    PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
  616: 
  617: #define PUBLIC_EXEC_OPTIONS \
  618:   (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
  619:    PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
  620:    PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
  621: 
  622: #define PUBLIC_DFA_EXEC_OPTIONS \
  623:   (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
  624:    PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
  625:    PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
  626:    PCRE_NO_START_OPTIMIZE)
  627: 
  628: #define PUBLIC_STUDY_OPTIONS \
  629:    PCRE_STUDY_JIT_COMPILE
  630: 
  631: /* Magic number to provide a small check against being handed junk. Also used
  632: to detect whether a pattern was compiled on a host of different endianness. */
  633: 
  634: #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
  635: 
  636: /* Negative values for the firstchar and reqchar variables */
  637: 
  638: #define REQ_UNSET (-2)
  639: #define REQ_NONE  (-1)
  640: 
  641: /* The maximum remaining length of subject we are prepared to search for a
  642: req_byte match. */
  643: 
  644: #define REQ_BYTE_MAX 1000
  645: 
  646: /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
  647: variable-length repeat, or anything other than literal characters. */
  648: 
  649: #define REQ_CASELESS 0x0100    /* indicates caselessness */
  650: #define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
  651: 
  652: /* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
  653: environments where these macros are defined elsewhere. Unfortunately, there
  654: is no way to do the same for the typedef. */
  655: 
  656: typedef int BOOL;
  657: 
  658: #ifndef FALSE
  659: #define FALSE   0
  660: #define TRUE    1
  661: #endif
  662: 
  663: /* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
  664: character constants like '*' because the compiler would emit their EBCDIC code,
  665: which is different from their ASCII/UTF-8 code. Instead we define macros for
  666: the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
  667: is enabled. When UTF-8 support is not enabled, the definitions use character
  668: literals. Both character and string versions of each character are needed, and
  669: there are some longer strings as well.
  670: 
  671: This means that, on EBCDIC platforms, the PCRE library can handle either
  672: EBCDIC, or UTF-8, but not both. To support both in the same compiled library
  673: would need different lookups depending on whether PCRE_UTF8 was set or not.
  674: This would make it impossible to use characters in switch/case statements,
  675: which would reduce performance. For a theoretical use (which nobody has asked
  676: for) in a minority area (EBCDIC platforms), this is not sensible. Any
  677: application that did need both could compile two versions of the library, using
  678: macros to give the functions distinct names. */
  679: 
  680: #ifndef SUPPORT_UTF8
  681: 
  682: /* UTF-8 support is not enabled; use the platform-dependent character literals
  683: so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
  684: 
  685: #define CHAR_HT                     '\t'
  686: #define CHAR_VT                     '\v'
  687: #define CHAR_FF                     '\f'
  688: #define CHAR_CR                     '\r'
  689: #define CHAR_NL                     '\n'
  690: #define CHAR_BS                     '\b'
  691: #define CHAR_BEL                    '\a'
  692: #ifdef EBCDIC
  693: #define CHAR_ESC                    '\047'
  694: #define CHAR_DEL                    '\007'
  695: #else
  696: #define CHAR_ESC                    '\033'
  697: #define CHAR_DEL                    '\177'
  698: #endif
  699: 
  700: #define CHAR_SPACE                  ' '
  701: #define CHAR_EXCLAMATION_MARK       '!'
  702: #define CHAR_QUOTATION_MARK         '"'
  703: #define CHAR_NUMBER_SIGN            '#'
  704: #define CHAR_DOLLAR_SIGN            '$'
  705: #define CHAR_PERCENT_SIGN           '%'
  706: #define CHAR_AMPERSAND              '&'
  707: #define CHAR_APOSTROPHE             '\''
  708: #define CHAR_LEFT_PARENTHESIS       '('
  709: #define CHAR_RIGHT_PARENTHESIS      ')'
  710: #define CHAR_ASTERISK               '*'
  711: #define CHAR_PLUS                   '+'
  712: #define CHAR_COMMA                  ','
  713: #define CHAR_MINUS                  '-'
  714: #define CHAR_DOT                    '.'
  715: #define CHAR_SLASH                  '/'
  716: #define CHAR_0                      '0'
  717: #define CHAR_1                      '1'
  718: #define CHAR_2                      '2'
  719: #define CHAR_3                      '3'
  720: #define CHAR_4                      '4'
  721: #define CHAR_5                      '5'
  722: #define CHAR_6                      '6'
  723: #define CHAR_7                      '7'
  724: #define CHAR_8                      '8'
  725: #define CHAR_9                      '9'
  726: #define CHAR_COLON                  ':'
  727: #define CHAR_SEMICOLON              ';'
  728: #define CHAR_LESS_THAN_SIGN         '<'
  729: #define CHAR_EQUALS_SIGN            '='
  730: #define CHAR_GREATER_THAN_SIGN      '>'
  731: #define CHAR_QUESTION_MARK          '?'
  732: #define CHAR_COMMERCIAL_AT          '@'
  733: #define CHAR_A                      'A'
  734: #define CHAR_B                      'B'
  735: #define CHAR_C                      'C'
  736: #define CHAR_D                      'D'
  737: #define CHAR_E                      'E'
  738: #define CHAR_F                      'F'
  739: #define CHAR_G                      'G'
  740: #define CHAR_H                      'H'
  741: #define CHAR_I                      'I'
  742: #define CHAR_J                      'J'
  743: #define CHAR_K                      'K'
  744: #define CHAR_L                      'L'
  745: #define CHAR_M                      'M'
  746: #define CHAR_N                      'N'
  747: #define CHAR_O                      'O'
  748: #define CHAR_P                      'P'
  749: #define CHAR_Q                      'Q'
  750: #define CHAR_R                      'R'
  751: #define CHAR_S                      'S'
  752: #define CHAR_T                      'T'
  753: #define CHAR_U                      'U'
  754: #define CHAR_V                      'V'
  755: #define CHAR_W                      'W'
  756: #define CHAR_X                      'X'
  757: #define CHAR_Y                      'Y'
  758: #define CHAR_Z                      'Z'
  759: #define CHAR_LEFT_SQUARE_BRACKET    '['
  760: #define CHAR_BACKSLASH              '\\'
  761: #define CHAR_RIGHT_SQUARE_BRACKET   ']'
  762: #define CHAR_CIRCUMFLEX_ACCENT      '^'
  763: #define CHAR_UNDERSCORE             '_'
  764: #define CHAR_GRAVE_ACCENT           '`'
  765: #define CHAR_a                      'a'
  766: #define CHAR_b                      'b'
  767: #define CHAR_c                      'c'
  768: #define CHAR_d                      'd'
  769: #define CHAR_e                      'e'
  770: #define CHAR_f                      'f'
  771: #define CHAR_g                      'g'
  772: #define CHAR_h                      'h'
  773: #define CHAR_i                      'i'
  774: #define CHAR_j                      'j'
  775: #define CHAR_k                      'k'
  776: #define CHAR_l                      'l'
  777: #define CHAR_m                      'm'
  778: #define CHAR_n                      'n'
  779: #define CHAR_o                      'o'
  780: #define CHAR_p                      'p'
  781: #define CHAR_q                      'q'
  782: #define CHAR_r                      'r'
  783: #define CHAR_s                      's'
  784: #define CHAR_t                      't'
  785: #define CHAR_u                      'u'
  786: #define CHAR_v                      'v'
  787: #define CHAR_w                      'w'
  788: #define CHAR_x                      'x'
  789: #define CHAR_y                      'y'
  790: #define CHAR_z                      'z'
  791: #define CHAR_LEFT_CURLY_BRACKET     '{'
  792: #define CHAR_VERTICAL_LINE          '|'
  793: #define CHAR_RIGHT_CURLY_BRACKET    '}'
  794: #define CHAR_TILDE                  '~'
  795: 
  796: #define STR_HT                      "\t"
  797: #define STR_VT                      "\v"
  798: #define STR_FF                      "\f"
  799: #define STR_CR                      "\r"
  800: #define STR_NL                      "\n"
  801: #define STR_BS                      "\b"
  802: #define STR_BEL                     "\a"
  803: #ifdef EBCDIC
  804: #define STR_ESC                     "\047"
  805: #define STR_DEL                     "\007"
  806: #else
  807: #define STR_ESC                     "\033"
  808: #define STR_DEL                     "\177"
  809: #endif
  810: 
  811: #define STR_SPACE                   " "
  812: #define STR_EXCLAMATION_MARK        "!"
  813: #define STR_QUOTATION_MARK          "\""
  814: #define STR_NUMBER_SIGN             "#"
  815: #define STR_DOLLAR_SIGN             "$"
  816: #define STR_PERCENT_SIGN            "%"
  817: #define STR_AMPERSAND               "&"
  818: #define STR_APOSTROPHE              "'"
  819: #define STR_LEFT_PARENTHESIS        "("
  820: #define STR_RIGHT_PARENTHESIS       ")"
  821: #define STR_ASTERISK                "*"
  822: #define STR_PLUS                    "+"
  823: #define STR_COMMA                   ","
  824: #define STR_MINUS                   "-"
  825: #define STR_DOT                     "."
  826: #define STR_SLASH                   "/"
  827: #define STR_0                       "0"
  828: #define STR_1                       "1"
  829: #define STR_2                       "2"
  830: #define STR_3                       "3"
  831: #define STR_4                       "4"
  832: #define STR_5                       "5"
  833: #define STR_6                       "6"
  834: #define STR_7                       "7"
  835: #define STR_8                       "8"
  836: #define STR_9                       "9"
  837: #define STR_COLON                   ":"
  838: #define STR_SEMICOLON               ";"
  839: #define STR_LESS_THAN_SIGN          "<"
  840: #define STR_EQUALS_SIGN             "="
  841: #define STR_GREATER_THAN_SIGN       ">"
  842: #define STR_QUESTION_MARK           "?"
  843: #define STR_COMMERCIAL_AT           "@"
  844: #define STR_A                       "A"
  845: #define STR_B                       "B"
  846: #define STR_C                       "C"
  847: #define STR_D                       "D"
  848: #define STR_E                       "E"
  849: #define STR_F                       "F"
  850: #define STR_G                       "G"
  851: #define STR_H                       "H"
  852: #define STR_I                       "I"
  853: #define STR_J                       "J"
  854: #define STR_K                       "K"
  855: #define STR_L                       "L"
  856: #define STR_M                       "M"
  857: #define STR_N                       "N"
  858: #define STR_O                       "O"
  859: #define STR_P                       "P"
  860: #define STR_Q                       "Q"
  861: #define STR_R                       "R"
  862: #define STR_S                       "S"
  863: #define STR_T                       "T"
  864: #define STR_U                       "U"
  865: #define STR_V                       "V"
  866: #define STR_W                       "W"
  867: #define STR_X                       "X"
  868: #define STR_Y                       "Y"
  869: #define STR_Z                       "Z"
  870: #define STR_LEFT_SQUARE_BRACKET     "["
  871: #define STR_BACKSLASH               "\\"
  872: #define STR_RIGHT_SQUARE_BRACKET    "]"
  873: #define STR_CIRCUMFLEX_ACCENT       "^"
  874: #define STR_UNDERSCORE              "_"
  875: #define STR_GRAVE_ACCENT            "`"
  876: #define STR_a                       "a"
  877: #define STR_b                       "b"
  878: #define STR_c                       "c"
  879: #define STR_d                       "d"
  880: #define STR_e                       "e"
  881: #define STR_f                       "f"
  882: #define STR_g                       "g"
  883: #define STR_h                       "h"
  884: #define STR_i                       "i"
  885: #define STR_j                       "j"
  886: #define STR_k                       "k"
  887: #define STR_l                       "l"
  888: #define STR_m                       "m"
  889: #define STR_n                       "n"
  890: #define STR_o                       "o"
  891: #define STR_p                       "p"
  892: #define STR_q                       "q"
  893: #define STR_r                       "r"
  894: #define STR_s                       "s"
  895: #define STR_t                       "t"
  896: #define STR_u                       "u"
  897: #define STR_v                       "v"
  898: #define STR_w                       "w"
  899: #define STR_x                       "x"
  900: #define STR_y                       "y"
  901: #define STR_z                       "z"
  902: #define STR_LEFT_CURLY_BRACKET      "{"
  903: #define STR_VERTICAL_LINE           "|"
  904: #define STR_RIGHT_CURLY_BRACKET     "}"
  905: #define STR_TILDE                   "~"
  906: 
  907: #define STRING_ACCEPT0              "ACCEPT\0"
  908: #define STRING_COMMIT0              "COMMIT\0"
  909: #define STRING_F0                   "F\0"
  910: #define STRING_FAIL0                "FAIL\0"
  911: #define STRING_MARK0                "MARK\0"
  912: #define STRING_PRUNE0               "PRUNE\0"
  913: #define STRING_SKIP0                "SKIP\0"
  914: #define STRING_THEN                 "THEN"
  915: 
  916: #define STRING_alpha0               "alpha\0"
  917: #define STRING_lower0               "lower\0"
  918: #define STRING_upper0               "upper\0"
  919: #define STRING_alnum0               "alnum\0"
  920: #define STRING_ascii0               "ascii\0"
  921: #define STRING_blank0               "blank\0"
  922: #define STRING_cntrl0               "cntrl\0"
  923: #define STRING_digit0               "digit\0"
  924: #define STRING_graph0               "graph\0"
  925: #define STRING_print0               "print\0"
  926: #define STRING_punct0               "punct\0"
  927: #define STRING_space0               "space\0"
  928: #define STRING_word0                "word\0"
  929: #define STRING_xdigit               "xdigit"
  930: 
  931: #define STRING_DEFINE               "DEFINE"
  932: 
  933: #define STRING_CR_RIGHTPAR             "CR)"
  934: #define STRING_LF_RIGHTPAR             "LF)"
  935: #define STRING_CRLF_RIGHTPAR           "CRLF)"
  936: #define STRING_ANY_RIGHTPAR            "ANY)"
  937: #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"
  938: #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"
  939: #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"
  940: #define STRING_UTF8_RIGHTPAR           "UTF8)"
  941: #define STRING_UCP_RIGHTPAR            "UCP)"
  942: #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
  943: 
  944: #else  /* SUPPORT_UTF8 */
  945: 
  946: /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
  947: works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8
  948: mode only. */
  949: 
  950: #define CHAR_HT                     '\011'
  951: #define CHAR_VT                     '\013'
  952: #define CHAR_FF                     '\014'
  953: #define CHAR_CR                     '\015'
  954: #define CHAR_NL                     '\012'
  955: #define CHAR_BS                     '\010'
  956: #define CHAR_BEL                    '\007'
  957: #define CHAR_ESC                    '\033'
  958: #define CHAR_DEL                    '\177'
  959: 
  960: #define CHAR_SPACE                  '\040'
  961: #define CHAR_EXCLAMATION_MARK       '\041'
  962: #define CHAR_QUOTATION_MARK         '\042'
  963: #define CHAR_NUMBER_SIGN            '\043'
  964: #define CHAR_DOLLAR_SIGN            '\044'
  965: #define CHAR_PERCENT_SIGN           '\045'
  966: #define CHAR_AMPERSAND              '\046'
  967: #define CHAR_APOSTROPHE             '\047'
  968: #define CHAR_LEFT_PARENTHESIS       '\050'
  969: #define CHAR_RIGHT_PARENTHESIS      '\051'
  970: #define CHAR_ASTERISK               '\052'
  971: #define CHAR_PLUS                   '\053'
  972: #define CHAR_COMMA                  '\054'
  973: #define CHAR_MINUS                  '\055'
  974: #define CHAR_DOT                    '\056'
  975: #define CHAR_SLASH                  '\057'
  976: #define CHAR_0                      '\060'
  977: #define CHAR_1                      '\061'
  978: #define CHAR_2                      '\062'
  979: #define CHAR_3                      '\063'
  980: #define CHAR_4                      '\064'
  981: #define CHAR_5                      '\065'
  982: #define CHAR_6                      '\066'
  983: #define CHAR_7                      '\067'
  984: #define CHAR_8                      '\070'
  985: #define CHAR_9                      '\071'
  986: #define CHAR_COLON                  '\072'
  987: #define CHAR_SEMICOLON              '\073'
  988: #define CHAR_LESS_THAN_SIGN         '\074'
  989: #define CHAR_EQUALS_SIGN            '\075'
  990: #define CHAR_GREATER_THAN_SIGN      '\076'
  991: #define CHAR_QUESTION_MARK          '\077'
  992: #define CHAR_COMMERCIAL_AT          '\100'
  993: #define CHAR_A                      '\101'
  994: #define CHAR_B                      '\102'
  995: #define CHAR_C                      '\103'
  996: #define CHAR_D                      '\104'
  997: #define CHAR_E                      '\105'
  998: #define CHAR_F                      '\106'
  999: #define CHAR_G                      '\107'
 1000: #define CHAR_H                      '\110'
 1001: #define CHAR_I                      '\111'
 1002: #define CHAR_J                      '\112'
 1003: #define CHAR_K                      '\113'
 1004: #define CHAR_L                      '\114'
 1005: #define CHAR_M                      '\115'
 1006: #define CHAR_N                      '\116'
 1007: #define CHAR_O                      '\117'
 1008: #define CHAR_P                      '\120'
 1009: #define CHAR_Q                      '\121'
 1010: #define CHAR_R                      '\122'
 1011: #define CHAR_S                      '\123'
 1012: #define CHAR_T                      '\124'
 1013: #define CHAR_U                      '\125'
 1014: #define CHAR_V                      '\126'
 1015: #define CHAR_W                      '\127'
 1016: #define CHAR_X                      '\130'
 1017: #define CHAR_Y                      '\131'
 1018: #define CHAR_Z                      '\132'
 1019: #define CHAR_LEFT_SQUARE_BRACKET    '\133'
 1020: #define CHAR_BACKSLASH              '\134'
 1021: #define CHAR_RIGHT_SQUARE_BRACKET   '\135'
 1022: #define CHAR_CIRCUMFLEX_ACCENT      '\136'
 1023: #define CHAR_UNDERSCORE             '\137'
 1024: #define CHAR_GRAVE_ACCENT           '\140'
 1025: #define CHAR_a                      '\141'
 1026: #define CHAR_b                      '\142'
 1027: #define CHAR_c                      '\143'
 1028: #define CHAR_d                      '\144'
 1029: #define CHAR_e                      '\145'
 1030: #define CHAR_f                      '\146'
 1031: #define CHAR_g                      '\147'
 1032: #define CHAR_h                      '\150'
 1033: #define CHAR_i                      '\151'
 1034: #define CHAR_j                      '\152'
 1035: #define CHAR_k                      '\153'
 1036: #define CHAR_l                      '\154'
 1037: #define CHAR_m                      '\155'
 1038: #define CHAR_n                      '\156'
 1039: #define CHAR_o                      '\157'
 1040: #define CHAR_p                      '\160'
 1041: #define CHAR_q                      '\161'
 1042: #define CHAR_r                      '\162'
 1043: #define CHAR_s                      '\163'
 1044: #define CHAR_t                      '\164'
 1045: #define CHAR_u                      '\165'
 1046: #define CHAR_v                      '\166'
 1047: #define CHAR_w                      '\167'
 1048: #define CHAR_x                      '\170'
 1049: #define CHAR_y                      '\171'
 1050: #define CHAR_z                      '\172'
 1051: #define CHAR_LEFT_CURLY_BRACKET     '\173'
 1052: #define CHAR_VERTICAL_LINE          '\174'
 1053: #define CHAR_RIGHT_CURLY_BRACKET    '\175'
 1054: #define CHAR_TILDE                  '\176'
 1055: 
 1056: #define STR_HT                      "\011"
 1057: #define STR_VT                      "\013"
 1058: #define STR_FF                      "\014"
 1059: #define STR_CR                      "\015"
 1060: #define STR_NL                      "\012"
 1061: #define STR_BS                      "\010"
 1062: #define STR_BEL                     "\007"
 1063: #define STR_ESC                     "\033"
 1064: #define STR_DEL                     "\177"
 1065: 
 1066: #define STR_SPACE                   "\040"
 1067: #define STR_EXCLAMATION_MARK        "\041"
 1068: #define STR_QUOTATION_MARK          "\042"
 1069: #define STR_NUMBER_SIGN             "\043"
 1070: #define STR_DOLLAR_SIGN             "\044"
 1071: #define STR_PERCENT_SIGN            "\045"
 1072: #define STR_AMPERSAND               "\046"
 1073: #define STR_APOSTROPHE              "\047"
 1074: #define STR_LEFT_PARENTHESIS        "\050"
 1075: #define STR_RIGHT_PARENTHESIS       "\051"
 1076: #define STR_ASTERISK                "\052"
 1077: #define STR_PLUS                    "\053"
 1078: #define STR_COMMA                   "\054"
 1079: #define STR_MINUS                   "\055"
 1080: #define STR_DOT                     "\056"
 1081: #define STR_SLASH                   "\057"
 1082: #define STR_0                       "\060"
 1083: #define STR_1                       "\061"
 1084: #define STR_2                       "\062"
 1085: #define STR_3                       "\063"
 1086: #define STR_4                       "\064"
 1087: #define STR_5                       "\065"
 1088: #define STR_6                       "\066"
 1089: #define STR_7                       "\067"
 1090: #define STR_8                       "\070"
 1091: #define STR_9                       "\071"
 1092: #define STR_COLON                   "\072"
 1093: #define STR_SEMICOLON               "\073"
 1094: #define STR_LESS_THAN_SIGN          "\074"
 1095: #define STR_EQUALS_SIGN             "\075"
 1096: #define STR_GREATER_THAN_SIGN       "\076"
 1097: #define STR_QUESTION_MARK           "\077"
 1098: #define STR_COMMERCIAL_AT           "\100"
 1099: #define STR_A                       "\101"
 1100: #define STR_B                       "\102"
 1101: #define STR_C                       "\103"
 1102: #define STR_D                       "\104"
 1103: #define STR_E                       "\105"
 1104: #define STR_F                       "\106"
 1105: #define STR_G                       "\107"
 1106: #define STR_H                       "\110"
 1107: #define STR_I                       "\111"
 1108: #define STR_J                       "\112"
 1109: #define STR_K                       "\113"
 1110: #define STR_L                       "\114"
 1111: #define STR_M                       "\115"
 1112: #define STR_N                       "\116"
 1113: #define STR_O                       "\117"
 1114: #define STR_P                       "\120"
 1115: #define STR_Q                       "\121"
 1116: #define STR_R                       "\122"
 1117: #define STR_S                       "\123"
 1118: #define STR_T                       "\124"
 1119: #define STR_U                       "\125"
 1120: #define STR_V                       "\126"
 1121: #define STR_W                       "\127"
 1122: #define STR_X                       "\130"
 1123: #define STR_Y                       "\131"
 1124: #define STR_Z                       "\132"
 1125: #define STR_LEFT_SQUARE_BRACKET     "\133"
 1126: #define STR_BACKSLASH               "\134"
 1127: #define STR_RIGHT_SQUARE_BRACKET    "\135"
 1128: #define STR_CIRCUMFLEX_ACCENT       "\136"
 1129: #define STR_UNDERSCORE              "\137"
 1130: #define STR_GRAVE_ACCENT            "\140"
 1131: #define STR_a                       "\141"
 1132: #define STR_b                       "\142"
 1133: #define STR_c                       "\143"
 1134: #define STR_d                       "\144"
 1135: #define STR_e                       "\145"
 1136: #define STR_f                       "\146"
 1137: #define STR_g                       "\147"
 1138: #define STR_h                       "\150"
 1139: #define STR_i                       "\151"
 1140: #define STR_j                       "\152"
 1141: #define STR_k                       "\153"
 1142: #define STR_l                       "\154"
 1143: #define STR_m                       "\155"
 1144: #define STR_n                       "\156"
 1145: #define STR_o                       "\157"
 1146: #define STR_p                       "\160"
 1147: #define STR_q                       "\161"
 1148: #define STR_r                       "\162"
 1149: #define STR_s                       "\163"
 1150: #define STR_t                       "\164"
 1151: #define STR_u                       "\165"
 1152: #define STR_v                       "\166"
 1153: #define STR_w                       "\167"
 1154: #define STR_x                       "\170"
 1155: #define STR_y                       "\171"
 1156: #define STR_z                       "\172"
 1157: #define STR_LEFT_CURLY_BRACKET      "\173"
 1158: #define STR_VERTICAL_LINE           "\174"
 1159: #define STR_RIGHT_CURLY_BRACKET     "\175"
 1160: #define STR_TILDE                   "\176"
 1161: 
 1162: #define STRING_ACCEPT0              STR_A STR_C STR_C STR_E STR_P STR_T "\0"
 1163: #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
 1164: #define STRING_F0                   STR_F "\0"
 1165: #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
 1166: #define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
 1167: #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
 1168: #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
 1169: #define STRING_THEN                 STR_T STR_H STR_E STR_N
 1170: 
 1171: #define STRING_alpha0               STR_a STR_l STR_p STR_h STR_a "\0"
 1172: #define STRING_lower0               STR_l STR_o STR_w STR_e STR_r "\0"
 1173: #define STRING_upper0               STR_u STR_p STR_p STR_e STR_r "\0"
 1174: #define STRING_alnum0               STR_a STR_l STR_n STR_u STR_m "\0"
 1175: #define STRING_ascii0               STR_a STR_s STR_c STR_i STR_i "\0"
 1176: #define STRING_blank0               STR_b STR_l STR_a STR_n STR_k "\0"
 1177: #define STRING_cntrl0               STR_c STR_n STR_t STR_r STR_l "\0"
 1178: #define STRING_digit0               STR_d STR_i STR_g STR_i STR_t "\0"
 1179: #define STRING_graph0               STR_g STR_r STR_a STR_p STR_h "\0"
 1180: #define STRING_print0               STR_p STR_r STR_i STR_n STR_t "\0"
 1181: #define STRING_punct0               STR_p STR_u STR_n STR_c STR_t "\0"
 1182: #define STRING_space0               STR_s STR_p STR_a STR_c STR_e "\0"
 1183: #define STRING_word0                STR_w STR_o STR_r STR_d       "\0"
 1184: #define STRING_xdigit               STR_x STR_d STR_i STR_g STR_i STR_t
 1185: 
 1186: #define STRING_DEFINE               STR_D STR_E STR_F STR_I STR_N STR_E
 1187: 
 1188: #define STRING_CR_RIGHTPAR             STR_C STR_R STR_RIGHT_PARENTHESIS
 1189: #define STRING_LF_RIGHTPAR             STR_L STR_F STR_RIGHT_PARENTHESIS
 1190: #define STRING_CRLF_RIGHTPAR           STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 1191: #define STRING_ANY_RIGHTPAR            STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
 1192: #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 1193: #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 1194: #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
 1195: #define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
 1196: #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 1197: #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
 1198: 
 1199: #endif  /* SUPPORT_UTF8 */
 1200: 
 1201: /* Escape items that are just an encoding of a particular data value. Each one is defined only if it is not already defined. */
 1202: 
 1203: #ifndef ESC_e
 1204: #define ESC_e CHAR_ESC  /* escape character */
 1205: #endif
 1206: 
 1207: #ifndef ESC_f
 1208: #define ESC_f CHAR_FF  /* form feed */
 1209: #endif
 1210: 
 1211: #ifndef ESC_n
 1212: #define ESC_n CHAR_NL  /* newline */
 1213: #endif
 1214: 
 1215: #ifndef ESC_r
 1216: #define ESC_r CHAR_CR  /* carriage return */
 1217: #endif
 1218: 
 1219: /* The horizontal-tab item is named ESC_tee: we can't officially use ESC_t
 1220: because it is a POSIX reserved identifier (presumably like size_t etc.). */
 1221: 
 1222: #ifndef ESC_tee
 1223: #define ESC_tee CHAR_HT  /* horizontal tab */
 1224: #endif
 1225: 
 1226: /* Codes for the different types of Unicode property test. */
 1227: 
 1228: #define PT_ANY        0    /* Any property - matches all chars */
 1229: #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
 1230: #define PT_GC         2    /* Specified general characteristic (e.g. L) */
 1231: #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
 1232: #define PT_SC         4    /* Script (e.g. Han) */
 1233: #define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
 1234: #define PT_SPACE      6    /* Perl space - Z plus HT, NL, FF, CR (9,10,12,13) */
 1235: #define PT_PXSPACE    7    /* POSIX space - Z plus HT, NL, VT, FF, CR (9,10,11,12,13) */
 1236: #define PT_WORD       8    /* Word - L plus N plus underscore */
 1237: 
 1238: /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
 1239: contain UTF-8 characters with values greater than 255. */
 1240: 
 1241: #define XCL_NOT    0x01    /* Flag bit: this is a negative class */
 1242: #define XCL_MAP    0x02    /* Flag bit: a 32-byte map is present */
 1243: 
 1244: #define XCL_END       0    /* Item type: marks end of individual items */
 1245: #define XCL_SINGLE    1    /* Item type: single item (one multibyte char) follows */
 1246: #define XCL_RANGE     2    /* Item type: a range (two multibyte chars) follows */
 1247: #define XCL_PROP      3    /* Item type: Unicode property (2-byte property code follows) */
 1248: #define XCL_NOTPROP   4    /* Item type: Unicode inverted property (2-byte property code follows) */
 1249: 
 1250: /* These are escaped items that aren't just an encoding of a particular data
 1251: value such as \n. They must have non-zero values, as check_escape() returns
 1252: their negation. Also, they must appear in the same order as in the opcode
 1253: definitions below, up to ESC_z. There's a dummy (ESC_dum) for OP_ALLANY because it
 1254: corresponds to "." in DOTALL mode rather than an escape sequence. It is also
 1255: used for [^] in JavaScript compatibility mode, and for \C in non-utf8 mode. In
 1256: non-DOTALL mode, "." behaves like \N.
 1257: 
 1258: The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
 1259: when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.
 1260: They must be contiguous, and remain in order so that the replacements can be
 1261: looked up from a table.
 1262: 
 1263: The final escape must be ESC_REF as subsequent values are used for
 1264: backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
 1265: greater than ESC_b and less than ESC_Z to detect the types that may be
 1266: repeated. These are the types that consume characters. If any new escapes are
 1267: put in between that don't consume a character, that code will have to change.
 1268: */
 1269: 
 1270: enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,  /* 1-9 */
 1271:        ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,    /* 10-18; ESC_dum (13) lines up with OP_ALLANY */
 1272:        ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,                           /* 19-24; ESC_z ends the run that parallels the opcodes */
 1273:        ESC_E, ESC_Q, ESC_g, ESC_k,                                         /* 25-28; beyond the opcode-parallel range */
 1274:        ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,                     /* 29-34; PCRE_UCP replacements, contiguous and ordered */
 1275:        ESC_REF };                                                          /* 35; must stay last - larger values denote backreferences */
 1276: 
 1277: /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
 1278: OP_EOD must correspond in order to the list of escapes immediately above.
 1279: 
 1280: *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
 1281: that follow must also be updated to match. There are also tables called
 1282: "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
 1283: 
 1284: enum {
 1285:   OP_END,            /* 0 End of pattern */
 1286: 
 1287:   /* Values corresponding to backslashed metacharacters */
 1288: 
 1289:   OP_SOD,            /* 1 Start of data: \A */
 1290:   OP_SOM,            /* 2 Start of match (subject + offset): \G */
 1291:   OP_SET_SOM,        /* 3 Set start of match (\K) */
 1292:   OP_NOT_WORD_BOUNDARY,  /*  4 \B */
 1293:   OP_WORD_BOUNDARY,      /*  5 \b */
 1294:   OP_NOT_DIGIT,          /*  6 \D */
 1295:   OP_DIGIT,              /*  7 \d */
 1296:   OP_NOT_WHITESPACE,     /*  8 \S */
 1297:   OP_WHITESPACE,         /*  9 \s */
 1298:   OP_NOT_WORDCHAR,       /* 10 \W */
 1299:   OP_WORDCHAR,           /* 11 \w */
 1300: 
 1301:   OP_ANY,            /* 12 Match any character except newline */
 1302:   OP_ALLANY,         /* 13 Match any character */
 1303:   OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
 1304:   OP_NOTPROP,        /* 15 \P (not Unicode property) */
 1305:   OP_PROP,           /* 16 \p (Unicode property) */
 1306:   OP_ANYNL,          /* 17 \R (any newline sequence) */
 1307:   OP_NOT_HSPACE,     /* 18 \H (not horizontal whitespace) */
 1308:   OP_HSPACE,         /* 19 \h (horizontal whitespace) */
 1309:   OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */
 1310:   OP_VSPACE,         /* 21 \v (vertical whitespace) */
 1311:   OP_EXTUNI,         /* 22 \X (extended Unicode sequence */
 1312:   OP_EODN,           /* 23 End of data or \n at end of data: \Z. */
 1313:   OP_EOD,            /* 24 End of data: \z */
 1314: 
 1315:   OP_CIRC,           /* 25 Start of line - not multiline */
 1316:   OP_CIRCM,          /* 26 Start of line - multiline */
 1317:   OP_DOLL,           /* 27 End of line - not multiline */
 1318:   OP_DOLLM,          /* 28 End of line - multiline */
 1319:   OP_CHAR,           /* 29 Match one character, casefully */
 1320:   OP_CHARI,          /* 30 Match one character, caselessly */
 1321:   OP_NOT,            /* 31 Match one character, not the given one, casefully */
 1322:   OP_NOTI,           /* 32 Match one character, not the given one, caselessly */
 1323: 
 1324:   /* The following sets of 13 opcodes must always be kept in step because
 1325:   the offset from the first one is used to generate the others. */
 1326: 
 1327:   /**** Single characters, caseful, must precede the caseless ones ****/
 1328: 
 1329:   OP_STAR,           /* 33 The maximizing and minimizing versions of */
 1330:   OP_MINSTAR,        /* 34 these six opcodes must come in pairs, with */
 1331:   OP_PLUS,           /* 35 the minimizing one second. */
 1332:   OP_MINPLUS,        /* 36 */
 1333:   OP_QUERY,          /* 37 */
 1334:   OP_MINQUERY,       /* 38 */
 1335: 
 1336:   OP_UPTO,           /* 39 From 0 to n matches of one character, caseful*/
 1337:   OP_MINUPTO,        /* 40 */
 1338:   OP_EXACT,          /* 41 Exactly n matches */
 1339: 
 1340:   OP_POSSTAR,        /* 42 Possessified star, caseful */
 1341:   OP_POSPLUS,        /* 43 Possessified plus, caseful */
 1342:   OP_POSQUERY,       /* 44 Posesssified query, caseful */
 1343:   OP_POSUPTO,        /* 45 Possessified upto, caseful */
 1344: 
 1345:   /**** Single characters, caseless, must follow the caseful ones */
 1346: 
 1347:   OP_STARI,          /* 46 */
 1348:   OP_MINSTARI,       /* 47 */
 1349:   OP_PLUSI,          /* 48 */
 1350:   OP_MINPLUSI,       /* 49 */
 1351:   OP_QUERYI,         /* 50 */
 1352:   OP_MINQUERYI,      /* 51 */
 1353: 
 1354:   OP_UPTOI,          /* 52 From 0 to n matches of one character, caseless */
 1355:   OP_MINUPTOI,       /* 53 */
 1356:   OP_EXACTI,         /* 54 */
 1357: 
 1358:   OP_POSSTARI,       /* 55 Possessified star, caseless */
 1359:   OP_POSPLUSI,       /* 56 Possessified plus, caseless */
 1360:   OP_POSQUERYI,      /* 57 Posesssified query, caseless */
 1361:   OP_POSUPTOI,       /* 58 Possessified upto, caseless */
 1362: 
 1363:   /**** The negated ones must follow the non-negated ones, and match them ****/
 1364:   /**** Negated single character, caseful; must precede the caseless ones ****/
 1365: 
 1366:   OP_NOTSTAR,        /* 59 The maximizing and minimizing versions of */
 1367:   OP_NOTMINSTAR,     /* 60 these six opcodes must come in pairs, with */
 1368:   OP_NOTPLUS,        /* 61 the minimizing one second. They must be in */
 1369:   OP_NOTMINPLUS,     /* 62 exactly the same order as those above. */
 1370:   OP_NOTQUERY,       /* 63 */
 1371:   OP_NOTMINQUERY,    /* 64 */
 1372: 
 1373:   OP_NOTUPTO,        /* 65 From 0 to n matches, caseful */
 1374:   OP_NOTMINUPTO,     /* 66 */
 1375:   OP_NOTEXACT,       /* 67 Exactly n matches */
 1376: 
 1377:   OP_NOTPOSSTAR,     /* 68 Possessified versions, caseful */
 1378:   OP_NOTPOSPLUS,     /* 69 */
 1379:   OP_NOTPOSQUERY,    /* 70 */
 1380:   OP_NOTPOSUPTO,     /* 71 */
 1381: 
 1382:   /**** Negated single character, caseless; must follow the caseful ones ****/
 1383: 
 1384:   OP_NOTSTARI,       /* 72 */
 1385:   OP_NOTMINSTARI,    /* 73 */
 1386:   OP_NOTPLUSI,       /* 74 */
 1387:   OP_NOTMINPLUSI,    /* 75 */
 1388:   OP_NOTQUERYI,      /* 76 */
 1389:   OP_NOTMINQUERYI,   /* 77 */
 1390: 
 1391:   OP_NOTUPTOI,       /* 78 From 0 to n matches, caseless */
 1392:   OP_NOTMINUPTOI,    /* 79 */
 1393:   OP_NOTEXACTI,      /* 80 Exactly n matches */
 1394: 
 1395:   OP_NOTPOSSTARI,    /* 81 Possessified versions, caseless */
 1396:   OP_NOTPOSPLUSI,    /* 82 */
 1397:   OP_NOTPOSQUERYI,   /* 83 */
 1398:   OP_NOTPOSUPTOI,    /* 84 */
 1399: 
 1400:   /**** Character types ****/
 1401: 
 1402:   OP_TYPESTAR,       /* 85 The maximizing and minimizing versions of */
 1403:   OP_TYPEMINSTAR,    /* 86 these six opcodes must come in pairs, with */
 1404:   OP_TYPEPLUS,       /* 87 the minimizing one second. These codes must */
 1405:   OP_TYPEMINPLUS,    /* 88 be in exactly the same order as those above. */
 1406:   OP_TYPEQUERY,      /* 89 */
 1407:   OP_TYPEMINQUERY,   /* 90 */
 1408: 
 1409:   OP_TYPEUPTO,       /* 91 From 0 to n matches */
 1410:   OP_TYPEMINUPTO,    /* 92 */
 1411:   OP_TYPEEXACT,      /* 93 Exactly n matches */
 1412: 
 1413:   OP_TYPEPOSSTAR,    /* 94 Possessified versions */
 1414:   OP_TYPEPOSPLUS,    /* 95 */
 1415:   OP_TYPEPOSQUERY,   /* 96 */
 1416:   OP_TYPEPOSUPTO,    /* 97 */
 1417: 
 1418:   /* These are used for character classes and back references; only the
 1419:   first six are the same as the sets above. */
 1420: 
 1421:   OP_CRSTAR,         /* 98 The maximizing and minimizing versions of */
 1422:   OP_CRMINSTAR,      /* 99 all these opcodes must come in pairs, with */
 1423:   OP_CRPLUS,         /* 100 the minimizing one second. These codes must */
 1424:   OP_CRMINPLUS,      /* 101 be in exactly the same order as those above. */
 1425:   OP_CRQUERY,        /* 102 */
 1426:   OP_CRMINQUERY,     /* 103 */
 1427: 
 1428:   OP_CRRANGE,        /* 104 These are different to the three sets above. */
 1429:   OP_CRMINRANGE,     /* 105 */
 1430: 
 1431:   /* End of quantifier opcodes */
 1432: 
 1433:   OP_CLASS,          /* 106 Match a character class, chars < 256 only */
 1434:   OP_NCLASS,         /* 107 Same, but the bitmap was created from a negative
 1435:                               class - the difference is relevant only when a
 1436:                               UTF-8 character > 255 is encountered. */
 1437:   OP_XCLASS,         /* 108 Extended class for handling UTF-8 chars within the
 1438:                               class. This does both positive and negative. */
 1439:   OP_REF,            /* 109 Match a back reference, casefully */
 1440:   OP_REFI,           /* 110 Match a back reference, caselessly */
 1441:   OP_RECURSE,        /* 111 Match a numbered subpattern (possibly recursive) */
 1442:   OP_CALLOUT,        /* 112 Call out to external function if provided */
 1443: 
 1444:   OP_ALT,            /* 113 Start of alternation */
 1445:   OP_KET,            /* 114 End of group that doesn't have an unbounded repeat */
 1446:   OP_KETRMAX,        /* 115 These two must remain together and in this */
 1447:   OP_KETRMIN,        /* 116 order. They are for groups that repeat for ever. */
 1448:   OP_KETRPOS,        /* 117 Possessive unlimited repeat. */
 1449: 
 1450:   /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
 1451:   asserts must remain in order. */
 1452: 
 1453:   OP_REVERSE,        /* 118 Move pointer back - used in lookbehind assertions */
 1454:   OP_ASSERT,         /* 119 Positive lookahead */
 1455:   OP_ASSERT_NOT,     /* 120 Negative lookahead */
 1456:   OP_ASSERTBACK,     /* 121 Positive lookbehind */
 1457:   OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
 1458: 
 1459:   /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
 1460:   after the assertions, with ONCE first, as there's a test for >= ONCE for a
 1461:   subpattern that isn't an assertion. The POS versions must immediately follow
 1462:   the non-POS versions in each case. */
 1463: 
 1464:   OP_ONCE,           /* 123 Atomic group, contains captures */
 1465:   OP_ONCE_NC,        /* 124 Atomic group containing no captures */
 1466:   OP_BRA,            /* 125 Start of non-capturing bracket */
 1467:   OP_BRAPOS,         /* 126 Ditto, with unlimited, possessive repeat */
 1468:   OP_CBRA,           /* 127 Start of capturing bracket */
 1469:   OP_CBRAPOS,        /* 128 Ditto, with unlimited, possessive repeat */
 1470:   OP_COND,           /* 129 Conditional group */
 1471: 
 1472:   /* These five must follow the previous five, in the same order. There's a
 1473:   check for >= SBRA to distinguish the two sets. */
 1474: 
 1475:   OP_SBRA,           /* 130 Start of non-capturing bracket, check empty  */
 1476:   OP_SBRAPOS,        /* 131 Ditto, with unlimited, possessive repeat */
 1477:   OP_SCBRA,          /* 132 Start of capturing bracket, check empty */
 1478:   OP_SCBRAPOS,       /* 133 Ditto, with unlimited, possessive repeat */
 1479:   OP_SCOND,          /* 134 Conditional group, check empty */
 1480: 
 1481:   /* The next two pairs must (respectively) be kept together. */
 1482: 
 1483:   OP_CREF,           /* 135 Used to hold a capture number as condition */
 1484:   OP_NCREF,          /* 136 Same, but generated by a name reference*/
 1485:   OP_RREF,           /* 137 Used to hold a recursion number as condition */
 1486:   OP_NRREF,          /* 138 Same, but generated by a name reference*/
 1487:   OP_DEF,            /* 139 The DEFINE condition */
 1488: 
 1489:   OP_BRAZERO,        /* 140 These two must remain together and in this */
 1490:   OP_BRAMINZERO,     /* 141 order. */
 1491:   OP_BRAPOSZERO,     /* 142 */
 1492: 
 1493:   /* These are backtracking control verbs */
 1494: 
 1495:   OP_MARK,           /* 143 always has an argument */
 1496:   OP_PRUNE,          /* 144 */
 1497:   OP_PRUNE_ARG,      /* 145 same, but with argument */
 1498:   OP_SKIP,           /* 146 */
 1499:   OP_SKIP_ARG,       /* 147 same, but with argument */
 1500:   OP_THEN,           /* 148 */
 1501:   OP_THEN_ARG,       /* 149 same, but with argument */
 1502:   OP_COMMIT,         /* 150 */
 1503: 
 1504:   /* These are forced failure and success verbs */
 1505: 
 1506:   OP_FAIL,           /* 151 */
 1507:   OP_ACCEPT,         /* 152 */
 1508:   OP_ASSERT_ACCEPT,  /* 153 Used inside assertions */
 1509:   OP_CLOSE,          /* 154 Used before OP_ACCEPT to close open captures */
 1510: 
 1511:   /* This is used to skip a subpattern with a {0} quantifier */
 1512: 
 1513:   OP_SKIPZERO,       /* 155 */
 1514: 
 1515:   /* This is not an opcode, but is used to check that tables indexed by opcode
 1516:   are the correct length, in order to catch updating errors - there have been
 1517:   some in the past. */
 1518: 
 1519:   OP_TABLE_LENGTH
 1520: };
 1521: 
 1522: /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
 1523: definitions that follow must also be updated to match. There are also tables
 1524: called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
 1525: 
 1526: 
 1527: /* This macro defines textual names for all the opcodes, one string per opcode
 1528: in opcode order. They are used only for debugging, and some are only partial
 1529: names. The macro is referenced only in pcre_printint.c, which fills out the full
 1530: names in many cases (and in some cases doesn't use these names at all). */
 1531: 
 1532: #define OP_NAME_LIST \
 1533:   "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \
 1534:   "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte",         \
 1535:   "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \
 1536:   "extuni",  "\\Z", "\\z",                                        \
 1537:   "^", "^", "$", "$", "char", "chari", "not", "noti",             \
 1538:   "*", "*?", "+", "+?", "?", "??",                                \
 1539:   "{", "{", "{",                                                  \
 1540:   "*+","++", "?+", "{",                                           \
 1541:   "*", "*?", "+", "+?", "?", "??",                                \
 1542:   "{", "{", "{",                                                  \
 1543:   "*+","++", "?+", "{",                                           \
 1544:   "*", "*?", "+", "+?", "?", "??",                                \
 1545:   "{", "{", "{",                                                  \
 1546:   "*+","++", "?+", "{",                                           \
 1547:   "*", "*?", "+", "+?", "?", "??",                                \
 1548:   "{", "{", "{",                                                  \
 1549:   "*+","++", "?+", "{",                                           \
 1550:   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
 1551:   "*+","++", "?+", "{",                                           \
 1552:   "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
 1553:   "class", "nclass", "xclass", "Ref", "Refi",                     \
 1554:   "Recurse", "Callout",                                           \
 1555:   "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos",                  \
 1556:   "Reverse", "Assert", "Assert not", "AssertB", "AssertB not",    \
 1557:   "Once", "Once_NC",                                              \
 1558:   "Bra", "BraPos", "CBra", "CBraPos",                             \
 1559:   "Cond",                                                         \
 1560:   "SBra", "SBraPos", "SCBra", "SCBraPos",                         \
 1561:   "SCond",                                                        \
 1562:   "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
 1563:   "Brazero", "Braminzero", "Braposzero",                          \
 1564:   "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
 1565:   "*THEN", "*THEN", "*COMMIT", "*FAIL",                           \
 1566:   "*ACCEPT", "*ASSERT_ACCEPT",                                    \
 1567:   "Close", "Skip zero"
 1568: 
 1569: 
 1570: /* This macro defines the length of fixed length operations in the compiled
 1571: regex. The lengths are used when searching for specific things, and also in the
 1572: debugging printing of a compiled regex. We use a macro so that it can be
 1573: defined close to the definitions of the opcodes themselves.
 1574: 
 1575: As things have been extended, some of these are no longer fixed lengths, but are
 1576: minima instead. For example, the length of a single-character repeat may vary
 1577: in UTF-8 mode. The code that uses this table must know about such things. */
 1578: 
 1579: #define OP_LENGTHS \
 1580:   1,                             /* End                                    */ \
 1581:   1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
 1582:   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
 1583:   1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \
 1584:   3, 3,                          /* \P, \p                                 */ \
 1585:   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
 1586:   1,                             /* \X                                     */ \
 1587:   1, 1, 1, 1, 1, 1,              /* \Z, \z, ^, ^M, $, $M                   */ \
 1588:   2,                             /* Char  - the minimum length             */ \
 1589:   2,                             /* Chari  - the minimum length            */ \
 1590:   2,                             /* not                                    */ \
 1591:   2,                             /* noti                                   */ \
 1592:   /* Positive single-char repeats                             ** These are */ \
 1593:   2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??       ** minima in */ \
 1594:   4, 4, 4,                       /* upto, minupto, exact      ** UTF-8     */ \
 1595:   2, 2, 2, 4,                    /* *+, ++, ?+, upto+         ** mode      */ \
 1596:   2, 2, 2, 2, 2, 2,              /* *I, *?I, +I, +?I, ?I, ??I              */ \
 1597:   4, 4, 4,                       /* upto I, minupto I, exact I             */ \
 1598:   2, 2, 2, 4,                    /* *+I, ++I, ?+I, upto+I                  */ \
 1599:   /* Negative single-char repeats - only for chars < 256                   */ \
 1600:   2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
 1601:   4, 4, 4,                       /* NOT upto, minupto, exact               */ \
 1602:   2, 2, 2, 4,                    /* Possessive NOT *, +, ?, upto           */ \
 1603:   2, 2, 2, 2, 2, 2,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */ \
 1604:   4, 4, 4,                       /* NOT upto I, minupto I, exact I         */ \
 1605:   2, 2, 2, 4,                    /* Possessive NOT *I, +I, ?I, upto I      */ \
 1606:   /* Positive type repeats                                                 */ \
 1607:   2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \
 1608:   4, 4, 4,                       /* Type upto, minupto, exact              */ \
 1609:   2, 2, 2, 4,                    /* Possessive *+, ++, ?+, upto+           */ \
 1610:   /* Character class & ref repeats                                         */ \
 1611:   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
 1612:   5, 5,                          /* CRRANGE, CRMINRANGE                    */ \
 1613:  33,                             /* CLASS                                  */ \
 1614:  33,                             /* NCLASS                                 */ \
 1615:   0,                             /* XCLASS - variable length               */ \
 1616:   3,                             /* REF                                    */ \
 1617:   3,                             /* REFI                                   */ \
 1618:   1+LINK_SIZE,                   /* RECURSE                                */ \
 1619:   2+2*LINK_SIZE,                 /* CALLOUT                                */ \
 1620:   1+LINK_SIZE,                   /* Alt                                    */ \
 1621:   1+LINK_SIZE,                   /* Ket                                    */ \
 1622:   1+LINK_SIZE,                   /* KetRmax                                */ \
 1623:   1+LINK_SIZE,                   /* KetRmin                                */ \
 1624:   1+LINK_SIZE,                   /* KetRpos                                */ \
 1625:   1+LINK_SIZE,                   /* Reverse                                */ \
 1626:   1+LINK_SIZE,                   /* Assert                                 */ \
 1627:   1+LINK_SIZE,                   /* Assert not                             */ \
 1628:   1+LINK_SIZE,                   /* Assert behind                          */ \
 1629:   1+LINK_SIZE,                   /* Assert behind not                      */ \
 1630:   1+LINK_SIZE,                   /* ONCE                                   */ \
 1631:   1+LINK_SIZE,                   /* ONCE_NC                                */ \
 1632:   1+LINK_SIZE,                   /* BRA                                    */ \
 1633:   1+LINK_SIZE,                   /* BRAPOS                                 */ \
 1634:   3+LINK_SIZE,                   /* CBRA                                   */ \
 1635:   3+LINK_SIZE,                   /* CBRAPOS                                */ \
 1636:   1+LINK_SIZE,                   /* COND                                   */ \
 1637:   1+LINK_SIZE,                   /* SBRA                                   */ \
 1638:   1+LINK_SIZE,                   /* SBRAPOS                                */ \
 1639:   3+LINK_SIZE,                   /* SCBRA                                  */ \
 1640:   3+LINK_SIZE,                   /* SCBRAPOS                               */ \
 1641:   1+LINK_SIZE,                   /* SCOND                                  */ \
 1642:   3, 3,                          /* CREF, NCREF                            */ \
 1643:   3, 3,                          /* RREF, NRREF                            */ \
 1644:   1,                             /* DEF                                    */ \
 1645:   1, 1, 1,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */ \
 1646:   3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG                 */ \
 1647:   1, 3,                          /* SKIP, SKIP_ARG                         */ \
 1648:   1, 3,                          /* THEN, THEN_ARG                         */ \
 1649:   1, 1, 1, 1,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */ \
 1650:   3, 1                           /* CLOSE, SKIPZERO  */
 1651: 
 1652: /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
 1653: condition, as opposed to a specific recursion number (see OP_RREF above). */
 1654: 
 1655: #define RREF_ANY  0xffff   /* All 16 bits set */
 1656: 
 1657: /* Compile time error code numbers. They are given names so that they can more
 1658: easily be tracked. When a new number is added, the table called eint in
 1659: pcreposix.c must be updated. */
 1660: 
 1661: enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
 1662:        ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
 1663:        ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
 1664:        ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
 1665:        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
 1666:        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
 1667:        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
 1668:        ERR70, ERR71, ERR72, ERRCOUNT };  /* ERRCOUNT (73) is the number of codes, not a code */
 1669: 
 1670: /* The real format of the start of the pcre block; the index of names and the
 1671: code vector run on as long as necessary after the end. We store an explicit
 1672: offset to the name table so that if a regex is compiled on one host, saved, and
 1673: then run on another where the size of pointers is different, all might still
 1674: be well. For the case of compiled-on-4 and run-on-8, we include an extra
 1675: pointer that is always NULL. For future-proofing, a few dummy fields were
 1676: originally included - even though you can never get this planning right - but
 1677: there is only one left now.
 1678: 
 1679: NOTE NOTE NOTE:
 1680: Because people can now save and re-use compiled patterns, any additions to this
 1681: structure should be made at the end, and something earlier (e.g. a new
 1682: flag in the options or one of the dummy fields) should indicate that the new
 1683: fields are present. Currently PCRE always sets the dummy fields to zero.
 1684: NOTE NOTE NOTE
 1685: */
 1686: 
 1687: typedef struct real_pcre {
 1688:   pcre_uint32 magic_number;
 1689:   pcre_uint32 size;               /* Total that was malloced */
 1690:   pcre_uint32 options;            /* Public options */
 1691:   pcre_uint16 flags;              /* Private flags */
 1692:   pcre_uint16 dummy1;             /* For future use; currently always zero */
 1693:   pcre_uint16 top_bracket;
 1694:   pcre_uint16 top_backref;        /* Presumably max back reference, as in compile_data */
 1695:   pcre_uint16 first_byte;
 1696:   pcre_uint16 req_byte;
 1697:   pcre_uint16 name_table_offset;  /* Offset to name table that follows */
 1698:   pcre_uint16 name_entry_size;    /* Size of any name items */
 1699:   pcre_uint16 name_count;         /* Number of name items */
 1700:   pcre_uint16 ref_count;          /* Reference count */
 1701: 
 1702:   const unsigned char *tables;    /* Pointer to tables or NULL for std */
 1703:   const unsigned char *nullpad;   /* Always NULL: pad for compiled-on-4, run-on-8 */
 1704: } real_pcre;
 1705: 
 1706: /* The format of the block used to store data from pcre_study(). The same
 1707: remark (see NOTE above) about extending this structure applies. */
 1708: 
 1709: typedef struct pcre_study_data {
 1710:   pcre_uint32 size;               /* Total that was malloced */
 1711:   pcre_uint32 flags;              /* Private flags */
 1712:   uschar start_bits[32];          /* Starting char bits (32 bytes = 256 bits) */
 1713:   pcre_uint32 minlength;          /* Minimum subject length */
 1714: } pcre_study_data;
 1715: 
 1716: /* Structure for building a chain of open capturing subpatterns during
 1717: compiling, so that instructions to close them can be compiled when (*ACCEPT) is
 1718: encountered. This is also used to identify subpatterns that contain recursive
 1719: back references to themselves, so that they can be made atomic. */
 1720: 
 1721: typedef struct open_capitem {
 1722:   struct open_capitem *next;    /* Chain link to the next item */
 1723:   pcre_uint16 number;           /* Capture number */
 1724:   pcre_uint16 flag;             /* Set TRUE if recursive back ref */
 1725: } open_capitem;
 1726: 
 1727: /* Structure for passing "static" information around between the functions
 1728: doing the compiling, so that they are thread-safe. */
 1729: 
 1730: typedef struct compile_data {
 1731:   const uschar *lcc;            /* Points to lower casing table */
 1732:   const uschar *fcc;            /* Points to case-flipping table */
 1733:   const uschar *cbits;          /* Points to character type table */
 1734:   const uschar *ctypes;         /* Points to table of type maps */
 1735:   const uschar *start_workspace;/* The start of working space */
 1736:   const uschar *start_code;     /* The start of the compiled code */
 1737:   const uschar *start_pattern;  /* The start of the pattern */
 1738:   const uschar *end_pattern;    /* The end of the pattern */
 1739:   open_capitem *open_caps;      /* Chain of open capture items */
 1740:   uschar *hwm;                  /* High watermark of workspace */
 1741:   uschar *name_table;           /* The name/number table */
 1742:   int  names_found;             /* Number of entries so far */
 1743:   int  name_entry_size;         /* Size of each entry */
 1744:   int  workspace_size;          /* Size of workspace */
 1745:   int  bracount;                /* Count of capturing parens as we compile */
 1746:   int  final_bracount;          /* Saved value (presumably of bracount) after first pass */
 1747:   int  top_backref;             /* Maximum back reference */
 1748:   unsigned int backref_map;     /* Bitmap of low back refs */
 1749:   int  assert_depth;            /* Depth of nested assertions */
 1750:   int  external_options;        /* External (initial) options */
 1751:   int  external_flags;          /* External flag bits to be set */
 1752:   int  req_varyopt;             /* "After variable item" flag for reqbyte */
 1753:   BOOL had_accept;              /* (*ACCEPT) encountered */
 1754:   BOOL check_lookbehind;        /* Lookbehinds need later checking */
 1755:   int  nltype;                  /* Newline type */
 1756:   int  nllen;                   /* Newline string length */
 1757:   uschar nl[4];                 /* Newline string when fixed length (at most 4 bytes) */
 1758: } compile_data;
 1759: 
 1760: /* Structure for maintaining a chain of pointers to the currently incomplete
 1761: branches, for testing for left recursion while compiling. */
 1762: 
 1763: typedef struct branch_chain {
 1764:   struct branch_chain *outer;   /* Chain link (the outer entry, per its name) */
 1765:   uschar *current_branch;       /* The currently incomplete branch */
 1766: } branch_chain;
 1767: 
 1768: /* Structure for items in a linked list that represents an explicit recursive
 1769: call within the pattern; used by pcre_exec(). */
 1770: 
 1771: typedef struct recursion_info {
 1772:   struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
 1773:   int group_num;                  /* Number of group that was called */
 1774:   int *offset_save;               /* Pointer to start of saved offsets */
 1775:   int saved_max;                  /* Number of saved offsets (see offset_save) */
 1776:   USPTR subject_position;         /* Position at start of recursion */
 1777: } recursion_info;
 1778: 
 1779: /* A similar structure to recursion_info, for pcre_dfa_exec(); no saved offsets. */
 1780: 
 1781: typedef struct dfa_recursion_info {
 1782:   struct dfa_recursion_info *prevrec; /* Previous record, as in recursion_info */
 1783:   int group_num;                      /* Group number, as in recursion_info */
 1784:   USPTR subject_position;             /* Subject position, as in recursion_info */
 1785: } dfa_recursion_info;
 1786: 
 1787: /* Structure for building a chain of data for holding the values of the subject
 1788: pointer at the start of each subpattern, so as to detect when an empty string
 1789: has been matched by a subpattern - to break infinite loops; used by
 1790: pcre_exec(). */
 1791: 
 1792: typedef struct eptrblock {
 1793:   struct eptrblock *epb_prev;   /* Previous block in the chain */
 1794:   USPTR epb_saved_eptr;         /* Saved subject pointer value */
 1795: } eptrblock;
 1796: 
 1797: 
 1798: /* Structure for passing "static" information around between the functions doing
 1799: traditional NFA matching, so that they are thread-safe. See also dfa_match_data. */
 1800: 
 1801: typedef struct match_data {
 1802:   unsigned long int match_call_count;      /* As it says */
 1803:   unsigned long int match_limit;           /* As it says */
 1804:   unsigned long int match_limit_recursion; /* As it says */
 1805:   int   *offset_vector;         /* Offset vector */
 1806:   int    offset_end;            /* One past the end */
 1807:   int    offset_max;            /* The maximum usable for return data */
 1808:   int    nltype;                /* Newline type */
 1809:   int    nllen;                 /* Newline string length */
 1810:   int    name_count;            /* Number of names in name table */
 1811:   int    name_entry_size;       /* Size of entry in names table */
 1812:   uschar *name_table;           /* Table of names */
 1813:   uschar nl[4];                 /* Newline string when fixed (at most 4 bytes) */
 1814:   const  uschar *lcc;           /* Points to lower casing table */
 1815:   const  uschar *ctypes;        /* Points to table of type maps */
 1816:   BOOL   offset_overflow;       /* Set if too many extractions */
 1817:   BOOL   notbol;                /* NOTBOL flag */
 1818:   BOOL   noteol;                /* NOTEOL flag */
 1819:   BOOL   utf8;                  /* UTF8 flag */
 1820:   BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
 1821:   BOOL   use_ucp;               /* PCRE_UCP flag */
 1822:   BOOL   endonly;               /* Dollar not before final \n */
 1823:   BOOL   notempty;              /* Empty string match not wanted */
 1824:   BOOL   notempty_atstart;      /* Empty string match at start not wanted */
 1825:   BOOL   hitend;                /* Hit the end of the subject at some point */
 1826:   BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
 1827:   BOOL   hasthen;               /* Pattern contains (*THEN) */
 1828:   BOOL   ignore_skip_arg;       /* For re-run when SKIP name not found */
 1829:   const  uschar *start_code;    /* For use when recursing */
 1830:   USPTR  start_subject;         /* Start of the subject string */
 1831:   USPTR  end_subject;           /* End of the subject string */
 1832:   USPTR  start_match_ptr;       /* Start of matched string */
 1833:   USPTR  end_match_ptr;         /* Subject position at end match */
 1834:   USPTR  start_used_ptr;        /* Earliest consulted character */
 1835:   int    partial;               /* PARTIAL options */
 1836:   int    end_offset_top;        /* Highwater mark at end of match */
 1837:   int    capture_last;          /* Most recent capture number */
 1838:   int    start_offset;          /* The start offset value */
 1839:   int    match_function_type;   /* Set for certain special calls of MATCH() */
 1840:   eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */
 1841:   int    eptrn;                 /* Next free eptrblock */
 1842:   recursion_info *recursive;    /* Linked list of recursion data */
 1843:   void  *callout_data;          /* To pass back to callouts */
 1844:   const  uschar *mark;          /* Mark pointer to pass back on success */
 1845:   const  uschar *nomatch_mark;  /* Mark pointer to pass back on failure */
 1846:   const  uschar *once_target;   /* Where to back up to for atomic groups */
 1847: } match_data;
 1848: 
 1849: /* A similar structure to match_data, used for the same purpose (passing "static"
 1850: information around so that functions are thread-safe) by the DFA matching functions. */
 1851: 
 1852: typedef struct dfa_match_data {
 1853:   const uschar *start_code;      /* Start of the compiled pattern */
 1854:   const uschar *start_subject;   /* Start of the subject string */
 1855:   const uschar *end_subject;     /* End of subject string */
 1856:   const uschar *start_used_ptr;  /* Earliest consulted character */
 1857:   const uschar *tables;          /* Character tables */
 1858:   int   start_offset;            /* The start offset value */
 1859:   int   moptions;                /* Match options */
 1860:   int   poptions;                /* Pattern options */
 1861:   int    nltype;                 /* Newline type */
 1862:   int    nllen;                  /* Newline string length */
 1863:   uschar nl[4];                  /* Newline string when fixed (at most 4 bytes) */
 1864:   void  *callout_data;           /* To pass back to callouts */
 1865:   dfa_recursion_info *recursive; /* Linked list of recursion data */
 1866: } dfa_match_data;
 1867: 
 1868: /* Bit definitions for entries in the pcre_ctypes table (0x20 and 0x40 are not used here). */
 1869: 
 1870: #define ctype_space   0x01
 1871: #define ctype_letter  0x02
 1872: #define ctype_digit   0x04
 1873: #define ctype_xdigit  0x08
 1874: #define ctype_word    0x10   /* alphanumeric or '_' */
 1875: #define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
 1876: 
 1877: /* Offsets for the bitmap tables in pcre_cbits. Each table contains a set of bits
 1878: for a class map (offsets step by 32). Some classes are built by combining these. */
 1879: 
 1880: #define cbit_space     0      /* [:space:] or \s */
 1881: #define cbit_xdigit   32      /* [:xdigit:] */
 1882: #define cbit_digit    64      /* [:digit:] or \d */
 1883: #define cbit_upper    96      /* [:upper:] */
 1884: #define cbit_lower   128      /* [:lower:] */
 1885: #define cbit_word    160      /* [:word:] or \w */
 1886: #define cbit_graph   192      /* [:graph:] */
 1887: #define cbit_print   224      /* [:print:] */
 1888: #define cbit_punct   256      /* [:punct:] */
 1889: #define cbit_cntrl   288      /* [:cntrl:] */
 1890: #define cbit_length  320      /* Length of the cbits table (10 maps of 32) */
 1891: 
 1892: /* Offsets of the various tables from the base tables pointer, and
 1893: total length. */
 1894: 
 1895: #define lcc_offset      0   /* Lower casing table */
 1896: #define fcc_offset    256   /* Case-flipping table */
 1897: #define cbits_offset  512   /* Class bitmap tables (see cbit_* offsets) */
 1898: #define ctypes_offset (cbits_offset + cbit_length)   /* 512 + 320 = 832 */
 1899: #define tables_length (ctypes_offset + 256)          /* 832 + 256 = 1088 */
 1900: 
 1901: /* Layout of the UCP type table that translates property names into types and
 1902: codes. Each entry used to point directly to a name, but to reduce the number of
 1903: relocations in shared libraries, it now has an offset into a single string
 1904: instead. */
 1905: 
 1906: typedef struct {
 1907:   pcre_uint16 name_offset;   /* Offset of the property name in the single names string */
 1908:   pcre_uint16 type;          /* Type that the name translates to */
 1909:   pcre_uint16 value;         /* Code that the name translates to */
 1910: } ucp_type_table;
 1911: 
 1912: 
 1913: /* Internal shared data tables. These are tables that are used by more than one
 1914: of the exported public functions. They have to be "external" in the C sense,
 1915: but are not part of the PCRE public API. The data for these tables is in the
 1916: pcre_tables.c module. */
 1917: 
 1918: extern const int    _pcre_utf8_table1[];
 1919: extern const int    _pcre_utf8_table2[];
 1920: extern const int    _pcre_utf8_table3[];
 1921: extern const uschar _pcre_utf8_table4[];
 1922: 
 1923: #ifdef SUPPORT_JIT
 1924: extern const uschar _pcre_utf8_char_sizes[];
 1925: #endif /* SUPPORT_JIT */
 1926: 
 1927: extern const int    _pcre_utf8_table1_size;
 1928: 
 1929: extern const char   _pcre_utt_names[];      /* Presumably the string that name_offset indexes - confirm */
 1930: extern const ucp_type_table _pcre_utt[];    /* Table of ucp_type_table entries */
 1931: extern const int _pcre_utt_size;
 1932: 
 1933: extern const uschar _pcre_default_tables[];
 1934: 
 1935: extern const uschar _pcre_OP_lengths[];     /* Presumably initialized from OP_LENGTHS - confirm */
 1936: 
 1937: 
 1938: /* Internal shared functions. These are functions that are used by more than
 1939: one of the exported public functions. They have to be "external" in the C
 1940: sense, but are not part of the PCRE public API. The prototypes give parameter
       types only; see each function's definition for what the arguments mean. */
 1941: 
 1942: extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
 1943: extern BOOL          _pcre_is_newline(USPTR, int, USPTR, int *, BOOL);
 1944: extern int           _pcre_ord2utf8(int, uschar *);
 1945: extern real_pcre    *_pcre_try_flipped(const real_pcre *, real_pcre *,
 1946:                        const pcre_study_data *, pcre_study_data *);
 1947: extern int           _pcre_valid_utf8(USPTR, int, int *);
 1948: extern BOOL          _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
 1949: extern BOOL          _pcre_xclass(int, const uschar *);
 1950: 
       /* JIT entry points; declared only when JIT support is compiled in. */
 1951: #ifdef SUPPORT_JIT
 1952: extern void          _pcre_jit_compile(const real_pcre *, pcre_extra *);
 1953: extern int           _pcre_jit_exec(const real_pcre *, void *, PCRE_SPTR,
 1954:                         int, int, int, int, int *, int);
 1955: extern void          _pcre_jit_free(void *);
 1956: extern int           _pcre_jit_get_size(void *);
 1957: #endif
 1958: 
 1959: /* Unicode character database (UCD). One ucd_record describes the properties
       shared by a set of characters; records are located through the stage1 and
       stage2 index tables by the GET_UCD() macro. */
 1960: 
 1961: typedef struct {
 1962:   uschar script;           /* read by UCD_SCRIPT() */
 1963:   uschar chartype;         /* read by UCD_CHARTYPE(); also indexes _pcre_ucp_gentype */
 1964:   pcre_int32 other_case;   /* signed amount that UCD_OTHERCASE() adds to the character */
 1965: } ucd_record;
 1966: 
 1967: extern const ucd_record  _pcre_ucd_records[];   /* indexed by values from _pcre_ucd_stage2 */
 1968: extern const uschar      _pcre_ucd_stage1[];    /* indexed by ch / UCD_BLOCK_SIZE */
 1969: extern const pcre_uint16 _pcre_ucd_stage2[];    /* indexed by stage1 value * UCD_BLOCK_SIZE + ch % UCD_BLOCK_SIZE */
 1970: extern const int         _pcre_ucp_gentype[];   /* indexed by chartype; see UCD_CATEGORY() */
       /* Only declared when JIT support is compiled in. */
 1971: #ifdef SUPPORT_JIT
 1972: extern const int         _pcre_ucp_typerange[];
 1973: #endif
 1974: 
 1975: /* UCD access macros.

       GET_UCD(ch) yields a pointer to the ucd_record for character ch using a
       two-stage lookup: _pcre_ucd_stage1[ch / UCD_BLOCK_SIZE] selects a block,
       that block number times UCD_BLOCK_SIZE plus ch % UCD_BLOCK_SIZE indexes
       _pcre_ucd_stage2, and the value found there indexes _pcre_ucd_records.

       UCD_CHARTYPE and UCD_SCRIPT read one field of that record, UCD_CATEGORY
       maps the chartype through _pcre_ucp_gentype, and UCD_OTHERCASE adds the
       record's signed other_case value to ch.

       All of these macros evaluate ch more than once, so the argument must not
       have side effects (no ch++ or function calls that change state). The
       argument is parenthesized everywhere it is expanded, so an expression
       such as (c & 0x7f) is safe to pass. */
 1976: 
 1977: #define UCD_BLOCK_SIZE 128
 1978: #define GET_UCD(ch) (_pcre_ucd_records + \
 1979:         _pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \
 1980:         UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
 1981: 
 1982: #define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
 1983: #define UCD_SCRIPT(ch)    GET_UCD(ch)->script
 1984: #define UCD_CATEGORY(ch)  _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
 1985: #define UCD_OTHERCASE(ch) ((ch) + GET_UCD(ch)->other_case)
 1986: 
 1987: #endif
 1988: 
 1989: /* End of pcre_internal.h */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>