Annotation of embedaddon/pcre/pcre_internal.h, revision 1.1
1.1 ! misho 1: /*************************************************
! 2: * Perl-Compatible Regular Expressions *
! 3: *************************************************/
! 4:
! 5:
! 6: /* PCRE is a library of functions to support regular expressions whose syntax
! 7: and semantics are as close as possible to those of the Perl 5 language.
! 8:
! 9: Written by Philip Hazel
! 10: Copyright (c) 1997-2011 University of Cambridge
! 11:
! 12: -----------------------------------------------------------------------------
! 13: Redistribution and use in source and binary forms, with or without
! 14: modification, are permitted provided that the following conditions are met:
! 15:
! 16: * Redistributions of source code must retain the above copyright notice,
! 17: this list of conditions and the following disclaimer.
! 18:
! 19: * Redistributions in binary form must reproduce the above copyright
! 20: notice, this list of conditions and the following disclaimer in the
! 21: documentation and/or other materials provided with the distribution.
! 22:
! 23: * Neither the name of the University of Cambridge nor the names of its
! 24: contributors may be used to endorse or promote products derived from
! 25: this software without specific prior written permission.
! 26:
! 27: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
! 28: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 29: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 30: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
! 31: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 32: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 33: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 34: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 35: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 36: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 37: POSSIBILITY OF SUCH DAMAGE.
! 38: -----------------------------------------------------------------------------
! 39: */
! 40:
! 41: /* This header contains definitions that are shared between the different
! 42: modules, but which are not relevant to the exported API. This includes some
! 43: functions whose names all begin with "_pcre_". */
! 44:
! 45: #ifndef PCRE_INTERNAL_H
! 46: #define PCRE_INTERNAL_H
! 47:
! 48: /* Define PCRE_DEBUG to get debugging output on stdout. */
! 49:
! 50: #if 0 /* change to "#if 1" to define PCRE_DEBUG, which makes DPRINTF below call printf */
! 51: #define PCRE_DEBUG
! 52: #endif
! 53:
! 54: /* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
! 55: script prevents both being selected, but not everybody uses "configure". */
! 56:
! 57: #if defined EBCDIC && defined SUPPORT_UTF8 /* stop the build on the unsupported combination */
! 58: #error The use of both EBCDIC and SUPPORT_UTF8 is not supported.
! 59: #endif
! 60:
! 61: /* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
! 62: "configure" script ensures this, but not everybody uses "configure". */
! 63:
! 64: #if defined SUPPORT_UCP && !defined SUPPORT_UTF8
! 65: #define SUPPORT_UTF8 1 /* UCP was requested without UTF-8: switch UTF-8 on as well */
! 66: #endif
! 67:
! 68: /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
! 69: inline, and there are *still* stupid compilers about that don't like indented
! 70: pre-processor statements, or at least there were when I first wrote this. After
! 71: all, it had only been about 10 years then...
! 72:
! 73: It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
! 74: be absolutely sure we get our version. */
! 75:
! 76: #undef DPRINTF
! 77: #ifdef PCRE_DEBUG
! 78: #define DPRINTF(p) printf p /* p must be a complete parenthesized argument list, e.g. DPRINTF(("n=%d\n", n)) */
! 79: #else
! 80: #define DPRINTF(p) /* Nothing: the whole call, including its arguments, disappears */
! 81: #endif
! 82:
! 83:
! 84: /* Standard C headers plus the external interface definition. The only time
! 85: setjmp and stdarg are used is when NO_RECURSE is set. */
! 86:
! 87: #include <ctype.h>
! 88: #include <limits.h>
! 89: #include <stddef.h>
! 90: #include <stdio.h>
! 91: #include <stdlib.h>
! 92: #include <string.h>
! 93:
! 94: /* When compiling a DLL for Windows, the exported symbols have to be declared
! 95: using some MS magic. I found some useful information on this web page:
! 96: http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
! 97: information there, using __declspec(dllexport) without "extern" we have a
! 98: definition; with "extern" we have a declaration. The settings here override the
! 99: setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
! 100: which is all that is needed for applications (they just import the symbols). We
! 101: use:
! 102:
! 103: PCRE_EXP_DECL for declarations
! 104: PCRE_EXP_DEFN for definitions of exported functions
! 105: PCRE_EXP_DATA_DEFN for definitions of exported variables
! 106:
! 107: The reason for the two DEFN macros is that in non-Windows environments, one
! 108: does not want to have "extern" before variable definitions because it leads to
! 109: compiler warnings. So we distinguish between functions and variables. In
! 110: Windows, the two should always be the same.
! 111:
! 112: The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
! 113: which is an application, but needs to import this file in order to "peek" at
! 114: internals, can #include pcre.h first to get an application's-eye view.
! 115:
! 116: In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
! 117: special-purpose environments) might want to stick other stuff in front of
! 118: exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
! 119: PCRE_EXP_DATA_DEFN only if they are not already set. */
! 120:
! 121: #ifndef PCRE_EXP_DECL
! 122: # ifdef _WIN32 /* Windows: decorate with __declspec(dllexport) unless PCRE_STATIC is defined */
! 123: # ifndef PCRE_STATIC
! 124: # define PCRE_EXP_DECL extern __declspec(dllexport)
! 125: # define PCRE_EXP_DEFN __declspec(dllexport)
! 126: # define PCRE_EXP_DATA_DEFN __declspec(dllexport)
! 127: # else /* PCRE_STATIC: no export decoration (presumably a static-library build) */
! 128: # define PCRE_EXP_DECL extern
! 129: # define PCRE_EXP_DEFN
! 130: # define PCRE_EXP_DATA_DEFN
! 131: # endif
! 132: # else /* not Windows */
! 133: # ifdef __cplusplus
! 134: # define PCRE_EXP_DECL extern "C"
! 135: # else
! 136: # define PCRE_EXP_DECL extern
! 137: # endif
! 138: # ifndef PCRE_EXP_DEFN /* keep any definition already supplied by the build */
! 139: # define PCRE_EXP_DEFN PCRE_EXP_DECL
! 140: # endif
! 141: # ifndef PCRE_EXP_DATA_DEFN
! 142: # define PCRE_EXP_DATA_DEFN /* empty, so variable definitions are not prefixed with "extern" */
! 143: # endif
! 144: # endif
! 145: #endif
! 146:
! 147: /* When compiling with the MSVC compiler, it is sometimes necessary to include
! 148: a "calling convention" before exported function names. (This is secondhand
! 149: information; I know nothing about MSVC myself). For example, something like
! 150:
! 151: void __cdecl function(....)
! 152:
! 153: might be needed. In order to make this easy, all the exported functions have
! 154: PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
! 155: set, we ensure here that it has no effect. */
! 156:
! 157: #ifndef PCRE_CALL_CONVENTION
! 158: #define PCRE_CALL_CONVENTION /* empty unless the build supplies something such as __cdecl */
! 159: #endif
! 160:
! 161: /* We need to have types that specify unsigned 16-bit and 32-bit integers. We
! 162: cannot determine these outside the compilation (e.g. by running a program as
! 163: part of "configure") because PCRE is often cross-compiled for use on other
! 164: systems. Instead we make use of the maximum sizes that are available at
! 165: preprocessor time in standard C environments. */
! 166:
! 167: #if USHRT_MAX == 65535 /* unsigned short covers exactly 0..65535: use short */
! 168: typedef unsigned short pcre_uint16;
! 169: typedef short pcre_int16;
! 170: #elif UINT_MAX == 65535 /* otherwise fall back to int if that has the 16-bit range */
! 171: typedef unsigned int pcre_uint16;
! 172: typedef int pcre_int16;
! 173: #else
! 174: #error Cannot determine a type for 16-bit unsigned integers
! 175: #endif
! 176:
! 177: #if UINT_MAX == 4294967295 /* unsigned int covers exactly 0..2^32-1: use int */
! 178: typedef unsigned int pcre_uint32;
! 179: typedef int pcre_int32;
! 180: #elif ULONG_MAX == 4294967295 /* otherwise fall back to long if that has the 32-bit range */
! 181: typedef unsigned long int pcre_uint32;
! 182: typedef long int pcre_int32;
! 183: #else
! 184: #error Cannot determine a type for 32-bit unsigned integers
! 185: #endif
! 186:
! 187: /* When checking for integer overflow in pcre_compile(), we need to handle
! 188: large integers. If a 64-bit integer type is available, we can use that.
! 189: Otherwise we have to cast to double, which of course requires floating point
! 190: arithmetic. Handle this by defining a macro for the appropriate type. If
! 191: stdint.h is available, include it; it may define INT64_MAX. Systems that do not
! 192: have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
! 193: by "configure". */
! 194:
! 195: #if HAVE_STDINT_H /* an undefined HAVE_ macro evaluates as 0 in #if, so neither header is included */
! 196: #include <stdint.h>
! 197: #elif HAVE_INTTYPES_H
! 198: #include <inttypes.h>
! 199: #endif
! 200:
! 201: #if defined INT64_MAX || defined int64_t /* "defined int64_t" only sees a macro, not a typedef */
! 202: #define INT64_OR_DOUBLE int64_t
! 203: #else
! 204: #define INT64_OR_DOUBLE double /* no 64-bit integer detected: fall back to floating point */
! 205: #endif
! 206:
! 207: /* All character handling must be done as unsigned characters. Otherwise there
! 208: are problems with top-bit-set characters and functions such as isspace().
! 209: However, we leave the interface to the outside world as char *, because that
! 210: should make things easier for callers. We define a short type for unsigned char
! 211: to save lots of typing. I tried "uchar", but it causes problems on Digital
! 212: Unix, where it is defined in sys/types, so use "uschar" instead. */
! 213:
! 214: typedef unsigned char uschar; /* short spelling of "unsigned char" for character data */
! 215:
! 216: /* This is an unsigned int value that no character can ever have. UTF-8
! 217: characters only go up to 0x7fffffff (though Unicode doesn't go beyond
! 218: 0x0010ffff). */
! 219:
! 220: #define NOTACHAR 0xffffffff /* all 32 bits set, above the 0x7fffffff UTF-8 limit */
! 221:
! 222: /* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
! 223: "any" and "anycrlf" at present). The following macros are used to package up
! 224: testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
! 225: modules to indicate in which datablock the parameters exist, and what the
! 226: start/end of string field names are. */
! 227:
! 228: #define NLTYPE_FIXED 0 /* Newline is a fixed length string */
! 229: #define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
! 230: #define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
! 231:
! 232: /* This macro checks for a newline at the given position. When the newline
type is not NLTYPE_FIXED, and p is before NLBLOCK->PSEND, the decision is
delegated to _pcre_is_newline(), which is handed the address of NLBLOCK->nllen.
For a fixed newline, p must leave room for nllen bytes before PSEND; the first
byte is compared with NLBLOCK->nl[0] and, unless nllen is 1, the second with
NLBLOCK->nl[1]. The expansion refers to NLBLOCK, PSEND and a variable named
utf8, which must all be visible where the macro is used, and p is evaluated
more than once. */
! 233:
! 234: #define IS_NEWLINE(p) \
! 235: ((NLBLOCK->nltype != NLTYPE_FIXED)? \
! 236: ((p) < NLBLOCK->PSEND && \
! 237: _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
! 238: utf8)) \
! 239: : \
! 240: ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
! 241: (p)[0] == NLBLOCK->nl[0] && \
! 242: (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
! 243: ) \
! 244: )
! 245:
! 246: /* This macro checks for a newline immediately preceding the given position.
When the newline type is not NLTYPE_FIXED, and p is beyond NLBLOCK->PSSTART, the
decision is delegated to _pcre_was_newline(), which is handed the address of
NLBLOCK->nllen. For a fixed newline, at least nllen bytes must lie between
PSSTART and p; the byte at p[-nllen] is compared with NLBLOCK->nl[0] and, unless
nllen is 1, the following byte with NLBLOCK->nl[1]. The expansion refers to
NLBLOCK, PSSTART and a variable named utf8, which must all be visible where the
macro is used, and p is evaluated more than once. */
! 247:
! 248: #define WAS_NEWLINE(p) \
! 249: ((NLBLOCK->nltype != NLTYPE_FIXED)? \
! 250: ((p) > NLBLOCK->PSSTART && \
! 251: _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
! 252: &(NLBLOCK->nllen), utf8)) \
! 253: : \
! 254: ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
! 255: (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
! 256: (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
! 257: ) \
! 258: )
! 259:
! 260: /* When PCRE is compiled as a C++ library, the subject pointer can be replaced
! 261: with a custom type. This makes it possible, for example, to allow pcre_exec()
! 262: to process subject strings that are discontinuous by using a smart pointer
! 263: class. It must always be possible to inspect all of the subject string in
! 264: pcre_exec() because of the way it backtracks. Two macros are required in the
! 265: normal case, for sign-unspecified and unsigned char pointers. The former is
! 266: used for the external interface and appears in pcre.h, which is why its name
! 267: must begin with PCRE_. */
! 268:
! 269: #ifdef CUSTOM_SUBJECT_PTR /* a replacement subject pointer type was supplied: use it for both macros */
! 270: #define PCRE_SPTR CUSTOM_SUBJECT_PTR
! 271: #define USPTR CUSTOM_SUBJECT_PTR
! 272: #else
! 273: #define PCRE_SPTR const char * /* sign-unspecified flavour */
! 274: #define USPTR const unsigned char * /* unsigned flavour */
! 275: #endif
! 276:
! 277:
! 278:
! 279: /* Include the public PCRE header and the definitions of UCP character property
! 280: values. */
! 281:
! 282: #include "pcre.h"
! 283: #include "ucp.h"
! 284:
! 285: /* When compiling for use with the Virtual Pascal compiler, these functions
! 286: need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
! 287: option on the command line. */
! 288:
! 289: #ifdef VPCOMPAT /* route each library call to its underscore-prefixed name */
! 290: #define strlen(s) _strlen(s)
! 291: #define strncmp(s1,s2,m) _strncmp(s1,s2,m)
! 292: #define memcmp(s,c,n) _memcmp(s,c,n)
! 293: #define memcpy(d,s,n) _memcpy(d,s,n)
! 294: #define memmove(d,s,n) _memmove(d,s,n)
! 295: #define memset(s,c,n) _memset(s,c,n)
! 296: #else /* VPCOMPAT */
! 297:
! 298: /* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
! 299: define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
! 300: is set. Otherwise, include an emulating function for those systems that have
! 301: neither (there are some non-Unix environments where this is the case). */
! 302:
! 303: #ifndef HAVE_MEMMOVE
! 304: #undef memmove /* some systems may have a macro */
! 305: #ifdef HAVE_BCOPY
! 306: #define memmove(a, b, c) bcopy(b, a, c) /* arguments swapped: bcopy takes the source first */
! 307: #else /* HAVE_BCOPY */
/* Byte-by-byte stand-in for memmove(): copies n bytes from s to d and returns
d. The copy direction is chosen from the relative order of the two pointers so
that overlapping regions are copied correctly. */
! 308: static void *
! 309: pcre_memmove(void *d, const void *s, size_t n)
! 310: {
! 311: size_t i;
! 312: unsigned char *dest = (unsigned char *)d;
! 313: const unsigned char *src = (const unsigned char *)s;
! 314: if (dest > src) /* destination is the higher address: copy from the last byte downwards */
! 315: {
! 316: dest += n;
! 317: src += n;
! 318: for (i = 0; i < n; ++i) *(--dest) = *(--src);
! 319: return (void *)dest; /* n decrements have brought dest back to d */
! 320: }
! 321: else /* destination is at or below the source: a plain forward copy is safe */
! 322: {
! 323: for (i = 0; i < n; ++i) *dest++ = *src++;
! 324: return (void *)(dest - n); /* undo the n increments to return d */
! 325: }
! 326: }
! 327: #define memmove(a, b, c) pcre_memmove(a, b, c)
! 328: #endif /* not HAVE_BCOPY */
! 329: #endif /* not HAVE_MEMMOVE */
! 330: #endif /* not VPCOMPAT */
! 331:
! 332:
! 333: /* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
! 334: in big-endian order) by default. These are used, for example, to link from the
! 335: start of a subpattern to its alternatives and its end. The use of 2 bytes per
! 336: offset limits the size of the compiled regex to around 64K, which is big enough
! 337: for almost everybody. However, I received a request for an even bigger limit.
! 338: For this reason, and also to make the code easier to maintain, the storing and
! 339: loading of offsets from the byte string is now handled by the macros that are
! 340: defined here.
! 341:
! 342: The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
! 343: the config.h file, but can be overridden by using -D on the command line. This
! 344: is automated on Unix systems via the "configure" command. */
! 345:
! 346: #if LINK_SIZE == 2
! 347:
/* In every variant PUT(a,n,d) stores the offset d in the LINK_SIZE bytes
starting at a[n], most significant byte first, and GET(a,n) reads such a value
back. PUT is a comma expression, so it acts as a single statement.
NOTE(review): PUT uses its first argument unparenthesized (a[n]), so pass a
plain identifier rather than an expression such as "code + 1". */
! 348: #define PUT(a,n,d) \
! 349: (a[n] = (d) >> 8), \
! 350: (a[(n)+1] = (d) & 255)
! 351:
! 352: #define GET(a,n) \
! 353: (((a)[n] << 8) | (a)[(n)+1])
! 354:
! 355: #define MAX_PATTERN_SIZE (1 << 16) /* the range of a 2-byte offset */
! 356:
! 357:
! 358: #elif LINK_SIZE == 3
! 359:
! 360: #define PUT(a,n,d) \
! 361: (a[n] = (d) >> 16), \
! 362: (a[(n)+1] = (d) >> 8), \
! 363: (a[(n)+2] = (d) & 255)
! 364:
! 365: #define GET(a,n) \
! 366: (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
! 367:
! 368: #define MAX_PATTERN_SIZE (1 << 24) /* the range of a 3-byte offset */
! 369:
! 370:
! 371: #elif LINK_SIZE == 4
! 372:
! 373: #define PUT(a,n,d) \
! 374: (a[n] = (d) >> 24), \
! 375: (a[(n)+1] = (d) >> 16), \
! 376: (a[(n)+2] = (d) >> 8), \
! 377: (a[(n)+3] = (d) & 255)
! 378:
! 379: #define GET(a,n) \
! 380: (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
! 381:
! 382: #define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
! 383:
! 384:
! 385: #else
! 386: #error LINK_SIZE must be either 2, 3, or 4
! 387: #endif
! 388:
! 389:
! 390: /* Convenience macro defined in terms of the others. It stores d and then
advances a by LINK_SIZE, so a must be a modifiable pointer. */
! 391:
! 392: #define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
! 393:
! 394:
/* PCRE uses some other 2-byte quantities that do not change when the size of
offsets changes. These are used for repeat counts and for other things such as
capturing parenthesis numbers in back references.

PUT2(a,n,d) stores the low 16 bits of d in a[n] and a[n+1], most significant
byte first. It is a single parenthesized comma expression (the same shape as
PUT), so both stores stay together when the macro is the body of an unbraced
"if", "else" or loop. GET2(a,n) reads such a value back. PUT2INC stores and then
advances a by 2, so a must be a modifiable pointer. */

#define PUT2(a,n,d) \
  ((a)[n] = (d) >> 8, \
   (a)[(n)+1] = (d) & 255)

#define GET2(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
! 407:
! 408:
! 409: /* When UTF-8 encoding is being used, a character is no longer just a single
! 410: byte. The macros for character handling generate simple sequences when used in
! 411: byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
! 412: not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
! 413: never be called in byte mode. To make sure they can never even appear when
! 414: UTF-8 support is omitted, we don't even define them. */
! 415:
! 416: #ifndef SUPPORT_UTF8
/* Byte-mode versions: a character is just the byte at eptr. Each definition
already ends in a semicolon, the INC forms post-increment eptr, and GETCHARLEN
leaves len untouched. */
! 417: #define GETCHAR(c, eptr) c = *eptr;
! 418: #define GETCHARTEST(c, eptr) c = *eptr;
! 419: #define GETCHARINC(c, eptr) c = *eptr++;
! 420: #define GETCHARINCTEST(c, eptr) c = *eptr++;
! 421: #define GETCHARLEN(c, eptr, len) c = *eptr;
! 422: /* #define GETCHARLENTEST(c, eptr, len) -- deliberately left undefined in byte mode */
! 423: /* #define BACKCHAR(eptr) -- deliberately left undefined in byte mode */
! 424:
! 425: #else /* SUPPORT_UTF8 */
! 426:
! 427: /* These macros were originally written in the form of loops that used data
! 428: from the tables whose names start with _pcre_utf8_table. They were rewritten by
! 429: a user so as not to use loops, because in some environments this gives a
! 430: significant performance advantage, and it seems never to do any harm. */
! 431:
! 432: /* Base macro to pick up the remaining bytes of a UTF-8 character, not
! 433: advancing the pointer. On entry c must already hold the lead byte taken from
eptr[0]. The bits 0x20, 0x10, 0x08 and 0x04 of that byte are tested in turn to
select a sequence of 2, 3, 4, 5 or (otherwise) 6 bytes, and c is replaced by the
value assembled from the lead byte and eptr[1] onwards. Nothing here checks that
the trailing bytes exist or are well formed. */
! 434:
! 435: #define GETUTF8(c, eptr) \
! 436: { \
! 437: if ((c & 0x20) == 0) \
! 438: c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
! 439: else if ((c & 0x10) == 0) \
! 440: c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
! 441: else if ((c & 0x08) == 0) \
! 442: c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
! 443: ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
! 444: else if ((c & 0x04) == 0) \
! 445: c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
! 446: ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
! 447: (eptr[4] & 0x3f); \
! 448: else \
! 449: c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
! 450: ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
! 451: ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
! 452: }
! 453:
! 454: /* Get the next UTF-8 character, not advancing the pointer. This is called when
! 455: we know we are in UTF-8 mode. A byte below 0xc0 is taken as a complete
character. The expansion is two statements (an assignment, then an "if"), so
it must not be the sole body of an unbraced "if" or loop. */
! 456:
! 457: #define GETCHAR(c, eptr) \
! 458: c = *eptr; \
! 459: if (c >= 0xc0) GETUTF8(c, eptr);
! 460:
! 461: /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
! 462: pointer. The test reads a variable named utf8, which must be in scope at the
point of use. */
! 463:
! 464: #define GETCHARTEST(c, eptr) \
! 465: c = *eptr; \
! 466: if (utf8 && c >= 0xc0) GETUTF8(c, eptr);
! 467:
! 468: /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
! 469: the pointer. On entry c must hold the lead byte and eptr must already point
at the byte after it (GETCHARINC arranges this with "c = *eptr++"). The lead
byte selects 1 to 5 trailing bytes in the same way as GETUTF8, and eptr is moved
past however many were consumed. Nothing here checks that the trailing bytes
exist or are well formed. */
! 470:
! 471: #define GETUTF8INC(c, eptr) \
! 472: { \
! 473: if ((c & 0x20) == 0) \
! 474: c = ((c & 0x1f) << 6) | (*eptr++ & 0x3f); \
! 475: else if ((c & 0x10) == 0) \
! 476: { \
! 477: c = ((c & 0x0f) << 12) | ((*eptr & 0x3f) << 6) | (eptr[1] & 0x3f); \
! 478: eptr += 2; \
! 479: } \
! 480: else if ((c & 0x08) == 0) \
! 481: { \
! 482: c = ((c & 0x07) << 18) | ((*eptr & 0x3f) << 12) | \
! 483: ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
! 484: eptr += 3; \
! 485: } \
! 486: else if ((c & 0x04) == 0) \
! 487: { \
! 488: c = ((c & 0x03) << 24) | ((*eptr & 0x3f) << 18) | \
! 489: ((eptr[1] & 0x3f) << 12) | ((eptr[2] & 0x3f) << 6) | \
! 490: (eptr[3] & 0x3f); \
! 491: eptr += 4; \
! 492: } \
! 493: else \
! 494: { \
! 495: c = ((c & 0x01) << 30) | ((*eptr & 0x3f) << 24) | \
! 496: ((eptr[1] & 0x3f) << 18) | ((eptr[2] & 0x3f) << 12) | \
! 497: ((eptr[3] & 0x3f) << 6) | (eptr[4] & 0x3f); \
! 498: eptr += 5; \
! 499: } \
! 500: }
! 501:
! 502: /* Get the next UTF-8 character, advancing the pointer. This is called when we
! 503: know we are in UTF-8 mode. The expansion is two statements, so it must not be
the sole body of an unbraced "if" or loop. */
! 504:
! 505: #define GETCHARINC(c, eptr) \
! 506: c = *eptr++; \
! 507: if (c >= 0xc0) GETUTF8INC(c, eptr);
! 508:
! 509: /* Get the next character, testing for UTF-8 mode, and advancing the pointer.
! 510: This is called when we don't know if we are in UTF-8 mode. The test reads a
variable named utf8, which must be in scope at the point of use. */
! 511:
! 512: #define GETCHARINCTEST(c, eptr) \
! 513: c = *eptr++; \
! 514: if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr);
! 515:
! 516: /* Base macro to pick up the remaining bytes of a UTF-8 character, not
! 517: advancing the pointer, incrementing the length. On entry c must hold the lead
byte taken from eptr[0]. len is increased by the number of trailing bytes (1 to
5); the macro never initializes it, so the caller supplies the starting value
(presumably 1 for the lead byte -- confirm at the call sites). Nothing here
checks that the trailing bytes exist or are well formed. */
! 518:
! 519: #define GETUTF8LEN(c, eptr, len) \
! 520: { \
! 521: if ((c & 0x20) == 0) \
! 522: { \
! 523: c = ((c & 0x1f) << 6) | (eptr[1] & 0x3f); \
! 524: len++; \
! 525: } \
! 526: else if ((c & 0x10) == 0) \
! 527: { \
! 528: c = ((c & 0x0f) << 12) | ((eptr[1] & 0x3f) << 6) | (eptr[2] & 0x3f); \
! 529: len += 2; \
! 530: } \
! 531: else if ((c & 0x08) == 0) \
! 532: {\
! 533: c = ((c & 0x07) << 18) | ((eptr[1] & 0x3f) << 12) | \
! 534: ((eptr[2] & 0x3f) << 6) | (eptr[3] & 0x3f); \
! 535: len += 3; \
! 536: } \
! 537: else if ((c & 0x04) == 0) \
! 538: { \
! 539: c = ((c & 0x03) << 24) | ((eptr[1] & 0x3f) << 18) | \
! 540: ((eptr[2] & 0x3f) << 12) | ((eptr[3] & 0x3f) << 6) | \
! 541: (eptr[4] & 0x3f); \
! 542: len += 4; \
! 543: } \
! 544: else \
! 545: {\
! 546: c = ((c & 0x01) << 30) | ((eptr[1] & 0x3f) << 24) | \
! 547: ((eptr[2] & 0x3f) << 18) | ((eptr[3] & 0x3f) << 12) | \
! 548: ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
! 549: len += 5; \
! 550: } \
! 551: }
! 552:
! 553: /* Get the next UTF-8 character, not advancing the pointer, incrementing length
! 554: if there are extra bytes. This is called when we know we are in UTF-8 mode.
The expansion is two statements, so it must not be the sole body of an unbraced
"if" or loop. */
! 555:
! 556: #define GETCHARLEN(c, eptr, len) \
! 557: c = *eptr; \
! 558: if (c >= 0xc0) GETUTF8LEN(c, eptr, len);
! 559:
! 560: /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
! 561: pointer, incrementing length if there are extra bytes. This is called when we
! 562: do not know if we are in UTF-8 mode. The test reads a variable named utf8,
which must be in scope at the point of use. */
! 563:
! 564: #define GETCHARLENTEST(c, eptr, len) \
! 565: c = *eptr; \
! 566: if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len);
! 567:
! 568: /* If the pointer is not at the start of a character, move it back until
! 569: it is. This is called only in UTF-8 mode - we don't put a test within the macro
! 570: because almost all calls are already within a block of UTF-8 only code. The
loop steps back while the byte at eptr has the form 10xxxxxx; it has no lower
bound of its own, and the definition has no trailing semicolon. */
! 571:
! 572: #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
! 573:
! 574: #endif /* SUPPORT_UTF8 */
! 575:
! 576:
! 577: /* In case there is no definition of offsetof() provided - though any proper
! 578: Standard C system should have one. */
! 579:
! 580: #ifndef offsetof
! 581: #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field)) /* address of field in a notional object placed at address 0 */
! 582: #endif
! 583:
! 584:
! 585: /* Private flags containing information about the compiled regex. They used to
! 586: live at the top end of the options word, but that got almost full, so now they
! 587: are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
! 588: the restrictions on partial matching have been lifted. It remains for backwards
! 589: compatibility. */
! 590:
! 591: #define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex (unused since release 8.00) */
! 592: #define PCRE_FIRSTSET 0x0002 /* first_byte is set */
! 593: #define PCRE_REQCHSET 0x0004 /* req_byte is set */
! 594: #define PCRE_STARTLINE 0x0008 /* start after \n for multiline */
! 595: #define PCRE_JCHANGED 0x0010 /* j option used in regex */
! 596: #define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */
! 597: #define PCRE_HASTHEN 0x0040 /* pattern contains (*THEN) */
! 598:
! 599: /* Flags for the "extra" block produced by pcre_study(). These are a separate
set of bits from the private flags above, so the values 0x0001 and 0x0002 are
reused. */
! 600:
! 601: #define PCRE_STUDY_MAPPED 0x0001 /* a map of starting chars exists */
! 602: #define PCRE_STUDY_MINLEN 0x0002 /* a minimum length field exists */
! 603:
! 604: /* Masks for identifying the public options that are permitted at compile
! 605: time, run time, or study time, respectively. */
! 606:
/* Every option bit that selects a newline convention. */
! 607: #define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
! 608: PCRE_NEWLINE_ANYCRLF)
! 609:
/* Options permitted at compile time. */
! 610: #define PUBLIC_COMPILE_OPTIONS \
! 611: (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
! 612: PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
! 613: PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
! 614: PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
! 615: PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
! 616:
/* Options permitted at run time. */
! 617: #define PUBLIC_EXEC_OPTIONS \
! 618: (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
! 619: PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
! 620: PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
! 621:
/* The run-time set plus PCRE_DFA_SHORTEST and PCRE_DFA_RESTART (presumably the
mask for the DFA matching function -- confirm against its caller). */
! 622: #define PUBLIC_DFA_EXEC_OPTIONS \
! 623: (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
! 624: PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
! 625: PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
! 626: PCRE_NO_START_OPTIMIZE)
! 627:
/* Options permitted at study time: a single bit at present. NOTE(review): the
expansion is not parenthesized, which is harmless only while it stays a single
identifier. */
! 628: #define PUBLIC_STUDY_OPTIONS \
! 629: PCRE_STUDY_JIT_COMPILE
! 630:
! 631: /* Magic number to provide a small check against being handed junk. Also used
! 632: to detect whether a pattern was compiled on a host of different endianness. */
! 633:
! 634: #define MAGIC_NUMBER 0x50435245UL /* 'PCRE': the ASCII codes 0x50 0x43 0x52 0x45 */
! 635:
! 636: /* Negative values for the firstchar and reqchar variables */
! 637:
! 638: #define REQ_UNSET (-2) /* presumably "not yet determined" -- confirm against the compile code */
! 639: #define REQ_NONE (-1) /* presumably "no such character" -- confirm against the compile code */
! 640:
! 641: /* The maximum remaining length of subject we are prepared to search for a
! 642: req_byte match. */
! 643:
! 644: #define REQ_BYTE_MAX 1000
! 645:
! 646: /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
! 647: variable-length repeat, or anything other than literal characters. Both flag
values lie above the low eight bits, so they do not collide with a byte value. */
! 648:
! 649: #define REQ_CASELESS 0x0100 /* indicates caselessness */
! 650: #define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
! 651:
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
environments where these macros are defined elsewhere. Unfortunately, there
is no way to do the same for the typedef.

FALSE and TRUE are guarded separately, so an environment that supplies only one
of them still gets the other, and an existing definition is never redefined. */

typedef int BOOL;

#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif
! 662:
! 663: /* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
! 664: character constants like '*' because the compiler would emit their EBCDIC code,
! 665: which is different from their ASCII/UTF-8 code. Instead we define macros for
! 666: the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
! 667: is enabled. When UTF-8 support is not enabled, the definitions use character
! 668: literals. Both character and string versions of each character are needed, and
! 669: there are some longer strings as well.
! 670:
! 671: This means that, on EBCDIC platforms, the PCRE library can handle either
! 672: EBCDIC, or UTF-8, but not both. To support both in the same compiled library
! 673: would need different lookups depending on whether PCRE_UTF8 was set or not.
! 674: This would make it impossible to use characters in switch/case statements,
! 675: which would reduce performance. For a theoretical use (which nobody has asked
! 676: for) in a minority area (EBCDIC platforms), this is not sensible. Any
! 677: application that did need both could compile two versions of the library, using
! 678: macros to give the functions distinct names. */
! 679:
! 680: #ifndef SUPPORT_UTF8
! 681:
! 682: /* UTF-8 support is not enabled; use the platform-dependent character literals
! 683: so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
! 684:
! 685: #define CHAR_HT '\t'
! 686: #define CHAR_VT '\v'
! 687: #define CHAR_FF '\f'
! 688: #define CHAR_CR '\r'
! 689: #define CHAR_NL '\n'
! 690: #define CHAR_BS '\b'
! 691: #define CHAR_BEL '\a'
! 692: #ifdef EBCDIC
! 693: #define CHAR_ESC '\047'
! 694: #define CHAR_DEL '\007'
! 695: #else
! 696: #define CHAR_ESC '\033'
! 697: #define CHAR_DEL '\177'
! 698: #endif
! 699:
! 700: #define CHAR_SPACE ' '
! 701: #define CHAR_EXCLAMATION_MARK '!'
! 702: #define CHAR_QUOTATION_MARK '"'
! 703: #define CHAR_NUMBER_SIGN '#'
! 704: #define CHAR_DOLLAR_SIGN '$'
! 705: #define CHAR_PERCENT_SIGN '%'
! 706: #define CHAR_AMPERSAND '&'
! 707: #define CHAR_APOSTROPHE '\''
! 708: #define CHAR_LEFT_PARENTHESIS '('
! 709: #define CHAR_RIGHT_PARENTHESIS ')'
! 710: #define CHAR_ASTERISK '*'
! 711: #define CHAR_PLUS '+'
! 712: #define CHAR_COMMA ','
! 713: #define CHAR_MINUS '-'
! 714: #define CHAR_DOT '.'
! 715: #define CHAR_SLASH '/'
! 716: #define CHAR_0 '0'
! 717: #define CHAR_1 '1'
! 718: #define CHAR_2 '2'
! 719: #define CHAR_3 '3'
! 720: #define CHAR_4 '4'
! 721: #define CHAR_5 '5'
! 722: #define CHAR_6 '6'
! 723: #define CHAR_7 '7'
! 724: #define CHAR_8 '8'
! 725: #define CHAR_9 '9'
! 726: #define CHAR_COLON ':'
! 727: #define CHAR_SEMICOLON ';'
! 728: #define CHAR_LESS_THAN_SIGN '<'
! 729: #define CHAR_EQUALS_SIGN '='
! 730: #define CHAR_GREATER_THAN_SIGN '>'
! 731: #define CHAR_QUESTION_MARK '?'
! 732: #define CHAR_COMMERCIAL_AT '@'
! 733: #define CHAR_A 'A'
! 734: #define CHAR_B 'B'
! 735: #define CHAR_C 'C'
! 736: #define CHAR_D 'D'
! 737: #define CHAR_E 'E'
! 738: #define CHAR_F 'F'
! 739: #define CHAR_G 'G'
! 740: #define CHAR_H 'H'
! 741: #define CHAR_I 'I'
! 742: #define CHAR_J 'J'
! 743: #define CHAR_K 'K'
! 744: #define CHAR_L 'L'
! 745: #define CHAR_M 'M'
! 746: #define CHAR_N 'N'
! 747: #define CHAR_O 'O'
! 748: #define CHAR_P 'P'
! 749: #define CHAR_Q 'Q'
! 750: #define CHAR_R 'R'
! 751: #define CHAR_S 'S'
! 752: #define CHAR_T 'T'
! 753: #define CHAR_U 'U'
! 754: #define CHAR_V 'V'
! 755: #define CHAR_W 'W'
! 756: #define CHAR_X 'X'
! 757: #define CHAR_Y 'Y'
! 758: #define CHAR_Z 'Z'
! 759: #define CHAR_LEFT_SQUARE_BRACKET '['
! 760: #define CHAR_BACKSLASH '\\'
! 761: #define CHAR_RIGHT_SQUARE_BRACKET ']'
! 762: #define CHAR_CIRCUMFLEX_ACCENT '^'
! 763: #define CHAR_UNDERSCORE '_'
! 764: #define CHAR_GRAVE_ACCENT '`'
! 765: #define CHAR_a 'a'
! 766: #define CHAR_b 'b'
! 767: #define CHAR_c 'c'
! 768: #define CHAR_d 'd'
! 769: #define CHAR_e 'e'
! 770: #define CHAR_f 'f'
! 771: #define CHAR_g 'g'
! 772: #define CHAR_h 'h'
! 773: #define CHAR_i 'i'
! 774: #define CHAR_j 'j'
! 775: #define CHAR_k 'k'
! 776: #define CHAR_l 'l'
! 777: #define CHAR_m 'm'
! 778: #define CHAR_n 'n'
! 779: #define CHAR_o 'o'
! 780: #define CHAR_p 'p'
! 781: #define CHAR_q 'q'
! 782: #define CHAR_r 'r'
! 783: #define CHAR_s 's'
! 784: #define CHAR_t 't'
! 785: #define CHAR_u 'u'
! 786: #define CHAR_v 'v'
! 787: #define CHAR_w 'w'
! 788: #define CHAR_x 'x'
! 789: #define CHAR_y 'y'
! 790: #define CHAR_z 'z'
! 791: #define CHAR_LEFT_CURLY_BRACKET '{'
! 792: #define CHAR_VERTICAL_LINE '|'
! 793: #define CHAR_RIGHT_CURLY_BRACKET '}'
! 794: #define CHAR_TILDE '~'
! 795:
! 796: #define STR_HT "\t"
! 797: #define STR_VT "\v"
! 798: #define STR_FF "\f"
! 799: #define STR_CR "\r"
! 800: #define STR_NL "\n"
! 801: #define STR_BS "\b"
! 802: #define STR_BEL "\a"
! 803: #ifdef EBCDIC
! 804: #define STR_ESC "\047"
! 805: #define STR_DEL "\007"
! 806: #else
! 807: #define STR_ESC "\033"
! 808: #define STR_DEL "\177"
! 809: #endif
! 810:
! 811: #define STR_SPACE " "
! 812: #define STR_EXCLAMATION_MARK "!"
! 813: #define STR_QUOTATION_MARK "\""
! 814: #define STR_NUMBER_SIGN "#"
! 815: #define STR_DOLLAR_SIGN "$"
! 816: #define STR_PERCENT_SIGN "%"
! 817: #define STR_AMPERSAND "&"
! 818: #define STR_APOSTROPHE "'"
! 819: #define STR_LEFT_PARENTHESIS "("
! 820: #define STR_RIGHT_PARENTHESIS ")"
! 821: #define STR_ASTERISK "*"
! 822: #define STR_PLUS "+"
! 823: #define STR_COMMA ","
! 824: #define STR_MINUS "-"
! 825: #define STR_DOT "."
! 826: #define STR_SLASH "/"
! 827: #define STR_0 "0"
! 828: #define STR_1 "1"
! 829: #define STR_2 "2"
! 830: #define STR_3 "3"
! 831: #define STR_4 "4"
! 832: #define STR_5 "5"
! 833: #define STR_6 "6"
! 834: #define STR_7 "7"
! 835: #define STR_8 "8"
! 836: #define STR_9 "9"
! 837: #define STR_COLON ":"
! 838: #define STR_SEMICOLON ";"
! 839: #define STR_LESS_THAN_SIGN "<"
! 840: #define STR_EQUALS_SIGN "="
! 841: #define STR_GREATER_THAN_SIGN ">"
! 842: #define STR_QUESTION_MARK "?"
! 843: #define STR_COMMERCIAL_AT "@"
! 844: #define STR_A "A"
! 845: #define STR_B "B"
! 846: #define STR_C "C"
! 847: #define STR_D "D"
! 848: #define STR_E "E"
! 849: #define STR_F "F"
! 850: #define STR_G "G"
! 851: #define STR_H "H"
! 852: #define STR_I "I"
! 853: #define STR_J "J"
! 854: #define STR_K "K"
! 855: #define STR_L "L"
! 856: #define STR_M "M"
! 857: #define STR_N "N"
! 858: #define STR_O "O"
! 859: #define STR_P "P"
! 860: #define STR_Q "Q"
! 861: #define STR_R "R"
! 862: #define STR_S "S"
! 863: #define STR_T "T"
! 864: #define STR_U "U"
! 865: #define STR_V "V"
! 866: #define STR_W "W"
! 867: #define STR_X "X"
! 868: #define STR_Y "Y"
! 869: #define STR_Z "Z"
! 870: #define STR_LEFT_SQUARE_BRACKET "["
! 871: #define STR_BACKSLASH "\\"
! 872: #define STR_RIGHT_SQUARE_BRACKET "]"
! 873: #define STR_CIRCUMFLEX_ACCENT "^"
! 874: #define STR_UNDERSCORE "_"
! 875: #define STR_GRAVE_ACCENT "`"
! 876: #define STR_a "a"
! 877: #define STR_b "b"
! 878: #define STR_c "c"
! 879: #define STR_d "d"
! 880: #define STR_e "e"
! 881: #define STR_f "f"
! 882: #define STR_g "g"
! 883: #define STR_h "h"
! 884: #define STR_i "i"
! 885: #define STR_j "j"
! 886: #define STR_k "k"
! 887: #define STR_l "l"
! 888: #define STR_m "m"
! 889: #define STR_n "n"
! 890: #define STR_o "o"
! 891: #define STR_p "p"
! 892: #define STR_q "q"
! 893: #define STR_r "r"
! 894: #define STR_s "s"
! 895: #define STR_t "t"
! 896: #define STR_u "u"
! 897: #define STR_v "v"
! 898: #define STR_w "w"
! 899: #define STR_x "x"
! 900: #define STR_y "y"
! 901: #define STR_z "z"
! 902: #define STR_LEFT_CURLY_BRACKET "{"
! 903: #define STR_VERTICAL_LINE "|"
! 904: #define STR_RIGHT_CURLY_BRACKET "}"
! 905: #define STR_TILDE "~"
! 906:
! 907: #define STRING_ACCEPT0 "ACCEPT\0"
! 908: #define STRING_COMMIT0 "COMMIT\0"
! 909: #define STRING_F0 "F\0"
! 910: #define STRING_FAIL0 "FAIL\0"
! 911: #define STRING_MARK0 "MARK\0"
! 912: #define STRING_PRUNE0 "PRUNE\0"
! 913: #define STRING_SKIP0 "SKIP\0"
! 914: #define STRING_THEN "THEN"
! 915:
! 916: #define STRING_alpha0 "alpha\0"
! 917: #define STRING_lower0 "lower\0"
! 918: #define STRING_upper0 "upper\0"
! 919: #define STRING_alnum0 "alnum\0"
! 920: #define STRING_ascii0 "ascii\0"
! 921: #define STRING_blank0 "blank\0"
! 922: #define STRING_cntrl0 "cntrl\0"
! 923: #define STRING_digit0 "digit\0"
! 924: #define STRING_graph0 "graph\0"
! 925: #define STRING_print0 "print\0"
! 926: #define STRING_punct0 "punct\0"
! 927: #define STRING_space0 "space\0"
! 928: #define STRING_word0 "word\0"
! 929: #define STRING_xdigit "xdigit"
! 930:
! 931: #define STRING_DEFINE "DEFINE"
! 932:
! 933: #define STRING_CR_RIGHTPAR "CR)"
! 934: #define STRING_LF_RIGHTPAR "LF)"
! 935: #define STRING_CRLF_RIGHTPAR "CRLF)"
! 936: #define STRING_ANY_RIGHTPAR "ANY)"
! 937: #define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
! 938: #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
! 939: #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
! 940: #define STRING_UTF8_RIGHTPAR "UTF8)"
! 941: #define STRING_UCP_RIGHTPAR "UCP)"
! 942: #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
! 943:
! 944: #else /* SUPPORT_UTF8 */
! 945:
! 946: /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
! 947: works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
! 948: only. */
! 949:
! 950: #define CHAR_HT '\011'
! 951: #define CHAR_VT '\013'
! 952: #define CHAR_FF '\014'
! 953: #define CHAR_CR '\015'
! 954: #define CHAR_NL '\012'
! 955: #define CHAR_BS '\010'
! 956: #define CHAR_BEL '\007'
! 957: #define CHAR_ESC '\033'
! 958: #define CHAR_DEL '\177'
! 959:
! 960: #define CHAR_SPACE '\040'
! 961: #define CHAR_EXCLAMATION_MARK '\041'
! 962: #define CHAR_QUOTATION_MARK '\042'
! 963: #define CHAR_NUMBER_SIGN '\043'
! 964: #define CHAR_DOLLAR_SIGN '\044'
! 965: #define CHAR_PERCENT_SIGN '\045'
! 966: #define CHAR_AMPERSAND '\046'
! 967: #define CHAR_APOSTROPHE '\047'
! 968: #define CHAR_LEFT_PARENTHESIS '\050'
! 969: #define CHAR_RIGHT_PARENTHESIS '\051'
! 970: #define CHAR_ASTERISK '\052'
! 971: #define CHAR_PLUS '\053'
! 972: #define CHAR_COMMA '\054'
! 973: #define CHAR_MINUS '\055'
! 974: #define CHAR_DOT '\056'
! 975: #define CHAR_SLASH '\057'
! 976: #define CHAR_0 '\060'
! 977: #define CHAR_1 '\061'
! 978: #define CHAR_2 '\062'
! 979: #define CHAR_3 '\063'
! 980: #define CHAR_4 '\064'
! 981: #define CHAR_5 '\065'
! 982: #define CHAR_6 '\066'
! 983: #define CHAR_7 '\067'
! 984: #define CHAR_8 '\070'
! 985: #define CHAR_9 '\071'
! 986: #define CHAR_COLON '\072'
! 987: #define CHAR_SEMICOLON '\073'
! 988: #define CHAR_LESS_THAN_SIGN '\074'
! 989: #define CHAR_EQUALS_SIGN '\075'
! 990: #define CHAR_GREATER_THAN_SIGN '\076'
! 991: #define CHAR_QUESTION_MARK '\077'
! 992: #define CHAR_COMMERCIAL_AT '\100'
! 993: #define CHAR_A '\101'
! 994: #define CHAR_B '\102'
! 995: #define CHAR_C '\103'
! 996: #define CHAR_D '\104'
! 997: #define CHAR_E '\105'
! 998: #define CHAR_F '\106'
! 999: #define CHAR_G '\107'
! 1000: #define CHAR_H '\110'
! 1001: #define CHAR_I '\111'
! 1002: #define CHAR_J '\112'
! 1003: #define CHAR_K '\113'
! 1004: #define CHAR_L '\114'
! 1005: #define CHAR_M '\115'
! 1006: #define CHAR_N '\116'
! 1007: #define CHAR_O '\117'
! 1008: #define CHAR_P '\120'
! 1009: #define CHAR_Q '\121'
! 1010: #define CHAR_R '\122'
! 1011: #define CHAR_S '\123'
! 1012: #define CHAR_T '\124'
! 1013: #define CHAR_U '\125'
! 1014: #define CHAR_V '\126'
! 1015: #define CHAR_W '\127'
! 1016: #define CHAR_X '\130'
! 1017: #define CHAR_Y '\131'
! 1018: #define CHAR_Z '\132'
! 1019: #define CHAR_LEFT_SQUARE_BRACKET '\133'
! 1020: #define CHAR_BACKSLASH '\134'
! 1021: #define CHAR_RIGHT_SQUARE_BRACKET '\135'
! 1022: #define CHAR_CIRCUMFLEX_ACCENT '\136'
! 1023: #define CHAR_UNDERSCORE '\137'
! 1024: #define CHAR_GRAVE_ACCENT '\140'
! 1025: #define CHAR_a '\141'
! 1026: #define CHAR_b '\142'
! 1027: #define CHAR_c '\143'
! 1028: #define CHAR_d '\144'
! 1029: #define CHAR_e '\145'
! 1030: #define CHAR_f '\146'
! 1031: #define CHAR_g '\147'
! 1032: #define CHAR_h '\150'
! 1033: #define CHAR_i '\151'
! 1034: #define CHAR_j '\152'
! 1035: #define CHAR_k '\153'
! 1036: #define CHAR_l '\154'
! 1037: #define CHAR_m '\155'
! 1038: #define CHAR_n '\156'
! 1039: #define CHAR_o '\157'
! 1040: #define CHAR_p '\160'
! 1041: #define CHAR_q '\161'
! 1042: #define CHAR_r '\162'
! 1043: #define CHAR_s '\163'
! 1044: #define CHAR_t '\164'
! 1045: #define CHAR_u '\165'
! 1046: #define CHAR_v '\166'
! 1047: #define CHAR_w '\167'
! 1048: #define CHAR_x '\170'
! 1049: #define CHAR_y '\171'
! 1050: #define CHAR_z '\172'
! 1051: #define CHAR_LEFT_CURLY_BRACKET '\173'
! 1052: #define CHAR_VERTICAL_LINE '\174'
! 1053: #define CHAR_RIGHT_CURLY_BRACKET '\175'
! 1054: #define CHAR_TILDE '\176'
! 1055:
! 1056: #define STR_HT "\011"
! 1057: #define STR_VT "\013"
! 1058: #define STR_FF "\014"
! 1059: #define STR_CR "\015"
! 1060: #define STR_NL "\012"
! 1061: #define STR_BS "\010"
! 1062: #define STR_BEL "\007"
! 1063: #define STR_ESC "\033"
! 1064: #define STR_DEL "\177"
! 1065:
! 1066: #define STR_SPACE "\040"
! 1067: #define STR_EXCLAMATION_MARK "\041"
! 1068: #define STR_QUOTATION_MARK "\042"
! 1069: #define STR_NUMBER_SIGN "\043"
! 1070: #define STR_DOLLAR_SIGN "\044"
! 1071: #define STR_PERCENT_SIGN "\045"
! 1072: #define STR_AMPERSAND "\046"
! 1073: #define STR_APOSTROPHE "\047"
! 1074: #define STR_LEFT_PARENTHESIS "\050"
! 1075: #define STR_RIGHT_PARENTHESIS "\051"
! 1076: #define STR_ASTERISK "\052"
! 1077: #define STR_PLUS "\053"
! 1078: #define STR_COMMA "\054"
! 1079: #define STR_MINUS "\055"
! 1080: #define STR_DOT "\056"
! 1081: #define STR_SLASH "\057"
! 1082: #define STR_0 "\060"
! 1083: #define STR_1 "\061"
! 1084: #define STR_2 "\062"
! 1085: #define STR_3 "\063"
! 1086: #define STR_4 "\064"
! 1087: #define STR_5 "\065"
! 1088: #define STR_6 "\066"
! 1089: #define STR_7 "\067"
! 1090: #define STR_8 "\070"
! 1091: #define STR_9 "\071"
! 1092: #define STR_COLON "\072"
! 1093: #define STR_SEMICOLON "\073"
! 1094: #define STR_LESS_THAN_SIGN "\074"
! 1095: #define STR_EQUALS_SIGN "\075"
! 1096: #define STR_GREATER_THAN_SIGN "\076"
! 1097: #define STR_QUESTION_MARK "\077"
! 1098: #define STR_COMMERCIAL_AT "\100"
! 1099: #define STR_A "\101"
! 1100: #define STR_B "\102"
! 1101: #define STR_C "\103"
! 1102: #define STR_D "\104"
! 1103: #define STR_E "\105"
! 1104: #define STR_F "\106"
! 1105: #define STR_G "\107"
! 1106: #define STR_H "\110"
! 1107: #define STR_I "\111"
! 1108: #define STR_J "\112"
! 1109: #define STR_K "\113"
! 1110: #define STR_L "\114"
! 1111: #define STR_M "\115"
! 1112: #define STR_N "\116"
! 1113: #define STR_O "\117"
! 1114: #define STR_P "\120"
! 1115: #define STR_Q "\121"
! 1116: #define STR_R "\122"
! 1117: #define STR_S "\123"
! 1118: #define STR_T "\124"
! 1119: #define STR_U "\125"
! 1120: #define STR_V "\126"
! 1121: #define STR_W "\127"
! 1122: #define STR_X "\130"
! 1123: #define STR_Y "\131"
! 1124: #define STR_Z "\132"
! 1125: #define STR_LEFT_SQUARE_BRACKET "\133"
! 1126: #define STR_BACKSLASH "\134"
! 1127: #define STR_RIGHT_SQUARE_BRACKET "\135"
! 1128: #define STR_CIRCUMFLEX_ACCENT "\136"
! 1129: #define STR_UNDERSCORE "\137"
! 1130: #define STR_GRAVE_ACCENT "\140"
! 1131: #define STR_a "\141"
! 1132: #define STR_b "\142"
! 1133: #define STR_c "\143"
! 1134: #define STR_d "\144"
! 1135: #define STR_e "\145"
! 1136: #define STR_f "\146"
! 1137: #define STR_g "\147"
! 1138: #define STR_h "\150"
! 1139: #define STR_i "\151"
! 1140: #define STR_j "\152"
! 1141: #define STR_k "\153"
! 1142: #define STR_l "\154"
! 1143: #define STR_m "\155"
! 1144: #define STR_n "\156"
! 1145: #define STR_o "\157"
! 1146: #define STR_p "\160"
! 1147: #define STR_q "\161"
! 1148: #define STR_r "\162"
! 1149: #define STR_s "\163"
! 1150: #define STR_t "\164"
! 1151: #define STR_u "\165"
! 1152: #define STR_v "\166"
! 1153: #define STR_w "\167"
! 1154: #define STR_x "\170"
! 1155: #define STR_y "\171"
! 1156: #define STR_z "\172"
! 1157: #define STR_LEFT_CURLY_BRACKET "\173"
! 1158: #define STR_VERTICAL_LINE "\174"
! 1159: #define STR_RIGHT_CURLY_BRACKET "\175"
! 1160: #define STR_TILDE "\176"
! 1161:
! 1162: #define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0"
! 1163: #define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0"
! 1164: #define STRING_F0 STR_F "\0"
! 1165: #define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0"
! 1166: #define STRING_MARK0 STR_M STR_A STR_R STR_K "\0"
! 1167: #define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0"
! 1168: #define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0"
! 1169: #define STRING_THEN STR_T STR_H STR_E STR_N
! 1170:
! 1171: #define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
! 1172: #define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
! 1173: #define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
! 1174: #define STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0"
! 1175: #define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
! 1176: #define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0"
! 1177: #define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0"
! 1178: #define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0"
! 1179: #define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0"
! 1180: #define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0"
! 1181: #define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0"
! 1182: #define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
! 1183: #define STRING_word0 STR_w STR_o STR_r STR_d "\0"
! 1184: #define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
! 1185:
! 1186: #define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
! 1187:
! 1188: #define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
! 1189: #define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
! 1190: #define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
! 1191: #define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
! 1192: #define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
! 1193: #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
! 1194: #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
! 1195: #define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
! 1196: #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
! 1197: #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
! 1198:
! 1199: #endif /* SUPPORT_UTF8 */
! 1200:
! 1201: /* Escape items that are just an encoding of a particular data value. Each one is guarded by #ifndef, so a definition of the same name made earlier is left in place. */
! 1202:
! 1203: #ifndef ESC_e
! 1204: #define ESC_e CHAR_ESC
! 1205: #endif
! 1206:
! 1207: #ifndef ESC_f
! 1208: #define ESC_f CHAR_FF
! 1209: #endif
! 1210:
! 1211: #ifndef ESC_n
! 1212: #define ESC_n CHAR_NL
! 1213: #endif
! 1214:
! 1215: #ifndef ESC_r
! 1216: #define ESC_r CHAR_CR
! 1217: #endif
! 1218:
! 1219: /* We can't officially use ESC_t because it is a POSIX reserved identifier
! 1220: (presumably because of all the others like size_t), so the horizontal tab escape is named ESC_tee instead. */
! 1221:
! 1222: #ifndef ESC_tee
! 1223: #define ESC_tee CHAR_HT
! 1224: #endif
! 1225:
! 1226: /* Codes for different types of Unicode property. The codes are consecutive small integers starting at zero. */
! 1227:
! 1228: #define PT_ANY 0 /* Any property - matches all chars */
! 1229: #define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
! 1230: #define PT_GC 2 /* Specified general characteristic (e.g. L) */
! 1231: #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */
! 1232: #define PT_SC 4 /* Script (e.g. Han) */
! 1233: #define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
! 1234: #define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */
! 1235: #define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
! 1236: #define PT_WORD 8 /* Word - L plus N plus underscore */
! 1237:
! 1238: /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
! 1239: contain UTF-8 characters with values greater than 255. XCL_NOT and XCL_MAP are the flag bits; XCL_END to XCL_NOTPROP are the item type codes. */
! 1240:
! 1241: #define XCL_NOT 0x01 /* Flag: this is a negative class */
! 1242: #define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
! 1243:
! 1244: #define XCL_END 0 /* Marks end of individual items */
! 1245: #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
! 1246: #define XCL_RANGE 2 /* A range (two multibyte chars) follows */
! 1247: #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
! 1248: #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
! 1249:
! 1250: /* These are escaped items that aren't just an encoding of a particular data
! 1251: value such as \n. They must have non-zero values, as check_escape() returns
! 1252: their negation. Also, they must appear in the same order as in the opcode
! 1253: definitions below, up to ESC_z. There's a dummy (ESC_dum) for OP_ALLANY because it
! 1254: corresponds to "." in DOTALL mode rather than an escape sequence. It is also
! 1255: used for [^] in JavaScript compatibility mode, and for \C in non-utf8 mode. In
! 1256: non-DOTALL mode, "." behaves like \N.
! 1257:
! 1258: The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
! 1259: when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.
! 1260: They must be contiguous, and remain in order so that the replacements can be
! 1261: looked up from a table.
! 1262:
! 1263: The final escape must be ESC_REF as subsequent values are used for
! 1264: backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
! 1265: greater than ESC_b and less than ESC_Z to detect the types that may be
! 1266: repeated. These are the types that consume characters. If any new escapes are
! 1267: put in between that don't consume a character, that code will have to change.
! 1268: */
! 1269:
! 1270: enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
! 1271: ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
! 1272: ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
! 1273: ESC_E, ESC_Q, ESC_g, ESC_k,
! 1274: ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,
! 1275: ESC_REF };
! 1276:
! 1277: /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
! 1278: OP_EOD must correspond in order to the list of escapes immediately above.
! 1279:
! 1280: *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
! 1281: that follow must also be updated to match. There are also tables called
! 1282: "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
! 1283:
! 1284: enum {
! 1285: OP_END, /* 0 End of pattern */
! 1286:
! 1287: /* Values corresponding to backslashed metacharacters */
! 1288:
! 1289: OP_SOD, /* 1 Start of data: \A */
! 1290: OP_SOM, /* 2 Start of match (subject + offset): \G */
! 1291: OP_SET_SOM, /* 3 Set start of match (\K) */
! 1292: OP_NOT_WORD_BOUNDARY, /* 4 \B */
! 1293: OP_WORD_BOUNDARY, /* 5 \b */
! 1294: OP_NOT_DIGIT, /* 6 \D */
! 1295: OP_DIGIT, /* 7 \d */
! 1296: OP_NOT_WHITESPACE, /* 8 \S */
! 1297: OP_WHITESPACE, /* 9 \s */
! 1298: OP_NOT_WORDCHAR, /* 10 \W */
! 1299: OP_WORDCHAR, /* 11 \w */
! 1300:
! 1301: OP_ANY, /* 12 Match any character except newline */
! 1302: OP_ALLANY, /* 13 Match any character */
! 1303: OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
! 1304: OP_NOTPROP, /* 15 \P (not Unicode property) */
! 1305: OP_PROP, /* 16 \p (Unicode property) */
! 1306: OP_ANYNL, /* 17 \R (any newline sequence) */
! 1307: OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */
! 1308: OP_HSPACE, /* 19 \h (horizontal whitespace) */
! 1309: OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */
! 1310: OP_VSPACE, /* 21 \v (vertical whitespace) */
! 1311: OP_EXTUNI, /* 22 \X (extended Unicode sequence) */
! 1312: OP_EODN, /* 23 End of data or \n at end of data: \Z. */
! 1313: OP_EOD, /* 24 End of data: \z */
! 1314:
! 1315: OP_CIRC, /* 25 Start of line - not multiline */
! 1316: OP_CIRCM, /* 26 Start of line - multiline */
! 1317: OP_DOLL, /* 27 End of line - not multiline */
! 1318: OP_DOLLM, /* 28 End of line - multiline */
! 1319: OP_CHAR, /* 29 Match one character, casefully */
! 1320: OP_CHARI, /* 30 Match one character, caselessly */
! 1321: OP_NOT, /* 31 Match one character, not the given one, casefully */
! 1322: OP_NOTI, /* 32 Match one character, not the given one, caselessly */
! 1323:
! 1324: /* The following sets of 13 opcodes must always be kept in step because
! 1325: the offset from the first one is used to generate the others. */
! 1326:
! 1327: /**** Single characters, caseful, must precede the caseless ones ****/
! 1328:
! 1329: OP_STAR, /* 33 The maximizing and minimizing versions of */
! 1330: OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
! 1331: OP_PLUS, /* 35 the minimizing one second. */
! 1332: OP_MINPLUS, /* 36 */
! 1333: OP_QUERY, /* 37 */
! 1334: OP_MINQUERY, /* 38 */
! 1335:
! 1336: OP_UPTO, /* 39 From 0 to n matches of one character, caseful */
! 1337: OP_MINUPTO, /* 40 */
! 1338: OP_EXACT, /* 41 Exactly n matches */
! 1339:
! 1340: OP_POSSTAR, /* 42 Possessified star, caseful */
! 1341: OP_POSPLUS, /* 43 Possessified plus, caseful */
! 1342: OP_POSQUERY, /* 44 Possessified query, caseful */
! 1343: OP_POSUPTO, /* 45 Possessified upto, caseful */
! 1344:
! 1345: /**** Single characters, caseless, must follow the caseful ones */
! 1346:
! 1347: OP_STARI, /* 46 */
! 1348: OP_MINSTARI, /* 47 */
! 1349: OP_PLUSI, /* 48 */
! 1350: OP_MINPLUSI, /* 49 */
! 1351: OP_QUERYI, /* 50 */
! 1352: OP_MINQUERYI, /* 51 */
! 1353:
! 1354: OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */
! 1355: OP_MINUPTOI, /* 53 */
! 1356: OP_EXACTI, /* 54 */
! 1357:
! 1358: OP_POSSTARI, /* 55 Possessified star, caseless */
! 1359: OP_POSPLUSI, /* 56 Possessified plus, caseless */
! 1360: OP_POSQUERYI, /* 57 Possessified query, caseless */
! 1361: OP_POSUPTOI, /* 58 Possessified upto, caseless */
! 1362:
! 1363: /**** The negated ones must follow the non-negated ones, and match them ****/
! 1364: /**** Negated single character, caseful; must precede the caseless ones ****/
! 1365:
! 1366: OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
! 1367: OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
! 1368: OP_NOTPLUS, /* 61 the minimizing one second. They must be in */
! 1369: OP_NOTMINPLUS, /* 62 exactly the same order as those above. */
! 1370: OP_NOTQUERY, /* 63 */
! 1371: OP_NOTMINQUERY, /* 64 */
! 1372:
! 1373: OP_NOTUPTO, /* 65 From 0 to n matches, caseful */
! 1374: OP_NOTMINUPTO, /* 66 */
! 1375: OP_NOTEXACT, /* 67 Exactly n matches */
! 1376:
! 1377: OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */
! 1378: OP_NOTPOSPLUS, /* 69 */
! 1379: OP_NOTPOSQUERY, /* 70 */
! 1380: OP_NOTPOSUPTO, /* 71 */
! 1381:
! 1382: /**** Negated single character, caseless; must follow the caseful ones ****/
! 1383:
! 1384: OP_NOTSTARI, /* 72 */
! 1385: OP_NOTMINSTARI, /* 73 */
! 1386: OP_NOTPLUSI, /* 74 */
! 1387: OP_NOTMINPLUSI, /* 75 */
! 1388: OP_NOTQUERYI, /* 76 */
! 1389: OP_NOTMINQUERYI, /* 77 */
! 1390:
! 1391: OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */
! 1392: OP_NOTMINUPTOI, /* 79 */
! 1393: OP_NOTEXACTI, /* 80 Exactly n matches */
! 1394:
! 1395: OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */
! 1396: OP_NOTPOSPLUSI, /* 82 */
! 1397: OP_NOTPOSQUERYI, /* 83 */
! 1398: OP_NOTPOSUPTOI, /* 84 */
! 1399:
! 1400: /**** Character types ****/
! 1401:
! 1402: OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
! 1403: OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
! 1404: OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */
! 1405: OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */
! 1406: OP_TYPEQUERY, /* 89 */
! 1407: OP_TYPEMINQUERY, /* 90 */
! 1408:
! 1409: OP_TYPEUPTO, /* 91 From 0 to n matches */
! 1410: OP_TYPEMINUPTO, /* 92 */
! 1411: OP_TYPEEXACT, /* 93 Exactly n matches */
! 1412:
! 1413: OP_TYPEPOSSTAR, /* 94 Possessified versions */
! 1414: OP_TYPEPOSPLUS, /* 95 */
! 1415: OP_TYPEPOSQUERY, /* 96 */
! 1416: OP_TYPEPOSUPTO, /* 97 */
! 1417:
! 1418: /* These are used for character classes and back references; only the
! 1419: first six are the same as the sets above. */
! 1420:
! 1421: OP_CRSTAR, /* 98 The maximizing and minimizing versions of */
! 1422: OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */
! 1423: OP_CRPLUS, /* 100 the minimizing one second. These codes must */
! 1424: OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */
! 1425: OP_CRQUERY, /* 102 */
! 1426: OP_CRMINQUERY, /* 103 */
! 1427:
! 1428: OP_CRRANGE, /* 104 These are different to the three sets above. */
! 1429: OP_CRMINRANGE, /* 105 */
! 1430:
! 1431: /* End of quantifier opcodes */
! 1432:
! 1433: OP_CLASS, /* 106 Match a character class, chars < 256 only */
! 1434: OP_NCLASS, /* 107 Same, but the bitmap was created from a negative
! 1435: class - the difference is relevant only when a
! 1436: UTF-8 character > 255 is encountered. */
! 1437: OP_XCLASS, /* 108 Extended class for handling UTF-8 chars within the
! 1438: class. This does both positive and negative. */
! 1439: OP_REF, /* 109 Match a back reference, casefully */
! 1440: OP_REFI, /* 110 Match a back reference, caselessly */
! 1441: OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */
! 1442: OP_CALLOUT, /* 112 Call out to external function if provided */
! 1443:
! 1444: OP_ALT, /* 113 Start of alternation */
! 1445: OP_KET, /* 114 End of group that doesn't have an unbounded repeat */
! 1446: OP_KETRMAX, /* 115 These two must remain together and in this */
! 1447: OP_KETRMIN, /* 116 order. They are for groups that repeat for ever. */
! 1448: OP_KETRPOS, /* 117 Possessive unlimited repeat. */
! 1449:
! 1450: /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
! 1451: asserts must remain in order. */
! 1452:
! 1453: OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */
! 1454: OP_ASSERT, /* 119 Positive lookahead */
! 1455: OP_ASSERT_NOT, /* 120 Negative lookahead */
! 1456: OP_ASSERTBACK, /* 121 Positive lookbehind */
! 1457: OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
! 1458:
! 1459: /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
! 1460: after the assertions, with ONCE first, as there's a test for >= ONCE for a
! 1461: subpattern that isn't an assertion. The POS versions must immediately follow
! 1462: the non-POS versions in each case. */
! 1463:
! 1464: OP_ONCE, /* 123 Atomic group, contains captures */
! 1465: OP_ONCE_NC, /* 124 Atomic group containing no captures */
! 1466: OP_BRA, /* 125 Start of non-capturing bracket */
! 1467: OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
! 1468: OP_CBRA, /* 127 Start of capturing bracket */
! 1469: OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
! 1470: OP_COND, /* 129 Conditional group */
! 1471:
! 1472: /* These five must follow the previous five, in the same order. There's a
! 1473: check for >= SBRA to distinguish the two sets. */
! 1474:
! 1475: OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
! 1476: OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
! 1477: OP_SCBRA, /* 132 Start of capturing bracket, check empty */
! 1478: OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
! 1479: OP_SCOND, /* 134 Conditional group, check empty */
! 1480:
! 1481: /* The next two pairs must (respectively) be kept together. */
! 1482:
! 1483: OP_CREF, /* 135 Used to hold a capture number as condition */
! 1484: OP_NCREF, /* 136 Same, but generated by a name reference */
! 1485: OP_RREF, /* 137 Used to hold a recursion number as condition */
! 1486: OP_NRREF, /* 138 Same, but generated by a name reference */
! 1487: OP_DEF, /* 139 The DEFINE condition */
! 1488:
! 1489: OP_BRAZERO, /* 140 These two must remain together and in this */
! 1490: OP_BRAMINZERO, /* 141 order. */
! 1491: OP_BRAPOSZERO, /* 142 */
! 1492:
! 1493: /* These are backtracking control verbs */
! 1494:
! 1495: OP_MARK, /* 143 always has an argument */
! 1496: OP_PRUNE, /* 144 */
! 1497: OP_PRUNE_ARG, /* 145 same, but with argument */
! 1498: OP_SKIP, /* 146 */
! 1499: OP_SKIP_ARG, /* 147 same, but with argument */
! 1500: OP_THEN, /* 148 */
! 1501: OP_THEN_ARG, /* 149 same, but with argument */
! 1502: OP_COMMIT, /* 150 */
! 1503:
! 1504: /* These are forced failure and success verbs */
! 1505:
! 1506: OP_FAIL, /* 151 */
! 1507: OP_ACCEPT, /* 152 */
! 1508: OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
! 1509: OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
! 1510:
! 1511: /* This is used to skip a subpattern with a {0} quantifier */
! 1512:
! 1513: OP_SKIPZERO, /* 155 */
! 1514:
! 1515: /* This is not an opcode, but is used to check that tables indexed by opcode
! 1516: are the correct length, in order to catch updating errors - there have been
! 1517: some in the past. */
! 1518:
! 1519: OP_TABLE_LENGTH
! 1520: };
! 1521:
! 1522: /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
! 1523: definitions that follow must also be updated to match. There are also tables
! 1524: called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
! 1525:
! 1526:
! 1527: /* This macro defines textual names for all the opcodes. These are used only
! 1528: for debugging, and some of them are only partial names. The macro is referenced
! 1529: only in pcre_printint.c, which fills out the full names in many cases (and in
! 1530: some cases doesn't actually use these names at all). The list holds one string per opcode, in opcode order (156 entries for opcodes 0 to 155). */
! 1531:
! 1532: #define OP_NAME_LIST \
! 1533: "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
! 1534: "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
! 1535: "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
! 1536: "extuni", "\\Z", "\\z", \
! 1537: "^", "^", "$", "$", "char", "chari", "not", "noti", \
! 1538: "*", "*?", "+", "+?", "?", "??", \
! 1539: "{", "{", "{", \
! 1540: "*+","++", "?+", "{", \
! 1541: "*", "*?", "+", "+?", "?", "??", \
! 1542: "{", "{", "{", \
! 1543: "*+","++", "?+", "{", \
! 1544: "*", "*?", "+", "+?", "?", "??", \
! 1545: "{", "{", "{", \
! 1546: "*+","++", "?+", "{", \
! 1547: "*", "*?", "+", "+?", "?", "??", \
! 1548: "{", "{", "{", \
! 1549: "*+","++", "?+", "{", \
! 1550: "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
! 1551: "*+","++", "?+", "{", \
! 1552: "*", "*?", "+", "+?", "?", "??", "{", "{", \
! 1553: "class", "nclass", "xclass", "Ref", "Refi", \
! 1554: "Recurse", "Callout", \
! 1555: "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
! 1556: "Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
! 1557: "Once", "Once_NC", \
! 1558: "Bra", "BraPos", "CBra", "CBraPos", \
! 1559: "Cond", \
! 1560: "SBra", "SBraPos", "SCBra", "SCBraPos", \
! 1561: "SCond", \
! 1562: "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
! 1563: "Brazero", "Braminzero", "Braposzero", \
! 1564: "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
! 1565: "*THEN", "*THEN", "*COMMIT", "*FAIL", \
! 1566: "*ACCEPT", "*ASSERT_ACCEPT", \
! 1567: "Close", "Skip zero"
! 1568:
! 1569:
! 1570: /* This macro defines the length of fixed length operations in the compiled
! 1571: regex. The lengths are used when searching for specific things, and also in the
! 1572: debugging printing of a compiled regex. We use a macro so that it can be
! 1573: defined close to the definitions of the opcodes themselves.
! 1574:
! 1575: As things have been extended, some of these are no longer fixed lengths, but are
! 1576: minima instead. For example, the length of a single-character repeat may vary
! 1577: in UTF-8 mode. The code that uses this table must know about such things. */
! 1578:
! 1579: #define OP_LENGTHS \
! 1580: 1, /* End */ \
! 1581: 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
! 1582: 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
! 1583: 1, 1, 1, /* Any, AllAny, Anybyte */ \
! 1584: 3, 3, /* \P, \p */ \
! 1585: 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
! 1586: 1, /* \X */ \
! 1587: 1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \
! 1588: 2, /* Char - the minimum length */ \
! 1589: 2, /* Chari - the minimum length */ \
! 1590: 2, /* not */ \
! 1591: 2, /* noti */ \
! 1592: /* Positive single-char repeats ** These are */ \
! 1593: 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
! 1594: 4, 4, 4, /* upto, minupto, exact ** mode */ \
! 1595: 2, 2, 2, 4, /* *+, ++, ?+, upto+ */ \
! 1596: 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \
! 1597: 4, 4, 4, /* upto I, minupto I, exact I */ \
! 1598: 2, 2, 2, 4, /* *+I, ++I, ?+I, upto+I */ \
! 1599: /* Negative single-char repeats - only for chars < 256 */ \
! 1600: 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
! 1601: 4, 4, 4, /* NOT upto, minupto, exact */ \
! 1602: 2, 2, 2, 4, /* Possessive NOT *, +, ?, upto */ \
! 1603: 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \
! 1604: 4, 4, 4, /* NOT upto I, minupto I, exact I */ \
! 1605: 2, 2, 2, 4, /* Possessive NOT *I, +I, ?I, upto I */ \
! 1606: /* Positive type repeats */ \
! 1607: 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
! 1608: 4, 4, 4, /* Type upto, minupto, exact */ \
! 1609: 2, 2, 2, 4, /* Possessive *+, ++, ?+, upto+ */ \
! 1610: /* Character class & ref repeats */ \
! 1611: 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
! 1612: 5, 5, /* CRRANGE, CRMINRANGE */ \
! 1613: 33, /* CLASS */ \
! 1614: 33, /* NCLASS */ \
! 1615: 0, /* XCLASS - variable length */ \
! 1616: 3, /* REF */ \
! 1617: 3, /* REFI */ \
! 1618: 1+LINK_SIZE, /* RECURSE */ \
! 1619: 2+2*LINK_SIZE, /* CALLOUT */ \
! 1620: 1+LINK_SIZE, /* Alt */ \
! 1621: 1+LINK_SIZE, /* Ket */ \
! 1622: 1+LINK_SIZE, /* KetRmax */ \
! 1623: 1+LINK_SIZE, /* KetRmin */ \
! 1624: 1+LINK_SIZE, /* KetRpos */ \
! 1625: 1+LINK_SIZE, /* Reverse */ \
! 1626: 1+LINK_SIZE, /* Assert */ \
! 1627: 1+LINK_SIZE, /* Assert not */ \
! 1628: 1+LINK_SIZE, /* Assert behind */ \
! 1629: 1+LINK_SIZE, /* Assert behind not */ \
! 1630: 1+LINK_SIZE, /* ONCE */ \
! 1631: 1+LINK_SIZE, /* ONCE_NC */ \
! 1632: 1+LINK_SIZE, /* BRA */ \
! 1633: 1+LINK_SIZE, /* BRAPOS */ \
! 1634: 3+LINK_SIZE, /* CBRA */ \
! 1635: 3+LINK_SIZE, /* CBRAPOS */ \
! 1636: 1+LINK_SIZE, /* COND */ \
! 1637: 1+LINK_SIZE, /* SBRA */ \
! 1638: 1+LINK_SIZE, /* SBRAPOS */ \
! 1639: 3+LINK_SIZE, /* SCBRA */ \
! 1640: 3+LINK_SIZE, /* SCBRAPOS */ \
! 1641: 1+LINK_SIZE, /* SCOND */ \
! 1642: 3, 3, /* CREF, NCREF */ \
! 1643: 3, 3, /* RREF, NRREF */ \
! 1644: 1, /* DEF */ \
! 1645: 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
! 1646: 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
! 1647: 1, 3, /* SKIP, SKIP_ARG */ \
! 1648: 1, 3, /* THEN, THEN_ARG */ \
! 1649: 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
! 1650: 3, 1 /* CLOSE, SKIPZERO */
! 1651:
! 1652: /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
! 1653: condition. 0xffff is the largest value that fits in 16 bits. */
! 1654:
! 1655: #define RREF_ANY 0xffff
! 1656:
! 1657: /* Compile time error code numbers. They are given names so that they can more
! 1658: easily be tracked. When a new number is added, the table called eint in
! 1659: pcreposix.c must be updated. ERRCOUNT comes last, so it equals the number of ERRn codes. */
! 1660:
! 1661: enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
! 1662: ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
! 1663: ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
! 1664: ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
! 1665: ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
! 1666: ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
! 1667: ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
! 1668: ERR70, ERR71, ERR72, ERRCOUNT };
! 1669:
! 1670: /* The real format of the start of the pcre block; the index of names and the
! 1671: code vector run on as long as necessary after the end. We store an explicit
! 1672: offset to the name table so that if a regex is compiled on one host, saved, and
! 1673: then run on another where the size of pointers is different, all might still
! 1674: be well. For the case of compiled-on-4 and run-on-8, we include an extra
! 1675: pointer that is always NULL. For future-proofing, a few dummy fields were
! 1676: originally included - even though you can never get this planning right - but
! 1677: there is only one left now.
! 1678:
! 1679: NOTE NOTE NOTE:
! 1680: Because people can now save and re-use compiled patterns, any additions to this
! 1681: structure should be made at the end, and something earlier (e.g. a new
! 1682: flag in the options or one of the dummy fields) should indicate that the new
! 1683: fields are present. Currently PCRE always sets the dummy fields to zero.
! 1684: NOTE NOTE NOTE
! 1685: */
! 1686:
! 1687: typedef struct real_pcre {
! 1688: pcre_uint32 magic_number; /* NOTE(review): presumably marks a valid compiled block - confirm */
! 1689: pcre_uint32 size; /* Total that was malloced */
! 1690: pcre_uint32 options; /* Public options */
! 1691: pcre_uint16 flags; /* Private flags */
! 1692: pcre_uint16 dummy1; /* For future use; currently always set to zero */
! 1693: pcre_uint16 top_bracket; /* NOTE(review): presumably highest capturing bracket number - confirm */
! 1694: pcre_uint16 top_backref; /* NOTE(review): presumably highest back reference number - confirm */
! 1695: pcre_uint16 first_byte; /* NOTE(review): meaning not visible in this header - see compile code */
! 1696: pcre_uint16 req_byte; /* NOTE(review): meaning not visible in this header - see compile code */
! 1697: pcre_uint16 name_table_offset; /* Offset to name table that follows */
! 1698: pcre_uint16 name_entry_size; /* Size of any name items */
! 1699: pcre_uint16 name_count; /* Number of name items */
! 1700: pcre_uint16 ref_count; /* Reference count */
! 1701:
! 1702: const unsigned char *tables; /* Pointer to tables or NULL for std */
! 1703: const unsigned char *nullpad; /* Always NULL; extra pointer for the compiled-on-4, run-on-8 case */
! 1704: } real_pcre;
! 1705:
! 1706: /* The format of the block used to store data from pcre_study(). The same
! 1707: remark (see NOTE above) about extending this structure applies. */
! 1708:
! 1709: typedef struct pcre_study_data {
! 1710: pcre_uint32 size; /* Total that was malloced */
! 1711: pcre_uint32 flags; /* Private flags */
! 1712: uschar start_bits[32]; /* Starting char bits; NOTE(review): presumably one bit per byte value 0-255 - confirm */
! 1713: pcre_uint32 minlength; /* Minimum subject length */
! 1714: } pcre_study_data;
! 1715:
! 1716: /* Structure for building a chain of open capturing subpatterns during
! 1717: compiling, so that instructions to close them can be compiled when (*ACCEPT) is
! 1718: encountered. This is also used to identify subpatterns that contain recursive
! 1719: back references to themselves, so that they can be made atomic. */
! 1720:
! 1721: typedef struct open_capitem {
! 1722: struct open_capitem *next; /* Chain link */
! 1723: pcre_uint16 number; /* Capture number */
! 1724: pcre_uint16 flag; /* Set TRUE if recursive back ref (a boolean held in a 16-bit field) */
! 1725: } open_capitem;
! 1726:
! 1727: /* Structure for passing "static" information around between the functions
! 1728: doing the compiling, so that they are thread-safe. */
! 1729:
! 1730: typedef struct compile_data {
! 1731: const uschar *lcc; /* Points to lower casing table */
! 1732: const uschar *fcc; /* Points to case-flipping table */
! 1733: const uschar *cbits; /* Points to character type table */
! 1734: const uschar *ctypes; /* Points to table of type maps */
! 1735: const uschar *start_workspace;/* The start of working space */
! 1736: const uschar *start_code; /* The start of the compiled code */
! 1737: const uschar *start_pattern; /* The start of the pattern */
! 1738: const uschar *end_pattern; /* The end of the pattern */
! 1739: open_capitem *open_caps; /* Chain of open capture items */
! 1740: uschar *hwm; /* High watermark of workspace */
! 1741: uschar *name_table; /* The name/number table */
! 1742: int names_found; /* Number of entries so far */
! 1743: int name_entry_size; /* Size of each entry */
! 1744: int workspace_size; /* Size of workspace */
! 1745: int bracount; /* Count of capturing parens as we compile */
! 1746: int final_bracount; /* Saved value after first pass */
! 1747: int top_backref; /* Maximum back reference */
! 1748: unsigned int backref_map; /* Bitmap of low back refs */
! 1749: int assert_depth; /* Depth of nested assertions */
! 1750: int external_options; /* External (initial) options */
! 1751: int external_flags; /* External flag bits to be set */
! 1752: int req_varyopt; /* "After variable item" flag for reqbyte */
! 1753: BOOL had_accept; /* (*ACCEPT) encountered */
! 1754: BOOL check_lookbehind; /* Lookbehinds need later checking */
! 1755: int nltype; /* Newline type */
! 1756: int nllen; /* Newline string length */
! 1757: uschar nl[4]; /* Newline string when fixed length; NOTE(review): presumably nllen bytes are significant - confirm */
! 1758: } compile_data;
! 1759:
! 1760: /* Structure for maintaining a chain of pointers to the currently incomplete
! 1761: branches, for testing for left recursion while compiling. */
! 1762:
! 1763: typedef struct branch_chain {
! 1764: struct branch_chain *outer; /* Next link in the chain; NOTE(review): presumably the enclosing branch's entry - confirm */
! 1765: uschar *current_branch; /* Pointer to a currently incomplete branch */
! 1766: } branch_chain;
! 1767:
! 1768: /* Structure for items in a linked list that represents an explicit recursive
! 1769: call within the pattern; used by pcre_exec(). */
! 1770:
! 1771: typedef struct recursion_info {
! 1772: struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
! 1773: int group_num; /* Number of group that was called */
! 1774: int *offset_save; /* Pointer to start of saved offsets */
! 1775: int saved_max; /* Number of saved offsets */
! 1776: USPTR subject_position; /* Subject position at start of recursion */
! 1777: } recursion_info;
! 1778:
! 1779: /* A similar structure for pcre_dfa_exec(). */
! 1780:
! 1781: typedef struct dfa_recursion_info {
! 1782: struct dfa_recursion_info *prevrec; /* Link to another record; NOTE(review): presumably previous record or NULL, as in recursion_info - confirm */
! 1783: int group_num; /* NOTE(review): presumably number of the group that was called, as in recursion_info - confirm */
! 1784: USPTR subject_position; /* NOTE(review): presumably subject position at start of recursion - confirm */
! 1785: } dfa_recursion_info;
! 1786:
! 1787: /* Structure for building a chain of data for holding the values of the subject
! 1788: pointer at the start of each subpattern, so as to detect when an empty string
! 1789: has been matched by a subpattern - to break infinite loops; used by
! 1790: pcre_exec(). */
! 1791:
! 1792: typedef struct eptrblock {
! 1793: struct eptrblock *epb_prev; /* Link to another block in the chain; NOTE(review): presumably the previous one - confirm */
! 1794: USPTR epb_saved_eptr; /* Subject pointer value saved at the start of a subpattern */
! 1795: } eptrblock;
! 1796:
! 1797:
! 1798: /* Structure for passing "static" information around between the functions
! 1799: doing traditional NFA matching, so that they are thread-safe. */
! 1800:
! 1801: typedef struct match_data {
! 1802: unsigned long int match_call_count; /* NOTE(review): presumably a running count of match() calls - confirm */
! 1803: unsigned long int match_limit; /* NOTE(review): presumably the cap on match_call_count - confirm */
! 1804: unsigned long int match_limit_recursion; /* NOTE(review): presumably the cap on recursion depth - confirm */
! 1805: int *offset_vector; /* Offset vector */
! 1806: int offset_end; /* One past the end */
! 1807: int offset_max; /* The maximum usable for return data */
! 1808: int nltype; /* Newline type */
! 1809: int nllen; /* Newline string length */
! 1810: int name_count; /* Number of names in name table */
! 1811: int name_entry_size; /* Size of entry in names table */
! 1812: uschar *name_table; /* Table of names */
! 1813: uschar nl[4]; /* Newline string when fixed */
! 1814: const uschar *lcc; /* Points to lower casing table */
! 1815: const uschar *ctypes; /* Points to table of type maps */
! 1816: BOOL offset_overflow; /* Set if too many extractions */
! 1817: BOOL notbol; /* NOTBOL flag */
! 1818: BOOL noteol; /* NOTEOL flag */
! 1819: BOOL utf8; /* UTF8 flag */
! 1820: BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */
! 1821: BOOL use_ucp; /* PCRE_UCP flag */
! 1822: BOOL endonly; /* Dollar not before final \n */
! 1823: BOOL notempty; /* Empty string match not wanted */
! 1824: BOOL notempty_atstart; /* Empty string match at start not wanted */
! 1825: BOOL hitend; /* Hit the end of the subject at some point */
! 1826: BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
! 1827: BOOL hasthen; /* Pattern contains (*THEN) */
! 1828: BOOL ignore_skip_arg; /* For re-run when SKIP name not found */
! 1829: const uschar *start_code; /* For use when recursing */
! 1830: USPTR start_subject; /* Start of the subject string */
! 1831: USPTR end_subject; /* End of the subject string */
! 1832: USPTR start_match_ptr; /* Start of matched string */
! 1833: USPTR end_match_ptr; /* Subject position at end match */
! 1834: USPTR start_used_ptr; /* Earliest consulted character */
! 1835: int partial; /* PARTIAL options */
! 1836: int end_offset_top; /* Highwater mark at end of match */
! 1837: int capture_last; /* Most recent capture number */
! 1838: int start_offset; /* The start offset value */
! 1839: int match_function_type; /* Set for certain special calls of MATCH() */
! 1840: eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
! 1841: int eptrn; /* Next free eptrblock */
! 1842: recursion_info *recursive; /* Linked list of recursion data */
! 1843: void *callout_data; /* To pass back to callouts */
! 1844: const uschar *mark; /* Mark pointer to pass back on success */
! 1845: const uschar *nomatch_mark; /* Mark pointer to pass back on failure */
! 1846: const uschar *once_target; /* Where to back up to for atomic groups */
! 1847: } match_data;
! 1848:
! 1849: /* A similar structure is used for the same purpose by the DFA matching
! 1850: functions. */
! 1851:
! 1852: typedef struct dfa_match_data {
! 1853: const uschar *start_code; /* Start of the compiled pattern */
! 1854: const uschar *start_subject; /* Start of the subject string */
! 1855: const uschar *end_subject; /* End of subject string */
! 1856: const uschar *start_used_ptr; /* Earliest consulted character */
! 1857: const uschar *tables; /* Character tables */
! 1858: int start_offset; /* The start offset value */
! 1859: int moptions; /* Match options */
! 1860: int poptions; /* Pattern options */
! 1861: int nltype; /* Newline type */
! 1862: int nllen; /* Newline string length */
! 1863: uschar nl[4]; /* Newline string when fixed; NOTE(review): presumably nllen bytes are significant - confirm */
! 1864: void *callout_data; /* To pass back to callouts */
! 1865: dfa_recursion_info *recursive; /* Linked list of recursion data */
! 1866: } dfa_match_data;
! 1867:
! 1868: /* Bit definitions for entries in the pcre_ctypes table. Bits 0x20 and 0x40 have no name in this list. */
! 1869:
! 1870: #define ctype_space 0x01
! 1871: #define ctype_letter 0x02
! 1872: #define ctype_digit 0x04
! 1873: #define ctype_xdigit 0x08
! 1874: #define ctype_word 0x10 /* alphanumeric or '_' */
! 1875: #define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
! 1876:
! 1877: /* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
! 1878: of bits for a class map. Some classes are built by combining these tables. The offsets step by 32, so each of the ten tables occupies 32 bytes. */
! 1879:
! 1880: #define cbit_space 0 /* [:space:] or \s */
! 1881: #define cbit_xdigit 32 /* [:xdigit:] */
! 1882: #define cbit_digit 64 /* [:digit:] or \d */
! 1883: #define cbit_upper 96 /* [:upper:] */
! 1884: #define cbit_lower 128 /* [:lower:] */
! 1885: #define cbit_word 160 /* [:word:] or \w */
! 1886: #define cbit_graph 192 /* [:graph:] */
! 1887: #define cbit_print 224 /* [:print:] */
! 1888: #define cbit_punct 256 /* [:punct:] */
! 1889: #define cbit_cntrl 288 /* [:cntrl:] */
! 1890: #define cbit_length 320 /* Length of the cbits table */
! 1891:
! 1892: /* Offsets of the various tables from the base tables pointer, and
! 1893: total length. The layout is: 256 bytes at lcc_offset, 256 bytes at fcc_offset, cbit_length bytes at cbits_offset, then 256 bytes at ctypes_offset. */
! 1894:
! 1895: #define lcc_offset 0
! 1896: #define fcc_offset 256
! 1897: #define cbits_offset 512
! 1898: #define ctypes_offset (cbits_offset + cbit_length)
! 1899: #define tables_length (ctypes_offset + 256)
! 1900:
! 1901: /* Layout of the UCP type table that translates property names into types and
! 1902: codes. Each entry used to point directly to a name, but to reduce the number of
! 1903: relocations in shared libraries, it now has an offset into a single string
! 1904: instead. */
! 1905:
! 1906: typedef struct {
! 1907: pcre_uint16 name_offset; /* Offset of this entry's name within a single string of names */
! 1908: pcre_uint16 type; /* Type that the property name translates to */
! 1909: pcre_uint16 value; /* NOTE(review): presumably the "code" that the name translates to - confirm */
! 1910: } ucp_type_table;
! 1911:
! 1912:
! 1913: /* Internal shared data tables. These are tables that are used by more than one
! 1914: of the exported public functions. They have to be "external" in the C sense,
! 1915: but are not part of the PCRE public API. The data for these tables is in the
! 1916: pcre_tables.c module. */
! 1917:
! 1918: extern const int _pcre_utf8_table1[];
! 1919: extern const int _pcre_utf8_table2[];
! 1920: extern const int _pcre_utf8_table3[];
! 1921: extern const uschar _pcre_utf8_table4[];
! 1922:
! 1923: #ifdef SUPPORT_JIT /* Declared only when SUPPORT_JIT is defined */
! 1924: extern const uschar _pcre_utf8_char_sizes[];
! 1925: #endif /* SUPPORT_JIT */
! 1926:
! 1927: extern const int _pcre_utf8_table1_size; /* NOTE(review): presumably the element count of _pcre_utf8_table1 - confirm */
! 1928:
! 1929: extern const char _pcre_utt_names[]; /* NOTE(review): presumably the single string that ucp_type_table.name_offset indexes - confirm */
! 1930: extern const ucp_type_table _pcre_utt[];
! 1931: extern const int _pcre_utt_size; /* NOTE(review): presumably the element count of _pcre_utt - confirm */
! 1932:
! 1933: extern const uschar _pcre_default_tables[];
! 1934:
! 1935: extern const uschar _pcre_OP_lengths[]; /* NOTE(review): presumably initialized from OP_LENGTHS - confirm */
! 1936:
! 1937:
! 1938: /* Internal shared functions. These are functions that are used by more than
! 1939: one of the exported public functions. They have to be "external" in the C
! 1940: sense, but are not part of the PCRE public API. */
! 1941:
! 1942: extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
! 1943: extern BOOL _pcre_is_newline(USPTR, int, USPTR, int *, BOOL);
! 1944: extern int _pcre_ord2utf8(int, uschar *);
! 1945: extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
! 1946: const pcre_study_data *, pcre_study_data *);
! 1947: extern int _pcre_valid_utf8(USPTR, int, int *);
! 1948: extern BOOL _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
! 1949: extern BOOL _pcre_xclass(int, const uschar *);
! 1950:
! 1951: #ifdef SUPPORT_JIT /* JIT entry points are declared only when SUPPORT_JIT is defined */
! 1952: extern void _pcre_jit_compile(const real_pcre *, pcre_extra *);
! 1953: extern int _pcre_jit_exec(const real_pcre *, void *, PCRE_SPTR,
! 1954: int, int, int, int, int *, int);
! 1955: extern void _pcre_jit_free(void *);
! 1956: extern int _pcre_jit_get_size(void *);
! 1957: #endif /* SUPPORT_JIT */
! 1958:
! 1959: /* Unicode character database (UCD) */
! 1960:
! 1961: typedef struct {
! 1962: uschar script; /* Script value, read by UCD_SCRIPT */
! 1963: uschar chartype; /* Character type, read by UCD_CHARTYPE and used to index _pcre_ucp_gentype */
! 1964: pcre_int32 other_case; /* Signed offset that UCD_OTHERCASE adds to the character value */
! 1965: } ucd_record;
! 1966:
! 1967: extern const ucd_record _pcre_ucd_records[]; /* Records that GET_UCD points into */
! 1968: extern const uschar _pcre_ucd_stage1[]; /* First-stage index, indexed by ch / UCD_BLOCK_SIZE */
! 1969: extern const pcre_uint16 _pcre_ucd_stage2[]; /* Second-stage index; its entries index _pcre_ucd_records */
! 1970: extern const int _pcre_ucp_gentype[]; /* Indexed by chartype in UCD_CATEGORY */
! 1971: #ifdef SUPPORT_JIT /* Declared only when SUPPORT_JIT is defined */
! 1972: extern const int _pcre_ucp_typerange[];
! 1973: #endif /* SUPPORT_JIT */
! 1974:
! 1975: /* UCD access macros. GET_UCD(ch) yields a pointer to the ucd_record for ch via a two-stage lookup: _pcre_ucd_stage1[ch / UCD_BLOCK_SIZE] selects a block of UCD_BLOCK_SIZE entries in _pcre_ucd_stage2, and the entry at ch % UCD_BLOCK_SIZE in that block indexes _pcre_ucd_records. Every macro evaluates ch more than once, so the argument must have no side effects. Arguments and expansions are fully parenthesized so that any expression can be passed safely. */
! 1976:
! 1977: #define UCD_BLOCK_SIZE 128
! 1978: #define GET_UCD(ch) (_pcre_ucd_records + \
! 1979: _pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \
! 1980: UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
! 1981:
! 1982: #define UCD_CHARTYPE(ch) (GET_UCD(ch)->chartype)
! 1983: #define UCD_SCRIPT(ch) (GET_UCD(ch)->script)
! 1984: #define UCD_CATEGORY(ch) (_pcre_ucp_gentype[UCD_CHARTYPE(ch)])
! 1985: #define UCD_OTHERCASE(ch) ((ch) + GET_UCD(ch)->other_case)
! 1986:
! 1987: #endif
! 1988:
! 1989: /* End of pcre_internal.h */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>