version 1.1, 2012/02/21 23:05:51
|
version 1.1.1.3, 2012/10/09 09:19:17
|
Line 7
|
Line 7
|
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
|
|
Written by Philip Hazel |
Written by Philip Hazel |
Copyright (c) 1997-2011 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
|
|
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
Line 40 POSSIBILITY OF SUCH DAMAGE.
|
Line 40 POSSIBILITY OF SUCH DAMAGE.
|
|
|
/* This header contains definitions that are shared between the different |
/* This header contains definitions that are shared between the different |
modules, but which are not relevant to the exported API. This includes some |
modules, but which are not relevant to the exported API. This includes some |
functions whose names all begin with "_pcre_". */ | functions whose names all begin with "_pcre_" or "_pcre16_" depending on |
| the PRIV macro. */ |
|
|
#ifndef PCRE_INTERNAL_H |
#ifndef PCRE_INTERNAL_H |
#define PCRE_INTERNAL_H |
#define PCRE_INTERNAL_H |
Line 51 functions whose names all begin with "_pcre_". */
|
Line 52 functions whose names all begin with "_pcre_". */
|
#define PCRE_DEBUG |
#define PCRE_DEBUG |
#endif |
#endif |
|
|
/* We do not support both EBCDIC and UTF-8 at the same time. The "configure" | /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */ |
script prevents both being selected, but not everybody uses "configure". */ | #ifndef COMPILE_PCRE16 |
| #define COMPILE_PCRE8 |
#if defined EBCDIC && defined SUPPORT_UTF8 | |
#error The use of both EBCDIC and SUPPORT_UTF8 is not supported. | |
#endif |
#endif |
|
|
/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The | /* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The |
"configure" script ensures this, but not everybody uses "configure". */ |
"configure" script ensures this, but not everybody uses "configure". */ |
|
|
#if defined SUPPORT_UCP && !defined SUPPORT_UTF8 | #if defined SUPPORT_UCP && !(defined SUPPORT_UTF) |
| #define SUPPORT_UTF 1 |
| #endif |
| |
| /* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility |
| reasons with existing code. */ |
| |
| #if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF) |
| #define SUPPORT_UTF 1 |
| #endif |
| |
| /* Fixme: SUPPORT_UTF8 should eventually disappear from the code. |
| Until then we define it if SUPPORT_UTF is defined. */ |
| |
| #if defined SUPPORT_UTF && !(defined SUPPORT_UTF8) |
#define SUPPORT_UTF8 1 |
#define SUPPORT_UTF8 1 |
#endif |
#endif |
|
|
|
/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure" |
|
script prevents both being selected, but not everybody uses "configure". */ |
|
|
|
#if defined EBCDIC && defined SUPPORT_UTF |
|
#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported. |
|
#endif |
|
|
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef |
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef |
inline, and there are *still* stupid compilers about that don't like indented |
inline, and there are *still* stupid compilers about that don't like indented |
pre-processor statements, or at least there were when I first wrote this. After |
pre-processor statements, or at least there were when I first wrote this. After |
Line 158 set, we ensure here that it has no effect. */
|
Line 178 set, we ensure here that it has no effect. */
|
#define PCRE_CALL_CONVENTION |
#define PCRE_CALL_CONVENTION |
#endif |
#endif |
|
|
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We | /* We need to have types that specify unsigned 8, 16 and 32-bit integers. We |
cannot determine these outside the compilation (e.g. by running a program as |
cannot determine these outside the compilation (e.g. by running a program as |
part of "configure") because PCRE is often cross-compiled for use on other |
part of "configure") because PCRE is often cross-compiled for use on other |
systems. Instead we make use of the maximum sizes that are available at |
systems. Instead we make use of the maximum sizes that are available at |
preprocessor time in standard C environments. */ |
preprocessor time in standard C environments. */ |
|
|
|
typedef unsigned char pcre_uint8; |
|
|
#if USHRT_MAX == 65535 |
#if USHRT_MAX == 65535 |
typedef unsigned short pcre_uint16; |
typedef unsigned short pcre_uint16; |
typedef short pcre_int16; |
typedef short pcre_int16; |
Line 206 by "configure". */
|
Line 228 by "configure". */
|
|
|
/* All character handling must be done as unsigned characters. Otherwise there |
/* All character handling must be done as unsigned characters. Otherwise there |
are problems with top-bit-set characters and functions such as isspace(). |
are problems with top-bit-set characters and functions such as isspace(). |
However, we leave the interface to the outside world as char *, because that | However, we leave the interface to the outside world as char * or short *, |
should make things easier for callers. We define a short type for unsigned char | because that should make things easier for callers. This character type is |
to save lots of typing. I tried "uchar", but it causes problems on Digital | called pcre_uchar. |
Unix, where it is defined in sys/types, so use "uschar" instead. */ | |
|
|
typedef unsigned char uschar; | The IN_UCHARS macro multiplies its argument by the byte size of the current |
| pcre_uchar type. Useful for memcpy and similar operations, which require the |
| byte size of their input/output buffers. |
|
|
|
The MAX_255 macro checks whether its pcre_uchar input is less than 256. |
|
|
|
The TABLE_GET macro is designed for accessing elements of tables which contain |

exactly 256 items. When the character type can hold more than 256 |

values, a range check is needed before accessing these tables. |
|
*/ |
|
|
|
#ifdef COMPILE_PCRE8 |
|
|
|
typedef unsigned char pcre_uchar; |
|
#define IN_UCHARS(x) (x) |
|
#define MAX_255(c) 1 |
|
#define TABLE_GET(c, table, default) ((table)[c]) |
|
|
|
#else |
|
|
|
#ifdef COMPILE_PCRE16 |
|
#if USHRT_MAX != 65535 |
|
/* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in |
|
pcre.h(.in) and disable (comment out) this message. */ |
|
#error Warning: PCRE_UCHAR16 is not a 16 bit data type. |
|
#endif |
|
|
|
typedef pcre_uint16 pcre_uchar; |
|
#define IN_UCHARS(x) ((x) << 1) |
|
#define MAX_255(c) ((c) <= 255u) |
|
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) |
|
|
|
#else |
|
#error Unsupported compiling mode |
|
#endif /* COMPILE_PCRE16 */ |
|
|
|
#endif /* COMPILE_PCRE8 */ |
|
|
/* This is an unsigned int value that no character can ever have. UTF-8 |
/* This is an unsigned int value that no character can ever have. UTF-8 |
characters only go up to 0x7fffffff (though Unicode doesn't go beyond |
characters only go up to 0x7fffffff (though Unicode doesn't go beyond |
0x0010ffff). */ |
0x0010ffff). */ |
Line 234 start/end of string field names are. */
|
Line 291 start/end of string field names are. */
|
#define IS_NEWLINE(p) \ |
#define IS_NEWLINE(p) \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((p) < NLBLOCK->PSEND && \ |
((p) < NLBLOCK->PSEND && \ |
_pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ | PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ |
utf8)) \ | &(NLBLOCK->nllen), utf)) \ |
: \ |
: \ |
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ |
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ |
(p)[0] == NLBLOCK->nl[0] && \ |
(p)[0] == NLBLOCK->nl[0] && \ |
Line 248 start/end of string field names are. */
|
Line 305 start/end of string field names are. */
|
#define WAS_NEWLINE(p) \ |
#define WAS_NEWLINE(p) \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((p) > NLBLOCK->PSSTART && \ |
((p) > NLBLOCK->PSSTART && \ |
_pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ | PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ |
&(NLBLOCK->nllen), utf8)) \ | &(NLBLOCK->nllen), utf)) \ |
: \ |
: \ |
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ |
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ |
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ |
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ |
Line 267 used for the external interface and appears in pcre.h,
|
Line 324 used for the external interface and appears in pcre.h,
|
must begin with PCRE_. */ |
must begin with PCRE_. */ |
|
|
#ifdef CUSTOM_SUBJECT_PTR |
#ifdef CUSTOM_SUBJECT_PTR |
#define PCRE_SPTR CUSTOM_SUBJECT_PTR | #define PCRE_PUCHAR CUSTOM_SUBJECT_PTR |
#define USPTR CUSTOM_SUBJECT_PTR | |
#else |
#else |
#define PCRE_SPTR const char * | #define PCRE_PUCHAR const pcre_uchar * |
#define USPTR const unsigned char * | |
#endif |
#endif |
|
|
|
|
|
|
/* Include the public PCRE header and the definitions of UCP character property |
/* Include the public PCRE header and the definitions of UCP character property |
values. */ |
values. */ |
|
|
Line 343 The macros are controlled by the value of LINK_SIZE. T
|
Line 396 The macros are controlled by the value of LINK_SIZE. T
|
the config.h file, but can be overridden by using -D on the command line. This |
the config.h file, but can be overridden by using -D on the command line. This |
is automated on Unix systems via the "configure" command. */ |
is automated on Unix systems via the "configure" command. */ |
|
|
|
#ifdef COMPILE_PCRE8 |
|
|
#if LINK_SIZE == 2 |
#if LINK_SIZE == 2 |
|
|
#define PUT(a,n,d) \ |
#define PUT(a,n,d) \ |
Line 379 is automated on Unix systems via the "configure" comma
|
Line 434 is automated on Unix systems via the "configure" comma
|
#define GET(a,n) \ |
#define GET(a,n) \ |
(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) |
(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) |
|
|
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ | /* Keep it positive */ |
| #define MAX_PATTERN_SIZE (1 << 30) |
|
|
|
#else |
|
#error LINK_SIZE must be either 2, 3, or 4 |
|
#endif |
|
|
|
#else /* COMPILE_PCRE8 */ |
|
|
|
#ifdef COMPILE_PCRE16 |
|
|
|
#if LINK_SIZE == 2 |
|
|
|
#undef LINK_SIZE |
|
#define LINK_SIZE 1 |
|
|
|
#define PUT(a,n,d) \ |
|
(a[n] = (d)) |
|
|
|
#define GET(a,n) \ |
|
(a[n]) |
|
|
|
#define MAX_PATTERN_SIZE (1 << 16) |
|
|
|
#elif LINK_SIZE == 3 || LINK_SIZE == 4 |
|
|
|
#undef LINK_SIZE |
|
#define LINK_SIZE 2 |
|
|
|
#define PUT(a,n,d) \ |
|
(a[n] = (d) >> 16), \ |
|
(a[(n)+1] = (d) & 65535) |
|
|
|
#define GET(a,n) \ |
|
(((a)[n] << 16) | (a)[(n)+1]) |
|
|
|
/* Keep it positive */ |
|
#define MAX_PATTERN_SIZE (1 << 30) |
|
|
#else |
#else |
#error LINK_SIZE must be either 2, 3, or 4 |
#error LINK_SIZE must be either 2, 3, or 4 |
#endif |
#endif |
|
|
|
#else |
|
#error Unsupported compiling mode |
|
#endif /* COMPILE_PCRE16 */ |
|
|
|
#endif /* COMPILE_PCRE8 */ |
|
|
/* Convenience macro defined in terms of the others */ |
/* Convenience macro defined in terms of the others */ |
|
|
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE |
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE |
Line 396 is automated on Unix systems via the "configure" comma
|
Line 492 is automated on Unix systems via the "configure" comma
|
offsets changes. These are used for repeat counts and for other things such as |
offsets changes. These are used for repeat counts and for other things such as |
capturing parenthesis numbers in back references. */ |
capturing parenthesis numbers in back references. */ |
|
|
|
#ifdef COMPILE_PCRE8 |
|
|
|
#define IMM2_SIZE 2 |
|
|
#define PUT2(a,n,d) \ |
#define PUT2(a,n,d) \ |
a[n] = (d) >> 8; \ |
a[n] = (d) >> 8; \ |
a[(n)+1] = (d) & 255 |
a[(n)+1] = (d) & 255 |
Line 403 capturing parenthesis numbers in back references. */
|
Line 503 capturing parenthesis numbers in back references. */
|
#define GET2(a,n) \ |
#define GET2(a,n) \ |
(((a)[n] << 8) | (a)[(n)+1]) |
(((a)[n] << 8) | (a)[(n)+1]) |
|
|
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 | #else /* COMPILE_PCRE8 */ |
|
|
|
#ifdef COMPILE_PCRE16 |
|
|
/* When UTF-8 encoding is being used, a character is no longer just a single | #define IMM2_SIZE 1 |
byte. The macros for character handling generate simple sequences when used in | |
byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is | |
not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should | |
never be called in byte mode. To make sure they can never even appear when | |
UTF-8 support is omitted, we don't even define them. */ | |
|
|
#ifndef SUPPORT_UTF8 | #define PUT2(a,n,d) \ |
| a[n] = d |
| |
| #define GET2(a,n) \ |
| a[n] |
| |
| #else |
| #error Unsupported compiling mode |
| #endif /* COMPILE_PCRE16 */ |
| |
| #endif /* COMPILE_PCRE8 */ |
| |
| #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE |
| |
| /* The maximum length of a MARK name is currently one data unit; it may be |
| changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */ |
| |
| #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1) |
| |
| /* When UTF encoding is being used, a character is no longer just a single |
| data unit. The macros for character handling generate simple sequences when |
| used in character-mode, and more complicated ones for UTF characters. |
| GETCHARLENTEST and other macros are not used when UTF is not supported, |
| so they are not defined. To make sure they can never even appear when |
| UTF support is omitted, we don't even define them. */ |
| |
| #ifndef SUPPORT_UTF |
| |
| /* #define MAX_VALUE_FOR_SINGLE_CHAR */ |
| /* #define HAS_EXTRALEN(c) */ |
| /* #define GET_EXTRALEN(c) */ |
| /* #define NOT_FIRSTCHAR(c) */ |
#define GETCHAR(c, eptr) c = *eptr; |
#define GETCHAR(c, eptr) c = *eptr; |
#define GETCHARTEST(c, eptr) c = *eptr; |
#define GETCHARTEST(c, eptr) c = *eptr; |
#define GETCHARINC(c, eptr) c = *eptr++; |
#define GETCHARINC(c, eptr) c = *eptr++; |
Line 421 UTF-8 support is omitted, we don't even define them. *
|
Line 548 UTF-8 support is omitted, we don't even define them. *
|
#define GETCHARLEN(c, eptr, len) c = *eptr; |
#define GETCHARLEN(c, eptr, len) c = *eptr; |
/* #define GETCHARLENTEST(c, eptr, len) */ |
/* #define GETCHARLENTEST(c, eptr, len) */ |
/* #define BACKCHAR(eptr) */ |
/* #define BACKCHAR(eptr) */ |
|
/* #define FORWARDCHAR(eptr) */ |
|
/* #define ACROSSCHAR(condition, eptr, action) */ |
|
|
#else /* SUPPORT_UTF8 */ | #else /* SUPPORT_UTF */ |
|
|
|
#ifdef COMPILE_PCRE8 |
|
|
/* These macros were originally written in the form of loops that used data |
/* These macros were originally written in the form of loops that used data |
from the tables whose names start with _pcre_utf8_table. They were rewritten by | from the tables whose names start with PRIV(utf8_table). They were rewritten by |
a user so as not to use loops, because in some environments this gives a |
a user so as not to use loops, because in some environments this gives a |
significant performance advantage, and it seems never to do any harm. */ |
significant performance advantage, and it seems never to do any harm. */ |
|
|
|
/* Tells the biggest code point which can be encoded as a single character. */ |
|
|
|
#define MAX_VALUE_FOR_SINGLE_CHAR 127 |
|
|
|
/* Tests whether the code point needs extra characters to decode. */ |
|
|
|
#define HAS_EXTRALEN(c) ((c) >= 0xc0) |
|
|
|
/* Returns the number of additional characters if HAS_EXTRALEN(c) is TRUE. |

Otherwise the result is undefined. */ |
|
|
|
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f]) |
|
|
|
/* Returns TRUE, if the given character is not the first character |
|
of a UTF sequence. */ |
|
|
|
#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80) |
|
|
/* Base macro to pick up the remaining bytes of a UTF-8 character, not |
/* Base macro to pick up the remaining bytes of a UTF-8 character, not |
advancing the pointer. */ |
advancing the pointer. */ |
|
|
Line 463 pointer. */
|
Line 612 pointer. */
|
|
|
#define GETCHARTEST(c, eptr) \ |
#define GETCHARTEST(c, eptr) \ |
c = *eptr; \ |
c = *eptr; \ |
if (utf8 && c >= 0xc0) GETUTF8(c, eptr); | if (utf && c >= 0xc0) GETUTF8(c, eptr); |
|
|
/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing |
/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing |
the pointer. */ |
the pointer. */ |
Line 511 This is called when we don't know if we are in UTF-8 m
|
Line 660 This is called when we don't know if we are in UTF-8 m
|
|
|
#define GETCHARINCTEST(c, eptr) \ |
#define GETCHARINCTEST(c, eptr) \ |
c = *eptr++; \ |
c = *eptr++; \ |
if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr); | if (utf && c >= 0xc0) GETUTF8INC(c, eptr); |
|
|
/* Base macro to pick up the remaining bytes of a UTF-8 character, not |
/* Base macro to pick up the remaining bytes of a UTF-8 character, not |
advancing the pointer, incrementing the length. */ |
advancing the pointer, incrementing the length. */ |
Line 563 do not know if we are in UTF-8 mode. */
|
Line 712 do not know if we are in UTF-8 mode. */
|
|
|
#define GETCHARLENTEST(c, eptr, len) \ |
#define GETCHARLENTEST(c, eptr, len) \ |
c = *eptr; \ |
c = *eptr; \ |
if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len); | if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len); |
|
|
/* If the pointer is not at the start of a character, move it back until |
/* If the pointer is not at the start of a character, move it back until |
it is. This is called only in UTF-8 mode - we don't put a test within the macro |
it is. This is called only in UTF-8 mode - we don't put a test within the macro |
Line 571 because almost all calls are already within a block of
|
Line 720 because almost all calls are already within a block of
|
|
|
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- |
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- |
|
|
#endif /* SUPPORT_UTF8 */ | /* Same as above, just in the other direction. */ |
| #define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++ |
|
|
|
/* Same as above, but it allows a fully customizable form. */ |
|
#define ACROSSCHAR(condition, eptr, action) \ |
|
while((condition) && ((eptr) & 0xc0) == 0x80) action |
|
|
|
#else /* COMPILE_PCRE8 */ |
|
|
|
#ifdef COMPILE_PCRE16 |
|
|
|
/* Tells the biggest code point which can be encoded as a single character. */ |
|
|
|
#define MAX_VALUE_FOR_SINGLE_CHAR 65535 |
|
|
|
/* Tests whether the code point needs extra characters to decode. */ |
|
|
|
#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800) |
|
|
|
/* Returns the number of additional characters if HAS_EXTRALEN(c) is TRUE. |

Otherwise the result is undefined. */ |
|
|
|
#define GET_EXTRALEN(c) 1 |
|
|
|
/* Returns TRUE, if the given character is not the first character |
|
of a UTF sequence. */ |
|
|
|
#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00) |
|
|
|
/* Base macro to pick up the low surrogate of a UTF-16 character, not |
|
advancing the pointer. */ |
|
|
|
#define GETUTF16(c, eptr) \ |
|
{ c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; } |
|
|
|
/* Get the next UTF-16 character, not advancing the pointer. This is called when |
|
we know we are in UTF-16 mode. */ |
|
|
|
#define GETCHAR(c, eptr) \ |
|
c = *eptr; \ |
|
if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr); |
|
|
|
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the |
|
pointer. */ |
|
|
|
#define GETCHARTEST(c, eptr) \ |
|
c = *eptr; \ |
|
if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr); |
|
|
|
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing |
|
the pointer. */ |
|
|
|
#define GETUTF16INC(c, eptr) \ |
|
{ c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; } |
|
|
|
/* Get the next UTF-16 character, advancing the pointer. This is called when we |
|
know we are in UTF-16 mode. */ |
|
|
|
#define GETCHARINC(c, eptr) \ |
|
c = *eptr++; \ |
|
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); |
|
|
|
/* Get the next character, testing for UTF-16 mode, and advancing the pointer. |
|
This is called when we don't know if we are in UTF-16 mode. */ |
|
|
|
#define GETCHARINCTEST(c, eptr) \ |
|
c = *eptr++; \ |
|
if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); |
|
|
|
/* Base macro to pick up the low surrogate of a UTF-16 character, not |
|
advancing the pointer, incrementing the length. */ |
|
|
|
#define GETUTF16LEN(c, eptr, len) \ |
|
{ c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; } |
|
|
|
/* Get the next UTF-16 character, not advancing the pointer, incrementing |
|
length if there is a low surrogate. This is called when we know we are in |
|
UTF-16 mode. */ |
|
|
|
#define GETCHARLEN(c, eptr, len) \ |
|
c = *eptr; \ |
|
if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); |
|
|
|
/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the |
|
pointer, incrementing length if there is a low surrogate. This is called when |
|
we do not know if we are in UTF-16 mode. */ |
|
|
|
#define GETCHARLENTEST(c, eptr, len) \ |
|
c = *eptr; \ |
|
if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); |
|
|
|
/* If the pointer is not at the start of a character, move it back until |
|
it is. This is called only in UTF-16 mode - we don't put a test within the |
|
macro because almost all calls are already within a block of UTF-16 only |
|
code. */ |
|
|
|
#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr-- |
|
|
|
/* Same as above, just in the other direction. */ |
|
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++ |
|
|
|
/* Same as above, but it allows a fully customizable form. */ |
|
#define ACROSSCHAR(condition, eptr, action) \ |
|
if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action |
|
|
|
#endif |
|
|
|
#endif /* COMPILE_PCRE8 */ |
|
|
|
#endif /* SUPPORT_UTF */ |
|
|
|
|
/* In case there is no definition of offsetof() provided - though any proper |
/* In case there is no definition of offsetof() provided - though any proper |
Standard C system should have one. */ |
Standard C system should have one. */ |
|
|
Line 588 are in a 16-bit flags word. From release 8.00, PCRE_NO
|
Line 846 are in a 16-bit flags word. From release 8.00, PCRE_NO
|
the restrictions on partial matching have been lifted. It remains for backwards |
the restrictions on partial matching have been lifted. It remains for backwards |
compatibility. */ |
compatibility. */ |
|
|
#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */ | #ifdef COMPILE_PCRE8 |
#define PCRE_FIRSTSET 0x0002 /* first_byte is set */ | #define PCRE_MODE 0x0001 /* compiled in 8 bit mode */ |
#define PCRE_REQCHSET 0x0004 /* req_byte is set */ | #endif |
#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */ | #ifdef COMPILE_PCRE16 |
#define PCRE_JCHANGED 0x0010 /* j option used in regex */ | #define PCRE_MODE 0x0002 /* compiled in 16 bit mode */ |
#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */ | #endif |
#define PCRE_HASTHEN 0x0040 /* pattern contains (*THEN) */ | #define PCRE_FIRSTSET 0x0010 /* first_char is set */ |
| #define PCRE_FCH_CASELESS 0x0020 /* caseless first char */ |
| #define PCRE_REQCHSET 0x0040 /* req_byte is set */ |
| #define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */ |
| #define PCRE_STARTLINE 0x0100 /* start after \n for multiline */ |
| #define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */ |
| #define PCRE_JCHANGED 0x0400 /* j option used in regex */ |
| #define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */ |
| #define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */ |
|
|
/* Flags for the "extra" block produced by pcre_study(). */ |
/* Flags for the "extra" block produced by pcre_study(). */ |
|
|
Line 626 time, run time, or study time, respectively. */
|
Line 892 time, run time, or study time, respectively. */
|
PCRE_NO_START_OPTIMIZE) |
PCRE_NO_START_OPTIMIZE) |
|
|
#define PUBLIC_STUDY_OPTIONS \ |
#define PUBLIC_STUDY_OPTIONS \ |
PCRE_STUDY_JIT_COMPILE | (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \ |
| PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) |
|
|
/* Magic number to provide a small check against being handed junk. Also used | /* Magic number to provide a small check against being handed junk. */ |
to detect whether a pattern was compiled on a host of different endianness. */ | |
|
|
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ |
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ |
|
|
|
/* This variable is used to detect a loaded regular expression |
|
in different endianness. */ |
|
|
|
#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */ |
|
|
/* Negative values for the firstchar and reqchar variables */ |
/* Negative values for the firstchar and reqchar variables */ |
|
|
#define REQ_UNSET (-2) |
#define REQ_UNSET (-2) |
Line 643 req_byte match. */
|
Line 914 req_byte match. */
|
|
|
#define REQ_BYTE_MAX 1000 |
#define REQ_BYTE_MAX 1000 |
|
|
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a |
|
variable-length repeat, or anything other than literal characters. */ |
|
|
|
#define REQ_CASELESS 0x0100 /* indicates caselessness */ |
|
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */ |
|
|
|
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in |
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in |
environments where these macros are defined elsewhere. Unfortunately, there |
environments where these macros are defined elsewhere. Unfortunately, there |
is no way to do the same for the typedef. */ |
is no way to do the same for the typedef. */ |
Line 677 for) in a minority area (EBCDIC platforms), this is no
|
Line 942 for) in a minority area (EBCDIC platforms), this is no
|
application that did need both could compile two versions of the library, using |
application that did need both could compile two versions of the library, using |
macros to give the functions distinct names. */ |
macros to give the functions distinct names. */ |
|
|
#ifndef SUPPORT_UTF8 | #ifndef SUPPORT_UTF |
|
|
/* UTF-8 support is not enabled; use the platform-dependent character literals |
/* UTF-8 support is not enabled; use the platform-dependent character literals |
so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */ |
so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */ |
Line 937 so that PCRE works on both ASCII and EBCDIC platforms,
|
Line 1202 so that PCRE works on both ASCII and EBCDIC platforms,
|
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" |
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" |
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" |
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" |
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" |
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" |
#define STRING_UTF8_RIGHTPAR "UTF8)" | #ifdef COMPILE_PCRE8 |
| #define STRING_UTF_RIGHTPAR "UTF8)" |
| #endif |
| #ifdef COMPILE_PCRE16 |
| #define STRING_UTF_RIGHTPAR "UTF16)" |
| #endif |
#define STRING_UCP_RIGHTPAR "UCP)" |
#define STRING_UCP_RIGHTPAR "UCP)" |
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" |
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" |
|
|
#else /* SUPPORT_UTF8 */ | #else /* SUPPORT_UTF */ |
|
|
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This |
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This |
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode |
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode |
Line 1192 only. */
|
Line 1462 only. */
|
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS |
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS |
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS |
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS |
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS |
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS |
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS | #ifdef COMPILE_PCRE8 |
| #define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS |
| #endif |
| #ifdef COMPILE_PCRE16 |
| #define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS |
| #endif |
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS |
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS |
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS |
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS |
|
|
#endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
|
|
/* Escape items that are just an encoding of a particular data value. */ |
/* Escape items that are just an encoding of a particular data value. */ |
|
|
Line 1236 only. */
|
Line 1511 only. */
|
#define PT_WORD 8 /* Word - L plus N plus underscore */ |
#define PT_WORD 8 /* Word - L plus N plus underscore */ |
|
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
contain UTF-8 characters with values greater than 255. */ | contain characters with values greater than 255. */ |
|
|
#define XCL_NOT 0x01 /* Flag: this is a negative class */ |
#define XCL_NOT 0x01 /* Flag: this is a negative class */ |
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ |
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ |
Line 1252 value such as \n. They must have non-zero values, as c
|
Line 1527 value such as \n. They must have non-zero values, as c
|
their negation. Also, they must appear in the same order as in the opcode |
their negation. Also, they must appear in the same order as in the opcode |
definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it |
definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it |
corresponds to "." in DOTALL mode rather than an escape sequence. It is also |
corresponds to "." in DOTALL mode rather than an escape sequence. It is also |
used for [^] in JavaScript compatibility mode, and for \C in non-utf8 mode. In | used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In |
non-DOTALL mode, "." behaves like \N. |
non-DOTALL mode, "." behaves like \N. |
|
|
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. |
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. |
Line 1433 enum {
|
Line 1708 enum {
|
OP_CLASS, /* 106 Match a character class, chars < 256 only */ |
OP_CLASS, /* 106 Match a character class, chars < 256 only */ |
OP_NCLASS, /* 107 Same, but the bitmap was created from a negative |
OP_NCLASS, /* 107 Same, but the bitmap was created from a negative |
class - the difference is relevant only when a |
class - the difference is relevant only when a |
UTF-8 character > 255 is encountered. */ | character > 255 is encountered. */ |
OP_XCLASS, /* 108 Extended class for handling UTF-8 chars within the | OP_XCLASS, /* 108 Extended class for handling > 255 chars within the |
class. This does both positive and negative. */ |
class. This does both positive and negative. */ |
OP_REF, /* 109 Match a back reference, casefully */ |
OP_REF, /* 109 Match a back reference, casefully */ |
OP_REFI, /* 110 Match a back reference, caselessly */ |
OP_REFI, /* 110 Match a back reference, caselessly */ |
Line 1591 in UTF-8 mode. The code that uses this table must know
|
Line 1866 in UTF-8 mode. The code that uses this table must know
|
2, /* noti */ \ |
2, /* noti */ \ |
/* Positive single-char repeats ** These are */ \ |
/* Positive single-char repeats ** These are */ \ |
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ |
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ |
4, 4, 4, /* upto, minupto, exact ** mode */ \ | 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \ |
2, 2, 2, 4, /* *+, ++, ?+, upto+ */ \ | 2+IMM2_SIZE, /* exact */ \ |
| 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ |
2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ |
2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ |
4, 4, 4, /* upto I, minupto I, exact I */ \ | 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ |
2, 2, 2, 4, /* *+I, ++I, ?+I, upto+I */ \ | 2+IMM2_SIZE, /* exact I */ \ |
| 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ |
/* Negative single-char repeats - only for chars < 256 */ \ |
/* Negative single-char repeats - only for chars < 256 */ \ |
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ |
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ |
4, 4, 4, /* NOT upto, minupto, exact */ \ | 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ |
2, 2, 2, 4, /* Possessive NOT *, +, ?, upto */ \ | 2+IMM2_SIZE, /* NOT exact */ \ |
| 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ |
2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ |
2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ |
4, 4, 4, /* NOT upto I, minupto I, exact I */ \ | 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ |
2, 2, 2, 4, /* Possessive NOT *I, +I, ?I, upto I */ \ | 2+IMM2_SIZE, /* NOT exact I */ \ |
| 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ |
/* Positive type repeats */ \ |
/* Positive type repeats */ \ |
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ |
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ |
4, 4, 4, /* Type upto, minupto, exact */ \ | 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ |
2, 2, 2, 4, /* Possessive *+, ++, ?+, upto+ */ \ | 2+IMM2_SIZE, /* Type exact */ \ |
| 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ |
/* Character class & ref repeats */ \ |
/* Character class & ref repeats */ \ |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ |
5, 5, /* CRRANGE, CRMINRANGE */ \ | 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ |
33, /* CLASS */ \ | 1+(32/sizeof(pcre_uchar)), /* CLASS */ \ |
33, /* NCLASS */ \ | 1+(32/sizeof(pcre_uchar)), /* NCLASS */ \ |
0, /* XCLASS - variable length */ \ |
0, /* XCLASS - variable length */ \ |
3, /* REF */ \ | 1+IMM2_SIZE, /* REF */ \ |
3, /* REFI */ \ | 1+IMM2_SIZE, /* REFI */ \ |
1+LINK_SIZE, /* RECURSE */ \ |
1+LINK_SIZE, /* RECURSE */ \ |
2+2*LINK_SIZE, /* CALLOUT */ \ |
2+2*LINK_SIZE, /* CALLOUT */ \ |
1+LINK_SIZE, /* Alt */ \ |
1+LINK_SIZE, /* Alt */ \ |
Line 1631 in UTF-8 mode. The code that uses this table must know
|
Line 1911 in UTF-8 mode. The code that uses this table must know
|
1+LINK_SIZE, /* ONCE_NC */ \ |
1+LINK_SIZE, /* ONCE_NC */ \ |
1+LINK_SIZE, /* BRA */ \ |
1+LINK_SIZE, /* BRA */ \ |
1+LINK_SIZE, /* BRAPOS */ \ |
1+LINK_SIZE, /* BRAPOS */ \ |
3+LINK_SIZE, /* CBRA */ \ | 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ |
3+LINK_SIZE, /* CBRAPOS */ \ | 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ |
1+LINK_SIZE, /* COND */ \ |
1+LINK_SIZE, /* COND */ \ |
1+LINK_SIZE, /* SBRA */ \ |
1+LINK_SIZE, /* SBRA */ \ |
1+LINK_SIZE, /* SBRAPOS */ \ |
1+LINK_SIZE, /* SBRAPOS */ \ |
3+LINK_SIZE, /* SCBRA */ \ | 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ |
3+LINK_SIZE, /* SCBRAPOS */ \ | 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ |
1+LINK_SIZE, /* SCOND */ \ |
1+LINK_SIZE, /* SCOND */ \ |
3, 3, /* CREF, NCREF */ \ | 1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \ |
3, 3, /* RREF, NRREF */ \ | 1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \ |
1, /* DEF */ \ |
1, /* DEF */ \ |
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ |
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ |
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ |
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ |
1, 3, /* SKIP, SKIP_ARG */ \ |
1, 3, /* SKIP, SKIP_ARG */ \ |
1, 3, /* THEN, THEN_ARG */ \ |
1, 3, /* THEN, THEN_ARG */ \ |
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ |
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ |
3, 1 /* CLOSE, SKIPZERO */ | 1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ |
|
|
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" |
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" |
condition. */ |
condition. */ |
Line 1665 enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6,
|
Line 1945 enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6,
|
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, |
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, |
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, |
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, |
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, |
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, |
ERR70, ERR71, ERR72, ERRCOUNT }; | ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT }; |
|
|
|
/* JIT compiling modes. The function list is indexed by them. */ |
|
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, |
|
JIT_NUMBER_OF_COMPILE_MODES }; |
|
|
/* The real format of the start of the pcre block; the index of names and the |
/* The real format of the start of the pcre block; the index of names and the |
code vector run on as long as necessary after the end. We store an explicit |
code vector run on as long as necessary after the end. We store an explicit |
offset to the name table so that if a regex is compiled on one host, saved, and |
offset to the name table so that if a regex is compiled on one host, saved, and |
Line 1684 fields are present. Currently PCRE always sets the dum
|
Line 1968 fields are present. Currently PCRE always sets the dum
|
NOTE NOTE NOTE |
NOTE NOTE NOTE |
*/ |
*/ |
|
|
typedef struct real_pcre { | #ifdef COMPILE_PCRE8 |
| #define REAL_PCRE real_pcre |
| #else |
| #define REAL_PCRE real_pcre16 |
| #endif |
| |
| typedef struct REAL_PCRE { |
pcre_uint32 magic_number; |
pcre_uint32 magic_number; |
pcre_uint32 size; /* Total that was malloced */ |
pcre_uint32 size; /* Total that was malloced */ |
pcre_uint32 options; /* Public options */ |
pcre_uint32 options; /* Public options */ |
pcre_uint16 flags; /* Private flags */ |
pcre_uint16 flags; /* Private flags */ |
pcre_uint16 dummy1; /* For future use */ | pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ |
pcre_uint16 top_bracket; | pcre_uint16 top_bracket; /* Highest numbered group */ |
pcre_uint16 top_backref; | pcre_uint16 top_backref; /* Highest numbered back reference */ |
pcre_uint16 first_byte; | pcre_uint16 first_char; /* Starting character */ |
pcre_uint16 req_byte; | pcre_uint16 req_char; /* This character must be seen */ |
pcre_uint16 name_table_offset; /* Offset to name table that follows */ |
pcre_uint16 name_table_offset; /* Offset to name table that follows */ |
pcre_uint16 name_entry_size; /* Size of any name items */ |
pcre_uint16 name_entry_size; /* Size of any name items */ |
pcre_uint16 name_count; /* Number of name items */ |
pcre_uint16 name_count; /* Number of name items */ |
pcre_uint16 ref_count; /* Reference count */ |
pcre_uint16 ref_count; /* Reference count */ |
|
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ |
|
const pcre_uint8 *nullpad; /* NULL padding */ |
|
} REAL_PCRE; |
|
|
const unsigned char *tables; /* Pointer to tables or NULL for std */ |
|
const unsigned char *nullpad; /* NULL padding */ |
|
} real_pcre; |
|
|
|
/* The format of the block used to store data from pcre_study(). The same |
/* The format of the block used to store data from pcre_study(). The same |
remark (see NOTE above) about extending this structure applies. */ |
remark (see NOTE above) about extending this structure applies. */ |
|
|
typedef struct pcre_study_data { |
typedef struct pcre_study_data { |
pcre_uint32 size; /* Total that was malloced */ |
pcre_uint32 size; /* Total that was malloced */ |
pcre_uint32 flags; /* Private flags */ |
pcre_uint32 flags; /* Private flags */ |
uschar start_bits[32]; /* Starting char bits */ | pcre_uint8 start_bits[32]; /* Starting char bits */ |
pcre_uint32 minlength; /* Minimum subject length */ |
pcre_uint32 minlength; /* Minimum subject length */ |
} pcre_study_data; |
} pcre_study_data; |
|
|
Line 1728 typedef struct open_capitem {
|
Line 2017 typedef struct open_capitem {
|
doing the compiling, so that they are thread-safe. */ |
doing the compiling, so that they are thread-safe. */ |
|
|
typedef struct compile_data { |
typedef struct compile_data { |
const uschar *lcc; /* Points to lower casing table */ | const pcre_uint8 *lcc; /* Points to lower casing table */ |
const uschar *fcc; /* Points to case-flipping table */ | const pcre_uint8 *fcc; /* Points to case-flipping table */ |
const uschar *cbits; /* Points to character type table */ | const pcre_uint8 *cbits; /* Points to character type table */ |
const uschar *ctypes; /* Points to table of type maps */ | const pcre_uint8 *ctypes; /* Points to table of type maps */ |
const uschar *start_workspace;/* The start of working space */ | const pcre_uchar *start_workspace;/* The start of working space */ |
const uschar *start_code; /* The start of the compiled code */ | const pcre_uchar *start_code; /* The start of the compiled code */ |
const uschar *start_pattern; /* The start of the pattern */ | const pcre_uchar *start_pattern; /* The start of the pattern */ |
const uschar *end_pattern; /* The end of the pattern */ | const pcre_uchar *end_pattern; /* The end of the pattern */ |
open_capitem *open_caps; /* Chain of open capture items */ | open_capitem *open_caps; /* Chain of open capture items */ |
uschar *hwm; /* High watermark of workspace */ | pcre_uchar *hwm; /* High watermark of workspace */ |
uschar *name_table; /* The name/number table */ | pcre_uchar *name_table; /* The name/number table */ |
int names_found; /* Number of entries so far */ | int names_found; /* Number of entries so far */ |
int name_entry_size; /* Size of each entry */ | int name_entry_size; /* Size of each entry */ |
int workspace_size; /* Size of workspace */ | int workspace_size; /* Size of workspace */ |
int bracount; /* Count of capturing parens as we compile */ | int bracount; /* Count of capturing parens as we compile */ |
int final_bracount; /* Saved value after first pass */ | int final_bracount; /* Saved value after first pass */ |
int top_backref; /* Maximum back reference */ | int max_lookbehind; /* Maximum lookbehind (characters) */ |
unsigned int backref_map; /* Bitmap of low back refs */ | int top_backref; /* Maximum back reference */ |
int assert_depth; /* Depth of nested assertions */ | unsigned int backref_map; /* Bitmap of low back refs */ |
int external_options; /* External (initial) options */ | int assert_depth; /* Depth of nested assertions */ |
int external_flags; /* External flag bits to be set */ | int external_options; /* External (initial) options */ |
int req_varyopt; /* "After variable item" flag for reqbyte */ | int external_flags; /* External flag bits to be set */ |
BOOL had_accept; /* (*ACCEPT) encountered */ | int req_varyopt; /* "After variable item" flag for reqbyte */ |
BOOL check_lookbehind; /* Lookbehinds need later checking */ | BOOL had_accept; /* (*ACCEPT) encountered */ |
int nltype; /* Newline type */ | BOOL check_lookbehind; /* Lookbehinds need later checking */ |
int nllen; /* Newline string length */ | int nltype; /* Newline type */ |
uschar nl[4]; /* Newline string when fixed length */ | int nllen; /* Newline string length */ |
| pcre_uchar nl[4]; /* Newline string when fixed length */ |
} compile_data; |
} compile_data; |
|
|
/* Structure for maintaining a chain of pointers to the currently incomplete |
/* Structure for maintaining a chain of pointers to the currently incomplete |
Line 1762 branches, for testing for left recursion while compili
|
Line 2052 branches, for testing for left recursion while compili
|
|
|
typedef struct branch_chain { |
typedef struct branch_chain { |
struct branch_chain *outer; |
struct branch_chain *outer; |
uschar *current_branch; | pcre_uchar *current_branch; |
} branch_chain; |
} branch_chain; |
|
|
/* Structure for items in a linked list that represents an explicit recursive |
/* Structure for items in a linked list that represents an explicit recursive |
Line 1773 typedef struct recursion_info {
|
Line 2063 typedef struct recursion_info {
|
int group_num; /* Number of group that was called */ |
int group_num; /* Number of group that was called */ |
int *offset_save; /* Pointer to start of saved offsets */ |
int *offset_save; /* Pointer to start of saved offsets */ |
int saved_max; /* Number of saved offsets */ |
int saved_max; /* Number of saved offsets */ |
USPTR subject_position; /* Position at start of recursion */ | PCRE_PUCHAR subject_position; /* Position at start of recursion */ |
} recursion_info; |
} recursion_info; |
|
|
/* A similar structure for pcre_dfa_exec(). */ |
/* A similar structure for pcre_dfa_exec(). */ |
Line 1781 typedef struct recursion_info {
|
Line 2071 typedef struct recursion_info {
|
typedef struct dfa_recursion_info { |
typedef struct dfa_recursion_info { |
struct dfa_recursion_info *prevrec; |
struct dfa_recursion_info *prevrec; |
int group_num; |
int group_num; |
USPTR subject_position; | PCRE_PUCHAR subject_position; |
} dfa_recursion_info; |
} dfa_recursion_info; |
|
|
/* Structure for building a chain of data for holding the values of the subject |
/* Structure for building a chain of data for holding the values of the subject |
Line 1791 pcre_exec(). */
|
Line 2081 pcre_exec(). */
|
|
|
typedef struct eptrblock { |
typedef struct eptrblock { |
struct eptrblock *epb_prev; |
struct eptrblock *epb_prev; |
USPTR epb_saved_eptr; | PCRE_PUCHAR epb_saved_eptr; |
} eptrblock; |
} eptrblock; |
|
|
|
|
Line 1802 typedef struct match_data {
|
Line 2092 typedef struct match_data {
|
unsigned long int match_call_count; /* As it says */ |
unsigned long int match_call_count; /* As it says */ |
unsigned long int match_limit; /* As it says */ |
unsigned long int match_limit; /* As it says */ |
unsigned long int match_limit_recursion; /* As it says */ |
unsigned long int match_limit_recursion; /* As it says */ |
int *offset_vector; /* Offset vector */ | int *offset_vector; /* Offset vector */ |
int offset_end; /* One past the end */ | int offset_end; /* One past the end */ |
int offset_max; /* The maximum usable for return data */ | int offset_max; /* The maximum usable for return data */ |
int nltype; /* Newline type */ | int nltype; /* Newline type */ |
int nllen; /* Newline string length */ | int nllen; /* Newline string length */ |
int name_count; /* Number of names in name table */ | int name_count; /* Number of names in name table */ |
int name_entry_size; /* Size of entry in names table */ | int name_entry_size; /* Size of entry in names table */ |
uschar *name_table; /* Table of names */ | pcre_uchar *name_table; /* Table of names */ |
uschar nl[4]; /* Newline string when fixed */ | pcre_uchar nl[4]; /* Newline string when fixed */ |
const uschar *lcc; /* Points to lower casing table */ | const pcre_uint8 *lcc; /* Points to lower casing table */ |
const uschar *ctypes; /* Points to table of type maps */ | const pcre_uint8 *fcc; /* Points to case-flipping table */ |
BOOL offset_overflow; /* Set if too many extractions */ | const pcre_uint8 *ctypes; /* Points to table of type maps */ |
BOOL notbol; /* NOTBOL flag */ | BOOL offset_overflow; /* Set if too many extractions */ |
BOOL noteol; /* NOTEOL flag */ | BOOL notbol; /* NOTBOL flag */ |
BOOL utf8; /* UTF8 flag */ | BOOL noteol; /* NOTEOL flag */ |
BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ | BOOL utf; /* UTF-8 / UTF-16 flag */ |
BOOL use_ucp; /* PCRE_UCP flag */ | BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ |
BOOL endonly; /* Dollar not before final \n */ | BOOL use_ucp; /* PCRE_UCP flag */ |
BOOL notempty; /* Empty string match not wanted */ | BOOL endonly; /* Dollar not before final \n */ |
BOOL notempty_atstart; /* Empty string match at start not wanted */ | BOOL notempty; /* Empty string match not wanted */ |
BOOL hitend; /* Hit the end of the subject at some point */ | BOOL notempty_atstart; /* Empty string match at start not wanted */ |
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ | BOOL hitend; /* Hit the end of the subject at some point */ |
BOOL hasthen; /* Pattern contains (*THEN) */ | BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ |
BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ | BOOL hasthen; /* Pattern contains (*THEN) */ |
const uschar *start_code; /* For use when recursing */ | BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ |
USPTR start_subject; /* Start of the subject string */ | const pcre_uchar *start_code; /* For use when recursing */ |
USPTR end_subject; /* End of the subject string */ | PCRE_PUCHAR start_subject; /* Start of the subject string */ |
USPTR start_match_ptr; /* Start of matched string */ | PCRE_PUCHAR end_subject; /* End of the subject string */ |
USPTR end_match_ptr; /* Subject position at end match */ | PCRE_PUCHAR start_match_ptr; /* Start of matched string */ |
USPTR start_used_ptr; /* Earliest consulted character */ | PCRE_PUCHAR end_match_ptr; /* Subject position at end match */ |
int partial; /* PARTIAL options */ | PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */ |
int end_offset_top; /* Highwater mark at end of match */ | int partial; /* PARTIAL options */ |
int capture_last; /* Most recent capture number */ | int end_offset_top; /* Highwater mark at end of match */ |
int start_offset; /* The start offset value */ | int capture_last; /* Most recent capture number */ |
int match_function_type; /* Set for certain special calls of MATCH() */ | int start_offset; /* The start offset value */ |
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ | int match_function_type; /* Set for certain special calls of MATCH() */ |
int eptrn; /* Next free eptrblock */ | eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ |
recursion_info *recursive; /* Linked list of recursion data */ | int eptrn; /* Next free eptrblock */ |
void *callout_data; /* To pass back to callouts */ | recursion_info *recursive; /* Linked list of recursion data */ |
const uschar *mark; /* Mark pointer to pass back on success */ | void *callout_data; /* To pass back to callouts */ |
const uschar *nomatch_mark; /* Mark pointer to pass back on failure */ | const pcre_uchar *mark; /* Mark pointer to pass back on success */ |
const uschar *once_target; /* Where to back up to for atomic groups */ | const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */ |
| const pcre_uchar *once_target; /* Where to back up to for atomic groups */ |
| #ifdef NO_RECURSE |
| void *match_frames_base; /* For remembering malloc'd frames */ |
| #endif |
} match_data; |
} match_data; |
|
|
/* A similar structure is used for the same purpose by the DFA matching |
/* A similar structure is used for the same purpose by the DFA matching |
functions. */ |
functions. */ |
|
|
typedef struct dfa_match_data { |
typedef struct dfa_match_data { |
const uschar *start_code; /* Start of the compiled pattern */ | const pcre_uchar *start_code; /* Start of the compiled pattern */ |
const uschar *start_subject; /* Start of the subject string */ | const pcre_uchar *start_subject ; /* Start of the subject string */ |
const uschar *end_subject; /* End of subject string */ | const pcre_uchar *end_subject; /* End of subject string */ |
const uschar *start_used_ptr; /* Earliest consulted character */ | const pcre_uchar *start_used_ptr; /* Earliest consulted character */ |
const uschar *tables; /* Character tables */ | const pcre_uint8 *tables; /* Character tables */ |
int start_offset; /* The start offset value */ | int start_offset; /* The start offset value */ |
int moptions; /* Match options */ | int moptions; /* Match options */ |
int poptions; /* Pattern options */ | int poptions; /* Pattern options */ |
int nltype; /* Newline type */ | int nltype; /* Newline type */ |
int nllen; /* Newline string length */ | int nllen; /* Newline string length */ |
uschar nl[4]; /* Newline string when fixed */ | pcre_uchar nl[4]; /* Newline string when fixed */ |
void *callout_data; /* To pass back to callouts */ | void *callout_data; /* To pass back to callouts */ |
dfa_recursion_info *recursive; /* Linked list of recursion data */ | dfa_recursion_info *recursive; /* Linked list of recursion data */ |
} dfa_match_data; |
} dfa_match_data; |
|
|
/* Bit definitions for entries in the pcre_ctypes table. */ |
/* Bit definitions for entries in the pcre_ctypes table. */ |
Line 1898 total length. */
|
Line 2192 total length. */
|
#define ctypes_offset (cbits_offset + cbit_length) |
#define ctypes_offset (cbits_offset + cbit_length) |
#define tables_length (ctypes_offset + 256) |
#define tables_length (ctypes_offset + 256) |
|
|
|
/* Internal function and data prefixes. */ |
|
|
|
#ifdef COMPILE_PCRE8 |
|
#ifndef PUBL |
|
#define PUBL(name) pcre_##name |
|
#endif |
|
#ifndef PRIV |
|
#define PRIV(name) _pcre_##name |
|
#endif |
|
#else /* COMPILE_PCRE8 */ |
|
#ifdef COMPILE_PCRE16 |
|
#ifndef PUBL |
|
#define PUBL(name) pcre16_##name |
|
#endif |
|
#ifndef PRIV |
|
#define PRIV(name) _pcre16_##name |
|
#endif |
|
#else |
|
#error Unsupported compiling mode |
|
#endif /* COMPILE_PCRE16 */ |
|
#endif /* COMPILE_PCRE8 */ |
|
|
/* Layout of the UCP type table that translates property names into types and |
/* Layout of the UCP type table that translates property names into types and |
codes. Each entry used to point directly to a name, but to reduce the number of |
codes. Each entry used to point directly to a name, but to reduce the number of |
relocations in shared libraries, it now has an offset into a single string |
relocations in shared libraries, it now has an offset into a single string |
Line 1915 of the exported public functions. They have to be "ext
|
Line 2231 of the exported public functions. They have to be "ext
|
but are not part of the PCRE public API. The data for these tables is in the |
but are not part of the PCRE public API. The data for these tables is in the |
pcre_tables.c module. */ |
pcre_tables.c module. */ |
|
|
extern const int _pcre_utf8_table1[]; | #ifdef COMPILE_PCRE8 |
extern const int _pcre_utf8_table2[]; | |
extern const int _pcre_utf8_table3[]; | |
extern const uschar _pcre_utf8_table4[]; | |
|
|
#ifdef SUPPORT_JIT | extern const int PRIV(utf8_table1)[]; |
extern const uschar _pcre_utf8_char_sizes[]; | extern const int PRIV(utf8_table1_size); |
#endif | extern const int PRIV(utf8_table2)[]; |
| extern const int PRIV(utf8_table3)[]; |
| extern const pcre_uint8 PRIV(utf8_table4)[]; |
|
|
extern const int _pcre_utf8_table1_size; | #endif /* COMPILE_PCRE8 */ |
|
|
extern const char _pcre_utt_names[]; | extern const char PRIV(utt_names)[]; |
extern const ucp_type_table _pcre_utt[]; | extern const ucp_type_table PRIV(utt)[]; |
extern const int _pcre_utt_size; | extern const int PRIV(utt_size); |
|
|
extern const uschar _pcre_default_tables[]; | extern const pcre_uint8 PRIV(default_tables)[]; |
|
|
extern const uschar _pcre_OP_lengths[]; | extern const pcre_uint8 PRIV(OP_lengths)[]; |
|
|
|
|
/* Internal shared functions. These are functions that are used by more than |
/* Internal shared functions. These are functions that are used by more than |
one of the exported public functions. They have to be "external" in the C |
one of the exported public functions. They have to be "external" in the C |
sense, but are not part of the PCRE public API. */ |
sense, but are not part of the PCRE public API. */ |
|
|
extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int); | /* String comparison functions. */ |
extern BOOL _pcre_is_newline(USPTR, int, USPTR, int *, BOOL); | #ifdef COMPILE_PCRE8 |
extern int _pcre_ord2utf8(int, uschar *); | |
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, | |
const pcre_study_data *, pcre_study_data *); | |
extern int _pcre_valid_utf8(USPTR, int, int *); | |
extern BOOL _pcre_was_newline(USPTR, int, USPTR, int *, BOOL); | |
extern BOOL _pcre_xclass(int, const uschar *); | |
|
|
|
#define STRCMP_UC_UC(str1, str2) \ |
|
strcmp((char *)(str1), (char *)(str2)) |
|
#define STRCMP_UC_C8(str1, str2) \ |
|
strcmp((char *)(str1), (str2)) |
|
#define STRNCMP_UC_UC(str1, str2, num) \ |
|
strncmp((char *)(str1), (char *)(str2), (num)) |
|
#define STRNCMP_UC_C8(str1, str2, num) \ |
|
strncmp((char *)(str1), (str2), (num)) |
|
#define STRLEN_UC(str) strlen((const char *)str) |
|
|
|
#else |
|
|
|
extern int PRIV(strcmp_uc_uc)(const pcre_uchar *, |
|
const pcre_uchar *); |
|
extern int PRIV(strcmp_uc_c8)(const pcre_uchar *, |
|
const char *); |
|
extern int PRIV(strncmp_uc_uc)(const pcre_uchar *, |
|
const pcre_uchar *, unsigned int num); |
|
extern int PRIV(strncmp_uc_c8)(const pcre_uchar *, |
|
const char *, unsigned int num); |
|
extern unsigned int PRIV(strlen_uc)(const pcre_uchar *str); |
|
|
|
#define STRCMP_UC_UC(str1, str2) \ |
|
PRIV(strcmp_uc_uc)((str1), (str2)) |
|
#define STRCMP_UC_C8(str1, str2) \ |
|
PRIV(strcmp_uc_c8)((str1), (str2)) |
|
#define STRNCMP_UC_UC(str1, str2, num) \ |
|
PRIV(strncmp_uc_uc)((str1), (str2), (num)) |
|
#define STRNCMP_UC_C8(str1, str2, num) \ |
|
PRIV(strncmp_uc_c8)((str1), (str2), (num)) |
|
#define STRLEN_UC(str) PRIV(strlen_uc)(str) |
|
|
|
#endif /* COMPILE_PCRE8 */ |
|
|
|
extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int); |
|
extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, |
|
int *, BOOL); |
|
extern int PRIV(ord2utf)(pcre_uint32, pcre_uchar *); |
|
extern int PRIV(valid_utf)(PCRE_PUCHAR, int, int *); |
|
extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, |
|
int *, BOOL); |
|
extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL); |
|
|
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
extern void _pcre_jit_compile(const real_pcre *, pcre_extra *); | extern void PRIV(jit_compile)(const REAL_PCRE *, |
extern int _pcre_jit_exec(const real_pcre *, void *, PCRE_SPTR, | PUBL(extra) *, int); |
int, int, int, int, int *, int); | extern int PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *, |
extern void _pcre_jit_free(void *); | const pcre_uchar *, int, int, int, int *, int); |
extern int _pcre_jit_get_size(void *); | extern void PRIV(jit_free)(void *); |
| extern int PRIV(jit_get_size)(void *); |
| extern const char* PRIV(jit_get_target)(void); |
#endif |
#endif |
|
|
/* Unicode character database (UCD) */ |
/* Unicode character database (UCD) */ |
|
|
typedef struct { |
typedef struct { |
uschar script; | pcre_uint8 script; |
uschar chartype; | pcre_uint8 chartype; |
pcre_int32 other_case; |
pcre_int32 other_case; |
} ucd_record; |
} ucd_record; |
|
|
extern const ucd_record _pcre_ucd_records[]; | extern const ucd_record PRIV(ucd_records)[]; |
extern const uschar _pcre_ucd_stage1[]; | extern const pcre_uint8 PRIV(ucd_stage1)[]; |
extern const pcre_uint16 _pcre_ucd_stage2[]; | extern const pcre_uint16 PRIV(ucd_stage2)[]; |
extern const int _pcre_ucp_gentype[]; | extern const int PRIV(ucp_gentype)[]; |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
extern const int _pcre_ucp_typerange[]; | extern const int PRIV(ucp_typerange)[]; |
#endif |
#endif |
|
|
|
#ifdef SUPPORT_UCP |
/* UCD access macros */ |
/* UCD access macros */ |
|
|
#define UCD_BLOCK_SIZE 128 |
#define UCD_BLOCK_SIZE 128 |
#define GET_UCD(ch) (_pcre_ucd_records + \ | #define GET_UCD(ch) (PRIV(ucd_records) + \ |
_pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \ | PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \ |
UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE]) |
UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE]) |
|
|
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype |
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype |
#define UCD_SCRIPT(ch) GET_UCD(ch)->script |
#define UCD_SCRIPT(ch) GET_UCD(ch)->script |
#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)] | #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] |
#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) |
#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) |
|
|
|
#endif /* SUPPORT_UCP */ |
|
|
#endif |
#endif |
|
|