|
version 1.1.1.1, 2012/02/21 23:05:51
|
version 1.1.1.2, 2012/02/21 23:50:25
|
|
Line 6
|
Line 6
|
| and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| |
|
| Written by Philip Hazel |
Written by Philip Hazel |
| Copyright (c) 1997-2009 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
| |
|
| ----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
|
Line 37 POSSIBILITY OF SUCH DAMAGE.
|
Line 37 POSSIBILITY OF SUCH DAMAGE.
|
| ----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| */ |
*/ |
| |
|
| |
#ifndef PCRE_INCLUDED |
| |
|
| /* This module contains some fixed tables that are used by more than one of the |
/* This module contains some fixed tables that are used by more than one of the |
| PCRE code modules. The tables are also #included by the pcretest program, which |
PCRE code modules. The tables are also #included by the pcretest program, which |
|
Line 50 clashes with the library. */
|
Line 51 clashes with the library. */
|
| |
|
| #include "pcre_internal.h" |
#include "pcre_internal.h" |
| |
|
| |
#endif /* PCRE_INCLUDED */ |
| |
|
| /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
| the definition is next to the definition of the opcodes in pcre_internal.h. */ |
the definition is next to the definition of the opcodes in pcre_internal.h. */ |
| |
|
| const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; | const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS }; |
| |
|
| |
|
| |
|
|
Line 65 const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
Line 67 const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
| /* These are the breakpoints for different numbers of bytes in a UTF-8 |
/* These are the breakpoints for different numbers of bytes in a UTF-8 |
| character. */ |
character. */ |
| |
|
| #ifdef SUPPORT_UTF8 | #if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ |
| | || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16) |
| |
|
| const int _pcre_utf8_table1[] = | /* These tables are also required by pcretest in 16 bit mode. */ |
| | |
| | const int PRIV(utf8_table1)[] = |
| { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
| |
|
| const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int); | const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); |
| |
|
| /* These are the indicator bits and the mask for the data bits to set in the |
/* These are the indicator bits and the mask for the data bits to set in the |
| first byte of a character, indexed by the number of additional bytes. */ |
first byte of a character, indexed by the number of additional bytes. */ |
| |
|
| const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; | const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
| const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; | const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
| |
|
| /* Table of the number of extra bytes, indexed by the first byte masked with |
/* Table of the number of extra bytes, indexed by the first byte masked with |
| 0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ |
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ |
| |
|
| const uschar _pcre_utf8_table4[] = { | const pcre_uint8 PRIV(utf8_table4)[] = { |
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
| 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
| |
|
| #ifdef SUPPORT_JIT | #endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/ |
| /* Full table of the number of extra bytes when the | |
| character code is greater or equal than 0xc0. | |
| See _pcre_utf8_table4 above. */ | |
| |
|
| const uschar _pcre_utf8_char_sizes[] = { | #ifdef SUPPORT_UTF |
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
| 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
| 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4, | |
| }; | |
| #endif | |
| |
|
| /* Table to translate from particular type value to the general value. */ |
/* Table to translate from particular type value to the general value. */ |
| |
|
| const int _pcre_ucp_gentype[] = { | const int PRIV(ucp_gentype)[] = { |
| ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ |
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ |
| ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ |
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ |
| ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ |
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ |
|
Line 114 const int _pcre_ucp_gentype[] = {
|
Line 110 const int _pcre_ucp_gentype[] = {
|
| }; |
}; |
| |
|
| #ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| /* This table reverses _pcre_ucp_gentype. We can save the cost | /* This table reverses PRIV(ucp_gentype). We can save the cost |
| of a memory load. */ |
of a memory load. */ |
| |
|
| const int _pcre_ucp_typerange[] = { | const int PRIV(ucp_typerange)[] = { |
| ucp_Cc, ucp_Cs, |
ucp_Cc, ucp_Cs, |
| ucp_Ll, ucp_Lu, |
ucp_Ll, ucp_Lu, |
| ucp_Mc, ucp_Mn, |
ucp_Mc, ucp_Mn, |
|
Line 126 const int _pcre_ucp_typerange[] = {
|
Line 122 const int _pcre_ucp_typerange[] = {
|
| ucp_Sc, ucp_So, |
ucp_Sc, ucp_So, |
| ucp_Zl, ucp_Zs, |
ucp_Zl, ucp_Zs, |
| }; |
}; |
| #endif | #endif /* SUPPORT_JIT */ |
| |
|
| /* The pcre_utt[] table below translates Unicode property names into type and |
/* The pcre_utt[] table below translates Unicode property names into type and |
| code values. It is searched by binary chop, so must be in collating sequence of |
code values. It is searched by binary chop, so must be in collating sequence of |
|
Line 284 strings to make sure that UTF-8 support works on EBCDI
|
Line 280 strings to make sure that UTF-8 support works on EBCDI
|
| #define STRING_Zp0 STR_Z STR_p "\0" |
#define STRING_Zp0 STR_Z STR_p "\0" |
| #define STRING_Zs0 STR_Z STR_s "\0" |
#define STRING_Zs0 STR_Z STR_s "\0" |
| |
|
| const char _pcre_utt_names[] = | const char PRIV(utt_names)[] = |
| STRING_Any0 |
STRING_Any0 |
| STRING_Arabic0 |
STRING_Arabic0 |
| STRING_Armenian0 |
STRING_Armenian0 |
|
Line 424 const char _pcre_utt_names[] =
|
Line 420 const char _pcre_utt_names[] =
|
| STRING_Zp0 |
STRING_Zp0 |
| STRING_Zs0; |
STRING_Zs0; |
| |
|
| const ucp_type_table _pcre_utt[] = { | const ucp_type_table PRIV(utt)[] = { |
| { 0, PT_ANY, 0 }, |
{ 0, PT_ANY, 0 }, |
| { 4, PT_SC, ucp_Arabic }, |
{ 4, PT_SC, ucp_Arabic }, |
| { 11, PT_SC, ucp_Armenian }, |
{ 11, PT_SC, ucp_Armenian }, |
|
Line 565 const ucp_type_table _pcre_utt[] = {
|
Line 561 const ucp_type_table _pcre_utt[] = {
|
| { 961, PT_PC, ucp_Zs } |
{ 961, PT_PC, ucp_Zs } |
| }; |
}; |
| |
|
| const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); | const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); |
| |
|
| #endif /* SUPPORT_UTF8 */ | #endif /* SUPPORT_UTF */ |
| |
|
| /* End of pcre_tables.c */ |
/* End of pcre_tables.c */ |