embedaddon/pcre/ucp.h - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / ucp.h
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:03 2014 UTC (9 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD

pcre 8.34

/*************************************************
*          Unicode Property Table handler        *
*************************************************/

#ifndef _UCP_H
#define _UCP_H

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c.

Every enum below relies on implicit numbering (first enumerator is 0, each
subsequent one is the previous plus 1), so the order of the enumerators IS the
numeric value. Do not reorder or insert entries in the middle. */

/* These are the general character categories. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};

/* These are the script identifications. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri
};

#endif

/* End of ucp.h */