embedaddon/pcre/ucp.h - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / ucp.h
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:55 2013 UTC (10 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_33, HEAD

8.33

/*************************************************
*          Unicode Property Table handler        *
*************************************************/

/* NOTE(review): _UCP_H has the form of a reserved identifier (underscore
followed by an upper case letter). It is left unchanged here so that any
existing test of the guard macro keeps working. */

#ifndef _UCP_H
#define _UCP_H

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

None of the enumerators below has an explicit initializer, so each enum is
numbered consecutively from 0 in declaration order. Reordering, inserting or
removing an entry therefore changes the value of every entry that follows. */

/* These are the general character categories (values 0 to 6). */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories (values 0 to 29). The first
letter of each name matches one of the general categories above. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The largest
value at present is ucp_gbOther (12). */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};

/* These are the script identifications (values 0 to 101). The first group is
in alphabetical order; scripts added for later Unicode releases are appended in
per-release groups so that the existing values never change. */

enum {
  ucp_Arabic,              /* 0 */
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,                  /* 60 */
  /* New for Unicode 5.0: */
  ucp_Balinese,            /* 61 */
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,          /* 65 */
  /* New for Unicode 5.1: */
  ucp_Carian,              /* 66 */
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,                 /* 76 */
  /* New for Unicode 5.2: */
  ucp_Avestan,             /* 77 */
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,            /* 91 */
  /* New for Unicode 6.0.0: */
  ucp_Batak,               /* 92 */
  ucp_Brahmi,
  ucp_Mandaic,             /* 94 */
  /* New for Unicode 6.1.0: */
  ucp_Chakma,              /* 95 */
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri                /* 101 */
};

#endif

/* End of ucp.h */