Annotation of embedaddon/pcre/ucp.h, revision 1.1.1.4
1.1 misho 1: /*************************************************
2: * Unicode Property Table handler *
3: *************************************************/
4:
5: #ifndef _UCP_H
6: #define _UCP_H
7:
8: /* This file contains definitions of the property values that are returned by
9: the UCD access macros. New values that are added for new releases of Unicode
1.1.1.3 misho 10: should always be at the end of each enum, for backwards compatibility.
11:
12: IMPORTANT: Note also that the specific numeric values of the enums have to be
13: the same as the values that are generated by the maint/MultiStage2.py script,
1.1.1.4 ! misho 14: where the equivalent property descriptive names are listed in vectors.
! 15:
! 16: ALSO: The specific values of the first two enums are assumed for the table
! 17: called catposstab in pcre_compile.c. */
1.1 misho 18:
19: /* These are the general character categories. The enumerators have no
explicit initializers, so they take the values 0 (ucp_C) through 6 (ucp_Z).
As the comment at the top of this file states, these values must be the same
as those generated by maint/MultiStage2.py and are assumed by the catposstab
table in pcre_compile.c; any new value must therefore be added at the end. */
20:
21: enum {
22: ucp_C, /* Other */
23: ucp_L, /* Letter */
24: ucp_M, /* Mark */
25: ucp_N, /* Number */
26: ucp_P, /* Punctuation */
27: ucp_S, /* Symbol */
28: ucp_Z /* Separator */
29: };
30:
1.1.1.3 misho 31: /* These are the particular character categories. The
enumerators are listed in alphabetical order of their two-letter names and,
having no explicit initializers, take the values 0 (ucp_Cc) through 29
(ucp_Zs). The first letter of each name is the letter of one of the general
categories (C, L, M, N, P, S, Z). As the comment at the top of this file
states, these values must be the same as those generated by
maint/MultiStage2.py and are assumed by the catposstab table in
pcre_compile.c; any new value must therefore be added at the end. */
1.1 misho 32:
33: enum {
34: ucp_Cc, /* Control */
35: ucp_Cf, /* Format */
36: ucp_Cn, /* Unassigned */
37: ucp_Co, /* Private use */
38: ucp_Cs, /* Surrogate */
39: ucp_Ll, /* Lower case letter */
40: ucp_Lm, /* Modifier letter */
41: ucp_Lo, /* Other letter */
42: ucp_Lt, /* Title case letter */
43: ucp_Lu, /* Upper case letter */
44: ucp_Mc, /* Spacing mark */
45: ucp_Me, /* Enclosing mark */
46: ucp_Mn, /* Non-spacing mark */
47: ucp_Nd, /* Decimal number */
48: ucp_Nl, /* Letter number */
49: ucp_No, /* Other number */
50: ucp_Pc, /* Connector punctuation */
51: ucp_Pd, /* Dash punctuation */
52: ucp_Pe, /* Close punctuation */
53: ucp_Pf, /* Final punctuation */
54: ucp_Pi, /* Initial punctuation */
55: ucp_Po, /* Other punctuation */
56: ucp_Ps, /* Open punctuation */
57: ucp_Sc, /* Currency symbol */
58: ucp_Sk, /* Modifier symbol */
59: ucp_Sm, /* Mathematical symbol */
60: ucp_So, /* Other symbol */
61: ucp_Zl, /* Line separator */
62: ucp_Zp, /* Paragraph separator */
63: ucp_Zs /* Space separator */
64: };
65:
1.1.1.3 misho 66: /* These are grapheme break properties. Note that the code for processing them
67: assumes that the values are less than 16. If more values are added that take
68: the number to 16 or more, the code will have to be rewritten.
The enumerators have no explicit initializers; the number in each trailing
comment is the value the compiler assigns to that enumerator. The largest
value currently defined is 12 (ucp_gbOther), so three more values (13 to 15)
can be appended before the limit described above is reached. */
69:
70: enum {
71: ucp_gbCR, /* 0 */
72: ucp_gbLF, /* 1 */
73: ucp_gbControl, /* 2 */
74: ucp_gbExtend, /* 3 */
75: ucp_gbPrepend, /* 4 */
76: ucp_gbSpacingMark, /* 5 */
77: ucp_gbL, /* 6 Hangul syllable type L */
78: ucp_gbV, /* 7 Hangul syllable type V */
79: ucp_gbT, /* 8 Hangul syllable type T */
80: ucp_gbLV, /* 9 Hangul syllable type LV */
81: ucp_gbLVT, /* 10 Hangul syllable type LVT */
82: ucp_gbRegionalIndicator, /* 11 */
83: ucp_gbOther /* 12 */
84: };
85:
1.1 misho 86: /* These are the script identifications. The enumerators have
no explicit initializers, so values are assigned in order of appearance,
starting at 0 for ucp_Arabic. The first group (ucp_Arabic to ucp_Yi) is in
alphabetical order; each later group, introduced by a "New for Unicode ..."
comment, is alphabetical within itself and is appended after the previous
group rather than merged into it. This follows the rule given in the comment
at the top of this file: new values go at the end of the enum so that the
existing values, which must match those generated by maint/MultiStage2.py,
do not change. */
87:
88: enum {
89: ucp_Arabic,
90: ucp_Armenian,
91: ucp_Bengali,
92: ucp_Bopomofo,
93: ucp_Braille,
94: ucp_Buginese,
95: ucp_Buhid,
96: ucp_Canadian_Aboriginal,
97: ucp_Cherokee,
98: ucp_Common,
99: ucp_Coptic,
100: ucp_Cypriot,
101: ucp_Cyrillic,
102: ucp_Deseret,
103: ucp_Devanagari,
104: ucp_Ethiopic,
105: ucp_Georgian,
106: ucp_Glagolitic,
107: ucp_Gothic,
108: ucp_Greek,
109: ucp_Gujarati,
110: ucp_Gurmukhi,
111: ucp_Han,
112: ucp_Hangul,
113: ucp_Hanunoo,
114: ucp_Hebrew,
115: ucp_Hiragana,
116: ucp_Inherited,
117: ucp_Kannada,
118: ucp_Katakana,
119: ucp_Kharoshthi,
120: ucp_Khmer,
121: ucp_Lao,
122: ucp_Latin,
123: ucp_Limbu,
124: ucp_Linear_B,
125: ucp_Malayalam,
126: ucp_Mongolian,
127: ucp_Myanmar,
128: ucp_New_Tai_Lue,
129: ucp_Ogham,
130: ucp_Old_Italic,
131: ucp_Old_Persian,
132: ucp_Oriya,
133: ucp_Osmanya,
134: ucp_Runic,
135: ucp_Shavian,
136: ucp_Sinhala,
137: ucp_Syloti_Nagri,
138: ucp_Syriac,
139: ucp_Tagalog,
140: ucp_Tagbanwa,
141: ucp_Tai_Le,
142: ucp_Tamil,
143: ucp_Telugu,
144: ucp_Thaana,
145: ucp_Thai,
146: ucp_Tibetan,
147: ucp_Tifinagh,
148: ucp_Ugaritic,
149: ucp_Yi,
150: /* New for Unicode 5.0: */
151: ucp_Balinese,
152: ucp_Cuneiform,
153: ucp_Nko,
154: ucp_Phags_Pa,
155: ucp_Phoenician,
156: /* New for Unicode 5.1: */
157: ucp_Carian,
158: ucp_Cham,
159: ucp_Kayah_Li,
160: ucp_Lepcha,
161: ucp_Lycian,
162: ucp_Lydian,
163: ucp_Ol_Chiki,
164: ucp_Rejang,
165: ucp_Saurashtra,
166: ucp_Sundanese,
167: ucp_Vai,
168: /* New for Unicode 5.2: */
169: ucp_Avestan,
170: ucp_Bamum,
171: ucp_Egyptian_Hieroglyphs,
172: ucp_Imperial_Aramaic,
173: ucp_Inscriptional_Pahlavi,
174: ucp_Inscriptional_Parthian,
175: ucp_Javanese,
176: ucp_Kaithi,
177: ucp_Lisu,
178: ucp_Meetei_Mayek,
179: ucp_Old_South_Arabian,
180: ucp_Old_Turkic,
181: ucp_Samaritan,
182: ucp_Tai_Tham,
183: ucp_Tai_Viet,
184: /* New for Unicode 6.0.0: */
185: ucp_Batak,
186: ucp_Brahmi,
1.1.1.2 misho 187: ucp_Mandaic,
188: /* New for Unicode 6.1.0: */
189: ucp_Chakma,
190: ucp_Meroitic_Cursive,
191: ucp_Meroitic_Hieroglyphs,
192: ucp_Miao,
193: ucp_Sharada,
194: ucp_Sora_Sompeng,
195: ucp_Takri
1.1 misho 196: };
197:
198: #endif
199:
200: /* End of ucp.h */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>