Annotation of embedaddon/pcre/ucp.h, revision 1.1.1.3
1.1 misho 1: /*************************************************
2: * Unicode Property Table handler *
3: *************************************************/
4:
5: #ifndef _UCP_H
6: #define _UCP_H
7:
8: /* This file contains definitions of the property values that are returned by
9: the UCD access macros. New values that are added for new releases of Unicode
1.1.1.3 ! misho 10: should always be at the end of each enum, for backwards compatibility.
! 11:
! 12: IMPORTANT: Note also that the specific numeric values of the enums have to be
! 13: the same as the values that are generated by the maint/MultiStage2.py script,
! 14: where the equivalent property descriptive names are listed in vectors. */
1.1 misho 15:
/* General character categories. The numeric value of each name is written out
explicitly because it has to agree with the value produced by the
maint/MultiStage2.py script. */

enum {
  ucp_C = 0,     /* Other */
  ucp_L = 1,     /* Letter */
  ucp_M = 2,     /* Mark */
  ucp_N = 3,     /* Number */
  ucp_P = 4,     /* Punctuation */
  ucp_S = 5,     /* Symbol */
  ucp_Z = 6      /* Separator */
};
27:
/* Particular character categories, listed in groups that follow the first
letter of the name. The numeric values are written out explicitly because they
have to agree with the values produced by the maint/MultiStage2.py script. */

enum {
  /* C: Other */
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */

  /* L: Letter */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */

  /* M: Mark */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */

  /* N: Number */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */

  /* P: Punctuation */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */

  /* S: Symbol */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */

  /* Z: Separator */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};
62:
/* Grapheme break properties. The code that processes these values assumes
that every one of them is less than 16; if additions ever take the count to 16
or more, that code will have to be rewritten. */

enum {
  ucp_gbCR                = 0,
  ucp_gbLF                = 1,
  ucp_gbControl           = 2,
  ucp_gbExtend            = 3,
  ucp_gbPrepend           = 4,
  ucp_gbSpacingMark       = 5,
  ucp_gbL                 = 6,    /* Hangul syllable type L */
  ucp_gbV                 = 7,    /* Hangul syllable type V */
  ucp_gbT                 = 8,    /* Hangul syllable type T */
  ucp_gbLV                = 9,    /* Hangul syllable type LV */
  ucp_gbLVT               = 10,   /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12
};
! 82:
/* Script identifications. Scripts introduced by later Unicode releases are
appended after the original list, so existing values never change. The numeric
values are written out explicitly because they have to agree with the values
produced by the maint/MultiStage2.py script. */

enum {
  ucp_Arabic                 = 0,
  ucp_Armenian               = 1,
  ucp_Bengali                = 2,
  ucp_Bopomofo               = 3,
  ucp_Braille                = 4,
  ucp_Buginese               = 5,
  ucp_Buhid                  = 6,
  ucp_Canadian_Aboriginal    = 7,
  ucp_Cherokee               = 8,
  ucp_Common                 = 9,
  ucp_Coptic                 = 10,
  ucp_Cypriot                = 11,
  ucp_Cyrillic               = 12,
  ucp_Deseret                = 13,
  ucp_Devanagari             = 14,
  ucp_Ethiopic               = 15,
  ucp_Georgian               = 16,
  ucp_Glagolitic             = 17,
  ucp_Gothic                 = 18,
  ucp_Greek                  = 19,
  ucp_Gujarati               = 20,
  ucp_Gurmukhi               = 21,
  ucp_Han                    = 22,
  ucp_Hangul                 = 23,
  ucp_Hanunoo                = 24,
  ucp_Hebrew                 = 25,
  ucp_Hiragana               = 26,
  ucp_Inherited              = 27,
  ucp_Kannada                = 28,
  ucp_Katakana               = 29,
  ucp_Kharoshthi             = 30,
  ucp_Khmer                  = 31,
  ucp_Lao                    = 32,
  ucp_Latin                  = 33,
  ucp_Limbu                  = 34,
  ucp_Linear_B               = 35,
  ucp_Malayalam              = 36,
  ucp_Mongolian              = 37,
  ucp_Myanmar                = 38,
  ucp_New_Tai_Lue            = 39,
  ucp_Ogham                  = 40,
  ucp_Old_Italic             = 41,
  ucp_Old_Persian            = 42,
  ucp_Oriya                  = 43,
  ucp_Osmanya                = 44,
  ucp_Runic                  = 45,
  ucp_Shavian                = 46,
  ucp_Sinhala                = 47,
  ucp_Syloti_Nagri           = 48,
  ucp_Syriac                 = 49,
  ucp_Tagalog                = 50,
  ucp_Tagbanwa               = 51,
  ucp_Tai_Le                 = 52,
  ucp_Tamil                  = 53,
  ucp_Telugu                 = 54,
  ucp_Thaana                 = 55,
  ucp_Thai                   = 56,
  ucp_Tibetan                = 57,
  ucp_Tifinagh               = 58,
  ucp_Ugaritic               = 59,
  ucp_Yi                     = 60,

  /* New for Unicode 5.0: */
  ucp_Balinese               = 61,
  ucp_Cuneiform              = 62,
  ucp_Nko                    = 63,
  ucp_Phags_Pa               = 64,
  ucp_Phoenician             = 65,

  /* New for Unicode 5.1: */
  ucp_Carian                 = 66,
  ucp_Cham                   = 67,
  ucp_Kayah_Li               = 68,
  ucp_Lepcha                 = 69,
  ucp_Lycian                 = 70,
  ucp_Lydian                 = 71,
  ucp_Ol_Chiki               = 72,
  ucp_Rejang                 = 73,
  ucp_Saurashtra             = 74,
  ucp_Sundanese              = 75,
  ucp_Vai                    = 76,

  /* New for Unicode 5.2: */
  ucp_Avestan                = 77,
  ucp_Bamum                  = 78,
  ucp_Egyptian_Hieroglyphs   = 79,
  ucp_Imperial_Aramaic       = 80,
  ucp_Inscriptional_Pahlavi  = 81,
  ucp_Inscriptional_Parthian = 82,
  ucp_Javanese               = 83,
  ucp_Kaithi                 = 84,
  ucp_Lisu                   = 85,
  ucp_Meetei_Mayek           = 86,
  ucp_Old_South_Arabian      = 87,
  ucp_Old_Turkic             = 88,
  ucp_Samaritan              = 89,
  ucp_Tai_Tham               = 90,
  ucp_Tai_Viet               = 91,

  /* New for Unicode 6.0.0: */
  ucp_Batak                  = 92,
  ucp_Brahmi                 = 93,
  ucp_Mandaic                = 94,

  /* New for Unicode 6.1.0: */
  ucp_Chakma                 = 95,
  ucp_Meroitic_Cursive       = 96,
  ucp_Meroitic_Hieroglyphs   = 97,
  ucp_Miao                   = 98,
  ucp_Sharada                = 99,
  ucp_Sora_Sompeng           = 100,
  ucp_Takri                  = 101
};
194:
195: #endif
196:
197: /* End of ucp.h */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>