embedaddon/pcre/ucp.h - annotate

Return to ucp.h CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre
Annotation of embedaddon/pcre/ucp.h, revision 1.1.1.3

1.1       misho       1: /*************************************************
                      2: *          Unicode Property Table handler        *
                      3: *************************************************/
                      4: 
                      5: #ifndef _UCP_H
                      6: #define _UCP_H
                      7: 
                      8: /* This file contains definitions of the property values that are returned by
                      9: the UCD access macros. New values that are added for new releases of Unicode
1.1.1.3 ! misho      10: should always be at the end of each enum, for backwards compatibility.
        !            11: 
        !            12: IMPORTANT: Note also that the specific numeric values of the enums have to be
        !            13: the same as the values that are generated by the maint/MultiStage2.py script,
        !            14: where the equivalent property descriptive names are listed in vectors. */
1.1       misho      15: 
                     16: /* These are the general character categories. Each name is the first letter shared by a group of the two-letter particular categories; values run from 0 (ucp_C) to 6 (ucp_Z) in declaration order. */
                     17: 
                     18: enum {
                     19:   ucp_C,     /* Other */
                     20:   ucp_L,     /* Letter */
                     21:   ucp_M,     /* Mark */
                     22:   ucp_N,     /* Number */
                     23:   ucp_P,     /* Punctuation */
                     24:   ucp_S,     /* Symbol */
                     25:   ucp_Z      /* Separator */
                     26: };
                     27: 
1.1.1.3 ! misho      28: /* These are the particular character categories. The first letter of each name matches one of the general categories (for example, ucp_Lu corresponds to ucp_L); values run from 0 (ucp_Cc) to 29 (ucp_Zs) in declaration order. */
1.1       misho      29: 
                     30: enum {
                     31:   ucp_Cc,    /* Control */
                     32:   ucp_Cf,    /* Format */
                     33:   ucp_Cn,    /* Unassigned */
                     34:   ucp_Co,    /* Private use */
                     35:   ucp_Cs,    /* Surrogate */
                     36:   ucp_Ll,    /* Lower case letter */
                     37:   ucp_Lm,    /* Modifier letter */
                     38:   ucp_Lo,    /* Other letter */
                     39:   ucp_Lt,    /* Title case letter */
                     40:   ucp_Lu,    /* Upper case letter */
                     41:   ucp_Mc,    /* Spacing mark */
                     42:   ucp_Me,    /* Enclosing mark */
                     43:   ucp_Mn,    /* Non-spacing mark */
                     44:   ucp_Nd,    /* Decimal number */
                     45:   ucp_Nl,    /* Letter number */
                     46:   ucp_No,    /* Other number */
                     47:   ucp_Pc,    /* Connector punctuation */
                     48:   ucp_Pd,    /* Dash punctuation */
                     49:   ucp_Pe,    /* Close punctuation */
                     50:   ucp_Pf,    /* Final punctuation */
                     51:   ucp_Pi,    /* Initial punctuation */
                     52:   ucp_Po,    /* Other punctuation */
                     53:   ucp_Ps,    /* Open punctuation */
                     54:   ucp_Sc,    /* Currency symbol */
                     55:   ucp_Sk,    /* Modifier symbol */
                     56:   ucp_Sm,    /* Mathematical symbol */
                     57:   ucp_So,    /* Other symbol */
                     58:   ucp_Zl,    /* Line separator */
                     59:   ucp_Zp,    /* Paragraph separator */
                     60:   ucp_Zs     /* Space separator */
                     61: };
                     62: 
1.1.1.3 ! misho      63: /* These are grapheme break properties. Note that the code for processing them
        !            64: assumes that the values are less than 16. If more values are added that take
        !            65: the number to 16 or more, the code will have to be rewritten. Currently 13 values (0 to 12) are defined. */
        !            66: 
        !            67: enum {
        !            68:   ucp_gbCR,                /*  0 */
        !            69:   ucp_gbLF,                /*  1 */
        !            70:   ucp_gbControl,           /*  2 */
        !            71:   ucp_gbExtend,            /*  3 */
        !            72:   ucp_gbPrepend,           /*  4 */
        !            73:   ucp_gbSpacingMark,       /*  5 */
        !            74:   ucp_gbL,                 /*  6 Hangul syllable type L */
        !            75:   ucp_gbV,                 /*  7 Hangul syllable type V */
        !            76:   ucp_gbT,                 /*  8 Hangul syllable type T */
        !            77:   ucp_gbLV,                /*  9 Hangul syllable type LV */
        !            78:   ucp_gbLVT,               /* 10 Hangul syllable type LVT */
        !            79:   ucp_gbRegionalIndicator, /* 11 */
        !            80:   ucp_gbOther              /* 12 */
        !            81: };
        !            82: 
1.1       misho      83: /* These are the script identifications. Values run from 0 (ucp_Arabic) to 101 (ucp_Takri) in declaration order; scripts introduced by later Unicode releases are appended at the end so that existing values do not change. */
                     84: 
                     85: enum {
                     86:   ucp_Arabic,
                     87:   ucp_Armenian,
                     88:   ucp_Bengali,
                     89:   ucp_Bopomofo,
                     90:   ucp_Braille,
                     91:   ucp_Buginese,
                     92:   ucp_Buhid,
                     93:   ucp_Canadian_Aboriginal,
                     94:   ucp_Cherokee,
                     95:   ucp_Common,
                     96:   ucp_Coptic,
                     97:   ucp_Cypriot,
                     98:   ucp_Cyrillic,
                     99:   ucp_Deseret,
                    100:   ucp_Devanagari,
                    101:   ucp_Ethiopic,
                    102:   ucp_Georgian,
                    103:   ucp_Glagolitic,
                    104:   ucp_Gothic,
                    105:   ucp_Greek,
                    106:   ucp_Gujarati,
                    107:   ucp_Gurmukhi,
                    108:   ucp_Han,
                    109:   ucp_Hangul,
                    110:   ucp_Hanunoo,
                    111:   ucp_Hebrew,
                    112:   ucp_Hiragana,
                    113:   ucp_Inherited,
                    114:   ucp_Kannada,
                    115:   ucp_Katakana,
                    116:   ucp_Kharoshthi,
                    117:   ucp_Khmer,
                    118:   ucp_Lao,
                    119:   ucp_Latin,
                    120:   ucp_Limbu,
                    121:   ucp_Linear_B,
                    122:   ucp_Malayalam,
                    123:   ucp_Mongolian,
                    124:   ucp_Myanmar,
                    125:   ucp_New_Tai_Lue,
                    126:   ucp_Ogham,
                    127:   ucp_Old_Italic,
                    128:   ucp_Old_Persian,
                    129:   ucp_Oriya,
                    130:   ucp_Osmanya,
                    131:   ucp_Runic,
                    132:   ucp_Shavian,
                    133:   ucp_Sinhala,
                    134:   ucp_Syloti_Nagri,
                    135:   ucp_Syriac,
                    136:   ucp_Tagalog,
                    137:   ucp_Tagbanwa,
                    138:   ucp_Tai_Le,
                    139:   ucp_Tamil,
                    140:   ucp_Telugu,
                    141:   ucp_Thaana,
                    142:   ucp_Thai,
                    143:   ucp_Tibetan,
                    144:   ucp_Tifinagh,
                    145:   ucp_Ugaritic,
                    146:   ucp_Yi,
                    147:   /* New for Unicode 5.0 (values 61 to 65): */
                    148:   ucp_Balinese,
                    149:   ucp_Cuneiform,
                    150:   ucp_Nko,
                    151:   ucp_Phags_Pa,
                    152:   ucp_Phoenician,
                    153:   /* New for Unicode 5.1 (values 66 to 76): */
                    154:   ucp_Carian,
                    155:   ucp_Cham,
                    156:   ucp_Kayah_Li,
                    157:   ucp_Lepcha,
                    158:   ucp_Lycian,
                    159:   ucp_Lydian,
                    160:   ucp_Ol_Chiki,
                    161:   ucp_Rejang,
                    162:   ucp_Saurashtra,
                    163:   ucp_Sundanese,
                    164:   ucp_Vai,
                    165:   /* New for Unicode 5.2 (values 77 to 91): */
                    166:   ucp_Avestan,
                    167:   ucp_Bamum,
                    168:   ucp_Egyptian_Hieroglyphs,
                    169:   ucp_Imperial_Aramaic,
                    170:   ucp_Inscriptional_Pahlavi,
                    171:   ucp_Inscriptional_Parthian,
                    172:   ucp_Javanese,
                    173:   ucp_Kaithi,
                    174:   ucp_Lisu,
                    175:   ucp_Meetei_Mayek,
                    176:   ucp_Old_South_Arabian,
                    177:   ucp_Old_Turkic,
                    178:   ucp_Samaritan,
                    179:   ucp_Tai_Tham,
                    180:   ucp_Tai_Viet,
                    181:   /* New for Unicode 6.0.0 (values 92 to 94): */
                    182:   ucp_Batak,
                    183:   ucp_Brahmi,
1.1.1.2   misho     184:   ucp_Mandaic,
                    185:   /* New for Unicode 6.1.0 (values 95 to 101): */
                    186:   ucp_Chakma,
                    187:   ucp_Meroitic_Cursive,
                    188:   ucp_Meroitic_Hieroglyphs,
                    189:   ucp_Miao,
                    190:   ucp_Sharada,
                    191:   ucp_Sora_Sompeng,
                    192:   ucp_Takri
1.1       misho     193: };
                    194: 
                    195: #endif
                    196: 
                    197: /* End of ucp.h */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>