File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / ucp.h
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:03 2014 UTC (9 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /*************************************************
    2: *          Unicode Property Table handler        *
    3: *************************************************/
    4: 
    5: #ifndef _UCP_H
    6: #define _UCP_H
    7: 
    8: /* This file contains definitions of the property values that are returned by
    9: the UCD access macros. New values that are added for new releases of Unicode
   10: should always be at the end of each enum, for backwards compatibility.
   11: 
   12: IMPORTANT: Note also that the specific numeric values of the enums have to be
   13: the same as the values that are generated by the maint/MultiStage2.py script,
   14: where the equivalent property descriptive names are listed in vectors.
   15: 
   16: ALSO: The specific values of the first two enums are assumed for the table
   17: called catposstab in pcre_compile.c. */
   18: 
   19: /* These are the general character categories. */
   20: 
   21: enum {  /* General categories; values 0-6 in declaration order are assumed by catposstab in pcre_compile.c */
   22:   ucp_C,     /* 0 Other */
   23:   ucp_L,     /* 1 Letter */
   24:   ucp_M,     /* 2 Mark */
   25:   ucp_N,     /* 3 Number */
   26:   ucp_P,     /* 4 Punctuation */
   27:   ucp_S,     /* 5 Symbol */
   28:   ucp_Z      /* 6 Separator */
   29: };
   30: 
   31: /* These are the particular character categories. */
   32: 
   33: enum {  /* Particular categories; values 0-29 in declaration order are assumed by catposstab in pcre_compile.c */
   34:   ucp_Cc,    /*  0 Control */
   35:   ucp_Cf,    /*  1 Format */
   36:   ucp_Cn,    /*  2 Unassigned */
   37:   ucp_Co,    /*  3 Private use */
   38:   ucp_Cs,    /*  4 Surrogate */
   39:   ucp_Ll,    /*  5 Lower case letter */
   40:   ucp_Lm,    /*  6 Modifier letter */
   41:   ucp_Lo,    /*  7 Other letter */
   42:   ucp_Lt,    /*  8 Title case letter */
   43:   ucp_Lu,    /*  9 Upper case letter */
   44:   ucp_Mc,    /* 10 Spacing mark */
   45:   ucp_Me,    /* 11 Enclosing mark */
   46:   ucp_Mn,    /* 12 Non-spacing mark */
   47:   ucp_Nd,    /* 13 Decimal number */
   48:   ucp_Nl,    /* 14 Letter number */
   49:   ucp_No,    /* 15 Other number */
   50:   ucp_Pc,    /* 16 Connector punctuation */
   51:   ucp_Pd,    /* 17 Dash punctuation */
   52:   ucp_Pe,    /* 18 Close punctuation */
   53:   ucp_Pf,    /* 19 Final punctuation */
   54:   ucp_Pi,    /* 20 Initial punctuation */
   55:   ucp_Po,    /* 21 Other punctuation */
   56:   ucp_Ps,    /* 22 Open punctuation */
   57:   ucp_Sc,    /* 23 Currency symbol */
   58:   ucp_Sk,    /* 24 Modifier symbol */
   59:   ucp_Sm,    /* 25 Mathematical symbol */
   60:   ucp_So,    /* 26 Other symbol */
   61:   ucp_Zl,    /* 27 Line separator */
   62:   ucp_Zp,    /* 28 Paragraph separator */
   63:   ucp_Zs     /* 29 Space separator */
   64: };
   65: 
   66: /* These are grapheme break properties. Note that the code for processing them
   67: assumes that the values are less than 16. If more values are added that take
   68: the number to 16 or more, the code will have to be rewritten. */
   69: 
   70: enum {  /* Grapheme break property values; the processing code assumes every value is below 16 */
   71:   ucp_gbCR,                /*  0 */
   72:   ucp_gbLF,                /*  1 */
   73:   ucp_gbControl,           /*  2 */
   74:   ucp_gbExtend,            /*  3 */
   75:   ucp_gbPrepend,           /*  4 */
   76:   ucp_gbSpacingMark,       /*  5 */
   77:   ucp_gbL,                 /*  6 Hangul syllable type L */
   78:   ucp_gbV,                 /*  7 Hangul syllable type V */
   79:   ucp_gbT,                 /*  8 Hangul syllable type T */
   80:   ucp_gbLV,                /*  9 Hangul syllable type LV */
   81:   ucp_gbLVT,               /* 10 Hangul syllable type LVT */
   82:   ucp_gbRegionalIndicator, /* 11 */
   83:   ucp_gbOther              /* 12 highest value currently defined; three more fit before the limit of 16 */
   84: };
   85: 
   86: /* These are the script identifications. */
   87: 
   88: enum {  /* Script identifiers; values 0-60 (ucp_Arabic..ucp_Yi) come first, later additions are appended in groups */
   89:   ucp_Arabic,
   90:   ucp_Armenian,
   91:   ucp_Bengali,
   92:   ucp_Bopomofo,
   93:   ucp_Braille,
   94:   ucp_Buginese,
   95:   ucp_Buhid,
   96:   ucp_Canadian_Aboriginal,
   97:   ucp_Cherokee,
   98:   ucp_Common,
   99:   ucp_Coptic,
  100:   ucp_Cypriot,
  101:   ucp_Cyrillic,
  102:   ucp_Deseret,
  103:   ucp_Devanagari,
  104:   ucp_Ethiopic,
  105:   ucp_Georgian,
  106:   ucp_Glagolitic,
  107:   ucp_Gothic,
  108:   ucp_Greek,
  109:   ucp_Gujarati,
  110:   ucp_Gurmukhi,
  111:   ucp_Han,
  112:   ucp_Hangul,
  113:   ucp_Hanunoo,
  114:   ucp_Hebrew,
  115:   ucp_Hiragana,
  116:   ucp_Inherited,
  117:   ucp_Kannada,
  118:   ucp_Katakana,
  119:   ucp_Kharoshthi,
  120:   ucp_Khmer,
  121:   ucp_Lao,
  122:   ucp_Latin,
  123:   ucp_Limbu,
  124:   ucp_Linear_B,
  125:   ucp_Malayalam,
  126:   ucp_Mongolian,
  127:   ucp_Myanmar,
  128:   ucp_New_Tai_Lue,
  129:   ucp_Ogham,
  130:   ucp_Old_Italic,
  131:   ucp_Old_Persian,
  132:   ucp_Oriya,
  133:   ucp_Osmanya,
  134:   ucp_Runic,
  135:   ucp_Shavian,
  136:   ucp_Sinhala,
  137:   ucp_Syloti_Nagri,
  138:   ucp_Syriac,
  139:   ucp_Tagalog,
  140:   ucp_Tagbanwa,
  141:   ucp_Tai_Le,
  142:   ucp_Tamil,
  143:   ucp_Telugu,
  144:   ucp_Thaana,
  145:   ucp_Thai,
  146:   ucp_Tibetan,
  147:   ucp_Tifinagh,
  148:   ucp_Ugaritic,
  149:   ucp_Yi,
  150:   /* New for Unicode 5.0 (values 61-65): */
  151:   ucp_Balinese,
  152:   ucp_Cuneiform,
  153:   ucp_Nko,
  154:   ucp_Phags_Pa,
  155:   ucp_Phoenician,
  156:   /* New for Unicode 5.1 (values 66-76): */
  157:   ucp_Carian,
  158:   ucp_Cham,
  159:   ucp_Kayah_Li,
  160:   ucp_Lepcha,
  161:   ucp_Lycian,
  162:   ucp_Lydian,
  163:   ucp_Ol_Chiki,
  164:   ucp_Rejang,
  165:   ucp_Saurashtra,
  166:   ucp_Sundanese,
  167:   ucp_Vai,
  168:   /* New for Unicode 5.2 (values 77-91): */
  169:   ucp_Avestan,
  170:   ucp_Bamum,
  171:   ucp_Egyptian_Hieroglyphs,
  172:   ucp_Imperial_Aramaic,
  173:   ucp_Inscriptional_Pahlavi,
  174:   ucp_Inscriptional_Parthian,
  175:   ucp_Javanese,
  176:   ucp_Kaithi,
  177:   ucp_Lisu,
  178:   ucp_Meetei_Mayek,
  179:   ucp_Old_South_Arabian,
  180:   ucp_Old_Turkic,
  181:   ucp_Samaritan,
  182:   ucp_Tai_Tham,
  183:   ucp_Tai_Viet,
  184:   /* New for Unicode 6.0.0 (values 92-94): */
  185:   ucp_Batak,
  186:   ucp_Brahmi,
  187:   ucp_Mandaic,
  188:   /* New for Unicode 6.1.0 (values 95-101): */
  189:   ucp_Chakma,
  190:   ucp_Meroitic_Cursive,
  191:   ucp_Meroitic_Hieroglyphs,
  192:   ucp_Miao,
  193:   ucp_Sharada,
  194:   ucp_Sora_Sompeng,
  195:   ucp_Takri  /* 101, the last value; new scripts must be appended after this one */
  196: };
  197: 
  198: #endif
  199: 
  200: /* End of ucp.h */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>