Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/big5.c, revision 1.1.1.1

1.1       misho       1: /**********************************************************************
                      2:   big5.c -  Oniguruma (regular expression library)
                      3: **********************************************************************/
                      4: /*-
                      5:  * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29: 
                     30: #include "regenc.h"
                     31: 
                     32: static const int EncLen_BIG5[] = {
                           /* Length table with one entry per byte value (16 rows of 16).
                            * Entries 0x00-0xA0 and 0xFF are 1; entries 0xA1-0xFE are 2.
                            * Read by big5_mbc_enc_len() and by BIG5_ISMB_FIRST(). */
                     33:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     34:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     35:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     36:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     37:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     38:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     39:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     40:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     41:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     42:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     43:   1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     44:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     45:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     46:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     47:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     48:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
                     49: };
                     50: 
                     51: static int
                     52: big5_mbc_enc_len(const UChar* p)
                     53: {
                           /* Returns the EncLen_BIG5 entry selected by the single byte *p
                            * (1 or 2).  No byte after *p is read. */
                     54:   return EncLen_BIG5[*p];
                     55: }
                     56: 
                     57: static OnigCodePoint
                     58: big5_mbc_to_code(const UChar* p, const UChar* end)
                     59: {
                           /* Forwards p and end unchanged to the shared helper
                            * onigenc_mbn_mbc_to_code() with ONIG_ENCODING_BIG5 and returns
                            * its result.  The decoding itself is done by that helper, which
                            * is defined outside this file. */
                     60:   return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
                     61: }
                     62: 
                     63: static int
                     64: big5_code_to_mbc(OnigCodePoint code, UChar *buf)
                     65: {
                           /* Forwards code and buf unchanged to the shared helper
                            * onigenc_mb2_code_to_mbc() with ONIG_ENCODING_BIG5 and returns
                            * its result.
                            * NOTE(review): presumably the helper writes the bytes for code
                            * into buf and returns how many it wrote -- confirm against the
                            * helper's definition. */
                     66:   return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf);
                     67: }
                     68: 
                     69: static int
                     70: big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
                     71:                       UChar* lower)
                     72: {
                           /* Forwards flag, pp, end and lower unchanged to the shared helper
                            * onigenc_mbn_mbc_to_normalize() with ONIG_ENCODING_BIG5 and
                            * returns its result.  The helper is defined outside this file. */
                     73:   return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag,
                     74:                                       pp, end, lower);
                     75: }
                     76: 
                     77: static int
                     78: big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
                     79: {
                           /* Forwards flag, pp and end unchanged to the shared helper
                            * onigenc_mbn_is_mbc_ambiguous() with ONIG_ENCODING_BIG5 and
                            * returns its result.  The helper is defined outside this file. */
                     80:   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
                     81: }
                     82: 
                     83: static int
                     84: big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
                     85: {
                           /* Forwards code and ctype unchanged to the shared helper
                            * onigenc_mb2_is_code_ctype() with ONIG_ENCODING_BIG5 and returns
                            * its result.  The helper is defined outside this file. */
                     86:   return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
                     87: }
                     88: 
                     89: static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
                           /* One flag per byte value, read through BIG5_ISMB_TRAIL().
                            * Entries 0x40-0x7E and 0x80-0xFE are 1 (the byte can be a trail
                            * byte, going by the table's name); all other entries, including
                            * 0x7F and 0xFF, are 0. */
                     90:   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     91:   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     92:   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     93:   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     94:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     95:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     96:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     97:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
                     98:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     99:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    100:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    101:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    102:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    103:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    104:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                    105:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
                    106: };
                    107: 
                    108: #define BIG5_ISMB_FIRST(byte)  (EncLen_BIG5[byte] > 1)
                         /* BIG5_ISMB_FIRST(byte), defined on the line above: true when the
                          * EncLen_BIG5 entry for byte is greater than 1, i.e. byte starts a
                          * two-byte character.
                          * BIG5_ISMB_TRAIL(byte), defined on the line below: the
                          * BIG5_CAN_BE_TRAIL_TABLE entry for byte (1 or 0).
                          * Both index a 256-entry table, so byte must be in 0..255. */
                    109: #define BIG5_ISMB_TRAIL(byte)  BIG5_CAN_BE_TRAIL_TABLE[(byte)]
                    110: 
                    111: static UChar*
                    112: big5_left_adjust_char_head(const UChar* start, const UChar* s)
                    113: {
                           /* Backs up from s to a character boundary at or before s, without
                            * going before start.  The result is s, s - 1, or the position
                            * reached by the backward scan below.  The const qualifier is
                            * cast away on every return path. */
                    114:   const UChar *p;
                    115:   int len;
                    116: 
                           /* Nothing precedes s, so there is nothing to adjust. */
                    117:   if (s <= start) return (UChar* )s;
                    118:   p = s;
                    119: 
                           /* Only when *s could be a trail byte: walk p back over the run of
                            * bytes flagged by BIG5_ISMB_FIRST that immediately precedes s.
                            * The walk ends just after the first byte that is not flagged
                            * (p++ undoes the last step), or at start if every byte back to
                            * start is flagged. */
                    120:   if (BIG5_ISMB_TRAIL(*p)) {
                    121:     while (p > start) {
                    122:       if (! BIG5_ISMB_FIRST(*--p)) {
                    123:        p++;
                    124:        break;
                    125:       }
                    126:     } 
                    127:   }
                           /* NOTE(review): enc_len is defined outside this file; presumably
                            * it yields the byte length of the character starting at p for
                            * the given encoding -- confirm in regenc.h. */
                    128:   len = enc_len(ONIG_ENCODING_BIG5, p);
                           /* The character at p extends past s (this includes p == s), so p
                            * is the boundary to report. */
                    129:   if (p + len > s) return (UChar* )p;
                    130:   p += len;
                           /* (s - p) & ~1 rounds the remaining distance down to an even
                            * number, so the result is s when that distance is even and
                            * s - 1 when it is odd. */
                    131:   return (UChar* )(p + ((s - p) & ~1));
                    132: }
                    133: 
                    134: static int
                    135: big5_is_allowed_reverse_match(const UChar* s, const UChar* end)
                    136: {
                           /* Returns FALSE when the byte at s is flagged in
                            * BIG5_CAN_BE_TRAIL_TABLE, TRUE otherwise.  Only *s is read; the
                            * end parameter is not used. */
                    137:   const UChar c = *s;
                    138: 
                    139:   return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
                    140: }
                    141: 
                    142: OnigEncodingType OnigEncodingBIG5 = {
                           /* Encoding descriptor for Big5.  It is not declared static, so it
                            * has external linkage.  This is a positional initializer: the
                            * entries must stay in the member order of OnigEncodingType, which
                            * is declared outside this file.  The big5_* entries are the
                            * static functions defined in this file; the onigenc_* entries
                            * are shared helpers defined elsewhere. */
                    143:   big5_mbc_enc_len,
                    144:   "Big5",     /* name */
                    145:   2,          /* max enc length */
                    146:   1,          /* min enc length */
                    147:   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
                    148:   {
                    149:       (OnigCodePoint )'\\'                       /* esc */
                    150:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
                    151:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
                    152:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
                    153:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
                    154:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
                    155:   },
                    156:   onigenc_is_mbc_newline_0x0a,
                    157:   big5_mbc_to_code,
                    158:   onigenc_mb2_code_to_mbclen,
                    159:   big5_code_to_mbc,
                    160:   big5_mbc_to_normalize,
                    161:   big5_is_mbc_ambiguous,
                    162:   onigenc_ascii_get_all_pair_ambig_codes,
                    163:   onigenc_nothing_get_all_comp_ambig_codes,
                    164:   big5_is_code_ctype,
                    165:   onigenc_not_support_get_ctype_code_range,
                    166:   big5_left_adjust_char_head,
                    167:   big5_is_allowed_reverse_match
                    168: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>