Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/euc_kr.c, revision 1.1.1.1

1.1       misho       1: /**********************************************************************
                      2:   euc_kr.c -  Oniguruma (regular expression library)
                      3: **********************************************************************/
                      4: /*-
                      5:  * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29: 
                     30: #include "regenc.h"
                     31: 
                     32: static const int EncLen_EUCKR[] = {
                     33:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     34:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     35:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     36:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     37:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     38:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     39:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     40:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     41:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     42:   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     43:   1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     44:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     45:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     46:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     47:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                     48:   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
                     49: };
                     50: 
                     51: static int
                     52: euckr_mbc_enc_len(const UChar* p)
                     53: {
                     54:   return EncLen_EUCKR[*p];
                     55: }
                     56: 
                     57: static OnigCodePoint
                     58: euckr_mbc_to_code(const UChar* p, const UChar* end)
                     59: {
                     60:   return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
                     61: }
                     62: 
                     63: static int
                     64: euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
                     65: {
                     66:   return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
                     67: }
                     68: 
                     69: static int
                     70: euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
                     71:                        UChar* lower)
                     72: {
                     73:   return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag,
                     74:                                       pp, end, lower);
                     75: }
                     76: 
                     77: static int
                     78: euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
                     79: {
                     80:   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
                     81: }
                     82: 
                     83: static int
                     84: euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
                     85: {
                     86:   return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
                     87: }
                     88: 
                     89: #define euckr_islead(c)    ((c) < 0xa1 || (c) == 0xff)
                     90: 
                     91: static UChar*
                     92: euckr_left_adjust_char_head(const UChar* start, const UChar* s)
                     93: {
                     94:   /* Assumed in this encoding,
                     95:      mb-trail bytes don't mix with single bytes.
                     96:   */
                     97:   const UChar *p;
                     98:   int len;
                     99: 
                    100:   if (s <= start) return (UChar* )s;
                    101:   p = s;
                    102: 
                    103:   while (!euckr_islead(*p) && p > start) p--;
                    104:   len = enc_len(ONIG_ENCODING_EUC_KR, p);
                    105:   if (p + len > s) return (UChar* )p;
                    106:   p += len;
                    107:   return (UChar* )(p + ((s - p) & ~1));
                    108: }
                    109: 
                    110: static int
                    111: euckr_is_allowed_reverse_match(const UChar* s, const UChar* end)
                    112: {
                    113:   const UChar c = *s;
                    114:   if (c <= 0x7e) return TRUE;
                    115:   else           return FALSE;
                    116: }
                    117: 
                    118: OnigEncodingType OnigEncodingEUC_KR = {
                    119:   euckr_mbc_enc_len,
                    120:   "EUC-KR",   /* name */
                    121:   2,          /* max enc length */
                    122:   1,          /* min enc length */
                    123:   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
                    124:   {
                    125:       (OnigCodePoint )'\\'                       /* esc */
                    126:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
                    127:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
                    128:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
                    129:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
                    130:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
                    131:   },
                    132:   onigenc_is_mbc_newline_0x0a,
                    133:   euckr_mbc_to_code,
                    134:   onigenc_mb2_code_to_mbclen,
                    135:   euckr_code_to_mbc,
                    136:   euckr_mbc_to_normalize,
                    137:   euckr_is_mbc_ambiguous,
                    138:   onigenc_ascii_get_all_pair_ambig_codes,
                    139:   onigenc_nothing_get_all_comp_ambig_codes,
                    140:   euckr_is_code_ctype,
                    141:   onigenc_not_support_get_ctype_code_range,
                    142:   euckr_left_adjust_char_head,
                    143:   euckr_is_allowed_reverse_match
                    144: };
                    145: 
                    146: /* Same with OnigEncodingEUC_KR except the name */
                    147: OnigEncodingType OnigEncodingEUC_CN = {
                    148:   euckr_mbc_enc_len,
                    149:   "EUC-CN",   /* name */
                    150:   2,          /* max enc length */
                    151:   1,          /* min enc length */
                    152:   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
                    153:   {
                    154:       (OnigCodePoint )'\\'                       /* esc */
                    155:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
                    156:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
                    157:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
                    158:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
                    159:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
                    160:   },
                    161:   onigenc_is_mbc_newline_0x0a,
                    162:   euckr_mbc_to_code,
                    163:   onigenc_mb2_code_to_mbclen,
                    164:   euckr_code_to_mbc,
                    165:   euckr_mbc_to_normalize,
                    166:   euckr_is_mbc_ambiguous,
                    167:   onigenc_ascii_get_all_pair_ambig_codes,
                    168:   onigenc_nothing_get_all_comp_ambig_codes,
                    169:   euckr_is_code_ctype,
                    170:   onigenc_not_support_get_ctype_code_range,
                    171:   euckr_left_adjust_char_head,
                    172:   euckr_is_allowed_reverse_match
                    173: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>