/* Return to mbfilter_euc_kr.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / libmbfl / filters | */
1.1 misho 1: /* 2: * "streamable kanji code filter and converter" 3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4: * 5: * LICENSE NOTICES 6: * 7: * This file is part of "streamable kanji code filter and converter", 8: * which is distributed under the terms of GNU Lesser General Public 9: * License (version 2) as published by the Free Software Foundation. 10: * 11: * This software is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14: * GNU Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with "streamable kanji code filter and converter"; 18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19: * Suite 330, Boston, MA 02111-1307 USA 20: * 21: * The author of this file: 22: * 23: */ 24: /* 25: * The source code included in this files was separated from mbfilter_kr.c 26: * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
27: * 28: */ 29: 30: #ifdef HAVE_CONFIG_H 31: #include "config.h" 32: #endif 33: 34: #include "mbfilter.h" 35: #include "mbfilter_euc_kr.h" 36: #include "unicode_table_uhc.h" 37: 38: static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter); 39: 40: static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */ 41: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 44: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 50: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 52: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 53: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 54: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 56: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 57: }; 58: 59: static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; 60: 61: const mbfl_encoding mbfl_encoding_euc_kr = { 62: mbfl_no_encoding_euc_kr, 63: "EUC-KR", 64: "EUC-KR", 65: (const char *(*)[])&mbfl_encoding_euc_kr_aliases, 66: mblen_table_euckr, 67: MBFL_ENCTYPE_MBCS 68: }; 69: 70: const struct mbfl_identify_vtbl vtbl_identify_euckr = { 71: mbfl_no_encoding_euc_kr, 72: mbfl_filt_ident_common_ctor, 73: mbfl_filt_ident_common_dtor, 74: mbfl_filt_ident_euckr 75: }; 76: 77: const struct mbfl_convert_vtbl vtbl_euckr_wchar = { 78: mbfl_no_encoding_euc_kr, 79: mbfl_no_encoding_wchar, 80: mbfl_filt_conv_common_ctor, 81: mbfl_filt_conv_common_dtor, 82: mbfl_filt_conv_euckr_wchar, 83: mbfl_filt_conv_common_flush 84: }; 85: 86: const struct mbfl_convert_vtbl vtbl_wchar_euckr = { 87: 
mbfl_no_encoding_wchar, 88: mbfl_no_encoding_euc_kr, 89: mbfl_filt_conv_common_ctor, 90: mbfl_filt_conv_common_dtor, 91: mbfl_filt_conv_wchar_euckr, 92: mbfl_filt_conv_common_flush 93: }; 94: 95: 96: #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) 97: 98: /* 99: * EUC-KR => wchar 100: */ 101: int 102: mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter) 103: { 104: int c1, w, flag; 105: 106: switch (filter->status) { 107: case 0: 108: if (c >= 0 && c < 0x80) { /* latin */ 109: CK((*filter->output_function)(c, filter->data)); 110: } else if (c > 0xa0 && c < 0xff && c != 0xc9) { /* dbcs lead byte */ 111: filter->status = 1; 112: filter->cache = c; 113: } else { 114: w = c & MBFL_WCSGROUP_MASK; 115: w |= MBFL_WCSGROUP_THROUGH; 116: CK((*filter->output_function)(w, filter->data)); 117: } 118: break; 119: 120: case 1: /* dbcs second byte */ 121: filter->status = 0; 122: c1 = filter->cache; 123: flag = 0; 124: if (c1 >= 0xa1 && c1 <= 0xc6) { 125: flag = 1; 126: } else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) { 127: flag = 2; 128: } 129: if (flag > 0 && c >= 0xa1 && c <= 0xfe) { 130: if (flag == 1){ /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */ 131: w = (c1 - 0xa1)*190 + (c - 0x41); 132: if (w >= 0 && w < uhc2_ucs_table_size) { 133: w = uhc2_ucs_table[w]; 134: } else { 135: w = 0; 136: } 137: } else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */ 138: w = (c1 - 0xc7)*94 + (c - 0xa1); 139: if (w >= 0 && w < uhc3_ucs_table_size) { 140: w = uhc3_ucs_table[w]; 141: } else { 142: w = 0; 143: } 144: } 145: 146: if (w <= 0) { 147: w = (c1 << 8) | c; 148: w &= MBFL_WCSPLANE_MASK; 149: w |= MBFL_WCSPLANE_KSC5601; 150: } 151: CK((*filter->output_function)(w, filter->data)); 152: } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 153: CK((*filter->output_function)(c, filter->data)); 154: } else { 155: w = (c1 << 8) | c; 156: w &= MBFL_WCSGROUP_MASK; 157: w |= MBFL_WCSGROUP_THROUGH; 158: CK((*filter->output_function)(w, 
filter->data)); 159: } 160: break; 161: 162: default: 163: filter->status = 0; 164: break; 165: } 166: 167: return c; 168: } 169: 170: /* 171: * wchar => EUC-KR 172: */ 173: int 174: mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter) 175: { 176: int c1, c2, s; 177: 178: s = 0; 179: 180: if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { 181: s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; 182: } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { 183: s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; 184: } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { 185: s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; 186: } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { 187: s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; 188: } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { 189: s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; 190: } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { 191: s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; 192: } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { 193: s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; 194: } 195: 196: c1 = (s >> 8) & 0xff; 197: c2 = s & 0xff; 198: /* exclude UHC extension area */ 199: if (c1 < 0xa1 || c2 < 0xa1){ 200: s = c; 201: } 202: 203: if (s <= 0) { 204: c1 = c & ~MBFL_WCSPLANE_MASK; 205: if (c1 == MBFL_WCSPLANE_KSC5601) { 206: s = c & MBFL_WCSPLANE_MASK; 207: } 208: if (c == 0) { 209: s = 0; 210: } else if (s <= 0) { 211: s = -1; 212: } 213: } 214: if (s >= 0) { 215: if (s < 0x80) { /* latin */ 216: CK((*filter->output_function)(s, filter->data)); 217: } else { 218: CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); 219: CK((*filter->output_function)(s & 0xff, filter->data)); 220: } 221: } else { 222: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 223: CK(mbfl_filt_conv_illegal_output(c, filter)); 224: } 225: } 226: 227: return c; 228: } 229: 230: static int 
mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter) 231: { 232: switch (filter->status) { 233: case 0: /* latin */ 234: if (c >= 0 && c < 0x80) { /* ok */ 235: ; 236: } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ 237: filter->status = 1; 238: } else { /* bad */ 239: filter->flag = 1; 240: } 241: break; 242: 243: case 1: /* got lead byte */ 244: if (c < 0xa1 || c > 0xfe) { /* bad */ 245: filter->flag = 1; 246: } 247: filter->status = 0; 248: break; 249: 250: default: 251: filter->status = 0; 252: break; 253: } 254: 255: return c; 256: }