Return to mbfilter_euc_tw.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / libmbfl / filters |
1.1 misho 1: /* 2: * "streamable kanji code filter and converter" 3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4: * 5: * LICENSE NOTICES 6: * 7: * This file is part of "streamable kanji code filter and converter", 8: * which is distributed under the terms of GNU Lesser General Public 9: * License (version 2) as published by the Free Software Foundation. 10: * 11: * This software is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14: * GNU Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with "streamable kanji code filter and converter"; 18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19: * Suite 330, Boston, MA 02111-1307 USA 20: * 21: * The author of this file: Rui Hirokawa <hirokawa@php.net> 22: * 23: */ 24: /* 25: * The source code included in this files was separated from mbfilter_tw.c 26: * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 27: * 28: */ 29: 30: #ifdef HAVE_CONFIG_H 31: #include "config.h" 32: #endif 33: 34: #include "mbfilter.h" 35: #include "mbfilter_euc_tw.h" 36: 37: #include "unicode_table_cns11643.h" 38: 39: static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter); 40: 41: static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */ 42: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 44: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 50: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 51: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 52: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 53: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 54: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 56: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 57: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 58: }; 59: 60: 61: static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; 62: 63: const mbfl_encoding mbfl_encoding_euc_tw = { 64: mbfl_no_encoding_euc_tw, 65: "EUC-TW", 66: "EUC-TW", 67: (const char *(*)[])&mbfl_encoding_euc_tw_aliases, 68: mblen_table_euctw, 69: MBFL_ENCTYPE_MBCS 70: }; 71: 72: const struct mbfl_identify_vtbl vtbl_identify_euctw = { 73: mbfl_no_encoding_euc_tw, 74: mbfl_filt_ident_common_ctor, 75: mbfl_filt_ident_common_dtor, 76: mbfl_filt_ident_euctw 77: }; 78: 79: const struct mbfl_convert_vtbl vtbl_euctw_wchar = { 80: mbfl_no_encoding_euc_tw, 81: mbfl_no_encoding_wchar, 82: mbfl_filt_conv_common_ctor, 83: mbfl_filt_conv_common_dtor, 84: mbfl_filt_conv_euctw_wchar, 85: mbfl_filt_conv_common_flush 86: }; 87: 88: const struct mbfl_convert_vtbl vtbl_wchar_euctw = { 89: mbfl_no_encoding_wchar, 90: mbfl_no_encoding_euc_tw, 91: mbfl_filt_conv_common_ctor, 92: mbfl_filt_conv_common_dtor, 93: mbfl_filt_conv_wchar_euctw, 94: mbfl_filt_conv_common_flush 95: }; 96: 97: #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) 98: 99: /* 100: * EUC-TW => wchar 101: */ 102: int 103: mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter) 104: { 105: int c1, s, w, plane; 106: 107: switch (filter->status) { 108: case 0: 109: if (c >= 0 && c < 0x80) { /* latin */ 110: CK((*filter->output_function)(c, filter->data)); 111: } else if (c > 0xa0 && c < 0xff) { /* dbcs first byte */ 112: filter->status = 1; 113: filter->cache = c; 114: } else if (c == 0x8e) { /* mbcs first byte */ 115: filter->status = 2; 116: filter->cache = c; 117: } else { 118: w = c & MBFL_WCSGROUP_MASK; 119: w |= MBFL_WCSGROUP_THROUGH; 120: CK((*filter->output_function)(w, filter->data)); 121: } 122: break; 123: 124: case 1: /* mbcs second byte */ 125: filter->status = 0; 126: c1 = filter->cache; 127: if (c > 0xa0 && c < 0xff) { 128: w = (c1 - 0xa1)*94 + (c - 0xa1); 129: if (w >= 0 && w < cns11643_1_ucs_table_size) { 130: w = cns11643_1_ucs_table[w]; 131: } else { 132: w = 0; 133: } 134: if (w <= 0) { 135: w = (c1 << 8) | c; 136: w &= MBFL_WCSPLANE_MASK; 137: w |= MBFL_WCSPLANE_CNS11643; 138: } 139: CK((*filter->output_function)(w, filter->data)); 140: } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 141: CK((*filter->output_function)(c, filter->data)); 142: } else { 143: w = (c1 << 8) | c; 144: w &= MBFL_WCSGROUP_MASK; 145: w |= MBFL_WCSGROUP_THROUGH; 146: CK((*filter->output_function)(w, filter->data)); 147: } 148: break; 149: 150: case 2: /* got 0x8e, first char */ 151: c1 = filter->cache; 152: if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 153: CK((*filter->output_function)(c, filter->data)); 154: filter->status = 0; 155: } else if (c > 0xa0 && c < 0xaf) { 156: filter->status = 3; 157: filter->cache = c - 0xa1; 158: } else { 159: w = (c1 << 8) | c; 160: w &= MBFL_WCSGROUP_MASK; 161: w |= MBFL_WCSGROUP_THROUGH; 162: CK((*filter->output_function)(w, filter->data)); 163: } 164: break; 165: 166: case 3: /* got 0x8e, third char */ 167: filter->status = 0; 168: c1 = filter->cache; 169: if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 170: CK((*filter->output_function)(c, filter->data)); 171: filter->status = 0; 172: } else if (c > 0xa0 && c < 0xff) { 173: filter->status = 4; 174: filter->cache = (c1 << 8) + c - 0xa1; 175: } else { 176: w = (c1 << 8) | c; 177: w &= MBFL_WCSGROUP_MASK; 178: w |= MBFL_WCSGROUP_THROUGH; 179: CK((*filter->output_function)(w, filter->data)); 180: } 181: break; 182: 183: case 4: /* mbcs fourth char */ 184: filter->status = 0; 185: c1 = filter->cache; 186: if (c1 >= 0x100 && c1 <= 0xdff && c > 0xa0 && c < 0xff) { 187: plane = (c1 & 0xf00) >> 8; 188: s = (c1 & 0xff)*94 + c - 0xa1; 189: w = 0; 190: if (s >= 0) { 191: if (plane == 1 && s < cns11643_2_ucs_table_size) { 192: w = cns11643_2_ucs_table[s]; 193: } 194: if (plane == 13 && s < cns11643_14_ucs_table_size) { 195: w = cns11643_14_ucs_table[s]; 196: } 197: } 198: if (w <= 0) { 199: w = ((c1 & 0x7f) << 8) | (c & 0x7f); 200: w &= MBFL_WCSPLANE_MASK; 201: w |= MBFL_WCSPLANE_CNS11643; 202: } 203: CK((*filter->output_function)(w, filter->data)); 204: } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 205: CK((*filter->output_function)(c, filter->data)); 206: } else { 207: w = (c1 << 8) | c | 0x8e0000; 208: w &= MBFL_WCSGROUP_MASK; 209: w |= MBFL_WCSGROUP_THROUGH; 210: CK((*filter->output_function)(w, filter->data)); 211: } 212: break; 213: 214: default: 215: filter->status = 0; 216: break; 217: } 218: 219: return c; 220: } 221: 222: /* 223: * wchar => EUC-TW 224: */ 225: int 226: mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter) 227: { 228: int c1, s, plane; 229: 230: s = 0; 231: if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) { 232: s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min]; 233: } else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) { 234: s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min]; 235: } else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) { 236: s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min]; 237: } else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) { 238: s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min]; 239: } else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) { 240: s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min]; 241: } 242: if (s <= 0) { 243: c1 = c & ~MBFL_WCSPLANE_MASK; 244: if (c1 == MBFL_WCSPLANE_CNS11643) { 245: s = c & MBFL_WCSPLANE_MASK; 246: } 247: if (c == 0) { 248: s = 0; 249: } else if (s <= 0) { 250: s = -1; 251: } 252: } 253: if (s >= 0) { 254: plane = (s & 0x1f0000) >> 16; 255: if (plane <= 1){ 256: if (s < 0x80) { /* latin */ 257: CK((*filter->output_function)(s, filter->data)); 258: } else { 259: s = (s & 0xffff) | 0x8080; 260: CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); 261: CK((*filter->output_function)(s & 0xff, filter->data)); 262: } 263: } else { 264: s = (0x8ea00000 + (plane << 16)) | ((s & 0xffff) | 0x8080); 265: CK((*filter->output_function)(0x8e , filter->data)); 266: CK((*filter->output_function)((s >> 16) & 0xff, filter->data)); 267: CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); 268: CK((*filter->output_function)(s & 0xff, filter->data)); 269: } 270: } else { 271: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 272: CK(mbfl_filt_conv_illegal_output(c, filter)); 273: } 274: } 275: return c; 276: } 277: 278: static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter) 279: { 280: switch (filter->status) { 281: case 0: /* latin */ 282: if (c >= 0 && c < 0x80) { /* ok */ 283: ; 284: } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ 285: filter->status = 1; 286: } else if (c == 0x8e) { /* DBCS lead byte */ 287: filter->status = 2; 288: } else { /* bad */ 289: filter->flag = 1; 290: } 291: break; 292: 293: case 1: /* got lead byte */ 294: if (c < 0xa1 || c > 0xfe) { /* bad */ 295: filter->flag = 1; 296: } 297: filter->status = 0; 298: break; 299: 300: case 2: /* got lead byte */ 301: if (c >= 0xa1 && c < 0xaf) { /* ok */ 302: filter->status = 3; 303: } else { 304: filter->flag = 1; /* bad */ 305: } 306: break; 307: 308: case 3: /* got lead byte */ 309: if (c < 0xa1 || c > 0xfe) { /* bad */ 310: filter->flag = 1; 311: } 312: filter->status = 4; 313: break; 314: 315: case 4: /* got lead byte */ 316: if (c < 0xa1 || c > 0xfe) { /* bad */ 317: filter->flag = 1; 318: } 319: filter->status = 0; 320: break; 321: 322: default: 323: filter->status = 0; 324: break; 325: } 326: 327: return c; 328: } 329: