Return to mbfilter_big5.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / libmbfl / filters |
1.1 misho 1: /* 2: * "streamable kanji code filter and converter" 3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4: * 5: * LICENSE NOTICES 6: * 7: * This file is part of "streamable kanji code filter and converter", 8: * which is distributed under the terms of GNU Lesser General Public 9: * License (version 2) as published by the Free Software Foundation. 10: * 11: * This software is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14: * GNU Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with "streamable kanji code filter and converter"; 18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19: * Suite 330, Boston, MA 02111-1307 USA 20: * 21: * The author of this file: Rui Hirokawa <hirokawa@php.net> 22: * 23: */ 24: /* 25: * The source code included in this files was separated from mbfilter_tw.c 26: * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 27: * 28: */ 29: 30: #ifdef HAVE_CONFIG_H 31: #include "config.h" 32: #endif 33: 34: #include "mbfilter.h" 35: #include "mbfilter_big5.h" 36: 37: #include "unicode_table_big5.h" 38: 39: static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter); 40: 41: static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */ 42: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 44: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 50: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 51: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 52: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 53: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 54: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 56: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 57: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 58: }; 59: 60: static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL}; 61: 62: const mbfl_encoding mbfl_encoding_big5 = { 63: mbfl_no_encoding_big5, 64: "BIG-5", 65: "BIG5", 66: (const char *(*)[])&mbfl_encoding_big5_aliases, 67: mblen_table_big5, 68: MBFL_ENCTYPE_MBCS 69: }; 70: 71: const struct mbfl_identify_vtbl vtbl_identify_big5 = { 72: mbfl_no_encoding_big5, 73: mbfl_filt_ident_common_ctor, 74: mbfl_filt_ident_common_dtor, 75: mbfl_filt_ident_big5 76: }; 77: 78: const struct mbfl_convert_vtbl vtbl_big5_wchar = { 79: mbfl_no_encoding_big5, 80: mbfl_no_encoding_wchar, 81: mbfl_filt_conv_common_ctor, 82: mbfl_filt_conv_common_dtor, 83: mbfl_filt_conv_big5_wchar, 84: mbfl_filt_conv_common_flush 85: }; 86: 87: const struct mbfl_convert_vtbl vtbl_wchar_big5 = { 88: mbfl_no_encoding_wchar, 89: mbfl_no_encoding_big5, 90: mbfl_filt_conv_common_ctor, 91: mbfl_filt_conv_common_dtor, 92: mbfl_filt_conv_wchar_big5, 93: mbfl_filt_conv_common_flush 94: }; 95: 96: #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) 97: 98: /* 99: * Big5 => wchar 100: */ 101: int 102: mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter) 103: { 104: int c1, w; 105: 106: switch (filter->status) { 107: case 0: 108: if (c >= 0 && c < 0x80) { /* latin */ 109: CK((*filter->output_function)(c, filter->data)); 110: } else if (c > 0xa0 && c < 0xff) { /* dbcs lead byte */ 111: filter->status = 1; 112: filter->cache = c; 113: } else { 114: w = c & MBFL_WCSGROUP_MASK; 115: w |= MBFL_WCSGROUP_THROUGH; 116: CK((*filter->output_function)(w, filter->data)); 117: } 118: break; 119: 120: case 1: /* dbcs second byte */ 121: filter->status = 0; 122: c1 = filter->cache; 123: if ((c > 0x39 && c < 0x7f) | (c > 0xa0 && c < 0xff)) { 124: if (c < 0x7f){ 125: w = (c1 - 0xa1)*157 + (c - 0x40); 126: } else { 127: w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f; 128: } 129: if (w >= 0 && w < big5_ucs_table_size) { 130: w = big5_ucs_table[w]; 131: } else { 132: w = 0; 133: } 134: if (w <= 0) { 135: w = (c1 << 8) | c; 136: w &= MBFL_WCSPLANE_MASK; 137: w |= MBFL_WCSPLANE_BIG5; 138: } 139: CK((*filter->output_function)(w, filter->data)); 140: } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 141: CK((*filter->output_function)(c, filter->data)); 142: } else { 143: w = (c1 << 8) | c; 144: w &= MBFL_WCSGROUP_MASK; 145: w |= MBFL_WCSGROUP_THROUGH; 146: CK((*filter->output_function)(w, filter->data)); 147: } 148: break; 149: 150: default: 151: filter->status = 0; 152: break; 153: } 154: 155: return c; 156: } 157: 158: /* 159: * wchar => Big5 160: */ 161: int 162: mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter) 163: { 164: int c1, s; 165: 166: s = 0; 167: if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) { 168: s = ucs_a1_big5_table[c - ucs_a1_big5_table_min]; 169: } else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) { 170: s = ucs_a2_big5_table[c - ucs_a2_big5_table_min]; 171: } else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) { 172: s = ucs_a3_big5_table[c - ucs_a3_big5_table_min]; 173: } else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) { 174: s = ucs_i_big5_table[c - ucs_i_big5_table_min]; 175: } else if (c >= ucs_pua_big5_table_min && c < ucs_pua_big5_table_max) { 176: s = ucs_pua_big5_table[c - ucs_pua_big5_table_min]; 177: } else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) { 178: s = ucs_r1_big5_table[c - ucs_r1_big5_table_min]; 179: } else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) { 180: s = ucs_r2_big5_table[c - ucs_r2_big5_table_min]; 181: } 182: if (s <= 0) { 183: c1 = c & ~MBFL_WCSPLANE_MASK; 184: if (c1 == MBFL_WCSPLANE_BIG5) { 185: s = c & MBFL_WCSPLANE_MASK; 186: } 187: if (c == 0) { 188: s = 0; 189: } else if (s <= 0) { 190: s = -1; 191: } 192: } 193: if (s >= 0) { 194: if (s < 0x80) { /* latin */ 195: CK((*filter->output_function)(s, filter->data)); 196: } else { 197: CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); 198: CK((*filter->output_function)(s & 0xff, filter->data)); 199: } 200: } else { 201: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 202: CK(mbfl_filt_conv_illegal_output(c, filter)); 203: } 204: } 205: 206: return c; 207: } 208: 209: static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter) 210: { 211: if (filter->status) { /* kanji second char */ 212: if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) { /* bad */ 213: filter->flag = 1; 214: } 215: filter->status = 0; 216: } else if (c >= 0 && c < 0x80) { /* latin ok */ 217: ; 218: } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ 219: filter->status = 1; 220: } else { /* bad */ 221: filter->flag = 1; 222: } 223: 224: return c; 225: } 226: 227: