Return to mbfl_ident.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / libmbfl / mbfl |
1.1 misho 1: /* 2: * "streamable kanji code filter and converter" 3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4: * 5: * LICENSE NOTICES 6: * 7: * This file is part of "streamable kanji code filter and converter", 8: * which is distributed under the terms of GNU Lesser General Public 9: * License (version 2) as published by the Free Software Foundation. 10: * 11: * This software is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14: * GNU Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with "streamable kanji code filter and converter"; 18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19: * Suite 330, Boston, MA 02111-1307 USA 20: * 21: * The author of this file: 22: * 23: */ 24: /* 25: * The source code included in this files was separated from mbfilter.c 26: * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file 27: * mbfilter.c is included in this package . 28: * 29: */ 30: 31: #ifdef HAVE_CONFIG_H 32: #include "config.h" 33: #endif 34: 35: #ifdef HAVE_STDDEF_H 36: #include <stddef.h> 37: #endif 38: 39: #include "mbfl_ident.h" 40: #include "mbfl_allocators.h" 41: #include "mbfilter_pass.h" 42: #include "mbfilter_8bit.h" 43: #include "mbfilter_wchar.h" 44: 45: #include "filters/mbfilter_euc_cn.h" 46: #include "filters/mbfilter_hz.h" 47: #include "filters/mbfilter_euc_tw.h" 48: #include "filters/mbfilter_big5.h" 49: #include "filters/mbfilter_uhc.h" 50: #include "filters/mbfilter_euc_kr.h" 51: #include "filters/mbfilter_iso2022_kr.h" 52: #include "filters/mbfilter_sjis.h" 53: #include "filters/mbfilter_sjis_open.h" 1.1.1.2 ! misho 54: #include "filters/mbfilter_sjis_mobile.h" 1.1 misho 55: #include "filters/mbfilter_jis.h" 56: #include "filters/mbfilter_iso2022_jp_ms.h" 1.1.1.2 ! misho 57: #include "filters/mbfilter_iso2022jp_2004.h" ! 58: #include "filters/mbfilter_iso2022jp_mobile.h" 1.1 misho 59: #include "filters/mbfilter_euc_jp.h" 60: #include "filters/mbfilter_euc_jp_win.h" 1.1.1.2 ! misho 61: #include "filters/mbfilter_euc_jp_2004.h" ! 62: #include "filters/mbfilter_utf8_mobile.h" 1.1 misho 63: #include "filters/mbfilter_ascii.h" 64: #include "filters/mbfilter_koi8r.h" 65: #include "filters/mbfilter_koi8u.h" 66: #include "filters/mbfilter_cp866.h" 67: #include "filters/mbfilter_cp932.h" 68: #include "filters/mbfilter_cp936.h" 69: #include "filters/mbfilter_cp1251.h" 70: #include "filters/mbfilter_cp1252.h" 71: #include "filters/mbfilter_cp1254.h" 72: #include "filters/mbfilter_cp51932.h" 73: #include "filters/mbfilter_cp5022x.h" 1.1.1.2 ! misho 74: #include "filters/mbfilter_gb18030.h" 1.1 misho 75: #include "filters/mbfilter_iso8859_1.h" 76: #include "filters/mbfilter_iso8859_2.h" 77: #include "filters/mbfilter_iso8859_3.h" 78: #include "filters/mbfilter_iso8859_4.h" 79: #include "filters/mbfilter_iso8859_5.h" 80: #include "filters/mbfilter_iso8859_6.h" 81: #include "filters/mbfilter_iso8859_7.h" 82: #include "filters/mbfilter_iso8859_8.h" 83: #include "filters/mbfilter_iso8859_9.h" 84: #include "filters/mbfilter_iso8859_10.h" 85: #include "filters/mbfilter_iso8859_13.h" 86: #include "filters/mbfilter_iso8859_14.h" 87: #include "filters/mbfilter_iso8859_15.h" 88: #include "filters/mbfilter_base64.h" 89: #include "filters/mbfilter_qprint.h" 90: #include "filters/mbfilter_uuencode.h" 91: #include "filters/mbfilter_7bit.h" 92: #include "filters/mbfilter_utf7.h" 93: #include "filters/mbfilter_utf7imap.h" 94: #include "filters/mbfilter_utf8.h" 95: #include "filters/mbfilter_utf16.h" 96: #include "filters/mbfilter_utf32.h" 97: #include "filters/mbfilter_byte2.h" 98: #include "filters/mbfilter_byte4.h" 99: #include "filters/mbfilter_ucs4.h" 100: #include "filters/mbfilter_ucs2.h" 101: #include "filters/mbfilter_htmlent.h" 102: #include "filters/mbfilter_armscii8.h" 103: #include "filters/mbfilter_cp850.h" 104: 105: static const struct mbfl_identify_vtbl vtbl_identify_false = { 106: mbfl_no_encoding_pass, 107: mbfl_filt_ident_false_ctor, 108: mbfl_filt_ident_common_dtor, 109: mbfl_filt_ident_false }; 110: 111: 112: static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { 113: &vtbl_identify_utf8, 114: &vtbl_identify_utf7, 115: &vtbl_identify_ascii, 116: &vtbl_identify_eucjp, 117: &vtbl_identify_sjis, 118: &vtbl_identify_sjis_open, 119: &vtbl_identify_eucjpwin, 1.1.1.2 ! misho 120: &vtbl_identify_eucjp2004, 1.1 misho 121: &vtbl_identify_cp932, 122: &vtbl_identify_jis, 123: &vtbl_identify_2022jp, 124: &vtbl_identify_2022jpms, 1.1.1.2 ! misho 125: &vtbl_identify_2022jp_2004, ! 126: &vtbl_identify_2022jp_kddi, 1.1 misho 127: &vtbl_identify_cp51932, 1.1.1.2 ! misho 128: &vtbl_identify_sjis_docomo, ! 129: &vtbl_identify_sjis_kddi, ! 130: &vtbl_identify_sjis_sb, ! 131: &vtbl_identify_utf8_docomo, ! 132: &vtbl_identify_utf8_kddi_a, ! 133: &vtbl_identify_utf8_kddi_b, ! 134: &vtbl_identify_utf8_sb, 1.1 misho 135: &vtbl_identify_euccn, 136: &vtbl_identify_cp936, 137: &vtbl_identify_hz, 138: &vtbl_identify_euctw, 139: &vtbl_identify_big5, 1.1.1.2 ! misho 140: &vtbl_identify_cp950, 1.1 misho 141: &vtbl_identify_euckr, 142: &vtbl_identify_uhc, 143: &vtbl_identify_2022kr, 144: &vtbl_identify_cp1251, 145: &vtbl_identify_cp866, 146: &vtbl_identify_koi8r, 147: &vtbl_identify_koi8u, 148: &vtbl_identify_cp1252, 149: &vtbl_identify_cp1254, 150: &vtbl_identify_8859_1, 151: &vtbl_identify_8859_2, 152: &vtbl_identify_8859_3, 153: &vtbl_identify_8859_4, 154: &vtbl_identify_8859_5, 155: &vtbl_identify_8859_6, 156: &vtbl_identify_8859_7, 157: &vtbl_identify_8859_8, 158: &vtbl_identify_8859_9, 159: &vtbl_identify_8859_10, 160: &vtbl_identify_8859_13, 161: &vtbl_identify_8859_14, 162: &vtbl_identify_8859_15, 163: &vtbl_identify_armscii8, 164: &vtbl_identify_cp850, 165: &vtbl_identify_jis_ms, 166: &vtbl_identify_cp50220, 167: &vtbl_identify_cp50221, 168: &vtbl_identify_cp50222, 1.1.1.2 ! misho 169: &vtbl_identify_gb18030, 1.1 misho 170: &vtbl_identify_false, 171: NULL 172: }; 173: 174: 175: 176: /* 177: * identify filter 178: */ 179: const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding) 180: { 181: const struct mbfl_identify_vtbl * vtbl; 182: int i; 183: 184: i = 0; 185: while ((vtbl = mbfl_identify_filter_list[i++]) != NULL) { 186: if (vtbl->encoding == encoding) { 187: break; 188: } 189: } 190: 191: return vtbl; 192: } 193: 194: mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding) 195: { 196: mbfl_identify_filter *filter; 197: 198: /* allocate */ 199: filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter)); 200: if (filter == NULL) { 201: return NULL; 202: } 203: 204: if (mbfl_identify_filter_init(filter, encoding)) { 205: mbfl_free(filter); 206: return NULL; 207: } 208: 209: return filter; 210: } 211: 1.1.1.2 ! misho 212: mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding) ! 213: { ! 214: mbfl_identify_filter *filter; ! 215: ! 216: /* allocate */ ! 217: filter = (mbfl_identify_filter *)mbfl_malloc(sizeof(mbfl_identify_filter)); ! 218: if (filter == NULL) { ! 219: return NULL; ! 220: } ! 221: ! 222: if (mbfl_identify_filter_init2(filter, encoding)) { ! 223: mbfl_free(filter); ! 224: return NULL; ! 225: } ! 226: ! 227: return filter; ! 228: } ! 229: ! 230: 1.1 misho 231: int mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding) 232: { 1.1.1.2 ! misho 233: const mbfl_encoding *enc = mbfl_no2encoding(encoding); ! 234: return mbfl_identify_filter_init2(filter, enc ? enc: &mbfl_encoding_pass); ! 235: } ! 236: ! 237: int mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding) ! 238: { 1.1 misho 239: const struct mbfl_identify_vtbl *vtbl; 240: 241: /* encoding structure */ 1.1.1.2 ! misho 242: filter->encoding = encoding; 1.1 misho 243: 244: filter->status = 0; 245: filter->flag = 0; 246: filter->score = 0; 247: 248: /* setup the function table */ 249: vtbl = mbfl_identify_filter_get_vtbl(filter->encoding->no_encoding); 250: if (vtbl == NULL) { 251: vtbl = &vtbl_identify_false; 252: } 253: filter->filter_ctor = vtbl->filter_ctor; 254: filter->filter_dtor = vtbl->filter_dtor; 255: filter->filter_function = vtbl->filter_function; 256: 257: /* constructor */ 258: (*filter->filter_ctor)(filter); 259: 260: return 0; 261: } 262: 263: void mbfl_identify_filter_delete(mbfl_identify_filter *filter) 264: { 265: if (filter == NULL) { 266: return; 267: } 268: 269: mbfl_identify_filter_cleanup(filter); 270: mbfl_free((void*)filter); 271: } 272: 273: void mbfl_identify_filter_cleanup(mbfl_identify_filter *filter) 274: { 275: (*filter->filter_dtor)(filter); 276: } 277: 278: void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter) 279: { 280: filter->status = 0; 281: filter->flag = 0; 282: } 283: 284: void mbfl_filt_ident_common_dtor(mbfl_identify_filter *filter) 285: { 286: filter->status = 0; 287: } 288: 289: int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter) 290: { 291: filter->flag = 1; /* bad */ 292: return c; 293: } 294: 295: void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter) 296: { 297: filter->status = 0; 298: filter->flag = 1; 299: } 300: 301: int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter) 302: { 303: return c; 304: }