Return to mbfilter_iso2022_jp_ms.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / libmbfl / filters |
1.1 misho 1: /* 2: * "streamable kanji code filter and converter" 3: * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4: * 5: * LICENSE NOTICES 6: * 7: * This file is part of "streamable kanji code filter and converter", 8: * which is distributed under the terms of GNU Lesser General Public 9: * License (version 2) as published by the Free Software Foundation. 10: * 11: * This software is distributed in the hope that it will be useful, 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14: * GNU Lesser General Public License for more details. 15: * 16: * You should have received a copy of the GNU Lesser General Public 17: * License along with "streamable kanji code filter and converter"; 18: * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19: * Suite 330, Boston, MA 02111-1307 USA 20: * 21: * The author of this file: 22: * 23: */ 24: /* 25: * The source code included in this files was separated from mbfilter_ja.c 26: * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 27: * 28: */ 29: 30: #ifdef HAVE_CONFIG_H 31: #include "config.h" 32: #endif 33: 34: #include "mbfilter.h" 35: #include "mbfilter_iso2022_jp_ms.h" 36: 37: #include "unicode_table_cp932_ext.h" 38: #include "unicode_table_jis.h" 39: #include "cp932_table.h" 40: 1.1.1.2 ! misho 41: int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); 1.1 misho 42: 43: static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; 44: 45: const mbfl_encoding mbfl_encoding_2022jpms = { 46: mbfl_no_encoding_2022jpms, 47: "ISO-2022-JP-MS", 48: "ISO-2022-JP", 49: (const char *(*)[])&mbfl_encoding_2022jpms_aliases, 50: NULL, 1.1.1.2 ! misho 51: MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE 1.1 misho 52: }; 53: 54: const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { 55: mbfl_no_encoding_2022jpms, 56: mbfl_filt_ident_common_ctor, 57: mbfl_filt_ident_common_dtor, 58: mbfl_filt_ident_2022jpms 59: }; 60: 61: const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { 62: mbfl_no_encoding_2022jpms, 63: mbfl_no_encoding_wchar, 64: mbfl_filt_conv_common_ctor, 65: mbfl_filt_conv_common_dtor, 66: mbfl_filt_conv_2022jpms_wchar, 67: mbfl_filt_conv_common_flush 68: }; 69: 70: const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { 71: mbfl_no_encoding_wchar, 72: mbfl_no_encoding_2022jpms, 73: mbfl_filt_conv_common_ctor, 74: mbfl_filt_conv_common_dtor, 75: mbfl_filt_conv_wchar_2022jpms, 76: mbfl_filt_conv_any_2022jpms_flush 77: }; 78: 79: #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) 80: 81: #define sjistoidx(c1, c2) \ 82: (((c1) > 0x9f) \ 83: ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ 84: : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) 85: #define idxtojis1(c) (((c) / 94) + 0x21) 86: #define idxtojis2(c) (((c) % 94) + 0x21) 87: 88: /* 89: * ISO-2022-JP-MS => wchar 90: */ 91: int 92: mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) 93: { 94: int c1, s, w; 95: 96: retry: 97: switch (filter->status & 0xf) { 98: /* case 0x00: ASCII */ 99: /* case 0x10: X 0201 latin */ 100: /* case 0x20: X 0201 kana */ 101: /* case 0x80: X 0208 */ 102: /* case 0xa0: UDC */ 103: case 0: 104: if (c == 0x1b) { 105: filter->status += 2; 106: } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ 107: CK((*filter->output_function)(0xff40 + c, filter->data)); 108: } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ 109: filter->cache = c; 110: filter->status += 1; 111: } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ 112: CK((*filter->output_function)(c, filter->data)); 113: } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ 114: CK((*filter->output_function)(0xfec0 + c, filter->data)); 115: } else { 116: w = c & MBFL_WCSGROUP_MASK; 117: w |= MBFL_WCSGROUP_THROUGH; 118: CK((*filter->output_function)(w, filter->data)); 119: } 120: break; 121: 122: /* case 0x81: X 0208 second char */ 123: /* case 0xa1: UDC second char */ 124: case 1: 125: w = 0; 126: filter->status &= ~0xf; 127: c1 = filter->cache; 128: if (c > 0x20 && c < 0x7f) { 129: s = (c1 - 0x21)*94 + c - 0x21; 130: if (filter->status == 0x80) { 131: if (s <= 137) { 132: if (s == 31) { 133: w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ 134: } else if (s == 32) { 135: w = 0xff5e; /* FULLWIDTH TILDE */ 136: } else if (s == 33) { 137: w = 0x2225; /* PARALLEL TO */ 138: } else if (s == 60) { 139: w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ 140: } else if (s == 80) { 141: w = 0xffe0; /* FULLWIDTH CENT SIGN */ 142: } else if (s == 81) { 143: w = 0xffe1; /* FULLWIDTH POUND SIGN */ 144: } else if (s == 137) { 145: w = 0xffe2; /* FULLWIDTH NOT SIGN */ 146: } 147: } 148: if (w == 0) { 149: if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ 150: w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; 151: } else if (s >= 0 && s < jisx0208_ucs_table_size) { 152: w = jisx0208_ucs_table[s]; 153: } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ 154: w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; 155: } else { 156: w = 0; 157: } 158: } 159: if (w <= 0) { 160: w = (c1 << 8) | c; 161: w &= MBFL_WCSPLANE_MASK; 162: w |= MBFL_WCSPLANE_JIS0208; 163: } 164: CK((*filter->output_function)(w, filter->data)); 165: } else { 166: if (c1 > 0x20 && c1 < 0x35) { 167: w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; 168: } 169: if (w <= 0) { 170: w = (((c1 - 0x21) + 0x7f) << 8) | c; 171: w &= MBFL_WCSPLANE_MASK; 172: w |= MBFL_WCSPLANE_JIS0208; 173: } 174: CK((*filter->output_function)(w, filter->data)); 175: } 176: } else if (c == 0x1b) { 177: filter->status += 2; 178: } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 179: CK((*filter->output_function)(c, filter->data)); 180: } else { 181: w = (c1 << 8) | c; 182: w &= MBFL_WCSGROUP_MASK; 183: w |= MBFL_WCSGROUP_THROUGH; 184: CK((*filter->output_function)(w, filter->data)); 185: } 186: break; 187: 188: /* ESC */ 189: /* case 0x02: */ 190: /* case 0x12: */ 191: /* case 0x22: */ 192: /* case 0x82: */ 193: /* case 0xa2: */ 194: case 2: 195: if (c == 0x24) { /* '$' */ 196: filter->status++; 197: } else if (c == 0x28) { /* '(' */ 198: filter->status += 3; 199: } else { 200: filter->status &= ~0xf; 201: CK((*filter->output_function)(0x1b, filter->data)); 202: goto retry; 203: } 204: break; 205: 206: /* ESC $ */ 207: /* case 0x03: */ 208: /* case 0x13: */ 209: /* case 0x23: */ 210: /* case 0x83: */ 211: /* case 0xa3: */ 212: case 3: 213: if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ 214: filter->status = 0x80; 215: } else if (c == 0x28) { /* '(' */ 216: filter->status++; 217: } else { 218: filter->status &= ~0xf; 219: CK((*filter->output_function)(0x1b, filter->data)); 220: CK((*filter->output_function)(0x24, filter->data)); 221: goto retry; 222: } 223: break; 224: 225: /* ESC $ ( */ 226: /* case 0x04: */ 227: /* case 0x14: */ 228: /* case 0x24: */ 229: /* case 0x84: */ 230: /* case 0xa4: */ 231: case 4: 232: if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ 233: filter->status = 0x80; 234: } else if (c == 0x3f) { /* '?' */ 235: filter->status = 0xa0; 236: } else { 237: filter->status &= ~0xf; 238: CK((*filter->output_function)(0x1b, filter->data)); 239: CK((*filter->output_function)(0x24, filter->data)); 240: CK((*filter->output_function)(0x28, filter->data)); 241: goto retry; 242: } 243: break; 244: 245: /* ESC ( */ 246: /* case 0x05: */ 247: /* case 0x15: */ 248: /* case 0x25: */ 249: /* case 0x85: */ 250: /* case 0xa5: */ 251: case 5: 252: if (c == 0x42) { /* 'B' */ 253: filter->status = 0; 254: } else if (c == 0x4a) { /* 'J' */ 255: filter->status = 0; 256: } else if (c == 0x49) { /* 'I' */ 257: filter->status = 0x20; 258: } else { 259: filter->status &= ~0xf; 260: CK((*filter->output_function)(0x1b, filter->data)); 261: CK((*filter->output_function)(0x28, filter->data)); 262: goto retry; 263: } 264: break; 265: 266: default: 267: filter->status = 0; 268: break; 269: } 270: 271: return c; 272: } 273: 274: static int 275: cp932ext3_cp932ext2_jis(int c) 276: { 277: int idx; 278: 279: idx = sjistoidx(0xfa, 0x40) + c; 280: if (idx >= sjistoidx(0xfa, 0x5c)) 281: idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); 282: else if (idx >= sjistoidx(0xfa, 0x55)) 283: idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); 284: else if (idx >= sjistoidx(0xfa, 0x40)) 285: idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); 286: return idxtojis1(idx) << 8 | idxtojis2(idx); 287: } 288: 289: /* 290: * wchar => ISO-2022-JP-MS 291: */ 292: int 293: mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) 294: { 295: int c1, c2, s1, s2; 296: 297: s1 = 0; 298: s2 = 0; 299: if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { 300: s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; 301: } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { 302: s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; 303: } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { 304: s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; 305: } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { 306: s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; 307: } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ 308: s1 = c - 0xe000; 309: c1 = s1/94 + 0x7f; 310: c2 = s1%94 + 0x21; 311: s1 = (c1 << 8) | c2; 312: } 313: if (s1 <= 0) { 314: c1 = c & ~MBFL_WCSPLANE_MASK; 315: if (c1 == MBFL_WCSPLANE_WINCP932) { 316: s1 = c & MBFL_WCSPLANE_MASK; 317: s2 = 1; 318: } else if (c1 == MBFL_WCSPLANE_JIS0208) { 319: s1 = c & MBFL_WCSPLANE_MASK; 320: } else if (c1 == MBFL_WCSPLANE_JIS0212) { 321: s1 = c & MBFL_WCSPLANE_MASK; 322: s1 |= 0x8080; 323: } else if (c == 0xa5) { /* YEN SIGN */ 324: s1 = 0x216f; /* FULLWIDTH YEN SIGN */ 325: } else if (c == 0x203e) { /* OVER LINE */ 326: s1 = 0x2131; /* FULLWIDTH MACRON */ 327: } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ 328: s1 = 0x2140; 329: } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ 330: s1 = 0x2141; 331: } else if (c == 0x2225) { /* PARALLEL TO */ 332: s1 = 0x2142; 333: } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ 334: s1 = 0x215d; 335: } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ 336: s1 = 0x2171; 337: } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ 338: s1 = 0x2172; 339: } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ 340: s1 = 0x224c; 341: } 342: } 343: if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ 344: s1 = -1; 345: c1 = 0; 346: c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; 347: while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ 348: if (c == cp932ext1_ucs_table[c1]) { 349: s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); 350: break; 351: } 352: c1++; 353: } 354: if (s1 <= 0) { 355: c1 = 0; 356: c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; 357: while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ 358: if (c == cp932ext3_ucs_table[c1]) { 359: s1 = cp932ext3_cp932ext2_jis(c1); 360: break; 361: } 362: c1++; 363: } 364: } 365: if (c == 0) { 366: s1 = 0; 367: } else if (s1 <= 0) { 368: s1 = -1; 369: } 370: } 371: if (s1 >= 0) { 372: if (s1 < 0x80) { /* latin */ 373: if ((filter->status & 0xff00) != 0) { 374: CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 375: CK((*filter->output_function)(0x28, filter->data)); /* '(' */ 376: CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ 377: } 378: CK((*filter->output_function)(s1, filter->data)); 379: filter->status = 0; 380: } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */ 381: if ((filter->status & 0xff00) != 0x100) { 382: CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 383: CK((*filter->output_function)(0x28, filter->data)); /* '(' */ 384: CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ 385: } 386: filter->status = 0x100; 387: CK((*filter->output_function)(s1 & 0x7f, filter->data)); 388: } else if (s1 < 0x7e7f) { /* X 0208 */ 389: if ((filter->status & 0xff00) != 0x200) { 390: CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 391: CK((*filter->output_function)(0x24, filter->data)); /* '$' */ 392: CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ 393: } 394: filter->status = 0x200; 395: CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); 396: CK((*filter->output_function)(s1 & 0x7f, filter->data)); 397: } else if (s1 < 0x927f) { /* UDC */ 398: if ((filter->status & 0xff00) != 0x800) { 399: CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 400: CK((*filter->output_function)(0x24, filter->data)); /* '$' */ 401: CK((*filter->output_function)(0x28, filter->data)); /* '(' */ 402: CK((*filter->output_function)(0x3f, filter->data)); /* '?' */ 403: } 404: filter->status = 0x800; 405: CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data)); 406: CK((*filter->output_function)(s1 & 0x7f, filter->data)); 407: } 408: } else { 409: if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 410: CK(mbfl_filt_conv_illegal_output(c, filter)); 411: } 412: } 413: 414: return c; 415: } 416: 417: int 418: mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) 419: { 420: /* back to latin */ 421: if ((filter->status & 0xff00) != 0) { 422: CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 423: CK((*filter->output_function)(0x28, filter->data)); /* '(' */ 424: CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ 425: } 426: 427: filter->status &= 0xff; 428: 429: if (filter->flush_function != NULL) { 430: return (*filter->flush_function)(filter->data); 431: } 432: 433: return 0; 434: } 435: 1.1.1.2 ! misho 436: int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter) 1.1 misho 437: { 438: retry: 439: switch (filter->status & 0xf) { 440: /* case 0x00: ASCII */ 441: /* case 0x10: X 0201 latin */ 442: /* case 0x20: X 0201 kana */ 443: /* case 0x80: X 0208 */ 444: /* case 0xa0: X UDC */ 445: case 0: 446: if (c == 0x1b) { 447: filter->status += 2; 448: } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ 449: filter->status += 1; 450: } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ 451: ; 452: } else { 453: filter->flag = 1; /* bad */ 454: } 455: break; 456: 457: /* case 0x81: X 0208 second char */ 458: /* case 0xa1: UDC second char */ 459: case 1: 460: filter->status &= ~0xf; 461: if (c == 0x1b) { 462: goto retry; 463: } else if (c < 0x21 || c > 0x7e) { /* bad */ 464: filter->flag = 1; 465: } 466: break; 467: 468: /* ESC */ 469: case 2: 470: if (c == 0x24) { /* '$' */ 471: filter->status++; 472: } else if (c == 0x28) { /* '(' */ 473: filter->status += 3; 474: } else { 475: filter->flag = 1; /* bad */ 476: filter->status &= ~0xf; 477: goto retry; 478: } 479: break; 480: 481: /* ESC $ */ 482: case 3: 483: if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ 484: filter->status = 0x80; 485: } else if (c == 0x28) { /* '(' */ 486: filter->status++; 487: } else { 488: filter->flag = 1; /* bad */ 489: filter->status &= ~0xf; 490: goto retry; 491: } 492: break; 493: 494: /* ESC $ ( */ 495: case 4: 496: if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ 497: filter->status = 0x80; 498: } else if (c == 0x3f) { /* '?' */ 499: filter->status = 0xa0; 500: } else { 501: filter->flag = 1; /* bad */ 502: filter->status &= ~0xf; 503: goto retry; 504: } 505: break; 506: 507: /* ESC ( */ 508: case 5: 509: if (c == 0x42) { /* 'B' */ 510: filter->status = 0; 511: } else if (c == 0x4a) { /* 'J' */ 512: filter->status = 0; 513: } else if (c == 0x49) { /* 'I' */ 514: filter->status = 0x20; 515: } else { 516: filter->flag = 1; /* bad */ 517: filter->status &= ~0xf; 518: goto retry; 519: } 520: break; 521: 522: default: 523: filter->status = 0; 524: break; 525: } 526: 527: return c; 528: }