Annotation of embedaddon/libiconv/lib/iso2022_jp3.h, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 1999-2004, 2008 Free Software Foundation, Inc.
! 3: * This file is part of the GNU LIBICONV Library.
! 4: *
! 5: * The GNU LIBICONV Library is free software; you can redistribute it
! 6: * and/or modify it under the terms of the GNU Library General Public
! 7: * License as published by the Free Software Foundation; either version 2
! 8: * of the License, or (at your option) any later version.
! 9: *
! 10: * The GNU LIBICONV Library is distributed in the hope that it will be
! 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
! 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 13: * Library General Public License for more details.
! 14: *
! 15: * You should have received a copy of the GNU Library General Public
! 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
! 17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
! 18: * Fifth Floor, Boston, MA 02110-1301, USA.
! 19: */
! 20:
! 21: /*
! 22: * ISO-2022-JP-3
! 23: */
! 24:
! 25: #include "jisx0213.h"
! 26:
#define ESC 0x1b

/*
 * Shift states.  The state is composed of one of the following values;
 * the escape sequence(s) selecting each state are noted beside it.
 */
#define STATE_ASCII             0  /* Esc ( B */
#define STATE_JISX0201ROMAN     1  /* Esc ( J */
#define STATE_JISX0201KATAKANA  2  /* Esc ( I */
#define STATE_JISX0208          3  /* Esc $ @ or Esc $ B */
#define STATE_JISX02131         4  /* Esc $ ( O or Esc $ ( Q */
#define STATE_JISX02132         5  /* Esc $ ( P */
! 38:
! 39: /*
! 40: * In the ISO-2022-JP-3 to UCS-4 direction, the state also holds the last
! 41: * character to be output, shifted by 3 bits.
! 42: */
! 43:
! 44: static int
! 45: iso2022_jp3_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
! 46: {
! 47: ucs4_t last_wc = conv->istate >> 3;
! 48: if (last_wc) {
! 49: /* Output the buffered character. */
! 50: conv->istate &= 7;
! 51: *pwc = last_wc;
! 52: return 0; /* Don't advance the input pointer. */
! 53: } else {
! 54: state_t state = conv->istate;
! 55: int count = 0;
! 56: unsigned char c;
! 57: for (;;) {
! 58: c = *s;
! 59: if (c == ESC) {
! 60: if (n < count+3)
! 61: goto none;
! 62: if (s[1] == '(') {
! 63: if (s[2] == 'B') {
! 64: state = STATE_ASCII;
! 65: s += 3; count += 3;
! 66: if (n < count+1)
! 67: goto none;
! 68: continue;
! 69: }
! 70: if (s[2] == 'J') {
! 71: state = STATE_JISX0201ROMAN;
! 72: s += 3; count += 3;
! 73: if (n < count+1)
! 74: goto none;
! 75: continue;
! 76: }
! 77: if (s[2] == 'I') {
! 78: state = STATE_JISX0201KATAKANA;
! 79: s += 3; count += 3;
! 80: if (n < count+1)
! 81: goto none;
! 82: continue;
! 83: }
! 84: goto ilseq;
! 85: }
! 86: if (s[1] == '$') {
! 87: if (s[2] == '@' || s[2] == 'B') {
! 88: /* We don't distinguish JIS X 0208-1978 and JIS X 0208-1983. */
! 89: state = STATE_JISX0208;
! 90: s += 3; count += 3;
! 91: if (n < count+1)
! 92: goto none;
! 93: continue;
! 94: }
! 95: if (s[2] == '(') {
! 96: if (n < count+4)
! 97: goto none;
! 98: if (s[3] == 'O' || s[3] == 'Q') {
! 99: state = STATE_JISX02131;
! 100: s += 4; count += 4;
! 101: if (n < count+1)
! 102: goto none;
! 103: continue;
! 104: }
! 105: if (s[3] == 'P') {
! 106: state = STATE_JISX02132;
! 107: s += 4; count += 4;
! 108: if (n < count+1)
! 109: goto none;
! 110: continue;
! 111: }
! 112: }
! 113: goto ilseq;
! 114: }
! 115: goto ilseq;
! 116: }
! 117: break;
! 118: }
! 119: switch (state) {
! 120: case STATE_ASCII:
! 121: if (c < 0x80) {
! 122: int ret = ascii_mbtowc(conv,pwc,s,1);
! 123: if (ret == RET_ILSEQ)
! 124: goto ilseq;
! 125: if (ret != 1) abort();
! 126: conv->istate = state;
! 127: return count+1;
! 128: } else
! 129: goto ilseq;
! 130: case STATE_JISX0201ROMAN:
! 131: if (c < 0x80) {
! 132: int ret = jisx0201_mbtowc(conv,pwc,s,1);
! 133: if (ret == RET_ILSEQ)
! 134: goto ilseq;
! 135: if (ret != 1) abort();
! 136: conv->istate = state;
! 137: return count+1;
! 138: } else
! 139: goto ilseq;
! 140: case STATE_JISX0201KATAKANA:
! 141: if (c < 0x80) {
! 142: unsigned char buf = c+0x80;
! 143: int ret = jisx0201_mbtowc(conv,pwc,&buf,1);
! 144: if (ret == RET_ILSEQ)
! 145: goto ilseq;
! 146: if (ret != 1) abort();
! 147: conv->istate = state;
! 148: return count+1;
! 149: } else
! 150: goto ilseq;
! 151: case STATE_JISX0208:
! 152: if (n < count+2)
! 153: goto none;
! 154: if (s[0] < 0x80 && s[1] < 0x80) {
! 155: int ret = jisx0208_mbtowc(conv,pwc,s,2);
! 156: if (ret == RET_ILSEQ)
! 157: goto ilseq;
! 158: if (ret != 2) abort();
! 159: conv->istate = state;
! 160: return count+2;
! 161: } else
! 162: goto ilseq;
! 163: case STATE_JISX02131:
! 164: case STATE_JISX02132:
! 165: if (n < count+2)
! 166: goto none;
! 167: if (s[0] < 0x80 && s[1] < 0x80) {
! 168: ucs4_t wc = jisx0213_to_ucs4(((state-STATE_JISX02131+1)<<8)+s[0],s[1]);
! 169: if (wc) {
! 170: if (wc < 0x80) {
! 171: /* It's a combining character. */
! 172: ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0];
! 173: ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1];
! 174: /* We cannot output two Unicode characters at once. So,
! 175: output the first character and buffer the second one. */
! 176: *pwc = wc1;
! 177: conv->istate = (wc2 << 3) | state;
! 178: } else {
! 179: *pwc = wc;
! 180: conv->istate = state;
! 181: }
! 182: return count+2;
! 183: }
! 184: }
! 185: goto ilseq;
! 186: default: abort();
! 187: }
! 188: none:
! 189: conv->istate = state;
! 190: return RET_TOOFEW(count);
! 191:
! 192: ilseq:
! 193: conv->istate = state;
! 194: return RET_SHIFT_ILSEQ(count);
! 195: }
! 196: }
! 197:
! 198: static int
! 199: iso2022_jp3_flushwc (conv_t conv, ucs4_t *pwc)
! 200: {
! 201: ucs4_t last_wc = conv->istate >> 3;
! 202: if (last_wc) {
! 203: /* Output the buffered character. */
! 204: conv->istate &= 7;
! 205: *pwc = last_wc;
! 206: return 1;
! 207: } else
! 208: return 0;
! 209: }
! 210:
! 211: /*
! 212: * In the UCS-4 to ISO-2022-JP-3 direction, the state also holds the last two
! 213: * bytes to be output, shifted by 3 bits, and the STATE_xxxxx value that was
! 214: * effective before this buffered character, shifted by 19 bits.
! 215: */
! 216:
/*
 * Composition table.  Each entry pairs the two-byte code of a base
 * character with the two-byte code of the precomposed character obtained
 * when that base is followed by a particular combining character.  Entries
 * are grouped by combining character; for each group the macros
 * iso2022_jp3_comp_tableXXXX_idx and iso2022_jp3_comp_tableXXXX_len give
 * the index of its first entry and its number of entries.
 */
#define iso2022_jp3_comp_table02e5_idx 0
#define iso2022_jp3_comp_table02e5_len 1
#define iso2022_jp3_comp_table02e9_idx \
  (iso2022_jp3_comp_table02e5_idx+iso2022_jp3_comp_table02e5_len)
#define iso2022_jp3_comp_table02e9_len 1
#define iso2022_jp3_comp_table0300_idx \
  (iso2022_jp3_comp_table02e9_idx+iso2022_jp3_comp_table02e9_len)
#define iso2022_jp3_comp_table0300_len 5
#define iso2022_jp3_comp_table0301_idx \
  (iso2022_jp3_comp_table0300_idx+iso2022_jp3_comp_table0300_len)
#define iso2022_jp3_comp_table0301_len 4
#define iso2022_jp3_comp_table309a_idx \
  (iso2022_jp3_comp_table0301_idx+iso2022_jp3_comp_table0301_len)
#define iso2022_jp3_comp_table309a_len 14

static const struct {
  unsigned short base;
  unsigned short composed;
} iso2022_jp3_comp_table_data[] = {
  /* Followed by U+02E5 */
  { 0x2b64, 0x2b65 },
  /* Followed by U+02E9 */
  { 0x2b60, 0x2b66 },
  /* Followed by U+0300 */
  { 0x295c, 0x2b44 },
  { 0x2b38, 0x2b48 },
  { 0x2b37, 0x2b4a },
  { 0x2b30, 0x2b4c },
  { 0x2b43, 0x2b4e },
  /* Followed by U+0301 */
  { 0x2b38, 0x2b49 },
  { 0x2b37, 0x2b4b },
  { 0x2b30, 0x2b4d },
  { 0x2b43, 0x2b4f },
  /* Followed by U+309A */
  { 0x242b, 0x2477 },
  { 0x242d, 0x2478 },
  { 0x242f, 0x2479 },
  { 0x2431, 0x247a },
  { 0x2433, 0x247b },
  { 0x252b, 0x2577 },
  { 0x252d, 0x2578 },
  { 0x252f, 0x2579 },
  { 0x2531, 0x257a },
  { 0x2533, 0x257b },
  { 0x253b, 0x257c },
  { 0x2544, 0x257d },
  { 0x2548, 0x257e },
  { 0x2675, 0x2678 },
};
! 255:
/*
 * Helpers for the packed output state.  They operate on a local variable
 * named `state`:
 *   bits 0..2   current STATE_xxxxx value,
 *   bits 3..18  two buffered output bytes (`lasttwo`),
 *   bits 19..   STATE_xxxxx value in effect before the buffered character
 *               (`prevstate`).
 * SPLIT_STATE declares `lasttwo` and `prevstate` and reduces `state` to its
 * low three bits, so it must be the last item of a declaration list.
 */
#define SPLIT_STATE \
  unsigned short lasttwo = state >> 3; \
  state_t prevstate = state >> 19; \
  state &= 7
/* Pack `lasttwo` and `prevstate` back into `state`. */
#define COMBINE_STATE \
  state |= (lasttwo << 3) | (prevstate << 19)
/* Nothing is buffered: `state` already is the complete packed value. */
#define COMBINE_STATE_NO_LASTTWO \
  /* assume lasttwo == 0, then prevstate is ignored */
! 262:
! 263: static int
! 264: iso2022_jp3_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
! 265: {
! 266: int count = 0;
! 267: unsigned char buf[2];
! 268: unsigned short jch;
! 269: int ret;
! 270: state_t state = conv->ostate;
! 271: SPLIT_STATE;
! 272:
! 273: if (lasttwo) {
! 274: /* Attempt to combine the last character with this one. */
! 275: unsigned int idx;
! 276: unsigned int len;
! 277:
! 278: if (wc == 0x02e5)
! 279: idx = iso2022_jp3_comp_table02e5_idx,
! 280: len = iso2022_jp3_comp_table02e5_len;
! 281: else if (wc == 0x02e9)
! 282: idx = iso2022_jp3_comp_table02e9_idx,
! 283: len = iso2022_jp3_comp_table02e9_len;
! 284: else if (wc == 0x0300)
! 285: idx = iso2022_jp3_comp_table0300_idx,
! 286: len = iso2022_jp3_comp_table0300_len;
! 287: else if (wc == 0x0301)
! 288: idx = iso2022_jp3_comp_table0301_idx,
! 289: len = iso2022_jp3_comp_table0301_len;
! 290: else if (wc == 0x309a)
! 291: idx = iso2022_jp3_comp_table309a_idx,
! 292: len = iso2022_jp3_comp_table309a_len;
! 293: else
! 294: goto not_combining;
! 295:
! 296: do
! 297: if (iso2022_jp3_comp_table_data[idx].base == lasttwo)
! 298: break;
! 299: while (++idx, --len > 0);
! 300:
! 301: if (len > 0) {
! 302: /* Output the combined character. */
! 303: /* We know the combined character is in JISX0213 plane 1, but
! 304: the buffered character may have been in JISX0208 or in
! 305: JISX0213 plane 1. */
! 306: count = (state != STATE_JISX02131 ? 4 : 0) + 2;
! 307: if (n < count)
! 308: return RET_TOOSMALL;
! 309: if (state != STATE_JISX02131) {
! 310: r[0] = ESC;
! 311: r[1] = '$';
! 312: r[2] = '(';
! 313: r[3] = 'Q';
! 314: r += 4;
! 315: state = STATE_JISX02131;
! 316: }
! 317: lasttwo = iso2022_jp3_comp_table_data[idx].composed;
! 318: r[0] = (lasttwo >> 8) & 0xff;
! 319: r[1] = lasttwo & 0xff;
! 320: COMBINE_STATE_NO_LASTTWO;
! 321: conv->ostate = state;
! 322: return count;
! 323: }
! 324:
! 325: not_combining:
! 326: /* Output the buffered character. */
! 327: /* We know it is in JISX0208 or in JISX0213 plane 1. */
! 328: count = (prevstate != state ? 3 : 0) + 2;
! 329: if (n < count)
! 330: return RET_TOOSMALL;
! 331: if (prevstate != state) {
! 332: if (state != STATE_JISX0208) abort();
! 333: r[0] = ESC;
! 334: r[1] = '$';
! 335: r[2] = 'B';
! 336: r += 3;
! 337: }
! 338: r[0] = (lasttwo >> 8) & 0xff;
! 339: r[1] = lasttwo & 0xff;
! 340: r += 2;
! 341: }
! 342:
! 343: /* Try ASCII. */
! 344: ret = ascii_wctomb(conv,buf,wc,1);
! 345: if (ret != RET_ILUNI) {
! 346: if (ret != 1) abort();
! 347: if (buf[0] < 0x80) {
! 348: count += (state == STATE_ASCII ? 1 : 4);
! 349: if (n < count)
! 350: return RET_TOOSMALL;
! 351: if (state != STATE_ASCII) {
! 352: r[0] = ESC;
! 353: r[1] = '(';
! 354: r[2] = 'B';
! 355: r += 3;
! 356: state = STATE_ASCII;
! 357: }
! 358: r[0] = buf[0];
! 359: COMBINE_STATE_NO_LASTTWO;
! 360: conv->ostate = state;
! 361: return count;
! 362: }
! 363: }
! 364:
! 365: /* Try JIS X 0201-1976 Roman. */
! 366: ret = jisx0201_wctomb(conv,buf,wc,1);
! 367: if (ret != RET_ILUNI) {
! 368: if (ret != 1) abort();
! 369: if (buf[0] < 0x80) {
! 370: count += (state == STATE_JISX0201ROMAN ? 1 : 4);
! 371: if (n < count)
! 372: return RET_TOOSMALL;
! 373: if (state != STATE_JISX0201ROMAN) {
! 374: r[0] = ESC;
! 375: r[1] = '(';
! 376: r[2] = 'J';
! 377: r += 3;
! 378: state = STATE_JISX0201ROMAN;
! 379: }
! 380: r[0] = buf[0];
! 381: COMBINE_STATE_NO_LASTTWO;
! 382: conv->ostate = state;
! 383: return count;
! 384: }
! 385: }
! 386:
! 387: jch = ucs4_to_jisx0213(wc);
! 388:
! 389: /* Try JIS X 0208-1990 in place of JIS X 0208-1978 and JIS X 0208-1983. */
! 390: ret = jisx0208_wctomb(conv,buf,wc,2);
! 391: if (ret != RET_ILUNI) {
! 392: if (ret != 2) abort();
! 393: if (buf[0] < 0x80 && buf[1] < 0x80) {
! 394: if (jch & 0x0080) {
! 395: /* A possible match in comp_table_data. Buffer it. */
! 396: prevstate = state;
! 397: lasttwo = jch & 0x7f7f;
! 398: state = STATE_JISX0208;
! 399: COMBINE_STATE;
! 400: conv->ostate = state;
! 401: return count;
! 402: } else {
! 403: count += (state == STATE_JISX0208 ? 2 : 5);
! 404: if (n < count)
! 405: return RET_TOOSMALL;
! 406: if (state != STATE_JISX0208) {
! 407: r[0] = ESC;
! 408: r[1] = '$';
! 409: r[2] = 'B';
! 410: r += 3;
! 411: state = STATE_JISX0208;
! 412: }
! 413: r[0] = buf[0];
! 414: r[1] = buf[1];
! 415: COMBINE_STATE_NO_LASTTWO;
! 416: conv->ostate = state;
! 417: return count;
! 418: }
! 419: }
! 420: }
! 421:
! 422: /* Try JISX 0213 plane 1 and JISX 0213 plane 2. */
! 423: if (jch != 0) {
! 424: if (jch & 0x8000) {
! 425: /* JISX 0213 plane 2. */
! 426: if (state != STATE_JISX02132) {
! 427: count += 4;
! 428: if (n < count)
! 429: return RET_TOOSMALL;
! 430: r[0] = ESC;
! 431: r[1] = '$';
! 432: r[2] = '(';
! 433: r[3] = 'P';
! 434: r += 4;
! 435: state = STATE_JISX02132;
! 436: }
! 437: } else {
! 438: /* JISX 0213 plane 1. */
! 439: if (state != STATE_JISX02131) {
! 440: count += 4;
! 441: if (n < count)
! 442: return RET_TOOSMALL;
! 443: r[0] = ESC;
! 444: r[1] = '$';
! 445: r[2] = '(';
! 446: r[3] = 'Q';
! 447: r += 4;
! 448: state = STATE_JISX02131;
! 449: }
! 450: }
! 451: if (jch & 0x0080) {
! 452: /* A possible match in comp_table_data. We have to buffer it. */
! 453: /* We know it's a JISX 0213 plane 1 character. */
! 454: if (jch & 0x8000) abort();
! 455: prevstate = state;
! 456: lasttwo = jch & 0x7f7f;
! 457: COMBINE_STATE;
! 458: conv->ostate = state;
! 459: return count;
! 460: }
! 461: count += 2;
! 462: if (n < count)
! 463: return RET_TOOSMALL;
! 464: r[0] = (jch >> 8) & 0x7f;
! 465: r[1] = jch & 0x7f;
! 466: COMBINE_STATE_NO_LASTTWO;
! 467: conv->ostate = state;
! 468: return count;
! 469: }
! 470:
! 471: /* Try JIS X 0201-1976 Katakana. This is not officially part of
! 472: ISO-2022-JP-3. Therefore we try it after all other attempts. */
! 473: ret = jisx0201_wctomb(conv,buf,wc,1);
! 474: if (ret != RET_ILUNI) {
! 475: if (ret != 1) abort();
! 476: if (buf[0] >= 0x80) {
! 477: count += (state == STATE_JISX0201KATAKANA ? 1 : 4);
! 478: if (n < count)
! 479: return RET_TOOSMALL;
! 480: if (state != STATE_JISX0201KATAKANA) {
! 481: r[0] = ESC;
! 482: r[1] = '(';
! 483: r[2] = 'I';
! 484: r += 3;
! 485: state = STATE_JISX0201KATAKANA;
! 486: }
! 487: r[0] = buf[0]-0x80;
! 488: COMBINE_STATE_NO_LASTTWO;
! 489: conv->ostate = state;
! 490: return count;
! 491: }
! 492: }
! 493:
! 494: return RET_ILUNI;
! 495: }
! 496:
! 497: static int
! 498: iso2022_jp3_reset (conv_t conv, unsigned char *r, int n)
! 499: {
! 500: state_t state = conv->ostate;
! 501: SPLIT_STATE;
! 502: {
! 503: int count =
! 504: (lasttwo ? (prevstate != state ? 3 : 0) + 2 : 0)
! 505: + (state != STATE_ASCII ? 3 : 0);
! 506: if (n < count)
! 507: return RET_TOOSMALL;
! 508: if (lasttwo) {
! 509: if (prevstate != state) {
! 510: if (state != STATE_JISX0208) abort();
! 511: r[0] = ESC;
! 512: r[1] = '$';
! 513: r[2] = 'B';
! 514: r += 3;
! 515: }
! 516: r[0] = (lasttwo >> 8) & 0xff;
! 517: r[1] = lasttwo & 0xff;
! 518: r += 2;
! 519: }
! 520: if (state != STATE_ASCII) {
! 521: r[0] = ESC;
! 522: r[1] = '(';
! 523: r[2] = 'B';
! 524: }
! 525: /* conv->ostate = 0; will be done by the caller */
! 526: return count;
! 527: }
! 528: }
! 529:
/* Release the helper macros and state names defined above. */
#undef SPLIT_STATE
#undef COMBINE_STATE
#undef COMBINE_STATE_NO_LASTTWO

#undef STATE_ASCII
#undef STATE_JISX0201ROMAN
#undef STATE_JISX0201KATAKANA
#undef STATE_JISX0208
#undef STATE_JISX02131
#undef STATE_JISX02132
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>