Annotation of embedaddon/libiconv/lib/iso2022_cnext.h, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
! 3: * This file is part of the GNU LIBICONV Library.
! 4: *
! 5: * The GNU LIBICONV Library is free software; you can redistribute it
! 6: * and/or modify it under the terms of the GNU Library General Public
! 7: * License as published by the Free Software Foundation; either version 2
! 8: * of the License, or (at your option) any later version.
! 9: *
! 10: * The GNU LIBICONV Library is distributed in the hope that it will be
! 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
! 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 13: * Library General Public License for more details.
! 14: *
! 15: * You should have received a copy of the GNU Library General Public
! 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
! 17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
! 18: * Fifth Floor, Boston, MA 02110-1301, USA.
! 19: */
! 20:
! 21: /*
! 22: * ISO-2022-CN-EXT
! 23: */
! 24:
! 25: /* Specification: RFC 1922 */
! 26:
/* Control bytes used by the encoding. */
#define ESC 0x1b
#define SO  0x0e
#define SI  0x0f

/*
 * The conversion state packs four independent fields into one word, one
 * field per byte.  SPLIT_STATE unpacks them into the locals state1..state4
 * and COMBINE_STATE packs them back into `state`.
 */

/*
 * Byte 0 (state1): current shift state, switched by SO / SI.
 */
#define STATE_ASCII    0
#define STATE_TWOBYTE  1
/*
 * Byte 1 (state2, << 8): set designated by ESC $ ) <final>, used while
 * shifted out with SO.
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2
#define STATE2_DESIGNATED_ISO_IR_165  3
/*
 * Byte 2 (state3, << 16): set designated by ESC $ * <final>, used by the
 * single shift ESC N.
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1
/*
 * Byte 3 (state4, << 24): set designated by ESC $ + <final>, used by the
 * single shift ESC O.
 */
#define STATE4_NONE                   0
#define STATE4_DESIGNATED_CNS11643_3  1
#define STATE4_DESIGNATED_CNS11643_4  2
#define STATE4_DESIGNATED_CNS11643_5  3
#define STATE4_DESIGNATED_CNS11643_6  4
#define STATE4_DESIGNATED_CNS11643_7  5

/* Declares state1..state4 from a variable named `state` in scope. */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
/* Stores state1..state4 back into the variable named `state`. */
#define COMBINE_STATE \
  state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
! 62:
! 63: static int
! 64: iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
! 65: {
! 66: state_t state = conv->istate;
! 67: SPLIT_STATE;
! 68: int count = 0;
! 69: unsigned char c;
! 70: for (;;) {
! 71: c = *s;
! 72: if (c == ESC) {
! 73: if (n < count+4)
! 74: goto none;
! 75: if (s[1] == '$') {
! 76: if (s[2] == ')') {
! 77: if (s[3] == 'A') {
! 78: state2 = STATE2_DESIGNATED_GB2312;
! 79: s += 4; count += 4;
! 80: if (n < count+1)
! 81: goto none;
! 82: continue;
! 83: }
! 84: if (s[3] == 'G') {
! 85: state2 = STATE2_DESIGNATED_CNS11643_1;
! 86: s += 4; count += 4;
! 87: if (n < count+1)
! 88: goto none;
! 89: continue;
! 90: }
! 91: if (s[3] == 'E') {
! 92: state2 = STATE2_DESIGNATED_ISO_IR_165;
! 93: s += 4; count += 4;
! 94: if (n < count+1)
! 95: goto none;
! 96: continue;
! 97: }
! 98: }
! 99: if (s[2] == '*') {
! 100: if (s[3] == 'H') {
! 101: state3 = STATE3_DESIGNATED_CNS11643_2;
! 102: s += 4; count += 4;
! 103: if (n < count+1)
! 104: goto none;
! 105: continue;
! 106: }
! 107: }
! 108: if (s[2] == '+') {
! 109: if (s[3] == 'I') {
! 110: state4 = STATE4_DESIGNATED_CNS11643_3;
! 111: s += 4; count += 4;
! 112: if (n < count+1)
! 113: goto none;
! 114: continue;
! 115: }
! 116: if (s[3] == 'J') {
! 117: state4 = STATE4_DESIGNATED_CNS11643_4;
! 118: s += 4; count += 4;
! 119: if (n < count+1)
! 120: goto none;
! 121: continue;
! 122: }
! 123: if (s[3] == 'K') {
! 124: state4 = STATE4_DESIGNATED_CNS11643_5;
! 125: s += 4; count += 4;
! 126: if (n < count+1)
! 127: goto none;
! 128: continue;
! 129: }
! 130: if (s[3] == 'L') {
! 131: state4 = STATE4_DESIGNATED_CNS11643_6;
! 132: s += 4; count += 4;
! 133: if (n < count+1)
! 134: goto none;
! 135: continue;
! 136: }
! 137: if (s[3] == 'M') {
! 138: state4 = STATE4_DESIGNATED_CNS11643_7;
! 139: s += 4; count += 4;
! 140: if (n < count+1)
! 141: goto none;
! 142: continue;
! 143: }
! 144: }
! 145: }
! 146: if (s[1] == 'N') {
! 147: switch (state3) {
! 148: case STATE3_NONE:
! 149: goto ilseq;
! 150: case STATE3_DESIGNATED_CNS11643_2:
! 151: if (s[2] < 0x80 && s[3] < 0x80) {
! 152: int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
! 153: if (ret == RET_ILSEQ)
! 154: goto ilseq;
! 155: if (ret != 2) abort();
! 156: COMBINE_STATE;
! 157: conv->istate = state;
! 158: return count+4;
! 159: } else
! 160: goto ilseq;
! 161: default: abort();
! 162: }
! 163: }
! 164: if (s[1] == 'O') {
! 165: switch (state4) {
! 166: case STATE4_NONE:
! 167: goto ilseq;
! 168: case STATE4_DESIGNATED_CNS11643_3:
! 169: if (s[2] < 0x80 && s[3] < 0x80) {
! 170: int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
! 171: if (ret == RET_ILSEQ)
! 172: goto ilseq;
! 173: if (ret != 2) abort();
! 174: COMBINE_STATE;
! 175: conv->istate = state;
! 176: return count+4;
! 177: } else
! 178: goto ilseq;
! 179: case STATE4_DESIGNATED_CNS11643_4:
! 180: if (s[2] < 0x80 && s[3] < 0x80) {
! 181: int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
! 182: if (ret == RET_ILSEQ)
! 183: goto ilseq;
! 184: if (ret != 2) abort();
! 185: COMBINE_STATE;
! 186: conv->istate = state;
! 187: return count+4;
! 188: } else
! 189: goto ilseq;
! 190: case STATE4_DESIGNATED_CNS11643_5:
! 191: if (s[2] < 0x80 && s[3] < 0x80) {
! 192: int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
! 193: if (ret == RET_ILSEQ)
! 194: goto ilseq;
! 195: if (ret != 2) abort();
! 196: COMBINE_STATE;
! 197: conv->istate = state;
! 198: return count+4;
! 199: } else
! 200: goto ilseq;
! 201: case STATE4_DESIGNATED_CNS11643_6:
! 202: if (s[2] < 0x80 && s[3] < 0x80) {
! 203: int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
! 204: if (ret == RET_ILSEQ)
! 205: goto ilseq;
! 206: if (ret != 2) abort();
! 207: COMBINE_STATE;
! 208: conv->istate = state;
! 209: return count+4;
! 210: } else
! 211: goto ilseq;
! 212: case STATE4_DESIGNATED_CNS11643_7:
! 213: if (s[2] < 0x80 && s[3] < 0x80) {
! 214: int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
! 215: if (ret == RET_ILSEQ)
! 216: goto ilseq;
! 217: if (ret != 2) abort();
! 218: COMBINE_STATE;
! 219: conv->istate = state;
! 220: return count+4;
! 221: } else
! 222: goto ilseq;
! 223: default: abort();
! 224: }
! 225: }
! 226: goto ilseq;
! 227: }
! 228: if (c == SO) {
! 229: if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
! 230: goto ilseq;
! 231: state1 = STATE_TWOBYTE;
! 232: s++; count++;
! 233: if (n < count+1)
! 234: goto none;
! 235: continue;
! 236: }
! 237: if (c == SI) {
! 238: state1 = STATE_ASCII;
! 239: s++; count++;
! 240: if (n < count+1)
! 241: goto none;
! 242: continue;
! 243: }
! 244: break;
! 245: }
! 246: switch (state1) {
! 247: case STATE_ASCII:
! 248: if (c < 0x80) {
! 249: int ret = ascii_mbtowc(conv,pwc,s,1);
! 250: if (ret == RET_ILSEQ)
! 251: goto ilseq;
! 252: if (ret != 1) abort();
! 253: if (*pwc == 0x000a || *pwc == 0x000d) {
! 254: state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
! 255: }
! 256: COMBINE_STATE;
! 257: conv->istate = state;
! 258: return count+1;
! 259: } else
! 260: goto ilseq;
! 261: case STATE_TWOBYTE:
! 262: if (n < count+2)
! 263: goto none;
! 264: if (s[0] < 0x80 && s[1] < 0x80) {
! 265: int ret;
! 266: switch (state2) {
! 267: case STATE2_NONE:
! 268: goto ilseq;
! 269: case STATE2_DESIGNATED_GB2312:
! 270: ret = gb2312_mbtowc(conv,pwc,s,2); break;
! 271: case STATE2_DESIGNATED_CNS11643_1:
! 272: ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
! 273: case STATE2_DESIGNATED_ISO_IR_165:
! 274: ret = isoir165_mbtowc(conv,pwc,s,2); break;
! 275: default: abort();
! 276: }
! 277: if (ret == RET_ILSEQ)
! 278: goto ilseq;
! 279: if (ret != 2) abort();
! 280: COMBINE_STATE;
! 281: conv->istate = state;
! 282: return count+2;
! 283: } else
! 284: goto ilseq;
! 285: default: abort();
! 286: }
! 287:
! 288: none:
! 289: COMBINE_STATE;
! 290: conv->istate = state;
! 291: return RET_TOOFEW(count);
! 292:
! 293: ilseq:
! 294: COMBINE_STATE;
! 295: conv->istate = state;
! 296: return RET_SHIFT_ILSEQ(count);
! 297: }
! 298:
! 299: static int
! 300: iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
! 301: {
! 302: state_t state = conv->ostate;
! 303: SPLIT_STATE;
! 304: unsigned char buf[3];
! 305: int ret;
! 306:
! 307: /* There is no need to handle Unicode 3.1 tag characters and to look for
! 308: "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
! 309:
! 310: /* Try ASCII. */
! 311: ret = ascii_wctomb(conv,buf,wc,1);
! 312: if (ret != RET_ILUNI) {
! 313: if (ret != 1) abort();
! 314: if (buf[0] < 0x80) {
! 315: int count = (state1 == STATE_ASCII ? 1 : 2);
! 316: if (n < count)
! 317: return RET_TOOSMALL;
! 318: if (state1 != STATE_ASCII) {
! 319: r[0] = SI;
! 320: r += 1;
! 321: state1 = STATE_ASCII;
! 322: }
! 323: r[0] = buf[0];
! 324: if (wc == 0x000a || wc == 0x000d) {
! 325: state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
! 326: }
! 327: COMBINE_STATE;
! 328: conv->ostate = state;
! 329: return count;
! 330: }
! 331: }
! 332:
! 333: /* Try GB 2312-1980. */
! 334: ret = gb2312_wctomb(conv,buf,wc,2);
! 335: if (ret != RET_ILUNI) {
! 336: if (ret != 2) abort();
! 337: if (buf[0] < 0x80 && buf[1] < 0x80) {
! 338: int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
! 339: if (n < count)
! 340: return RET_TOOSMALL;
! 341: if (state2 != STATE2_DESIGNATED_GB2312) {
! 342: r[0] = ESC;
! 343: r[1] = '$';
! 344: r[2] = ')';
! 345: r[3] = 'A';
! 346: r += 4;
! 347: state2 = STATE2_DESIGNATED_GB2312;
! 348: }
! 349: if (state1 != STATE_TWOBYTE) {
! 350: r[0] = SO;
! 351: r += 1;
! 352: state1 = STATE_TWOBYTE;
! 353: }
! 354: r[0] = buf[0];
! 355: r[1] = buf[1];
! 356: COMBINE_STATE;
! 357: conv->ostate = state;
! 358: return count;
! 359: }
! 360: }
! 361:
! 362: ret = cns11643_wctomb(conv,buf,wc,3);
! 363: if (ret != RET_ILUNI) {
! 364: if (ret != 3) abort();
! 365:
! 366: /* Try CNS 11643-1992 Plane 1. */
! 367: if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
! 368: int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
! 369: if (n < count)
! 370: return RET_TOOSMALL;
! 371: if (state2 != STATE2_DESIGNATED_CNS11643_1) {
! 372: r[0] = ESC;
! 373: r[1] = '$';
! 374: r[2] = ')';
! 375: r[3] = 'G';
! 376: r += 4;
! 377: state2 = STATE2_DESIGNATED_CNS11643_1;
! 378: }
! 379: if (state1 != STATE_TWOBYTE) {
! 380: r[0] = SO;
! 381: r += 1;
! 382: state1 = STATE_TWOBYTE;
! 383: }
! 384: r[0] = buf[1];
! 385: r[1] = buf[2];
! 386: COMBINE_STATE;
! 387: conv->ostate = state;
! 388: return count;
! 389: }
! 390:
! 391: /* Try CNS 11643-1992 Plane 2. */
! 392: if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
! 393: int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
! 394: if (n < count)
! 395: return RET_TOOSMALL;
! 396: if (state3 != STATE3_DESIGNATED_CNS11643_2) {
! 397: r[0] = ESC;
! 398: r[1] = '$';
! 399: r[2] = '*';
! 400: r[3] = 'H';
! 401: r += 4;
! 402: state3 = STATE3_DESIGNATED_CNS11643_2;
! 403: }
! 404: r[0] = ESC;
! 405: r[1] = 'N';
! 406: r[2] = buf[1];
! 407: r[3] = buf[2];
! 408: COMBINE_STATE;
! 409: conv->ostate = state;
! 410: return count;
! 411: }
! 412:
! 413: /* Try CNS 11643-1992 Plane 3. */
! 414: if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
! 415: int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
! 416: if (n < count)
! 417: return RET_TOOSMALL;
! 418: if (state4 != STATE4_DESIGNATED_CNS11643_3) {
! 419: r[0] = ESC;
! 420: r[1] = '$';
! 421: r[2] = '+';
! 422: r[3] = 'I';
! 423: r += 4;
! 424: state4 = STATE4_DESIGNATED_CNS11643_3;
! 425: }
! 426: r[0] = ESC;
! 427: r[1] = 'O';
! 428: r[2] = buf[1];
! 429: r[3] = buf[2];
! 430: COMBINE_STATE;
! 431: conv->ostate = state;
! 432: return count;
! 433: }
! 434:
! 435: /* Try CNS 11643-1992 Plane 4. */
! 436: if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
! 437: int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
! 438: if (n < count)
! 439: return RET_TOOSMALL;
! 440: if (state4 != STATE4_DESIGNATED_CNS11643_4) {
! 441: r[0] = ESC;
! 442: r[1] = '$';
! 443: r[2] = '+';
! 444: r[3] = 'J';
! 445: r += 4;
! 446: state4 = STATE4_DESIGNATED_CNS11643_4;
! 447: }
! 448: r[0] = ESC;
! 449: r[1] = 'O';
! 450: r[2] = buf[1];
! 451: r[3] = buf[2];
! 452: COMBINE_STATE;
! 453: conv->ostate = state;
! 454: return count;
! 455: }
! 456:
! 457: /* Try CNS 11643-1992 Plane 5. */
! 458: if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
! 459: int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
! 460: if (n < count)
! 461: return RET_TOOSMALL;
! 462: if (state4 != STATE4_DESIGNATED_CNS11643_5) {
! 463: r[0] = ESC;
! 464: r[1] = '$';
! 465: r[2] = '+';
! 466: r[3] = 'K';
! 467: r += 4;
! 468: state4 = STATE4_DESIGNATED_CNS11643_5;
! 469: }
! 470: r[0] = ESC;
! 471: r[1] = 'O';
! 472: r[2] = buf[1];
! 473: r[3] = buf[2];
! 474: COMBINE_STATE;
! 475: conv->ostate = state;
! 476: return count;
! 477: }
! 478:
! 479: /* Try CNS 11643-1992 Plane 6. */
! 480: if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
! 481: int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
! 482: if (n < count)
! 483: return RET_TOOSMALL;
! 484: if (state4 != STATE4_DESIGNATED_CNS11643_6) {
! 485: r[0] = ESC;
! 486: r[1] = '$';
! 487: r[2] = '+';
! 488: r[3] = 'L';
! 489: r += 4;
! 490: state4 = STATE4_DESIGNATED_CNS11643_6;
! 491: }
! 492: r[0] = ESC;
! 493: r[1] = 'O';
! 494: r[2] = buf[1];
! 495: r[3] = buf[2];
! 496: COMBINE_STATE;
! 497: conv->ostate = state;
! 498: return count;
! 499: }
! 500:
! 501: /* Try CNS 11643-1992 Plane 7. */
! 502: if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
! 503: int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
! 504: if (n < count)
! 505: return RET_TOOSMALL;
! 506: if (state4 != STATE4_DESIGNATED_CNS11643_7) {
! 507: r[0] = ESC;
! 508: r[1] = '$';
! 509: r[2] = '+';
! 510: r[3] = 'M';
! 511: r += 4;
! 512: state4 = STATE4_DESIGNATED_CNS11643_7;
! 513: }
! 514: r[0] = ESC;
! 515: r[1] = 'O';
! 516: r[2] = buf[1];
! 517: r[3] = buf[2];
! 518: COMBINE_STATE;
! 519: conv->ostate = state;
! 520: return count;
! 521: }
! 522:
! 523: }
! 524:
! 525: /* Try ISO-IR-165. */
! 526: ret = isoir165_wctomb(conv,buf,wc,2);
! 527: if (ret != RET_ILUNI) {
! 528: if (ret != 2) abort();
! 529: if (buf[0] < 0x80 && buf[1] < 0x80) {
! 530: int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
! 531: if (n < count)
! 532: return RET_TOOSMALL;
! 533: if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
! 534: r[0] = ESC;
! 535: r[1] = '$';
! 536: r[2] = ')';
! 537: r[3] = 'E';
! 538: r += 4;
! 539: state2 = STATE2_DESIGNATED_ISO_IR_165;
! 540: }
! 541: if (state1 != STATE_TWOBYTE) {
! 542: r[0] = SO;
! 543: r += 1;
! 544: state1 = STATE_TWOBYTE;
! 545: }
! 546: r[0] = buf[0];
! 547: r[1] = buf[1];
! 548: COMBINE_STATE;
! 549: conv->ostate = state;
! 550: return count;
! 551: }
! 552: }
! 553:
! 554: return RET_ILUNI;
! 555: }
! 556:
! 557: static int
! 558: iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
! 559: {
! 560: state_t state = conv->ostate;
! 561: SPLIT_STATE;
! 562: (void)state2;
! 563: (void)state3;
! 564: (void)state4;
! 565: if (state1 != STATE_ASCII) {
! 566: if (n < 1)
! 567: return RET_TOOSMALL;
! 568: r[0] = SI;
! 569: /* conv->ostate = 0; will be done by the caller */
! 570: return 1;
! 571: } else
! 572: return 0;
! 573: }
! 574:
/* Drop the state macros defined in this file so they do not leak past it.
   ESC, SO and SI are left defined, as before. */
#undef STATE_ASCII
#undef STATE_TWOBYTE
#undef STATE2_NONE
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE3_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE4_NONE
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_7
#undef SPLIT_STATE
#undef COMBINE_STATE
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>