Annotation of embedaddon/libiconv/lib/loop_unicode.h, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
! 3: * This file is part of the GNU LIBICONV Library.
! 4: *
! 5: * The GNU LIBICONV Library is free software; you can redistribute it
! 6: * and/or modify it under the terms of the GNU Library General Public
! 7: * License as published by the Free Software Foundation; either version 2
! 8: * of the License, or (at your option) any later version.
! 9: *
! 10: * The GNU LIBICONV Library is distributed in the hope that it will be
! 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
! 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 13: * Library General Public License for more details.
! 14: *
! 15: * You should have received a copy of the GNU Library General Public
! 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
! 17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
! 18: * Fifth Floor, Boston, MA 02110-1301, USA.
! 19: */
! 20:
! 21: /* This file defines the conversion loop via Unicode as a pivot encoding. */
! 22:
! 23: /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
! 24: static int unicode_transliterate (conv_t cd, ucs4_t wc,
! 25: unsigned char* outptr, size_t outleft)
! 26: {
! 27: if (cd->oflags & HAVE_HANGUL_JAMO) {
! 28: /* Decompose Hangul into Jamo. Use double-width Jamo (contained
! 29: in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
! 30: (contained in Unicode only). */
! 31: ucs4_t buf[3];
! 32: int ret = johab_hangul_decompose(cd,buf,wc);
! 33: if (ret != RET_ILUNI) {
! 34: /* we know 1 <= ret <= 3 */
! 35: state_t backup_state = cd->ostate;
! 36: unsigned char* backup_outptr = outptr;
! 37: size_t backup_outleft = outleft;
! 38: int i, sub_outcount;
! 39: for (i = 0; i < ret; i++) {
! 40: if (outleft == 0) {
! 41: sub_outcount = RET_TOOSMALL;
! 42: goto johab_hangul_failed;
! 43: }
! 44: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
! 45: if (sub_outcount <= RET_ILUNI)
! 46: goto johab_hangul_failed;
! 47: if (!(sub_outcount <= outleft)) abort();
! 48: outptr += sub_outcount; outleft -= sub_outcount;
! 49: }
! 50: return outptr-backup_outptr;
! 51: johab_hangul_failed:
! 52: cd->ostate = backup_state;
! 53: outptr = backup_outptr;
! 54: outleft = backup_outleft;
! 55: if (sub_outcount != RET_ILUNI)
! 56: return RET_TOOSMALL;
! 57: }
! 58: }
! 59: {
! 60: /* Try to use a variant, but postfix it with
! 61: U+303E IDEOGRAPHIC VARIATION INDICATOR
! 62: (cf. Ken Lunde's "CJKV information processing", p. 188). */
! 63: int indx = -1;
! 64: if (wc == 0x3006)
! 65: indx = 0;
! 66: else if (wc == 0x30f6)
! 67: indx = 1;
! 68: else if (wc >= 0x4e00 && wc < 0xa000)
! 69: indx = cjk_variants_indx[wc-0x4e00];
! 70: if (indx >= 0) {
! 71: for (;; indx++) {
! 72: ucs4_t buf[2];
! 73: unsigned short variant = cjk_variants[indx];
! 74: unsigned short last = variant & 0x8000;
! 75: variant &= 0x7fff;
! 76: variant += 0x3000;
! 77: buf[0] = variant; buf[1] = 0x303e;
! 78: {
! 79: state_t backup_state = cd->ostate;
! 80: unsigned char* backup_outptr = outptr;
! 81: size_t backup_outleft = outleft;
! 82: int i, sub_outcount;
! 83: for (i = 0; i < 2; i++) {
! 84: if (outleft == 0) {
! 85: sub_outcount = RET_TOOSMALL;
! 86: goto variant_failed;
! 87: }
! 88: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
! 89: if (sub_outcount <= RET_ILUNI)
! 90: goto variant_failed;
! 91: if (!(sub_outcount <= outleft)) abort();
! 92: outptr += sub_outcount; outleft -= sub_outcount;
! 93: }
! 94: return outptr-backup_outptr;
! 95: variant_failed:
! 96: cd->ostate = backup_state;
! 97: outptr = backup_outptr;
! 98: outleft = backup_outleft;
! 99: if (sub_outcount != RET_ILUNI)
! 100: return RET_TOOSMALL;
! 101: }
! 102: if (last)
! 103: break;
! 104: }
! 105: }
! 106: }
! 107: if (wc >= 0x2018 && wc <= 0x201a) {
! 108: /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
! 109: ucs4_t substitute =
! 110: (cd->oflags & HAVE_QUOTATION_MARKS
! 111: ? (wc == 0x201a ? 0x2018 : wc)
! 112: : (cd->oflags & HAVE_ACCENTS
! 113: ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
! 114: : 0x0027 /* use apostrophe */
! 115: ) );
! 116: int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
! 117: if (outcount != RET_ILUNI)
! 118: return outcount;
! 119: }
! 120: {
! 121: /* Use the transliteration table. */
! 122: int indx = translit_index(wc);
! 123: if (indx >= 0) {
! 124: const unsigned int * cp = &translit_data[indx];
! 125: unsigned int num = *cp++;
! 126: state_t backup_state = cd->ostate;
! 127: unsigned char* backup_outptr = outptr;
! 128: size_t backup_outleft = outleft;
! 129: unsigned int i;
! 130: int sub_outcount;
! 131: for (i = 0; i < num; i++) {
! 132: if (outleft == 0) {
! 133: sub_outcount = RET_TOOSMALL;
! 134: goto translit_failed;
! 135: }
! 136: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
! 137: if (sub_outcount == RET_ILUNI)
! 138: /* Recursive transliteration. */
! 139: sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
! 140: if (sub_outcount <= RET_ILUNI)
! 141: goto translit_failed;
! 142: if (!(sub_outcount <= outleft)) abort();
! 143: outptr += sub_outcount; outleft -= sub_outcount;
! 144: }
! 145: return outptr-backup_outptr;
! 146: translit_failed:
! 147: cd->ostate = backup_state;
! 148: outptr = backup_outptr;
! 149: outleft = backup_outleft;
! 150: if (sub_outcount != RET_ILUNI)
! 151: return RET_TOOSMALL;
! 152: }
! 153: }
! 154: return RET_ILUNI;
! 155: }
! 156:
! 157: #ifndef LIBICONV_PLUG
! 158:
/* State handed to uc_to_mb_write_replacement through its callback_arg:
   the current write position, the room left there, and the first error
   recorded so far (0 while none has occurred). */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;
  size_t l_outbytesleft;
  int l_errno;
};

/* Append the buflen bytes at buf to the output described by callback_arg
   (a struct uc_to_mb_fallback_locals).  If they do not fit, nothing is
   copied and l_errno becomes E2BIG.  Once l_errno is nonzero, every
   later call is a no-op. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * st =
    (struct uc_to_mb_fallback_locals *) callback_arg;

  /* An earlier call already failed: stay silent. */
  if (st->l_errno != 0)
    return;

  /* The replacement must fit as a whole. */
  if (buflen > st->l_outbytesleft) {
    st->l_errno = E2BIG;
    return;
  }

  memcpy(st->l_outbuf, buf, buflen);
  st->l_outbuf += buflen;
  st->l_outbytesleft -= buflen;
}
! 182:
! 183: struct mb_to_uc_fallback_locals {
! 184: conv_t l_cd;
! 185: unsigned char* l_outbuf;
! 186: size_t l_outbytesleft;
! 187: int l_errno;
! 188: };
! 189:
! 190: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
! 191: void* callback_arg)
! 192: {
! 193: struct mb_to_uc_fallback_locals * plocals =
! 194: (struct mb_to_uc_fallback_locals *) callback_arg;
! 195: /* Do nothing if already encountered an error in a previous call. */
! 196: if (plocals->l_errno == 0) {
! 197: /* Attempt to convert the passed buffer to the target encoding. */
! 198: conv_t cd = plocals->l_cd;
! 199: unsigned char* outptr = plocals->l_outbuf;
! 200: size_t outleft = plocals->l_outbytesleft;
! 201: for (; buflen > 0; buf++, buflen--) {
! 202: ucs4_t wc = *buf;
! 203: int outcount;
! 204: if (outleft == 0) {
! 205: plocals->l_errno = E2BIG;
! 206: break;
! 207: }
! 208: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
! 209: if (outcount != RET_ILUNI)
! 210: goto outcount_ok;
! 211: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
! 212: if ((wc >> 7) == (0xe0000 >> 7))
! 213: goto outcount_zero;
! 214: /* Try transliteration. */
! 215: if (cd->transliterate) {
! 216: outcount = unicode_transliterate(cd,wc,outptr,outleft);
! 217: if (outcount != RET_ILUNI)
! 218: goto outcount_ok;
! 219: }
! 220: if (cd->discard_ilseq) {
! 221: outcount = 0;
! 222: goto outcount_ok;
! 223: }
! 224: #ifndef LIBICONV_PLUG
! 225: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
! 226: struct uc_to_mb_fallback_locals locals;
! 227: locals.l_outbuf = outptr;
! 228: locals.l_outbytesleft = outleft;
! 229: locals.l_errno = 0;
! 230: cd->fallbacks.uc_to_mb_fallback(wc,
! 231: uc_to_mb_write_replacement,
! 232: &locals,
! 233: cd->fallbacks.data);
! 234: if (locals.l_errno != 0) {
! 235: plocals->l_errno = locals.l_errno;
! 236: break;
! 237: }
! 238: outptr = locals.l_outbuf;
! 239: outleft = locals.l_outbytesleft;
! 240: outcount = 0;
! 241: goto outcount_ok;
! 242: }
! 243: #endif
! 244: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
! 245: if (outcount != RET_ILUNI)
! 246: goto outcount_ok;
! 247: plocals->l_errno = EILSEQ;
! 248: break;
! 249: outcount_ok:
! 250: if (outcount < 0) {
! 251: plocals->l_errno = E2BIG;
! 252: break;
! 253: }
! 254: #ifndef LIBICONV_PLUG
! 255: if (cd->hooks.uc_hook)
! 256: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
! 257: #endif
! 258: if (!(outcount <= outleft)) abort();
! 259: outptr += outcount; outleft -= outcount;
! 260: outcount_zero: ;
! 261: }
! 262: plocals->l_outbuf = outptr;
! 263: plocals->l_outbytesleft = outleft;
! 264: }
! 265: }
! 266:
! 267: #endif /* !LIBICONV_PLUG */
! 268:
! 269: static size_t unicode_loop_convert (iconv_t icd,
! 270: const char* * inbuf, size_t *inbytesleft,
! 271: char* * outbuf, size_t *outbytesleft)
! 272: {
! 273: conv_t cd = (conv_t) icd;
! 274: size_t result = 0;
! 275: const unsigned char* inptr = (const unsigned char*) *inbuf;
! 276: size_t inleft = *inbytesleft;
! 277: unsigned char* outptr = (unsigned char*) *outbuf;
! 278: size_t outleft = *outbytesleft;
! 279: while (inleft > 0) {
! 280: state_t last_istate = cd->istate;
! 281: ucs4_t wc;
! 282: int incount;
! 283: int outcount;
! 284: incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
! 285: if (incount < 0) {
! 286: if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
! 287: /* Case 1: invalid input, possibly after a shift sequence */
! 288: incount = DECODE_SHIFT_ILSEQ(incount);
! 289: if (cd->discard_ilseq) {
! 290: switch (cd->iindex) {
! 291: case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
! 292: case ei_utf32: case ei_utf32be: case ei_utf32le:
! 293: case ei_ucs4internal: case ei_ucs4swapped:
! 294: incount += 4; break;
! 295: case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
! 296: case ei_utf16: case ei_utf16be: case ei_utf16le:
! 297: case ei_ucs2internal: case ei_ucs2swapped:
! 298: incount += 2; break;
! 299: default:
! 300: incount += 1; break;
! 301: }
! 302: goto outcount_zero;
! 303: }
! 304: #ifndef LIBICONV_PLUG
! 305: else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
! 306: unsigned int incount2;
! 307: struct mb_to_uc_fallback_locals locals;
! 308: switch (cd->iindex) {
! 309: case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
! 310: case ei_utf32: case ei_utf32be: case ei_utf32le:
! 311: case ei_ucs4internal: case ei_ucs4swapped:
! 312: incount2 = 4; break;
! 313: case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
! 314: case ei_utf16: case ei_utf16be: case ei_utf16le:
! 315: case ei_ucs2internal: case ei_ucs2swapped:
! 316: incount2 = 2; break;
! 317: default:
! 318: incount2 = 1; break;
! 319: }
! 320: locals.l_cd = cd;
! 321: locals.l_outbuf = outptr;
! 322: locals.l_outbytesleft = outleft;
! 323: locals.l_errno = 0;
! 324: cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
! 325: mb_to_uc_write_replacement,
! 326: &locals,
! 327: cd->fallbacks.data);
! 328: if (locals.l_errno != 0) {
! 329: inptr += incount; inleft -= incount;
! 330: errno = locals.l_errno;
! 331: result = -1;
! 332: break;
! 333: }
! 334: incount += incount2;
! 335: outptr = locals.l_outbuf;
! 336: outleft = locals.l_outbytesleft;
! 337: result += 1;
! 338: goto outcount_zero;
! 339: }
! 340: #endif
! 341: inptr += incount; inleft -= incount;
! 342: errno = EILSEQ;
! 343: result = -1;
! 344: break;
! 345: }
! 346: if (incount == RET_TOOFEW(0)) {
! 347: /* Case 2: not enough bytes available to detect anything */
! 348: errno = EINVAL;
! 349: result = -1;
! 350: break;
! 351: }
! 352: /* Case 3: k bytes read, but only a shift sequence */
! 353: incount = DECODE_TOOFEW(incount);
! 354: } else {
! 355: /* Case 4: k bytes read, making up a wide character */
! 356: if (outleft == 0) {
! 357: cd->istate = last_istate;
! 358: errno = E2BIG;
! 359: result = -1;
! 360: break;
! 361: }
! 362: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
! 363: if (outcount != RET_ILUNI)
! 364: goto outcount_ok;
! 365: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
! 366: if ((wc >> 7) == (0xe0000 >> 7))
! 367: goto outcount_zero;
! 368: /* Try transliteration. */
! 369: result++;
! 370: if (cd->transliterate) {
! 371: outcount = unicode_transliterate(cd,wc,outptr,outleft);
! 372: if (outcount != RET_ILUNI)
! 373: goto outcount_ok;
! 374: }
! 375: if (cd->discard_ilseq) {
! 376: outcount = 0;
! 377: goto outcount_ok;
! 378: }
! 379: #ifndef LIBICONV_PLUG
! 380: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
! 381: struct uc_to_mb_fallback_locals locals;
! 382: locals.l_outbuf = outptr;
! 383: locals.l_outbytesleft = outleft;
! 384: locals.l_errno = 0;
! 385: cd->fallbacks.uc_to_mb_fallback(wc,
! 386: uc_to_mb_write_replacement,
! 387: &locals,
! 388: cd->fallbacks.data);
! 389: if (locals.l_errno != 0) {
! 390: cd->istate = last_istate;
! 391: errno = locals.l_errno;
! 392: return -1;
! 393: }
! 394: outptr = locals.l_outbuf;
! 395: outleft = locals.l_outbytesleft;
! 396: outcount = 0;
! 397: goto outcount_ok;
! 398: }
! 399: #endif
! 400: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
! 401: if (outcount != RET_ILUNI)
! 402: goto outcount_ok;
! 403: cd->istate = last_istate;
! 404: errno = EILSEQ;
! 405: result = -1;
! 406: break;
! 407: outcount_ok:
! 408: if (outcount < 0) {
! 409: cd->istate = last_istate;
! 410: errno = E2BIG;
! 411: result = -1;
! 412: break;
! 413: }
! 414: #ifndef LIBICONV_PLUG
! 415: if (cd->hooks.uc_hook)
! 416: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
! 417: #endif
! 418: if (!(outcount <= outleft)) abort();
! 419: outptr += outcount; outleft -= outcount;
! 420: }
! 421: outcount_zero:
! 422: if (!(incount <= inleft)) abort();
! 423: inptr += incount; inleft -= incount;
! 424: }
! 425: *inbuf = (const char*) inptr;
! 426: *inbytesleft = inleft;
! 427: *outbuf = (char*) outptr;
! 428: *outbytesleft = outleft;
! 429: return result;
! 430: }
! 431:
! 432: static size_t unicode_loop_reset (iconv_t icd,
! 433: char* * outbuf, size_t *outbytesleft)
! 434: {
! 435: conv_t cd = (conv_t) icd;
! 436: if (outbuf == NULL || *outbuf == NULL) {
! 437: /* Reset the states. */
! 438: memset(&cd->istate,'\0',sizeof(state_t));
! 439: memset(&cd->ostate,'\0',sizeof(state_t));
! 440: return 0;
! 441: } else {
! 442: size_t result = 0;
! 443: if (cd->ifuncs.xxx_flushwc) {
! 444: state_t last_istate = cd->istate;
! 445: ucs4_t wc;
! 446: if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
! 447: unsigned char* outptr = (unsigned char*) *outbuf;
! 448: size_t outleft = *outbytesleft;
! 449: int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
! 450: if (outcount != RET_ILUNI)
! 451: goto outcount_ok;
! 452: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
! 453: if ((wc >> 7) == (0xe0000 >> 7))
! 454: goto outcount_zero;
! 455: /* Try transliteration. */
! 456: result++;
! 457: if (cd->transliterate) {
! 458: outcount = unicode_transliterate(cd,wc,outptr,outleft);
! 459: if (outcount != RET_ILUNI)
! 460: goto outcount_ok;
! 461: }
! 462: if (cd->discard_ilseq) {
! 463: outcount = 0;
! 464: goto outcount_ok;
! 465: }
! 466: #ifndef LIBICONV_PLUG
! 467: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
! 468: struct uc_to_mb_fallback_locals locals;
! 469: locals.l_outbuf = outptr;
! 470: locals.l_outbytesleft = outleft;
! 471: locals.l_errno = 0;
! 472: cd->fallbacks.uc_to_mb_fallback(wc,
! 473: uc_to_mb_write_replacement,
! 474: &locals,
! 475: cd->fallbacks.data);
! 476: if (locals.l_errno != 0) {
! 477: cd->istate = last_istate;
! 478: errno = locals.l_errno;
! 479: return -1;
! 480: }
! 481: outptr = locals.l_outbuf;
! 482: outleft = locals.l_outbytesleft;
! 483: outcount = 0;
! 484: goto outcount_ok;
! 485: }
! 486: #endif
! 487: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
! 488: if (outcount != RET_ILUNI)
! 489: goto outcount_ok;
! 490: cd->istate = last_istate;
! 491: errno = EILSEQ;
! 492: return -1;
! 493: outcount_ok:
! 494: if (outcount < 0) {
! 495: cd->istate = last_istate;
! 496: errno = E2BIG;
! 497: return -1;
! 498: }
! 499: #ifndef LIBICONV_PLUG
! 500: if (cd->hooks.uc_hook)
! 501: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
! 502: #endif
! 503: if (!(outcount <= outleft)) abort();
! 504: outptr += outcount;
! 505: outleft -= outcount;
! 506: outcount_zero:
! 507: *outbuf = (char*) outptr;
! 508: *outbytesleft = outleft;
! 509: }
! 510: }
! 511: if (cd->ofuncs.xxx_reset) {
! 512: unsigned char* outptr = (unsigned char*) *outbuf;
! 513: size_t outleft = *outbytesleft;
! 514: int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
! 515: if (outcount < 0) {
! 516: errno = E2BIG;
! 517: return -1;
! 518: }
! 519: if (!(outcount <= outleft)) abort();
! 520: *outbuf = (char*) (outptr + outcount);
! 521: *outbytesleft = outleft - outcount;
! 522: }
! 523: memset(&cd->istate,'\0',sizeof(state_t));
! 524: memset(&cd->ostate,'\0',sizeof(state_t));
! 525: return result;
! 526: }
! 527: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>