Annotation of embedaddon/libiconv/lib/loop_unicode.h, revision 1.1.1.2
1.1 misho 1: /*
2: * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.2 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: /* This file defines the conversion loop via Unicode as a pivot encoding. */
21:
22: /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
23: static int unicode_transliterate (conv_t cd, ucs4_t wc,
24: unsigned char* outptr, size_t outleft)
25: {
26: if (cd->oflags & HAVE_HANGUL_JAMO) {
27: /* Decompose Hangul into Jamo. Use double-width Jamo (contained
28: in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
29: (contained in Unicode only). */
30: ucs4_t buf[3];
31: int ret = johab_hangul_decompose(cd,buf,wc);
32: if (ret != RET_ILUNI) {
33: /* we know 1 <= ret <= 3 */
34: state_t backup_state = cd->ostate;
35: unsigned char* backup_outptr = outptr;
36: size_t backup_outleft = outleft;
37: int i, sub_outcount;
38: for (i = 0; i < ret; i++) {
39: if (outleft == 0) {
40: sub_outcount = RET_TOOSMALL;
41: goto johab_hangul_failed;
42: }
43: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
44: if (sub_outcount <= RET_ILUNI)
45: goto johab_hangul_failed;
46: if (!(sub_outcount <= outleft)) abort();
47: outptr += sub_outcount; outleft -= sub_outcount;
48: }
49: return outptr-backup_outptr;
50: johab_hangul_failed:
51: cd->ostate = backup_state;
52: outptr = backup_outptr;
53: outleft = backup_outleft;
54: if (sub_outcount != RET_ILUNI)
55: return RET_TOOSMALL;
56: }
57: }
58: {
59: /* Try to use a variant, but postfix it with
60: U+303E IDEOGRAPHIC VARIATION INDICATOR
61: (cf. Ken Lunde's "CJKV information processing", p. 188). */
62: int indx = -1;
63: if (wc == 0x3006)
64: indx = 0;
65: else if (wc == 0x30f6)
66: indx = 1;
67: else if (wc >= 0x4e00 && wc < 0xa000)
68: indx = cjk_variants_indx[wc-0x4e00];
69: if (indx >= 0) {
70: for (;; indx++) {
71: ucs4_t buf[2];
72: unsigned short variant = cjk_variants[indx];
73: unsigned short last = variant & 0x8000;
74: variant &= 0x7fff;
75: variant += 0x3000;
76: buf[0] = variant; buf[1] = 0x303e;
77: {
78: state_t backup_state = cd->ostate;
79: unsigned char* backup_outptr = outptr;
80: size_t backup_outleft = outleft;
81: int i, sub_outcount;
82: for (i = 0; i < 2; i++) {
83: if (outleft == 0) {
84: sub_outcount = RET_TOOSMALL;
85: goto variant_failed;
86: }
87: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
88: if (sub_outcount <= RET_ILUNI)
89: goto variant_failed;
90: if (!(sub_outcount <= outleft)) abort();
91: outptr += sub_outcount; outleft -= sub_outcount;
92: }
93: return outptr-backup_outptr;
94: variant_failed:
95: cd->ostate = backup_state;
96: outptr = backup_outptr;
97: outleft = backup_outleft;
98: if (sub_outcount != RET_ILUNI)
99: return RET_TOOSMALL;
100: }
101: if (last)
102: break;
103: }
104: }
105: }
106: if (wc >= 0x2018 && wc <= 0x201a) {
107: /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
108: ucs4_t substitute =
109: (cd->oflags & HAVE_QUOTATION_MARKS
110: ? (wc == 0x201a ? 0x2018 : wc)
111: : (cd->oflags & HAVE_ACCENTS
112: ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
113: : 0x0027 /* use apostrophe */
114: ) );
115: int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
116: if (outcount != RET_ILUNI)
117: return outcount;
118: }
119: {
120: /* Use the transliteration table. */
121: int indx = translit_index(wc);
122: if (indx >= 0) {
123: const unsigned int * cp = &translit_data[indx];
124: unsigned int num = *cp++;
125: state_t backup_state = cd->ostate;
126: unsigned char* backup_outptr = outptr;
127: size_t backup_outleft = outleft;
128: unsigned int i;
129: int sub_outcount;
130: for (i = 0; i < num; i++) {
131: if (outleft == 0) {
132: sub_outcount = RET_TOOSMALL;
133: goto translit_failed;
134: }
135: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
136: if (sub_outcount == RET_ILUNI)
137: /* Recursive transliteration. */
138: sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
139: if (sub_outcount <= RET_ILUNI)
140: goto translit_failed;
141: if (!(sub_outcount <= outleft)) abort();
142: outptr += sub_outcount; outleft -= sub_outcount;
143: }
144: return outptr-backup_outptr;
145: translit_failed:
146: cd->ostate = backup_state;
147: outptr = backup_outptr;
148: outleft = backup_outleft;
149: if (sub_outcount != RET_ILUNI)
150: return RET_TOOSMALL;
151: }
152: }
153: return RET_ILUNI;
154: }
155:
156: #ifndef LIBICONV_PLUG
157:
/* Cursor into the output buffer, shared between the code that invokes a
   uc_to_mb fallback handler and uc_to_mb_write_replacement. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;      /* where the next replacement byte goes */
  size_t l_outbytesleft;        /* room remaining at l_outbuf */
  int l_errno;                  /* 0, or the errno value of the first failure */
};

/* Callback given to a uc_to_mb fallback handler: appends the buflen bytes
   at buf to the output buffer described by callback_arg (which must point
   to a struct uc_to_mb_fallback_locals).
   If the bytes do not fit, nothing is copied and l_errno is set to E2BIG.
   Once l_errno is nonzero, all further calls are ignored. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * plocals =
    (struct uc_to_mb_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno != 0)
    return;
  if (plocals->l_outbytesleft < buflen) {
    plocals->l_errno = E2BIG;
    return;
  }
  /* An empty replacement is a no-op. Skip memcpy in that case, because
     passing it a null or otherwise invalid pointer is undefined behaviour
     even when the length is zero. */
  if (buflen > 0) {
    memcpy(plocals->l_outbuf, buf, buflen);
    plocals->l_outbuf += buflen;
    plocals->l_outbytesleft -= buflen;
  }
}
181:
182: struct mb_to_uc_fallback_locals {
183: conv_t l_cd;
184: unsigned char* l_outbuf;
185: size_t l_outbytesleft;
186: int l_errno;
187: };
188:
189: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
190: void* callback_arg)
191: {
192: struct mb_to_uc_fallback_locals * plocals =
193: (struct mb_to_uc_fallback_locals *) callback_arg;
194: /* Do nothing if already encountered an error in a previous call. */
195: if (plocals->l_errno == 0) {
196: /* Attempt to convert the passed buffer to the target encoding. */
197: conv_t cd = plocals->l_cd;
198: unsigned char* outptr = plocals->l_outbuf;
199: size_t outleft = plocals->l_outbytesleft;
200: for (; buflen > 0; buf++, buflen--) {
201: ucs4_t wc = *buf;
202: int outcount;
203: if (outleft == 0) {
204: plocals->l_errno = E2BIG;
205: break;
206: }
207: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
208: if (outcount != RET_ILUNI)
209: goto outcount_ok;
210: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
211: if ((wc >> 7) == (0xe0000 >> 7))
212: goto outcount_zero;
213: /* Try transliteration. */
214: if (cd->transliterate) {
215: outcount = unicode_transliterate(cd,wc,outptr,outleft);
216: if (outcount != RET_ILUNI)
217: goto outcount_ok;
218: }
219: if (cd->discard_ilseq) {
220: outcount = 0;
221: goto outcount_ok;
222: }
223: #ifndef LIBICONV_PLUG
224: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
225: struct uc_to_mb_fallback_locals locals;
226: locals.l_outbuf = outptr;
227: locals.l_outbytesleft = outleft;
228: locals.l_errno = 0;
229: cd->fallbacks.uc_to_mb_fallback(wc,
230: uc_to_mb_write_replacement,
231: &locals,
232: cd->fallbacks.data);
233: if (locals.l_errno != 0) {
234: plocals->l_errno = locals.l_errno;
235: break;
236: }
237: outptr = locals.l_outbuf;
238: outleft = locals.l_outbytesleft;
239: outcount = 0;
240: goto outcount_ok;
241: }
242: #endif
243: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
244: if (outcount != RET_ILUNI)
245: goto outcount_ok;
246: plocals->l_errno = EILSEQ;
247: break;
248: outcount_ok:
249: if (outcount < 0) {
250: plocals->l_errno = E2BIG;
251: break;
252: }
253: #ifndef LIBICONV_PLUG
254: if (cd->hooks.uc_hook)
255: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
256: #endif
257: if (!(outcount <= outleft)) abort();
258: outptr += outcount; outleft -= outcount;
259: outcount_zero: ;
260: }
261: plocals->l_outbuf = outptr;
262: plocals->l_outbytesleft = outleft;
263: }
264: }
265:
266: #endif /* !LIBICONV_PLUG */
267:
268: static size_t unicode_loop_convert (iconv_t icd,
269: const char* * inbuf, size_t *inbytesleft,
270: char* * outbuf, size_t *outbytesleft)
271: {
272: conv_t cd = (conv_t) icd;
273: size_t result = 0;
274: const unsigned char* inptr = (const unsigned char*) *inbuf;
275: size_t inleft = *inbytesleft;
276: unsigned char* outptr = (unsigned char*) *outbuf;
277: size_t outleft = *outbytesleft;
278: while (inleft > 0) {
279: state_t last_istate = cd->istate;
280: ucs4_t wc;
281: int incount;
282: int outcount;
283: incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
284: if (incount < 0) {
285: if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
286: /* Case 1: invalid input, possibly after a shift sequence */
287: incount = DECODE_SHIFT_ILSEQ(incount);
288: if (cd->discard_ilseq) {
289: switch (cd->iindex) {
290: case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
291: case ei_utf32: case ei_utf32be: case ei_utf32le:
292: case ei_ucs4internal: case ei_ucs4swapped:
293: incount += 4; break;
294: case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
295: case ei_utf16: case ei_utf16be: case ei_utf16le:
296: case ei_ucs2internal: case ei_ucs2swapped:
297: incount += 2; break;
298: default:
299: incount += 1; break;
300: }
301: goto outcount_zero;
302: }
303: #ifndef LIBICONV_PLUG
304: else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
305: unsigned int incount2;
306: struct mb_to_uc_fallback_locals locals;
307: switch (cd->iindex) {
308: case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
309: case ei_utf32: case ei_utf32be: case ei_utf32le:
310: case ei_ucs4internal: case ei_ucs4swapped:
311: incount2 = 4; break;
312: case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
313: case ei_utf16: case ei_utf16be: case ei_utf16le:
314: case ei_ucs2internal: case ei_ucs2swapped:
315: incount2 = 2; break;
316: default:
317: incount2 = 1; break;
318: }
319: locals.l_cd = cd;
320: locals.l_outbuf = outptr;
321: locals.l_outbytesleft = outleft;
322: locals.l_errno = 0;
323: cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
324: mb_to_uc_write_replacement,
325: &locals,
326: cd->fallbacks.data);
327: if (locals.l_errno != 0) {
328: inptr += incount; inleft -= incount;
329: errno = locals.l_errno;
330: result = -1;
331: break;
332: }
333: incount += incount2;
334: outptr = locals.l_outbuf;
335: outleft = locals.l_outbytesleft;
336: result += 1;
337: goto outcount_zero;
338: }
339: #endif
340: inptr += incount; inleft -= incount;
341: errno = EILSEQ;
342: result = -1;
343: break;
344: }
345: if (incount == RET_TOOFEW(0)) {
346: /* Case 2: not enough bytes available to detect anything */
347: errno = EINVAL;
348: result = -1;
349: break;
350: }
351: /* Case 3: k bytes read, but only a shift sequence */
352: incount = DECODE_TOOFEW(incount);
353: } else {
354: /* Case 4: k bytes read, making up a wide character */
355: if (outleft == 0) {
356: cd->istate = last_istate;
357: errno = E2BIG;
358: result = -1;
359: break;
360: }
361: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
362: if (outcount != RET_ILUNI)
363: goto outcount_ok;
364: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
365: if ((wc >> 7) == (0xe0000 >> 7))
366: goto outcount_zero;
367: /* Try transliteration. */
368: result++;
369: if (cd->transliterate) {
370: outcount = unicode_transliterate(cd,wc,outptr,outleft);
371: if (outcount != RET_ILUNI)
372: goto outcount_ok;
373: }
374: if (cd->discard_ilseq) {
375: outcount = 0;
376: goto outcount_ok;
377: }
378: #ifndef LIBICONV_PLUG
379: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
380: struct uc_to_mb_fallback_locals locals;
381: locals.l_outbuf = outptr;
382: locals.l_outbytesleft = outleft;
383: locals.l_errno = 0;
384: cd->fallbacks.uc_to_mb_fallback(wc,
385: uc_to_mb_write_replacement,
386: &locals,
387: cd->fallbacks.data);
388: if (locals.l_errno != 0) {
389: cd->istate = last_istate;
390: errno = locals.l_errno;
391: return -1;
392: }
393: outptr = locals.l_outbuf;
394: outleft = locals.l_outbytesleft;
395: outcount = 0;
396: goto outcount_ok;
397: }
398: #endif
399: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
400: if (outcount != RET_ILUNI)
401: goto outcount_ok;
402: cd->istate = last_istate;
403: errno = EILSEQ;
404: result = -1;
405: break;
406: outcount_ok:
407: if (outcount < 0) {
408: cd->istate = last_istate;
409: errno = E2BIG;
410: result = -1;
411: break;
412: }
413: #ifndef LIBICONV_PLUG
414: if (cd->hooks.uc_hook)
415: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
416: #endif
417: if (!(outcount <= outleft)) abort();
418: outptr += outcount; outleft -= outcount;
419: }
420: outcount_zero:
421: if (!(incount <= inleft)) abort();
422: inptr += incount; inleft -= incount;
423: }
424: *inbuf = (const char*) inptr;
425: *inbytesleft = inleft;
426: *outbuf = (char*) outptr;
427: *outbytesleft = outleft;
428: return result;
429: }
430:
431: static size_t unicode_loop_reset (iconv_t icd,
432: char* * outbuf, size_t *outbytesleft)
433: {
434: conv_t cd = (conv_t) icd;
435: if (outbuf == NULL || *outbuf == NULL) {
436: /* Reset the states. */
437: memset(&cd->istate,'\0',sizeof(state_t));
438: memset(&cd->ostate,'\0',sizeof(state_t));
439: return 0;
440: } else {
441: size_t result = 0;
442: if (cd->ifuncs.xxx_flushwc) {
443: state_t last_istate = cd->istate;
444: ucs4_t wc;
445: if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
446: unsigned char* outptr = (unsigned char*) *outbuf;
447: size_t outleft = *outbytesleft;
448: int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
449: if (outcount != RET_ILUNI)
450: goto outcount_ok;
451: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
452: if ((wc >> 7) == (0xe0000 >> 7))
453: goto outcount_zero;
454: /* Try transliteration. */
455: result++;
456: if (cd->transliterate) {
457: outcount = unicode_transliterate(cd,wc,outptr,outleft);
458: if (outcount != RET_ILUNI)
459: goto outcount_ok;
460: }
461: if (cd->discard_ilseq) {
462: outcount = 0;
463: goto outcount_ok;
464: }
465: #ifndef LIBICONV_PLUG
466: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
467: struct uc_to_mb_fallback_locals locals;
468: locals.l_outbuf = outptr;
469: locals.l_outbytesleft = outleft;
470: locals.l_errno = 0;
471: cd->fallbacks.uc_to_mb_fallback(wc,
472: uc_to_mb_write_replacement,
473: &locals,
474: cd->fallbacks.data);
475: if (locals.l_errno != 0) {
476: cd->istate = last_istate;
477: errno = locals.l_errno;
478: return -1;
479: }
480: outptr = locals.l_outbuf;
481: outleft = locals.l_outbytesleft;
482: outcount = 0;
483: goto outcount_ok;
484: }
485: #endif
486: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
487: if (outcount != RET_ILUNI)
488: goto outcount_ok;
489: cd->istate = last_istate;
490: errno = EILSEQ;
491: return -1;
492: outcount_ok:
493: if (outcount < 0) {
494: cd->istate = last_istate;
495: errno = E2BIG;
496: return -1;
497: }
498: #ifndef LIBICONV_PLUG
499: if (cd->hooks.uc_hook)
500: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
501: #endif
502: if (!(outcount <= outleft)) abort();
503: outptr += outcount;
504: outleft -= outcount;
505: outcount_zero:
506: *outbuf = (char*) outptr;
507: *outbytesleft = outleft;
508: }
509: }
510: if (cd->ofuncs.xxx_reset) {
511: unsigned char* outptr = (unsigned char*) *outbuf;
512: size_t outleft = *outbytesleft;
513: int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
514: if (outcount < 0) {
515: errno = E2BIG;
516: return -1;
517: }
518: if (!(outcount <= outleft)) abort();
519: *outbuf = (char*) (outptr + outcount);
520: *outbytesleft = outleft - outcount;
521: }
522: memset(&cd->istate,'\0',sizeof(state_t));
523: memset(&cd->ostate,'\0',sizeof(state_t));
524: return result;
525: }
526: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>