Annotation of embedaddon/libiconv/lib/loop_unicode.h, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /* This file defines the conversion loop via Unicode as a pivot encoding. */
22:
23: /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24: static int unicode_transliterate (conv_t cd, ucs4_t wc,
25: unsigned char* outptr, size_t outleft)
26: {
27: if (cd->oflags & HAVE_HANGUL_JAMO) {
28: /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29: in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30: (contained in Unicode only). */
31: ucs4_t buf[3];
32: int ret = johab_hangul_decompose(cd,buf,wc);
33: if (ret != RET_ILUNI) {
34: /* we know 1 <= ret <= 3 */
35: state_t backup_state = cd->ostate;
36: unsigned char* backup_outptr = outptr;
37: size_t backup_outleft = outleft;
38: int i, sub_outcount;
39: for (i = 0; i < ret; i++) {
40: if (outleft == 0) {
41: sub_outcount = RET_TOOSMALL;
42: goto johab_hangul_failed;
43: }
44: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
45: if (sub_outcount <= RET_ILUNI)
46: goto johab_hangul_failed;
47: if (!(sub_outcount <= outleft)) abort();
48: outptr += sub_outcount; outleft -= sub_outcount;
49: }
50: return outptr-backup_outptr;
51: johab_hangul_failed:
52: cd->ostate = backup_state;
53: outptr = backup_outptr;
54: outleft = backup_outleft;
55: if (sub_outcount != RET_ILUNI)
56: return RET_TOOSMALL;
57: }
58: }
59: {
60: /* Try to use a variant, but postfix it with
61: U+303E IDEOGRAPHIC VARIATION INDICATOR
62: (cf. Ken Lunde's "CJKV information processing", p. 188). */
63: int indx = -1;
64: if (wc == 0x3006)
65: indx = 0;
66: else if (wc == 0x30f6)
67: indx = 1;
68: else if (wc >= 0x4e00 && wc < 0xa000)
69: indx = cjk_variants_indx[wc-0x4e00];
70: if (indx >= 0) {
71: for (;; indx++) {
72: ucs4_t buf[2];
73: unsigned short variant = cjk_variants[indx];
74: unsigned short last = variant & 0x8000;
75: variant &= 0x7fff;
76: variant += 0x3000;
77: buf[0] = variant; buf[1] = 0x303e;
78: {
79: state_t backup_state = cd->ostate;
80: unsigned char* backup_outptr = outptr;
81: size_t backup_outleft = outleft;
82: int i, sub_outcount;
83: for (i = 0; i < 2; i++) {
84: if (outleft == 0) {
85: sub_outcount = RET_TOOSMALL;
86: goto variant_failed;
87: }
88: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
89: if (sub_outcount <= RET_ILUNI)
90: goto variant_failed;
91: if (!(sub_outcount <= outleft)) abort();
92: outptr += sub_outcount; outleft -= sub_outcount;
93: }
94: return outptr-backup_outptr;
95: variant_failed:
96: cd->ostate = backup_state;
97: outptr = backup_outptr;
98: outleft = backup_outleft;
99: if (sub_outcount != RET_ILUNI)
100: return RET_TOOSMALL;
101: }
102: if (last)
103: break;
104: }
105: }
106: }
107: if (wc >= 0x2018 && wc <= 0x201a) {
108: /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
109: ucs4_t substitute =
110: (cd->oflags & HAVE_QUOTATION_MARKS
111: ? (wc == 0x201a ? 0x2018 : wc)
112: : (cd->oflags & HAVE_ACCENTS
113: ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114: : 0x0027 /* use apostrophe */
115: ) );
116: int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
117: if (outcount != RET_ILUNI)
118: return outcount;
119: }
120: {
121: /* Use the transliteration table. */
122: int indx = translit_index(wc);
123: if (indx >= 0) {
124: const unsigned int * cp = &translit_data[indx];
125: unsigned int num = *cp++;
126: state_t backup_state = cd->ostate;
127: unsigned char* backup_outptr = outptr;
128: size_t backup_outleft = outleft;
129: unsigned int i;
130: int sub_outcount;
131: for (i = 0; i < num; i++) {
132: if (outleft == 0) {
133: sub_outcount = RET_TOOSMALL;
134: goto translit_failed;
135: }
136: sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
137: if (sub_outcount == RET_ILUNI)
138: /* Recursive transliteration. */
139: sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
140: if (sub_outcount <= RET_ILUNI)
141: goto translit_failed;
142: if (!(sub_outcount <= outleft)) abort();
143: outptr += sub_outcount; outleft -= sub_outcount;
144: }
145: return outptr-backup_outptr;
146: translit_failed:
147: cd->ostate = backup_state;
148: outptr = backup_outptr;
149: outleft = backup_outleft;
150: if (sub_outcount != RET_ILUNI)
151: return RET_TOOSMALL;
152: }
153: }
154: return RET_ILUNI;
155: }
156:
157: #ifndef LIBICONV_PLUG
158:
/* State shared between a caller of the uc_to_mb fallback and
   uc_to_mb_write_replacement. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;      /* where the next replacement byte goes */
  size_t l_outbytesleft;        /* room left at l_outbuf, in bytes */
  int l_errno;                  /* 0, or the error that stopped the output */
};

/* Write callback for the uc_to_mb fallback: appends the buflen bytes at
   buf to the output described by callback_arg, which must point to a
   struct uc_to_mb_fallback_locals.
   If the bytes do not fit, nothing is copied and l_errno becomes E2BIG.
   Once l_errno is nonzero, every later call is a no-op. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * sink =
    (struct uc_to_mb_fallback_locals *) callback_arg;

  /* Do nothing if already encountered an error in a previous call. */
  if (sink->l_errno != 0)
    return;

  /* Refuse replacements that are larger than the remaining room. */
  if (buflen > sink->l_outbytesleft) {
    sink->l_errno = E2BIG;
    return;
  }

  /* Copy the passed buffer to the output buffer and advance past it. */
  memcpy(sink->l_outbuf, buf, buflen);
  sink->l_outbuf += buflen;
  sink->l_outbytesleft -= buflen;
}
182:
183: struct mb_to_uc_fallback_locals {
184: conv_t l_cd;
185: unsigned char* l_outbuf;
186: size_t l_outbytesleft;
187: int l_errno;
188: };
189:
190: static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
191: void* callback_arg)
192: {
193: struct mb_to_uc_fallback_locals * plocals =
194: (struct mb_to_uc_fallback_locals *) callback_arg;
195: /* Do nothing if already encountered an error in a previous call. */
196: if (plocals->l_errno == 0) {
197: /* Attempt to convert the passed buffer to the target encoding. */
198: conv_t cd = plocals->l_cd;
199: unsigned char* outptr = plocals->l_outbuf;
200: size_t outleft = plocals->l_outbytesleft;
201: for (; buflen > 0; buf++, buflen--) {
202: ucs4_t wc = *buf;
203: int outcount;
204: if (outleft == 0) {
205: plocals->l_errno = E2BIG;
206: break;
207: }
208: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
209: if (outcount != RET_ILUNI)
210: goto outcount_ok;
211: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
212: if ((wc >> 7) == (0xe0000 >> 7))
213: goto outcount_zero;
214: /* Try transliteration. */
215: if (cd->transliterate) {
216: outcount = unicode_transliterate(cd,wc,outptr,outleft);
217: if (outcount != RET_ILUNI)
218: goto outcount_ok;
219: }
220: if (cd->discard_ilseq) {
221: outcount = 0;
222: goto outcount_ok;
223: }
224: #ifndef LIBICONV_PLUG
225: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
226: struct uc_to_mb_fallback_locals locals;
227: locals.l_outbuf = outptr;
228: locals.l_outbytesleft = outleft;
229: locals.l_errno = 0;
230: cd->fallbacks.uc_to_mb_fallback(wc,
231: uc_to_mb_write_replacement,
232: &locals,
233: cd->fallbacks.data);
234: if (locals.l_errno != 0) {
235: plocals->l_errno = locals.l_errno;
236: break;
237: }
238: outptr = locals.l_outbuf;
239: outleft = locals.l_outbytesleft;
240: outcount = 0;
241: goto outcount_ok;
242: }
243: #endif
244: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
245: if (outcount != RET_ILUNI)
246: goto outcount_ok;
247: plocals->l_errno = EILSEQ;
248: break;
249: outcount_ok:
250: if (outcount < 0) {
251: plocals->l_errno = E2BIG;
252: break;
253: }
254: #ifndef LIBICONV_PLUG
255: if (cd->hooks.uc_hook)
256: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
257: #endif
258: if (!(outcount <= outleft)) abort();
259: outptr += outcount; outleft -= outcount;
260: outcount_zero: ;
261: }
262: plocals->l_outbuf = outptr;
263: plocals->l_outbytesleft = outleft;
264: }
265: }
266:
267: #endif /* !LIBICONV_PLUG */
268:
269: static size_t unicode_loop_convert (iconv_t icd,
270: const char* * inbuf, size_t *inbytesleft,
271: char* * outbuf, size_t *outbytesleft)
272: {
273: conv_t cd = (conv_t) icd;
274: size_t result = 0;
275: const unsigned char* inptr = (const unsigned char*) *inbuf;
276: size_t inleft = *inbytesleft;
277: unsigned char* outptr = (unsigned char*) *outbuf;
278: size_t outleft = *outbytesleft;
279: while (inleft > 0) {
280: state_t last_istate = cd->istate;
281: ucs4_t wc;
282: int incount;
283: int outcount;
284: incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
285: if (incount < 0) {
286: if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
287: /* Case 1: invalid input, possibly after a shift sequence */
288: incount = DECODE_SHIFT_ILSEQ(incount);
289: if (cd->discard_ilseq) {
290: switch (cd->iindex) {
291: case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
292: case ei_utf32: case ei_utf32be: case ei_utf32le:
293: case ei_ucs4internal: case ei_ucs4swapped:
294: incount += 4; break;
295: case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
296: case ei_utf16: case ei_utf16be: case ei_utf16le:
297: case ei_ucs2internal: case ei_ucs2swapped:
298: incount += 2; break;
299: default:
300: incount += 1; break;
301: }
302: goto outcount_zero;
303: }
304: #ifndef LIBICONV_PLUG
305: else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
306: unsigned int incount2;
307: struct mb_to_uc_fallback_locals locals;
308: switch (cd->iindex) {
309: case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
310: case ei_utf32: case ei_utf32be: case ei_utf32le:
311: case ei_ucs4internal: case ei_ucs4swapped:
312: incount2 = 4; break;
313: case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
314: case ei_utf16: case ei_utf16be: case ei_utf16le:
315: case ei_ucs2internal: case ei_ucs2swapped:
316: incount2 = 2; break;
317: default:
318: incount2 = 1; break;
319: }
320: locals.l_cd = cd;
321: locals.l_outbuf = outptr;
322: locals.l_outbytesleft = outleft;
323: locals.l_errno = 0;
324: cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
325: mb_to_uc_write_replacement,
326: &locals,
327: cd->fallbacks.data);
328: if (locals.l_errno != 0) {
329: inptr += incount; inleft -= incount;
330: errno = locals.l_errno;
331: result = -1;
332: break;
333: }
334: incount += incount2;
335: outptr = locals.l_outbuf;
336: outleft = locals.l_outbytesleft;
337: result += 1;
338: goto outcount_zero;
339: }
340: #endif
341: inptr += incount; inleft -= incount;
342: errno = EILSEQ;
343: result = -1;
344: break;
345: }
346: if (incount == RET_TOOFEW(0)) {
347: /* Case 2: not enough bytes available to detect anything */
348: errno = EINVAL;
349: result = -1;
350: break;
351: }
352: /* Case 3: k bytes read, but only a shift sequence */
353: incount = DECODE_TOOFEW(incount);
354: } else {
355: /* Case 4: k bytes read, making up a wide character */
356: if (outleft == 0) {
357: cd->istate = last_istate;
358: errno = E2BIG;
359: result = -1;
360: break;
361: }
362: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
363: if (outcount != RET_ILUNI)
364: goto outcount_ok;
365: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
366: if ((wc >> 7) == (0xe0000 >> 7))
367: goto outcount_zero;
368: /* Try transliteration. */
369: result++;
370: if (cd->transliterate) {
371: outcount = unicode_transliterate(cd,wc,outptr,outleft);
372: if (outcount != RET_ILUNI)
373: goto outcount_ok;
374: }
375: if (cd->discard_ilseq) {
376: outcount = 0;
377: goto outcount_ok;
378: }
379: #ifndef LIBICONV_PLUG
380: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
381: struct uc_to_mb_fallback_locals locals;
382: locals.l_outbuf = outptr;
383: locals.l_outbytesleft = outleft;
384: locals.l_errno = 0;
385: cd->fallbacks.uc_to_mb_fallback(wc,
386: uc_to_mb_write_replacement,
387: &locals,
388: cd->fallbacks.data);
389: if (locals.l_errno != 0) {
390: cd->istate = last_istate;
391: errno = locals.l_errno;
392: return -1;
393: }
394: outptr = locals.l_outbuf;
395: outleft = locals.l_outbytesleft;
396: outcount = 0;
397: goto outcount_ok;
398: }
399: #endif
400: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
401: if (outcount != RET_ILUNI)
402: goto outcount_ok;
403: cd->istate = last_istate;
404: errno = EILSEQ;
405: result = -1;
406: break;
407: outcount_ok:
408: if (outcount < 0) {
409: cd->istate = last_istate;
410: errno = E2BIG;
411: result = -1;
412: break;
413: }
414: #ifndef LIBICONV_PLUG
415: if (cd->hooks.uc_hook)
416: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
417: #endif
418: if (!(outcount <= outleft)) abort();
419: outptr += outcount; outleft -= outcount;
420: }
421: outcount_zero:
422: if (!(incount <= inleft)) abort();
423: inptr += incount; inleft -= incount;
424: }
425: *inbuf = (const char*) inptr;
426: *inbytesleft = inleft;
427: *outbuf = (char*) outptr;
428: *outbytesleft = outleft;
429: return result;
430: }
431:
432: static size_t unicode_loop_reset (iconv_t icd,
433: char* * outbuf, size_t *outbytesleft)
434: {
435: conv_t cd = (conv_t) icd;
436: if (outbuf == NULL || *outbuf == NULL) {
437: /* Reset the states. */
438: memset(&cd->istate,'\0',sizeof(state_t));
439: memset(&cd->ostate,'\0',sizeof(state_t));
440: return 0;
441: } else {
442: size_t result = 0;
443: if (cd->ifuncs.xxx_flushwc) {
444: state_t last_istate = cd->istate;
445: ucs4_t wc;
446: if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
447: unsigned char* outptr = (unsigned char*) *outbuf;
448: size_t outleft = *outbytesleft;
449: int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
450: if (outcount != RET_ILUNI)
451: goto outcount_ok;
452: /* Handle Unicode tag characters (range U+E0000..U+E007F). */
453: if ((wc >> 7) == (0xe0000 >> 7))
454: goto outcount_zero;
455: /* Try transliteration. */
456: result++;
457: if (cd->transliterate) {
458: outcount = unicode_transliterate(cd,wc,outptr,outleft);
459: if (outcount != RET_ILUNI)
460: goto outcount_ok;
461: }
462: if (cd->discard_ilseq) {
463: outcount = 0;
464: goto outcount_ok;
465: }
466: #ifndef LIBICONV_PLUG
467: else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
468: struct uc_to_mb_fallback_locals locals;
469: locals.l_outbuf = outptr;
470: locals.l_outbytesleft = outleft;
471: locals.l_errno = 0;
472: cd->fallbacks.uc_to_mb_fallback(wc,
473: uc_to_mb_write_replacement,
474: &locals,
475: cd->fallbacks.data);
476: if (locals.l_errno != 0) {
477: cd->istate = last_istate;
478: errno = locals.l_errno;
479: return -1;
480: }
481: outptr = locals.l_outbuf;
482: outleft = locals.l_outbytesleft;
483: outcount = 0;
484: goto outcount_ok;
485: }
486: #endif
487: outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
488: if (outcount != RET_ILUNI)
489: goto outcount_ok;
490: cd->istate = last_istate;
491: errno = EILSEQ;
492: return -1;
493: outcount_ok:
494: if (outcount < 0) {
495: cd->istate = last_istate;
496: errno = E2BIG;
497: return -1;
498: }
499: #ifndef LIBICONV_PLUG
500: if (cd->hooks.uc_hook)
501: (*cd->hooks.uc_hook)(wc, cd->hooks.data);
502: #endif
503: if (!(outcount <= outleft)) abort();
504: outptr += outcount;
505: outleft -= outcount;
506: outcount_zero:
507: *outbuf = (char*) outptr;
508: *outbytesleft = outleft;
509: }
510: }
511: if (cd->ofuncs.xxx_reset) {
512: unsigned char* outptr = (unsigned char*) *outbuf;
513: size_t outleft = *outbytesleft;
514: int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
515: if (outcount < 0) {
516: errno = E2BIG;
517: return -1;
518: }
519: if (!(outcount <= outleft)) abort();
520: *outbuf = (char*) (outptr + outcount);
521: *outbytesleft = outleft - outcount;
522: }
523: memset(&cd->istate,'\0',sizeof(state_t));
524: memset(&cd->ostate,'\0',sizeof(state_t));
525: return result;
526: }
527: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>