Annotation of embedaddon/libiconv/lib/loop_wchar.h, revision 1.1.1.3
1.1 misho 1: /*
1.1.1.2 misho 2: * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: /* This file defines three conversion loops:
21: - from wchar_t to anything else,
22: - from anything else to wchar_t,
23: - from wchar_t to wchar_t.
24: */
25:
/* Platform glue for the wchar_t loops below.  Everything in this section is
   compiled only when at least one of wcrtomb() / mbrtowc() is available. */
26: #if HAVE_WCRTOMB || HAVE_MBRTOWC
1.1.1.2 misho 27: /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
28: <wchar.h>.
29: BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
30: included before <wchar.h>.
31: In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
32: by <stddef.h>. */
/* Keep the include order below exactly as is; see the platform notes above. */
33: # include <stddef.h>
34: # include <stdio.h>
35: # include <time.h>
1.1 misho 36: # include <wchar.h>
/* Size of the on-stack staging buffers (buf[BUF_SIZE]) used by the loops
   in this file to hold text in the locale's multibyte encoding. */
37: # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
38: /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
/* Old-style (unprototyped) declaration, so the call through (mbrtowc) in the
   macro below compiles regardless of how the platform declares it. */
39: extern size_t mbrtowc ();
/* When mbstate_t is a preprocessor macro (presumably a stand-in supplied by
   the build configuration -- TODO confirm), call the real mbrtowc() with a
   null state pointer and report the state as always initial. */
40: # ifdef mbstate_t
41: # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
42: # define mbsinit(ps) 1
43: # endif
/* No mbsinit() macro and no mbsinit() function: treat every state as the
   initial state. */
44: # ifndef mbsinit
45: # if !HAVE_MBSINIT
46: # define mbsinit(ps) 1
47: # endif
48: # endif
49: #endif
50:
51: /*
52: * The first two conversion loops have an extended conversion descriptor.
53: */
54: struct wchar_conv_struct {
55: struct conv_struct parent;
56: #if HAVE_WCRTOMB || HAVE_MBRTOWC
57: mbstate_t state;
58: #endif
59: };
60:
61:
62: #if HAVE_WCRTOMB
63:
64: /* From wchar_t to anything else. */
65:
66: #ifndef LIBICONV_PLUG
67:
68: #if 0
69:
70: struct wc_to_mb_fallback_locals {
71: struct wchar_conv_struct * l_wcd;
72: char* l_outbuf;
73: size_t l_outbytesleft;
74: int l_errno;
75: };
76:
77: /* A callback that writes a string given in the locale encoding. */
78: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
79: void* callback_arg)
80: {
81: struct wc_to_mb_fallback_locals * plocals =
82: (struct wc_to_mb_fallback_locals *) callback_arg;
83: /* Do nothing if already encountered an error in a previous call. */
84: if (plocals->l_errno == 0) {
85: /* Attempt to convert the passed buffer to the target encoding.
86: Here we don't support characters split across multiple calls. */
87: const char* bufptr = buf;
88: size_t bufleft = buflen;
89: size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
90: &bufptr,&bufleft,
91: &plocals->l_outbuf,&plocals->l_outbytesleft);
92: if (res == (size_t)(-1)) {
93: if (errno == EILSEQ || errno == EINVAL)
94: /* Invalid buf contents. */
95: plocals->l_errno = EILSEQ;
96: else if (errno == E2BIG)
97: /* Output buffer too small. */
98: plocals->l_errno = E2BIG;
99: else
100: abort();
101: } else {
102: /* Successful conversion. */
103: if (bufleft > 0)
104: abort();
105: }
106: }
107: }
108:
109: #else
110:
/* Output cursor shared between wchar_from_loop_convert() and the
   replacement-writing callback it passes to the user's fallback handler. */
struct wc_to_mb_fallback_locals {
  char* l_outbuf;          /* next free byte of the output buffer */
  size_t l_outbytesleft;   /* number of bytes still available at l_outbuf */
  int l_errno;             /* 0, or the first error recorded (E2BIG) */
};

/* A callback that writes a string given in the target encoding.
   buf/buflen  - the replacement bytes; they are copied verbatim.
   callback_arg - points to a struct wc_to_mb_fallback_locals.
   On lack of space nothing is written and l_errno becomes E2BIG; once
   l_errno is nonzero all further calls are ignored. */
static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct wc_to_mb_fallback_locals * plocals =
    (struct wc_to_mb_fallback_locals *) callback_arg;

  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno != 0)
    return;

  /* An empty replacement is a no-op.  Returning here also keeps memcpy()
     from being called with a possibly null source pointer. */
  if (buflen == 0)
    return;

  if (plocals->l_outbytesleft < buflen) {
    plocals->l_errno = E2BIG;
    return;
  }

  /* Copy the passed buffer to the output buffer and advance the cursor. */
  memcpy(plocals->l_outbuf, buf, buflen);
  plocals->l_outbuf += buflen;
  plocals->l_outbytesleft -= buflen;
}
135:
136: #endif
137:
138: #endif /* !LIBICONV_PLUG */
139:
140: static size_t wchar_from_loop_convert (iconv_t icd,
141: const char* * inbuf, size_t *inbytesleft,
142: char* * outbuf, size_t *outbytesleft)
143: {
144: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
145: size_t result = 0;
146: while (*inbytesleft >= sizeof(wchar_t)) {
147: const wchar_t * inptr = (const wchar_t *) *inbuf;
148: size_t inleft = *inbytesleft;
149: char buf[BUF_SIZE];
150: mbstate_t state = wcd->state;
151: size_t bufcount = 0;
152: while (inleft >= sizeof(wchar_t)) {
153: /* Convert one wchar_t to multibyte representation. */
154: size_t count = wcrtomb(buf+bufcount,*inptr,&state);
155: if (count == (size_t)(-1)) {
156: /* Invalid input. */
157: if (wcd->parent.discard_ilseq) {
158: count = 0;
159: }
160: #ifndef LIBICONV_PLUG
161: else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
162: /* Drop the contents of buf[] accumulated so far, and instead
163: pass all queued wide characters to the fallback handler. */
164: struct wc_to_mb_fallback_locals locals;
165: const wchar_t * fallback_inptr;
166: #if 0
167: locals.l_wcd = wcd;
168: #endif
169: locals.l_outbuf = *outbuf;
170: locals.l_outbytesleft = *outbytesleft;
171: locals.l_errno = 0;
172: for (fallback_inptr = (const wchar_t *) *inbuf;
173: fallback_inptr <= inptr;
174: fallback_inptr++)
175: wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
176: wc_to_mb_write_replacement,
177: &locals,
178: wcd->parent.fallbacks.data);
179: if (locals.l_errno != 0) {
180: errno = locals.l_errno;
181: return -1;
182: }
183: wcd->state = state;
184: *inbuf = (const char *) (inptr + 1);
185: *inbytesleft = inleft - sizeof(wchar_t);
186: *outbuf = locals.l_outbuf;
187: *outbytesleft = locals.l_outbytesleft;
188: result += 1;
189: break;
190: }
191: #endif
192: else {
193: errno = EILSEQ;
194: return -1;
195: }
196: }
197: inptr++;
198: inleft -= sizeof(wchar_t);
199: bufcount += count;
200: if (count == 0) {
201: /* Continue, append next wchar_t. */
202: } else {
203: /* Attempt to convert the accumulated multibyte representations
204: to the target encoding. */
205: const char* bufptr = buf;
206: size_t bufleft = bufcount;
207: char* outptr = *outbuf;
208: size_t outleft = *outbytesleft;
209: size_t res = unicode_loop_convert(&wcd->parent,
210: &bufptr,&bufleft,
211: &outptr,&outleft);
212: if (res == (size_t)(-1)) {
213: if (errno == EILSEQ)
214: /* Invalid input. */
215: return -1;
216: else if (errno == E2BIG)
217: /* Output buffer too small. */
218: return -1;
219: else if (errno == EINVAL) {
220: /* Continue, append next wchar_t, but avoid buffer overrun. */
221: if (bufcount + MB_CUR_MAX > BUF_SIZE)
222: abort();
223: } else
224: abort();
225: } else {
226: /* Successful conversion. */
227: wcd->state = state;
228: *inbuf = (const char *) inptr;
229: *inbytesleft = inleft;
230: *outbuf = outptr;
231: *outbytesleft = outleft;
232: result += res;
233: break;
234: }
235: }
236: }
237: }
238: return result;
239: }
240:
241: static size_t wchar_from_loop_reset (iconv_t icd,
242: char* * outbuf, size_t *outbytesleft)
243: {
244: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
245: if (outbuf == NULL || *outbuf == NULL) {
246: /* Reset the states. */
247: memset(&wcd->state,'\0',sizeof(mbstate_t));
248: return unicode_loop_reset(&wcd->parent,NULL,NULL);
249: } else {
250: if (!mbsinit(&wcd->state)) {
251: mbstate_t state = wcd->state;
252: char buf[BUF_SIZE];
253: size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
254: if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
255: abort();
256: else {
257: const char* bufptr = buf;
258: size_t bufleft = bufcount-1;
259: char* outptr = *outbuf;
260: size_t outleft = *outbytesleft;
261: size_t res = unicode_loop_convert(&wcd->parent,
262: &bufptr,&bufleft,
263: &outptr,&outleft);
264: if (res == (size_t)(-1)) {
265: if (errno == E2BIG)
266: return -1;
267: else
268: abort();
269: } else {
270: res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
271: if (res == (size_t)(-1))
272: return res;
273: else {
274: /* Successful. */
275: wcd->state = state;
276: *outbuf = outptr;
277: *outbytesleft = outleft;
278: return 0;
279: }
280: }
281: }
282: } else
283: return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
284: }
285: }
286:
287: #endif
288:
289:
290: #if HAVE_MBRTOWC
291:
292: /* From anything else to wchar_t. */
293:
294: #ifndef LIBICONV_PLUG
295:
/* Output cursor shared between wchar_to_loop_convert() and the
   replacement-writing callback it passes to the user's fallback handler. */
struct mb_to_wc_fallback_locals {
  char* l_outbuf;          /* next free byte of the output buffer */
  size_t l_outbytesleft;   /* number of bytes still available at l_outbuf */
  int l_errno;             /* 0, or the first error recorded (E2BIG) */
};

/* A callback that writes buflen wide characters from buf to the output.
   callback_arg points to a struct mb_to_wc_fallback_locals.
   On lack of space nothing is written and l_errno becomes E2BIG; once
   l_errno is nonzero all further calls are ignored. */
static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
                                        void* callback_arg)
{
  struct mb_to_wc_fallback_locals * plocals =
    (struct mb_to_wc_fallback_locals *) callback_arg;

  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno != 0)
    return;

  /* Compare in units of wchar_t: multiplying buflen by sizeof(wchar_t)
     could wrap around and let an oversized request pass the check. */
  if (buflen > plocals->l_outbytesleft / sizeof(wchar_t)) {
    plocals->l_errno = E2BIG;
    return;
  }

  if (buflen > 0) {
    /* Cannot overflow: bounded by l_outbytesleft, checked above. */
    size_t nbytes = buflen * sizeof(wchar_t);
    /* The output cursor is a char*, not necessarily aligned for wchar_t,
       so copy bytes instead of storing through a wchar_t pointer. */
    memcpy(plocals->l_outbuf, buf, nbytes);
    plocals->l_outbuf += nbytes;
    plocals->l_outbytesleft -= nbytes;
  }
}
321:
322: #endif /* !LIBICONV_PLUG */
323:
324: static size_t wchar_to_loop_convert (iconv_t icd,
325: const char* * inbuf, size_t *inbytesleft,
326: char* * outbuf, size_t *outbytesleft)
327: {
328: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
329: size_t result = 0;
330: while (*inbytesleft > 0) {
331: size_t incount;
1.1.1.2 misho 332: for (incount = 1; ; ) {
333: /* Here incount <= *inbytesleft. */
1.1 misho 334: char buf[BUF_SIZE];
335: const char* inptr = *inbuf;
336: size_t inleft = incount;
337: char* bufptr = buf;
338: size_t bufleft = BUF_SIZE;
339: size_t res = unicode_loop_convert(&wcd->parent,
340: &inptr,&inleft,
341: &bufptr,&bufleft);
342: if (res == (size_t)(-1)) {
343: if (errno == EILSEQ)
344: /* Invalid input. */
345: return -1;
346: else if (errno == EINVAL) {
347: /* Incomplete input. Next try with one more input byte. */
348: } else
349: /* E2BIG shouldn't occur. */
350: abort();
351: } else {
352: /* Successful conversion. */
353: size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
354: mbstate_t state = wcd->state;
355: wchar_t wc;
356: res = mbrtowc(&wc,buf,bufcount,&state);
357: if (res == (size_t)(-2)) {
358: /* Next try with one more input byte. */
359: } else {
360: if (res == (size_t)(-1)) {
361: /* Invalid input. */
362: if (wcd->parent.discard_ilseq) {
363: }
364: #ifndef LIBICONV_PLUG
365: else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
366: /* Drop the contents of buf[] accumulated so far, and instead
367: pass all queued chars to the fallback handler. */
368: struct mb_to_wc_fallback_locals locals;
369: locals.l_outbuf = *outbuf;
370: locals.l_outbytesleft = *outbytesleft;
371: locals.l_errno = 0;
372: wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
373: mb_to_wc_write_replacement,
374: &locals,
375: wcd->parent.fallbacks.data);
376: if (locals.l_errno != 0) {
377: errno = locals.l_errno;
378: return -1;
379: }
380: /* Restoring the state is not needed because it is the initial
381: state anyway: For all known locale encodings, the multibyte
382: to wchar_t conversion doesn't have shift state, and we have
383: excluded partial accumulated characters. */
384: /* wcd->state = state; */
385: *inbuf += incount;
386: *inbytesleft -= incount;
387: *outbuf = locals.l_outbuf;
388: *outbytesleft = locals.l_outbytesleft;
389: result += 1;
390: break;
391: }
392: #endif
393: else
394: return -1;
395: } else {
396: if (*outbytesleft < sizeof(wchar_t)) {
397: errno = E2BIG;
398: return -1;
399: }
400: *(wchar_t*) *outbuf = wc;
401: /* Restoring the state is not needed because it is the initial
402: state anyway: For all known locale encodings, the multibyte
403: to wchar_t conversion doesn't have shift state, and we have
404: excluded partial accumulated characters. */
405: /* wcd->state = state; */
406: *outbuf += sizeof(wchar_t);
407: *outbytesleft -= sizeof(wchar_t);
408: }
409: *inbuf += incount;
410: *inbytesleft -= incount;
411: result += res;
412: break;
413: }
414: }
1.1.1.2 misho 415: incount++;
416: if (incount > *inbytesleft) {
417: /* Incomplete input. */
418: errno = EINVAL;
419: return -1;
420: }
1.1 misho 421: }
422: }
423: return result;
424: }
425:
426: static size_t wchar_to_loop_reset (iconv_t icd,
427: char* * outbuf, size_t *outbytesleft)
428: {
429: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
430: size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
431: if (res == (size_t)(-1))
432: return res;
433: memset(&wcd->state,0,sizeof(mbstate_t));
434: return 0;
435: }
436:
437: #endif
438:
439:
440: /* From wchar_t to wchar_t. */
441:
442: static size_t wchar_id_loop_convert (iconv_t icd,
443: const char* * inbuf, size_t *inbytesleft,
444: char* * outbuf, size_t *outbytesleft)
445: {
446: struct conv_struct * cd = (struct conv_struct *) icd;
447: const wchar_t* inptr = (const wchar_t*) *inbuf;
448: size_t inleft = *inbytesleft / sizeof(wchar_t);
449: wchar_t* outptr = (wchar_t*) *outbuf;
450: size_t outleft = *outbytesleft / sizeof(wchar_t);
451: size_t count = (inleft <= outleft ? inleft : outleft);
452: if (count > 0) {
453: *inbytesleft -= count * sizeof(wchar_t);
454: *outbytesleft -= count * sizeof(wchar_t);
455: do {
456: wchar_t wc = *inptr++;
457: *outptr++ = wc;
458: #ifndef LIBICONV_PLUG
459: if (cd->hooks.wc_hook)
460: (*cd->hooks.wc_hook)(wc, cd->hooks.data);
461: #endif
462: } while (--count > 0);
463: *inbuf = (const char*) inptr;
464: *outbuf = (char*) outptr;
465: }
466: return 0;
467: }
468:
469: static size_t wchar_id_loop_reset (iconv_t icd,
470: char* * outbuf, size_t *outbytesleft)
471: {
472: return 0;
473: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>