Annotation of embedaddon/libiconv/lib/loop_wchar.h, revision 1.1.1.2
1.1 misho 1: /*
1.1.1.2 ! misho 2: * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /* This file defines three conversion loops:
22: - from wchar_t to anything else,
23: - from anything else to wchar_t,
24: - from wchar_t to wchar_t.
25: */
26:
/* Everything in this conditional is only needed when at least one of the
   wcrtomb()/mbrtowc() based loops below gets compiled. */
27: #if HAVE_WCRTOMB || HAVE_MBRTOWC
1.1.1.2 ! misho 28: /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
! 29: <wchar.h>.
! 30: BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
! 31: included before <wchar.h>.
! 32: In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
! 33: by <stddef.h>. */
! 34: # include <stddef.h>
! 35: # include <stdio.h>
! 36: # include <time.h>
1.1 misho 37: # include <wchar.h>
/* Size of the scratch buffers the loops below use to hold the multibyte
   form of the characters being converted. */
38: # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
39: /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
40: extern size_t mbrtowc ();
/* If mbstate_t is itself a macro, call the real mbrtowc() with a null state
   pointer and treat every state as initial.
   NOTE(review): presumably the build configuration defines mbstate_t as a
   macro on systems that lack the type - confirm against config.h. */
41: # ifdef mbstate_t
42: # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
43: # define mbsinit(ps) 1
44: # endif
/* Without a usable mbsinit(), report every state as the initial state. */
45: # ifndef mbsinit
46: # if !HAVE_MBSINIT
47: # define mbsinit(ps) 1
48: # endif
49: # endif
50: #endif
51:
52: /*
53: * The first two conversion loops have an extended conversion descriptor.
54: */
55: struct wchar_conv_struct {
56: struct conv_struct parent;
57: #if HAVE_WCRTOMB || HAVE_MBRTOWC
58: mbstate_t state;
59: #endif
60: };
61:
62:
63: #if HAVE_WCRTOMB
64:
65: /* From wchar_t to anything else. */
66:
67: #ifndef LIBICONV_PLUG
68:
69: #if 0
70:
71: struct wc_to_mb_fallback_locals {
72: struct wchar_conv_struct * l_wcd;
73: char* l_outbuf;
74: size_t l_outbytesleft;
75: int l_errno;
76: };
77:
78: /* A callback that writes a string given in the locale encoding. */
79: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
80: void* callback_arg)
81: {
82: struct wc_to_mb_fallback_locals * plocals =
83: (struct wc_to_mb_fallback_locals *) callback_arg;
84: /* Do nothing if already encountered an error in a previous call. */
85: if (plocals->l_errno == 0) {
86: /* Attempt to convert the passed buffer to the target encoding.
87: Here we don't support characters split across multiple calls. */
88: const char* bufptr = buf;
89: size_t bufleft = buflen;
90: size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
91: &bufptr,&bufleft,
92: &plocals->l_outbuf,&plocals->l_outbytesleft);
93: if (res == (size_t)(-1)) {
94: if (errno == EILSEQ || errno == EINVAL)
95: /* Invalid buf contents. */
96: plocals->l_errno = EILSEQ;
97: else if (errno == E2BIG)
98: /* Output buffer too small. */
99: plocals->l_errno = E2BIG;
100: else
101: abort();
102: } else {
103: /* Successful conversion. */
104: if (bufleft > 0)
105: abort();
106: }
107: }
108: }
109:
110: #else
111:
/* Context handed to wc_to_mb_write_replacement() through callback_arg. */
struct wc_to_mb_fallback_locals {
  char* l_outbuf;          /* next free byte of the output buffer */
  size_t l_outbytesleft;   /* number of bytes still available at l_outbuf */
  int l_errno;             /* 0, or the error recorded by an earlier call */
};

/* A callback that writes a string given in the target encoding.
   buf/buflen describe the bytes to append; callback_arg points to a
   struct wc_to_mb_fallback_locals.  If the bytes do not fit, nothing is
   written and l_errno is set to E2BIG; once l_errno is nonzero every further
   call is a no-op. */
static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct wc_to_mb_fallback_locals * plocals =
    (struct wc_to_mb_fallback_locals *) callback_arg;

  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno != 0)
    return;

  /* Attempt to copy the passed buffer to the output buffer. */
  if (plocals->l_outbytesleft < buflen) {
    plocals->l_errno = E2BIG;
    return;
  }

  /* An empty replacement may come with a null buf; memcpy() must not be
     called with a null pointer, even for a length of zero. */
  if (buflen > 0) {
    memcpy(plocals->l_outbuf, buf, buflen);
    plocals->l_outbuf += buflen;
    plocals->l_outbytesleft -= buflen;
  }
}
136:
137: #endif
138:
139: #endif /* !LIBICONV_PLUG */
140:
141: static size_t wchar_from_loop_convert (iconv_t icd,
142: const char* * inbuf, size_t *inbytesleft,
143: char* * outbuf, size_t *outbytesleft)
144: {
145: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
146: size_t result = 0;
147: while (*inbytesleft >= sizeof(wchar_t)) {
148: const wchar_t * inptr = (const wchar_t *) *inbuf;
149: size_t inleft = *inbytesleft;
150: char buf[BUF_SIZE];
151: mbstate_t state = wcd->state;
152: size_t bufcount = 0;
153: while (inleft >= sizeof(wchar_t)) {
154: /* Convert one wchar_t to multibyte representation. */
155: size_t count = wcrtomb(buf+bufcount,*inptr,&state);
156: if (count == (size_t)(-1)) {
157: /* Invalid input. */
158: if (wcd->parent.discard_ilseq) {
159: count = 0;
160: }
161: #ifndef LIBICONV_PLUG
162: else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
163: /* Drop the contents of buf[] accumulated so far, and instead
164: pass all queued wide characters to the fallback handler. */
165: struct wc_to_mb_fallback_locals locals;
166: const wchar_t * fallback_inptr;
167: #if 0
168: locals.l_wcd = wcd;
169: #endif
170: locals.l_outbuf = *outbuf;
171: locals.l_outbytesleft = *outbytesleft;
172: locals.l_errno = 0;
173: for (fallback_inptr = (const wchar_t *) *inbuf;
174: fallback_inptr <= inptr;
175: fallback_inptr++)
176: wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
177: wc_to_mb_write_replacement,
178: &locals,
179: wcd->parent.fallbacks.data);
180: if (locals.l_errno != 0) {
181: errno = locals.l_errno;
182: return -1;
183: }
184: wcd->state = state;
185: *inbuf = (const char *) (inptr + 1);
186: *inbytesleft = inleft - sizeof(wchar_t);
187: *outbuf = locals.l_outbuf;
188: *outbytesleft = locals.l_outbytesleft;
189: result += 1;
190: break;
191: }
192: #endif
193: else {
194: errno = EILSEQ;
195: return -1;
196: }
197: }
198: inptr++;
199: inleft -= sizeof(wchar_t);
200: bufcount += count;
201: if (count == 0) {
202: /* Continue, append next wchar_t. */
203: } else {
204: /* Attempt to convert the accumulated multibyte representations
205: to the target encoding. */
206: const char* bufptr = buf;
207: size_t bufleft = bufcount;
208: char* outptr = *outbuf;
209: size_t outleft = *outbytesleft;
210: size_t res = unicode_loop_convert(&wcd->parent,
211: &bufptr,&bufleft,
212: &outptr,&outleft);
213: if (res == (size_t)(-1)) {
214: if (errno == EILSEQ)
215: /* Invalid input. */
216: return -1;
217: else if (errno == E2BIG)
218: /* Output buffer too small. */
219: return -1;
220: else if (errno == EINVAL) {
221: /* Continue, append next wchar_t, but avoid buffer overrun. */
222: if (bufcount + MB_CUR_MAX > BUF_SIZE)
223: abort();
224: } else
225: abort();
226: } else {
227: /* Successful conversion. */
228: wcd->state = state;
229: *inbuf = (const char *) inptr;
230: *inbytesleft = inleft;
231: *outbuf = outptr;
232: *outbytesleft = outleft;
233: result += res;
234: break;
235: }
236: }
237: }
238: }
239: return result;
240: }
241:
242: static size_t wchar_from_loop_reset (iconv_t icd,
243: char* * outbuf, size_t *outbytesleft)
244: {
245: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
246: if (outbuf == NULL || *outbuf == NULL) {
247: /* Reset the states. */
248: memset(&wcd->state,'\0',sizeof(mbstate_t));
249: return unicode_loop_reset(&wcd->parent,NULL,NULL);
250: } else {
251: if (!mbsinit(&wcd->state)) {
252: mbstate_t state = wcd->state;
253: char buf[BUF_SIZE];
254: size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
255: if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
256: abort();
257: else {
258: const char* bufptr = buf;
259: size_t bufleft = bufcount-1;
260: char* outptr = *outbuf;
261: size_t outleft = *outbytesleft;
262: size_t res = unicode_loop_convert(&wcd->parent,
263: &bufptr,&bufleft,
264: &outptr,&outleft);
265: if (res == (size_t)(-1)) {
266: if (errno == E2BIG)
267: return -1;
268: else
269: abort();
270: } else {
271: res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
272: if (res == (size_t)(-1))
273: return res;
274: else {
275: /* Successful. */
276: wcd->state = state;
277: *outbuf = outptr;
278: *outbytesleft = outleft;
279: return 0;
280: }
281: }
282: }
283: } else
284: return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
285: }
286: }
287:
288: #endif
289:
290:
291: #if HAVE_MBRTOWC
292:
293: /* From anything else to wchar_t. */
294:
295: #ifndef LIBICONV_PLUG
296:
/* Context handed to mb_to_wc_write_replacement() through callback_arg. */
struct mb_to_wc_fallback_locals {
  char* l_outbuf;          /* next free position of the output buffer */
  size_t l_outbytesleft;   /* number of bytes still available at l_outbuf */
  int l_errno;             /* 0, or the error recorded by an earlier call */
};

/* A callback that appends buflen wide characters from buf to the output
   buffer described by callback_arg (a struct mb_to_wc_fallback_locals).
   If they do not fit, nothing is written and l_errno is set to E2BIG; once
   l_errno is nonzero every further call is a no-op. */
static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
                                        void* callback_arg)
{
  struct mb_to_wc_fallback_locals * plocals =
    (struct mb_to_wc_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno == 0) {
    /* Attempt to copy the passed buffer to the output buffer.
       The comparison is done in units of wchar_t: computing
       sizeof(wchar_t)*buflen could wrap around for a huge buflen and let
       the copy run past the end of the output buffer. */
    if (buflen > plocals->l_outbytesleft / sizeof(wchar_t))
      plocals->l_errno = E2BIG;
    else {
      for (; buflen > 0; buf++, buflen--) {
        *(wchar_t*) plocals->l_outbuf = *buf;
        plocals->l_outbuf += sizeof(wchar_t);
        plocals->l_outbytesleft -= sizeof(wchar_t);
      }
    }
  }
}
322:
323: #endif /* !LIBICONV_PLUG */
324:
325: static size_t wchar_to_loop_convert (iconv_t icd,
326: const char* * inbuf, size_t *inbytesleft,
327: char* * outbuf, size_t *outbytesleft)
328: {
329: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
330: size_t result = 0;
331: while (*inbytesleft > 0) {
332: size_t incount;
1.1.1.2 ! misho 333: for (incount = 1; ; ) {
! 334: /* Here incount <= *inbytesleft. */
1.1 misho 335: char buf[BUF_SIZE];
336: const char* inptr = *inbuf;
337: size_t inleft = incount;
338: char* bufptr = buf;
339: size_t bufleft = BUF_SIZE;
340: size_t res = unicode_loop_convert(&wcd->parent,
341: &inptr,&inleft,
342: &bufptr,&bufleft);
343: if (res == (size_t)(-1)) {
344: if (errno == EILSEQ)
345: /* Invalid input. */
346: return -1;
347: else if (errno == EINVAL) {
348: /* Incomplete input. Next try with one more input byte. */
349: } else
350: /* E2BIG shouldn't occur. */
351: abort();
352: } else {
353: /* Successful conversion. */
354: size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
355: mbstate_t state = wcd->state;
356: wchar_t wc;
357: res = mbrtowc(&wc,buf,bufcount,&state);
358: if (res == (size_t)(-2)) {
359: /* Next try with one more input byte. */
360: } else {
361: if (res == (size_t)(-1)) {
362: /* Invalid input. */
363: if (wcd->parent.discard_ilseq) {
364: }
365: #ifndef LIBICONV_PLUG
366: else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
367: /* Drop the contents of buf[] accumulated so far, and instead
368: pass all queued chars to the fallback handler. */
369: struct mb_to_wc_fallback_locals locals;
370: locals.l_outbuf = *outbuf;
371: locals.l_outbytesleft = *outbytesleft;
372: locals.l_errno = 0;
373: wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
374: mb_to_wc_write_replacement,
375: &locals,
376: wcd->parent.fallbacks.data);
377: if (locals.l_errno != 0) {
378: errno = locals.l_errno;
379: return -1;
380: }
381: /* Restoring the state is not needed because it is the initial
382: state anyway: For all known locale encodings, the multibyte
383: to wchar_t conversion doesn't have shift state, and we have
384: excluded partial accumulated characters. */
385: /* wcd->state = state; */
386: *inbuf += incount;
387: *inbytesleft -= incount;
388: *outbuf = locals.l_outbuf;
389: *outbytesleft = locals.l_outbytesleft;
390: result += 1;
391: break;
392: }
393: #endif
394: else
395: return -1;
396: } else {
397: if (*outbytesleft < sizeof(wchar_t)) {
398: errno = E2BIG;
399: return -1;
400: }
401: *(wchar_t*) *outbuf = wc;
402: /* Restoring the state is not needed because it is the initial
403: state anyway: For all known locale encodings, the multibyte
404: to wchar_t conversion doesn't have shift state, and we have
405: excluded partial accumulated characters. */
406: /* wcd->state = state; */
407: *outbuf += sizeof(wchar_t);
408: *outbytesleft -= sizeof(wchar_t);
409: }
410: *inbuf += incount;
411: *inbytesleft -= incount;
412: result += res;
413: break;
414: }
415: }
1.1.1.2 ! misho 416: incount++;
! 417: if (incount > *inbytesleft) {
! 418: /* Incomplete input. */
! 419: errno = EINVAL;
! 420: return -1;
! 421: }
1.1 misho 422: }
423: }
424: return result;
425: }
426:
427: static size_t wchar_to_loop_reset (iconv_t icd,
428: char* * outbuf, size_t *outbytesleft)
429: {
430: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
431: size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
432: if (res == (size_t)(-1))
433: return res;
434: memset(&wcd->state,0,sizeof(mbstate_t));
435: return 0;
436: }
437:
438: #endif
439:
440:
441: /* From wchar_t to wchar_t. */
442:
443: static size_t wchar_id_loop_convert (iconv_t icd,
444: const char* * inbuf, size_t *inbytesleft,
445: char* * outbuf, size_t *outbytesleft)
446: {
447: struct conv_struct * cd = (struct conv_struct *) icd;
448: const wchar_t* inptr = (const wchar_t*) *inbuf;
449: size_t inleft = *inbytesleft / sizeof(wchar_t);
450: wchar_t* outptr = (wchar_t*) *outbuf;
451: size_t outleft = *outbytesleft / sizeof(wchar_t);
452: size_t count = (inleft <= outleft ? inleft : outleft);
453: if (count > 0) {
454: *inbytesleft -= count * sizeof(wchar_t);
455: *outbytesleft -= count * sizeof(wchar_t);
456: do {
457: wchar_t wc = *inptr++;
458: *outptr++ = wc;
459: #ifndef LIBICONV_PLUG
460: if (cd->hooks.wc_hook)
461: (*cd->hooks.wc_hook)(wc, cd->hooks.data);
462: #endif
463: } while (--count > 0);
464: *inbuf = (const char*) inptr;
465: *outbuf = (char*) outptr;
466: }
467: return 0;
468: }
469:
470: static size_t wchar_id_loop_reset (iconv_t icd,
471: char* * outbuf, size_t *outbytesleft)
472: {
473: return 0;
474: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>