Annotation of embedaddon/libiconv/lib/loop_wchar.h, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 2000-2002, 2005-2006, 2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /* This file defines three conversion loops:
22: - from wchar_t to anything else,
23: - from anything else to wchar_t,
24: - from wchar_t to wchar_t.
25: */
26:
/* Everything in this preamble is needed only when at least one of the
   wchar_t <-> multibyte conversion functions is available. */
#if HAVE_WCRTOMB || HAVE_MBRTOWC
# include <wchar.h>
/* Size of the on-stack multibyte scratch buffers used by the loops below. */
# define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
/* Deliberately declared without a prototype. */
extern size_t mbrtowc ();
/* When mbstate_t is itself a preprocessor macro (presumably a substitute
   supplied by the build configuration on such systems -- confirm against
   config.h), calls to mbrtowc() are rewritten to call the real function with
   a null state pointer, and mbsinit() always evaluates to 1. */
# ifdef mbstate_t
# define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
# define mbsinit(ps) 1
# endif
/* If no mbsinit macro exists yet and HAVE_MBSINIT is not set, mbsinit()
   evaluates to 1 as well, i.e. every state is treated as initial. */
# ifndef mbsinit
# if !HAVE_MBSINIT
# define mbsinit(ps) 1
# endif
# endif
#endif
42:
43: /*
44: * The first two conversion loops have an extended conversion descriptor.
45: */
/* Conversion descriptor used by the wchar_t loops.  The generic descriptor
   comes first, so the loops can cast the iconv_t handle they receive to this
   type and still pass &parent on to the unicode_loop_* functions. */
struct wchar_conv_struct {
  /* The ordinary conversion descriptor. */
  struct conv_struct parent;
#if HAVE_WCRTOMB || HAVE_MBRTOWC
  /* Multibyte conversion state kept between calls; passed to wcrtomb() and
     mbrtowc() and cleared by the reset functions. */
  mbstate_t state;
#endif
};
52:
53:
54: #if HAVE_WCRTOMB
55:
56: /* From wchar_t to anything else. */
57:
58: #ifndef LIBICONV_PLUG
59:
60: #if 0
61:
62: struct wc_to_mb_fallback_locals {
63: struct wchar_conv_struct * l_wcd;
64: char* l_outbuf;
65: size_t l_outbytesleft;
66: int l_errno;
67: };
68:
69: /* A callback that writes a string given in the locale encoding. */
70: static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
71: void* callback_arg)
72: {
73: struct wc_to_mb_fallback_locals * plocals =
74: (struct wc_to_mb_fallback_locals *) callback_arg;
75: /* Do nothing if already encountered an error in a previous call. */
76: if (plocals->l_errno == 0) {
77: /* Attempt to convert the passed buffer to the target encoding.
78: Here we don't support characters split across multiple calls. */
79: const char* bufptr = buf;
80: size_t bufleft = buflen;
81: size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
82: &bufptr,&bufleft,
83: &plocals->l_outbuf,&plocals->l_outbytesleft);
84: if (res == (size_t)(-1)) {
85: if (errno == EILSEQ || errno == EINVAL)
86: /* Invalid buf contents. */
87: plocals->l_errno = EILSEQ;
88: else if (errno == E2BIG)
89: /* Output buffer too small. */
90: plocals->l_errno = E2BIG;
91: else
92: abort();
93: } else {
94: /* Successful conversion. */
95: if (bufleft > 0)
96: abort();
97: }
98: }
99: }
100:
101: #else
102:
/* State shared between the conversion loop and the replacement callback. */
struct wc_to_mb_fallback_locals {
  char* l_outbuf;           /* current output position */
  size_t l_outbytesleft;    /* room left in the output buffer */
  int l_errno;              /* first error seen, 0 if none */
};

/* A callback that writes a string given in the target encoding.
   BUF/BUFLEN is copied verbatim to the output position kept in the
   struct wc_to_mb_fallback_locals that CALLBACK_ARG points to.  When the
   text does not fit, l_errno is set to E2BIG and nothing is written. */
static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct wc_to_mb_fallback_locals * ctx =
    (struct wc_to_mb_fallback_locals *) callback_arg;

  /* A previous call already failed: do nothing. */
  if (ctx->l_errno != 0)
    return;

  /* Not enough room for the whole replacement. */
  if (buflen > ctx->l_outbytesleft) {
    ctx->l_errno = E2BIG;
    return;
  }

  memcpy(ctx->l_outbuf, buf, buflen);
  ctx->l_outbuf += buflen;
  ctx->l_outbytesleft -= buflen;
}
127:
128: #endif
129:
130: #endif /* !LIBICONV_PLUG */
131:
132: static size_t wchar_from_loop_convert (iconv_t icd,
133: const char* * inbuf, size_t *inbytesleft,
134: char* * outbuf, size_t *outbytesleft)
135: {
136: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
137: size_t result = 0;
138: while (*inbytesleft >= sizeof(wchar_t)) {
139: const wchar_t * inptr = (const wchar_t *) *inbuf;
140: size_t inleft = *inbytesleft;
141: char buf[BUF_SIZE];
142: mbstate_t state = wcd->state;
143: size_t bufcount = 0;
144: while (inleft >= sizeof(wchar_t)) {
145: /* Convert one wchar_t to multibyte representation. */
146: size_t count = wcrtomb(buf+bufcount,*inptr,&state);
147: if (count == (size_t)(-1)) {
148: /* Invalid input. */
149: if (wcd->parent.discard_ilseq) {
150: count = 0;
151: }
152: #ifndef LIBICONV_PLUG
153: else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
154: /* Drop the contents of buf[] accumulated so far, and instead
155: pass all queued wide characters to the fallback handler. */
156: struct wc_to_mb_fallback_locals locals;
157: const wchar_t * fallback_inptr;
158: #if 0
159: locals.l_wcd = wcd;
160: #endif
161: locals.l_outbuf = *outbuf;
162: locals.l_outbytesleft = *outbytesleft;
163: locals.l_errno = 0;
164: for (fallback_inptr = (const wchar_t *) *inbuf;
165: fallback_inptr <= inptr;
166: fallback_inptr++)
167: wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
168: wc_to_mb_write_replacement,
169: &locals,
170: wcd->parent.fallbacks.data);
171: if (locals.l_errno != 0) {
172: errno = locals.l_errno;
173: return -1;
174: }
175: wcd->state = state;
176: *inbuf = (const char *) (inptr + 1);
177: *inbytesleft = inleft - sizeof(wchar_t);
178: *outbuf = locals.l_outbuf;
179: *outbytesleft = locals.l_outbytesleft;
180: result += 1;
181: break;
182: }
183: #endif
184: else {
185: errno = EILSEQ;
186: return -1;
187: }
188: }
189: inptr++;
190: inleft -= sizeof(wchar_t);
191: bufcount += count;
192: if (count == 0) {
193: /* Continue, append next wchar_t. */
194: } else {
195: /* Attempt to convert the accumulated multibyte representations
196: to the target encoding. */
197: const char* bufptr = buf;
198: size_t bufleft = bufcount;
199: char* outptr = *outbuf;
200: size_t outleft = *outbytesleft;
201: size_t res = unicode_loop_convert(&wcd->parent,
202: &bufptr,&bufleft,
203: &outptr,&outleft);
204: if (res == (size_t)(-1)) {
205: if (errno == EILSEQ)
206: /* Invalid input. */
207: return -1;
208: else if (errno == E2BIG)
209: /* Output buffer too small. */
210: return -1;
211: else if (errno == EINVAL) {
212: /* Continue, append next wchar_t, but avoid buffer overrun. */
213: if (bufcount + MB_CUR_MAX > BUF_SIZE)
214: abort();
215: } else
216: abort();
217: } else {
218: /* Successful conversion. */
219: wcd->state = state;
220: *inbuf = (const char *) inptr;
221: *inbytesleft = inleft;
222: *outbuf = outptr;
223: *outbytesleft = outleft;
224: result += res;
225: break;
226: }
227: }
228: }
229: }
230: return result;
231: }
232:
233: static size_t wchar_from_loop_reset (iconv_t icd,
234: char* * outbuf, size_t *outbytesleft)
235: {
236: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
237: if (outbuf == NULL || *outbuf == NULL) {
238: /* Reset the states. */
239: memset(&wcd->state,'\0',sizeof(mbstate_t));
240: return unicode_loop_reset(&wcd->parent,NULL,NULL);
241: } else {
242: if (!mbsinit(&wcd->state)) {
243: mbstate_t state = wcd->state;
244: char buf[BUF_SIZE];
245: size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
246: if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
247: abort();
248: else {
249: const char* bufptr = buf;
250: size_t bufleft = bufcount-1;
251: char* outptr = *outbuf;
252: size_t outleft = *outbytesleft;
253: size_t res = unicode_loop_convert(&wcd->parent,
254: &bufptr,&bufleft,
255: &outptr,&outleft);
256: if (res == (size_t)(-1)) {
257: if (errno == E2BIG)
258: return -1;
259: else
260: abort();
261: } else {
262: res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
263: if (res == (size_t)(-1))
264: return res;
265: else {
266: /* Successful. */
267: wcd->state = state;
268: *outbuf = outptr;
269: *outbytesleft = outleft;
270: return 0;
271: }
272: }
273: }
274: } else
275: return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
276: }
277: }
278:
279: #endif
280:
281:
282: #if HAVE_MBRTOWC
283:
284: /* From anything else to wchar_t. */
285:
286: #ifndef LIBICONV_PLUG
287:
/* State shared between the conversion loop and the replacement callback. */
struct mb_to_wc_fallback_locals {
  char* l_outbuf;           /* current output position */
  size_t l_outbytesleft;    /* room left in the output buffer, in bytes */
  int l_errno;              /* first error seen, 0 if none */
};

/* A callback that writes BUFLEN wide characters starting at BUF to the
   output position kept in the struct mb_to_wc_fallback_locals that
   CALLBACK_ARG points to.  When they do not fit, l_errno is set to E2BIG and
   nothing is written.  Once l_errno is non-zero, further calls do nothing. */
static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
                                        void* callback_arg)
{
  struct mb_to_wc_fallback_locals * plocals =
    (struct mb_to_wc_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno == 0) {
    /* Compare element counts instead of computing sizeof(wchar_t)*buflen,
       which could wrap around and let an oversized copy through. */
    if (buflen > plocals->l_outbytesleft / sizeof(wchar_t))
      plocals->l_errno = E2BIG;
    else if (buflen > 0) {
      size_t nbytes = buflen * sizeof(wchar_t);
      /* memcpy makes no assumption about the alignment of l_outbuf. */
      memcpy(plocals->l_outbuf, buf, nbytes);
      plocals->l_outbuf += nbytes;
      plocals->l_outbytesleft -= nbytes;
    }
  }
}
313:
314: #endif /* !LIBICONV_PLUG */
315:
316: static size_t wchar_to_loop_convert (iconv_t icd,
317: const char* * inbuf, size_t *inbytesleft,
318: char* * outbuf, size_t *outbytesleft)
319: {
320: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
321: size_t result = 0;
322: while (*inbytesleft > 0) {
323: size_t incount;
324: for (incount = 1; incount <= *inbytesleft; incount++) {
325: char buf[BUF_SIZE];
326: const char* inptr = *inbuf;
327: size_t inleft = incount;
328: char* bufptr = buf;
329: size_t bufleft = BUF_SIZE;
330: size_t res = unicode_loop_convert(&wcd->parent,
331: &inptr,&inleft,
332: &bufptr,&bufleft);
333: if (res == (size_t)(-1)) {
334: if (errno == EILSEQ)
335: /* Invalid input. */
336: return -1;
337: else if (errno == EINVAL) {
338: /* Incomplete input. Next try with one more input byte. */
339: } else
340: /* E2BIG shouldn't occur. */
341: abort();
342: } else {
343: /* Successful conversion. */
344: size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
345: mbstate_t state = wcd->state;
346: wchar_t wc;
347: res = mbrtowc(&wc,buf,bufcount,&state);
348: if (res == (size_t)(-2)) {
349: /* Next try with one more input byte. */
350: } else {
351: if (res == (size_t)(-1)) {
352: /* Invalid input. */
353: if (wcd->parent.discard_ilseq) {
354: }
355: #ifndef LIBICONV_PLUG
356: else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
357: /* Drop the contents of buf[] accumulated so far, and instead
358: pass all queued chars to the fallback handler. */
359: struct mb_to_wc_fallback_locals locals;
360: locals.l_outbuf = *outbuf;
361: locals.l_outbytesleft = *outbytesleft;
362: locals.l_errno = 0;
363: wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
364: mb_to_wc_write_replacement,
365: &locals,
366: wcd->parent.fallbacks.data);
367: if (locals.l_errno != 0) {
368: errno = locals.l_errno;
369: return -1;
370: }
371: /* Restoring the state is not needed because it is the initial
372: state anyway: For all known locale encodings, the multibyte
373: to wchar_t conversion doesn't have shift state, and we have
374: excluded partial accumulated characters. */
375: /* wcd->state = state; */
376: *inbuf += incount;
377: *inbytesleft -= incount;
378: *outbuf = locals.l_outbuf;
379: *outbytesleft = locals.l_outbytesleft;
380: result += 1;
381: break;
382: }
383: #endif
384: else
385: return -1;
386: } else {
387: if (*outbytesleft < sizeof(wchar_t)) {
388: errno = E2BIG;
389: return -1;
390: }
391: *(wchar_t*) *outbuf = wc;
392: /* Restoring the state is not needed because it is the initial
393: state anyway: For all known locale encodings, the multibyte
394: to wchar_t conversion doesn't have shift state, and we have
395: excluded partial accumulated characters. */
396: /* wcd->state = state; */
397: *outbuf += sizeof(wchar_t);
398: *outbytesleft -= sizeof(wchar_t);
399: }
400: *inbuf += incount;
401: *inbytesleft -= incount;
402: result += res;
403: break;
404: }
405: }
406: }
407: }
408: return result;
409: }
410:
411: static size_t wchar_to_loop_reset (iconv_t icd,
412: char* * outbuf, size_t *outbytesleft)
413: {
414: struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
415: size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
416: if (res == (size_t)(-1))
417: return res;
418: memset(&wcd->state,0,sizeof(mbstate_t));
419: return 0;
420: }
421:
422: #endif
423:
424:
425: /* From wchar_t to wchar_t. */
426:
427: static size_t wchar_id_loop_convert (iconv_t icd,
428: const char* * inbuf, size_t *inbytesleft,
429: char* * outbuf, size_t *outbytesleft)
430: {
431: struct conv_struct * cd = (struct conv_struct *) icd;
432: const wchar_t* inptr = (const wchar_t*) *inbuf;
433: size_t inleft = *inbytesleft / sizeof(wchar_t);
434: wchar_t* outptr = (wchar_t*) *outbuf;
435: size_t outleft = *outbytesleft / sizeof(wchar_t);
436: size_t count = (inleft <= outleft ? inleft : outleft);
437: if (count > 0) {
438: *inbytesleft -= count * sizeof(wchar_t);
439: *outbytesleft -= count * sizeof(wchar_t);
440: do {
441: wchar_t wc = *inptr++;
442: *outptr++ = wc;
443: #ifndef LIBICONV_PLUG
444: if (cd->hooks.wc_hook)
445: (*cd->hooks.wc_hook)(wc, cd->hooks.data);
446: #endif
447: } while (--count > 0);
448: *inbuf = (const char*) inptr;
449: *outbuf = (char*) outptr;
450: }
451: return 0;
452: }
453:
/* Reset function for the identity loop: there is nothing to reset and
   nothing to write, so the arguments are ignored.  Always returns 0. */
static size_t wchar_id_loop_reset (iconv_t icd,
                                   char* * outbuf, size_t *outbytesleft)
{
  (void) icd;
  (void) outbuf;
  (void) outbytesleft;
  return 0;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>