Annotation of embedaddon/libiconv/lib/iconv.c, revision 1.1.1.2
1.1 misho 1: /*
1.1.1.2 ! misho 2: * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: #include <iconv.h>
22:
23: #include <stdlib.h>
24: #include <string.h>
25: #include "config.h"
26: #include "localcharset.h"
27:
1.1.1.2 ! misho 28: #ifdef __CYGWIN__
! 29: #include <cygwin/version.h>
! 30: #endif
! 31:
#if ENABLE_EXTRA
/*
 * Extra encodings requested: enable every system dependent encoding set,
 * whatever the host system is, plus the extra encodings.
 */
# define USE_AIX
# define USE_OSF1
# define USE_DOS
# define USE_EXTRA
#else
/*
 * Otherwise enable only those system dependent encodings that are needed
 * on the system being compiled for.
 */
# ifdef _AIX
#  define USE_AIX
# endif
# if defined(__osf__) || defined(VMS)
#  define USE_OSF1
# endif
# if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#  define USE_DOS
# endif
#endif
56:
57: /*
58: * Data type for general conversion loop.
59: */
struct loop_funcs {
  /* Invoked by iconv() when it is given a non-NULL input buffer. */
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  /* Invoked by iconv() when inbuf or *inbuf is NULL. */
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
67:
68: /*
69: * Converters.
70: */
71: #include "converters.h"
72:
73: /*
74: * Transliteration tables.
75: */
76: #include "cjk_variants.h"
77: #include "translit.h"
78:
79: /*
80: * Table of all supported encodings.
81: */
82: struct encoding {
83: struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
84: struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
85: int oflags; /* flags for unicode -> multibyte conversion */
86: };
87: #define DEFALIAS(xxx_alias,xxx) /* nothing */
88: enum {
89: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
90: ei_##xxx ,
91: #include "encodings.def"
92: #ifdef USE_AIX
93: # include "encodings_aix.def"
94: #endif
95: #ifdef USE_OSF1
96: # include "encodings_osf1.def"
97: #endif
98: #ifdef USE_DOS
99: # include "encodings_dos.def"
100: #endif
101: #ifdef USE_EXTRA
102: # include "encodings_extra.def"
103: #endif
104: #include "encodings_local.def"
105: #undef DEFENCODING
106: ei_for_broken_compilers_that_dont_like_trailing_commas
107: };
108: #include "flags.h"
109: static struct encoding const all_encodings[] = {
110: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
111: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
112: #include "encodings.def"
113: #ifdef USE_AIX
114: # include "encodings_aix.def"
115: #endif
116: #ifdef USE_OSF1
117: # include "encodings_osf1.def"
118: #endif
119: #ifdef USE_DOS
120: # include "encodings_dos.def"
121: #endif
122: #ifdef USE_EXTRA
123: # include "encodings_extra.def"
124: #endif
125: #undef DEFENCODING
126: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
127: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
128: #include "encodings_local.def"
129: #undef DEFENCODING
130: };
131: #undef DEFALIAS
132:
133: /*
134: * Conversion loops.
135: */
136: #include "loops.h"
137:
138: /*
139: * Alias lookup function.
140: * Defines
141: * struct alias { int name; unsigned int encoding_index; };
142: * const struct alias * aliases_lookup (const char *str, unsigned int len);
143: * #define MAX_WORD_LENGTH ...
144: */
145: #if defined _AIX
146: # include "aliases_sysaix.h"
147: #elif defined hpux || defined __hpux
148: # include "aliases_syshpux.h"
149: #elif defined __osf__
150: # include "aliases_sysosf1.h"
151: #elif defined __sun
152: # include "aliases_syssolaris.h"
153: #else
154: # include "aliases.h"
155: #endif
156:
157: /*
158: * System dependent alias lookup function.
159: * Defines
160: * const struct alias * aliases2_lookup (const char *str);
161: */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* All system dependent alias names, stored back to back.  Each S(...) entry
   of aliases2.h contributes one char array member sized for its name. */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};
#define stringpool2 ((const char *) &stringpool2_contents)
/* For every alias: offset of its name within stringpool2, and the index of
   the encoding it denotes. */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
#include "aliases2.h"
#undef S
};
/*
 * Linear search of sysdep_aliases for an exact (case sensitive) match of
 * STR.  Returns the matching entry, or NULL if there is none.
 *
 * The function has internal linkage: it is only needed by this translation
 * unit, and a non-static '__inline' definition is not guaranteed to provide
 * an out-of-line definition under C99 inline semantics.
 */
#ifdef __GNUC__
__inline
#endif
static const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * ptr;
  unsigned int count;
  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
    if (!strcmp(str, stringpool2 + ptr->name))
      return ptr;
  return NULL;
}
#else
/* No system dependent aliases are compiled in: every lookup fails. */
#define aliases2_lookup(str)  NULL
#define stringpool2  NULL
#endif
196:
197: #if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if str2, with its lowercase ASCII letters converted to
   uppercase, is equal to str1, and 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char* p1 = (const unsigned char *) str1;
  const unsigned char* p2 = (const unsigned char *) str2;
  for (;; p1++, p2++) {
    unsigned char c1 = *p1;
    unsigned char c2 = *p2;
    /* At the end of str1 the strings match only if str2 ends here too. */
    if (c1 == 0)
      return (c2 == 0);
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      return 0;
  }
}
216: #endif
217:
218: iconv_t iconv_open (const char* tocode, const char* fromcode)
219: {
220: struct conv_struct * cd;
221: unsigned int from_index;
222: int from_wchar;
223: unsigned int to_index;
224: int to_wchar;
225: int transliterate;
226: int discard_ilseq;
227:
228: #include "iconv_open1.h"
229:
230: cd = (struct conv_struct *) malloc(from_wchar != to_wchar
231: ? sizeof(struct wchar_conv_struct)
232: : sizeof(struct conv_struct));
233: if (cd == NULL) {
234: errno = ENOMEM;
235: return (iconv_t)(-1);
236: }
237:
238: #include "iconv_open2.h"
239:
240: return (iconv_t)cd;
241: invalid:
242: errno = EINVAL;
243: return (iconv_t)(-1);
244: }
245:
246: size_t iconv (iconv_t icd,
247: ICONV_CONST char* * inbuf, size_t *inbytesleft,
248: char* * outbuf, size_t *outbytesleft)
249: {
250: conv_t cd = (conv_t) icd;
251: if (inbuf == NULL || *inbuf == NULL)
252: return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
253: else
254: return cd->lfuncs.loop_convert(icd,
255: (const char* *)inbuf,inbytesleft,
256: outbuf,outbytesleft);
257: }
258:
259: int iconv_close (iconv_t icd)
260: {
261: conv_t cd = (conv_t) icd;
262: free(cd);
263: return 0;
264: }
265:
266: #ifndef LIBICONV_PLUG
267:
268: /*
269: * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
270: * fit in an iconv_allocation_t.
271: * If this verification fails, iconv_allocation_t must be made larger and
272: * the major version in LIBICONV_VERSION_INFO must be bumped.
273: * Currently 'struct conv_struct' has 21 integer/pointer fields, and
274: * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
275: */
276: typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
277: typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
278:
279: int iconv_open_into (const char* tocode, const char* fromcode,
280: iconv_allocation_t* resultp)
281: {
282: struct conv_struct * cd;
283: unsigned int from_index;
284: int from_wchar;
285: unsigned int to_index;
286: int to_wchar;
287: int transliterate;
288: int discard_ilseq;
289:
290: #include "iconv_open1.h"
291:
292: cd = (struct conv_struct *) resultp;
293:
294: #include "iconv_open2.h"
295:
296: return 0;
297: invalid:
298: errno = EINVAL;
299: return -1;
300: }
301:
302: int iconvctl (iconv_t icd, int request, void* argument)
303: {
304: conv_t cd = (conv_t) icd;
305: switch (request) {
306: case ICONV_TRIVIALP:
307: *(int *)argument =
308: ((cd->lfuncs.loop_convert == unicode_loop_convert
309: && cd->iindex == cd->oindex)
310: || cd->lfuncs.loop_convert == wchar_id_loop_convert
311: ? 1 : 0);
312: return 0;
313: case ICONV_GET_TRANSLITERATE:
314: *(int *)argument = cd->transliterate;
315: return 0;
316: case ICONV_SET_TRANSLITERATE:
317: cd->transliterate = (*(const int *)argument ? 1 : 0);
318: return 0;
319: case ICONV_GET_DISCARD_ILSEQ:
320: *(int *)argument = cd->discard_ilseq;
321: return 0;
322: case ICONV_SET_DISCARD_ILSEQ:
323: cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
324: return 0;
325: case ICONV_SET_HOOKS:
326: if (argument != NULL) {
327: cd->hooks = *(const struct iconv_hooks *)argument;
328: } else {
329: cd->hooks.uc_hook = NULL;
330: cd->hooks.wc_hook = NULL;
331: cd->hooks.data = NULL;
332: }
333: return 0;
334: case ICONV_SET_FALLBACKS:
335: if (argument != NULL) {
336: cd->fallbacks = *(const struct iconv_fallbacks *)argument;
337: } else {
338: cd->fallbacks.mb_to_uc_fallback = NULL;
339: cd->fallbacks.uc_to_mb_fallback = NULL;
340: cd->fallbacks.mb_to_wc_fallback = NULL;
341: cd->fallbacks.wc_to_mb_fallback = NULL;
342: cd->fallbacks.data = NULL;
343: }
344: return 0;
345: default:
346: errno = EINVAL;
347: return -1;
348: }
349: }
350:
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort comparator ordering 'struct nalias' elements by ascending
 * encoding_index.  Returns -1, 0 or 1.
 * The indices are compared directly rather than subtracted, so the result
 * does not depend on an unsigned -> int conversion and cannot overflow.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  unsigned int index1 = ((const struct nalias *) arg1)->encoding_index;
  unsigned int index2 = ((const struct nalias *) arg2)->encoding_index;
  return (index1 > index2) - (index1 < index2);
}
360:
/*
 * qsort comparator for an array of 'const char *' names.
 * Orders names as strcmp does, except that names starting with "CS" sort
 * after all names that do not.  Returns 0 only for equal strings.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * name1 = *(const char * const *)arg1;
  const char * name2 = *(const char * const *)arg2;
  int order = strcmp(name1,name2);
  int is_cs1;
  int is_cs2;

  if (order == 0)
    return 0;
  is_cs1 = (name1[0]=='C' && name1[1]=='S');
  is_cs2 = (name2[0]=='C' && name2[1]=='S');
  /* The "CS" difference is weighted by 4 so that it always outweighs the
     alphabetical +1/-1 term. */
  return (is_cs1 - is_cs2) * 4 + (order > 0 ? 1 : -1);
}
373:
374: void iconvlist (int (*do_one) (unsigned int namescount,
375: const char * const * names,
376: void* data),
377: void* data)
378: {
379: #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
380: #ifndef aliases2_lookup
381: #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
382: #else
383: #define aliascount2 0
384: #endif
385: #define aliascount (aliascount1+aliascount2)
386: struct nalias aliasbuf[aliascount];
387: const char * namesbuf[aliascount];
388: size_t num_aliases;
389: {
390: /* Put all existing aliases into a buffer. */
391: size_t i;
392: size_t j;
393: j = 0;
394: for (i = 0; i < aliascount1; i++) {
395: const struct alias * p = &aliases[i];
396: if (p->name >= 0
397: && p->encoding_index != ei_local_char
398: && p->encoding_index != ei_local_wchar_t) {
399: aliasbuf[j].name = stringpool + p->name;
400: aliasbuf[j].encoding_index = p->encoding_index;
401: j++;
402: }
403: }
404: #ifndef aliases2_lookup
405: for (i = 0; i < aliascount2; i++) {
406: aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
407: aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
408: j++;
409: }
410: #endif
411: num_aliases = j;
412: }
413: /* Sort by encoding_index. */
414: if (num_aliases > 1)
415: qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
416: {
417: /* Process all aliases with the same encoding_index together. */
418: size_t j;
419: j = 0;
420: while (j < num_aliases) {
421: unsigned int ei = aliasbuf[j].encoding_index;
422: size_t i = 0;
423: do
424: namesbuf[i++] = aliasbuf[j++].name;
425: while (j < num_aliases && aliasbuf[j].encoding_index == ei);
426: if (i > 1)
427: qsort(namesbuf, i, sizeof(const char *), compare_by_name);
428: /* Call the callback. */
429: if (do_one(i,namesbuf,data))
430: break;
431: }
432: }
433: #undef aliascount
434: #undef aliascount2
435: #undef aliascount1
436: }
437:
438: /*
439: * Table of canonical names of encodings.
440: * Instead of strings, it contains offsets into stringpool and stringpool2.
441: */
442: static const unsigned short all_canonical[] = {
443: #if defined _AIX
444: # include "canonical_sysaix.h"
445: #elif defined hpux || defined __hpux
446: # include "canonical_syshpux.h"
447: #elif defined __osf__
448: # include "canonical_sysosf1.h"
449: #elif defined __sun
450: # include "canonical_syssolaris.h"
451: #else
452: # include "canonical.h"
453: #endif
454: #ifdef USE_AIX
455: # if defined _AIX
456: # include "canonical_aix_sysaix.h"
457: # else
458: # include "canonical_aix.h"
459: # endif
460: #endif
461: #ifdef USE_OSF1
462: # if defined __osf__
463: # include "canonical_osf1_sysosf1.h"
464: # else
465: # include "canonical_osf1.h"
466: # endif
467: #endif
468: #ifdef USE_DOS
469: # include "canonical_dos.h"
470: #endif
471: #ifdef USE_EXTRA
472: # include "canonical_extra.h"
473: #endif
474: #if defined _AIX
475: # include "canonical_local_sysaix.h"
476: #elif defined hpux || defined __hpux
477: # include "canonical_local_syshpux.h"
478: #elif defined __osf__
479: # include "canonical_local_sysosf1.h"
480: #elif defined __sun
481: # include "canonical_local_syssolaris.h"
482: #else
483: # include "canonical_local.h"
484: #endif
485: };
486:
487: const char * iconv_canonicalize (const char * name)
488: {
489: const char* code;
490: char buf[MAX_WORD_LENGTH+10+1];
491: const char* cp;
492: char* bp;
493: const struct alias * ap;
494: unsigned int count;
495: unsigned int index;
496: const char* pool;
497:
498: /* Before calling aliases_lookup, convert the input string to upper case,
499: * and check whether it's entirely ASCII (we call gperf with option "-7"
500: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
501: * or if it's too long, it is not a valid encoding name.
502: */
503: for (code = name;;) {
504: /* Search code in the table. */
505: for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
506: unsigned char c = * (unsigned char *) cp;
507: if (c >= 0x80)
508: goto invalid;
509: if (c >= 'a' && c <= 'z')
510: c -= 'a'-'A';
511: *bp = c;
512: if (c == '\0')
513: break;
514: if (--count == 0)
515: goto invalid;
516: }
517: for (;;) {
518: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
519: bp -= 10;
520: *bp = '\0';
521: continue;
522: }
523: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
524: bp -= 8;
525: *bp = '\0';
526: continue;
527: }
528: break;
529: }
530: if (buf[0] == '\0') {
531: code = locale_charset();
532: /* Avoid an endless loop that could occur when using an older version
533: of localcharset.c. */
534: if (code[0] == '\0')
535: goto invalid;
536: continue;
537: }
538: pool = stringpool;
539: ap = aliases_lookup(buf,bp-buf);
540: if (ap == NULL) {
541: pool = stringpool2;
542: ap = aliases2_lookup(buf);
543: if (ap == NULL)
544: goto invalid;
545: }
546: if (ap->encoding_index == ei_local_char) {
547: code = locale_charset();
548: /* Avoid an endless loop that could occur when using an older version
549: of localcharset.c. */
550: if (code[0] == '\0')
551: goto invalid;
552: continue;
553: }
554: if (ap->encoding_index == ei_local_wchar_t) {
555: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 ! misho 556: This is also the case on native Woe32 systems and Cygwin >= 1.7, where
! 557: we know that it is UTF-16. */
! 558: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
! 559: if (sizeof(wchar_t) == 4) {
! 560: index = ei_ucs4internal;
! 561: break;
! 562: }
! 563: if (sizeof(wchar_t) == 2) {
! 564: # if WORDS_LITTLEENDIAN
! 565: index = ei_utf16le;
! 566: # else
! 567: index = ei_utf16be;
! 568: # endif
! 569: break;
! 570: }
! 571: #elif __STDC_ISO_10646__
1.1 misho 572: if (sizeof(wchar_t) == 4) {
573: index = ei_ucs4internal;
574: break;
575: }
576: if (sizeof(wchar_t) == 2) {
577: index = ei_ucs2internal;
578: break;
579: }
580: if (sizeof(wchar_t) == 1) {
581: index = ei_iso8859_1;
582: break;
583: }
584: #endif
585: }
586: index = ap->encoding_index;
587: break;
588: }
589: return all_canonical[index] + pool;
590: invalid:
591: return name;
592: }
593:
594: int _libiconv_version = _LIBICONV_VERSION;
595:
#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.  */
/* strong_alias goes through _strong_alias so that macro arguments are
   expanded before '#name' stringifies them. */
# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
# define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* Drop any macro definitions of the plain names so that they can be
   declared as symbols of their own. */
# undef iconv_open
# undef iconv
# undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
609:
610: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>