Annotation of embedaddon/libiconv/lib/iconv.c, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 1999-2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: #include <iconv.h>
22:
23: #include <stdlib.h>
24: #include <string.h>
25: #include "config.h"
26: #include "localcharset.h"
27:
#if ENABLE_EXTRA
/*
 * Extra encodings were requested: enable every system dependent encoding
 * set, regardless of the platform being built for, plus the extra set.
 */
# define USE_AIX
# define USE_OSF1
# define USE_DOS
# define USE_EXTRA
#else
/*
 * Otherwise enable only the system dependent encoding sets that the
 * platform being compiled for needs.
 */
# if defined(_AIX)
#  define USE_AIX
# endif
# if defined(__osf__) || defined(VMS)
#  define USE_OSF1
# endif
# if defined(__DJGPP__)
#  define USE_DOS
# elif defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__))
#  define USE_DOS
# endif
#endif
52:
/*
 * The two entry points of a general conversion loop.
 * iconv() in this file reaches them through the 'lfuncs' member of the
 * conversion descriptor.
 */
struct loop_funcs {
  /* Called by iconv() when an input buffer is supplied. */
  size_t (*loop_convert) (iconv_t icd,
                          const char **inbuf, size_t *inbytesleft,
                          char **outbuf, size_t *outbytesleft);
  /* Called by iconv() when inbuf or *inbuf is NULL. */
  size_t (*loop_reset) (iconv_t icd,
                        char **outbuf, size_t *outbytesleft);
};
63:
64: /*
65: * Converters.
66: */
67: #include "converters.h"
68:
69: /*
70: * Transliteration tables.
71: */
72: #include "cjk_variants.h"
73: #include "translit.h"
74:
75: /*
76: * Table of all supported encodings.
77: */
78: struct encoding {
79: struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
80: struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
81: int oflags; /* flags for unicode -> multibyte conversion */
82: };
83: #define DEFALIAS(xxx_alias,xxx) /* nothing */
84: enum {
85: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
86: ei_##xxx ,
87: #include "encodings.def"
88: #ifdef USE_AIX
89: # include "encodings_aix.def"
90: #endif
91: #ifdef USE_OSF1
92: # include "encodings_osf1.def"
93: #endif
94: #ifdef USE_DOS
95: # include "encodings_dos.def"
96: #endif
97: #ifdef USE_EXTRA
98: # include "encodings_extra.def"
99: #endif
100: #include "encodings_local.def"
101: #undef DEFENCODING
102: ei_for_broken_compilers_that_dont_like_trailing_commas
103: };
104: #include "flags.h"
105: static struct encoding const all_encodings[] = {
106: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
107: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
108: #include "encodings.def"
109: #ifdef USE_AIX
110: # include "encodings_aix.def"
111: #endif
112: #ifdef USE_OSF1
113: # include "encodings_osf1.def"
114: #endif
115: #ifdef USE_DOS
116: # include "encodings_dos.def"
117: #endif
118: #ifdef USE_EXTRA
119: # include "encodings_extra.def"
120: #endif
121: #undef DEFENCODING
122: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
123: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
124: #include "encodings_local.def"
125: #undef DEFENCODING
126: };
127: #undef DEFALIAS
128:
129: /*
130: * Conversion loops.
131: */
132: #include "loops.h"
133:
134: /*
135: * Alias lookup function.
136: * Defines
137: * struct alias { int name; unsigned int encoding_index; };
138: * const struct alias * aliases_lookup (const char *str, unsigned int len);
139: * #define MAX_WORD_LENGTH ...
140: */
141: #if defined _AIX
142: # include "aliases_sysaix.h"
143: #elif defined hpux || defined __hpux
144: # include "aliases_syshpux.h"
145: #elif defined __osf__
146: # include "aliases_sysosf1.h"
147: #elif defined __sun
148: # include "aliases_syssolaris.h"
149: #else
150: # include "aliases.h"
151: #endif
152:
153: /*
154: * System dependent alias lookup function.
155: * Defines
156: * const struct alias * aliases2_lookup (const char *str);
157: */
158: #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
159: struct stringpool2_t {
160: #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
161: #include "aliases2.h"
162: #undef S
163: };
164: static const struct stringpool2_t stringpool2_contents = {
165: #define S(tag,name,encoding_index) name,
166: #include "aliases2.h"
167: #undef S
168: };
169: #define stringpool2 ((const char *) &stringpool2_contents)
170: static const struct alias sysdep_aliases[] = {
171: #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
172: #include "aliases2.h"
173: #undef S
174: };
175: #ifdef __GNUC__
176: __inline
177: #endif
178: const struct alias *
179: aliases2_lookup (register const char *str)
180: {
181: const struct alias * ptr;
182: unsigned int count;
183: for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
184: if (!strcmp(str, stringpool2 + ptr->name))
185: return ptr;
186: return NULL;
187: }
188: #else
/*
 * No system dependent aliases are compiled in: there is no second string
 * pool and every lookup in it misses. iconvlist() tests whether
 * aliases2_lookup is a macro to detect this configuration.
 */
#define stringpool2 NULL
#define aliases2_lookup(name) NULL
191: #endif
192:
193: #if 0
/* Returns 1 if the two strings are equal when the lowercase ASCII letters
   of str2 are folded to uppercase, 0 otherwise. str1 is compared as is,
   so it is expected to be uppercase already; both strings are expected to
   be ASCII. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char *upper = (const unsigned char *) str1;
  const unsigned char *other = (const unsigned char *) str2;
  unsigned char a;
  unsigned char b;

  do {
    a = *upper++;
    b = *other++;
    /* At the end of str1 the strings match only if str2 ends here too. */
    if (a == 0)
      break;
    if (b >= 'a' && b <= 'z')
      b = (unsigned char) (b - ('a' - 'A'));
  } while (a == b);

  return (a == b);
}
212: #endif
213:
214: iconv_t iconv_open (const char* tocode, const char* fromcode)
215: {
216: struct conv_struct * cd;
217: unsigned int from_index;
218: int from_wchar;
219: unsigned int to_index;
220: int to_wchar;
221: int transliterate;
222: int discard_ilseq;
223:
224: #include "iconv_open1.h"
225:
226: cd = (struct conv_struct *) malloc(from_wchar != to_wchar
227: ? sizeof(struct wchar_conv_struct)
228: : sizeof(struct conv_struct));
229: if (cd == NULL) {
230: errno = ENOMEM;
231: return (iconv_t)(-1);
232: }
233:
234: #include "iconv_open2.h"
235:
236: return (iconv_t)cd;
237: invalid:
238: errno = EINVAL;
239: return (iconv_t)(-1);
240: }
241:
242: size_t iconv (iconv_t icd,
243: ICONV_CONST char* * inbuf, size_t *inbytesleft,
244: char* * outbuf, size_t *outbytesleft)
245: {
246: conv_t cd = (conv_t) icd;
247: if (inbuf == NULL || *inbuf == NULL)
248: return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
249: else
250: return cd->lfuncs.loop_convert(icd,
251: (const char* *)inbuf,inbytesleft,
252: outbuf,outbytesleft);
253: }
254:
/*
 * Releases a conversion descriptor obtained from iconv_open().
 *
 * Returns 0 on success. If icd is (iconv_t)(-1), the value iconv_open()
 * returns on failure, nothing is freed and -1 is returned with errno set
 * to EBADF; without this check that value would be handed to free().
 */
int iconv_close (iconv_t icd)
{
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return -1;
  }
  free((void *) icd);
  return 0;
}
261:
262: #ifndef LIBICONV_PLUG
263:
264: /*
265: * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
266: * fit in an iconv_allocation_t.
267: * If this verification fails, iconv_allocation_t must be made larger and
268: * the major version in LIBICONV_VERSION_INFO must be bumped.
269: * Currently 'struct conv_struct' has 21 integer/pointer fields, and
270: * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
271: */
272: typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
273: typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
274:
275: int iconv_open_into (const char* tocode, const char* fromcode,
276: iconv_allocation_t* resultp)
277: {
278: struct conv_struct * cd;
279: unsigned int from_index;
280: int from_wchar;
281: unsigned int to_index;
282: int to_wchar;
283: int transliterate;
284: int discard_ilseq;
285:
286: #include "iconv_open1.h"
287:
288: cd = (struct conv_struct *) resultp;
289:
290: #include "iconv_open2.h"
291:
292: return 0;
293: invalid:
294: errno = EINVAL;
295: return -1;
296: }
297:
298: int iconvctl (iconv_t icd, int request, void* argument)
299: {
300: conv_t cd = (conv_t) icd;
301: switch (request) {
302: case ICONV_TRIVIALP:
303: *(int *)argument =
304: ((cd->lfuncs.loop_convert == unicode_loop_convert
305: && cd->iindex == cd->oindex)
306: || cd->lfuncs.loop_convert == wchar_id_loop_convert
307: ? 1 : 0);
308: return 0;
309: case ICONV_GET_TRANSLITERATE:
310: *(int *)argument = cd->transliterate;
311: return 0;
312: case ICONV_SET_TRANSLITERATE:
313: cd->transliterate = (*(const int *)argument ? 1 : 0);
314: return 0;
315: case ICONV_GET_DISCARD_ILSEQ:
316: *(int *)argument = cd->discard_ilseq;
317: return 0;
318: case ICONV_SET_DISCARD_ILSEQ:
319: cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
320: return 0;
321: case ICONV_SET_HOOKS:
322: if (argument != NULL) {
323: cd->hooks = *(const struct iconv_hooks *)argument;
324: } else {
325: cd->hooks.uc_hook = NULL;
326: cd->hooks.wc_hook = NULL;
327: cd->hooks.data = NULL;
328: }
329: return 0;
330: case ICONV_SET_FALLBACKS:
331: if (argument != NULL) {
332: cd->fallbacks = *(const struct iconv_fallbacks *)argument;
333: } else {
334: cd->fallbacks.mb_to_uc_fallback = NULL;
335: cd->fallbacks.uc_to_mb_fallback = NULL;
336: cd->fallbacks.mb_to_wc_fallback = NULL;
337: cd->fallbacks.wc_to_mb_fallback = NULL;
338: cd->fallbacks.data = NULL;
339: }
340: return 0;
341: default:
342: errno = EINVAL;
343: return -1;
344: }
345: }
346:
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort() comparator ordering 'struct nalias' elements by ascending
 * encoding_index. Returns -1, 0 or 1.
 *
 * The indexes are compared as the unsigned values they are. Subtracting
 * them after a cast to int would mis-order indexes above INT_MAX and
 * could overflow.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  unsigned int index1 = ((const struct nalias *) arg1)->encoding_index;
  unsigned int index2 = ((const struct nalias *) arg2)->encoding_index;
  return (index1 > index2) - (index1 < index2);
}
356:
/*
 * qsort() comparator for an array of 'const char *' names.
 * Names are ordered alphabetically (strcmp), except that names starting
 * with "CS" sort after all names that do not.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * name1 = *(const char * const *)arg1;
  const char * name2 = *(const char * const *)arg2;
  int order = strcmp(name1,name2);
  int cs1;
  int cs2;

  if (order == 0)
    return 0;

  cs1 = (name1[0]=='C' && name1[1]=='S');
  cs2 = (name2[0]=='C' && name2[1]=='S');
  /* The "CS" difference is weighted so that it always outweighs the
     alphabetical order, which only contributes +1 or -1. */
  return (cs1 - cs2) * 4 + (order > 0 ? 1 : -1);
}
369:
370: void iconvlist (int (*do_one) (unsigned int namescount,
371: const char * const * names,
372: void* data),
373: void* data)
374: {
375: #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
376: #ifndef aliases2_lookup
377: #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
378: #else
379: #define aliascount2 0
380: #endif
381: #define aliascount (aliascount1+aliascount2)
382: struct nalias aliasbuf[aliascount];
383: const char * namesbuf[aliascount];
384: size_t num_aliases;
385: {
386: /* Put all existing aliases into a buffer. */
387: size_t i;
388: size_t j;
389: j = 0;
390: for (i = 0; i < aliascount1; i++) {
391: const struct alias * p = &aliases[i];
392: if (p->name >= 0
393: && p->encoding_index != ei_local_char
394: && p->encoding_index != ei_local_wchar_t) {
395: aliasbuf[j].name = stringpool + p->name;
396: aliasbuf[j].encoding_index = p->encoding_index;
397: j++;
398: }
399: }
400: #ifndef aliases2_lookup
401: for (i = 0; i < aliascount2; i++) {
402: aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
403: aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
404: j++;
405: }
406: #endif
407: num_aliases = j;
408: }
409: /* Sort by encoding_index. */
410: if (num_aliases > 1)
411: qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
412: {
413: /* Process all aliases with the same encoding_index together. */
414: size_t j;
415: j = 0;
416: while (j < num_aliases) {
417: unsigned int ei = aliasbuf[j].encoding_index;
418: size_t i = 0;
419: do
420: namesbuf[i++] = aliasbuf[j++].name;
421: while (j < num_aliases && aliasbuf[j].encoding_index == ei);
422: if (i > 1)
423: qsort(namesbuf, i, sizeof(const char *), compare_by_name);
424: /* Call the callback. */
425: if (do_one(i,namesbuf,data))
426: break;
427: }
428: }
429: #undef aliascount
430: #undef aliascount2
431: #undef aliascount1
432: }
433:
434: /*
435: * Table of canonical names of encodings.
436: * Instead of strings, it contains offsets into stringpool and stringpool2.
437: */
438: static const unsigned short all_canonical[] = {
439: #if defined _AIX
440: # include "canonical_sysaix.h"
441: #elif defined hpux || defined __hpux
442: # include "canonical_syshpux.h"
443: #elif defined __osf__
444: # include "canonical_sysosf1.h"
445: #elif defined __sun
446: # include "canonical_syssolaris.h"
447: #else
448: # include "canonical.h"
449: #endif
450: #ifdef USE_AIX
451: # if defined _AIX
452: # include "canonical_aix_sysaix.h"
453: # else
454: # include "canonical_aix.h"
455: # endif
456: #endif
457: #ifdef USE_OSF1
458: # if defined __osf__
459: # include "canonical_osf1_sysosf1.h"
460: # else
461: # include "canonical_osf1.h"
462: # endif
463: #endif
464: #ifdef USE_DOS
465: # include "canonical_dos.h"
466: #endif
467: #ifdef USE_EXTRA
468: # include "canonical_extra.h"
469: #endif
470: #if defined _AIX
471: # include "canonical_local_sysaix.h"
472: #elif defined hpux || defined __hpux
473: # include "canonical_local_syshpux.h"
474: #elif defined __osf__
475: # include "canonical_local_sysosf1.h"
476: #elif defined __sun
477: # include "canonical_local_syssolaris.h"
478: #else
479: # include "canonical_local.h"
480: #endif
481: };
482:
483: const char * iconv_canonicalize (const char * name)
484: {
485: const char* code;
486: char buf[MAX_WORD_LENGTH+10+1];
487: const char* cp;
488: char* bp;
489: const struct alias * ap;
490: unsigned int count;
491: unsigned int index;
492: const char* pool;
493:
494: /* Before calling aliases_lookup, convert the input string to upper case,
495: * and check whether it's entirely ASCII (we call gperf with option "-7"
496: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
497: * or if it's too long, it is not a valid encoding name.
498: */
499: for (code = name;;) {
500: /* Search code in the table. */
501: for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
502: unsigned char c = * (unsigned char *) cp;
503: if (c >= 0x80)
504: goto invalid;
505: if (c >= 'a' && c <= 'z')
506: c -= 'a'-'A';
507: *bp = c;
508: if (c == '\0')
509: break;
510: if (--count == 0)
511: goto invalid;
512: }
513: for (;;) {
514: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
515: bp -= 10;
516: *bp = '\0';
517: continue;
518: }
519: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
520: bp -= 8;
521: *bp = '\0';
522: continue;
523: }
524: break;
525: }
526: if (buf[0] == '\0') {
527: code = locale_charset();
528: /* Avoid an endless loop that could occur when using an older version
529: of localcharset.c. */
530: if (code[0] == '\0')
531: goto invalid;
532: continue;
533: }
534: pool = stringpool;
535: ap = aliases_lookup(buf,bp-buf);
536: if (ap == NULL) {
537: pool = stringpool2;
538: ap = aliases2_lookup(buf);
539: if (ap == NULL)
540: goto invalid;
541: }
542: if (ap->encoding_index == ei_local_char) {
543: code = locale_charset();
544: /* Avoid an endless loop that could occur when using an older version
545: of localcharset.c. */
546: if (code[0] == '\0')
547: goto invalid;
548: continue;
549: }
550: if (ap->encoding_index == ei_local_wchar_t) {
551: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
552: This is also the case on native Woe32 systems. */
553: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
554: if (sizeof(wchar_t) == 4) {
555: index = ei_ucs4internal;
556: break;
557: }
558: if (sizeof(wchar_t) == 2) {
559: index = ei_ucs2internal;
560: break;
561: }
562: if (sizeof(wchar_t) == 1) {
563: index = ei_iso8859_1;
564: break;
565: }
566: #endif
567: }
568: index = ap->encoding_index;
569: break;
570: }
571: return all_canonical[index] + pool;
572: invalid:
573: return name;
574: }
575:
576: int _libiconv_version = _LIBICONV_VERSION;
577:
578: #if defined __FreeBSD__ && !defined __gnu_freebsd__
579: /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
580: It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
581: #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
582: #define _strong_alias(name, aliasname) \
583: extern __typeof (name) aliasname __attribute__ ((alias (#name)));
584: #undef iconv_open
585: #undef iconv
586: #undef iconv_close
587: strong_alias (libiconv_open, iconv_open)
588: strong_alias (libiconv, iconv)
589: strong_alias (libiconv_close, iconv_close)
590: #endif
591:
592: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>