Annotation of embedaddon/libiconv/lib/iconv.c, revision 1.1.1.3
1.1 misho 1: /*
1.1.1.3 ! misho 2: * Copyright (C) 1999-2008, 2011, 2016, 2018 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: #include <iconv.h>
21:
1.1.1.3 ! misho 22: #include <limits.h>
1.1 misho 23: #include <stdlib.h>
24: #include <string.h>
25: #include "config.h"
26: #include "localcharset.h"
27:
1.1.1.2 misho 28: #ifdef __CYGWIN__
29: #include <cygwin/version.h>
30: #endif
31:
/*
 * Select which families of system dependent encodings get compiled in,
 * by defining the corresponding USE_* macros.
 */
#if ENABLE_EXTRA
/* Extra encodings requested: enable every system dependent family,
   regardless of the build platform, plus the extra encodings. */
# define USE_AIX
# define USE_OSF1
# define USE_DOS
# define USE_EXTRA
#else
/* Otherwise enable only the families that the current system needs. */
# if defined(_AIX)
#  define USE_AIX
# endif
# if defined(__osf__) || defined(VMS)
#  define USE_OSF1
# endif
# if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#  define USE_DOS
# endif
#endif
56:
/*
 * Data type for general conversion loop: the pair of entry points that
 * iconv() dispatches to.
 */
struct loop_funcs {
  /* Called by iconv() when an input buffer is supplied. */
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  /* Called by iconv() when inbuf or *inbuf is NULL. */
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
67:
68: /*
69: * Converters.
70: */
71: #include "converters.h"
72:
73: /*
74: * Transliteration tables.
75: */
76: #include "cjk_variants.h"
77: #include "translit.h"
78:
79: /*
80: * Table of all supported encodings.
81: */
82: struct encoding {
83: struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
84: struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
85: int oflags; /* flags for unicode -> multibyte conversion */
86: };
87: #define DEFALIAS(xxx_alias,xxx) /* nothing */
88: enum {
89: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
90: ei_##xxx ,
91: #include "encodings.def"
92: #ifdef USE_AIX
93: # include "encodings_aix.def"
94: #endif
95: #ifdef USE_OSF1
96: # include "encodings_osf1.def"
97: #endif
98: #ifdef USE_DOS
99: # include "encodings_dos.def"
100: #endif
101: #ifdef USE_EXTRA
102: # include "encodings_extra.def"
103: #endif
104: #include "encodings_local.def"
105: #undef DEFENCODING
106: ei_for_broken_compilers_that_dont_like_trailing_commas
107: };
108: #include "flags.h"
109: static struct encoding const all_encodings[] = {
110: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
111: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
112: #include "encodings.def"
113: #ifdef USE_AIX
114: # include "encodings_aix.def"
115: #endif
116: #ifdef USE_OSF1
117: # include "encodings_osf1.def"
118: #endif
119: #ifdef USE_DOS
120: # include "encodings_dos.def"
121: #endif
122: #ifdef USE_EXTRA
123: # include "encodings_extra.def"
124: #endif
125: #undef DEFENCODING
126: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
127: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
128: #include "encodings_local.def"
129: #undef DEFENCODING
130: };
131: #undef DEFALIAS
132:
133: /*
134: * Conversion loops.
135: */
136: #include "loops.h"
137:
138: /*
139: * Alias lookup function.
140: * Defines
141: * struct alias { int name; unsigned int encoding_index; };
142: * const struct alias * aliases_lookup (const char *str, unsigned int len);
143: * #define MAX_WORD_LENGTH ...
144: */
145: #if defined _AIX
146: # include "aliases_sysaix.h"
147: #elif defined hpux || defined __hpux
148: # include "aliases_syshpux.h"
149: #elif defined __osf__
150: # include "aliases_sysosf1.h"
151: #elif defined __sun
152: # include "aliases_syssolaris.h"
153: #else
154: # include "aliases.h"
155: #endif
156:
157: /*
158: * System dependent alias lookup function.
159: * Defines
160: * const struct alias * aliases2_lookup (const char *str);
161: */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */

/* Layout of the string pool for the system dependent alias names:
   one char array member per S(...) entry, sized by sizeof(name). */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};

/* The pool itself: each member is initialized with its alias name. */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};

/* The pool viewed as plain bytes, so that names can be addressed by offset. */
#define stringpool2 ((const char *) &stringpool2_contents)

/* Alias table: 'name' holds the offset of the corresponding member within
   struct stringpool2_t. */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
#include "aliases2.h"
#undef S
};

/*
 * Linear search of sysdep_aliases for an entry whose pooled name is equal
 * to str (exact strcmp match). Returns the entry, or NULL if there is none.
 */
#if defined(__GNUC__)
__inline
#elif defined(__cplusplus)
inline
#endif
static const struct alias *
aliases2_lookup (register const char *str)
{
  const unsigned int total = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  unsigned int k;

  for (k = 0; k < total; k++) {
    const struct alias * entry = &sysdep_aliases[k];
    if (strcmp(str, stringpool2 + entry->name) == 0)
      return entry;
  }
  return NULL;
}

#else

/* No system dependent encodings are compiled in: the lookup always yields
   NULL and there is no second string pool. */
#define aliases2_lookup(str)  NULL
#define stringpool2  NULL

#endif
200:
201: #if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if the strings are equal under that rule, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char* p1 = (const unsigned char *) str1;
  const unsigned char* p2 = (const unsigned char *) str2;
  unsigned char c1;
  unsigned char c2;

  do {
    c1 = *p1++;
    c2 = *p2++;
    /* At the end of str1, c2 is compared as is: equal only if str2 ends too. */
    if (c1 == 0)
      break;
    /* Only the second string gets folded to upper case. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
  } while (c1 == c2);

  return (c1 == c2);
}
220: #endif
221:
222: iconv_t iconv_open (const char* tocode, const char* fromcode)
223: {
224: struct conv_struct * cd;
225: unsigned int from_index;
226: int from_wchar;
227: unsigned int to_index;
228: int to_wchar;
229: int transliterate;
230: int discard_ilseq;
231:
232: #include "iconv_open1.h"
233:
234: cd = (struct conv_struct *) malloc(from_wchar != to_wchar
235: ? sizeof(struct wchar_conv_struct)
236: : sizeof(struct conv_struct));
237: if (cd == NULL) {
238: errno = ENOMEM;
239: return (iconv_t)(-1);
240: }
241:
242: #include "iconv_open2.h"
243:
244: return (iconv_t)cd;
245: invalid:
246: errno = EINVAL;
247: return (iconv_t)(-1);
248: }
249:
250: size_t iconv (iconv_t icd,
251: ICONV_CONST char* * inbuf, size_t *inbytesleft,
252: char* * outbuf, size_t *outbytesleft)
253: {
254: conv_t cd = (conv_t) icd;
255: if (inbuf == NULL || *inbuf == NULL)
256: return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
257: else
258: return cd->lfuncs.loop_convert(icd,
259: (const char* *)inbuf,inbytesleft,
260: outbuf,outbytesleft);
261: }
262:
263: int iconv_close (iconv_t icd)
264: {
265: conv_t cd = (conv_t) icd;
266: free(cd);
267: return 0;
268: }
269:
270: #ifndef LIBICONV_PLUG
271:
272: /*
273: * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
274: * fit in an iconv_allocation_t.
275: * If this verification fails, iconv_allocation_t must be made larger and
276: * the major version in LIBICONV_VERSION_INFO must be bumped.
277: * Currently 'struct conv_struct' has 21 integer/pointer fields, and
278: * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
279: */
280: typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
281: typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
282:
283: int iconv_open_into (const char* tocode, const char* fromcode,
284: iconv_allocation_t* resultp)
285: {
286: struct conv_struct * cd;
287: unsigned int from_index;
288: int from_wchar;
289: unsigned int to_index;
290: int to_wchar;
291: int transliterate;
292: int discard_ilseq;
293:
294: #include "iconv_open1.h"
295:
296: cd = (struct conv_struct *) resultp;
297:
298: #include "iconv_open2.h"
299:
300: return 0;
301: invalid:
302: errno = EINVAL;
303: return -1;
304: }
305:
306: int iconvctl (iconv_t icd, int request, void* argument)
307: {
308: conv_t cd = (conv_t) icd;
309: switch (request) {
310: case ICONV_TRIVIALP:
311: *(int *)argument =
312: ((cd->lfuncs.loop_convert == unicode_loop_convert
313: && cd->iindex == cd->oindex)
314: || cd->lfuncs.loop_convert == wchar_id_loop_convert
315: ? 1 : 0);
316: return 0;
317: case ICONV_GET_TRANSLITERATE:
318: *(int *)argument = cd->transliterate;
319: return 0;
320: case ICONV_SET_TRANSLITERATE:
321: cd->transliterate = (*(const int *)argument ? 1 : 0);
322: return 0;
323: case ICONV_GET_DISCARD_ILSEQ:
324: *(int *)argument = cd->discard_ilseq;
325: return 0;
326: case ICONV_SET_DISCARD_ILSEQ:
327: cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
328: return 0;
329: case ICONV_SET_HOOKS:
330: if (argument != NULL) {
331: cd->hooks = *(const struct iconv_hooks *)argument;
332: } else {
333: cd->hooks.uc_hook = NULL;
334: cd->hooks.wc_hook = NULL;
335: cd->hooks.data = NULL;
336: }
337: return 0;
338: case ICONV_SET_FALLBACKS:
339: if (argument != NULL) {
340: cd->fallbacks = *(const struct iconv_fallbacks *)argument;
341: } else {
342: cd->fallbacks.mb_to_uc_fallback = NULL;
343: cd->fallbacks.uc_to_mb_fallback = NULL;
344: cd->fallbacks.mb_to_wc_fallback = NULL;
345: cd->fallbacks.wc_to_mb_fallback = NULL;
346: cd->fallbacks.data = NULL;
347: }
348: return 0;
349: default:
350: errno = EINVAL;
351: return -1;
352: }
353: }
354:
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort comparison function: orders two 'struct nalias' objects by
 * ascending encoding_index.
 *
 * Returns -1, 0 or 1. The indices are compared as unsigned values instead
 * of being cast to int and subtracted, so the result has the right sign for
 * every pair of unsigned int values (the subtraction gave the wrong sign,
 * or overflowed, as soon as an index exceeded INT_MAX).
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  unsigned int index1 = ((const struct nalias *) arg1)->encoding_index;
  unsigned int index2 = ((const struct nalias *) arg2)->encoding_index;
  return (index1 > index2) - (index1 < index2);
}
364:
/*
 * qsort comparison function for an array of 'const char *' names.
 * Compares alphabetically (strcmp order), but puts names that start
 * with "CS" after all other names.
 *
 * Returns 0 for equal strings; otherwise 4 * (CS-ness difference) plus
 * +1 or -1 for the strcmp direction.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * lhs = *(const char * const *)arg1;
  const char * rhs = *(const char * const *)arg2;
  int order = strcmp(lhs, rhs);
  int lhs_is_cs;
  int rhs_is_cs;

  if (order == 0)
    return 0;

  lhs_is_cs = (lhs[0] == 'C' && lhs[1] == 'S');
  rhs_is_cs = (rhs[0] == 'C' && rhs[1] == 'S');
  /* The factor 4 lets the "CS" criterion outweigh the alphabetical one. */
  return (lhs_is_cs - rhs_is_cs) * 4 + (order > 0 ? 1 : -1);
}
377:
378: void iconvlist (int (*do_one) (unsigned int namescount,
379: const char * const * names,
380: void* data),
381: void* data)
382: {
383: #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
384: #ifndef aliases2_lookup
385: #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
386: #else
387: #define aliascount2 0
388: #endif
389: #define aliascount (aliascount1+aliascount2)
390: struct nalias aliasbuf[aliascount];
391: const char * namesbuf[aliascount];
392: size_t num_aliases;
393: {
394: /* Put all existing aliases into a buffer. */
395: size_t i;
396: size_t j;
397: j = 0;
398: for (i = 0; i < aliascount1; i++) {
399: const struct alias * p = &aliases[i];
400: if (p->name >= 0
401: && p->encoding_index != ei_local_char
402: && p->encoding_index != ei_local_wchar_t) {
403: aliasbuf[j].name = stringpool + p->name;
404: aliasbuf[j].encoding_index = p->encoding_index;
405: j++;
406: }
407: }
408: #ifndef aliases2_lookup
409: for (i = 0; i < aliascount2; i++) {
410: aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
411: aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
412: j++;
413: }
414: #endif
415: num_aliases = j;
416: }
417: /* Sort by encoding_index. */
418: if (num_aliases > 1)
419: qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
420: {
421: /* Process all aliases with the same encoding_index together. */
422: size_t j;
423: j = 0;
424: while (j < num_aliases) {
425: unsigned int ei = aliasbuf[j].encoding_index;
426: size_t i = 0;
427: do
428: namesbuf[i++] = aliasbuf[j++].name;
429: while (j < num_aliases && aliasbuf[j].encoding_index == ei);
430: if (i > 1)
431: qsort(namesbuf, i, sizeof(const char *), compare_by_name);
432: /* Call the callback. */
433: if (do_one(i,namesbuf,data))
434: break;
435: }
436: }
437: #undef aliascount
438: #undef aliascount2
439: #undef aliascount1
440: }
441:
442: /*
443: * Table of canonical names of encodings.
444: * Instead of strings, it contains offsets into stringpool and stringpool2.
445: */
446: static const unsigned short all_canonical[] = {
447: #if defined _AIX
448: # include "canonical_sysaix.h"
449: #elif defined hpux || defined __hpux
450: # include "canonical_syshpux.h"
451: #elif defined __osf__
452: # include "canonical_sysosf1.h"
453: #elif defined __sun
454: # include "canonical_syssolaris.h"
455: #else
456: # include "canonical.h"
457: #endif
458: #ifdef USE_AIX
459: # if defined _AIX
460: # include "canonical_aix_sysaix.h"
461: # else
462: # include "canonical_aix.h"
463: # endif
464: #endif
465: #ifdef USE_OSF1
466: # if defined __osf__
467: # include "canonical_osf1_sysosf1.h"
468: # else
469: # include "canonical_osf1.h"
470: # endif
471: #endif
472: #ifdef USE_DOS
473: # include "canonical_dos.h"
474: #endif
475: #ifdef USE_EXTRA
476: # include "canonical_extra.h"
477: #endif
478: #if defined _AIX
479: # include "canonical_local_sysaix.h"
480: #elif defined hpux || defined __hpux
481: # include "canonical_local_syshpux.h"
482: #elif defined __osf__
483: # include "canonical_local_sysosf1.h"
484: #elif defined __sun
485: # include "canonical_local_syssolaris.h"
486: #else
487: # include "canonical_local.h"
488: #endif
489: };
490:
491: const char * iconv_canonicalize (const char * name)
492: {
493: const char* code;
494: char buf[MAX_WORD_LENGTH+10+1];
495: const char* cp;
496: char* bp;
497: const struct alias * ap;
498: unsigned int count;
499: unsigned int index;
500: const char* pool;
501:
502: /* Before calling aliases_lookup, convert the input string to upper case,
503: * and check whether it's entirely ASCII (we call gperf with option "-7"
504: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
505: * or if it's too long, it is not a valid encoding name.
506: */
507: for (code = name;;) {
508: /* Search code in the table. */
509: for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
510: unsigned char c = * (unsigned char *) cp;
511: if (c >= 0x80)
512: goto invalid;
513: if (c >= 'a' && c <= 'z')
514: c -= 'a'-'A';
515: *bp = c;
516: if (c == '\0')
517: break;
518: if (--count == 0)
519: goto invalid;
520: }
521: for (;;) {
522: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
523: bp -= 10;
524: *bp = '\0';
525: continue;
526: }
527: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
528: bp -= 8;
529: *bp = '\0';
530: continue;
531: }
532: break;
533: }
534: if (buf[0] == '\0') {
535: code = locale_charset();
536: /* Avoid an endless loop that could occur when using an older version
537: of localcharset.c. */
538: if (code[0] == '\0')
539: goto invalid;
540: continue;
541: }
542: pool = stringpool;
543: ap = aliases_lookup(buf,bp-buf);
544: if (ap == NULL) {
545: pool = stringpool2;
546: ap = aliases2_lookup(buf);
547: if (ap == NULL)
548: goto invalid;
549: }
550: if (ap->encoding_index == ei_local_char) {
551: code = locale_charset();
552: /* Avoid an endless loop that could occur when using an older version
553: of localcharset.c. */
554: if (code[0] == '\0')
555: goto invalid;
556: continue;
557: }
558: if (ap->encoding_index == ei_local_wchar_t) {
559: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 misho 560: This is also the case on native Woe32 systems and Cygwin >= 1.7, where
561: we know that it is UTF-16. */
1.1.1.3 ! misho 562: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
1.1.1.2 misho 563: if (sizeof(wchar_t) == 4) {
564: index = ei_ucs4internal;
565: break;
566: }
567: if (sizeof(wchar_t) == 2) {
568: # if WORDS_LITTLEENDIAN
569: index = ei_utf16le;
570: # else
571: index = ei_utf16be;
572: # endif
573: break;
574: }
575: #elif __STDC_ISO_10646__
1.1 misho 576: if (sizeof(wchar_t) == 4) {
577: index = ei_ucs4internal;
578: break;
579: }
580: if (sizeof(wchar_t) == 2) {
581: index = ei_ucs2internal;
582: break;
583: }
584: if (sizeof(wchar_t) == 1) {
585: index = ei_iso8859_1;
586: break;
587: }
588: #endif
589: }
590: index = ap->encoding_index;
591: break;
592: }
593: return all_canonical[index] + pool;
594: invalid:
595: return name;
596: }
597:
598: int _libiconv_version = _LIBICONV_VERSION;
599:
600: #if defined __FreeBSD__ && !defined __gnu_freebsd__
601: /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
602: It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
603: #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
604: #define _strong_alias(name, aliasname) \
605: extern __typeof (name) aliasname __attribute__ ((alias (#name)));
606: #undef iconv_open
607: #undef iconv
608: #undef iconv_close
609: strong_alias (libiconv_open, iconv_open)
610: strong_alias (libiconv, iconv)
611: strong_alias (libiconv_close, iconv_close)
612: #endif
613:
614: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>