Annotation of embedaddon/libiconv/lib/iconv_open1.h, revision 1.1.1.3
1.1 misho 1: /*
1.1.1.3 ! misho 2: * Copyright (C) 1999-2008, 2011, 2018 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho 17: * If not, see <https://www.gnu.org/licenses/>.
1.1 misho 18: */
19:
20: /* Part 1 of iconv_open.
21: Input: const char* tocode, const char* fromcode.
22: Output:
23: unsigned int from_index;
24: int from_wchar;
25: unsigned int to_index;
26: int to_wchar;
27: int transliterate;
28: int discard_ilseq;
29: Jumps to 'invalid' in case of error.
30: */
31: {
32: char buf[MAX_WORD_LENGTH+10+1];
33: const char* cp;
34: char* bp;
35: const struct alias * ap;
36: unsigned int count;
37:
38: transliterate = 0;
39: discard_ilseq = 0;
40:
41: /* Before calling aliases_lookup, convert the input string to upper case,
42: * and check whether it's entirely ASCII (we call gperf with option "-7"
43: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
44: * or if it's too long, it is not a valid encoding name.
45: */
46: for (to_wchar = 0;;) {
47: /* Search tocode in the table. */
48: for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
49: unsigned char c = * (unsigned char *) cp;
50: if (c >= 0x80)
51: goto invalid;
52: if (c >= 'a' && c <= 'z')
53: c -= 'a'-'A';
54: *bp = c;
55: if (c == '\0')
56: break;
57: if (--count == 0)
58: goto invalid;
59: }
60: for (;;) {
61: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
62: bp -= 10;
63: *bp = '\0';
64: transliterate = 1;
65: continue;
66: }
67: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
68: bp -= 8;
69: *bp = '\0';
70: discard_ilseq = 1;
71: continue;
72: }
73: break;
74: }
75: if (buf[0] == '\0') {
76: tocode = locale_charset();
77: /* Avoid an endless loop that could occur when using an older version
78: of localcharset.c. */
79: if (tocode[0] == '\0')
80: goto invalid;
81: continue;
82: }
83: ap = aliases_lookup(buf,bp-buf);
84: if (ap == NULL) {
85: ap = aliases2_lookup(buf);
86: if (ap == NULL)
87: goto invalid;
88: }
89: if (ap->encoding_index == ei_local_char) {
90: tocode = locale_charset();
91: /* Avoid an endless loop that could occur when using an older version
92: of localcharset.c. */
93: if (tocode[0] == '\0')
94: goto invalid;
95: continue;
96: }
97: if (ap->encoding_index == ei_local_wchar_t) {
98: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 misho 99: This is also the case on native Woe32 systems and Cygwin >= 1.7, where
100: we know that it is UTF-16. */
1.1.1.3 ! misho 101: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
1.1.1.2 misho 102: if (sizeof(wchar_t) == 4) {
103: to_index = ei_ucs4internal;
104: break;
105: }
106: if (sizeof(wchar_t) == 2) {
107: # if WORDS_LITTLEENDIAN
108: to_index = ei_utf16le;
109: # else
110: to_index = ei_utf16be;
111: # endif
112: break;
113: }
114: #elif __STDC_ISO_10646__
1.1 misho 115: if (sizeof(wchar_t) == 4) {
116: to_index = ei_ucs4internal;
117: break;
118: }
119: if (sizeof(wchar_t) == 2) {
120: to_index = ei_ucs2internal;
121: break;
122: }
123: if (sizeof(wchar_t) == 1) {
124: to_index = ei_iso8859_1;
125: break;
126: }
127: #endif
128: #if HAVE_MBRTOWC
129: to_wchar = 1;
130: tocode = locale_charset();
131: continue;
132: #endif
133: goto invalid;
134: }
135: to_index = ap->encoding_index;
136: break;
137: }
138: for (from_wchar = 0;;) {
139: /* Search fromcode in the table. */
140: for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
141: unsigned char c = * (unsigned char *) cp;
142: if (c >= 0x80)
143: goto invalid;
144: if (c >= 'a' && c <= 'z')
145: c -= 'a'-'A';
146: *bp = c;
147: if (c == '\0')
148: break;
149: if (--count == 0)
150: goto invalid;
151: }
152: for (;;) {
153: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
154: bp -= 10;
155: *bp = '\0';
156: continue;
157: }
158: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
159: bp -= 8;
160: *bp = '\0';
161: continue;
162: }
163: break;
164: }
165: if (buf[0] == '\0') {
166: fromcode = locale_charset();
167: /* Avoid an endless loop that could occur when using an older version
168: of localcharset.c. */
169: if (fromcode[0] == '\0')
170: goto invalid;
171: continue;
172: }
173: ap = aliases_lookup(buf,bp-buf);
174: if (ap == NULL) {
175: ap = aliases2_lookup(buf);
176: if (ap == NULL)
177: goto invalid;
178: }
179: if (ap->encoding_index == ei_local_char) {
180: fromcode = locale_charset();
181: /* Avoid an endless loop that could occur when using an older version
182: of localcharset.c. */
183: if (fromcode[0] == '\0')
184: goto invalid;
185: continue;
186: }
187: if (ap->encoding_index == ei_local_wchar_t) {
188: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 misho 189: This is also the case on native Woe32 systems and Cygwin >= 1.7, where
190: we know that it is UTF-16. */
1.1.1.3 ! misho 191: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
1.1.1.2 misho 192: if (sizeof(wchar_t) == 4) {
193: from_index = ei_ucs4internal;
194: break;
195: }
196: if (sizeof(wchar_t) == 2) {
197: # if WORDS_LITTLEENDIAN
198: from_index = ei_utf16le;
199: # else
200: from_index = ei_utf16be;
201: # endif
202: break;
203: }
204: #elif __STDC_ISO_10646__
1.1 misho 205: if (sizeof(wchar_t) == 4) {
206: from_index = ei_ucs4internal;
207: break;
208: }
209: if (sizeof(wchar_t) == 2) {
210: from_index = ei_ucs2internal;
211: break;
212: }
213: if (sizeof(wchar_t) == 1) {
214: from_index = ei_iso8859_1;
215: break;
216: }
217: #endif
218: #if HAVE_WCRTOMB
219: from_wchar = 1;
220: fromcode = locale_charset();
221: continue;
222: #endif
223: goto invalid;
224: }
225: from_index = ap->encoding_index;
226: break;
227: }
228: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>