Annotation of embedaddon/libiconv/lib/iconv_open1.h, revision 1.1.1.2
1.1 misho 1: /*
1.1.1.2 ! misho 2: * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
1.1 misho 3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /* Part 1 of iconv_open.
22: Input: const char* tocode, const char* fromcode.
23: Output:
24: unsigned int from_index;
25: int from_wchar;
26: unsigned int to_index;
27: int to_wchar;
28: int transliterate;
29: int discard_ilseq;
30: Jumps to 'invalid' in case of error.
31: */
/* Resolve the two encoding names to encoding indices.
 * The block runs two parallel lookup loops: the first handles tocode and
 * writes to_index and to_wchar, the second handles fromcode and writes
 * from_index and from_wchar.  The transliterate and discard_ilseq flags are
 * derived from suffixes on tocode only.  Every failure path jumps to the
 * label invalid, which is defined outside this block.
 */
32: {
/* Scratch buffer for the upper-cased copy of the name being looked up. */
33: char buf[MAX_WORD_LENGTH+10+1];
34: const char* cp;
35: char* bp;
36: const struct alias * ap;
/* Number of bytes that may still be written into buf by the copy loop. */
37: unsigned int count;
38:
39: transliterate = 0;
40: discard_ilseq = 0;
41:
42: /* Before calling aliases_lookup, convert the input string to upper case,
43: * and check whether it's entirely ASCII (we call gperf with option "-7"
44: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
45: * or if it's too long, it is not a valid encoding name.
46: */
/* to_wchar is initialised once; each continue below restarts the lookup
 * with whatever tocode currently points to, and each break leaves the loop
 * with to_index assigned.
 */
47: for (to_wchar = 0;;) {
48: /* Search tocode in the table. */
/* count starts at the size of buf, so the copy (including the terminating
 * NUL) can never write past the end of buf; a longer name is rejected.
 */
49: for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
50: unsigned char c = * (unsigned char *) cp;
/* Bytes with the high bit set are not ASCII: reject the name. */
51: if (c >= 0x80)
52: goto invalid;
/* Fold ASCII lower case letters to upper case. */
53: if (c >= 'a' && c <= 'z')
54: c -= 'a'-'A';
55: *bp = c;
/* On exit through this break, bp points at the NUL terminator in buf. */
56: if (c == '\0')
57: break;
58: if (--count == 0)
59: goto invalid;
60: }
/* Strip trailing TRANSLIT and IGNORE suffixes (each introduced by two
 * slashes), recording them in the flags.  The loop repeats after every
 * match, so several stacked suffixes are removed in whatever order they
 * appear.
 */
61: for (;;) {
62: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
63: bp -= 10;
64: *bp = '\0';
65: transliterate = 1;
66: continue;
67: }
68: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
69: bp -= 8;
70: *bp = '\0';
71: discard_ilseq = 1;
72: continue;
73: }
74: break;
75: }
/* An empty name (after suffix removal) selects the name returned by
 * locale_charset(); the outer loop is restarted with that name.
 */
76: if (buf[0] == '\0') {
77: tocode = locale_charset();
78: /* Avoid an endless loop that could occur when using an older version
79: of localcharset.c. */
80: if (tocode[0] == '\0')
81: goto invalid;
82: continue;
83: }
/* Look the name up in the primary alias table, then in the secondary one;
 * a name found in neither is invalid.
 */
84: ap = aliases_lookup(buf,bp-buf);
85: if (ap == NULL) {
86: ap = aliases2_lookup(buf);
87: if (ap == NULL)
88: goto invalid;
89: }
/* The alias resolved to the ei_local_char index: restart the lookup with
 * the name returned by locale_charset().
 */
90: if (ap->encoding_index == ei_local_char) {
91: tocode = locale_charset();
92: /* Avoid an endless loop that could occur when using an older version
93: of localcharset.c. */
94: if (tocode[0] == '\0')
95: goto invalid;
96: continue;
97: }
/* The alias resolved to the ei_local_wchar_t index: choose a concrete
 * encoding from sizeof(wchar_t) where the platform tests below allow it.
 * If no size test matches, control falls through to the HAVE_MBRTOWC
 * section, and finally to goto invalid.
 */
98: if (ap->encoding_index == ei_local_wchar_t) {
99: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 ! misho 100: This is also the case on native Woe32 systems and Cygwin >= 1.7, where
! 101: we know that it is UTF-16. */
! 102: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
! 103: if (sizeof(wchar_t) == 4) {
! 104: to_index = ei_ucs4internal;
! 105: break;
! 106: }
! 107: if (sizeof(wchar_t) == 2) {
! 108: # if WORDS_LITTLEENDIAN
! 109: to_index = ei_utf16le;
! 110: # else
! 111: to_index = ei_utf16be;
! 112: # endif
! 113: break;
! 114: }
! 115: #elif __STDC_ISO_10646__
1.1 misho 116: if (sizeof(wchar_t) == 4) {
117: to_index = ei_ucs4internal;
118: break;
119: }
120: if (sizeof(wchar_t) == 2) {
121: to_index = ei_ucs2internal;
122: break;
123: }
124: if (sizeof(wchar_t) == 1) {
125: to_index = ei_iso8859_1;
126: break;
127: }
128: #endif
/* When HAVE_MBRTOWC is set: flag the target as wchar_t and restart the
 * lookup with the name returned by locale_charset().  That name is not
 * checked for emptiness here; an empty name is caught by the buf[0] test
 * on the next pass.
 */
129: #if HAVE_MBRTOWC
130: to_wchar = 1;
131: tocode = locale_charset();
132: continue;
133: #endif
134: goto invalid;
135: }
/* Ordinary case: the alias entry carries the encoding index directly. */
136: to_index = ap->encoding_index;
137: break;
138: }
/* Second pass: the same procedure for fromcode.  The differences visible
 * here are that the suffixes are stripped without touching transliterate
 * or discard_ilseq, and that the wchar_t fallback is guarded by
 * HAVE_WCRTOMB instead of HAVE_MBRTOWC.
 */
139: for (from_wchar = 0;;) {
140: /* Search fromcode in the table. */
/* Bounded, upper-casing, ASCII-only copy into buf, as for tocode. */
141: for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
142: unsigned char c = * (unsigned char *) cp;
143: if (c >= 0x80)
144: goto invalid;
145: if (c >= 'a' && c <= 'z')
146: c -= 'a'-'A';
147: *bp = c;
148: if (c == '\0')
149: break;
150: if (--count == 0)
151: goto invalid;
152: }
/* Suffixes on fromcode are removed but otherwise ignored. */
153: for (;;) {
154: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
155: bp -= 10;
156: *bp = '\0';
157: continue;
158: }
159: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
160: bp -= 8;
161: *bp = '\0';
162: continue;
163: }
164: break;
165: }
/* Empty name: restart with the name returned by locale_charset(). */
166: if (buf[0] == '\0') {
167: fromcode = locale_charset();
168: /* Avoid an endless loop that could occur when using an older version
169: of localcharset.c. */
170: if (fromcode[0] == '\0')
171: goto invalid;
172: continue;
173: }
/* Primary alias table first, secondary table as fallback. */
174: ap = aliases_lookup(buf,bp-buf);
175: if (ap == NULL) {
176: ap = aliases2_lookup(buf);
177: if (ap == NULL)
178: goto invalid;
179: }
/* ei_local_char: restart with the name returned by locale_charset(). */
180: if (ap->encoding_index == ei_local_char) {
181: fromcode = locale_charset();
182: /* Avoid an endless loop that could occur when using an older version
183: of localcharset.c. */
184: if (fromcode[0] == '\0')
185: goto invalid;
186: continue;
187: }
/* ei_local_wchar_t: same size-based selection as for tocode, writing
 * from_index instead of to_index.
 */
188: if (ap->encoding_index == ei_local_wchar_t) {
189: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 ! misho 190: This is also the case on native Woe32 systems and Cygwin >= 1.7, where
! 191: we know that it is UTF-16. */
! 192: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
! 193: if (sizeof(wchar_t) == 4) {
! 194: from_index = ei_ucs4internal;
! 195: break;
! 196: }
! 197: if (sizeof(wchar_t) == 2) {
! 198: # if WORDS_LITTLEENDIAN
! 199: from_index = ei_utf16le;
! 200: # else
! 201: from_index = ei_utf16be;
! 202: # endif
! 203: break;
! 204: }
! 205: #elif __STDC_ISO_10646__
1.1 misho 206: if (sizeof(wchar_t) == 4) {
207: from_index = ei_ucs4internal;
208: break;
209: }
210: if (sizeof(wchar_t) == 2) {
211: from_index = ei_ucs2internal;
212: break;
213: }
214: if (sizeof(wchar_t) == 1) {
215: from_index = ei_iso8859_1;
216: break;
217: }
218: #endif
/* When HAVE_WCRTOMB is set: flag the source as wchar_t and restart the
 * lookup with the name returned by locale_charset(); otherwise the name
 * is invalid.
 */
219: #if HAVE_WCRTOMB
220: from_wchar = 1;
221: fromcode = locale_charset();
222: continue;
223: #endif
224: goto invalid;
225: }
/* Ordinary case: the alias entry carries the encoding index directly. */
226: from_index = ap->encoding_index;
227: break;
228: }
229: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>