Annotation of embedaddon/libiconv/lib/iconv_open1.h, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright (C) 1999-2008 Free Software Foundation, Inc.
3: * This file is part of the GNU LIBICONV Library.
4: *
5: * The GNU LIBICONV Library is free software; you can redistribute it
6: * and/or modify it under the terms of the GNU Library General Public
7: * License as published by the Free Software Foundation; either version 2
8: * of the License, or (at your option) any later version.
9: *
10: * The GNU LIBICONV Library is distributed in the hope that it will be
11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Library General Public License for more details.
14: *
15: * You should have received a copy of the GNU Library General Public
16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18: * Fifth Floor, Boston, MA 02110-1301, USA.
19: */
20:
21: /* Part 1 of iconv_open.
22: Input: const char* tocode, const char* fromcode.
23: Output:
24: unsigned int from_index;
25: int from_wchar;
26: unsigned int to_index;
27: int to_wchar;
28: int transliterate;
29: int discard_ilseq;
30: Jumps to 'invalid' in case of error.
31: */
32: {
33: char buf[MAX_WORD_LENGTH+10+1];
34: const char* cp;
35: char* bp;
36: const struct alias * ap;
37: unsigned int count;
38:
39: transliterate = 0;
40: discard_ilseq = 0;
41:
42: /* Before calling aliases_lookup, convert the input string to upper case,
43: * and check whether it's entirely ASCII (we call gperf with option "-7"
44: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
45: * or if it's too long, it is not a valid encoding name.
46: */
47: for (to_wchar = 0;;) {
48: /* Search tocode in the table. */
49: for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
50: unsigned char c = * (unsigned char *) cp;
51: if (c >= 0x80)
52: goto invalid;
53: if (c >= 'a' && c <= 'z')
54: c -= 'a'-'A';
55: *bp = c;
56: if (c == '\0')
57: break;
58: if (--count == 0)
59: goto invalid;
60: }
61: for (;;) {
62: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
63: bp -= 10;
64: *bp = '\0';
65: transliterate = 1;
66: continue;
67: }
68: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
69: bp -= 8;
70: *bp = '\0';
71: discard_ilseq = 1;
72: continue;
73: }
74: break;
75: }
76: if (buf[0] == '\0') {
77: tocode = locale_charset();
78: /* Avoid an endless loop that could occur when using an older version
79: of localcharset.c. */
80: if (tocode[0] == '\0')
81: goto invalid;
82: continue;
83: }
84: ap = aliases_lookup(buf,bp-buf);
85: if (ap == NULL) {
86: ap = aliases2_lookup(buf);
87: if (ap == NULL)
88: goto invalid;
89: }
90: if (ap->encoding_index == ei_local_char) {
91: tocode = locale_charset();
92: /* Avoid an endless loop that could occur when using an older version
93: of localcharset.c. */
94: if (tocode[0] == '\0')
95: goto invalid;
96: continue;
97: }
98: if (ap->encoding_index == ei_local_wchar_t) {
99: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
100: This is also the case on native Woe32 systems. */
101: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
102: if (sizeof(wchar_t) == 4) {
103: to_index = ei_ucs4internal;
104: break;
105: }
106: if (sizeof(wchar_t) == 2) {
107: to_index = ei_ucs2internal;
108: break;
109: }
110: if (sizeof(wchar_t) == 1) {
111: to_index = ei_iso8859_1;
112: break;
113: }
114: #endif
115: #if HAVE_MBRTOWC
116: to_wchar = 1;
117: tocode = locale_charset();
118: continue;
119: #endif
120: goto invalid;
121: }
122: to_index = ap->encoding_index;
123: break;
124: }
125: for (from_wchar = 0;;) {
126: /* Search fromcode in the table. */
127: for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
128: unsigned char c = * (unsigned char *) cp;
129: if (c >= 0x80)
130: goto invalid;
131: if (c >= 'a' && c <= 'z')
132: c -= 'a'-'A';
133: *bp = c;
134: if (c == '\0')
135: break;
136: if (--count == 0)
137: goto invalid;
138: }
139: for (;;) {
140: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
141: bp -= 10;
142: *bp = '\0';
143: continue;
144: }
145: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
146: bp -= 8;
147: *bp = '\0';
148: continue;
149: }
150: break;
151: }
152: if (buf[0] == '\0') {
153: fromcode = locale_charset();
154: /* Avoid an endless loop that could occur when using an older version
155: of localcharset.c. */
156: if (fromcode[0] == '\0')
157: goto invalid;
158: continue;
159: }
160: ap = aliases_lookup(buf,bp-buf);
161: if (ap == NULL) {
162: ap = aliases2_lookup(buf);
163: if (ap == NULL)
164: goto invalid;
165: }
166: if (ap->encoding_index == ei_local_char) {
167: fromcode = locale_charset();
168: /* Avoid an endless loop that could occur when using an older version
169: of localcharset.c. */
170: if (fromcode[0] == '\0')
171: goto invalid;
172: continue;
173: }
174: if (ap->encoding_index == ei_local_wchar_t) {
175: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
176: This is also the case on native Woe32 systems. */
177: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
178: if (sizeof(wchar_t) == 4) {
179: from_index = ei_ucs4internal;
180: break;
181: }
182: if (sizeof(wchar_t) == 2) {
183: from_index = ei_ucs2internal;
184: break;
185: }
186: if (sizeof(wchar_t) == 1) {
187: from_index = ei_iso8859_1;
188: break;
189: }
190: #endif
191: #if HAVE_WCRTOMB
192: from_wchar = 1;
193: fromcode = locale_charset();
194: continue;
195: #endif
196: goto invalid;
197: }
198: from_index = ap->encoding_index;
199: break;
200: }
201: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>