embedaddon/libiconv/lib/iconv_open1.h - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv_open1.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 09:29:43 2012 UTC (12 years, 1 month ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, HEAD

libiconv v1.14

1: /* 2: * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc. 3: * This file is part of the GNU LIBICONV Library. 4: * 5: * The GNU LIBICONV Library is free software; you can redistribute it 6: * and/or modify it under the terms of the GNU Library General Public 7: * License as published by the Free Software Foundation; either version 2 8: * of the License, or (at your option) any later version. 9: * 10: * The GNU LIBICONV Library is distributed in the hope that it will be 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13: * Library General Public License for more details. 14: * 15: * You should have received a copy of the GNU Library General Public 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18: * Fifth Floor, Boston, MA 02110-1301, USA. 19: */ 20: 21: /* Part 1 of iconv_open. 22: Input: const char* tocode, const char* fromcode. 23: Output: 24: unsigned int from_index; 25: int from_wchar; 26: unsigned int to_index; 27: int to_wchar; 28: int transliterate; 29: int discard_ilseq; 30: Jumps to 'invalid' in case of errror. 31: */ 32: { 33: char buf[MAX_WORD_LENGTH+10+1]; 34: const char* cp; 35: char* bp; 36: const struct alias * ap; 37: unsigned int count; 38: 39: transliterate = 0; 40: discard_ilseq = 0; 41: 42: /* Before calling aliases_lookup, convert the input string to upper case, 43: * and check whether it's entirely ASCII (we call gperf with option "-7" 44: * to achieve a smaller table) and non-empty. If it's not entirely ASCII, 45: * or if it's too long, it is not a valid encoding name. 46: */ 47: for (to_wchar = 0;;) { 48: /* Search tocode in the table. */ 49: for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 50: unsigned char c = * (unsigned char *) cp; 51: if (c >= 0x80) 52: goto invalid; 53: if (c >= 'a' && c <= 'z') 54: c -= 'a'-'A'; 55: *bp = c; 56: if (c == '\0') 57: break; 58: if (--count == 0) 59: goto invalid; 60: } 61: for (;;) { 62: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 63: bp -= 10; 64: *bp = '\0'; 65: transliterate = 1; 66: continue; 67: } 68: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 69: bp -= 8; 70: *bp = '\0'; 71: discard_ilseq = 1; 72: continue; 73: } 74: break; 75: } 76: if (buf[0] == '\0') { 77: tocode = locale_charset(); 78: /* Avoid an endless loop that could occur when using an older version 79: of localcharset.c. */ 80: if (tocode[0] == '\0') 81: goto invalid; 82: continue; 83: } 84: ap = aliases_lookup(buf,bp-buf); 85: if (ap == NULL) { 86: ap = aliases2_lookup(buf); 87: if (ap == NULL) 88: goto invalid; 89: } 90: if (ap->encoding_index == ei_local_char) { 91: tocode = locale_charset(); 92: /* Avoid an endless loop that could occur when using an older version 93: of localcharset.c. */ 94: if (tocode[0] == '\0') 95: goto invalid; 96: continue; 97: } 98: if (ap->encoding_index == ei_local_wchar_t) { 99: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 100: This is also the case on native Woe32 systems and Cygwin >= 1.7, where 101: we know that it is UTF-16. */ 102: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 103: if (sizeof(wchar_t) == 4) { 104: to_index = ei_ucs4internal; 105: break; 106: } 107: if (sizeof(wchar_t) == 2) { 108: # if WORDS_LITTLEENDIAN 109: to_index = ei_utf16le; 110: # else 111: to_index = ei_utf16be; 112: # endif 113: break; 114: } 115: #elif __STDC_ISO_10646__ 116: if (sizeof(wchar_t) == 4) { 117: to_index = ei_ucs4internal; 118: break; 119: } 120: if (sizeof(wchar_t) == 2) { 121: to_index = ei_ucs2internal; 122: break; 123: } 124: if (sizeof(wchar_t) == 1) { 125: to_index = ei_iso8859_1; 126: break; 127: } 128: #endif 129: #if HAVE_MBRTOWC 130: to_wchar = 1; 131: tocode = locale_charset(); 132: continue; 133: #endif 134: goto invalid; 135: } 136: to_index = ap->encoding_index; 137: break; 138: } 139: for (from_wchar = 0;;) { 140: /* Search fromcode in the table. */ 141: for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 142: unsigned char c = * (unsigned char *) cp; 143: if (c >= 0x80) 144: goto invalid; 145: if (c >= 'a' && c <= 'z') 146: c -= 'a'-'A'; 147: *bp = c; 148: if (c == '\0') 149: break; 150: if (--count == 0) 151: goto invalid; 152: } 153: for (;;) { 154: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 155: bp -= 10; 156: *bp = '\0'; 157: continue; 158: } 159: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 160: bp -= 8; 161: *bp = '\0'; 162: continue; 163: } 164: break; 165: } 166: if (buf[0] == '\0') { 167: fromcode = locale_charset(); 168: /* Avoid an endless loop that could occur when using an older version 169: of localcharset.c. */ 170: if (fromcode[0] == '\0') 171: goto invalid; 172: continue; 173: } 174: ap = aliases_lookup(buf,bp-buf); 175: if (ap == NULL) { 176: ap = aliases2_lookup(buf); 177: if (ap == NULL) 178: goto invalid; 179: } 180: if (ap->encoding_index == ei_local_char) { 181: fromcode = locale_charset(); 182: /* Avoid an endless loop that could occur when using an older version 183: of localcharset.c. */ 184: if (fromcode[0] == '\0') 185: goto invalid; 186: continue; 187: } 188: if (ap->encoding_index == ei_local_wchar_t) { 189: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 190: This is also the case on native Woe32 systems and Cygwin >= 1.7, where 191: we know that it is UTF-16. */ 192: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 193: if (sizeof(wchar_t) == 4) { 194: from_index = ei_ucs4internal; 195: break; 196: } 197: if (sizeof(wchar_t) == 2) { 198: # if WORDS_LITTLEENDIAN 199: from_index = ei_utf16le; 200: # else 201: from_index = ei_utf16be; 202: # endif 203: break; 204: } 205: #elif __STDC_ISO_10646__ 206: if (sizeof(wchar_t) == 4) { 207: from_index = ei_ucs4internal; 208: break; 209: } 210: if (sizeof(wchar_t) == 2) { 211: from_index = ei_ucs2internal; 212: break; 213: } 214: if (sizeof(wchar_t) == 1) { 215: from_index = ei_iso8859_1; 216: break; 217: } 218: #endif 219: #if HAVE_WCRTOMB 220: from_wchar = 1; 221: fromcode = locale_charset(); 222: continue; 223: #endif 224: goto invalid; 225: } 226: from_index = ap->encoding_index; 227: break; 228: } 229: }