embedaddon/libiconv/lib/iconv_open1.h - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv_open1.h
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD

libiconv 1.16

1: /* 2: * Copyright (C) 1999-2008, 2011, 2018 Free Software Foundation, Inc. 3: * This file is part of the GNU LIBICONV Library. 4: * 5: * The GNU LIBICONV Library is free software; you can redistribute it 6: * and/or modify it under the terms of the GNU Library General Public 7: * License as published by the Free Software Foundation; either version 2 8: * of the License, or (at your option) any later version. 9: * 10: * The GNU LIBICONV Library is distributed in the hope that it will be 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13: * Library General Public License for more details. 14: * 15: * You should have received a copy of the GNU Library General Public 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17: * If not, see <https://www.gnu.org/licenses/>. 18: */ 19: 20: /* Part 1 of iconv_open. 21: Input: const char* tocode, const char* fromcode. 22: Output: 23: unsigned int from_index; 24: int from_wchar; 25: unsigned int to_index; 26: int to_wchar; 27: int transliterate; 28: int discard_ilseq; 29: Jumps to 'invalid' in case of errror. 30: */ 31: { 32: char buf[MAX_WORD_LENGTH+10+1]; 33: const char* cp; 34: char* bp; 35: const struct alias * ap; 36: unsigned int count; 37: 38: transliterate = 0; 39: discard_ilseq = 0; 40: 41: /* Before calling aliases_lookup, convert the input string to upper case, 42: * and check whether it's entirely ASCII (we call gperf with option "-7" 43: * to achieve a smaller table) and non-empty. If it's not entirely ASCII, 44: * or if it's too long, it is not a valid encoding name. 45: */ 46: for (to_wchar = 0;;) { 47: /* Search tocode in the table. */ 48: for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 49: unsigned char c = * (unsigned char *) cp; 50: if (c >= 0x80) 51: goto invalid; 52: if (c >= 'a' && c <= 'z') 53: c -= 'a'-'A'; 54: *bp = c; 55: if (c == '\0') 56: break; 57: if (--count == 0) 58: goto invalid; 59: } 60: for (;;) { 61: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 62: bp -= 10; 63: *bp = '\0'; 64: transliterate = 1; 65: continue; 66: } 67: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 68: bp -= 8; 69: *bp = '\0'; 70: discard_ilseq = 1; 71: continue; 72: } 73: break; 74: } 75: if (buf[0] == '\0') { 76: tocode = locale_charset(); 77: /* Avoid an endless loop that could occur when using an older version 78: of localcharset.c. */ 79: if (tocode[0] == '\0') 80: goto invalid; 81: continue; 82: } 83: ap = aliases_lookup(buf,bp-buf); 84: if (ap == NULL) { 85: ap = aliases2_lookup(buf); 86: if (ap == NULL) 87: goto invalid; 88: } 89: if (ap->encoding_index == ei_local_char) { 90: tocode = locale_charset(); 91: /* Avoid an endless loop that could occur when using an older version 92: of localcharset.c. */ 93: if (tocode[0] == '\0') 94: goto invalid; 95: continue; 96: } 97: if (ap->encoding_index == ei_local_wchar_t) { 98: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 99: This is also the case on native Woe32 systems and Cygwin >= 1.7, where 100: we know that it is UTF-16. */ 101: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 102: if (sizeof(wchar_t) == 4) { 103: to_index = ei_ucs4internal; 104: break; 105: } 106: if (sizeof(wchar_t) == 2) { 107: # if WORDS_LITTLEENDIAN 108: to_index = ei_utf16le; 109: # else 110: to_index = ei_utf16be; 111: # endif 112: break; 113: } 114: #elif __STDC_ISO_10646__ 115: if (sizeof(wchar_t) == 4) { 116: to_index = ei_ucs4internal; 117: break; 118: } 119: if (sizeof(wchar_t) == 2) { 120: to_index = ei_ucs2internal; 121: break; 122: } 123: if (sizeof(wchar_t) == 1) { 124: to_index = ei_iso8859_1; 125: break; 126: } 127: #endif 128: #if HAVE_MBRTOWC 129: to_wchar = 1; 130: tocode = locale_charset(); 131: continue; 132: #endif 133: goto invalid; 134: } 135: to_index = ap->encoding_index; 136: break; 137: } 138: for (from_wchar = 0;;) { 139: /* Search fromcode in the table. */ 140: for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 141: unsigned char c = * (unsigned char *) cp; 142: if (c >= 0x80) 143: goto invalid; 144: if (c >= 'a' && c <= 'z') 145: c -= 'a'-'A'; 146: *bp = c; 147: if (c == '\0') 148: break; 149: if (--count == 0) 150: goto invalid; 151: } 152: for (;;) { 153: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 154: bp -= 10; 155: *bp = '\0'; 156: continue; 157: } 158: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 159: bp -= 8; 160: *bp = '\0'; 161: continue; 162: } 163: break; 164: } 165: if (buf[0] == '\0') { 166: fromcode = locale_charset(); 167: /* Avoid an endless loop that could occur when using an older version 168: of localcharset.c. */ 169: if (fromcode[0] == '\0') 170: goto invalid; 171: continue; 172: } 173: ap = aliases_lookup(buf,bp-buf); 174: if (ap == NULL) { 175: ap = aliases2_lookup(buf); 176: if (ap == NULL) 177: goto invalid; 178: } 179: if (ap->encoding_index == ei_local_char) { 180: fromcode = locale_charset(); 181: /* Avoid an endless loop that could occur when using an older version 182: of localcharset.c. */ 183: if (fromcode[0] == '\0') 184: goto invalid; 185: continue; 186: } 187: if (ap->encoding_index == ei_local_wchar_t) { 188: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 189: This is also the case on native Woe32 systems and Cygwin >= 1.7, where 190: we know that it is UTF-16. */ 191: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 192: if (sizeof(wchar_t) == 4) { 193: from_index = ei_ucs4internal; 194: break; 195: } 196: if (sizeof(wchar_t) == 2) { 197: # if WORDS_LITTLEENDIAN 198: from_index = ei_utf16le; 199: # else 200: from_index = ei_utf16be; 201: # endif 202: break; 203: } 204: #elif __STDC_ISO_10646__ 205: if (sizeof(wchar_t) == 4) { 206: from_index = ei_ucs4internal; 207: break; 208: } 209: if (sizeof(wchar_t) == 2) { 210: from_index = ei_ucs2internal; 211: break; 212: } 213: if (sizeof(wchar_t) == 1) { 214: from_index = ei_iso8859_1; 215: break; 216: } 217: #endif 218: #if HAVE_WCRTOMB 219: from_wchar = 1; 220: fromcode = locale_charset(); 221: continue; 222: #endif 223: goto invalid; 224: } 225: from_index = ap->encoding_index; 226: break; 227: } 228: }