Annotation of embedaddon/libiconv/lib/iconv_open1.h, revision 1.1.1.3

1.1       misho       1: /*
1.1.1.3 ! misho       2:  * Copyright (C) 1999-2008, 2011, 2018 Free Software Foundation, Inc.
1.1       misho       3:  * This file is part of the GNU LIBICONV Library.
                      4:  *
                      5:  * The GNU LIBICONV Library is free software; you can redistribute it
                      6:  * and/or modify it under the terms of the GNU Library General Public
                      7:  * License as published by the Free Software Foundation; either version 2
                      8:  * of the License, or (at your option) any later version.
                      9:  *
                     10:  * The GNU LIBICONV Library is distributed in the hope that it will be
                     11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:  * Library General Public License for more details.
                     14:  *
                     15:  * You should have received a copy of the GNU Library General Public
                     16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho      17:  * If not, see <https://www.gnu.org/licenses/>.
1.1       misho      18:  */
                     19: 
                     20: /* Part 1 of iconv_open.
                     21:    Input: const char* tocode, const char* fromcode.
                     22:    Output:
                     23:      unsigned int from_index;
                     24:      int from_wchar;
                     25:      unsigned int to_index;
                     26:      int to_wchar;
                     27:      int transliterate;
                     28:      int discard_ilseq;
                     29:    Jumps to 'invalid' in case of error.
                     30:  */
                     31: {  /* Resolves tocode first, then fromcode; both loops reuse buf, cp, bp, ap and count. */
                     32:   char buf[MAX_WORD_LENGTH+10+1];  /* upper-cased copy of the name being looked up; 10 is the length of "//TRANSLIT" */
                     33:   const char* cp;  /* read cursor in the caller's name */
                     34:   char* bp;  /* write cursor in buf; after the copy it points at the terminating NUL */
                     35:   const struct alias * ap;  /* result of the alias lookup */
                     36:   unsigned int count;  /* bytes of buf still unused during the copy */
                     37: 
                     38:   transliterate = 0;
                     39:   discard_ilseq = 0;
                     40: 
                     41:   /* Before calling aliases_lookup, convert the input string to upper case,
                     42:    * and check whether it's entirely ASCII (we call gperf with option "-7"
                     43:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
                     44:    * or if it's too long, it is not a valid encoding name.
                     45:    */
                     46:   for (to_wchar = 0;;) {  /* restarts each time tocode is replaced by locale_charset() */
                     47:     /* Search tocode in the table. */
                     48:     for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
                     49:       unsigned char c = * (unsigned char *) cp;
                     50:       if (c >= 0x80)  /* non-ASCII byte: reject the name */
                     51:         goto invalid;
                     52:       if (c >= 'a' && c <= 'z')
                     53:         c -= 'a'-'A';  /* fold to upper case */
                     54:       *bp = c;
                     55:       if (c == '\0')  /* terminator stored; bp stays on it */
                     56:         break;
                     57:       if (--count == 0)  /* buf filled without reaching the terminator: name too long */
                     58:         goto invalid;
                     59:     }
                     60:     for (;;) {  /* strip trailing "//TRANSLIT" and "//IGNORE" suffixes, repeatedly and in any order */
                     61:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
                     62:         bp -= 10;
                     63:         *bp = '\0';
                     64:         transliterate = 1;
                     65:         continue;
                     66:       }
                     67:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
                     68:         bp -= 8;
                     69:         *bp = '\0';
                     70:         discard_ilseq = 1;
                     71:         continue;
                     72:       }
                     73:       break;
                     74:     }
                     75:     if (buf[0] == '\0') {  /* name is empty once the suffixes are removed: use locale_charset() instead */
                     76:       tocode = locale_charset();
                     77:       /* Avoid an endless loop that could occur when using an older version
                     78:          of localcharset.c. */
                     79:       if (tocode[0] == '\0')
                     80:         goto invalid;
                     81:       continue;
                     82:     }
                     83:     ap = aliases_lookup(buf,bp-buf);  /* bp-buf is the length of the stripped name */
                     84:     if (ap == NULL) {
                     85:       ap = aliases2_lookup(buf);  /* second lookup, tried only when the first one finds nothing */
                     86:       if (ap == NULL)
                     87:         goto invalid;
                     88:     }
                     89:     if (ap->encoding_index == ei_local_char) {  /* retry the lookup with the name returned by locale_charset() */
                     90:       tocode = locale_charset();
                     91:       /* Avoid an endless loop that could occur when using an older version
                     92:          of localcharset.c. */
                     93:       if (tocode[0] == '\0')
                     94:         goto invalid;
                     95:       continue;
                     96:     }
                     97:     if (ap->encoding_index == ei_local_wchar_t) {
                     98:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2   misho      99:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
                    100:          we know that it is UTF-16.  */
1.1.1.3 ! misho     101: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
1.1.1.2   misho     102:       if (sizeof(wchar_t) == 4) {
                    103:         to_index = ei_ucs4internal;
                    104:         break;
                    105:       }
                    106:       if (sizeof(wchar_t) == 2) {
                    107: # if WORDS_LITTLEENDIAN
                    108:         to_index = ei_utf16le;
                    109: # else
                    110:         to_index = ei_utf16be;
                    111: # endif
                    112:         break;
                    113:       }
                    114: #elif __STDC_ISO_10646__
1.1       misho     115:       if (sizeof(wchar_t) == 4) {
                    116:         to_index = ei_ucs4internal;
                    117:         break;
                    118:       }
                    119:       if (sizeof(wchar_t) == 2) {
                    120:         to_index = ei_ucs2internal;
                    121:         break;
                    122:       }
                    123:       if (sizeof(wchar_t) == 1) {
                    124:         to_index = ei_iso8859_1;
                    125:         break;
                    126:       }
                    127: #endif
                    128: #if HAVE_MBRTOWC
                    129:       to_wchar = 1;  /* record that wchar_t was requested, then resolve the locale charset name */
                    130:       tocode = locale_charset();
                    131:       continue;
                    132: #endif
                    133:       goto invalid;  /* executed only when no branch above left via break or continue */
                    134:     }
                    135:     to_index = ap->encoding_index;  /* any other alias: its index is the result */
                    136:     break;
                    137:   }
                    138:   for (from_wchar = 0;;) {  /* same procedure for fromcode; restarts when fromcode is replaced */
                    139:     /* Search fromcode in the table. */
                    140:     for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
                    141:       unsigned char c = * (unsigned char *) cp;
                    142:       if (c >= 0x80)  /* non-ASCII byte: reject the name */
                    143:         goto invalid;
                    144:       if (c >= 'a' && c <= 'z')
                    145:         c -= 'a'-'A';  /* fold to upper case */
                    146:       *bp = c;
                    147:       if (c == '\0')  /* terminator stored; bp stays on it */
                    148:         break;
                    149:       if (--count == 0)  /* buf filled without reaching the terminator: name too long */
                    150:         goto invalid;
                    151:     }
                    152:     for (;;) {  /* suffixes on fromcode are stripped too, but no flag is set for them here */
                    153:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
                    154:         bp -= 10;
                    155:         *bp = '\0';
                    156:         continue;
                    157:       }
                    158:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
                    159:         bp -= 8;
                    160:         *bp = '\0';
                    161:         continue;
                    162:       }
                    163:       break;
                    164:     }
                    165:     if (buf[0] == '\0') {  /* name is empty once the suffixes are removed: use locale_charset() instead */
                    166:       fromcode = locale_charset();
                    167:       /* Avoid an endless loop that could occur when using an older version
                    168:          of localcharset.c. */
                    169:       if (fromcode[0] == '\0')
                    170:         goto invalid;
                    171:       continue;
                    172:     }
                    173:     ap = aliases_lookup(buf,bp-buf);  /* bp-buf is the length of the stripped name */
                    174:     if (ap == NULL) {
                    175:       ap = aliases2_lookup(buf);  /* second lookup, tried only when the first one finds nothing */
                    176:       if (ap == NULL)
                    177:         goto invalid;
                    178:     }
                    179:     if (ap->encoding_index == ei_local_char) {  /* retry the lookup with the name returned by locale_charset() */
                    180:       fromcode = locale_charset();
                    181:       /* Avoid an endless loop that could occur when using an older version
                    182:          of localcharset.c. */
                    183:       if (fromcode[0] == '\0')
                    184:         goto invalid;
                    185:       continue;
                    186:     }
                    187:     if (ap->encoding_index == ei_local_wchar_t) {
                    188:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2   misho     189:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
                    190:          we know that it is UTF-16.  */
1.1.1.3 ! misho     191: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
1.1.1.2   misho     192:       if (sizeof(wchar_t) == 4) {
                    193:         from_index = ei_ucs4internal;
                    194:         break;
                    195:       }
                    196:       if (sizeof(wchar_t) == 2) {
                    197: # if WORDS_LITTLEENDIAN
                    198:         from_index = ei_utf16le;
                    199: # else
                    200:         from_index = ei_utf16be;
                    201: # endif
                    202:         break;
                    203:       }
                    204: #elif __STDC_ISO_10646__
1.1       misho     205:       if (sizeof(wchar_t) == 4) {
                    206:         from_index = ei_ucs4internal;
                    207:         break;
                    208:       }
                    209:       if (sizeof(wchar_t) == 2) {
                    210:         from_index = ei_ucs2internal;
                    211:         break;
                    212:       }
                    213:       if (sizeof(wchar_t) == 1) {
                    214:         from_index = ei_iso8859_1;
                    215:         break;
                    216:       }
                    217: #endif
                    218: #if HAVE_WCRTOMB
                    219:       from_wchar = 1;  /* record that wchar_t was requested, then resolve the locale charset name */
                    220:       fromcode = locale_charset();
                    221:       continue;
                    222: #endif
                    223:       goto invalid;  /* executed only when no branch above left via break or continue */
                    224:     }
                    225:     from_index = ap->encoding_index;  /* any other alias: its index is the result */
                    226:     break;
                    227:   }
                    228: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>