Annotation of embedaddon/libiconv/lib/iconv_open1.h, revision 1.1.1.2

1.1       misho       1: /*
1.1.1.2 ! misho       2:  * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
1.1       misho       3:  * This file is part of the GNU LIBICONV Library.
                      4:  *
                      5:  * The GNU LIBICONV Library is free software; you can redistribute it
                      6:  * and/or modify it under the terms of the GNU Library General Public
                      7:  * License as published by the Free Software Foundation; either version 2
                      8:  * of the License, or (at your option) any later version.
                      9:  *
                     10:  * The GNU LIBICONV Library is distributed in the hope that it will be
                     11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:  * Library General Public License for more details.
                     14:  *
                     15:  * You should have received a copy of the GNU Library General Public
                     16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
                     17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
                     18:  * Fifth Floor, Boston, MA 02110-1301, USA.
                     19:  */
                     20: 
                     21: /* Part 1 of iconv_open.
                     22:    Input: const char* tocode, const char* fromcode.
                     23:    Output:
                     24:      unsigned int from_index;
                     25:      int from_wchar;
                     26:      unsigned int to_index;
                     27:      int to_wchar;
                     28:      int transliterate;
                     29:      int discard_ilseq;
                     30:    Jumps to 'invalid' in case of error.
                     31:  */
                     32: {
                     33:   char buf[MAX_WORD_LENGTH+10+1];
                     34:   const char* cp;
                     35:   char* bp;
                     36:   const struct alias * ap;
                     37:   unsigned int count;
                     38: 
                     39:   transliterate = 0;
                     40:   discard_ilseq = 0;
                     41: 
                     42:   /* Before calling aliases_lookup, convert the input string to upper case,
                     43:    * and check whether it's entirely ASCII (we call gperf with option "-7"
                     44:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
                     45:    * or if it's too long, it is not a valid encoding name.
                     46:    */
                     47:   for (to_wchar = 0;;) {
                     48:     /* Search tocode in the table. */
                     49:     for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
                     50:       unsigned char c = * (unsigned char *) cp;
                     51:       if (c >= 0x80)
                     52:         goto invalid;
                     53:       if (c >= 'a' && c <= 'z')
                     54:         c -= 'a'-'A';
                     55:       *bp = c;
                     56:       if (c == '\0')
                     57:         break;
                     58:       if (--count == 0)
                     59:         goto invalid;
                     60:     }
                     61:     for (;;) {
                     62:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
                     63:         bp -= 10;
                     64:         *bp = '\0';
                     65:         transliterate = 1;
                     66:         continue;
                     67:       }
                     68:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
                     69:         bp -= 8;
                     70:         *bp = '\0';
                     71:         discard_ilseq = 1;
                     72:         continue;
                     73:       }
                     74:       break;
                     75:     }
                     76:     if (buf[0] == '\0') {
                     77:       tocode = locale_charset();
                     78:       /* Avoid an endless loop that could occur when using an older version
                     79:          of localcharset.c. */
                     80:       if (tocode[0] == '\0')
                     81:         goto invalid;
                     82:       continue;
                     83:     }
                     84:     ap = aliases_lookup(buf,bp-buf);
                     85:     if (ap == NULL) {
                     86:       ap = aliases2_lookup(buf);
                     87:       if (ap == NULL)
                     88:         goto invalid;
                     89:     }
                     90:     if (ap->encoding_index == ei_local_char) {
                     91:       tocode = locale_charset();
                     92:       /* Avoid an endless loop that could occur when using an older version
                     93:          of localcharset.c. */
                     94:       if (tocode[0] == '\0')
                     95:         goto invalid;
                     96:       continue;
                     97:     }
                     98:     if (ap->encoding_index == ei_local_wchar_t) {
                     99:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 ! misho     100:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
        !           101:          we know that it is UTF-16.  */
        !           102: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
        !           103:       if (sizeof(wchar_t) == 4) {
        !           104:         to_index = ei_ucs4internal;
        !           105:         break;
        !           106:       }
        !           107:       if (sizeof(wchar_t) == 2) {
        !           108: # if WORDS_LITTLEENDIAN
        !           109:         to_index = ei_utf16le;
        !           110: # else
        !           111:         to_index = ei_utf16be;
        !           112: # endif
        !           113:         break;
        !           114:       }
        !           115: #elif __STDC_ISO_10646__
1.1       misho     116:       if (sizeof(wchar_t) == 4) {
                    117:         to_index = ei_ucs4internal;
                    118:         break;
                    119:       }
                    120:       if (sizeof(wchar_t) == 2) {
                    121:         to_index = ei_ucs2internal;
                    122:         break;
                    123:       }
                    124:       if (sizeof(wchar_t) == 1) {
                    125:         to_index = ei_iso8859_1;
                    126:         break;
                    127:       }
                    128: #endif
                    129: #if HAVE_MBRTOWC
                    130:       to_wchar = 1;
                    131:       tocode = locale_charset();
                    132:       continue;
                    133: #endif
                    134:       goto invalid;
                    135:     }
                    136:     to_index = ap->encoding_index;
                    137:     break;
                    138:   }
                    139:   for (from_wchar = 0;;) {
                    140:     /* Search fromcode in the table. */
                    141:     for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
                    142:       unsigned char c = * (unsigned char *) cp;
                    143:       if (c >= 0x80)
                    144:         goto invalid;
                    145:       if (c >= 'a' && c <= 'z')
                    146:         c -= 'a'-'A';
                    147:       *bp = c;
                    148:       if (c == '\0')
                    149:         break;
                    150:       if (--count == 0)
                    151:         goto invalid;
                    152:     }
                    153:     for (;;) {
                    154:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
                    155:         bp -= 10;
                    156:         *bp = '\0';
                    157:         continue;
                    158:       }
                    159:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
                    160:         bp -= 8;
                    161:         *bp = '\0';
                    162:         continue;
                    163:       }
                    164:       break;
                    165:     }
                    166:     if (buf[0] == '\0') {
                    167:       fromcode = locale_charset();
                    168:       /* Avoid an endless loop that could occur when using an older version
                    169:          of localcharset.c. */
                    170:       if (fromcode[0] == '\0')
                    171:         goto invalid;
                    172:       continue;
                    173:     }
                    174:     ap = aliases_lookup(buf,bp-buf);
                    175:     if (ap == NULL) {
                    176:       ap = aliases2_lookup(buf);
                    177:       if (ap == NULL)
                    178:         goto invalid;
                    179:     }
                    180:     if (ap->encoding_index == ei_local_char) {
                    181:       fromcode = locale_charset();
                    182:       /* Avoid an endless loop that could occur when using an older version
                    183:          of localcharset.c. */
                    184:       if (fromcode[0] == '\0')
                    185:         goto invalid;
                    186:       continue;
                    187:     }
                    188:     if (ap->encoding_index == ei_local_wchar_t) {
                    189:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
1.1.1.2 ! misho     190:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
        !           191:          we know that it is UTF-16.  */
        !           192: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
        !           193:       if (sizeof(wchar_t) == 4) {
        !           194:         from_index = ei_ucs4internal;
        !           195:         break;
        !           196:       }
        !           197:       if (sizeof(wchar_t) == 2) {
        !           198: # if WORDS_LITTLEENDIAN
        !           199:         from_index = ei_utf16le;
        !           200: # else
        !           201:         from_index = ei_utf16be;
        !           202: # endif
        !           203:         break;
        !           204:       }
        !           205: #elif __STDC_ISO_10646__
1.1       misho     206:       if (sizeof(wchar_t) == 4) {
                    207:         from_index = ei_ucs4internal;
                    208:         break;
                    209:       }
                    210:       if (sizeof(wchar_t) == 2) {
                    211:         from_index = ei_ucs2internal;
                    212:         break;
                    213:       }
                    214:       if (sizeof(wchar_t) == 1) {
                    215:         from_index = ei_iso8859_1;
                    216:         break;
                    217:       }
                    218: #endif
                    219: #if HAVE_WCRTOMB
                    220:       from_wchar = 1;
                    221:       fromcode = locale_charset();
                    222:       continue;
                    223: #endif
                    224:       goto invalid;
                    225:     }
                    226:     from_index = ap->encoding_index;
                    227:     break;
                    228:   }
                    229: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>