File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv_open1.h
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 1999-2008, 2011, 2018 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: /* Part 1 of iconv_open.
   21:    Input: const char* tocode, const char* fromcode.
   22:    Output:
   23:      unsigned int from_index;
   24:      int from_wchar;
   25:      unsigned int to_index;
   26:      int to_wchar;
   27:      int transliterate;
   28:      int discard_ilseq;
   29:    Jumps to 'invalid' in case of error.
   30:  */
   31: { /* Resolve tocode, then fromcode, into to_index/from_index; every failure path jumps to 'invalid'. */
   32:   char buf[MAX_WORD_LENGTH+10+1];  /* upper-cased copy of the name being looked up; +10 presumably leaves room for a "//TRANSLIT" suffix, +1 for the NUL */
   33:   const char* cp;  /* read cursor in tocode / fromcode */
   34:   char* bp;  /* write cursor in buf; after the copy loop it points at the terminating NUL */
   35:   const struct alias * ap;  /* result of the alias lookup */
   36:   unsigned int count;  /* bytes of buf still available during the copy */
   37: 
   38:   transliterate = 0;  /* set to 1 only by a "//TRANSLIT" suffix on tocode */
   39:   discard_ilseq = 0;  /* set to 1 only by a "//IGNORE" suffix on tocode */
   40: 
   41:   /* Before calling aliases_lookup, convert the input string to upper case,
   42:    * and check whether it's entirely ASCII (we call gperf with option "-7"
   43:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   44:    * or if it's too long, it is not a valid encoding name.
   45:    */
   46:   for (to_wchar = 0;;) {  /* each 'continue' below restarts the lookup with a replaced tocode */
   47:     /* Search tocode in the table. */
   48:     for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
   49:       unsigned char c = * (unsigned char *) cp;
   50:       if (c >= 0x80)  /* non-ASCII byte: not a valid encoding name */
   51:         goto invalid;
   52:       if (c >= 'a' && c <= 'z')  /* ASCII-only upper-casing, independent of the locale */
   53:         c -= 'a'-'A';
   54:       *bp = c;
   55:       if (c == '\0')  /* the NUL has been copied; bp is left pointing at it */
   56:         break;
   57:       if (--count == 0)  /* buf filled without reaching the NUL: name too long */
   58:         goto invalid;
   59:     }
   60:     for (;;) {  /* peel trailing suffixes off the end of buf, in any order and number */
   61:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
   62:         bp -= 10;
   63:         *bp = '\0';
   64:         transliterate = 1;
   65:         continue;
   66:       }
   67:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
   68:         bp -= 8;
   69:         *bp = '\0';
   70:         discard_ilseq = 1;
   71:         continue;
   72:       }
   73:       break;
   74:     }
   75:     if (buf[0] == '\0') {  /* nothing left after stripping: use locale_charset() and retry */
   76:       tocode = locale_charset();
   77:       /* Avoid an endless loop that could occur when using an older version
   78:          of localcharset.c. */
   79:       if (tocode[0] == '\0')
   80:         goto invalid;
   81:       continue;
   82:     }
   83:     ap = aliases_lookup(buf,bp-buf);  /* bp-buf is the name length after suffix stripping */
   84:     if (ap == NULL) {
   85:       ap = aliases2_lookup(buf);  /* second lookup, tried only when the first finds nothing */
   86:       if (ap == NULL)
   87:         goto invalid;
   88:     }
   89:     if (ap->encoding_index == ei_local_char) {  /* alias stands for the locale charset: substitute it and retry */
   90:       tocode = locale_charset();
   91:       /* Avoid an endless loop that could occur when using an older version
   92:          of localcharset.c. */
   93:       if (tocode[0] == '\0')
   94:         goto invalid;
   95:       continue;
   96:     }
   97:     if (ap->encoding_index == ei_local_wchar_t) {  /* alias stands for wchar_t: pick a concrete encoding by sizeof(wchar_t) */
   98:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
   99:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
  100:          we know that it is UTF-16.  */
  101: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
  102:       if (sizeof(wchar_t) == 4) {
  103:         to_index = ei_ucs4internal;
  104:         break;
  105:       }
  106:       if (sizeof(wchar_t) == 2) {
  107: # if WORDS_LITTLEENDIAN
  108:         to_index = ei_utf16le;
  109: # else
  110:         to_index = ei_utf16be;
  111: # endif
  112:         break;
  113:       }
  114: #elif __STDC_ISO_10646__
  115:       if (sizeof(wchar_t) == 4) {
  116:         to_index = ei_ucs4internal;
  117:         break;
  118:       }
  119:       if (sizeof(wchar_t) == 2) {
  120:         to_index = ei_ucs2internal;
  121:         break;
  122:       }
  123:       if (sizeof(wchar_t) == 1) {
  124:         to_index = ei_iso8859_1;
  125:         break;
  126:       }
  127: #endif
  128: #if HAVE_MBRTOWC
  129:       to_wchar = 1;  /* record the wchar_t target, then resolve the locale charset on the next pass */
  130:       tocode = locale_charset();
  131:       continue;
  132: #endif
  133:       goto invalid;  /* reached only when no branch above did 'break' or 'continue' */
  134:     }
  135:     to_index = ap->encoding_index;  /* ordinary encoding: take the index from the alias entry */
  136:     break;
  137:   }
  138:   for (from_wchar = 0;;) {  /* same procedure for fromcode; each 'continue' restarts with a replaced fromcode */
  139:     /* Search fromcode in the table. */
  140:     for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  141:       unsigned char c = * (unsigned char *) cp;
  142:       if (c >= 0x80)  /* non-ASCII byte: not a valid encoding name */
  143:         goto invalid;
  144:       if (c >= 'a' && c <= 'z')  /* ASCII-only upper-casing, independent of the locale */
  145:         c -= 'a'-'A';
  146:       *bp = c;
  147:       if (c == '\0')  /* the NUL has been copied; bp is left pointing at it */
  148:         break;
  149:       if (--count == 0)  /* buf filled without reaching the NUL: name too long */
  150:         goto invalid;
  151:     }
  152:     for (;;) {  /* suffixes on fromcode are removed here but set no flag */
  153:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  154:         bp -= 10;
  155:         *bp = '\0';
  156:         continue;
  157:       }
  158:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  159:         bp -= 8;
  160:         *bp = '\0';
  161:         continue;
  162:       }
  163:       break;
  164:     }
  165:     if (buf[0] == '\0') {  /* nothing left after stripping: use locale_charset() and retry */
  166:       fromcode = locale_charset();
  167:       /* Avoid an endless loop that could occur when using an older version
  168:          of localcharset.c. */
  169:       if (fromcode[0] == '\0')
  170:         goto invalid;
  171:       continue;
  172:     }
  173:     ap = aliases_lookup(buf,bp-buf);  /* bp-buf is the name length after suffix stripping */
  174:     if (ap == NULL) {
  175:       ap = aliases2_lookup(buf);  /* second lookup, tried only when the first finds nothing */
  176:       if (ap == NULL)
  177:         goto invalid;
  178:     }
  179:     if (ap->encoding_index == ei_local_char) {  /* alias stands for the locale charset: substitute it and retry */
  180:       fromcode = locale_charset();
  181:       /* Avoid an endless loop that could occur when using an older version
  182:          of localcharset.c. */
  183:       if (fromcode[0] == '\0')
  184:         goto invalid;
  185:       continue;
  186:     }
  187:     if (ap->encoding_index == ei_local_wchar_t) {  /* alias stands for wchar_t: pick a concrete encoding by sizeof(wchar_t) */
  188:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  189:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
  190:          we know that it is UTF-16.  */
  191: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
  192:       if (sizeof(wchar_t) == 4) {
  193:         from_index = ei_ucs4internal;
  194:         break;
  195:       }
  196:       if (sizeof(wchar_t) == 2) {
  197: # if WORDS_LITTLEENDIAN
  198:         from_index = ei_utf16le;
  199: # else
  200:         from_index = ei_utf16be;
  201: # endif
  202:         break;
  203:       }
  204: #elif __STDC_ISO_10646__
  205:       if (sizeof(wchar_t) == 4) {
  206:         from_index = ei_ucs4internal;
  207:         break;
  208:       }
  209:       if (sizeof(wchar_t) == 2) {
  210:         from_index = ei_ucs2internal;
  211:         break;
  212:       }
  213:       if (sizeof(wchar_t) == 1) {
  214:         from_index = ei_iso8859_1;
  215:         break;
  216:       }
  217: #endif
  218: #if HAVE_WCRTOMB
  219:       from_wchar = 1;  /* record the wchar_t source, then resolve the locale charset on the next pass */
  220:       fromcode = locale_charset();
  221:       continue;
  222: #endif
  223:       goto invalid;  /* reached only when no branch above did 'break' or 'continue' */
  224:     }
  225:     from_index = ap->encoding_index;  /* ordinary encoding: take the index from the alias entry */
  226:     break;
  227:   }
  228: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>