File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv_open1.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 09:29:43 2012 UTC (12 years, 1 month ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, HEAD
libiconv v1.14

    1: /*
    2:  * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /* Part 1 of iconv_open.
   22:    Input: const char* tocode, const char* fromcode.
   23:    Output:
   24:      unsigned int from_index;
   25:      int from_wchar;
   26:      unsigned int to_index;
   27:      int to_wchar;
   28:      int transliterate;
   29:      int discard_ilseq;
   30:    Jumps to 'invalid' in case of error.
   31:  */
   32: { /* Resolves tocode first, then fromcode, into the outputs listed above. */
   33:   char buf[MAX_WORD_LENGTH+10+1]; /* upper-cased copy of the name being looked up */
   34:   const char* cp; /* read cursor in the input name */
   35:   char* bp; /* write cursor in buf; afterwards marks the end of the name */
   36:   const struct alias * ap; /* result of the alias table lookup */
   37:   unsigned int count; /* bytes still available in buf */
   38: 
   39:   transliterate = 0;
   40:   discard_ilseq = 0;
   41: 
   42:   /* Before calling aliases_lookup, convert the input string to upper case,
   43:    * and check whether it's entirely ASCII (we call gperf with option "-7"
   44:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   45:    * or if it's too long, it is not a valid encoding name.
   46:    */
   47:   for (to_wchar = 0;;) { /* restarts each time tocode is replaced by locale_charset() */
   48:     /* Search tocode in the table. */
   49:     for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
   50:       unsigned char c = * (unsigned char *) cp;
   51:       if (c >= 0x80) /* non-ASCII byte */
   52:         goto invalid;
   53:       if (c >= 'a' && c <= 'z')
   54:         c -= 'a'-'A'; /* fold to upper case */
   55:       *bp = c;
   56:       if (c == '\0')
   57:         break; /* leaves bp pointing at the terminating NUL */
   58:       if (--count == 0) /* buf is full and no NUL was seen: name too long */
   59:         goto invalid;
   60:     }
   61:     for (;;) { /* strip trailing suffixes, repeating until neither one matches */
   62:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
   63:         bp -= 10;
   64:         *bp = '\0';
   65:         transliterate = 1;
   66:         continue;
   67:       }
   68:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
   69:         bp -= 8;
   70:         *bp = '\0';
   71:         discard_ilseq = 1;
   72:         continue;
   73:       }
   74:       break;
   75:     }
   76:     if (buf[0] == '\0') { /* nothing left after stripping: retry with locale_charset() */
   77:       tocode = locale_charset();
   78:       /* Avoid an endless loop that could occur when using an older version
   79:          of localcharset.c. */
   80:       if (tocode[0] == '\0')
   81:         goto invalid;
   82:       continue;
   83:     }
   84:     ap = aliases_lookup(buf,bp-buf); /* bp-buf is the length of the stripped name */
   85:     if (ap == NULL) {
   86:       ap = aliases2_lookup(buf); /* second lookup, tried only when the first one fails */
   87:       if (ap == NULL)
   88:         goto invalid;
   89:     }
   90:     if (ap->encoding_index == ei_local_char) {
   91:       tocode = locale_charset();
   92:       /* Avoid an endless loop that could occur when using an older version
   93:          of localcharset.c. */
   94:       if (tocode[0] == '\0')
   95:         goto invalid;
   96:       continue;
   97:     }
   98:     if (ap->encoding_index == ei_local_wchar_t) {
   99:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  100:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
  101:          we know that it is UTF-16.  */
  102: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
  103:       if (sizeof(wchar_t) == 4) {
  104:         to_index = ei_ucs4internal;
  105:         break;
  106:       }
  107:       if (sizeof(wchar_t) == 2) {
  108: # if WORDS_LITTLEENDIAN
  109:         to_index = ei_utf16le;
  110: # else
  111:         to_index = ei_utf16be;
  112: # endif
  113:         break;
  114:       }
  115: #elif __STDC_ISO_10646__
  116:       if (sizeof(wchar_t) == 4) {
  117:         to_index = ei_ucs4internal;
  118:         break;
  119:       }
  120:       if (sizeof(wchar_t) == 2) {
  121:         to_index = ei_ucs2internal;
  122:         break;
  123:       }
  124:       if (sizeof(wchar_t) == 1) {
  125:         to_index = ei_iso8859_1;
  126:         break;
  127:       }
  128: #endif
  129: #if HAVE_MBRTOWC
  130:       to_wchar = 1; /* NOTE(review): presumably makes the caller go through the locale charset and mbrtowc - confirm */
  131:       tocode = locale_charset(); /* an empty result is caught by the buf[0] check on the next pass */
  132:       continue;
  133: #endif
  134:       goto invalid; /* reachable only when the HAVE_MBRTOWC block above is compiled out */
  135:     }
  136:     to_index = ap->encoding_index;
  137:     break;
  138:   }
  139:   for (from_wchar = 0;;) { /* same procedure for fromcode; restarts when fromcode is replaced */
  140:     /* Search fromcode in the table. */
  141:     for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  142:       unsigned char c = * (unsigned char *) cp;
  143:       if (c >= 0x80) /* non-ASCII byte */
  144:         goto invalid;
  145:       if (c >= 'a' && c <= 'z')
  146:         c -= 'a'-'A'; /* fold to upper case */
  147:       *bp = c;
  148:       if (c == '\0')
  149:         break; /* leaves bp pointing at the terminating NUL */
  150:       if (--count == 0) /* buf is full and no NUL was seen: name too long */
  151:         goto invalid;
  152:     }
  153:     for (;;) { /* suffixes are stripped here too but, unlike for tocode, set no flag */
  154:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  155:         bp -= 10;
  156:         *bp = '\0';
  157:         continue;
  158:       }
  159:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  160:         bp -= 8;
  161:         *bp = '\0';
  162:         continue;
  163:       }
  164:       break;
  165:     }
  166:     if (buf[0] == '\0') { /* nothing left after stripping: retry with locale_charset() */
  167:       fromcode = locale_charset();
  168:       /* Avoid an endless loop that could occur when using an older version
  169:          of localcharset.c. */
  170:       if (fromcode[0] == '\0')
  171:         goto invalid;
  172:       continue;
  173:     }
  174:     ap = aliases_lookup(buf,bp-buf); /* bp-buf is the length of the stripped name */
  175:     if (ap == NULL) {
  176:       ap = aliases2_lookup(buf); /* second lookup, tried only when the first one fails */
  177:       if (ap == NULL)
  178:         goto invalid;
  179:     }
  180:     if (ap->encoding_index == ei_local_char) {
  181:       fromcode = locale_charset();
  182:       /* Avoid an endless loop that could occur when using an older version
  183:          of localcharset.c. */
  184:       if (fromcode[0] == '\0')
  185:         goto invalid;
  186:       continue;
  187:     }
  188:     if (ap->encoding_index == ei_local_wchar_t) {
  189:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  190:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
  191:          we know that it is UTF-16.  */
  192: #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
  193:       if (sizeof(wchar_t) == 4) {
  194:         from_index = ei_ucs4internal;
  195:         break;
  196:       }
  197:       if (sizeof(wchar_t) == 2) {
  198: # if WORDS_LITTLEENDIAN
  199:         from_index = ei_utf16le;
  200: # else
  201:         from_index = ei_utf16be;
  202: # endif
  203:         break;
  204:       }
  205: #elif __STDC_ISO_10646__
  206:       if (sizeof(wchar_t) == 4) {
  207:         from_index = ei_ucs4internal;
  208:         break;
  209:       }
  210:       if (sizeof(wchar_t) == 2) {
  211:         from_index = ei_ucs2internal;
  212:         break;
  213:       }
  214:       if (sizeof(wchar_t) == 1) {
  215:         from_index = ei_iso8859_1;
  216:         break;
  217:       }
  218: #endif
  219: #if HAVE_WCRTOMB
  220:       from_wchar = 1; /* NOTE(review): presumably makes the caller go through wcrtomb and the locale charset - confirm */
  221:       fromcode = locale_charset(); /* an empty result is caught by the buf[0] check on the next pass */
  222:       continue;
  223: #endif
  224:       goto invalid; /* reachable only when the HAVE_WCRTOMB block above is compiled out */
  225:     }
  226:     from_index = ap->encoding_index;
  227:     break;
  228:   }
  229: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>