File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv_open1.h
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 4 months ago) by misho
CVS tags: MAIN, HEAD
Initial revision

    1: /*
    2:  * Copyright (C) 1999-2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: /* Part 1 of iconv_open.
   22:    Input: const char* tocode, const char* fromcode.
   23:    Output:
   24:      unsigned int from_index;
   25:      int from_wchar;
   26:      unsigned int to_index;
   27:      int to_wchar;
   28:      int transliterate;
   29:      int discard_ilseq;
   30:    Jumps to 'invalid' in case of error.
   31:  */
   32: { /* Fragment body: tocode, fromcode, the output variables and the 'invalid' label must already be in scope. */
   33:   char buf[MAX_WORD_LENGTH+10+1]; /* upper-cased copy of the name; holds up to MAX_WORD_LENGTH+10 chars plus NUL */
   34:   const char* cp; /* read cursor in the name being copied */
   35:   char* bp; /* write cursor in buf; ends up pointing at buf's terminating NUL */
   36:   const struct alias * ap; /* lookup result for the current name */
   37:   unsigned int count; /* remaining room in buf while copying */
   38: 
   39:   transliterate = 0; /* set to 1 only by a //TRANSLIT suffix on tocode */
   40:   discard_ilseq = 0; /* set to 1 only by an //IGNORE suffix on tocode */
   41: 
   42:   /* Before calling aliases_lookup, convert the input string to upper case,
   43:    * and check whether it's entirely ASCII (we call gperf with option "-7"
   44:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   45:    * or if it's too long, it is not a valid encoding name.
   46:    */
   47:   for (to_wchar = 0;;) { /* each 'continue' restarts the lookup with a replaced tocode */
   48:     /* Copy tocode into buf in upper case, then search it in the table. */
   49:     for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
   50:       unsigned char c = * (unsigned char *) cp;
   51:       if (c >= 0x80) /* non-ASCII byte */
   52:         goto invalid;
   53:       if (c >= 'a' && c <= 'z') /* fold ASCII lower case to upper case */
   54:         c -= 'a'-'A';
   55:       *bp = c; /* the terminating NUL is stored too */
   56:       if (c == '\0')
   57:         break;
   58:       if (--count == 0) /* buf is full and no NUL seen: name too long */
   59:         goto invalid;
   60:     }
   61:     for (;;) { /* strip any number of trailing //TRANSLIT and //IGNORE suffixes */
   62:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { /* bp is at the NUL, so bp-10 is the last 10 chars */
   63:         bp -= 10;
   64:         *bp = '\0';
   65:         transliterate = 1;
   66:         continue; /* look for a further suffix */
   67:       }
   68:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
   69:         bp -= 8;
   70:         *bp = '\0';
   71:         discard_ilseq = 1;
   72:         continue; /* look for a further suffix */
   73:       }
   74:       break;
   75:     }
   76:     if (buf[0] == '\0') { /* nothing left after stripping: fall back to locale_charset() */
   77:       tocode = locale_charset();
   78:       /* Avoid an endless loop that could occur when using an older version
   79:          of localcharset.c. */
   80:       if (tocode[0] == '\0')
   81:         goto invalid;
   82:       continue;
   83:     }
   84:     ap = aliases_lookup(buf,bp-buf); /* bp-buf is the name length without suffixes */
   85:     if (ap == NULL) {
   86:       ap = aliases2_lookup(buf); /* second lookup, tried only when the first finds nothing */
   87:       if (ap == NULL)
   88:         goto invalid;
   89:     }
   90:     if (ap->encoding_index == ei_local_char) { /* redirect to the name from locale_charset() */
   91:       tocode = locale_charset();
   92:       /* Avoid an endless loop that could occur when using an older version
   93:          of localcharset.c. */
   94:       if (tocode[0] == '\0')
   95:         goto invalid;
   96:       continue;
   97:     }
   98:     if (ap->encoding_index == ei_local_wchar_t) { /* wchar_t: choose an encoding by sizeof(wchar_t), if possible */
   99:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  100:          This is also the case on native Woe32 systems.  */
  101: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
  102:       if (sizeof(wchar_t) == 4) {
  103:         to_index = ei_ucs4internal;
  104:         break;
  105:       }
  106:       if (sizeof(wchar_t) == 2) {
  107:         to_index = ei_ucs2internal;
  108:         break;
  109:       }
  110:       if (sizeof(wchar_t) == 1) {
  111:         to_index = ei_iso8859_1;
  112:         break;
  113:       }
  114: #endif
  115: #if HAVE_MBRTOWC
  116:       to_wchar = 1; /* flag wchar_t output; the lookup then continues with locale_charset() */
  117:       tocode = locale_charset();
  118:       continue;
  119: #endif
  120:       goto invalid; /* reached only if no branch above did a break or continue */
  121:     }
  122:     to_index = ap->encoding_index; /* ordinary encoding: use the table entry directly */
  123:     break;
  124:   }
  125:   for (from_wchar = 0;;) { /* same procedure for fromcode; each 'continue' restarts with a replaced fromcode */
  126:     /* Copy fromcode into buf in upper case, then search it in the table. */
  127:     for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  128:       unsigned char c = * (unsigned char *) cp;
  129:       if (c >= 0x80) /* non-ASCII byte */
  130:         goto invalid;
  131:       if (c >= 'a' && c <= 'z') /* fold ASCII lower case to upper case */
  132:         c -= 'a'-'A';
  133:       *bp = c; /* the terminating NUL is stored too */
  134:       if (c == '\0')
  135:         break;
  136:       if (--count == 0) /* buf is full and no NUL seen: name too long */
  137:         goto invalid;
  138:     }
  139:     for (;;) { /* suffixes on fromcode are stripped but set no flags */
  140:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  141:         bp -= 10;
  142:         *bp = '\0';
  143:         continue; /* look for a further suffix */
  144:       }
  145:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  146:         bp -= 8;
  147:         *bp = '\0';
  148:         continue; /* look for a further suffix */
  149:       }
  150:       break;
  151:     }
  152:     if (buf[0] == '\0') { /* nothing left after stripping: fall back to locale_charset() */
  153:       fromcode = locale_charset();
  154:       /* Avoid an endless loop that could occur when using an older version
  155:          of localcharset.c. */
  156:       if (fromcode[0] == '\0')
  157:         goto invalid;
  158:       continue;
  159:     }
  160:     ap = aliases_lookup(buf,bp-buf); /* bp-buf is the name length without suffixes */
  161:     if (ap == NULL) {
  162:       ap = aliases2_lookup(buf); /* second lookup, tried only when the first finds nothing */
  163:       if (ap == NULL)
  164:         goto invalid;
  165:     }
  166:     if (ap->encoding_index == ei_local_char) { /* redirect to the name from locale_charset() */
  167:       fromcode = locale_charset();
  168:       /* Avoid an endless loop that could occur when using an older version
  169:          of localcharset.c. */
  170:       if (fromcode[0] == '\0')
  171:         goto invalid;
  172:       continue;
  173:     }
  174:     if (ap->encoding_index == ei_local_wchar_t) { /* wchar_t: choose an encoding by sizeof(wchar_t), if possible */
  175:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  176:          This is also the case on native Woe32 systems.  */
  177: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
  178:       if (sizeof(wchar_t) == 4) {
  179:         from_index = ei_ucs4internal;
  180:         break;
  181:       }
  182:       if (sizeof(wchar_t) == 2) {
  183:         from_index = ei_ucs2internal;
  184:         break;
  185:       }
  186:       if (sizeof(wchar_t) == 1) {
  187:         from_index = ei_iso8859_1;
  188:         break;
  189:       }
  190: #endif
  191: #if HAVE_WCRTOMB
  192:       from_wchar = 1; /* flag wchar_t input; the lookup then continues with locale_charset() */
  193:       fromcode = locale_charset();
  194:       continue;
  195: #endif
  196:       goto invalid; /* reached only if no branch above did a break or continue */
  197:     }
  198:     from_index = ap->encoding_index; /* ordinary encoding: use the table entry directly */
  199:     break;
  200:   }
  201: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>