File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv.c
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 4 months ago) by misho
CVS tags: MAIN, HEAD
Initial revision

    1: /*
    2:  * Copyright (C) 1999-2008 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   18:  * Fifth Floor, Boston, MA 02110-1301, USA.
   19:  */
   20: 
   21: #include <iconv.h>
   22: 
   23: #include <stdlib.h>
   24: #include <string.h>
   25: #include "config.h"
   26: #include "localcharset.h"
   27: 
   28: #if ENABLE_EXTRA
   29: /*
   30:  * Consider all system dependent encodings, for any system,
   31:  * and the extra encodings.
   32:  */
   33: #define USE_AIX
   34: #define USE_OSF1
   35: #define USE_DOS
   36: #define USE_EXTRA
   37: #else
   38: /*
   39:  * Consider those system dependent encodings that are needed for the
   40:  * current system.
   41:  */
   42: #ifdef _AIX
   43: #define USE_AIX
   44: #endif
   45: #if defined(__osf__) || defined(VMS)
   46: #define USE_OSF1
   47: #endif
   48: #if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
   49: #define USE_DOS
   50: #endif
   51: #endif
   52: 
   53: /*
   54:  * Data type for general conversion loop.
   55:  */
   56: struct loop_funcs {
   57:   size_t (*loop_convert) (iconv_t icd,
   58:                           const char* * inbuf, size_t *inbytesleft,
   59:                           char* * outbuf, size_t *outbytesleft);
   60:   size_t (*loop_reset) (iconv_t icd,
   61:                         char* * outbuf, size_t *outbytesleft);
   62: };
   63: 
   64: /*
   65:  * Converters.
   66:  */
   67: #include "converters.h"
   68: 
   69: /*
   70:  * Transliteration tables.
   71:  */
   72: #include "cjk_variants.h"
   73: #include "translit.h"
   74: 
   75: /*
   76:  * Table of all supported encodings.
   77:  */
   78: struct encoding {
   79:   struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
   80:   struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
   81:   int oflags;                 /* flags for unicode -> multibyte conversion */
   82: };
/* DEFALIAS expands to nothing while the encoding tables below are built;
   it is undefined again once both tables are complete. */
#define DEFALIAS(xxx_alias,xxx) /* nothing */
/* Encoding indices: each DEFENCODING entry of the included .def files
   produces one ei_<xxx> enumerator, in include order. */
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  ei_##xxx ,
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#include "encodings_local.def"
#undef DEFENCODING
/* Terminating enumerator, so that the list does not end with a comma. */
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
/* One 'struct encoding' initializer per DEFENCODING entry. The .def files
   are included in the same order as for the enum above, so the position of
   an entry in this array matches its ei_<xxx> value. */
static struct encoding const all_encodings[] = {
/* Regular encodings take their oflags from the ei_<xxx>_oflags constant. */
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#undef DEFENCODING
/* Entries from encodings_local.def always get oflags 0. */
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
#include "encodings_local.def"
#undef DEFENCODING
};
#undef DEFALIAS
  128: 
  129: /*
  130:  * Conversion loops.
  131:  */
  132: #include "loops.h"
  133: 
  134: /*
  135:  * Alias lookup function.
  136:  * Defines
  137:  *   struct alias { int name; unsigned int encoding_index; };
  138:  *   const struct alias * aliases_lookup (const char *str, unsigned int len);
  139:  *   #define MAX_WORD_LENGTH ...
  140:  */
  141: #if defined _AIX
  142: # include "aliases_sysaix.h"
  143: #elif defined hpux || defined __hpux
  144: # include "aliases_syshpux.h"
  145: #elif defined __osf__
  146: # include "aliases_sysosf1.h"
  147: #elif defined __sun
  148: # include "aliases_syssolaris.h"
  149: #else
  150: # include "aliases.h"
  151: #endif
  152: 
  153: /*
  154:  * System dependent alias lookup function.
  155:  * Defines
  156:  *   const struct alias * aliases2_lookup (const char *str);
  157:  */
  158: #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
  159: struct stringpool2_t {
  160: #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
  161: #include "aliases2.h"
  162: #undef S
  163: };
  164: static const struct stringpool2_t stringpool2_contents = {
  165: #define S(tag,name,encoding_index) name,
  166: #include "aliases2.h"
  167: #undef S
  168: };
  169: #define stringpool2 ((const char *) &stringpool2_contents)
  170: static const struct alias sysdep_aliases[] = {
  171: #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
  172: #include "aliases2.h"
  173: #undef S
  174: };
  175: #ifdef __GNUC__
  176: __inline
  177: #endif
  178: const struct alias *
  179: aliases2_lookup (register const char *str)
  180: {
  181:   const struct alias * ptr;
  182:   unsigned int count;
  183:   for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
  184:     if (!strcmp(str, stringpool2 + ptr->name))
  185:       return ptr;
  186:   return NULL;
  187: }
  188: #else
  189: #define aliases2_lookup(str)  NULL
  190: #define stringpool2  NULL
  191: #endif
  192: 
  193: #if 0
  194: /* Like !strcasecmp, except that the both strings can be assumed to be ASCII
  195:    and the first string can be assumed to be in uppercase. */
  196: static int strequal (const char* str1, const char* str2)
  197: {
  198:   unsigned char c1;
  199:   unsigned char c2;
  200:   for (;;) {
  201:     c1 = * (unsigned char *) str1++;
  202:     c2 = * (unsigned char *) str2++;
  203:     if (c1 == 0)
  204:       break;
  205:     if (c2 >= 'a' && c2 <= 'z')
  206:       c2 -= 'a'-'A';
  207:     if (c1 != c2)
  208:       break;
  209:   }
  210:   return (c1 == c2);
  211: }
  212: #endif
  213: 
/*
 * Allocates a conversion descriptor for converting from 'fromcode' to
 * 'tocode' on the heap.
 * Returns the descriptor cast to iconv_t. On failure returns (iconv_t)(-1)
 * with errno set to ENOMEM (allocation failed) or EINVAL ('invalid' label
 * reached).
 */
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int to_index;
  int to_wchar;
  int transliterate;
  int discard_ilseq;

  /* NOTE(review): iconv_open1.h is spliced in as statements here; it
     presumably parses both names into the locals declared above and jumps
     to 'invalid' for unknown names - confirm against that header. */
#include "iconv_open1.h"

  /* The larger wchar_conv_struct is allocated only when exactly one side
     of the conversion is flagged as wchar_t. */
  cd = (struct conv_struct *) malloc(from_wchar != to_wchar
                                     ? sizeof(struct wchar_conv_struct)
                                     : sizeof(struct conv_struct));
  if (cd == NULL) {
    errno = ENOMEM;
    return (iconv_t)(-1);
  }

  /* NOTE(review): iconv_open2.h presumably fills in *cd from the locals
     computed above - confirm against that header. */
#include "iconv_open2.h"

  return (iconv_t)cd;
  /* No goto in the visible code targets this label; it is presumably
     reached from the included header code. */
invalid:
  errno = EINVAL;
  return (iconv_t)(-1);
}
  241: 
  242: size_t iconv (iconv_t icd,
  243:               ICONV_CONST char* * inbuf, size_t *inbytesleft,
  244:               char* * outbuf, size_t *outbytesleft)
  245: {
  246:   conv_t cd = (conv_t) icd;
  247:   if (inbuf == NULL || *inbuf == NULL)
  248:     return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
  249:   else
  250:     return cd->lfuncs.loop_convert(icd,
  251:                                    (const char* *)inbuf,inbytesleft,
  252:                                    outbuf,outbytesleft);
  253: }
  254: 
  255: int iconv_close (iconv_t icd)
  256: {
  257:   conv_t cd = (conv_t) icd;
  258:   free(cd);
  259:   return 0;
  260: }
  261: 
  262: #ifndef LIBICONV_PLUG
  263: 
  264: /*
  265:  * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
  266:  * fit in an iconv_allocation_t.
  267:  * If this verification fails, iconv_allocation_t must be made larger and
  268:  * the major version in LIBICONV_VERSION_INFO must be bumped.
  269:  * Currently 'struct conv_struct' has 21 integer/pointer fields, and
  270:  * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
  271:  */
  272: typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
  273: typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
  274: 
/*
 * Variant of iconv_open that performs no allocation: the conversion
 * descriptor is built in the caller-provided storage *resultp.
 * Returns 0 on success. Returns -1 with errno set to EINVAL when the
 * 'invalid' label is reached.
 */
int iconv_open_into (const char* tocode, const char* fromcode,
                     iconv_allocation_t* resultp)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int to_index;
  int to_wchar;
  int transliterate;
  int discard_ilseq;

  /* NOTE(review): iconv_open1.h is spliced in as statements here; it
     presumably parses both names into the locals declared above and jumps
     to 'invalid' for unknown names - confirm against that header. */
#include "iconv_open1.h"

  /* Use the caller's buffer as the descriptor. */
  cd = (struct conv_struct *) resultp;

  /* NOTE(review): iconv_open2.h presumably fills in *cd from the locals
     computed above - confirm against that header. */
#include "iconv_open2.h"

  return 0;
  /* No goto in the visible code targets this label; it is presumably
     reached from the included header code. */
invalid:
  errno = EINVAL;
  return -1;
}
  297: 
  298: int iconvctl (iconv_t icd, int request, void* argument)
  299: {
  300:   conv_t cd = (conv_t) icd;
  301:   switch (request) {
  302:     case ICONV_TRIVIALP:
  303:       *(int *)argument =
  304:         ((cd->lfuncs.loop_convert == unicode_loop_convert
  305:           && cd->iindex == cd->oindex)
  306:          || cd->lfuncs.loop_convert == wchar_id_loop_convert
  307:          ? 1 : 0);
  308:       return 0;
  309:     case ICONV_GET_TRANSLITERATE:
  310:       *(int *)argument = cd->transliterate;
  311:       return 0;
  312:     case ICONV_SET_TRANSLITERATE:
  313:       cd->transliterate = (*(const int *)argument ? 1 : 0);
  314:       return 0;
  315:     case ICONV_GET_DISCARD_ILSEQ:
  316:       *(int *)argument = cd->discard_ilseq;
  317:       return 0;
  318:     case ICONV_SET_DISCARD_ILSEQ:
  319:       cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
  320:       return 0;
  321:     case ICONV_SET_HOOKS:
  322:       if (argument != NULL) {
  323:         cd->hooks = *(const struct iconv_hooks *)argument;
  324:       } else {
  325:         cd->hooks.uc_hook = NULL;
  326:         cd->hooks.wc_hook = NULL;
  327:         cd->hooks.data = NULL;
  328:       }
  329:       return 0;
  330:     case ICONV_SET_FALLBACKS:
  331:       if (argument != NULL) {
  332:         cd->fallbacks = *(const struct iconv_fallbacks *)argument;
  333:       } else {
  334:         cd->fallbacks.mb_to_uc_fallback = NULL;
  335:         cd->fallbacks.uc_to_mb_fallback = NULL;
  336:         cd->fallbacks.mb_to_wc_fallback = NULL;
  337:         cd->fallbacks.wc_to_mb_fallback = NULL;
  338:         cd->fallbacks.data = NULL;
  339:       }
  340:       return 0;
  341:     default:
  342:       errno = EINVAL;
  343:       return -1;
  344:   }
  345: }
  346: 
  347: /* An alias after its name has been converted from 'int' to 'const char*'. */
  348: struct nalias { const char* name; unsigned int encoding_index; };
  349: 
  350: static int compare_by_index (const void * arg1, const void * arg2)
  351: {
  352:   const struct nalias * alias1 = (const struct nalias *) arg1;
  353:   const struct nalias * alias2 = (const struct nalias *) arg2;
  354:   return (int)alias1->encoding_index - (int)alias2->encoding_index;
  355: }
  356: 
  357: static int compare_by_name (const void * arg1, const void * arg2)
  358: {
  359:   const char * name1 = *(const char **)arg1;
  360:   const char * name2 = *(const char **)arg2;
  361:   /* Compare alphabetically, but put "CS" names at the end. */
  362:   int sign = strcmp(name1,name2);
  363:   if (sign != 0) {
  364:     sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
  365:            * 4 + (sign >= 0 ? 1 : -1);
  366:   }
  367:   return sign;
  368: }
  369: 
  370: void iconvlist (int (*do_one) (unsigned int namescount,
  371:                                const char * const * names,
  372:                                void* data),
  373:                 void* data)
  374: {
  375: #define aliascount1  sizeof(aliases)/sizeof(aliases[0])
  376: #ifndef aliases2_lookup
  377: #define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
  378: #else
  379: #define aliascount2  0
  380: #endif
  381: #define aliascount  (aliascount1+aliascount2)
  382:   struct nalias aliasbuf[aliascount];
  383:   const char * namesbuf[aliascount];
  384:   size_t num_aliases;
  385:   {
  386:     /* Put all existing aliases into a buffer. */
  387:     size_t i;
  388:     size_t j;
  389:     j = 0;
  390:     for (i = 0; i < aliascount1; i++) {
  391:       const struct alias * p = &aliases[i];
  392:       if (p->name >= 0
  393:           && p->encoding_index != ei_local_char
  394:           && p->encoding_index != ei_local_wchar_t) {
  395:         aliasbuf[j].name = stringpool + p->name;
  396:         aliasbuf[j].encoding_index = p->encoding_index;
  397:         j++;
  398:       }
  399:     }
  400: #ifndef aliases2_lookup
  401:     for (i = 0; i < aliascount2; i++) {
  402:       aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
  403:       aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
  404:       j++;
  405:     }
  406: #endif
  407:     num_aliases = j;
  408:   }
  409:   /* Sort by encoding_index. */
  410:   if (num_aliases > 1)
  411:     qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
  412:   {
  413:     /* Process all aliases with the same encoding_index together. */
  414:     size_t j;
  415:     j = 0;
  416:     while (j < num_aliases) {
  417:       unsigned int ei = aliasbuf[j].encoding_index;
  418:       size_t i = 0;
  419:       do
  420:         namesbuf[i++] = aliasbuf[j++].name;
  421:       while (j < num_aliases && aliasbuf[j].encoding_index == ei);
  422:       if (i > 1)
  423:         qsort(namesbuf, i, sizeof(const char *), compare_by_name);
  424:       /* Call the callback. */
  425:       if (do_one(i,namesbuf,data))
  426:         break;
  427:     }
  428:   }
  429: #undef aliascount
  430: #undef aliascount2
  431: #undef aliascount1
  432: }
  433: 
/*
 * Table of canonical names of encodings.
 * Instead of strings, it contains offsets into stringpool and stringpool2.
 * iconv_canonicalize indexes it by encoding index and adds the offset to
 * the pool in which the alias was found.
 * The fragments are included in the order: main encodings, AIX, OSF/1, DOS,
 * extra, local. Platforms with their own alias tables (AIX, HP-UX, OSF/1,
 * Solaris) use matching platform-specific offset files.
 */
static const unsigned short all_canonical[] = {
/* Main encodings. */
#if defined _AIX
# include "canonical_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_syshpux.h"
#elif defined __osf__
# include "canonical_sysosf1.h"
#elif defined __sun
# include "canonical_syssolaris.h"
#else
# include "canonical.h"
#endif
/* AIX specific encodings. */
#ifdef USE_AIX
# if defined _AIX
#  include "canonical_aix_sysaix.h"
# else
#  include "canonical_aix.h"
# endif
#endif
/* OSF/1 specific encodings. */
#ifdef USE_OSF1
# if defined __osf__
#  include "canonical_osf1_sysosf1.h"
# else
#  include "canonical_osf1.h"
# endif
#endif
/* DOS specific encodings. */
#ifdef USE_DOS
# include "canonical_dos.h"
#endif
/* Extra encodings. */
#ifdef USE_EXTRA
# include "canonical_extra.h"
#endif
/* Local encodings. */
#if defined _AIX
# include "canonical_local_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_local_syshpux.h"
#elif defined __osf__
# include "canonical_local_sysosf1.h"
#elif defined __sun
# include "canonical_local_syssolaris.h"
#else
# include "canonical_local.h"
#endif
};
  482: 
/*
 * Returns the canonical name of the encoding designated by 'name'.
 * The lookup is done on an upper-cased copy of the name from which trailing
 * "//TRANSLIT" and "//IGNORE" suffixes have been removed. An empty name and
 * aliases of ei_local_char are resolved through locale_charset().
 * The result points into stringpool or stringpool2. If the name cannot be
 * resolved (non-ASCII byte, name too long, unknown alias, or an empty
 * locale_charset() result), the 'name' argument itself is returned.
 */
const char * iconv_canonicalize (const char * name)
{
  const char* code;
  /* NOTE(review): the extra 10 bytes presumably leave room for one
     "//TRANSLIT" suffix beyond the longest alias - confirm. */
  char buf[MAX_WORD_LENGTH+10+1];
  const char* cp;
  char* bp;
  const struct alias * ap;
  unsigned int count;
  unsigned int index;
  const char* pool;

  /* Before calling aliases_lookup, convert the input string to upper case,
   * and check whether it's entirely ASCII (we call gperf with option "-7"
   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   * or if it's too long, it is not a valid encoding name.
   */
  /* Each iteration resolves 'code'; the loop is re-entered only after
     'code' has been replaced by the locale's charset name. */
  for (code = name;;) {
    /* Search code in the table. */
    /* Copy 'code' into buf in upper case; 'count' bounds the number of
       bytes written to the size of buf. */
    for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
      unsigned char c = * (unsigned char *) cp;
      if (c >= 0x80)
        goto invalid;
      if (c >= 'a' && c <= 'z')
        c -= 'a'-'A';
      *bp = c;
      if (c == '\0')
        break;
      if (--count == 0)
        goto invalid;
    }
    /* Here bp points at the terminating NUL. Strip trailing suffixes,
       repeatedly and in any order. */
    for (;;) {
      if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
        bp -= 10;
        *bp = '\0';
        continue;
      }
      if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
        bp -= 8;
        *bp = '\0';
        continue;
      }
      break;
    }
    /* An empty name stands for the locale's charset. */
    if (buf[0] == '\0') {
      code = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (code[0] == '\0')
        goto invalid;
      continue;
    }
    /* Try the main alias table first, then the system dependent one;
       'pool' records which string pool the result's offset refers to. */
    pool = stringpool;
    ap = aliases_lookup(buf,bp-buf);
    if (ap == NULL) {
      pool = stringpool2;
      ap = aliases2_lookup(buf);
      if (ap == NULL)
        goto invalid;
    }
    /* The alias for the locale's 'char' encoding is resolved through
       locale_charset() as well. */
    if (ap->encoding_index == ei_local_char) {
      code = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (code[0] == '\0')
        goto invalid;
      continue;
    }
    if (ap->encoding_index == ei_local_wchar_t) {
      /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
         This is also the case on native Woe32 systems.  */
#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
      /* Map wchar_t to the fixed encoding that matches its size. */
      if (sizeof(wchar_t) == 4) {
        index = ei_ucs4internal;
        break;
      }
      if (sizeof(wchar_t) == 2) {
        index = ei_ucs2internal;
        break;
      }
      if (sizeof(wchar_t) == 1) {
        index = ei_iso8859_1;
        break;
      }
#endif
    }
    /* Ordinary case: use the encoding index of the alias that was found. */
    index = ap->encoding_index;
    break;
  }
  return all_canonical[index] + pool;
 invalid:
  return name;
}
  575: 
/* Global variable initialized with the compile-time value of
   _LIBICONV_VERSION. */
int _libiconv_version = _LIBICONV_VERSION;

#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.  */
/* Two macro levels, so that the 'name' argument is macro-expanded before
   it is stringified by #name. */
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* NOTE(review): the three names are presumably macros mapping to the
   libiconv_* symbols (from iconv.h); they are undefined here so that the
   plain names can be declared as aliases - confirm against iconv.h. */
#undef iconv_open
#undef iconv
#undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
  591: 
  592: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>