File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / iconv.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /*
    2:  * Copyright (C) 1999-2008, 2011, 2016, 2018 Free Software Foundation, Inc.
    3:  * This file is part of the GNU LIBICONV Library.
    4:  *
    5:  * The GNU LIBICONV Library is free software; you can redistribute it
    6:  * and/or modify it under the terms of the GNU Library General Public
    7:  * License as published by the Free Software Foundation; either version 2
    8:  * of the License, or (at your option) any later version.
    9:  *
   10:  * The GNU LIBICONV Library is distributed in the hope that it will be
   11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:  * Library General Public License for more details.
   14:  *
   15:  * You should have received a copy of the GNU Library General Public
   16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   17:  * If not, see <https://www.gnu.org/licenses/>.
   18:  */
   19: 
   20: #include <iconv.h>
   21: 
   22: #include <limits.h>
   23: #include <stdlib.h>
   24: #include <string.h>
   25: #include "config.h"
   26: #include "localcharset.h"
   27: 
   28: #ifdef __CYGWIN__
   29: #include <cygwin/version.h>
   30: #endif
   31: 
   /*
    * Select which groups of system dependent encodings get compiled in.
    * Each selected group is announced by defining the matching USE_* macro.
    */
   #if ENABLE_EXTRA
   /* Extra build: enable every system dependent group plus the extra one. */
   # define USE_AIX
   # define USE_OSF1
   # define USE_DOS
   # define USE_EXTRA
   #else
   /* Regular build: enable only the groups that match the current platform. */
   # if defined(_AIX)
   #  define USE_AIX
   # endif
   # if defined(__osf__) || defined(VMS)
   #  define USE_OSF1
   # endif
   # if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
   #  define USE_DOS
   # endif
   #endif
   56: 
   57: /*
   58:  * Data type for general conversion loop.
   59:  */
   60: struct loop_funcs {
   61:   size_t (*loop_convert) (iconv_t icd,
   62:                           const char* * inbuf, size_t *inbytesleft,
   63:                           char* * outbuf, size_t *outbytesleft);
   64:   size_t (*loop_reset) (iconv_t icd,
   65:                         char* * outbuf, size_t *outbytesleft);
   66: };
   67: 
   68: /*
   69:  * Converters.
   70:  */
   71: #include "converters.h"
   72: 
   73: /*
   74:  * Transliteration tables.
   75:  */
   76: #include "cjk_variants.h"
   77: #include "translit.h"
   78: 
   79: /*
   80:  * Table of all supported encodings.
        *
        * The encodings*.def files are included twice, each time with a
        * different definition of DEFENCODING: the first pass generates the
        * ei_<name> enumerators, the second pass generates the initializers of
        * all_encodings[].  Both passes include the files in the same order,
        * presumably so that an ei_<name> value can be used as an index into
        * all_encodings[] (the indexing itself is not visible in this file).
        * DEFALIAS expands to nothing during both passes.
   81:  */
   82: struct encoding {
   83:   struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
   84:   struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
   85:   int oflags;                 /* flags for unicode -> multibyte conversion */
   86: };
   87: #define DEFALIAS(xxx_alias,xxx) /* nothing */
   88: enum { /* first pass: one ei_<name> enumerator per DEFENCODING entry */
   89: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
   90:   ei_##xxx ,
   91: #include "encodings.def"
   92: #ifdef USE_AIX
   93: # include "encodings_aix.def"
   94: #endif
   95: #ifdef USE_OSF1
   96: # include "encodings_osf1.def"
   97: #endif
   98: #ifdef USE_DOS
   99: # include "encodings_dos.def"
  100: #endif
  101: #ifdef USE_EXTRA
  102: # include "encodings_extra.def"
  103: #endif
  104: #include "encodings_local.def"
  105: #undef DEFENCODING
  106: ei_for_broken_compilers_that_dont_like_trailing_commas /* final enumerator, so the list never ends in a comma */
  107: };
  108: #include "flags.h"
  109: static struct encoding const all_encodings[] = { /* second pass: one initializer per DEFENCODING entry */
  110: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  111:   { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
  112: #include "encodings.def"
  113: #ifdef USE_AIX
  114: # include "encodings_aix.def"
  115: #endif
  116: #ifdef USE_OSF1
  117: # include "encodings_osf1.def"
  118: #endif
  119: #ifdef USE_DOS
  120: # include "encodings_dos.def"
  121: #endif
  122: #ifdef USE_EXTRA
  123: # include "encodings_extra.def"
  124: #endif
  125: #undef DEFENCODING /* the entries of encodings_local.def below get oflags 0 instead of ei_<name>_oflags */
  126: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  127:   { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
  128: #include "encodings_local.def"
  129: #undef DEFENCODING
  130: };
  131: #undef DEFALIAS
  132: 
  133: /*
  134:  * Conversion loops.
  135:  */
  136: #include "loops.h"
  137: 
  138: /*
  139:  * Alias lookup function.
  140:  * Defines
  141:  *   struct alias { int name; unsigned int encoding_index; };
  142:  *   const struct alias * aliases_lookup (const char *str, unsigned int len);
  143:  *   #define MAX_WORD_LENGTH ...
  144:  */
  145: #if defined _AIX
  146: # include "aliases_sysaix.h"
  147: #elif defined hpux || defined __hpux
  148: # include "aliases_syshpux.h"
  149: #elif defined __osf__
  150: # include "aliases_sysosf1.h"
  151: #elif defined __sun
  152: # include "aliases_syssolaris.h"
  153: #else
  154: # include "aliases.h"
  155: #endif
  156: 
  157: /*
  158:  * System dependent alias lookup function.
  159:  * Defines
  160:  *   const struct alias * aliases2_lookup (const char *str);
        *
        * "aliases2.h" is included three times, each time with a different
        * definition of the S(tag,name,encoding_index) macro:
        *   1. to declare one char array member per alias in struct stringpool2_t,
        *   2. to initialize those members with the alias names,
        *   3. to build sysdep_aliases[], whose 'name' field is the offset of the
        *      corresponding member inside struct stringpool2_t.
        * The stringpool2 macro views the initialized struct as a plain char
        * array, so that 'stringpool2 + alias.name' yields the alias name.
  161:  */
  162: #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
  163: struct stringpool2_t { /* each member is sized with sizeof(name) */
  164: #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
  165: #include "aliases2.h"
  166: #undef S
  167: };
  168: static const struct stringpool2_t stringpool2_contents = { /* the alias names themselves */
  169: #define S(tag,name,encoding_index) name,
  170: #include "aliases2.h"
  171: #undef S
  172: };
  173: #define stringpool2 ((const char *) &stringpool2_contents)
  174: static const struct alias sysdep_aliases[] = { /* { offset of the name within the pool, encoding index } */
  175: #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
  176: #include "aliases2.h"
  177: #undef S
  178: };
  179: #ifdef __GNUC__
  180: __inline
  181: #else
  182: #ifdef __cplusplus
  183: inline
  184: #endif
  185: #endif
  186: static const struct alias *
  187: aliases2_lookup (register const char *str)
  188: {
  189:   const struct alias * ptr;
  190:   unsigned int count;
  191:   for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
  192:     if (!strcmp(str, stringpool2 + ptr->name))
  193:       return ptr;
  194:   return NULL;
  195: }
  196: #else /* none of USE_AIX, USE_OSF1, USE_DOS, USE_EXTRA is defined */
  197: #define aliases2_lookup(str)  NULL /* lookup always fails; iconvlist() also tests this macro with #ifndef */
  198: #define stringpool2  NULL /* there is no second string pool in this configuration */
  199: #endif
  200: 
  201: #if 0
  202: /* Like !strcasecmp, except that both strings can be assumed to be ASCII
  203:    and the first string can be assumed to be in uppercase. */
  static int strequal (const char* str1, const char* str2)
  {
    /* Walk both strings as unsigned bytes. */
    const unsigned char * upper = (const unsigned char *) str1;
    const unsigned char * other = (const unsigned char *) str2;
    unsigned char a;
    unsigned char b;
    do {
      a = *upper++;
      b = *other++;
      /* End of the first string: compare against the raw byte of the second. */
      if (a == 0)
        break;
      /* Fold ASCII lowercase of the second string to uppercase. */
      if (b >= 'a' && b <= 'z')
        b = (unsigned char) (b - ('a'-'A'));
    } while (a == b);
    return (a == b);
  }
  220: #endif
  221: 
  222: iconv_t iconv_open (const char* tocode, const char* fromcode)
  223: {
  224:   struct conv_struct * cd;
  225:   unsigned int from_index;
  226:   int from_wchar;
  227:   unsigned int to_index;
  228:   int to_wchar;
  229:   int transliterate;
  230:   int discard_ilseq;
  231: 
  232: #include "iconv_open1.h"
  233: 
  234:   cd = (struct conv_struct *) malloc(from_wchar != to_wchar
  235:                                      ? sizeof(struct wchar_conv_struct)
  236:                                      : sizeof(struct conv_struct));
  237:   if (cd == NULL) {
  238:     errno = ENOMEM;
  239:     return (iconv_t)(-1);
  240:   }
  241: 
  242: #include "iconv_open2.h"
  243: 
  244:   return (iconv_t)cd;
  245: invalid:
  246:   errno = EINVAL;
  247:   return (iconv_t)(-1);
  248: }
  249: 
  250: size_t iconv (iconv_t icd,
  251:               ICONV_CONST char* * inbuf, size_t *inbytesleft,
  252:               char* * outbuf, size_t *outbytesleft)
  253: {
  254:   conv_t cd = (conv_t) icd;
  255:   if (inbuf == NULL || *inbuf == NULL)
  256:     return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
  257:   else
  258:     return cd->lfuncs.loop_convert(icd,
  259:                                    (const char* *)inbuf,inbytesleft,
  260:                                    outbuf,outbytesleft);
  261: }
  262: 
  263: int iconv_close (iconv_t icd)
  264: {
  265:   conv_t cd = (conv_t) icd;
  266:   free(cd);
  267:   return 0;
  268: }
  269: 
  270: #ifndef LIBICONV_PLUG
  271: 
  272: /*
  273:  * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
  274:  * fit in an iconv_allocation_t.
  275:  * If this verification fails, iconv_allocation_t must be made larger and
  276:  * the major version in LIBICONV_VERSION_INFO must be bumped.
  277:  * Currently 'struct conv_struct' has 21 integer/pointer fields, and
  278:  * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
  279:  */
  280: typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
  281: typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
  282: 
  283: int iconv_open_into (const char* tocode, const char* fromcode,
  284:                      iconv_allocation_t* resultp)
  285: {
  286:   struct conv_struct * cd;
  287:   unsigned int from_index;
  288:   int from_wchar;
  289:   unsigned int to_index;
  290:   int to_wchar;
  291:   int transliterate;
  292:   int discard_ilseq;
  293: 
  294: #include "iconv_open1.h"
  295: 
  296:   cd = (struct conv_struct *) resultp;
  297: 
  298: #include "iconv_open2.h"
  299: 
  300:   return 0;
  301: invalid:
  302:   errno = EINVAL;
  303:   return -1;
  304: }
  305: 
  306: int iconvctl (iconv_t icd, int request, void* argument)
  307: {
  308:   conv_t cd = (conv_t) icd;
  309:   switch (request) {
  310:     case ICONV_TRIVIALP:
  311:       *(int *)argument =
  312:         ((cd->lfuncs.loop_convert == unicode_loop_convert
  313:           && cd->iindex == cd->oindex)
  314:          || cd->lfuncs.loop_convert == wchar_id_loop_convert
  315:          ? 1 : 0);
  316:       return 0;
  317:     case ICONV_GET_TRANSLITERATE:
  318:       *(int *)argument = cd->transliterate;
  319:       return 0;
  320:     case ICONV_SET_TRANSLITERATE:
  321:       cd->transliterate = (*(const int *)argument ? 1 : 0);
  322:       return 0;
  323:     case ICONV_GET_DISCARD_ILSEQ:
  324:       *(int *)argument = cd->discard_ilseq;
  325:       return 0;
  326:     case ICONV_SET_DISCARD_ILSEQ:
  327:       cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
  328:       return 0;
  329:     case ICONV_SET_HOOKS:
  330:       if (argument != NULL) {
  331:         cd->hooks = *(const struct iconv_hooks *)argument;
  332:       } else {
  333:         cd->hooks.uc_hook = NULL;
  334:         cd->hooks.wc_hook = NULL;
  335:         cd->hooks.data = NULL;
  336:       }
  337:       return 0;
  338:     case ICONV_SET_FALLBACKS:
  339:       if (argument != NULL) {
  340:         cd->fallbacks = *(const struct iconv_fallbacks *)argument;
  341:       } else {
  342:         cd->fallbacks.mb_to_uc_fallback = NULL;
  343:         cd->fallbacks.uc_to_mb_fallback = NULL;
  344:         cd->fallbacks.mb_to_wc_fallback = NULL;
  345:         cd->fallbacks.wc_to_mb_fallback = NULL;
  346:         cd->fallbacks.data = NULL;
  347:       }
  348:       return 0;
  349:     default:
  350:       errno = EINVAL;
  351:       return -1;
  352:   }
  353: }
  354: 
  /* An alias after its name has been converted from 'int' to 'const char*'. */
  struct nalias { const char* name; unsigned int encoding_index; };

  /*
   * qsort() comparator that orders 'struct nalias' elements by ascending
   * encoding_index.
   *
   * Returns -1, 0 or 1.  The indices are compared as the unsigned values
   * they are, instead of subtracting them after a cast to 'int': that cast is
   * implementation-defined for values above INT_MAX and the subtraction can
   * overflow.
   */
  static int compare_by_index (const void * arg1, const void * arg2)
  {
    const struct nalias * alias1 = (const struct nalias *) arg1;
    const struct nalias * alias2 = (const struct nalias *) arg2;
    unsigned int index1 = alias1->encoding_index;
    unsigned int index2 = alias2->encoding_index;
    return (index1 > index2) - (index1 < index2);
  }
  364: 
  /*
   * qsort() comparator for an array of 'const char *' names.
   *
   * Names are ordered alphabetically (strcmp), except that names starting
   * with "CS" sort after all other names.  Returns 0 for equal names and
   * otherwise one of -5, -3, -1, 1, 3, 5.
   */
  static int compare_by_name (const void * arg1, const void * arg2)
  {
    const char * name1 = *(const char **)arg1;
    const char * name2 = *(const char **)arg2;
    int order = strcmp(name1,name2);
    int cs1;
    int cs2;

    if (order == 0)
      return 0;

    /* 1 when the name carries the "CS" prefix, 0 otherwise. */
    cs1 = (name1[0]=='C' && name1[1]=='S');
    cs2 = (name2[0]=='C' && name2[1]=='S');

    /* The prefix difference dominates; the strcmp result breaks ties. */
    return 4 * (cs1 - cs2) + (order > 0 ? 1 : -1);
  }
  377: 
  378: void iconvlist (int (*do_one) (unsigned int namescount,
  379:                                const char * const * names,
  380:                                void* data),
  381:                 void* data)
  382: {
  383: #define aliascount1  sizeof(aliases)/sizeof(aliases[0])
  384: #ifndef aliases2_lookup
  385: #define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
  386: #else
  387: #define aliascount2  0
  388: #endif
  389: #define aliascount  (aliascount1+aliascount2)
  390:   struct nalias aliasbuf[aliascount];
  391:   const char * namesbuf[aliascount];
  392:   size_t num_aliases;
  393:   {
  394:     /* Put all existing aliases into a buffer. */
  395:     size_t i;
  396:     size_t j;
  397:     j = 0;
  398:     for (i = 0; i < aliascount1; i++) {
  399:       const struct alias * p = &aliases[i];
  400:       if (p->name >= 0
  401:           && p->encoding_index != ei_local_char
  402:           && p->encoding_index != ei_local_wchar_t) {
  403:         aliasbuf[j].name = stringpool + p->name;
  404:         aliasbuf[j].encoding_index = p->encoding_index;
  405:         j++;
  406:       }
  407:     }
  408: #ifndef aliases2_lookup
  409:     for (i = 0; i < aliascount2; i++) {
  410:       aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
  411:       aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
  412:       j++;
  413:     }
  414: #endif
  415:     num_aliases = j;
  416:   }
  417:   /* Sort by encoding_index. */
  418:   if (num_aliases > 1)
  419:     qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
  420:   {
  421:     /* Process all aliases with the same encoding_index together. */
  422:     size_t j;
  423:     j = 0;
  424:     while (j < num_aliases) {
  425:       unsigned int ei = aliasbuf[j].encoding_index;
  426:       size_t i = 0;
  427:       do
  428:         namesbuf[i++] = aliasbuf[j++].name;
  429:       while (j < num_aliases && aliasbuf[j].encoding_index == ei);
  430:       if (i > 1)
  431:         qsort(namesbuf, i, sizeof(const char *), compare_by_name);
  432:       /* Call the callback. */
  433:       if (do_one(i,namesbuf,data))
  434:         break;
  435:     }
  436:   }
  437: #undef aliascount
  438: #undef aliascount2
  439: #undef aliascount1
  440: }
  441: 
  442: /*
  443:  * Table of canonical names of encodings.
  444:  * Instead of strings, it contains offsets into stringpool and stringpool2.
        *
        * The table is assembled from included fragments in a fixed order:
        *   1. one platform specific base fragment (AIX, HP-UX, OSF/1, Solaris,
        *      or the generic "canonical.h"),
        *   2. optional fragments for USE_AIX, USE_OSF1, USE_DOS and USE_EXTRA,
        *   3. one platform specific "canonical_local*" fragment.
        * iconv_canonicalize() adds an entry of this table to the string pool
        * selected by its lookup to form the returned name.
  445:  */
  446: static const unsigned short all_canonical[] = {
  447: #if defined _AIX
  448: # include "canonical_sysaix.h"
  449: #elif defined hpux || defined __hpux
  450: # include "canonical_syshpux.h"
  451: #elif defined __osf__
  452: # include "canonical_sysosf1.h"
  453: #elif defined __sun
  454: # include "canonical_syssolaris.h"
  455: #else
  456: # include "canonical.h"
  457: #endif
  458: #ifdef USE_AIX
  459: # if defined _AIX
  460: #  include "canonical_aix_sysaix.h"
  461: # else
  462: #  include "canonical_aix.h"
  463: # endif
  464: #endif
  465: #ifdef USE_OSF1
  466: # if defined __osf__
  467: #  include "canonical_osf1_sysosf1.h"
  468: # else
  469: #  include "canonical_osf1.h"
  470: # endif
  471: #endif
  472: #ifdef USE_DOS
  473: # include "canonical_dos.h"
  474: #endif
  475: #ifdef USE_EXTRA
  476: # include "canonical_extra.h"
  477: #endif
  478: #if defined _AIX
  479: # include "canonical_local_sysaix.h"
  480: #elif defined hpux || defined __hpux
  481: # include "canonical_local_syshpux.h"
  482: #elif defined __osf__
  483: # include "canonical_local_sysosf1.h"
  484: #elif defined __sun
  485: # include "canonical_local_syssolaris.h"
  486: #else
  487: # include "canonical_local.h"
  488: #endif
  489: };
  490: 
  491: const char * iconv_canonicalize (const char * name)
  492: {
       /*
        * Maps the encoding name 'name' to the canonical name of that encoding.
        *
        * Steps, repeated until a definite encoding index is found:
        *   1. Copy the current name into 'buf', folding ASCII lowercase to
        *      uppercase.  A byte >= 0x80 or a name that does not fit into
        *      'buf' makes the name invalid.
        *   2. Strip any trailing "//TRANSLIT" and "//IGNORE" suffixes.
        *   3. An empty name is replaced by locale_charset() and processed again.
        *   4. Look the name up with aliases_lookup(), then aliases2_lookup();
        *      'pool' records which string pool belongs to the table that matched.
        *   5. An alias of the local 'char' encoding is replaced by
        *      locale_charset() and processed again; an alias of the local
        *      'wchar_t' encoding is mapped to a concrete encoding where the
        *      platform conditions below allow it.
        *
        * Returns all_canonical[index] + pool on success.  Returns 'name'
        * itself, unchanged, when the name is invalid or unknown.
        *
        * NOTE(review): when the wchar_t branch selects a fixed index, 'pool'
        * stays whatever the alias lookup selected; this presumably relies on
        * that alias living in the main stringpool -- TODO confirm.
        */
  493:   const char* code;
  494:   char buf[MAX_WORD_LENGTH+10+1]; /* upper-cased working copy of the name */
  495:   const char* cp;
  496:   char* bp;
  497:   const struct alias * ap;
  498:   unsigned int count;
  499:   unsigned int index;
  500:   const char* pool; /* string pool that matches the table 'ap' came from */
  501: 
  502:   /* Before calling aliases_lookup, convert the input string to upper case,
  503:    * and check whether it's entirely ASCII (we call gperf with option "-7"
  504:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
  505:    * or if it's too long, it is not a valid encoding name.
  506:    */
  507:   for (code = name;;) {
  508:     /* Search code in the table. */
  509:     for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  510:       unsigned char c = * (unsigned char *) cp;
  511:       if (c >= 0x80)
  512:         goto invalid;
  513:       if (c >= 'a' && c <= 'z')
  514:         c -= 'a'-'A';
  515:       *bp = c;
  516:       if (c == '\0')
  517:         break;
  518:       if (--count == 0) /* buf is full and no terminator was seen: too long */
  519:         goto invalid;
  520:     }
  521:     for (;;) { /* strip the suffixes repeatedly, in any order */
  522:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  523:         bp -= 10;
  524:         *bp = '\0';
  525:         continue;
  526:       }
  527:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  528:         bp -= 8;
  529:         *bp = '\0';
  530:         continue;
  531:       }
  532:       break;
  533:     }
  534:     if (buf[0] == '\0') { /* empty name: use the locale's charset instead */
  535:       code = locale_charset();
  536:       /* Avoid an endless loop that could occur when using an older version
  537:          of localcharset.c. */
  538:       if (code[0] == '\0')
  539:         goto invalid;
  540:       continue;
  541:     }
  542:     pool = stringpool;
  543:     ap = aliases_lookup(buf,bp-buf);
  544:     if (ap == NULL) { /* not in the main table: try the system dependent aliases */
  545:       pool = stringpool2;
  546:       ap = aliases2_lookup(buf);
  547:       if (ap == NULL)
  548:         goto invalid;
  549:     }
  550:     if (ap->encoding_index == ei_local_char) {
  551:       code = locale_charset();
  552:       /* Avoid an endless loop that could occur when using an older version
  553:          of localcharset.c. */
  554:       if (code[0] == '\0')
  555:         goto invalid;
  556:       continue;
  557:     }
  558:     if (ap->encoding_index == ei_local_wchar_t) {
  559:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  560:          This is also the case on native Woe32 systems and Cygwin >= 1.7, where
  561:          we know that it is UTF-16.  */
  562: #if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
  563:       if (sizeof(wchar_t) == 4) {
  564:         index = ei_ucs4internal;
  565:         break;
  566:       }
  567:       if (sizeof(wchar_t) == 2) {
  568: # if WORDS_LITTLEENDIAN
  569:         index = ei_utf16le;
  570: # else
  571:         index = ei_utf16be;
  572: # endif
  573:         break;
  574:       }
  575: #elif __STDC_ISO_10646__
  576:       if (sizeof(wchar_t) == 4) {
  577:         index = ei_ucs4internal;
  578:         break;
  579:       }
  580:       if (sizeof(wchar_t) == 2) {
  581:         index = ei_ucs2internal;
  582:         break;
  583:       }
  584:       if (sizeof(wchar_t) == 1) {
  585:         index = ei_iso8859_1;
  586:         break;
  587:       }
  588: #endif
  589:     }
  590:     index = ap->encoding_index; /* ordinary alias, or wchar_t with no mapping chosen above */
  591:     break;
  592:   }
  593:   return all_canonical[index] + pool;
  594:  invalid:
  595:   return name; /* unknown or malformed names are returned unchanged */
  596: }
  597: 
  598: int _libiconv_version = _LIBICONV_VERSION; /* variable with external linkage, initialized from the _LIBICONV_VERSION macro */
  599: 
  600: #if defined __FreeBSD__ && !defined __gnu_freebsd__
  601: /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
  602:    It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.
          strong_alias(name, aliasname) declares 'aliasname' with the type of
          'name' and the attribute alias("name").  It goes through the helper
          _strong_alias so that its arguments are macro-expanded before '#name'
          turns the target into a string.
          The #undef lines remove any macro definitions of the three names
          (presumably <iconv.h> maps them to the libiconv_* names -- TODO
          confirm) so that the plain identifiers can be declared as aliases.  */
  603: #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
  604: #define _strong_alias(name, aliasname) \
  605:   extern __typeof (name) aliasname __attribute__ ((alias (#name)));
  606: #undef iconv_open
  607: #undef iconv
  608: #undef iconv_close
  609: strong_alias (libiconv_open, iconv_open)
  610: strong_alias (libiconv, iconv)
  611: strong_alias (libiconv_close, iconv_close)
  612: #endif
  613: 
  614: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>