Annotation of embedaddon/libiconv/lib/iconv.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  * Copyright (C) 1999-2008 Free Software Foundation, Inc.
                      3:  * This file is part of the GNU LIBICONV Library.
                      4:  *
                      5:  * The GNU LIBICONV Library is free software; you can redistribute it
                      6:  * and/or modify it under the terms of the GNU Library General Public
                      7:  * License as published by the Free Software Foundation; either version 2
                      8:  * of the License, or (at your option) any later version.
                      9:  *
                     10:  * The GNU LIBICONV Library is distributed in the hope that it will be
                     11:  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     12:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     13:  * Library General Public License for more details.
                     14:  *
                     15:  * You should have received a copy of the GNU Library General Public
                     16:  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
                     17:  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
                     18:  * Fifth Floor, Boston, MA 02110-1301, USA.
                     19:  */
                     20: 
#include <iconv.h>

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "localcharset.h"
                     27: 
#if ENABLE_EXTRA
/*
 * Consider all system dependent encodings, for any system,
 * and the extra encodings.
 * Each USE_xxx macro defined here is tested further down in this file to
 * decide which encodings_xxx.def / canonical_xxx.h tables get included.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_EXTRA
#else
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 * USE_EXTRA is never defined on this branch.
 */
#ifdef _AIX
#define USE_AIX
#endif
#if defined(__osf__) || defined(VMS)
#define USE_OSF1
#endif
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
#endif
                     52: 
                     53: /*
                     54:  * Data type for general conversion loop.
                     55:  */
                     56: struct loop_funcs {
                     57:   size_t (*loop_convert) (iconv_t icd,
                     58:                           const char* * inbuf, size_t *inbytesleft,
                     59:                           char* * outbuf, size_t *outbytesleft);
                     60:   size_t (*loop_reset) (iconv_t icd,
                     61:                         char* * outbuf, size_t *outbytesleft);
                     62: };
                     63: 
                     64: /*
                     65:  * Converters.
                     66:  */
                     67: #include "converters.h"
                     68: 
                     69: /*
                     70:  * Transliteration tables.
                     71:  */
                     72: #include "cjk_variants.h"
                     73: #include "translit.h"
                     74: 
/*
 * Table of all supported encodings.
 *
 * The encodings*.def files are expanded twice with different definitions of
 * DEFENCODING: first to produce the ei_xxx enumeration constants, then to
 * produce the all_encodings[] rows.  Both passes include the .def files in
 * the same order; NOTE(review): this presumably keeps each ei_xxx value equal
 * to the position of its row in all_encodings[] -- confirm against the .def
 * files, which are not visible here.
 */
struct encoding {
  struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
  struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
  int oflags;                 /* flags for unicode -> multibyte conversion */
};
/* Alias entries in the .def files expand to nothing in both passes. */
#define DEFALIAS(xxx_alias,xxx) /* nothing */
/* Pass 1: one enumeration constant ei_<xxx> per DEFENCODING entry. */
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  ei_##xxx ,
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#include "encodings_local.def"
#undef DEFENCODING
/* Final enumerator without a trailing comma, so the list ends cleanly. */
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
/* Pass 2: one row per DEFENCODING entry, with oflags taken from the
   ei_<xxx>_oflags name. */
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#undef DEFENCODING
/* Entries from encodings_local.def get oflags 0 instead of an
   ei_<xxx>_oflags constant. */
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
#include "encodings_local.def"
#undef DEFENCODING
};
#undef DEFALIAS
                    128: 
                    129: /*
                    130:  * Conversion loops.
                    131:  */
                    132: #include "loops.h"
                    133: 
                    134: /*
                    135:  * Alias lookup function.
                    136:  * Defines
                    137:  *   struct alias { int name; unsigned int encoding_index; };
                    138:  *   const struct alias * aliases_lookup (const char *str, unsigned int len);
                    139:  *   #define MAX_WORD_LENGTH ...
                    140:  */
                    141: #if defined _AIX
                    142: # include "aliases_sysaix.h"
                    143: #elif defined hpux || defined __hpux
                    144: # include "aliases_syshpux.h"
                    145: #elif defined __osf__
                    146: # include "aliases_sysosf1.h"
                    147: #elif defined __sun
                    148: # include "aliases_syssolaris.h"
                    149: #else
                    150: # include "aliases.h"
                    151: #endif
                    152: 
                    153: /*
                    154:  * System dependent alias lookup function.
                    155:  * Defines
                    156:  *   const struct alias * aliases2_lookup (const char *str);
                    157:  */
                    158: #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
                    159: struct stringpool2_t {
                    160: #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
                    161: #include "aliases2.h"
                    162: #undef S
                    163: };
                    164: static const struct stringpool2_t stringpool2_contents = {
                    165: #define S(tag,name,encoding_index) name,
                    166: #include "aliases2.h"
                    167: #undef S
                    168: };
                    169: #define stringpool2 ((const char *) &stringpool2_contents)
                    170: static const struct alias sysdep_aliases[] = {
                    171: #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
                    172: #include "aliases2.h"
                    173: #undef S
                    174: };
                    175: #ifdef __GNUC__
                    176: __inline
                    177: #endif
                    178: const struct alias *
                    179: aliases2_lookup (register const char *str)
                    180: {
                    181:   const struct alias * ptr;
                    182:   unsigned int count;
                    183:   for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
                    184:     if (!strcmp(str, stringpool2 + ptr->name))
                    185:       return ptr;
                    186:   return NULL;
                    187: }
                    188: #else
                    189: #define aliases2_lookup(str)  NULL
                    190: #define stringpool2  NULL
                    191: #endif
                    192: 
#if 0
/* NOTE: this helper is compiled out by the enclosing "#if 0".
   Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if the strings are equal under that comparison, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    c1 = * (unsigned char *) str1++;
    c2 = * (unsigned char *) str2++;
    /* End of str1: equal only if str2 ends here too (checked below). */
    if (c1 == 0)
      break;
    /* Fold only str2 to upper case; str1 is assumed to be upper case. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  return (c1 == c2);
}
#endif
                    213: 
                    214: iconv_t iconv_open (const char* tocode, const char* fromcode)
                    215: {
                    216:   struct conv_struct * cd;
                    217:   unsigned int from_index;
                    218:   int from_wchar;
                    219:   unsigned int to_index;
                    220:   int to_wchar;
                    221:   int transliterate;
                    222:   int discard_ilseq;
                    223: 
                    224: #include "iconv_open1.h"
                    225: 
                    226:   cd = (struct conv_struct *) malloc(from_wchar != to_wchar
                    227:                                      ? sizeof(struct wchar_conv_struct)
                    228:                                      : sizeof(struct conv_struct));
                    229:   if (cd == NULL) {
                    230:     errno = ENOMEM;
                    231:     return (iconv_t)(-1);
                    232:   }
                    233: 
                    234: #include "iconv_open2.h"
                    235: 
                    236:   return (iconv_t)cd;
                    237: invalid:
                    238:   errno = EINVAL;
                    239:   return (iconv_t)(-1);
                    240: }
                    241: 
                    242: size_t iconv (iconv_t icd,
                    243:               ICONV_CONST char* * inbuf, size_t *inbytesleft,
                    244:               char* * outbuf, size_t *outbytesleft)
                    245: {
                    246:   conv_t cd = (conv_t) icd;
                    247:   if (inbuf == NULL || *inbuf == NULL)
                    248:     return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
                    249:   else
                    250:     return cd->lfuncs.loop_convert(icd,
                    251:                                    (const char* *)inbuf,inbytesleft,
                    252:                                    outbuf,outbytesleft);
                    253: }
                    254: 
                    255: int iconv_close (iconv_t icd)
                    256: {
                    257:   conv_t cd = (conv_t) icd;
                    258:   free(cd);
                    259:   return 0;
                    260: }
                    261: 
                    262: #ifndef LIBICONV_PLUG
                    263: 
/*
 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
 * fit in an iconv_allocation_t.
 * If this verification fails, iconv_allocation_t must be made larger and
 * the major version in LIBICONV_VERSION_INFO must be bumped.
 * Currently 'struct conv_struct' has 21 integer/pointer fields, and
 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
 *
 * Mechanism: "2 * (condition) - 1" is 1 when the condition holds and -1
 * when it does not, so a failed check declares an array type of negative
 * size, which the compiler rejects.
 */
typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
                    274: 
/*
 * Variant of iconv_open that places the conversion descriptor in storage
 * supplied by the caller (resultp) instead of allocating it with malloc.
 * Returns 0 on success; when control reaches the 'invalid' label, sets
 * errno to EINVAL and returns -1.
 * The local variable names are shared with the two included fragments.
 */
int iconv_open_into (const char* tocode, const char* fromcode,
                     iconv_allocation_t* resultp)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int to_index;
  int to_wchar;
  int transliterate;
  int discard_ilseq;

  /* NOTE(review): presumably parses both names into the locals above and
     jumps to 'invalid' for unknown names -- fragment not visible here. */
#include "iconv_open1.h"

  /* The caller's buffer becomes the descriptor. */
  cd = (struct conv_struct *) resultp;

  /* NOTE(review): presumably fills in *cd from the locals -- fragment not
     visible here. */
#include "iconv_open2.h"

  return 0;
invalid:
  errno = EINVAL;
  return -1;
}
                    297: 
                    298: int iconvctl (iconv_t icd, int request, void* argument)
                    299: {
                    300:   conv_t cd = (conv_t) icd;
                    301:   switch (request) {
                    302:     case ICONV_TRIVIALP:
                    303:       *(int *)argument =
                    304:         ((cd->lfuncs.loop_convert == unicode_loop_convert
                    305:           && cd->iindex == cd->oindex)
                    306:          || cd->lfuncs.loop_convert == wchar_id_loop_convert
                    307:          ? 1 : 0);
                    308:       return 0;
                    309:     case ICONV_GET_TRANSLITERATE:
                    310:       *(int *)argument = cd->transliterate;
                    311:       return 0;
                    312:     case ICONV_SET_TRANSLITERATE:
                    313:       cd->transliterate = (*(const int *)argument ? 1 : 0);
                    314:       return 0;
                    315:     case ICONV_GET_DISCARD_ILSEQ:
                    316:       *(int *)argument = cd->discard_ilseq;
                    317:       return 0;
                    318:     case ICONV_SET_DISCARD_ILSEQ:
                    319:       cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
                    320:       return 0;
                    321:     case ICONV_SET_HOOKS:
                    322:       if (argument != NULL) {
                    323:         cd->hooks = *(const struct iconv_hooks *)argument;
                    324:       } else {
                    325:         cd->hooks.uc_hook = NULL;
                    326:         cd->hooks.wc_hook = NULL;
                    327:         cd->hooks.data = NULL;
                    328:       }
                    329:       return 0;
                    330:     case ICONV_SET_FALLBACKS:
                    331:       if (argument != NULL) {
                    332:         cd->fallbacks = *(const struct iconv_fallbacks *)argument;
                    333:       } else {
                    334:         cd->fallbacks.mb_to_uc_fallback = NULL;
                    335:         cd->fallbacks.uc_to_mb_fallback = NULL;
                    336:         cd->fallbacks.mb_to_wc_fallback = NULL;
                    337:         cd->fallbacks.wc_to_mb_fallback = NULL;
                    338:         cd->fallbacks.data = NULL;
                    339:       }
                    340:       return 0;
                    341:     default:
                    342:       errno = EINVAL;
                    343:       return -1;
                    344:   }
                    345: }
                    346: 
                    347: /* An alias after its name has been converted from 'int' to 'const char*'. */
                    348: struct nalias { const char* name; unsigned int encoding_index; };
                    349: 
                    350: static int compare_by_index (const void * arg1, const void * arg2)
                    351: {
                    352:   const struct nalias * alias1 = (const struct nalias *) arg1;
                    353:   const struct nalias * alias2 = (const struct nalias *) arg2;
                    354:   return (int)alias1->encoding_index - (int)alias2->encoding_index;
                    355: }
                    356: 
                    357: static int compare_by_name (const void * arg1, const void * arg2)
                    358: {
                    359:   const char * name1 = *(const char **)arg1;
                    360:   const char * name2 = *(const char **)arg2;
                    361:   /* Compare alphabetically, but put "CS" names at the end. */
                    362:   int sign = strcmp(name1,name2);
                    363:   if (sign != 0) {
                    364:     sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
                    365:            * 4 + (sign >= 0 ? 1 : -1);
                    366:   }
                    367:   return sign;
                    368: }
                    369: 
                    370: void iconvlist (int (*do_one) (unsigned int namescount,
                    371:                                const char * const * names,
                    372:                                void* data),
                    373:                 void* data)
                    374: {
                    375: #define aliascount1  sizeof(aliases)/sizeof(aliases[0])
                    376: #ifndef aliases2_lookup
                    377: #define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
                    378: #else
                    379: #define aliascount2  0
                    380: #endif
                    381: #define aliascount  (aliascount1+aliascount2)
                    382:   struct nalias aliasbuf[aliascount];
                    383:   const char * namesbuf[aliascount];
                    384:   size_t num_aliases;
                    385:   {
                    386:     /* Put all existing aliases into a buffer. */
                    387:     size_t i;
                    388:     size_t j;
                    389:     j = 0;
                    390:     for (i = 0; i < aliascount1; i++) {
                    391:       const struct alias * p = &aliases[i];
                    392:       if (p->name >= 0
                    393:           && p->encoding_index != ei_local_char
                    394:           && p->encoding_index != ei_local_wchar_t) {
                    395:         aliasbuf[j].name = stringpool + p->name;
                    396:         aliasbuf[j].encoding_index = p->encoding_index;
                    397:         j++;
                    398:       }
                    399:     }
                    400: #ifndef aliases2_lookup
                    401:     for (i = 0; i < aliascount2; i++) {
                    402:       aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
                    403:       aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
                    404:       j++;
                    405:     }
                    406: #endif
                    407:     num_aliases = j;
                    408:   }
                    409:   /* Sort by encoding_index. */
                    410:   if (num_aliases > 1)
                    411:     qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
                    412:   {
                    413:     /* Process all aliases with the same encoding_index together. */
                    414:     size_t j;
                    415:     j = 0;
                    416:     while (j < num_aliases) {
                    417:       unsigned int ei = aliasbuf[j].encoding_index;
                    418:       size_t i = 0;
                    419:       do
                    420:         namesbuf[i++] = aliasbuf[j++].name;
                    421:       while (j < num_aliases && aliasbuf[j].encoding_index == ei);
                    422:       if (i > 1)
                    423:         qsort(namesbuf, i, sizeof(const char *), compare_by_name);
                    424:       /* Call the callback. */
                    425:       if (do_one(i,namesbuf,data))
                    426:         break;
                    427:     }
                    428:   }
                    429: #undef aliascount
                    430: #undef aliascount2
                    431: #undef aliascount1
                    432: }
                    433: 
/*
 * Table of canonical names of encodings.
 * Instead of strings, it contains offsets into stringpool and stringpool2.
 * iconv_canonicalize indexes this table by encoding index and adds the
 * entry to the pool in which the alias was found.
 * The pieces are included in the same order as the encodings*.def files
 * above (base, AIX, OSF/1, DOS, extra, local).
 * NOTE(review): this presumably keeps the table aligned with the ei_xxx
 * values -- confirm against the generated headers.
 */
static const unsigned short all_canonical[] = {
/* Base encodings; the header variant is chosen per host system. */
#if defined _AIX
# include "canonical_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_syshpux.h"
#elif defined __osf__
# include "canonical_sysosf1.h"
#elif defined __sun
# include "canonical_syssolaris.h"
#else
# include "canonical.h"
#endif
/* System dependent encodings, only when enabled by the USE_xxx macros. */
#ifdef USE_AIX
# if defined _AIX
#  include "canonical_aix_sysaix.h"
# else
#  include "canonical_aix.h"
# endif
#endif
#ifdef USE_OSF1
# if defined __osf__
#  include "canonical_osf1_sysosf1.h"
# else
#  include "canonical_osf1.h"
# endif
#endif
#ifdef USE_DOS
# include "canonical_dos.h"
#endif
#ifdef USE_EXTRA
# include "canonical_extra.h"
#endif
/* Local encodings come last; again one header variant per host system. */
#if defined _AIX
# include "canonical_local_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_local_syshpux.h"
#elif defined __osf__
# include "canonical_local_sysosf1.h"
#elif defined __sun
# include "canonical_local_syssolaris.h"
#else
# include "canonical_local.h"
#endif
};
                    482: 
                    483: const char * iconv_canonicalize (const char * name)
                    484: {
                    485:   const char* code;
                    486:   char buf[MAX_WORD_LENGTH+10+1];
                    487:   const char* cp;
                    488:   char* bp;
                    489:   const struct alias * ap;
                    490:   unsigned int count;
                    491:   unsigned int index;
                    492:   const char* pool;
                    493: 
                    494:   /* Before calling aliases_lookup, convert the input string to upper case,
                    495:    * and check whether it's entirely ASCII (we call gperf with option "-7"
                    496:    * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
                    497:    * or if it's too long, it is not a valid encoding name.
                    498:    */
                    499:   for (code = name;;) {
                    500:     /* Search code in the table. */
                    501:     for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
                    502:       unsigned char c = * (unsigned char *) cp;
                    503:       if (c >= 0x80)
                    504:         goto invalid;
                    505:       if (c >= 'a' && c <= 'z')
                    506:         c -= 'a'-'A';
                    507:       *bp = c;
                    508:       if (c == '\0')
                    509:         break;
                    510:       if (--count == 0)
                    511:         goto invalid;
                    512:     }
                    513:     for (;;) {
                    514:       if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
                    515:         bp -= 10;
                    516:         *bp = '\0';
                    517:         continue;
                    518:       }
                    519:       if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
                    520:         bp -= 8;
                    521:         *bp = '\0';
                    522:         continue;
                    523:       }
                    524:       break;
                    525:     }
                    526:     if (buf[0] == '\0') {
                    527:       code = locale_charset();
                    528:       /* Avoid an endless loop that could occur when using an older version
                    529:          of localcharset.c. */
                    530:       if (code[0] == '\0')
                    531:         goto invalid;
                    532:       continue;
                    533:     }
                    534:     pool = stringpool;
                    535:     ap = aliases_lookup(buf,bp-buf);
                    536:     if (ap == NULL) {
                    537:       pool = stringpool2;
                    538:       ap = aliases2_lookup(buf);
                    539:       if (ap == NULL)
                    540:         goto invalid;
                    541:     }
                    542:     if (ap->encoding_index == ei_local_char) {
                    543:       code = locale_charset();
                    544:       /* Avoid an endless loop that could occur when using an older version
                    545:          of localcharset.c. */
                    546:       if (code[0] == '\0')
                    547:         goto invalid;
                    548:       continue;
                    549:     }
                    550:     if (ap->encoding_index == ei_local_wchar_t) {
                    551:       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
                    552:          This is also the case on native Woe32 systems.  */
                    553: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
                    554:       if (sizeof(wchar_t) == 4) {
                    555:         index = ei_ucs4internal;
                    556:         break;
                    557:       }
                    558:       if (sizeof(wchar_t) == 2) {
                    559:         index = ei_ucs2internal;
                    560:         break;
                    561:       }
                    562:       if (sizeof(wchar_t) == 1) {
                    563:         index = ei_iso8859_1;
                    564:         break;
                    565:       }
                    566: #endif
                    567:     }
                    568:     index = ap->encoding_index;
                    569:     break;
                    570:   }
                    571:   return all_canonical[index] + pool;
                    572:  invalid:
                    573:   return name;
                    574: }
                    575: 
/* Global variable initialized with the _LIBICONV_VERSION macro value this
   file was compiled with. */
int _libiconv_version = _LIBICONV_VERSION;

#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.  */
/* The two-level macro lets 'name' and 'aliasname' be macro-expanded before
   #name stringizes the target symbol. */
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* Remove any macro definitions of these names so that the plain names can
   be declared as aliases below. */
#undef iconv_open
#undef iconv
#undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
                    591: 
                    592: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>