Annotation of embedaddon/libiconv/lib/iconv.c, revision 1.1
1.1 ! misho 1: /*
! 2: * Copyright (C) 1999-2008 Free Software Foundation, Inc.
! 3: * This file is part of the GNU LIBICONV Library.
! 4: *
! 5: * The GNU LIBICONV Library is free software; you can redistribute it
! 6: * and/or modify it under the terms of the GNU Library General Public
! 7: * License as published by the Free Software Foundation; either version 2
! 8: * of the License, or (at your option) any later version.
! 9: *
! 10: * The GNU LIBICONV Library is distributed in the hope that it will be
! 11: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
! 12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 13: * Library General Public License for more details.
! 14: *
! 15: * You should have received a copy of the GNU Library General Public
! 16: * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
! 17: * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
! 18: * Fifth Floor, Boston, MA 02110-1301, USA.
! 19: */
! 20:
! 21: #include <iconv.h>
! 22:
! 23: #include <stdlib.h>
! 24: #include <string.h>
! 25: #include "config.h"
! 26: #include "localcharset.h"
! 27:
#if ENABLE_EXTRA
/*
 * ENABLE_EXTRA is set: turn on every system dependent encoding group,
 * whatever the platform being compiled for, plus the extra encodings.
 */
#  define USE_AIX
#  define USE_OSF1
#  define USE_DOS
#  define USE_EXTRA
#else
/*
 * ENABLE_EXTRA is not set: turn on only the system dependent encoding
 * groups that match the platform being compiled for.
 */
#  if defined(_AIX)
#    define USE_AIX
#  endif
#  if defined(__osf__) || defined(VMS)
#    define USE_OSF1
#  endif
#  if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#    define USE_DOS
#  endif
#endif
! 52:
/*
 * The two entry points of a conversion loop.
 * iconv() calls loop_convert when it is handed an input buffer and
 * loop_reset when it is not.
 */
struct loop_funcs {
  size_t (*loop_convert) (iconv_t icd,
                          const char **inbuf, size_t *inbytesleft,
                          char **outbuf, size_t *outbytesleft);
  size_t (*loop_reset) (iconv_t icd,
                        char **outbuf, size_t *outbytesleft);
};
! 63:
! 64: /*
! 65: * Converters.
! 66: */
! 67: #include "converters.h"
! 68:
! 69: /*
! 70: * Transliteration tables.
! 71: */
! 72: #include "cjk_variants.h"
! 73: #include "translit.h"
! 74:
! 75: /*
! 76: * Table of all supported encodings.
! 77: */
! 78: struct encoding {
! 79: struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
! 80: struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
! 81: int oflags; /* flags for unicode -> multibyte conversion */
! 82: };
! 83: #define DEFALIAS(xxx_alias,xxx) /* nothing */
! 84: enum {
! 85: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
! 86: ei_##xxx ,
! 87: #include "encodings.def"
! 88: #ifdef USE_AIX
! 89: # include "encodings_aix.def"
! 90: #endif
! 91: #ifdef USE_OSF1
! 92: # include "encodings_osf1.def"
! 93: #endif
! 94: #ifdef USE_DOS
! 95: # include "encodings_dos.def"
! 96: #endif
! 97: #ifdef USE_EXTRA
! 98: # include "encodings_extra.def"
! 99: #endif
! 100: #include "encodings_local.def"
! 101: #undef DEFENCODING
! 102: ei_for_broken_compilers_that_dont_like_trailing_commas
! 103: };
! 104: #include "flags.h"
! 105: static struct encoding const all_encodings[] = {
! 106: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
! 107: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
! 108: #include "encodings.def"
! 109: #ifdef USE_AIX
! 110: # include "encodings_aix.def"
! 111: #endif
! 112: #ifdef USE_OSF1
! 113: # include "encodings_osf1.def"
! 114: #endif
! 115: #ifdef USE_DOS
! 116: # include "encodings_dos.def"
! 117: #endif
! 118: #ifdef USE_EXTRA
! 119: # include "encodings_extra.def"
! 120: #endif
! 121: #undef DEFENCODING
! 122: #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
! 123: { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
! 124: #include "encodings_local.def"
! 125: #undef DEFENCODING
! 126: };
! 127: #undef DEFALIAS
! 128:
! 129: /*
! 130: * Conversion loops.
! 131: */
! 132: #include "loops.h"
! 133:
! 134: /*
! 135: * Alias lookup function.
! 136: * Defines
! 137: * struct alias { int name; unsigned int encoding_index; };
! 138: * const struct alias * aliases_lookup (const char *str, unsigned int len);
! 139: * #define MAX_WORD_LENGTH ...
! 140: */
! 141: #if defined _AIX
! 142: # include "aliases_sysaix.h"
! 143: #elif defined hpux || defined __hpux
! 144: # include "aliases_syshpux.h"
! 145: #elif defined __osf__
! 146: # include "aliases_sysosf1.h"
! 147: #elif defined __sun
! 148: # include "aliases_syssolaris.h"
! 149: #else
! 150: # include "aliases.h"
! 151: #endif
! 152:
! 153: /*
! 154: * System dependent alias lookup function.
! 155: * Defines
! 156: * const struct alias * aliases2_lookup (const char *str);
! 157: */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */

/* A struct with one char array member per S() entry of aliases2.h, each
   member exactly large enough for the alias name and its terminating NUL. */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};

/* The alias names, laid out according to the struct above. */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};
#define stringpool2 ((const char *) &stringpool2_contents)

/* For each S() entry: the byte offset of its name within stringpool2_t,
   paired with its encoding index. */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
#include "aliases2.h"
#undef S
};

/*
 * Scan sysdep_aliases front to back for an entry whose name equals str
 * (exact strcmp match).  Returns that entry, or NULL when none matches.
 */
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const size_t entries = sizeof(sysdep_aliases) / sizeof(sysdep_aliases[0]);
  size_t k;

  for (k = 0; k < entries; k++) {
    const struct alias * entry = &sysdep_aliases[k];
    if (strcmp(str, stringpool2 + entry->name) == 0)
      return entry;
  }
  return NULL;
}

#else

/* No system dependent encoding group is enabled: every lookup misses. */
#define aliases2_lookup(str)  NULL
#define stringpool2  NULL

#endif
! 192:
! 193: #if 0
/* Case-insensitive equality test for two ASCII strings, where str1 is
   expected to be in upper case already: only the characters of str2 are
   folded to upper case before they are compared.
   Returns 1 if the strings are equal, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char * p1 = (const unsigned char *) str1;
  const unsigned char * p2 = (const unsigned char *) str2;

  for (;; p1++, p2++) {
    unsigned char upper1 = *p1;
    unsigned char ch2 = *p2;

    /* End of str1: equal only if str2 ends at the same position. */
    if (upper1 == 0)
      return (ch2 == 0);
    if (ch2 >= 'a' && ch2 <= 'z')
      ch2 -= 'a'-'A';
    if (upper1 != ch2)
      return 0;
  }
}
! 212: #endif
! 213:
! 214: iconv_t iconv_open (const char* tocode, const char* fromcode)
! 215: {
! 216: struct conv_struct * cd;
! 217: unsigned int from_index;
! 218: int from_wchar;
! 219: unsigned int to_index;
! 220: int to_wchar;
! 221: int transliterate;
! 222: int discard_ilseq;
! 223:
! 224: #include "iconv_open1.h"
! 225:
! 226: cd = (struct conv_struct *) malloc(from_wchar != to_wchar
! 227: ? sizeof(struct wchar_conv_struct)
! 228: : sizeof(struct conv_struct));
! 229: if (cd == NULL) {
! 230: errno = ENOMEM;
! 231: return (iconv_t)(-1);
! 232: }
! 233:
! 234: #include "iconv_open2.h"
! 235:
! 236: return (iconv_t)cd;
! 237: invalid:
! 238: errno = EINVAL;
! 239: return (iconv_t)(-1);
! 240: }
! 241:
! 242: size_t iconv (iconv_t icd,
! 243: ICONV_CONST char* * inbuf, size_t *inbytesleft,
! 244: char* * outbuf, size_t *outbytesleft)
! 245: {
! 246: conv_t cd = (conv_t) icd;
! 247: if (inbuf == NULL || *inbuf == NULL)
! 248: return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
! 249: else
! 250: return cd->lfuncs.loop_convert(icd,
! 251: (const char* *)inbuf,inbytesleft,
! 252: outbuf,outbytesleft);
! 253: }
! 254:
! 255: int iconv_close (iconv_t icd)
! 256: {
! 257: conv_t cd = (conv_t) icd;
! 258: free(cd);
! 259: return 0;
! 260: }
! 261:
! 262: #ifndef LIBICONV_PLUG
! 263:
! 264: /*
! 265: * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
! 266: * fit in an iconv_allocation_t.
! 267: * If this verification fails, iconv_allocation_t must be made larger and
! 268: * the major version in LIBICONV_VERSION_INFO must be bumped.
! 269: * Currently 'struct conv_struct' has 21 integer/pointer fields, and
! 270: * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
! 271: */
! 272: typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
! 273: typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
! 274:
! 275: int iconv_open_into (const char* tocode, const char* fromcode,
! 276: iconv_allocation_t* resultp)
! 277: {
! 278: struct conv_struct * cd;
! 279: unsigned int from_index;
! 280: int from_wchar;
! 281: unsigned int to_index;
! 282: int to_wchar;
! 283: int transliterate;
! 284: int discard_ilseq;
! 285:
! 286: #include "iconv_open1.h"
! 287:
! 288: cd = (struct conv_struct *) resultp;
! 289:
! 290: #include "iconv_open2.h"
! 291:
! 292: return 0;
! 293: invalid:
! 294: errno = EINVAL;
! 295: return -1;
! 296: }
! 297:
! 298: int iconvctl (iconv_t icd, int request, void* argument)
! 299: {
! 300: conv_t cd = (conv_t) icd;
! 301: switch (request) {
! 302: case ICONV_TRIVIALP:
! 303: *(int *)argument =
! 304: ((cd->lfuncs.loop_convert == unicode_loop_convert
! 305: && cd->iindex == cd->oindex)
! 306: || cd->lfuncs.loop_convert == wchar_id_loop_convert
! 307: ? 1 : 0);
! 308: return 0;
! 309: case ICONV_GET_TRANSLITERATE:
! 310: *(int *)argument = cd->transliterate;
! 311: return 0;
! 312: case ICONV_SET_TRANSLITERATE:
! 313: cd->transliterate = (*(const int *)argument ? 1 : 0);
! 314: return 0;
! 315: case ICONV_GET_DISCARD_ILSEQ:
! 316: *(int *)argument = cd->discard_ilseq;
! 317: return 0;
! 318: case ICONV_SET_DISCARD_ILSEQ:
! 319: cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
! 320: return 0;
! 321: case ICONV_SET_HOOKS:
! 322: if (argument != NULL) {
! 323: cd->hooks = *(const struct iconv_hooks *)argument;
! 324: } else {
! 325: cd->hooks.uc_hook = NULL;
! 326: cd->hooks.wc_hook = NULL;
! 327: cd->hooks.data = NULL;
! 328: }
! 329: return 0;
! 330: case ICONV_SET_FALLBACKS:
! 331: if (argument != NULL) {
! 332: cd->fallbacks = *(const struct iconv_fallbacks *)argument;
! 333: } else {
! 334: cd->fallbacks.mb_to_uc_fallback = NULL;
! 335: cd->fallbacks.uc_to_mb_fallback = NULL;
! 336: cd->fallbacks.mb_to_wc_fallback = NULL;
! 337: cd->fallbacks.wc_to_mb_fallback = NULL;
! 338: cd->fallbacks.data = NULL;
! 339: }
! 340: return 0;
! 341: default:
! 342: errno = EINVAL;
! 343: return -1;
! 344: }
! 345: }
! 346:
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort comparator ordering 'struct nalias' elements by ascending
 * encoding_index.  Returns -1, 0 or 1.
 *
 * The indices are compared directly rather than subtracted: a difference
 * computed after conversion to int gives the wrong sign, or overflows,
 * when the two unsigned values are far apart.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  const unsigned int index1 = ((const struct nalias *) arg1)->encoding_index;
  const unsigned int index2 = ((const struct nalias *) arg2)->encoding_index;

  return (index1 > index2) - (index1 < index2);
}
! 356:
/*
 * qsort comparator for an array of 'const char *' names.
 * Names are ordered alphabetically (by strcmp), except that every name
 * starting with "CS" sorts after every name that does not.
 * Returns 0 for equal names; otherwise the "CS" difference weighted by 4
 * plus +1 or -1 for the alphabetical order.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * const lhs = *(const char * const *) arg1;
  const char * const rhs = *(const char * const *) arg2;
  const int order = strcmp(lhs, rhs);
  int lhs_is_cs;
  int rhs_is_cs;

  if (order == 0)
    return 0;

  lhs_is_cs = (lhs[0] == 'C' && lhs[1] == 'S');
  rhs_is_cs = (rhs[0] == 'C' && rhs[1] == 'S');
  /* The weight 4 lets the "CS" criterion dominate the +/-1 tie-breaker. */
  return (lhs_is_cs - rhs_is_cs) * 4 + (order >= 0 ? 1 : -1);
}
! 369:
! 370: void iconvlist (int (*do_one) (unsigned int namescount,
! 371: const char * const * names,
! 372: void* data),
! 373: void* data)
! 374: {
! 375: #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
! 376: #ifndef aliases2_lookup
! 377: #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
! 378: #else
! 379: #define aliascount2 0
! 380: #endif
! 381: #define aliascount (aliascount1+aliascount2)
! 382: struct nalias aliasbuf[aliascount];
! 383: const char * namesbuf[aliascount];
! 384: size_t num_aliases;
! 385: {
! 386: /* Put all existing aliases into a buffer. */
! 387: size_t i;
! 388: size_t j;
! 389: j = 0;
! 390: for (i = 0; i < aliascount1; i++) {
! 391: const struct alias * p = &aliases[i];
! 392: if (p->name >= 0
! 393: && p->encoding_index != ei_local_char
! 394: && p->encoding_index != ei_local_wchar_t) {
! 395: aliasbuf[j].name = stringpool + p->name;
! 396: aliasbuf[j].encoding_index = p->encoding_index;
! 397: j++;
! 398: }
! 399: }
! 400: #ifndef aliases2_lookup
! 401: for (i = 0; i < aliascount2; i++) {
! 402: aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
! 403: aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
! 404: j++;
! 405: }
! 406: #endif
! 407: num_aliases = j;
! 408: }
! 409: /* Sort by encoding_index. */
! 410: if (num_aliases > 1)
! 411: qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
! 412: {
! 413: /* Process all aliases with the same encoding_index together. */
! 414: size_t j;
! 415: j = 0;
! 416: while (j < num_aliases) {
! 417: unsigned int ei = aliasbuf[j].encoding_index;
! 418: size_t i = 0;
! 419: do
! 420: namesbuf[i++] = aliasbuf[j++].name;
! 421: while (j < num_aliases && aliasbuf[j].encoding_index == ei);
! 422: if (i > 1)
! 423: qsort(namesbuf, i, sizeof(const char *), compare_by_name);
! 424: /* Call the callback. */
! 425: if (do_one(i,namesbuf,data))
! 426: break;
! 427: }
! 428: }
! 429: #undef aliascount
! 430: #undef aliascount2
! 431: #undef aliascount1
! 432: }
! 433:
! 434: /*
! 435: * Table of canonical names of encodings.
! 436: * Instead of strings, it contains offsets into stringpool and stringpool2.
! 437: */
! 438: static const unsigned short all_canonical[] = {
! 439: #if defined _AIX
! 440: # include "canonical_sysaix.h"
! 441: #elif defined hpux || defined __hpux
! 442: # include "canonical_syshpux.h"
! 443: #elif defined __osf__
! 444: # include "canonical_sysosf1.h"
! 445: #elif defined __sun
! 446: # include "canonical_syssolaris.h"
! 447: #else
! 448: # include "canonical.h"
! 449: #endif
! 450: #ifdef USE_AIX
! 451: # if defined _AIX
! 452: # include "canonical_aix_sysaix.h"
! 453: # else
! 454: # include "canonical_aix.h"
! 455: # endif
! 456: #endif
! 457: #ifdef USE_OSF1
! 458: # if defined __osf__
! 459: # include "canonical_osf1_sysosf1.h"
! 460: # else
! 461: # include "canonical_osf1.h"
! 462: # endif
! 463: #endif
! 464: #ifdef USE_DOS
! 465: # include "canonical_dos.h"
! 466: #endif
! 467: #ifdef USE_EXTRA
! 468: # include "canonical_extra.h"
! 469: #endif
! 470: #if defined _AIX
! 471: # include "canonical_local_sysaix.h"
! 472: #elif defined hpux || defined __hpux
! 473: # include "canonical_local_syshpux.h"
! 474: #elif defined __osf__
! 475: # include "canonical_local_sysosf1.h"
! 476: #elif defined __sun
! 477: # include "canonical_local_syssolaris.h"
! 478: #else
! 479: # include "canonical_local.h"
! 480: #endif
! 481: };
! 482:
! 483: const char * iconv_canonicalize (const char * name)
! 484: {
! 485: const char* code;
! 486: char buf[MAX_WORD_LENGTH+10+1];
! 487: const char* cp;
! 488: char* bp;
! 489: const struct alias * ap;
! 490: unsigned int count;
! 491: unsigned int index;
! 492: const char* pool;
! 493:
! 494: /* Before calling aliases_lookup, convert the input string to upper case,
! 495: * and check whether it's entirely ASCII (we call gperf with option "-7"
! 496: * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
! 497: * or if it's too long, it is not a valid encoding name.
! 498: */
! 499: for (code = name;;) {
! 500: /* Search code in the table. */
! 501: for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
! 502: unsigned char c = * (unsigned char *) cp;
! 503: if (c >= 0x80)
! 504: goto invalid;
! 505: if (c >= 'a' && c <= 'z')
! 506: c -= 'a'-'A';
! 507: *bp = c;
! 508: if (c == '\0')
! 509: break;
! 510: if (--count == 0)
! 511: goto invalid;
! 512: }
! 513: for (;;) {
! 514: if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
! 515: bp -= 10;
! 516: *bp = '\0';
! 517: continue;
! 518: }
! 519: if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
! 520: bp -= 8;
! 521: *bp = '\0';
! 522: continue;
! 523: }
! 524: break;
! 525: }
! 526: if (buf[0] == '\0') {
! 527: code = locale_charset();
! 528: /* Avoid an endless loop that could occur when using an older version
! 529: of localcharset.c. */
! 530: if (code[0] == '\0')
! 531: goto invalid;
! 532: continue;
! 533: }
! 534: pool = stringpool;
! 535: ap = aliases_lookup(buf,bp-buf);
! 536: if (ap == NULL) {
! 537: pool = stringpool2;
! 538: ap = aliases2_lookup(buf);
! 539: if (ap == NULL)
! 540: goto invalid;
! 541: }
! 542: if (ap->encoding_index == ei_local_char) {
! 543: code = locale_charset();
! 544: /* Avoid an endless loop that could occur when using an older version
! 545: of localcharset.c. */
! 546: if (code[0] == '\0')
! 547: goto invalid;
! 548: continue;
! 549: }
! 550: if (ap->encoding_index == ei_local_wchar_t) {
! 551: /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
! 552: This is also the case on native Woe32 systems. */
! 553: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
! 554: if (sizeof(wchar_t) == 4) {
! 555: index = ei_ucs4internal;
! 556: break;
! 557: }
! 558: if (sizeof(wchar_t) == 2) {
! 559: index = ei_ucs2internal;
! 560: break;
! 561: }
! 562: if (sizeof(wchar_t) == 1) {
! 563: index = ei_iso8859_1;
! 564: break;
! 565: }
! 566: #endif
! 567: }
! 568: index = ap->encoding_index;
! 569: break;
! 570: }
! 571: return all_canonical[index] + pool;
! 572: invalid:
! 573: return name;
! 574: }
! 575:
! 576: int _libiconv_version = _LIBICONV_VERSION;
! 577:
! 578: #if defined __FreeBSD__ && !defined __gnu_freebsd__
! 579: /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
! 580: It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
! 581: #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
! 582: #define _strong_alias(name, aliasname) \
! 583: extern __typeof (name) aliasname __attribute__ ((alias (#name)));
! 584: #undef iconv_open
! 585: #undef iconv
! 586: #undef iconv_close
! 587: strong_alias (libiconv_open, iconv_open)
! 588: strong_alias (libiconv, iconv)
! 589: strong_alias (libiconv_close, iconv_close)
! 590: #endif
! 591:
! 592: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>