Annotation of embedaddon/libiconv/src/iconv.c, revision 1.1
1.1 ! misho 1: /* Copyright (C) 2000-2009 Free Software Foundation, Inc.
! 2: This file is part of the GNU LIBICONV Library.
! 3:
! 4: This program is free software: you can redistribute it and/or modify
! 5: it under the terms of the GNU General Public License as published by
! 6: the Free Software Foundation; either version 3 of the License, or
! 7: (at your option) any later version.
! 8:
! 9: This program is distributed in the hope that it will be useful,
! 10: but WITHOUT ANY WARRANTY; without even the implied warranty of
! 11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 12: GNU General Public License for more details.
! 13:
! 14: You should have received a copy of the GNU General Public License
! 15: along with this program. If not, see <http://www.gnu.org/licenses/>. */
! 16:
! 17: #include "config.h"
! 18: #ifndef ICONV_CONST
! 19: # define ICONV_CONST
! 20: #endif
! 21:
! 22: #include <limits.h>
! 23: #include <stddef.h>
! 24: #include <stdio.h>
! 25: #include <stdlib.h>
! 26: #include <string.h>
! 27: #include <iconv.h>
! 28: #include <errno.h>
! 29: #include <locale.h>
! 30: #include <fcntl.h>
! 31:
! 32: /* Ensure that iconv_no_i18n does not depend on libintl. */
! 33: #ifdef NO_I18N
! 34: # undef ENABLE_NLS
! 35: # undef ENABLE_RELOCATABLE
! 36: #endif
! 37:
! 38: #include "binary-io.h"
! 39: #include "progname.h"
! 40: #include "relocatable.h"
! 41: #include "xalloc.h"
! 42: #include "uniwidth.h"
! 43: #include "uniwidth/cjk.h"
! 44:
! 45: /* Ensure that iconv_no_i18n does not depend on libintl. */
! 46: #ifdef NO_I18N
! 47: #include <stdarg.h>
! 48: static void
! 49: error (int status, int errnum, const char *message, ...)
! 50: {
! 51: va_list args;
! 52:
! 53: fflush(stdout);
! 54: fprintf(stderr,"%s: ",program_name);
! 55: va_start(args,message);
! 56: vfprintf(stderr,message,args);
! 57: va_end(args);
! 58: if (errnum) {
! 59: const char *s = strerror(errnum);
! 60: if (s == NULL)
! 61: s = "Unknown system error";
! 62: }
! 63: putc('\n',stderr);
! 64: fflush(stderr);
! 65: if (status)
! 66: exit(status);
! 67: }
! 68: #else
! 69: # include "error.h"
! 70: #endif
! 71:
! 72: #include "gettext.h"
! 73:
! 74: #define _(str) gettext(str)
! 75:
! 76: /* Ensure that iconv_no_i18n does not depend on libintl. */
! 77: #ifdef NO_I18N
! 78: # define xmalloc malloc
! 79: # define xalloc_die abort
! 80: #endif
! 81:
! 82: /* Locale independent test for a decimal digit.
! 83: Argument can be 'char' or 'unsigned char'. (Whereas the argument of
! 84: <ctype.h> isdigit must be an 'unsigned char'.) */
! 85: #undef isdigit
! 86: #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
! 87:
! 88: /* Locale independent test for a printable character.
! 89: Argument can be 'char' or 'unsigned char'. (Whereas the argument of
! 90: <ctype.h> isprint must be an 'unsigned char'.) */
! 91: #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
! 92:
! 93: /* ========================================================================= */
! 94:
! 95: static int discard_unconvertible = 0;
! 96: static int silent = 0;
! 97:
! 98: static void usage (int exitcode)
! 99: {
! 100: if (exitcode != 0) {
! 101: const char* helpstring1 =
! 102: /* TRANSLATORS: The first line of the short usage message. */
! 103: _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
! 104: const char* helpstring2 =
! 105: /* TRANSLATORS: The second line of the short usage message.
! 106: Align it correctly against the first line. */
! 107: _("or: iconv -l");
! 108: fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
! 109: fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name);
! 110: } else {
! 111: /* xgettext: no-wrap */
! 112: /* TRANSLATORS: The first line of the long usage message.
! 113: The %s placeholder expands to the program name. */
! 114: printf(_("\
! 115: Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
! 116: program_name);
! 117: /* xgettext: no-wrap */
! 118: /* TRANSLATORS: The second line of the long usage message.
! 119: Align it correctly against the first line.
! 120: The %s placeholder expands to the program name. */
! 121: printf(_("\
! 122: or: %s -l\n"),
! 123: program_name);
! 124: printf("\n");
! 125: /* xgettext: no-wrap */
! 126: /* TRANSLATORS: Description of the iconv program. */
! 127: printf(_("\
! 128: Converts text from one encoding to another encoding.\n"));
! 129: printf("\n");
! 130: /* xgettext: no-wrap */
! 131: printf(_("\
! 132: Options controlling the input and output format:\n"));
! 133: /* xgettext: no-wrap */
! 134: printf(_("\
! 135: -f ENCODING, --from-code=ENCODING\n\
! 136: the encoding of the input\n"));
! 137: /* xgettext: no-wrap */
! 138: printf(_("\
! 139: -t ENCODING, --to-code=ENCODING\n\
! 140: the encoding of the output\n"));
! 141: printf("\n");
! 142: /* xgettext: no-wrap */
! 143: printf(_("\
! 144: Options controlling conversion problems:\n"));
! 145: /* xgettext: no-wrap */
! 146: printf(_("\
! 147: -c discard unconvertible characters\n"));
! 148: /* xgettext: no-wrap */
! 149: printf(_("\
! 150: --unicode-subst=FORMATSTRING\n\
! 151: substitution for unconvertible Unicode characters\n"));
! 152: /* xgettext: no-wrap */
! 153: printf(_("\
! 154: --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
! 155: /* xgettext: no-wrap */
! 156: printf(_("\
! 157: --widechar-subst=FORMATSTRING\n\
! 158: substitution for unconvertible wide characters\n"));
! 159: printf("\n");
! 160: /* xgettext: no-wrap */
! 161: printf(_("\
! 162: Options controlling error output:\n"));
! 163: /* xgettext: no-wrap */
! 164: printf(_("\
! 165: -s, --silent suppress error messages about conversion problems\n"));
! 166: printf("\n");
! 167: /* xgettext: no-wrap */
! 168: printf(_("\
! 169: Informative output:\n"));
! 170: /* xgettext: no-wrap */
! 171: printf(_("\
! 172: -l, --list list the supported encodings\n"));
! 173: /* xgettext: no-wrap */
! 174: printf(_("\
! 175: --help display this help and exit\n"));
! 176: /* xgettext: no-wrap */
! 177: printf(_("\
! 178: --version output version information and exit\n"));
! 179: printf("\n");
! 180: /* TRANSLATORS: The placeholder indicates the bug-reporting address
! 181: for this package. Please add _another line_ saying
! 182: "Report translation bugs to <...>\n" with the address for translation
! 183: bugs (typically your translation team's web or email address). */
! 184: fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
! 185: }
! 186: exit(exitcode);
! 187: }
! 188:
! 189: static void print_version (void)
! 190: {
! 191: printf("iconv (GNU libiconv %d.%d)\n",
! 192: _libiconv_version >> 8, _libiconv_version & 0xff);
! 193: printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2009");
! 194: /* xgettext: no-wrap */
! 195: fputs (_("\
! 196: License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
! 197: This is free software: you are free to change and redistribute it.\n\
! 198: There is NO WARRANTY, to the extent permitted by law.\n\
! 199: "),stdout);
! 200: /* TRANSLATORS: The %s placeholder expands to an author's name. */
! 201: printf(_("Written by %s.\n"),"Bruno Haible");
! 202: exit(EXIT_SUCCESS);
! 203: }
! 204:
/* Print one group of encoding names on a single line of stdout.
   NAMES holds NAMESCOUNT strings; they are written separated by single
   spaces and followed by a newline.  DATA is ignored.
   Always returns 0.  */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  unsigned int remaining = namescount;
  const char * const * cursor = names;

  (void)data;
  while (remaining > 0) {
    /* A separator goes before every name except the first one. */
    if (remaining != namescount)
      putc(' ',stdout);
    fputs(*cursor,stdout);
    cursor++;
    remaining--;
  }
  putc('\n',stdout);
  return 0;
}
! 218:
! 219: /* ========================================================================= */
! 220:
/* Current position in the input, used in conversion error messages. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width() when measuring a character. */
static const char* cjkcode;

/* Advance the line/column position past the character UC, which has just
   been converted successfully.  DATA is not used.
   A line feed starts a new line at column 0.  Any other character advances
   the column by its uc_width(); when uc_width() reports a negative width,
   only a horizontal tab moves the column, to the next multiple of 8.  */
static void update_line_column (unsigned int uc, void* data)
{
  int width;

  if (uc == 0x000A) {
    line++;
    column = 0;
    return;
  }

  width = uc_width(uc, cjkcode);
  if (width >= 0) {
    column += width;
    return;
  }
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
! 240:
! 241: /* ========================================================================= */
! 242:
! 243: /* Production of placeholder strings as fallback for unconvertible
! 244: characters. */
! 245:
/* Validate FORMAT as a printf-style format string that consumes either no
   argument or exactly one unsigned integer argument, and return an upper
   bound for the number of bytes it can produce (excluding the terminating
   NUL).  PARAM_NAME is the command-line option the string came from; it is
   only used in error messages.  Any violation is reported through
   error(EXIT_FAILURE, ...).  */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf().  See also Linux fprintf(3) manual page.
     For simplicity, the following are rejected:
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains is a directive that
       - starts with '%',
       - optionally continues with any of the flag characters '#', '0',
         '-', ' ', '+', "'",
       - optionally continues with a width: a nonempty digit sequence,
       - optionally continues with '.' and a precision: a nonempty digit
         sequence,
       - ends with a specifier:
           '%', which needs no argument, or
           'o', 'u', 'x', 'X', which need an unsigned integer argument.  */
  const size_t uint_bits = sizeof (unsigned int) * CHAR_BIT;
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  while (*format != '\0') {
    unsigned int width = 0;
    unsigned int precision = 0;
    unsigned int length;
    char spec;

    /* Ordinary characters are copied through and cost one byte each. */
    if (*format++ != '%') {
      maxsize++;
      continue;
    }

    /* Flags. */
    while (*format == ' ' || *format == '+' || *format == '-'
           || *format == '#' || *format == '0' || *format == '\'')
      format++;

    /* Width. */
    if (*format == '*')
      error(EXIT_FAILURE,0,
            /* TRANSLATORS: An error message.
               The %s placeholder expands to a command-line option.  */
            _("%s argument: A format directive with a variable width is not allowed here."),
            param_name);
    while (isdigit (*format)) {
      width = 10*width + (*format - '0');
      format++;
    }

    /* Precision. */
    if (*format == '.') {
      format++;
      if (*format == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a variable precision is not allowed here."),
              param_name);
      while (isdigit (*format)) {
        precision = 10*precision + (*format - '0');
        format++;
      }
    }

    /* Size modifiers are not accepted. */
    switch (*format) {
      case 'h': case 'l': case 'L': case 'q':
      case 'j': case 'z': case 'Z': case 't':
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a size is not allowed here."),
              param_name);
    }

    /* Conversion specifier: determines the size estimate. */
    spec = *format;
    switch (spec) {
      case '%':
        length = 1;
        break;
      case 'u':
        /* Digits needed: bits * log10(2), floor turned into ceil by +1. */
        length = (unsigned int) (uint_bits * 0.30103) + 1;
        if (length < precision)
          length = precision;
        /* Doubling is an estimate for the grouping flag; one more byte
           accounts for a leading sign. */
        length = 2 * length + 1;
        unnumbered_arg_count++;
        break;
      case 'o':
        /* Digits needed: bits / 3, floor turned into ceil by +1. */
        length = (unsigned int) (uint_bits * 0.333334) + 1;
        if (length < precision)
          length = precision;
        length += 1; /* leading sign */
        unnumbered_arg_count++;
        break;
      case 'x': case 'X':
        /* Digits needed: bits / 4, floor turned into ceil by +1. */
        length = (unsigned int) (uint_bits * 0.25) + 1;
        if (length < precision)
          length = precision;
        length += 2; /* leading sign or alternate form */
        unnumbered_arg_count++;
        break;
      default:
        if (spec == '\0')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: The string ends in the middle of a directive."),
                param_name);
        else if (c_isprint(spec))
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.
                   The %c placeholder expands to an unknown format directive.  */
                _("%s argument: The character '%c' is not a valid conversion specifier."),
                param_name,spec);
        else
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                param_name);
        abort(); /*NOTREACHED*/
    }
    format++;

    /* A field width larger than the estimate pads the output up to it. */
    if (length < width)
      length = width;
    maxsize += length;
  }

  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,
          /* TRANSLATORS: An error message.
             The %s placeholder expands to a command-line option.
             The %u placeholder expands to the number of arguments consumed by the format string.  */
          ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                   "%s argument: The format string consumes more than one argument: %u arguments.",
                   unnumbered_arg_count),
          param_name,unnumbered_arg_count);
  return maxsize;
}
! 403:
! 404: /* Format strings. */
! 405: static const char* ilseq_byte_subst;
! 406: static const char* ilseq_wchar_subst;
! 407: static const char* ilseq_unicode_subst;
! 408:
! 409: /* Maximum result size for each format string. */
! 410: static size_t ilseq_byte_subst_size;
! 411: static size_t ilseq_wchar_subst_size;
! 412: static size_t ilseq_unicode_subst_size;
! 413:
! 414: /* Buffer of size ilseq_byte_subst_size+1. */
! 415: static char* ilseq_byte_subst_buffer;
! 416: #if HAVE_WCHAR_T
! 417: /* Buffer of size ilseq_wchar_subst_size+1. */
! 418: static char* ilseq_wchar_subst_buffer;
! 419: #endif
! 420: /* Buffer of size ilseq_unicode_subst_size+1. */
! 421: static char* ilseq_unicode_subst_buffer;
! 422:
! 423: /* Auxiliary variables for subst_mb_to_uc_fallback. */
! 424: /* Converter from locale encoding to UCS-4. */
! 425: static iconv_t subst_mb_to_uc_cd;
! 426: /* Buffer of size ilseq_byte_subst_size. */
! 427: static unsigned int* subst_mb_to_uc_temp_buffer;
! 428:
! 429: static void subst_mb_to_uc_fallback
! 430: (const char* inbuf, size_t inbufsize,
! 431: void (*write_replacement) (const unsigned int *buf, size_t buflen,
! 432: void* callback_arg),
! 433: void* callback_arg,
! 434: void* data)
! 435: {
! 436: for (; inbufsize > 0; inbuf++, inbufsize--) {
! 437: const char* inptr;
! 438: size_t inbytesleft;
! 439: char* outptr;
! 440: size_t outbytesleft;
! 441: sprintf(ilseq_byte_subst_buffer,
! 442: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
! 443: inptr = ilseq_byte_subst_buffer;
! 444: inbytesleft = strlen(ilseq_byte_subst_buffer);
! 445: outptr = (char*)subst_mb_to_uc_temp_buffer;
! 446: outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
! 447: iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
! 448: if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
! 449: == (size_t)(-1)
! 450: || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
! 451: == (size_t)(-1))
! 452: error(EXIT_FAILURE,0,
! 453: /* TRANSLATORS: An error message.
! 454: The %s placeholder expands to a piece of text, specified through --byte-subst. */
! 455: _("cannot convert byte substitution to Unicode: %s"),
! 456: ilseq_byte_subst_buffer);
! 457: if (!(outbytesleft%sizeof(unsigned int) == 0))
! 458: abort();
! 459: write_replacement(subst_mb_to_uc_temp_buffer,
! 460: ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
! 461: callback_arg);
! 462: }
! 463: }
! 464:
! 465: /* Auxiliary variables for subst_uc_to_mb_fallback. */
! 466: /* Converter from locale encoding to target encoding. */
! 467: static iconv_t subst_uc_to_mb_cd;
! 468: /* Buffer of size ilseq_unicode_subst_size*4. */
! 469: static char* subst_uc_to_mb_temp_buffer;
! 470:
! 471: static void subst_uc_to_mb_fallback
! 472: (unsigned int code,
! 473: void (*write_replacement) (const char *buf, size_t buflen,
! 474: void* callback_arg),
! 475: void* callback_arg,
! 476: void* data)
! 477: {
! 478: const char* inptr;
! 479: size_t inbytesleft;
! 480: char* outptr;
! 481: size_t outbytesleft;
! 482: sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
! 483: inptr = ilseq_unicode_subst_buffer;
! 484: inbytesleft = strlen(ilseq_unicode_subst_buffer);
! 485: outptr = subst_uc_to_mb_temp_buffer;
! 486: outbytesleft = ilseq_unicode_subst_size*4;
! 487: iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
! 488: if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
! 489: == (size_t)(-1)
! 490: || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
! 491: == (size_t)(-1))
! 492: error(EXIT_FAILURE,0,
! 493: /* TRANSLATORS: An error message.
! 494: The %s placeholder expands to a piece of text, specified through --unicode-subst. */
! 495: _("cannot convert unicode substitution to target encoding: %s"),
! 496: ilseq_unicode_subst_buffer);
! 497: write_replacement(subst_uc_to_mb_temp_buffer,
! 498: ilseq_unicode_subst_size*4-outbytesleft,
! 499: callback_arg);
! 500: }
! 501:
! 502: #if HAVE_WCHAR_T
! 503:
! 504: /* Auxiliary variables for subst_mb_to_wc_fallback. */
! 505: /* Converter from locale encoding to wchar_t. */
! 506: static iconv_t subst_mb_to_wc_cd;
! 507: /* Buffer of size ilseq_byte_subst_size. */
! 508: static wchar_t* subst_mb_to_wc_temp_buffer;
! 509:
! 510: static void subst_mb_to_wc_fallback
! 511: (const char* inbuf, size_t inbufsize,
! 512: void (*write_replacement) (const wchar_t *buf, size_t buflen,
! 513: void* callback_arg),
! 514: void* callback_arg,
! 515: void* data)
! 516: {
! 517: for (; inbufsize > 0; inbuf++, inbufsize--) {
! 518: const char* inptr;
! 519: size_t inbytesleft;
! 520: char* outptr;
! 521: size_t outbytesleft;
! 522: sprintf(ilseq_byte_subst_buffer,
! 523: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
! 524: inptr = ilseq_byte_subst_buffer;
! 525: inbytesleft = strlen(ilseq_byte_subst_buffer);
! 526: outptr = (char*)subst_mb_to_wc_temp_buffer;
! 527: outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
! 528: iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
! 529: if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
! 530: == (size_t)(-1)
! 531: || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
! 532: == (size_t)(-1))
! 533: error(EXIT_FAILURE,0,
! 534: /* TRANSLATORS: An error message.
! 535: The %s placeholder expands to a piece of text, specified through --byte-subst. */
! 536: _("cannot convert byte substitution to wide string: %s"),
! 537: ilseq_byte_subst_buffer);
! 538: if (!(outbytesleft%sizeof(wchar_t) == 0))
! 539: abort();
! 540: write_replacement(subst_mb_to_wc_temp_buffer,
! 541: ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
! 542: callback_arg);
! 543: }
! 544: }
! 545:
! 546: /* Auxiliary variables for subst_wc_to_mb_fallback. */
! 547: /* Converter from locale encoding to target encoding. */
! 548: static iconv_t subst_wc_to_mb_cd;
! 549: /* Buffer of size ilseq_wchar_subst_size*4.
! 550: Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
! 551: static char* subst_wc_to_mb_temp_buffer;
! 552:
! 553: static void subst_wc_to_mb_fallback
! 554: (wchar_t code,
! 555: void (*write_replacement) (const char *buf, size_t buflen,
! 556: void* callback_arg),
! 557: void* callback_arg,
! 558: void* data)
! 559: {
! 560: const char* inptr;
! 561: size_t inbytesleft;
! 562: char* outptr;
! 563: size_t outbytesleft;
! 564: sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
! 565: inptr = ilseq_wchar_subst_buffer;
! 566: inbytesleft = strlen(ilseq_wchar_subst_buffer);
! 567: outptr = subst_wc_to_mb_temp_buffer;
! 568: outbytesleft = ilseq_wchar_subst_size*4;
! 569: iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
! 570: if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
! 571: == (size_t)(-1)
! 572: || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
! 573: == (size_t)(-1))
! 574: error(EXIT_FAILURE,0,
! 575: /* TRANSLATORS: An error message.
! 576: The %s placeholder expands to a piece of text, specified through --widechar-subst. */
! 577: _("cannot convert widechar substitution to target encoding: %s"),
! 578: ilseq_wchar_subst_buffer);
! 579: write_replacement(subst_wc_to_mb_temp_buffer,
! 580: ilseq_wchar_subst_size*4-outbytesleft,
! 581: callback_arg);
! 582: }
! 583:
! 584: #else
! 585:
! 586: #define subst_mb_to_wc_fallback NULL
! 587: #define subst_wc_to_mb_fallback NULL
! 588:
! 589: #endif
! 590:
! 591: /* Auxiliary variables for subst_mb_to_mb_fallback. */
! 592: /* Converter from locale encoding to target encoding. */
! 593: static iconv_t subst_mb_to_mb_cd;
! 594: /* Buffer of size ilseq_byte_subst_size*4. */
! 595: static char* subst_mb_to_mb_temp_buffer;
! 596:
! 597: static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
! 598: {
! 599: for (; inbufsize > 0; inbuf++, inbufsize--) {
! 600: const char* inptr;
! 601: size_t inbytesleft;
! 602: char* outptr;
! 603: size_t outbytesleft;
! 604: sprintf(ilseq_byte_subst_buffer,
! 605: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
! 606: inptr = ilseq_byte_subst_buffer;
! 607: inbytesleft = strlen(ilseq_byte_subst_buffer);
! 608: outptr = subst_mb_to_mb_temp_buffer;
! 609: outbytesleft = ilseq_byte_subst_size*4;
! 610: iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
! 611: if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
! 612: == (size_t)(-1)
! 613: || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
! 614: == (size_t)(-1))
! 615: error(EXIT_FAILURE,0,
! 616: /* TRANSLATORS: An error message.
! 617: The %s placeholder expands to a piece of text, specified through --byte-subst. */
! 618: _("cannot convert byte substitution to target encoding: %s"),
! 619: ilseq_byte_subst_buffer);
! 620: fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
! 621: stdout);
! 622: }
! 623: }
! 624:
! 625: /* ========================================================================= */
! 626:
! 627: /* Error messages during conversion. */
! 628:
! 629: static void conversion_error_EILSEQ (const char* infilename)
! 630: {
! 631: fflush(stdout);
! 632: if (column > 0)
! 633: putc('\n',stderr);
! 634: error(0,0,
! 635: /* TRANSLATORS: An error message.
! 636: The placeholders expand to the input file name, a line number, and a column number. */
! 637: _("%s:%u:%u: cannot convert"),
! 638: infilename,line,column);
! 639: }
! 640:
! 641: static void conversion_error_EINVAL (const char* infilename)
! 642: {
! 643: fflush(stdout);
! 644: if (column > 0)
! 645: putc('\n',stderr);
! 646: error(0,0,
! 647: /* TRANSLATORS: An error message.
! 648: The placeholders expand to the input file name, a line number, and a column number.
! 649: A "shift sequence" is a sequence of bytes that changes the state of the converter;
! 650: this concept exists only for "stateful" encodings like ISO-2022-JP. */
! 651: _("%s:%u:%u: incomplete character or shift sequence"),
! 652: infilename,line,column);
! 653: }
! 654:
! 655: static void conversion_error_other (int errnum, const char* infilename)
! 656: {
! 657: fflush(stdout);
! 658: if (column > 0)
! 659: putc('\n',stderr);
! 660: error(0,errnum,
! 661: /* TRANSLATORS: The first part of an error message.
! 662: It is followed by a colon and a detail message.
! 663: The placeholders expand to the input file name, a line number, and a column number. */
! 664: _("%s:%u:%u"),
! 665: infilename,line,column);
! 666: }
! 667:
! 668: /* Convert the input given in infile. */
! 669:
! 670: static int convert (iconv_t cd, FILE* infile, const char* infilename)
! 671: {
! 672: char inbuf[4096+4096];
! 673: size_t inbufrest = 0;
! 674: char initial_outbuf[4096];
! 675: char *outbuf = initial_outbuf;
! 676: size_t outbufsize = sizeof(initial_outbuf);
! 677: int status = 0;
! 678:
! 679: #if O_BINARY
! 680: SET_BINARY(fileno(infile));
! 681: #endif
! 682: line = 1; column = 0;
! 683: iconv(cd,NULL,NULL,NULL,NULL);
! 684: for (;;) {
! 685: size_t inbufsize = fread(inbuf+4096,1,4096,infile);
! 686: if (inbufsize == 0) {
! 687: if (inbufrest == 0)
! 688: break;
! 689: else {
! 690: if (ilseq_byte_subst != NULL)
! 691: subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
! 692: if (!silent)
! 693: conversion_error_EINVAL(infilename);
! 694: status = 1;
! 695: goto done;
! 696: }
! 697: } else {
! 698: const char* inptr = inbuf+4096-inbufrest;
! 699: size_t insize = inbufrest+inbufsize;
! 700: inbufrest = 0;
! 701: while (insize > 0) {
! 702: char* outptr = outbuf;
! 703: size_t outsize = outbufsize;
! 704: size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
! 705: if (outptr != outbuf) {
! 706: int saved_errno = errno;
! 707: if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
! 708: status = 1;
! 709: goto done;
! 710: }
! 711: errno = saved_errno;
! 712: }
! 713: if (res == (size_t)(-1)) {
! 714: if (errno == EILSEQ) {
! 715: if (discard_unconvertible == 1) {
! 716: int one = 1;
! 717: iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
! 718: discard_unconvertible = 2;
! 719: status = 1;
! 720: } else {
! 721: if (!silent)
! 722: conversion_error_EILSEQ(infilename);
! 723: status = 1;
! 724: goto done;
! 725: }
! 726: } else if (errno == EINVAL) {
! 727: if (inbufsize == 0 || insize > 4096) {
! 728: if (!silent)
! 729: conversion_error_EINVAL(infilename);
! 730: status = 1;
! 731: goto done;
! 732: } else {
! 733: inbufrest = insize;
! 734: if (insize > 0) {
! 735: /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
! 736: we cannot use memcpy here, because source and destination
! 737: regions may overlap. */
! 738: char* restptr = inbuf+4096-insize;
! 739: do { *restptr++ = *inptr++; } while (--insize > 0);
! 740: }
! 741: break;
! 742: }
! 743: } else if (errno == E2BIG) {
! 744: if (outptr==outbuf) {
! 745: /* outbuf is too small. Double its size. */
! 746: if (outbuf != initial_outbuf)
! 747: free(outbuf);
! 748: outbufsize = 2*outbufsize;
! 749: if (outbufsize==0) /* integer overflow? */
! 750: xalloc_die();
! 751: outbuf = (char*)xmalloc(outbufsize);
! 752: }
! 753: } else {
! 754: if (!silent)
! 755: conversion_error_other(errno,infilename);
! 756: status = 1;
! 757: goto done;
! 758: }
! 759: }
! 760: }
! 761: }
! 762: }
! 763: for (;;) {
! 764: char* outptr = outbuf;
! 765: size_t outsize = outbufsize;
! 766: size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
! 767: if (outptr != outbuf) {
! 768: int saved_errno = errno;
! 769: if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
! 770: status = 1;
! 771: goto done;
! 772: }
! 773: errno = saved_errno;
! 774: }
! 775: if (res == (size_t)(-1)) {
! 776: if (errno == EILSEQ) {
! 777: if (discard_unconvertible == 1) {
! 778: int one = 1;
! 779: iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
! 780: discard_unconvertible = 2;
! 781: status = 1;
! 782: } else {
! 783: if (!silent)
! 784: conversion_error_EILSEQ(infilename);
! 785: status = 1;
! 786: goto done;
! 787: }
! 788: } else if (errno == EINVAL) {
! 789: if (!silent)
! 790: conversion_error_EINVAL(infilename);
! 791: status = 1;
! 792: goto done;
! 793: } else if (errno == E2BIG) {
! 794: if (outptr==outbuf) {
! 795: /* outbuf is too small. Double its size. */
! 796: if (outbuf != initial_outbuf)
! 797: free(outbuf);
! 798: outbufsize = 2*outbufsize;
! 799: if (outbufsize==0) /* integer overflow? */
! 800: xalloc_die();
! 801: outbuf = (char*)xmalloc(outbufsize);
! 802: }
! 803: } else {
! 804: if (!silent)
! 805: conversion_error_other(errno,infilename);
! 806: status = 1;
! 807: goto done;
! 808: }
! 809: } else
! 810: break;
! 811: }
! 812: if (ferror(infile)) {
! 813: fflush(stdout);
! 814: if (column > 0)
! 815: putc('\n',stderr);
! 816: error(0,0,
! 817: /* TRANSLATORS: An error message.
! 818: The placeholder expands to the input file name. */
! 819: _("%s: I/O error"),
! 820: infilename);
! 821: status = 1;
! 822: goto done;
! 823: }
! 824: done:
! 825: if (outbuf != initial_outbuf)
! 826: free(outbuf);
! 827: return status;
! 828: }
! 829:
! 830: /* ========================================================================= */
! 831:
! 832: int main (int argc, char* argv[])
! 833: {
! 834: const char* fromcode = NULL;
! 835: const char* tocode = NULL;
! 836: int do_list = 0;
! 837: iconv_t cd;
! 838: struct iconv_fallbacks fallbacks;
! 839: struct iconv_hooks hooks;
! 840: int i;
! 841: int status;
! 842:
! 843: set_program_name (argv[0]);
! 844: #if HAVE_SETLOCALE
! 845: /* Needed for the locale dependent encodings, "char" and "wchar_t",
! 846: and for gettext. */
! 847: setlocale(LC_CTYPE,"");
! 848: #if ENABLE_NLS
! 849: /* Needed for gettext. */
! 850: setlocale(LC_MESSAGES,"");
! 851: #endif
! 852: #endif
! 853: #if ENABLE_NLS
! 854: bindtextdomain("libiconv",relocate(LOCALEDIR));
! 855: #endif
! 856: textdomain("libiconv");
! 857: for (i = 1; i < argc;) {
! 858: size_t len = strlen(argv[i]);
! 859: if (!strcmp(argv[i],"--")) {
! 860: i++;
! 861: break;
! 862: }
! 863: if (!strcmp(argv[i],"-f")
! 864: /* --f ... --from-code */
! 865: || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
! 866: /* --from-code=... */
! 867: || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
! 868: if (len < 12)
! 869: if (i == argc-1) usage(1);
! 870: if (fromcode != NULL) usage(1);
! 871: if (len < 12) {
! 872: fromcode = argv[i+1];
! 873: i += 2;
! 874: } else {
! 875: fromcode = argv[i]+12;
! 876: i++;
! 877: }
! 878: continue;
! 879: }
! 880: if (!strcmp(argv[i],"-t")
! 881: /* --t ... --to-code */
! 882: || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
! 883: /* --from-code=... */
! 884: || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
! 885: if (len < 10)
! 886: if (i == argc-1) usage(1);
! 887: if (tocode != NULL) usage(1);
! 888: if (len < 10) {
! 889: tocode = argv[i+1];
! 890: i += 2;
! 891: } else {
! 892: tocode = argv[i]+10;
! 893: i++;
! 894: }
! 895: continue;
! 896: }
! 897: if (!strcmp(argv[i],"-l")
! 898: /* --l ... --list */
! 899: || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
! 900: do_list = 1;
! 901: i++;
! 902: continue;
! 903: }
! 904: if (/* --by ... --byte-subst */
! 905: (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
! 906: /* --byte-subst=... */
! 907: || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
! 908: if (len < 13) {
! 909: if (i == argc-1) usage(1);
! 910: ilseq_byte_subst = argv[i+1];
! 911: i += 2;
! 912: } else {
! 913: ilseq_byte_subst = argv[i]+13;
! 914: i++;
! 915: }
! 916: ilseq_byte_subst_size =
! 917: check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
! 918: continue;
! 919: }
! 920: if (/* --w ... --widechar-subst */
! 921: (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
! 922: /* --widechar-subst=... */
! 923: || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
! 924: if (len < 17) {
! 925: if (i == argc-1) usage(1);
! 926: ilseq_wchar_subst = argv[i+1];
! 927: i += 2;
! 928: } else {
! 929: ilseq_wchar_subst = argv[i]+17;
! 930: i++;
! 931: }
! 932: ilseq_wchar_subst_size =
! 933: check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
! 934: continue;
! 935: }
! 936: if (/* --u ... --unicode-subst */
! 937: (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
! 938: /* --unicode-subst=... */
! 939: || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
! 940: if (len < 16) {
! 941: if (i == argc-1) usage(1);
! 942: ilseq_unicode_subst = argv[i+1];
! 943: i += 2;
! 944: } else {
! 945: ilseq_unicode_subst = argv[i]+16;
! 946: i++;
! 947: }
! 948: ilseq_unicode_subst_size =
! 949: check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
! 950: continue;
! 951: }
! 952: if /* --s ... --silent */
! 953: (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
! 954: silent = 1;
! 955: continue;
! 956: }
! 957: if /* --h ... --help */
! 958: (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
! 959: usage(0);
! 960: }
! 961: if /* --v ... --version */
! 962: (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
! 963: print_version();
! 964: }
! 965: #if O_BINARY
! 966: /* Backward compatibility with iconv <= 1.9.1. */
! 967: if /* --bi ... --binary */
! 968: (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
! 969: i++;
! 970: continue;
! 971: }
! 972: #endif
! 973: if (argv[i][0] == '-') {
! 974: const char *option = argv[i] + 1;
! 975: if (*option == '\0')
! 976: usage(1);
! 977: for (; *option; option++)
! 978: switch (*option) {
! 979: case 'c': discard_unconvertible = 1; break;
! 980: case 's': silent = 1; break;
! 981: default: usage(1);
! 982: }
! 983: i++;
! 984: continue;
! 985: }
! 986: break;
! 987: }
! 988: if (do_list) {
! 989: if (i != 2 || i != argc)
! 990: usage(1);
! 991: iconvlist(print_one,NULL);
! 992: status = 0;
! 993: } else {
! 994: #if O_BINARY
! 995: SET_BINARY(fileno(stdout));
! 996: #endif
! 997: if (fromcode == NULL)
! 998: fromcode = "char";
! 999: if (tocode == NULL)
! 1000: tocode = "char";
! 1001: cd = iconv_open(tocode,fromcode);
! 1002: if (cd == (iconv_t)(-1)) {
! 1003: if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
! 1004: error(0,0,
! 1005: /* TRANSLATORS: An error message.
! 1006: The placeholder expands to the encoding name, specified through --from-code. */
! 1007: _("conversion from %s unsupported"),
! 1008: fromcode);
! 1009: else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
! 1010: error(0,0,
! 1011: /* TRANSLATORS: An error message.
! 1012: The placeholder expands to the encoding name, specified through --to-code. */
! 1013: _("conversion to %s unsupported"),
! 1014: tocode);
! 1015: else
! 1016: error(0,0,
! 1017: /* TRANSLATORS: An error message.
! 1018: The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
! 1019: _("conversion from %s to %s unsupported"),
! 1020: fromcode,tocode);
! 1021: error(EXIT_FAILURE,0,
! 1022: /* TRANSLATORS: Additional advice after an error message.
! 1023: The %s placeholder expands to the program name. */
! 1024: _("try '%s -l' to get the list of supported encodings"),
! 1025: program_name);
! 1026: }
! 1027: /* Look at fromcode and tocode, to determine whether character widths
! 1028: should be determined according to legacy CJK conventions. */
! 1029: cjkcode = iconv_canonicalize(tocode);
! 1030: if (!is_cjk_encoding(cjkcode))
! 1031: cjkcode = iconv_canonicalize(fromcode);
! 1032: /* Set up fallback routines for handling impossible conversions. */
! 1033: if (ilseq_byte_subst != NULL)
! 1034: ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
! 1035: if (!discard_unconvertible) {
! 1036: #if HAVE_WCHAR_T
! 1037: if (ilseq_wchar_subst != NULL)
! 1038: ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
! 1039: #endif
! 1040: if (ilseq_unicode_subst != NULL)
! 1041: ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
! 1042: if (ilseq_byte_subst != NULL) {
! 1043: subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
! 1044: subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
! 1045: #if HAVE_WCHAR_T
! 1046: subst_mb_to_wc_cd = iconv_open("wchar_t","char");
! 1047: subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
! 1048: #endif
! 1049: subst_mb_to_mb_cd = iconv_open(tocode,"char");
! 1050: subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
! 1051: }
! 1052: #if HAVE_WCHAR_T
! 1053: if (ilseq_wchar_subst != NULL) {
! 1054: subst_wc_to_mb_cd = iconv_open(tocode,"char");
! 1055: subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
! 1056: }
! 1057: #endif
! 1058: if (ilseq_unicode_subst != NULL) {
! 1059: subst_uc_to_mb_cd = iconv_open(tocode,"char");
! 1060: subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
! 1061: }
! 1062: fallbacks.mb_to_uc_fallback =
! 1063: (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
! 1064: fallbacks.uc_to_mb_fallback =
! 1065: (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
! 1066: fallbacks.mb_to_wc_fallback =
! 1067: (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
! 1068: fallbacks.wc_to_mb_fallback =
! 1069: (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
! 1070: fallbacks.data = NULL;
! 1071: iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
! 1072: }
! 1073: /* Set up hooks for updating the line and column position. */
! 1074: hooks.uc_hook = update_line_column;
! 1075: hooks.wc_hook = NULL;
! 1076: hooks.data = NULL;
! 1077: iconvctl(cd, ICONV_SET_HOOKS, &hooks);
! 1078: if (i == argc)
! 1079: status = convert(cd,stdin,
! 1080: /* TRANSLATORS: A filename substitute denoting standard input. */
! 1081: _("(stdin)"));
! 1082: else {
! 1083: status = 0;
! 1084: for (; i < argc; i++) {
! 1085: const char* infilename = argv[i];
! 1086: FILE* infile = fopen(infilename,"r");
! 1087: if (infile == NULL) {
! 1088: int saved_errno = errno;
! 1089: error(0,saved_errno,
! 1090: /* TRANSLATORS: The first part of an error message.
! 1091: It is followed by a colon and a detail message.
! 1092: The %s placeholder expands to the input file name. */
! 1093: _("%s"),
! 1094: infilename);
! 1095: status = 1;
! 1096: } else {
! 1097: status |= convert(cd,infile,infilename);
! 1098: fclose(infile);
! 1099: }
! 1100: }
! 1101: }
! 1102: iconv_close(cd);
! 1103: }
! 1104: if (ferror(stdout) || fclose(stdout)) {
! 1105: error(0,0,
! 1106: /* TRANSLATORS: An error message. */
! 1107: _("I/O error"));
! 1108: status = 1;
! 1109: }
! 1110: exit(status);
! 1111: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>