1: /* Copyright (C) 2000-2009, 2011-2012, 2016-2019 Free Software Foundation, Inc.
2: This file is part of the GNU LIBICONV Library.
3:
4: This program is free software: you can redistribute it and/or modify
5: it under the terms of the GNU General Public License as published by
6: the Free Software Foundation; either version 3 of the License, or
7: (at your option) any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: GNU General Public License for more details.
13:
14: You should have received a copy of the GNU General Public License
15: along with this program. If not, see <https://www.gnu.org/licenses/>. */
16:
17: #include "config.h"
18: #ifndef ICONV_CONST
19: # define ICONV_CONST
20: #endif
21:
22: #include <limits.h>
23: #include <stddef.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <iconv.h>
28: #include <errno.h>
29: #include <locale.h>
30: #include <fcntl.h>
31:
32: /* Ensure that iconv_no_i18n does not depend on libintl. */
33: #ifdef NO_I18N
34: # undef ENABLE_NLS
35: # undef ENABLE_RELOCATABLE
36: #endif
37:
38: #include "binary-io.h"
39: #include "progname.h"
40: #include "relocatable.h"
41: #include "safe-read.h"
42: #include "xalloc.h"
43: #include "uniwidth.h"
44: #include "uniwidth/cjk.h"
45:
46: /* Ensure that iconv_no_i18n does not depend on libintl. */
47: #ifdef NO_I18N
48: #include <stdarg.h>
49: static void
50: error (int status, int errnum, const char *message, ...)
51: {
52: va_list args;
53:
54: fflush(stdout);
55: fprintf(stderr,"%s: ",program_name);
56: va_start(args,message);
57: vfprintf(stderr,message,args);
58: va_end(args);
59: if (errnum) {
60: const char *s = strerror(errnum);
61: if (s == NULL)
62: s = "Unknown system error";
63: }
64: putc('\n',stderr);
65: fflush(stderr);
66: if (status)
67: exit(status);
68: }
69: #else
70: # include "error.h"
71: #endif
72:
73: #include "gettext.h"
74:
75: #define _(str) gettext(str)
76:
77: /* Ensure that iconv_no_i18n does not depend on libintl. */
78: #ifdef NO_I18N
79: # define xmalloc malloc
80: # define xalloc_die abort
81: #endif
82:
/* Locale independent test for a decimal digit.
   The argument may be a 'char' or an 'unsigned char' and is evaluated
   exactly once.  (The argument of the <ctype.h> isdigit, in contrast, must
   be an 'unsigned char'.) */
#undef isdigit
#define isdigit(c) ((unsigned int) (c) - (unsigned int) '0' <= 9u)

/* Locale independent test for a printable character, i.e. one in the
   range ' ' .. '~'.
   The argument may be a 'char' or an 'unsigned char'; note that it can be
   evaluated twice.  (The argument of the <ctype.h> isprint, in contrast,
   must be an 'unsigned char'.) */
#define c_isprint(c) (!((c) < ' ' || (c) > '~'))
93:
94: /* ========================================================================= */
95:
/* Nonzero if unconvertible characters shall be discarded (option -c).
   The option parser stores 1; convert() stores 2 after it has switched the
   conversion descriptor into discard mode through iconvctl(). */
static int discard_unconvertible;
/* Nonzero if error messages about conversion problems shall be suppressed
   (options -s, --silent). */
static int silent;
98:
99: static void usage (int exitcode)
100: {
101: if (exitcode != 0) {
102: const char* helpstring1 =
103: /* TRANSLATORS: The first line of the short usage message. */
104: _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
105: const char* helpstring2 =
106: /* TRANSLATORS: The second line of the short usage message.
107: Align it correctly against the first line. */
108: _("or: iconv -l");
109: fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
110: fprintf(stderr, _("Try '%s --help' for more information.\n"), program_name);
111: } else {
112: /* xgettext: no-wrap */
113: /* TRANSLATORS: The first line of the long usage message.
114: The %s placeholder expands to the program name. */
115: printf(_("\
116: Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
117: program_name);
118: /* xgettext: no-wrap */
119: /* TRANSLATORS: The second line of the long usage message.
120: Align it correctly against the first line.
121: The %s placeholder expands to the program name. */
122: printf(_("\
123: or: %s -l\n"),
124: program_name);
125: printf("\n");
126: /* xgettext: no-wrap */
127: /* TRANSLATORS: Description of the iconv program. */
128: printf(_("\
129: Converts text from one encoding to another encoding.\n"));
130: printf("\n");
131: /* xgettext: no-wrap */
132: printf(_("\
133: Options controlling the input and output format:\n"));
134: /* xgettext: no-wrap */
135: printf(_("\
136: -f ENCODING, --from-code=ENCODING\n\
137: the encoding of the input\n"));
138: /* xgettext: no-wrap */
139: printf(_("\
140: -t ENCODING, --to-code=ENCODING\n\
141: the encoding of the output\n"));
142: printf("\n");
143: /* xgettext: no-wrap */
144: printf(_("\
145: Options controlling conversion problems:\n"));
146: /* xgettext: no-wrap */
147: printf(_("\
148: -c discard unconvertible characters\n"));
149: /* xgettext: no-wrap */
150: printf(_("\
151: --unicode-subst=FORMATSTRING\n\
152: substitution for unconvertible Unicode characters\n"));
153: /* xgettext: no-wrap */
154: printf(_("\
155: --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
156: /* xgettext: no-wrap */
157: printf(_("\
158: --widechar-subst=FORMATSTRING\n\
159: substitution for unconvertible wide characters\n"));
160: printf("\n");
161: /* xgettext: no-wrap */
162: printf(_("\
163: Options controlling error output:\n"));
164: /* xgettext: no-wrap */
165: printf(_("\
166: -s, --silent suppress error messages about conversion problems\n"));
167: printf("\n");
168: /* xgettext: no-wrap */
169: printf(_("\
170: Informative output:\n"));
171: /* xgettext: no-wrap */
172: printf(_("\
173: -l, --list list the supported encodings\n"));
174: /* xgettext: no-wrap */
175: printf(_("\
176: --help display this help and exit\n"));
177: /* xgettext: no-wrap */
178: printf(_("\
179: --version output version information and exit\n"));
180: printf("\n");
181: /* TRANSLATORS: The placeholder indicates the bug-reporting address
182: for this package. Please add _another line_ saying
183: "Report translation bugs to <...>\n" with the address for translation
184: bugs (typically your translation team's web or email address). */
185: fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
186: }
187: exit(exitcode);
188: }
189:
190: static void print_version (void)
191: {
192: printf("iconv (GNU libiconv %d.%d)\n",
193: _libiconv_version >> 8, _libiconv_version & 0xff);
194: printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2019");
195: /* xgettext: no-wrap */
196: fputs (_("\
197: License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>\n\
198: This is free software: you are free to change and redistribute it.\n\
199: There is NO WARRANTY, to the extent permitted by law.\n\
200: "),stdout);
201: /* TRANSLATORS: The %s placeholder expands to an author's name. */
202: printf(_("Written by %s.\n"),"Bruno Haible");
203: exit(EXIT_SUCCESS);
204: }
205:
/* Callback passed to iconvlist(): writes the NAMESCOUNT strings in NAMES
   to stdout on a single line, separated by single spaces and terminated by
   a newline.  DATA is ignored.  Always returns 0. */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  const char* separator = "";
  unsigned int idx;

  (void)data;
  for (idx = 0; idx < namescount; idx++) {
    /* Empty before the first name, a blank before every later one. */
    fputs(separator,stdout);
    fputs(names[idx],stdout);
    separator = " ";
  }
  putc('\n',stdout);
  return 0;
}
219:
220: /* ========================================================================= */
221:
/* Current position in the input: line number and column. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width() when measuring a character. */
static const char* cjkcode;

/* Update the line number and column position after the character UC was
   successfully converted.  DATA is ignored.
   A line feed starts a new line at column 0.  Any other character advances
   the column by its uc_width(); if uc_width() reports a negative width,
   only a tab has an effect: it moves to the next multiple of 8. */
static void update_line_column (unsigned int uc, void* data)
{
  int width;

  (void)data;
  if (uc == 0x000A) {
    line++;
    column = 0;
    return;
  }
  width = uc_width(uc, cjkcode);
  if (width >= 0) {
    column += width;
    return;
  }
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
241:
242: /* ========================================================================= */
243:
244: /* Production of placeholder strings as fallback for unconvertible
245: characters. */
246:
/* Check that FORMAT is a format string that takes either no argument or
   exactly one unsigned integer argument.
   PARAM_NAME is the command-line option FORMAT came from; it is only used
   in error messages.  Any violation is reported through
   error(EXIT_FAILURE,...).
   Returns an upper bound for the number of bytes the format string can
   produce. */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf().  See also Linux fprintf(3) manual page.
     For simplicity, we don't accept
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains?
     A directive
       - starts with '%',
       - is optionally followed by any of the characters '#', '0', '-', ' ',
         '+', "'", each of which acts as a flag,
       - is optionally followed by a width specification: a nonempty digit
         sequence,
       - is optionally followed by '.' and a precision specification: a
         nonempty digit sequence,
       - is finished by a specifier
           - '%', that needs no argument,
           - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
   */
  const size_t uint_bits = sizeof (unsigned int) * CHAR_BIT;
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  while (*format != '\0') {
    unsigned int width = 0;
    unsigned int precision = 0;
    unsigned int length;

    if (*format++ != '%') {
      /* An ordinary character is copied as is. */
      maxsize++;
      continue;
    }

    /* A directive.  Skip the flags first. */
    while (*format == ' ' || *format == '+' || *format == '-'
           || *format == '#' || *format == '0' || *format == '\'')
      format++;

    /* Parse width. */
    if (*format == '*')
      error(EXIT_FAILURE,0,
            /* TRANSLATORS: An error message.
               The %s placeholder expands to a command-line option.  */
            _("%s argument: A format directive with a variable width is not allowed here."),
            param_name);
    while (isdigit (*format)) {
      width = 10*width + (*format - '0');
      format++;
    }

    /* Parse precision. */
    if (*format == '.') {
      format++;
      if (*format == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a variable precision is not allowed here."),
              param_name);
      while (isdigit (*format)) {
        precision = 10*precision + (*format - '0');
        format++;
      }
    }

    /* Parse size: none is accepted. */
    if (*format == 'h' || *format == 'l' || *format == 'L' || *format == 'q'
        || *format == 'j' || *format == 'z' || *format == 'Z' || *format == 't')
      error(EXIT_FAILURE,0,
            /* TRANSLATORS: An error message.
               The %s placeholder expands to a command-line option.  */
            _("%s argument: A format directive with a size is not allowed here."),
            param_name);

    /* Parse end of directive. */
    switch (*format) {
      case '%':
        length = 1;
        break;
      case 'u':
        length = (unsigned int) (uint_bits * 0.30103 /* binary -> decimal */)
                 + 1; /* turn floor into ceil */
        if (length < precision)
          length = precision;
        /* Double as an estimate for FLAG_GROUP, plus one for a leading
           sign. */
        length = 2*length + 1;
        unnumbered_arg_count++;
        break;
      case 'o':
        length = (unsigned int) (uint_bits * 0.333334 /* binary -> octal */)
                 + 1; /* turn floor into ceil */
        if (length < precision)
          length = precision;
        length += 1; /* account for leading sign */
        unnumbered_arg_count++;
        break;
      case 'x': case 'X':
        length = (unsigned int) (uint_bits * 0.25 /* binary -> hexadecimal */)
                 + 1; /* turn floor into ceil */
        if (length < precision)
          length = precision;
        length += 2; /* account for leading sign or alternate form */
        unnumbered_arg_count++;
        break;
      default:
        if (*format == '\0')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: The string ends in the middle of a directive."),
                param_name);
        else if (c_isprint(*format))
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.
                   The %c placeholder expands to an unknown format directive.  */
                _("%s argument: The character '%c' is not a valid conversion specifier."),
                param_name,*format);
        else
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                param_name);
        abort(); /*NOTREACHED*/
    }
    format++;

    /* The width is a minimum for the size of the directive's output. */
    if (length < width)
      length = width;
    maxsize += length;
  }

  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,
          /* TRANSLATORS: An error message.
             The %s placeholder expands to a command-line option.
             The %u placeholder expands to the number of arguments consumed by the format string.  */
          ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                   "%s argument: The format string consumes more than one argument: %u arguments.",
                   unnumbered_arg_count),
          param_name,unnumbered_arg_count);
  return maxsize;
}
404:
/* Format strings given through the --byte-subst, --widechar-subst and
   --unicode-subst options, respectively.  NULL when the option is absent. */
static const char *ilseq_byte_subst;
static const char *ilseq_wchar_subst;
static const char *ilseq_unicode_subst;

/* Maximum result size for each of these format strings, as computed by
   check_subst_formatstring(). */
static size_t ilseq_byte_subst_size;
static size_t ilseq_wchar_subst_size;
static size_t ilseq_unicode_subst_size;

/* Formatting buffers.  Each one has room for the maximum result size of
   its format string plus the terminating NUL. */
/* Buffer of size ilseq_byte_subst_size+1. */
static char *ilseq_byte_subst_buffer;
#if HAVE_WCHAR_T
/* Buffer of size ilseq_wchar_subst_size+1. */
static char *ilseq_wchar_subst_buffer;
#endif
/* Buffer of size ilseq_unicode_subst_size+1. */
static char *ilseq_unicode_subst_buffer;
423:
424: /* Auxiliary variables for subst_mb_to_uc_fallback. */
425: /* Converter from locale encoding to UCS-4. */
426: static iconv_t subst_mb_to_uc_cd;
427: /* Buffer of size ilseq_byte_subst_size. */
428: static unsigned int* subst_mb_to_uc_temp_buffer;
429:
430: static void subst_mb_to_uc_fallback
431: (const char* inbuf, size_t inbufsize,
432: void (*write_replacement) (const unsigned int *buf, size_t buflen,
433: void* callback_arg),
434: void* callback_arg,
435: void* data)
436: {
437: for (; inbufsize > 0; inbuf++, inbufsize--) {
438: const char* inptr;
439: size_t inbytesleft;
440: char* outptr;
441: size_t outbytesleft;
442: sprintf(ilseq_byte_subst_buffer,
443: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
444: inptr = ilseq_byte_subst_buffer;
445: inbytesleft = strlen(ilseq_byte_subst_buffer);
446: outptr = (char*)subst_mb_to_uc_temp_buffer;
447: outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
448: iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
449: if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
450: == (size_t)(-1)
451: || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
452: == (size_t)(-1))
453: error(EXIT_FAILURE,0,
454: /* TRANSLATORS: An error message.
455: The %s placeholder expands to a piece of text, specified through --byte-subst. */
456: _("cannot convert byte substitution to Unicode: %s"),
457: ilseq_byte_subst_buffer);
458: if (!(outbytesleft%sizeof(unsigned int) == 0))
459: abort();
460: write_replacement(subst_mb_to_uc_temp_buffer,
461: ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
462: callback_arg);
463: }
464: }
465:
466: /* Auxiliary variables for subst_uc_to_mb_fallback. */
467: /* Converter from locale encoding to target encoding. */
468: static iconv_t subst_uc_to_mb_cd;
469: /* Buffer of size ilseq_unicode_subst_size*4. */
470: static char* subst_uc_to_mb_temp_buffer;
471:
472: static void subst_uc_to_mb_fallback
473: (unsigned int code,
474: void (*write_replacement) (const char *buf, size_t buflen,
475: void* callback_arg),
476: void* callback_arg,
477: void* data)
478: {
479: const char* inptr;
480: size_t inbytesleft;
481: char* outptr;
482: size_t outbytesleft;
483: sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
484: inptr = ilseq_unicode_subst_buffer;
485: inbytesleft = strlen(ilseq_unicode_subst_buffer);
486: outptr = subst_uc_to_mb_temp_buffer;
487: outbytesleft = ilseq_unicode_subst_size*4;
488: iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
489: if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
490: == (size_t)(-1)
491: || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
492: == (size_t)(-1))
493: error(EXIT_FAILURE,0,
494: /* TRANSLATORS: An error message.
495: The %s placeholder expands to a piece of text, specified through --unicode-subst. */
496: _("cannot convert unicode substitution to target encoding: %s"),
497: ilseq_unicode_subst_buffer);
498: write_replacement(subst_uc_to_mb_temp_buffer,
499: ilseq_unicode_subst_size*4-outbytesleft,
500: callback_arg);
501: }
502:
503: #if HAVE_WCHAR_T
504:
505: /* Auxiliary variables for subst_mb_to_wc_fallback. */
506: /* Converter from locale encoding to wchar_t. */
507: static iconv_t subst_mb_to_wc_cd;
508: /* Buffer of size ilseq_byte_subst_size. */
509: static wchar_t* subst_mb_to_wc_temp_buffer;
510:
511: static void subst_mb_to_wc_fallback
512: (const char* inbuf, size_t inbufsize,
513: void (*write_replacement) (const wchar_t *buf, size_t buflen,
514: void* callback_arg),
515: void* callback_arg,
516: void* data)
517: {
518: for (; inbufsize > 0; inbuf++, inbufsize--) {
519: const char* inptr;
520: size_t inbytesleft;
521: char* outptr;
522: size_t outbytesleft;
523: sprintf(ilseq_byte_subst_buffer,
524: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
525: inptr = ilseq_byte_subst_buffer;
526: inbytesleft = strlen(ilseq_byte_subst_buffer);
527: outptr = (char*)subst_mb_to_wc_temp_buffer;
528: outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
529: iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
530: if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
531: == (size_t)(-1)
532: || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
533: == (size_t)(-1))
534: error(EXIT_FAILURE,0,
535: /* TRANSLATORS: An error message.
536: The %s placeholder expands to a piece of text, specified through --byte-subst. */
537: _("cannot convert byte substitution to wide string: %s"),
538: ilseq_byte_subst_buffer);
539: if (!(outbytesleft%sizeof(wchar_t) == 0))
540: abort();
541: write_replacement(subst_mb_to_wc_temp_buffer,
542: ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
543: callback_arg);
544: }
545: }
546:
547: /* Auxiliary variables for subst_wc_to_mb_fallback. */
548: /* Converter from locale encoding to target encoding. */
549: static iconv_t subst_wc_to_mb_cd;
550: /* Buffer of size ilseq_wchar_subst_size*4.
551: Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
552: static char* subst_wc_to_mb_temp_buffer;
553:
554: static void subst_wc_to_mb_fallback
555: (wchar_t code,
556: void (*write_replacement) (const char *buf, size_t buflen,
557: void* callback_arg),
558: void* callback_arg,
559: void* data)
560: {
561: const char* inptr;
562: size_t inbytesleft;
563: char* outptr;
564: size_t outbytesleft;
565: sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
566: inptr = ilseq_wchar_subst_buffer;
567: inbytesleft = strlen(ilseq_wchar_subst_buffer);
568: outptr = subst_wc_to_mb_temp_buffer;
569: outbytesleft = ilseq_wchar_subst_size*4;
570: iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
571: if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
572: == (size_t)(-1)
573: || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
574: == (size_t)(-1))
575: error(EXIT_FAILURE,0,
576: /* TRANSLATORS: An error message.
577: The %s placeholder expands to a piece of text, specified through --widechar-subst. */
578: _("cannot convert widechar substitution to target encoding: %s"),
579: ilseq_wchar_subst_buffer);
580: write_replacement(subst_wc_to_mb_temp_buffer,
581: ilseq_wchar_subst_size*4-outbytesleft,
582: callback_arg);
583: }
584:
585: #else
586:
587: #define subst_mb_to_wc_fallback NULL
588: #define subst_wc_to_mb_fallback NULL
589:
590: #endif
591:
592: /* Auxiliary variables for subst_mb_to_mb_fallback. */
593: /* Converter from locale encoding to target encoding. */
594: static iconv_t subst_mb_to_mb_cd;
595: /* Buffer of size ilseq_byte_subst_size*4. */
596: static char* subst_mb_to_mb_temp_buffer;
597:
598: static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
599: {
600: for (; inbufsize > 0; inbuf++, inbufsize--) {
601: const char* inptr;
602: size_t inbytesleft;
603: char* outptr;
604: size_t outbytesleft;
605: sprintf(ilseq_byte_subst_buffer,
606: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
607: inptr = ilseq_byte_subst_buffer;
608: inbytesleft = strlen(ilseq_byte_subst_buffer);
609: outptr = subst_mb_to_mb_temp_buffer;
610: outbytesleft = ilseq_byte_subst_size*4;
611: iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
612: if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
613: == (size_t)(-1)
614: || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
615: == (size_t)(-1))
616: error(EXIT_FAILURE,0,
617: /* TRANSLATORS: An error message.
618: The %s placeholder expands to a piece of text, specified through --byte-subst. */
619: _("cannot convert byte substitution to target encoding: %s"),
620: ilseq_byte_subst_buffer);
621: fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
622: stdout);
623: }
624: }
625:
626: /* ========================================================================= */
627:
628: /* Error messages during conversion. */
629:
630: static void conversion_error_EILSEQ (const char* infilename)
631: {
632: fflush(stdout);
633: if (column > 0)
634: putc('\n',stderr);
635: error(0,0,
636: /* TRANSLATORS: An error message.
637: The placeholders expand to the input file name, a line number, and a column number. */
638: _("%s:%u:%u: cannot convert"),
639: infilename,line,column);
640: }
641:
642: static void conversion_error_EINVAL (const char* infilename)
643: {
644: fflush(stdout);
645: if (column > 0)
646: putc('\n',stderr);
647: error(0,0,
648: /* TRANSLATORS: An error message.
649: The placeholders expand to the input file name, a line number, and a column number.
650: A "shift sequence" is a sequence of bytes that changes the state of the converter;
651: this concept exists only for "stateful" encodings like ISO-2022-JP. */
652: _("%s:%u:%u: incomplete character or shift sequence"),
653: infilename,line,column);
654: }
655:
656: static void conversion_error_other (int errnum, const char* infilename)
657: {
658: fflush(stdout);
659: if (column > 0)
660: putc('\n',stderr);
661: error(0,errnum,
662: /* TRANSLATORS: The first part of an error message.
663: It is followed by a colon and a detail message.
664: The placeholders expand to the input file name, a line number, and a column number. */
665: _("%s:%u:%u"),
666: infilename,line,column);
667: }
668:
669: /* Convert the input given in infile. */
670:
671: static int convert (iconv_t cd, int infile, const char* infilename)
672: {
673: char inbuf[4096+4096];
674: size_t inbufrest = 0;
675: int infile_error = 0;
676: char initial_outbuf[4096];
677: char *outbuf = initial_outbuf;
678: size_t outbufsize = sizeof(initial_outbuf);
679: int status = 0;
680:
681: #if O_BINARY
682: SET_BINARY(infile);
683: #endif
684: line = 1; column = 0;
685: iconv(cd,NULL,NULL,NULL,NULL);
686: for (;;) {
687: size_t inbufsize;
688: /* Transfer the accumulated output to its destination, in case the
689: safe_read() call will block. */
690: fflush(stdout);
691: inbufsize = safe_read(infile,inbuf+4096,4096);
692: if (inbufsize == 0 || inbufsize == SAFE_READ_ERROR) {
693: infile_error = (inbufsize == SAFE_READ_ERROR ? errno : 0);
694: if (inbufrest == 0)
695: break;
696: else {
697: if (ilseq_byte_subst != NULL)
698: subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
699: if (!silent)
700: conversion_error_EINVAL(infilename);
701: status = 1;
702: goto done;
703: }
704: } else {
705: const char* inptr = inbuf+4096-inbufrest;
706: size_t insize = inbufrest+inbufsize;
707: inbufrest = 0;
708: while (insize > 0) {
709: char* outptr = outbuf;
710: size_t outsize = outbufsize;
711: size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
712: if (outptr != outbuf) {
713: int saved_errno = errno;
714: if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
715: status = 1;
716: goto done;
717: }
718: errno = saved_errno;
719: }
720: if (res == (size_t)(-1)) {
721: if (errno == EILSEQ) {
722: if (discard_unconvertible == 1) {
723: int one = 1;
724: iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
725: discard_unconvertible = 2;
726: status = 1;
727: } else {
728: if (!silent)
729: conversion_error_EILSEQ(infilename);
730: status = 1;
731: goto done;
732: }
733: } else if (errno == EINVAL) {
734: if (inbufsize == 0 || insize > 4096) {
735: if (!silent)
736: conversion_error_EINVAL(infilename);
737: status = 1;
738: goto done;
739: } else {
740: inbufrest = insize;
741: if (insize > 0) {
742: /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
743: we cannot use memcpy here, because source and destination
744: regions may overlap. */
745: char* restptr = inbuf+4096-insize;
746: do { *restptr++ = *inptr++; } while (--insize > 0);
747: }
748: break;
749: }
750: } else if (errno == E2BIG) {
751: if (outptr==outbuf) {
752: /* outbuf is too small. Double its size. */
753: if (outbuf != initial_outbuf)
754: free(outbuf);
755: outbufsize = 2*outbufsize;
756: if (outbufsize==0) /* integer overflow? */
757: xalloc_die();
758: outbuf = (char*)xmalloc(outbufsize);
759: }
760: } else {
761: if (!silent)
762: conversion_error_other(errno,infilename);
763: status = 1;
764: goto done;
765: }
766: }
767: }
768: }
769: }
770: for (;;) {
771: char* outptr = outbuf;
772: size_t outsize = outbufsize;
773: size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
774: if (outptr != outbuf) {
775: int saved_errno = errno;
776: if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
777: status = 1;
778: goto done;
779: }
780: errno = saved_errno;
781: }
782: if (res == (size_t)(-1)) {
783: if (errno == EILSEQ) {
784: if (discard_unconvertible == 1) {
785: int one = 1;
786: iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
787: discard_unconvertible = 2;
788: status = 1;
789: } else {
790: if (!silent)
791: conversion_error_EILSEQ(infilename);
792: status = 1;
793: goto done;
794: }
795: } else if (errno == EINVAL) {
796: if (!silent)
797: conversion_error_EINVAL(infilename);
798: status = 1;
799: goto done;
800: } else if (errno == E2BIG) {
801: if (outptr==outbuf) {
802: /* outbuf is too small. Double its size. */
803: if (outbuf != initial_outbuf)
804: free(outbuf);
805: outbufsize = 2*outbufsize;
806: if (outbufsize==0) /* integer overflow? */
807: xalloc_die();
808: outbuf = (char*)xmalloc(outbufsize);
809: }
810: } else {
811: if (!silent)
812: conversion_error_other(errno,infilename);
813: status = 1;
814: goto done;
815: }
816: } else
817: break;
818: }
819: if (infile_error) {
820: fflush(stdout);
821: if (column > 0)
822: putc('\n',stderr);
823: error(0,infile_error,
824: /* TRANSLATORS: An error message.
825: The placeholder expands to the input file name. */
826: _("%s: I/O error"),
827: infilename);
828: status = 1;
829: goto done;
830: }
831: done:
832: if (outbuf != initial_outbuf)
833: free(outbuf);
834: return status;
835: }
836:
837: /* ========================================================================= */
838:
839: int main (int argc, char* argv[])
840: {
841: const char* fromcode = NULL;
842: const char* tocode = NULL;
843: int do_list = 0;
844: iconv_t cd;
845: struct iconv_fallbacks fallbacks;
846: struct iconv_hooks hooks;
847: int i;
848: int status;
849:
850: set_program_name (argv[0]);
851: #if HAVE_SETLOCALE
852: /* Needed for the locale dependent encodings, "char" and "wchar_t",
853: and for gettext. */
854: setlocale(LC_CTYPE,"");
855: #if ENABLE_NLS
856: /* Needed for gettext. */
857: setlocale(LC_MESSAGES,"");
858: #endif
859: #endif
860: #if ENABLE_NLS
861: bindtextdomain("libiconv",relocate(LOCALEDIR));
862: #endif
863: textdomain("libiconv");
864: /* No need to invoke the gnulib function stdopen() here, because
865: (1) the only file descriptor allocations done by this program are
866: fopen(...,"r"),
867: (2) when such fopen() calls occur, stdin is not used,
868: hence
869: - when an fopen() call happens to open fd 0, it is harmless, by (2),
870: - when an fopen() call happens to open fd 1 or 2, writing to
871: stdout or stderr will produce an error, by (1). */
872:
873: for (i = 1; i < argc;) {
874: size_t len = strlen(argv[i]);
875: if (!strcmp(argv[i],"--")) {
876: i++;
877: break;
878: }
879: if (!strcmp(argv[i],"-f")
880: /* --f ... --from-code */
881: || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
882: /* --from-code=... */
883: || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
884: if (len < 12)
885: if (i == argc-1) usage(1);
886: if (fromcode != NULL) usage(1);
887: if (len < 12) {
888: fromcode = argv[i+1];
889: i += 2;
890: } else {
891: fromcode = argv[i]+12;
892: i++;
893: }
894: continue;
895: }
896: if (!strcmp(argv[i],"-t")
897: /* --t ... --to-code */
898: || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
/* --to-code=... */
900: || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
901: if (len < 10)
902: if (i == argc-1) usage(1);
903: if (tocode != NULL) usage(1);
904: if (len < 10) {
905: tocode = argv[i+1];
906: i += 2;
907: } else {
908: tocode = argv[i]+10;
909: i++;
910: }
911: continue;
912: }
913: if (!strcmp(argv[i],"-l")
914: /* --l ... --list */
915: || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
916: do_list = 1;
917: i++;
918: continue;
919: }
920: if (/* --by ... --byte-subst */
921: (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
922: /* --byte-subst=... */
923: || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
924: if (len < 13) {
925: if (i == argc-1) usage(1);
926: ilseq_byte_subst = argv[i+1];
927: i += 2;
928: } else {
929: ilseq_byte_subst = argv[i]+13;
930: i++;
931: }
932: ilseq_byte_subst_size =
933: check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
934: continue;
935: }
936: if (/* --w ... --widechar-subst */
937: (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
938: /* --widechar-subst=... */
939: || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
940: if (len < 17) {
941: if (i == argc-1) usage(1);
942: ilseq_wchar_subst = argv[i+1];
943: i += 2;
944: } else {
945: ilseq_wchar_subst = argv[i]+17;
946: i++;
947: }
948: ilseq_wchar_subst_size =
949: check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
950: continue;
951: }
952: if (/* --u ... --unicode-subst */
953: (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
954: /* --unicode-subst=... */
955: || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
956: if (len < 16) {
957: if (i == argc-1) usage(1);
958: ilseq_unicode_subst = argv[i+1];
959: i += 2;
960: } else {
961: ilseq_unicode_subst = argv[i]+16;
962: i++;
963: }
964: ilseq_unicode_subst_size =
965: check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
966: continue;
967: }
968: if /* --s ... --silent */
969: (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
970: silent = 1;
971: i++;
972: continue;
973: }
974: if /* --h ... --help */
975: (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
976: usage(0);
977: }
978: if /* --v ... --version */
979: (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
980: print_version();
981: }
982: #if O_BINARY
983: /* Backward compatibility with iconv <= 1.9.1. */
984: if /* --bi ... --binary */
985: (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
986: i++;
987: continue;
988: }
989: #endif
990: if (argv[i][0] == '-') {
991: const char *option = argv[i] + 1;
992: if (*option == '\0')
993: usage(1);
994: for (; *option; option++)
995: switch (*option) {
996: case 'c': discard_unconvertible = 1; break;
997: case 's': silent = 1; break;
998: default: usage(1);
999: }
1000: i++;
1001: continue;
1002: }
1003: break;
1004: }
1005: if (do_list) {
1006: if (i != 2 || i != argc)
1007: usage(1);
1008: iconvlist(print_one,NULL);
1009: status = 0;
1010: } else {
1011: #if O_BINARY
1012: SET_BINARY(fileno(stdout));
1013: #endif
1014: if (fromcode == NULL)
1015: fromcode = "char";
1016: if (tocode == NULL)
1017: tocode = "char";
1018: cd = iconv_open(tocode,fromcode);
1019: if (cd == (iconv_t)(-1)) {
1020: if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
1021: error(0,0,
1022: /* TRANSLATORS: An error message.
1023: The placeholder expands to the encoding name, specified through --from-code. */
1024: _("conversion from %s unsupported"),
1025: fromcode);
1026: else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
1027: error(0,0,
1028: /* TRANSLATORS: An error message.
1029: The placeholder expands to the encoding name, specified through --to-code. */
1030: _("conversion to %s unsupported"),
1031: tocode);
1032: else
1033: error(0,0,
1034: /* TRANSLATORS: An error message.
1035: The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
1036: _("conversion from %s to %s unsupported"),
1037: fromcode,tocode);
1038: error(EXIT_FAILURE,0,
1039: /* TRANSLATORS: Additional advice after an error message.
1040: The %s placeholder expands to the program name. */
1041: _("try '%s -l' to get the list of supported encodings"),
1042: program_name);
1043: }
1044: /* Look at fromcode and tocode, to determine whether character widths
1045: should be determined according to legacy CJK conventions. */
1046: cjkcode = iconv_canonicalize(tocode);
1047: if (!is_cjk_encoding(cjkcode))
1048: cjkcode = iconv_canonicalize(fromcode);
1049: /* Set up fallback routines for handling impossible conversions. */
1050: if (ilseq_byte_subst != NULL)
1051: ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
1052: if (!discard_unconvertible) {
1053: #if HAVE_WCHAR_T
1054: if (ilseq_wchar_subst != NULL)
1055: ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
1056: #endif
1057: if (ilseq_unicode_subst != NULL)
1058: ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
1059: if (ilseq_byte_subst != NULL) {
1060: subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
1061: subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
1062: #if HAVE_WCHAR_T
1063: subst_mb_to_wc_cd = iconv_open("wchar_t","char");
1064: subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
1065: #endif
1066: subst_mb_to_mb_cd = iconv_open(tocode,"char");
1067: subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
1068: }
1069: #if HAVE_WCHAR_T
1070: if (ilseq_wchar_subst != NULL) {
1071: subst_wc_to_mb_cd = iconv_open(tocode,"char");
1072: subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
1073: }
1074: #endif
1075: if (ilseq_unicode_subst != NULL) {
1076: subst_uc_to_mb_cd = iconv_open(tocode,"char");
1077: subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
1078: }
1079: fallbacks.mb_to_uc_fallback =
1080: (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
1081: fallbacks.uc_to_mb_fallback =
1082: (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
1083: fallbacks.mb_to_wc_fallback =
1084: (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
1085: fallbacks.wc_to_mb_fallback =
1086: (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
1087: fallbacks.data = NULL;
1088: iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
1089: }
1090: /* Set up hooks for updating the line and column position. */
1091: hooks.uc_hook = update_line_column;
1092: hooks.wc_hook = NULL;
1093: hooks.data = NULL;
1094: iconvctl(cd, ICONV_SET_HOOKS, &hooks);
1095: if (i == argc)
1096: status = convert(cd,fileno(stdin),
1097: /* TRANSLATORS: A filename substitute denoting standard input. */
1098: _("(stdin)"));
1099: else {
1100: status = 0;
1101: for (; i < argc; i++) {
1102: const char* infilename = argv[i];
1103: FILE* infile = fopen(infilename,"r");
1104: if (infile == NULL) {
1105: int saved_errno = errno;
1106: error(0,saved_errno,
1107: /* TRANSLATORS: The first part of an error message.
1108: It is followed by a colon and a detail message.
1109: The %s placeholder expands to the input file name. */
1110: _("%s"),
1111: infilename);
1112: status = 1;
1113: } else {
1114: status |= convert(cd,fileno(infile),infilename);
1115: fclose(infile);
1116: }
1117: }
1118: }
1119: iconv_close(cd);
1120: }
1121: if (ferror(stdout) || fclose(stdout)) {
1122: error(0,0,
1123: /* TRANSLATORS: An error message. */
1124: _("I/O error"));
1125: status = 1;
1126: }
1127: exit(status);
1128: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>