Annotation of embedaddon/libiconv/src/iconv.c, revision 1.1.1.1
1.1 misho 1: /* Copyright (C) 2000-2009 Free Software Foundation, Inc.
2: This file is part of the GNU LIBICONV Library.
3:
4: This program is free software: you can redistribute it and/or modify
5: it under the terms of the GNU General Public License as published by
6: the Free Software Foundation; either version 3 of the License, or
7: (at your option) any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: GNU General Public License for more details.
13:
14: You should have received a copy of the GNU General Public License
15: along with this program. If not, see <http://www.gnu.org/licenses/>. */
16:
17: #include "config.h"
18: #ifndef ICONV_CONST
19: # define ICONV_CONST
20: #endif
21:
22: #include <limits.h>
23: #include <stddef.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <iconv.h>
28: #include <errno.h>
29: #include <locale.h>
30: #include <fcntl.h>
31:
32: /* Ensure that iconv_no_i18n does not depend on libintl. */
33: #ifdef NO_I18N
34: # undef ENABLE_NLS
35: # undef ENABLE_RELOCATABLE
36: #endif
37:
38: #include "binary-io.h"
39: #include "progname.h"
40: #include "relocatable.h"
41: #include "xalloc.h"
42: #include "uniwidth.h"
43: #include "uniwidth/cjk.h"
44:
45: /* Ensure that iconv_no_i18n does not depend on libintl. */
46: #ifdef NO_I18N
47: #include <stdarg.h>
48: static void
49: error (int status, int errnum, const char *message, ...)
50: {
51: va_list args;
52:
53: fflush(stdout);
54: fprintf(stderr,"%s: ",program_name);
55: va_start(args,message);
56: vfprintf(stderr,message,args);
57: va_end(args);
58: if (errnum) {
59: const char *s = strerror(errnum);
60: if (s == NULL)
61: s = "Unknown system error";
62: }
63: putc('\n',stderr);
64: fflush(stderr);
65: if (status)
66: exit(status);
67: }
68: #else
69: # include "error.h"
70: #endif
71:
72: #include "gettext.h"
73:
74: #define _(str) gettext(str)
75:
76: /* Ensure that iconv_no_i18n does not depend on libintl. */
77: #ifdef NO_I18N
78: # define xmalloc malloc
79: # define xalloc_die abort
80: #endif
81:
82: /* Locale independent test for a decimal digit.
83: Argument can be 'char' or 'unsigned char'. (Whereas the argument of
84: <ctype.h> isdigit must be an 'unsigned char'.) */
85: #undef isdigit
86: #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
87:
88: /* Locale independent test for a printable character.
89: Argument can be 'char' or 'unsigned char'. (Whereas the argument of
90: <ctype.h> isprint must be an 'unsigned char'.) */
91: #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
92:
93: /* ========================================================================= */
94:
95: static int discard_unconvertible = 0;
96: static int silent = 0;
97:
98: static void usage (int exitcode)
99: {
100: if (exitcode != 0) {
101: const char* helpstring1 =
102: /* TRANSLATORS: The first line of the short usage message. */
103: _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
104: const char* helpstring2 =
105: /* TRANSLATORS: The second line of the short usage message.
106: Align it correctly against the first line. */
107: _("or: iconv -l");
108: fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
109: fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name);
110: } else {
111: /* xgettext: no-wrap */
112: /* TRANSLATORS: The first line of the long usage message.
113: The %s placeholder expands to the program name. */
114: printf(_("\
115: Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
116: program_name);
117: /* xgettext: no-wrap */
118: /* TRANSLATORS: The second line of the long usage message.
119: Align it correctly against the first line.
120: The %s placeholder expands to the program name. */
121: printf(_("\
122: or: %s -l\n"),
123: program_name);
124: printf("\n");
125: /* xgettext: no-wrap */
126: /* TRANSLATORS: Description of the iconv program. */
127: printf(_("\
128: Converts text from one encoding to another encoding.\n"));
129: printf("\n");
130: /* xgettext: no-wrap */
131: printf(_("\
132: Options controlling the input and output format:\n"));
133: /* xgettext: no-wrap */
134: printf(_("\
135: -f ENCODING, --from-code=ENCODING\n\
136: the encoding of the input\n"));
137: /* xgettext: no-wrap */
138: printf(_("\
139: -t ENCODING, --to-code=ENCODING\n\
140: the encoding of the output\n"));
141: printf("\n");
142: /* xgettext: no-wrap */
143: printf(_("\
144: Options controlling conversion problems:\n"));
145: /* xgettext: no-wrap */
146: printf(_("\
147: -c discard unconvertible characters\n"));
148: /* xgettext: no-wrap */
149: printf(_("\
150: --unicode-subst=FORMATSTRING\n\
151: substitution for unconvertible Unicode characters\n"));
152: /* xgettext: no-wrap */
153: printf(_("\
154: --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
155: /* xgettext: no-wrap */
156: printf(_("\
157: --widechar-subst=FORMATSTRING\n\
158: substitution for unconvertible wide characters\n"));
159: printf("\n");
160: /* xgettext: no-wrap */
161: printf(_("\
162: Options controlling error output:\n"));
163: /* xgettext: no-wrap */
164: printf(_("\
165: -s, --silent suppress error messages about conversion problems\n"));
166: printf("\n");
167: /* xgettext: no-wrap */
168: printf(_("\
169: Informative output:\n"));
170: /* xgettext: no-wrap */
171: printf(_("\
172: -l, --list list the supported encodings\n"));
173: /* xgettext: no-wrap */
174: printf(_("\
175: --help display this help and exit\n"));
176: /* xgettext: no-wrap */
177: printf(_("\
178: --version output version information and exit\n"));
179: printf("\n");
180: /* TRANSLATORS: The placeholder indicates the bug-reporting address
181: for this package. Please add _another line_ saying
182: "Report translation bugs to <...>\n" with the address for translation
183: bugs (typically your translation team's web or email address). */
184: fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
185: }
186: exit(exitcode);
187: }
188:
189: static void print_version (void)
190: {
191: printf("iconv (GNU libiconv %d.%d)\n",
192: _libiconv_version >> 8, _libiconv_version & 0xff);
193: printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2009");
194: /* xgettext: no-wrap */
195: fputs (_("\
196: License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
197: This is free software: you are free to change and redistribute it.\n\
198: There is NO WARRANTY, to the extent permitted by law.\n\
199: "),stdout);
200: /* TRANSLATORS: The %s placeholder expands to an author's name. */
201: printf(_("Written by %s.\n"),"Bruno Haible");
202: exit(EXIT_SUCCESS);
203: }
204:
/* Callback that prints one group of encoding names on a single line of
   stdout: the NAMESCOUNT strings in NAMES separated by single spaces and
   followed by a newline.  DATA is ignored.  Always returns 0. */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  const char* separator = "";
  unsigned int idx = 0;

  (void)data;
  while (idx < namescount) {
    /* Empty before the first name, a single space before every other one. */
    fputs(separator,stdout);
    fputs(names[idx],stdout);
    separator = " ";
    idx++;
  }
  putc('\n',stdout);
  return 0;
}
218:
219: /* ========================================================================= */
220:
/* Line number and column position. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width() when measuring a character. */
static const char* cjkcode;

/* Update the line number and column position after a character was
   successfully converted.
   UC is the character that was converted.  A line feed (0x000A) starts a new
   line and resets the column.  Otherwise the column advances by the width
   reported by uc_width(); when that width is negative, only a tab (0x0009)
   moves the column, namely to the next multiple of 8.  DATA is unused. */
static void update_line_column (unsigned int uc, void* data)
{
  int char_width;

  if (uc == 0x000A) {
    line++;
    column = 0;
    return;
  }

  char_width = uc_width(uc, cjkcode);
  if (char_width >= 0) {
    column += char_width;
    return;
  }

  /* Negative width: only the tab character has a defined effect. */
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
240:
241: /* ========================================================================= */
242:
243: /* Production of placeholder strings as fallback for unconvertible
244: characters. */
245:
/* Check that the argument is a format string taking either no argument
   or exactly one unsigned integer argument. Returns the maximum output
   size of the format string.
   FORMAT is the format string to validate; PARAM_NAME is the name of the
   command-line option it came from and is only used in error messages.
   Every violation is reported through error() with EXIT_FAILURE.
   Width and precision values above INT_MAX are rejected: accumulating them
   unchecked would wrap around and make the returned size smaller than the
   text that sprintf() later produces for this format. */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf(). See also Linux fprintf(3) manual page.
     For simplicity, we don't accept
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains?
     A directive
       - starts with '%',
       - is optionally followed by any of the characters '#', '0', '-', ' ',
         '+', "'", each of which acts as a flag,
       - is optionally followed by a width specification: a nonempty digit
         sequence,
       - is optionally followed by '.' and a precision specification: a
         nonempty digit sequence,
       - is finished by a specifier
           - '%', that needs no argument,
           - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
   */
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  for (; *format != '\0';) {
    if (*format++ == '%') {
      /* A directive. */
      unsigned int width = 0;
      unsigned int precision = 0;
      unsigned int length;
      /* Parse flags. */
      for (;;) {
        if (*format == ' ' || *format == '+' || *format == '-'
            || *format == '#' || *format == '0' || *format == '\'')
          format++;
        else
          break;
      }
      /* Parse width. */
      if (*format == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option. */
              _("%s argument: A format directive with a variable width is not allowed here."),
              param_name);
      if (isdigit (*format)) {
        do {
          unsigned int digit = (unsigned int) (*format - '0');
          /* Refuse to go beyond INT_MAX instead of silently wrapping. */
          if (width > (INT_MAX - digit) / 10)
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option. */
                  _("%s argument: The width of a format directive is too large."),
                  param_name);
          width = 10*width + digit;
          format++;
        } while (isdigit (*format));
      }
      /* Parse precision. */
      if (*format == '.') {
        format++;
        if (*format == '*')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option. */
                _("%s argument: A format directive with a variable precision is not allowed here."),
                param_name);
        if (isdigit (*format)) {
          do {
            unsigned int digit = (unsigned int) (*format - '0');
            /* Same overflow guard as for the width. */
            if (precision > (INT_MAX - digit) / 10)
              error(EXIT_FAILURE,0,
                    /* TRANSLATORS: An error message.
                       The %s placeholder expands to a command-line option. */
                    _("%s argument: The precision of a format directive is too large."),
                    param_name);
            precision = 10*precision + digit;
            format++;
          } while (isdigit (*format));
        }
      }
      /* Parse size. */
      switch (*format) {
        case 'h': case 'l': case 'L': case 'q':
        case 'j': case 'z': case 'Z': case 't':
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option. */
                _("%s argument: A format directive with a size is not allowed here."),
                param_name);
      }
      /* Parse end of directive. */
      switch (*format) {
        case '%':
          length = 1;
          break;
        case 'u': case 'o': case 'x': case 'X':
          if (*format == 'u') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.30103 /* binary -> decimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length *= 2; /* estimate for FLAG_GROUP */
            length += 1; /* account for leading sign */
          } else if (*format == 'o') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.333334 /* binary -> octal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 1; /* account for leading sign */
          } else { /* 'x', 'X' */
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.25 /* binary -> hexadecimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 2; /* account for leading sign or alternate form */
          }
          unnumbered_arg_count++;
          break;
        default:
          if (*format == '\0')
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option. */
                  _("%s argument: The string ends in the middle of a directive."),
                  param_name);
          else if (c_isprint(*format))
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.
                     The %c placeholder expands to an unknown format directive. */
                  _("%s argument: The character '%c' is not a valid conversion specifier."),
                  param_name,*format);
          else
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option. */
                  _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                  param_name);
          abort(); /*NOTREACHED*/
      }
      format++;
      /* The field is at least as wide as the requested width. */
      if (length < width)
        length = width;
      maxsize += length;
    } else
      /* An ordinary character is copied to the output as is. */
      maxsize++;
  }
  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,
          /* TRANSLATORS: An error message.
             The %s placeholder expands to a command-line option.
             The %u placeholder expands to the number of arguments consumed by the format string. */
          ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                   "%s argument: The format string consumes more than one argument: %u arguments.",
                   unnumbered_arg_count),
          param_name,unnumbered_arg_count);
  return maxsize;
}
403:
404: /* Format strings. */
405: static const char* ilseq_byte_subst;
406: static const char* ilseq_wchar_subst;
407: static const char* ilseq_unicode_subst;
408:
409: /* Maximum result size for each format string. */
410: static size_t ilseq_byte_subst_size;
411: static size_t ilseq_wchar_subst_size;
412: static size_t ilseq_unicode_subst_size;
413:
414: /* Buffer of size ilseq_byte_subst_size+1. */
415: static char* ilseq_byte_subst_buffer;
416: #if HAVE_WCHAR_T
417: /* Buffer of size ilseq_wchar_subst_size+1. */
418: static char* ilseq_wchar_subst_buffer;
419: #endif
420: /* Buffer of size ilseq_unicode_subst_size+1. */
421: static char* ilseq_unicode_subst_buffer;
422:
423: /* Auxiliary variables for subst_mb_to_uc_fallback. */
424: /* Converter from locale encoding to UCS-4. */
425: static iconv_t subst_mb_to_uc_cd;
426: /* Buffer of size ilseq_byte_subst_size. */
427: static unsigned int* subst_mb_to_uc_temp_buffer;
428:
429: static void subst_mb_to_uc_fallback
430: (const char* inbuf, size_t inbufsize,
431: void (*write_replacement) (const unsigned int *buf, size_t buflen,
432: void* callback_arg),
433: void* callback_arg,
434: void* data)
435: {
436: for (; inbufsize > 0; inbuf++, inbufsize--) {
437: const char* inptr;
438: size_t inbytesleft;
439: char* outptr;
440: size_t outbytesleft;
441: sprintf(ilseq_byte_subst_buffer,
442: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
443: inptr = ilseq_byte_subst_buffer;
444: inbytesleft = strlen(ilseq_byte_subst_buffer);
445: outptr = (char*)subst_mb_to_uc_temp_buffer;
446: outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
447: iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
448: if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
449: == (size_t)(-1)
450: || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
451: == (size_t)(-1))
452: error(EXIT_FAILURE,0,
453: /* TRANSLATORS: An error message.
454: The %s placeholder expands to a piece of text, specified through --byte-subst. */
455: _("cannot convert byte substitution to Unicode: %s"),
456: ilseq_byte_subst_buffer);
457: if (!(outbytesleft%sizeof(unsigned int) == 0))
458: abort();
459: write_replacement(subst_mb_to_uc_temp_buffer,
460: ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
461: callback_arg);
462: }
463: }
464:
465: /* Auxiliary variables for subst_uc_to_mb_fallback. */
466: /* Converter from locale encoding to target encoding. */
467: static iconv_t subst_uc_to_mb_cd;
468: /* Buffer of size ilseq_unicode_subst_size*4. */
469: static char* subst_uc_to_mb_temp_buffer;
470:
471: static void subst_uc_to_mb_fallback
472: (unsigned int code,
473: void (*write_replacement) (const char *buf, size_t buflen,
474: void* callback_arg),
475: void* callback_arg,
476: void* data)
477: {
478: const char* inptr;
479: size_t inbytesleft;
480: char* outptr;
481: size_t outbytesleft;
482: sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
483: inptr = ilseq_unicode_subst_buffer;
484: inbytesleft = strlen(ilseq_unicode_subst_buffer);
485: outptr = subst_uc_to_mb_temp_buffer;
486: outbytesleft = ilseq_unicode_subst_size*4;
487: iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
488: if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
489: == (size_t)(-1)
490: || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
491: == (size_t)(-1))
492: error(EXIT_FAILURE,0,
493: /* TRANSLATORS: An error message.
494: The %s placeholder expands to a piece of text, specified through --unicode-subst. */
495: _("cannot convert unicode substitution to target encoding: %s"),
496: ilseq_unicode_subst_buffer);
497: write_replacement(subst_uc_to_mb_temp_buffer,
498: ilseq_unicode_subst_size*4-outbytesleft,
499: callback_arg);
500: }
501:
502: #if HAVE_WCHAR_T
503:
504: /* Auxiliary variables for subst_mb_to_wc_fallback. */
505: /* Converter from locale encoding to wchar_t. */
506: static iconv_t subst_mb_to_wc_cd;
507: /* Buffer of size ilseq_byte_subst_size. */
508: static wchar_t* subst_mb_to_wc_temp_buffer;
509:
510: static void subst_mb_to_wc_fallback
511: (const char* inbuf, size_t inbufsize,
512: void (*write_replacement) (const wchar_t *buf, size_t buflen,
513: void* callback_arg),
514: void* callback_arg,
515: void* data)
516: {
517: for (; inbufsize > 0; inbuf++, inbufsize--) {
518: const char* inptr;
519: size_t inbytesleft;
520: char* outptr;
521: size_t outbytesleft;
522: sprintf(ilseq_byte_subst_buffer,
523: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
524: inptr = ilseq_byte_subst_buffer;
525: inbytesleft = strlen(ilseq_byte_subst_buffer);
526: outptr = (char*)subst_mb_to_wc_temp_buffer;
527: outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
528: iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
529: if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
530: == (size_t)(-1)
531: || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
532: == (size_t)(-1))
533: error(EXIT_FAILURE,0,
534: /* TRANSLATORS: An error message.
535: The %s placeholder expands to a piece of text, specified through --byte-subst. */
536: _("cannot convert byte substitution to wide string: %s"),
537: ilseq_byte_subst_buffer);
538: if (!(outbytesleft%sizeof(wchar_t) == 0))
539: abort();
540: write_replacement(subst_mb_to_wc_temp_buffer,
541: ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
542: callback_arg);
543: }
544: }
545:
546: /* Auxiliary variables for subst_wc_to_mb_fallback. */
547: /* Converter from locale encoding to target encoding. */
548: static iconv_t subst_wc_to_mb_cd;
549: /* Buffer of size ilseq_wchar_subst_size*4.
550: Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
551: static char* subst_wc_to_mb_temp_buffer;
552:
553: static void subst_wc_to_mb_fallback
554: (wchar_t code,
555: void (*write_replacement) (const char *buf, size_t buflen,
556: void* callback_arg),
557: void* callback_arg,
558: void* data)
559: {
560: const char* inptr;
561: size_t inbytesleft;
562: char* outptr;
563: size_t outbytesleft;
564: sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
565: inptr = ilseq_wchar_subst_buffer;
566: inbytesleft = strlen(ilseq_wchar_subst_buffer);
567: outptr = subst_wc_to_mb_temp_buffer;
568: outbytesleft = ilseq_wchar_subst_size*4;
569: iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
570: if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
571: == (size_t)(-1)
572: || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
573: == (size_t)(-1))
574: error(EXIT_FAILURE,0,
575: /* TRANSLATORS: An error message.
576: The %s placeholder expands to a piece of text, specified through --widechar-subst. */
577: _("cannot convert widechar substitution to target encoding: %s"),
578: ilseq_wchar_subst_buffer);
579: write_replacement(subst_wc_to_mb_temp_buffer,
580: ilseq_wchar_subst_size*4-outbytesleft,
581: callback_arg);
582: }
583:
584: #else
585:
586: #define subst_mb_to_wc_fallback NULL
587: #define subst_wc_to_mb_fallback NULL
588:
589: #endif
590:
591: /* Auxiliary variables for subst_mb_to_mb_fallback. */
592: /* Converter from locale encoding to target encoding. */
593: static iconv_t subst_mb_to_mb_cd;
594: /* Buffer of size ilseq_byte_subst_size*4. */
595: static char* subst_mb_to_mb_temp_buffer;
596:
597: static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
598: {
599: for (; inbufsize > 0; inbuf++, inbufsize--) {
600: const char* inptr;
601: size_t inbytesleft;
602: char* outptr;
603: size_t outbytesleft;
604: sprintf(ilseq_byte_subst_buffer,
605: ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
606: inptr = ilseq_byte_subst_buffer;
607: inbytesleft = strlen(ilseq_byte_subst_buffer);
608: outptr = subst_mb_to_mb_temp_buffer;
609: outbytesleft = ilseq_byte_subst_size*4;
610: iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
611: if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
612: == (size_t)(-1)
613: || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
614: == (size_t)(-1))
615: error(EXIT_FAILURE,0,
616: /* TRANSLATORS: An error message.
617: The %s placeholder expands to a piece of text, specified through --byte-subst. */
618: _("cannot convert byte substitution to target encoding: %s"),
619: ilseq_byte_subst_buffer);
620: fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
621: stdout);
622: }
623: }
624:
625: /* ========================================================================= */
626:
627: /* Error messages during conversion. */
628:
629: static void conversion_error_EILSEQ (const char* infilename)
630: {
631: fflush(stdout);
632: if (column > 0)
633: putc('\n',stderr);
634: error(0,0,
635: /* TRANSLATORS: An error message.
636: The placeholders expand to the input file name, a line number, and a column number. */
637: _("%s:%u:%u: cannot convert"),
638: infilename,line,column);
639: }
640:
641: static void conversion_error_EINVAL (const char* infilename)
642: {
643: fflush(stdout);
644: if (column > 0)
645: putc('\n',stderr);
646: error(0,0,
647: /* TRANSLATORS: An error message.
648: The placeholders expand to the input file name, a line number, and a column number.
649: A "shift sequence" is a sequence of bytes that changes the state of the converter;
650: this concept exists only for "stateful" encodings like ISO-2022-JP. */
651: _("%s:%u:%u: incomplete character or shift sequence"),
652: infilename,line,column);
653: }
654:
655: static void conversion_error_other (int errnum, const char* infilename)
656: {
657: fflush(stdout);
658: if (column > 0)
659: putc('\n',stderr);
660: error(0,errnum,
661: /* TRANSLATORS: The first part of an error message.
662: It is followed by a colon and a detail message.
663: The placeholders expand to the input file name, a line number, and a column number. */
664: _("%s:%u:%u"),
665: infilename,line,column);
666: }
667:
668: /* Convert the input given in infile. */
669:
670: static int convert (iconv_t cd, FILE* infile, const char* infilename)
671: {
672: char inbuf[4096+4096];
673: size_t inbufrest = 0;
674: char initial_outbuf[4096];
675: char *outbuf = initial_outbuf;
676: size_t outbufsize = sizeof(initial_outbuf);
677: int status = 0;
678:
679: #if O_BINARY
680: SET_BINARY(fileno(infile));
681: #endif
682: line = 1; column = 0;
683: iconv(cd,NULL,NULL,NULL,NULL);
684: for (;;) {
685: size_t inbufsize = fread(inbuf+4096,1,4096,infile);
686: if (inbufsize == 0) {
687: if (inbufrest == 0)
688: break;
689: else {
690: if (ilseq_byte_subst != NULL)
691: subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
692: if (!silent)
693: conversion_error_EINVAL(infilename);
694: status = 1;
695: goto done;
696: }
697: } else {
698: const char* inptr = inbuf+4096-inbufrest;
699: size_t insize = inbufrest+inbufsize;
700: inbufrest = 0;
701: while (insize > 0) {
702: char* outptr = outbuf;
703: size_t outsize = outbufsize;
704: size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
705: if (outptr != outbuf) {
706: int saved_errno = errno;
707: if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
708: status = 1;
709: goto done;
710: }
711: errno = saved_errno;
712: }
713: if (res == (size_t)(-1)) {
714: if (errno == EILSEQ) {
715: if (discard_unconvertible == 1) {
716: int one = 1;
717: iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
718: discard_unconvertible = 2;
719: status = 1;
720: } else {
721: if (!silent)
722: conversion_error_EILSEQ(infilename);
723: status = 1;
724: goto done;
725: }
726: } else if (errno == EINVAL) {
727: if (inbufsize == 0 || insize > 4096) {
728: if (!silent)
729: conversion_error_EINVAL(infilename);
730: status = 1;
731: goto done;
732: } else {
733: inbufrest = insize;
734: if (insize > 0) {
735: /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
736: we cannot use memcpy here, because source and destination
737: regions may overlap. */
738: char* restptr = inbuf+4096-insize;
739: do { *restptr++ = *inptr++; } while (--insize > 0);
740: }
741: break;
742: }
743: } else if (errno == E2BIG) {
744: if (outptr==outbuf) {
745: /* outbuf is too small. Double its size. */
746: if (outbuf != initial_outbuf)
747: free(outbuf);
748: outbufsize = 2*outbufsize;
749: if (outbufsize==0) /* integer overflow? */
750: xalloc_die();
751: outbuf = (char*)xmalloc(outbufsize);
752: }
753: } else {
754: if (!silent)
755: conversion_error_other(errno,infilename);
756: status = 1;
757: goto done;
758: }
759: }
760: }
761: }
762: }
763: for (;;) {
764: char* outptr = outbuf;
765: size_t outsize = outbufsize;
766: size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
767: if (outptr != outbuf) {
768: int saved_errno = errno;
769: if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
770: status = 1;
771: goto done;
772: }
773: errno = saved_errno;
774: }
775: if (res == (size_t)(-1)) {
776: if (errno == EILSEQ) {
777: if (discard_unconvertible == 1) {
778: int one = 1;
779: iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
780: discard_unconvertible = 2;
781: status = 1;
782: } else {
783: if (!silent)
784: conversion_error_EILSEQ(infilename);
785: status = 1;
786: goto done;
787: }
788: } else if (errno == EINVAL) {
789: if (!silent)
790: conversion_error_EINVAL(infilename);
791: status = 1;
792: goto done;
793: } else if (errno == E2BIG) {
794: if (outptr==outbuf) {
795: /* outbuf is too small. Double its size. */
796: if (outbuf != initial_outbuf)
797: free(outbuf);
798: outbufsize = 2*outbufsize;
799: if (outbufsize==0) /* integer overflow? */
800: xalloc_die();
801: outbuf = (char*)xmalloc(outbufsize);
802: }
803: } else {
804: if (!silent)
805: conversion_error_other(errno,infilename);
806: status = 1;
807: goto done;
808: }
809: } else
810: break;
811: }
812: if (ferror(infile)) {
813: fflush(stdout);
814: if (column > 0)
815: putc('\n',stderr);
816: error(0,0,
817: /* TRANSLATORS: An error message.
818: The placeholder expands to the input file name. */
819: _("%s: I/O error"),
820: infilename);
821: status = 1;
822: goto done;
823: }
824: done:
825: if (outbuf != initial_outbuf)
826: free(outbuf);
827: return status;
828: }
829:
830: /* ========================================================================= */
831:
832: int main (int argc, char* argv[])
833: {
834: const char* fromcode = NULL;
835: const char* tocode = NULL;
836: int do_list = 0;
837: iconv_t cd;
838: struct iconv_fallbacks fallbacks;
839: struct iconv_hooks hooks;
840: int i;
841: int status;
842:
843: set_program_name (argv[0]);
844: #if HAVE_SETLOCALE
845: /* Needed for the locale dependent encodings, "char" and "wchar_t",
846: and for gettext. */
847: setlocale(LC_CTYPE,"");
848: #if ENABLE_NLS
849: /* Needed for gettext. */
850: setlocale(LC_MESSAGES,"");
851: #endif
852: #endif
853: #if ENABLE_NLS
854: bindtextdomain("libiconv",relocate(LOCALEDIR));
855: #endif
856: textdomain("libiconv");
857: for (i = 1; i < argc;) {
858: size_t len = strlen(argv[i]);
859: if (!strcmp(argv[i],"--")) {
860: i++;
861: break;
862: }
863: if (!strcmp(argv[i],"-f")
864: /* --f ... --from-code */
865: || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
866: /* --from-code=... */
867: || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
868: if (len < 12)
869: if (i == argc-1) usage(1);
870: if (fromcode != NULL) usage(1);
871: if (len < 12) {
872: fromcode = argv[i+1];
873: i += 2;
874: } else {
875: fromcode = argv[i]+12;
876: i++;
877: }
878: continue;
879: }
880: if (!strcmp(argv[i],"-t")
881: /* --t ... --to-code */
882: || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
883: /* --from-code=... */
884: || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
885: if (len < 10)
886: if (i == argc-1) usage(1);
887: if (tocode != NULL) usage(1);
888: if (len < 10) {
889: tocode = argv[i+1];
890: i += 2;
891: } else {
892: tocode = argv[i]+10;
893: i++;
894: }
895: continue;
896: }
897: if (!strcmp(argv[i],"-l")
898: /* --l ... --list */
899: || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
900: do_list = 1;
901: i++;
902: continue;
903: }
904: if (/* --by ... --byte-subst */
905: (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
906: /* --byte-subst=... */
907: || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
908: if (len < 13) {
909: if (i == argc-1) usage(1);
910: ilseq_byte_subst = argv[i+1];
911: i += 2;
912: } else {
913: ilseq_byte_subst = argv[i]+13;
914: i++;
915: }
916: ilseq_byte_subst_size =
917: check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
918: continue;
919: }
920: if (/* --w ... --widechar-subst */
921: (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
922: /* --widechar-subst=... */
923: || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
924: if (len < 17) {
925: if (i == argc-1) usage(1);
926: ilseq_wchar_subst = argv[i+1];
927: i += 2;
928: } else {
929: ilseq_wchar_subst = argv[i]+17;
930: i++;
931: }
932: ilseq_wchar_subst_size =
933: check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
934: continue;
935: }
936: if (/* --u ... --unicode-subst */
937: (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
938: /* --unicode-subst=... */
939: || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
940: if (len < 16) {
941: if (i == argc-1) usage(1);
942: ilseq_unicode_subst = argv[i+1];
943: i += 2;
944: } else {
945: ilseq_unicode_subst = argv[i]+16;
946: i++;
947: }
948: ilseq_unicode_subst_size =
949: check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
950: continue;
951: }
952: if /* --s ... --silent */
953: (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
954: silent = 1;
955: continue;
956: }
957: if /* --h ... --help */
958: (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
959: usage(0);
960: }
961: if /* --v ... --version */
962: (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
963: print_version();
964: }
965: #if O_BINARY
966: /* Backward compatibility with iconv <= 1.9.1. */
967: if /* --bi ... --binary */
968: (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
969: i++;
970: continue;
971: }
972: #endif
973: if (argv[i][0] == '-') {
974: const char *option = argv[i] + 1;
975: if (*option == '\0')
976: usage(1);
977: for (; *option; option++)
978: switch (*option) {
979: case 'c': discard_unconvertible = 1; break;
980: case 's': silent = 1; break;
981: default: usage(1);
982: }
983: i++;
984: continue;
985: }
986: break;
987: }
988: if (do_list) {
989: if (i != 2 || i != argc)
990: usage(1);
991: iconvlist(print_one,NULL);
992: status = 0;
993: } else {
994: #if O_BINARY
995: SET_BINARY(fileno(stdout));
996: #endif
997: if (fromcode == NULL)
998: fromcode = "char";
999: if (tocode == NULL)
1000: tocode = "char";
1001: cd = iconv_open(tocode,fromcode);
1002: if (cd == (iconv_t)(-1)) {
1003: if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
1004: error(0,0,
1005: /* TRANSLATORS: An error message.
1006: The placeholder expands to the encoding name, specified through --from-code. */
1007: _("conversion from %s unsupported"),
1008: fromcode);
1009: else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
1010: error(0,0,
1011: /* TRANSLATORS: An error message.
1012: The placeholder expands to the encoding name, specified through --to-code. */
1013: _("conversion to %s unsupported"),
1014: tocode);
1015: else
1016: error(0,0,
1017: /* TRANSLATORS: An error message.
1018: The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
1019: _("conversion from %s to %s unsupported"),
1020: fromcode,tocode);
1021: error(EXIT_FAILURE,0,
1022: /* TRANSLATORS: Additional advice after an error message.
1023: The %s placeholder expands to the program name. */
1024: _("try '%s -l' to get the list of supported encodings"),
1025: program_name);
1026: }
1027: /* Look at fromcode and tocode, to determine whether character widths
1028: should be determined according to legacy CJK conventions. */
1029: cjkcode = iconv_canonicalize(tocode);
1030: if (!is_cjk_encoding(cjkcode))
1031: cjkcode = iconv_canonicalize(fromcode);
1032: /* Set up fallback routines for handling impossible conversions. */
1033: if (ilseq_byte_subst != NULL)
1034: ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
1035: if (!discard_unconvertible) {
1036: #if HAVE_WCHAR_T
1037: if (ilseq_wchar_subst != NULL)
1038: ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
1039: #endif
1040: if (ilseq_unicode_subst != NULL)
1041: ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
1042: if (ilseq_byte_subst != NULL) {
1043: subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
1044: subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
1045: #if HAVE_WCHAR_T
1046: subst_mb_to_wc_cd = iconv_open("wchar_t","char");
1047: subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
1048: #endif
1049: subst_mb_to_mb_cd = iconv_open(tocode,"char");
1050: subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
1051: }
1052: #if HAVE_WCHAR_T
1053: if (ilseq_wchar_subst != NULL) {
1054: subst_wc_to_mb_cd = iconv_open(tocode,"char");
1055: subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
1056: }
1057: #endif
1058: if (ilseq_unicode_subst != NULL) {
1059: subst_uc_to_mb_cd = iconv_open(tocode,"char");
1060: subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
1061: }
1062: fallbacks.mb_to_uc_fallback =
1063: (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
1064: fallbacks.uc_to_mb_fallback =
1065: (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
1066: fallbacks.mb_to_wc_fallback =
1067: (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
1068: fallbacks.wc_to_mb_fallback =
1069: (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
1070: fallbacks.data = NULL;
1071: iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
1072: }
1073: /* Set up hooks for updating the line and column position. */
1074: hooks.uc_hook = update_line_column;
1075: hooks.wc_hook = NULL;
1076: hooks.data = NULL;
1077: iconvctl(cd, ICONV_SET_HOOKS, &hooks);
1078: if (i == argc)
1079: status = convert(cd,stdin,
1080: /* TRANSLATORS: A filename substitute denoting standard input. */
1081: _("(stdin)"));
1082: else {
1083: status = 0;
1084: for (; i < argc; i++) {
1085: const char* infilename = argv[i];
1086: FILE* infile = fopen(infilename,"r");
1087: if (infile == NULL) {
1088: int saved_errno = errno;
1089: error(0,saved_errno,
1090: /* TRANSLATORS: The first part of an error message.
1091: It is followed by a colon and a detail message.
1092: The %s placeholder expands to the input file name. */
1093: _("%s"),
1094: infilename);
1095: status = 1;
1096: } else {
1097: status |= convert(cd,infile,infilename);
1098: fclose(infile);
1099: }
1100: }
1101: }
1102: iconv_close(cd);
1103: }
1104: if (ferror(stdout) || fclose(stdout)) {
1105: error(0,0,
1106: /* TRANSLATORS: An error message. */
1107: _("I/O error"));
1108: status = 1;
1109: }
1110: exit(status);
1111: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>