File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / src / iconv.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /* Copyright (C) 2000-2009, 2011-2012, 2016-2019 Free Software Foundation, Inc.
    2:    This file is part of the GNU LIBICONV Library.
    3: 
    4:    This program is free software: you can redistribute it and/or modify
    5:    it under the terms of the GNU General Public License as published by
    6:    the Free Software Foundation; either version 3 of the License, or
    7:    (at your option) any later version.
    8: 
    9:    This program is distributed in the hope that it will be useful,
   10:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12:    GNU General Public License for more details.
   13: 
   14:    You should have received a copy of the GNU General Public License
   15:    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
   16: 
   17: #include "config.h"
   18: #ifndef ICONV_CONST
   19: # define ICONV_CONST
   20: #endif
   21: 
   22: #include <limits.h>
   23: #include <stddef.h>
   24: #include <stdio.h>
   25: #include <stdlib.h>
   26: #include <string.h>
   27: #include <iconv.h>
   28: #include <errno.h>
   29: #include <locale.h>
   30: #include <fcntl.h>
   31: 
   32: /* Ensure that iconv_no_i18n does not depend on libintl.  */
   33: #ifdef NO_I18N
   34: # undef ENABLE_NLS
   35: # undef ENABLE_RELOCATABLE
   36: #endif
   37: 
   38: #include "binary-io.h"
   39: #include "progname.h"
   40: #include "relocatable.h"
   41: #include "safe-read.h"
   42: #include "xalloc.h"
   43: #include "uniwidth.h"
   44: #include "uniwidth/cjk.h"
   45: 
   46: /* Ensure that iconv_no_i18n does not depend on libintl.  */
   47: #ifdef NO_I18N
   48: #include <stdarg.h>
   49: static void
   50: error (int status, int errnum, const char *message, ...)
   51: {
   52:   va_list args;
   53: 
   54:   fflush(stdout);
   55:   fprintf(stderr,"%s: ",program_name);
   56:   va_start(args,message);
   57:   vfprintf(stderr,message,args);
   58:   va_end(args);
   59:   if (errnum) {
   60:     const char *s = strerror(errnum);
   61:     if (s == NULL)
   62:       s = "Unknown system error";
   63:   }
   64:   putc('\n',stderr);
   65:   fflush(stderr);
   66:   if (status)
   67:     exit(status);
   68: }
   69: #else
   70: # include "error.h"
   71: #endif
   72: 
   73: #include "gettext.h"
   74: 
   75: #define _(str) gettext(str)
   76: 
   77: /* Ensure that iconv_no_i18n does not depend on libintl.  */
   78: #ifdef NO_I18N
   79: # define xmalloc malloc
   80: # define xalloc_die abort
   81: #endif
   82: 
   83: /* Locale independent test for a decimal digit.
   84:    Argument can be  'char' or 'unsigned char'.  (Whereas the argument of
   85:    <ctype.h> isdigit must be an 'unsigned char'.)  */
   86: #undef isdigit
   87: #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
   88: 
/* Locale independent test for a printable character.
   Argument can be  'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isprint must be an 'unsigned char'.)  */
   92: #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
   93: 
   94: /* ========================================================================= */
   95: 
/* Handling of unconvertible input.  When this is 1 and iconv() reports
   EILSEQ, convert() enables ICONV_SET_DISCARD_ILSEQ on the converter through
   iconvctl() and changes the value to 2; with any other value an EILSEQ
   failure ends the conversion of the current input.  */
static int discard_unconvertible = 0;
/* When nonzero, convert() does not print the conversion error messages.  */
static int silent = 0;
   98: 
   99: static void usage (int exitcode)
  100: {
  101:   if (exitcode != 0) {
  102:     const char* helpstring1 =
  103:       /* TRANSLATORS: The first line of the short usage message.  */
  104:       _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
  105:     const char* helpstring2 =
  106:       /* TRANSLATORS: The second line of the short usage message.
  107:          Align it correctly against the first line.  */
  108:       _("or:    iconv -l");
  109:     fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
  110:     fprintf(stderr, _("Try '%s --help' for more information.\n"), program_name);
  111:   } else {
  112:     /* xgettext: no-wrap */
  113:     /* TRANSLATORS: The first line of the long usage message.
  114:        The %s placeholder expands to the program name.  */
  115:     printf(_("\
  116: Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
  117:            program_name);
  118:     /* xgettext: no-wrap */
  119:     /* TRANSLATORS: The second line of the long usage message.
  120:        Align it correctly against the first line.
  121:        The %s placeholder expands to the program name.  */
  122:     printf(_("\
  123: or:    %s -l\n"),
  124:            program_name);
  125:     printf("\n");
  126:     /* xgettext: no-wrap */
  127:     /* TRANSLATORS: Description of the iconv program.  */
  128:     printf(_("\
  129: Converts text from one encoding to another encoding.\n"));
  130:     printf("\n");
  131:     /* xgettext: no-wrap */
  132:     printf(_("\
  133: Options controlling the input and output format:\n"));
  134:     /* xgettext: no-wrap */
  135:     printf(_("\
  136:   -f ENCODING, --from-code=ENCODING\n\
  137:                               the encoding of the input\n"));
  138:     /* xgettext: no-wrap */
  139:     printf(_("\
  140:   -t ENCODING, --to-code=ENCODING\n\
  141:                               the encoding of the output\n"));
  142:     printf("\n");
  143:     /* xgettext: no-wrap */
  144:     printf(_("\
  145: Options controlling conversion problems:\n"));
  146:     /* xgettext: no-wrap */
  147:     printf(_("\
  148:   -c                          discard unconvertible characters\n"));
  149:     /* xgettext: no-wrap */
  150:     printf(_("\
  151:   --unicode-subst=FORMATSTRING\n\
  152:                               substitution for unconvertible Unicode characters\n"));
  153:     /* xgettext: no-wrap */
  154:     printf(_("\
  155:   --byte-subst=FORMATSTRING   substitution for unconvertible bytes\n"));
  156:     /* xgettext: no-wrap */
  157:     printf(_("\
  158:   --widechar-subst=FORMATSTRING\n\
  159:                               substitution for unconvertible wide characters\n"));
  160:     printf("\n");
  161:     /* xgettext: no-wrap */
  162:     printf(_("\
  163: Options controlling error output:\n"));
  164:     /* xgettext: no-wrap */
  165:     printf(_("\
  166:   -s, --silent                suppress error messages about conversion problems\n"));
  167:     printf("\n");
  168:     /* xgettext: no-wrap */
  169:     printf(_("\
  170: Informative output:\n"));
  171:     /* xgettext: no-wrap */
  172:     printf(_("\
  173:   -l, --list                  list the supported encodings\n"));
  174:     /* xgettext: no-wrap */
  175:     printf(_("\
  176:   --help                      display this help and exit\n"));
  177:     /* xgettext: no-wrap */
  178:     printf(_("\
  179:   --version                   output version information and exit\n"));
  180:     printf("\n");
  181:     /* TRANSLATORS: The placeholder indicates the bug-reporting address
  182:        for this package.  Please add _another line_ saying
  183:        "Report translation bugs to <...>\n" with the address for translation
  184:        bugs (typically your translation team's web or email address).  */
  185:     fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
  186:   }
  187:   exit(exitcode);
  188: }
  189: 
  190: static void print_version (void)
  191: {
  192:   printf("iconv (GNU libiconv %d.%d)\n",
  193:          _libiconv_version >> 8, _libiconv_version & 0xff);
  194:   printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2019");
  195:   /* xgettext: no-wrap */
  196:   fputs (_("\
  197: License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>\n\
  198: This is free software: you are free to change and redistribute it.\n\
  199: There is NO WARRANTY, to the extent permitted by law.\n\
  200: "),stdout);
  201:   /* TRANSLATORS: The %s placeholder expands to an author's name.  */
  202:   printf(_("Written by %s.\n"),"Bruno Haible");
  203:   exit(EXIT_SUCCESS);
  204: }
  205: 
  206: static int print_one (unsigned int namescount, const char * const * names,
  207:                       void* data)
  208: {
  209:   unsigned int i;
  210:   (void)data;
  211:   for (i = 0; i < namescount; i++) {
  212:     if (i > 0)
  213:       putc(' ',stdout);
  214:     fputs(names[i],stdout);
  215:   }
  216:   putc('\n',stdout);
  217:   return 0;
  218: }
  219: 
  220: /* ========================================================================= */
  221: 
/* Line number and column position.
   convert() resets them to line 1, column 0 before converting an input;
   the conversion_error_* functions print them in their messages. */
static unsigned int line;
static unsigned int column;
/* Passed as the second argument to uc_width() when the column position
   is advanced. */
static const char* cjkcode;
  226: /* Update the line number and column position after a character was
  227:    successfully converted. */
  228: static void update_line_column (unsigned int uc, void* data)
  229: {
  230:   if (uc == 0x000A) {
  231:     line++;
  232:     column = 0;
  233:   } else {
  234:     int width = uc_width(uc, cjkcode);
  235:     if (width >= 0)
  236:       column += width;
  237:     else if (uc == 0x0009)
  238:       column += 8 - (column % 8);
  239:   }
  240: }
  241: 
  242: /* ========================================================================= */
  243: 
  244: /* Production of placeholder strings as fallback for unconvertible
  245:    characters. */
  246: 
  247: /* Check that the argument is a format string taking either no argument
  248:    or exactly one unsigned integer argument. Returns the maximum output
  249:    size of the format string. */
  250: static size_t check_subst_formatstring (const char *format, const char *param_name)
  251: {
  252:   /* C format strings are described in POSIX (IEEE P1003.1 2001), section
  253:      XSH 3 fprintf().  See also Linux fprintf(3) manual page.
  254:      For simplicity, we don't accept
  255:        - the '%m$' reordering syntax,
  256:        - the 'I' flag,
  257:        - width specifications referring to an argument,
  258:        - precision specifications referring to an argument,
  259:        - size specifiers,
  260:        - format specifiers other than 'o', 'u', 'x', 'X'.
  261:      What remains?
  262:      A directive
  263:        - starts with '%',
  264:        - is optionally followed by any of the characters '#', '0', '-', ' ',
  265:          '+', "'", each of which acts as a flag,
  266:        - is optionally followed by a width specification: a nonempty digit
  267:          sequence,
  268:        - is optionally followed by '.' and a precision specification: a
  269:          nonempty digit sequence,
  270:        - is finished by a specifier
  271:          - '%', that needs no argument,
  272:          - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
  273:    */
  274:   size_t maxsize = 0;
  275:   unsigned int unnumbered_arg_count = 0;
  276: 
  277:   for (; *format != '\0';) {
  278:     if (*format++ == '%') {
  279:       /* A directive. */
  280:       unsigned int width = 0;
  281:       unsigned int precision = 0;
  282:       unsigned int length;
  283:       /* Parse flags. */
  284:       for (;;) {
  285:         if (*format == ' ' || *format == '+' || *format == '-'
  286:             || *format == '#' || *format == '0' || *format == '\'')
  287:           format++;
  288:         else
  289:           break;
  290:       }
  291:       /* Parse width. */
  292:       if (*format == '*')
  293:         error(EXIT_FAILURE,0,
  294:               /* TRANSLATORS: An error message.
  295:                  The %s placeholder expands to a command-line option.  */
  296:               _("%s argument: A format directive with a variable width is not allowed here."),
  297:               param_name);
  298:       if (isdigit (*format)) {
  299:         do {
  300:           width = 10*width + (*format - '0');
  301:           format++;
  302:         } while (isdigit (*format));
  303:       }
  304:       /* Parse precision. */
  305:       if (*format == '.') {
  306:         format++;
  307:         if (*format == '*')
  308:           error(EXIT_FAILURE,0,
  309:                 /* TRANSLATORS: An error message.
  310:                    The %s placeholder expands to a command-line option.  */
  311:                 _("%s argument: A format directive with a variable precision is not allowed here."),
  312:                 param_name);
  313:         if (isdigit (*format)) {
  314:           do {
  315:             precision = 10*precision + (*format - '0');
  316:             format++;
  317:           } while (isdigit (*format));
  318:         }
  319:       }
  320:       /* Parse size. */
  321:       switch (*format) {
  322:         case 'h': case 'l': case 'L': case 'q':
  323:         case 'j': case 'z': case 'Z': case 't':
  324:           error(EXIT_FAILURE,0,
  325:                 /* TRANSLATORS: An error message.
  326:                    The %s placeholder expands to a command-line option.  */
  327:                 _("%s argument: A format directive with a size is not allowed here."),
  328:                 param_name);
  329:       }
  330:       /* Parse end of directive. */
  331:       switch (*format) {
  332:         case '%':
  333:           length = 1;
  334:           break;
  335:         case 'u': case 'o': case 'x': case 'X':
  336:           if (*format == 'u') {
  337:             length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
  338:                                      * 0.30103 /* binary -> decimal */
  339:                                     )
  340:                      + 1; /* turn floor into ceil */
  341:             if (length < precision)
  342:               length = precision;
  343:             length *= 2; /* estimate for FLAG_GROUP */
  344:             length += 1; /* account for leading sign */
  345:           } else if (*format == 'o') {
  346:             length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
  347:                                      * 0.333334 /* binary -> octal */
  348:                                     )
  349:                      + 1; /* turn floor into ceil */
  350:             if (length < precision)
  351:               length = precision;
  352:             length += 1; /* account for leading sign */
  353:           } else { /* 'x', 'X' */
  354:             length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
  355:                                      * 0.25 /* binary -> hexadecimal */
  356:                                     )
  357:                      + 1; /* turn floor into ceil */
  358:             if (length < precision)
  359:               length = precision;
  360:             length += 2; /* account for leading sign or alternate form */
  361:           }
  362:           unnumbered_arg_count++;
  363:           break;
  364:         default:
  365:           if (*format == '\0')
  366:             error(EXIT_FAILURE,0,
  367:                   /* TRANSLATORS: An error message.
  368:                      The %s placeholder expands to a command-line option.  */
  369:                   _("%s argument: The string ends in the middle of a directive."),
  370:                   param_name);
  371:           else if (c_isprint(*format))
  372:             error(EXIT_FAILURE,0,
  373:                   /* TRANSLATORS: An error message.
  374:                      The %s placeholder expands to a command-line option.
  375:                      The %c placeholder expands to an unknown format directive.  */
  376:                   _("%s argument: The character '%c' is not a valid conversion specifier."),
  377:                   param_name,*format);
  378:           else
  379:             error(EXIT_FAILURE,0,
  380:                   /* TRANSLATORS: An error message.
  381:                      The %s placeholder expands to a command-line option.  */
  382:                   _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
  383:                   param_name);
  384:           abort(); /*NOTREACHED*/
  385:       }
  386:       format++;
  387:       if (length < width)
  388:         length = width;
  389:       maxsize += length;
  390:     } else
  391:       maxsize++;
  392:   }
  393:   if (unnumbered_arg_count > 1)
  394:     error(EXIT_FAILURE,0,
  395:           /* TRANSLATORS: An error message.
  396:              The %s placeholder expands to a command-line option.
  397:              The %u placeholder expands to the number of arguments consumed by the format string.  */
  398:           ngettext("%s argument: The format string consumes more than one argument: %u argument.",
  399:                    "%s argument: The format string consumes more than one argument: %u arguments.",
  400:                    unnumbered_arg_count),
  401:           param_name,unnumbered_arg_count);
  402:   return maxsize;
  403: }
  404: 
/* Format strings.
   Each one is passed to sprintf() together with a single unsigned int
   argument by the subst_*_fallback functions.  convert() tests
   ilseq_byte_subst against NULL before using the byte substitution. */
static const char* ilseq_byte_subst;
static const char* ilseq_wchar_subst;
static const char* ilseq_unicode_subst;

/* Maximum result size for each format string. */
static size_t ilseq_byte_subst_size;
static size_t ilseq_wchar_subst_size;
static size_t ilseq_unicode_subst_size;

/* Buffer of size ilseq_byte_subst_size+1.
   Receives the sprintf() expansion of ilseq_byte_subst. */
static char* ilseq_byte_subst_buffer;
#if HAVE_WCHAR_T
/* Buffer of size ilseq_wchar_subst_size+1.
   Receives the sprintf() expansion of ilseq_wchar_subst. */
static char* ilseq_wchar_subst_buffer;
#endif
/* Buffer of size ilseq_unicode_subst_size+1.
   Receives the sprintf() expansion of ilseq_unicode_subst. */
static char* ilseq_unicode_subst_buffer;
  423: 
  424: /* Auxiliary variables for subst_mb_to_uc_fallback. */
  425: /* Converter from locale encoding to UCS-4. */
  426: static iconv_t subst_mb_to_uc_cd;
  427: /* Buffer of size ilseq_byte_subst_size. */
  428: static unsigned int* subst_mb_to_uc_temp_buffer;
  429: 
  430: static void subst_mb_to_uc_fallback
  431:             (const char* inbuf, size_t inbufsize,
  432:              void (*write_replacement) (const unsigned int *buf, size_t buflen,
  433:                                         void* callback_arg),
  434:              void* callback_arg,
  435:              void* data)
  436: {
  437:   for (; inbufsize > 0; inbuf++, inbufsize--) {
  438:     const char* inptr;
  439:     size_t inbytesleft;
  440:     char* outptr;
  441:     size_t outbytesleft;
  442:     sprintf(ilseq_byte_subst_buffer,
  443:             ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  444:     inptr = ilseq_byte_subst_buffer;
  445:     inbytesleft = strlen(ilseq_byte_subst_buffer);
  446:     outptr = (char*)subst_mb_to_uc_temp_buffer;
  447:     outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
  448:     iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
  449:     if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  450:         == (size_t)(-1)
  451:         || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
  452:            == (size_t)(-1))
  453:       error(EXIT_FAILURE,0,
  454:             /* TRANSLATORS: An error message.
  455:                The %s placeholder expands to a piece of text, specified through --byte-subst.  */
  456:             _("cannot convert byte substitution to Unicode: %s"),
  457:             ilseq_byte_subst_buffer);
  458:     if (!(outbytesleft%sizeof(unsigned int) == 0))
  459:       abort();
  460:     write_replacement(subst_mb_to_uc_temp_buffer,
  461:                       ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
  462:                       callback_arg);
  463:   }
  464: }
  465: 
  466: /* Auxiliary variables for subst_uc_to_mb_fallback. */
  467: /* Converter from locale encoding to target encoding. */
  468: static iconv_t subst_uc_to_mb_cd;
  469: /* Buffer of size ilseq_unicode_subst_size*4. */
  470: static char* subst_uc_to_mb_temp_buffer;
  471: 
  472: static void subst_uc_to_mb_fallback
  473:             (unsigned int code,
  474:              void (*write_replacement) (const char *buf, size_t buflen,
  475:                                         void* callback_arg),
  476:              void* callback_arg,
  477:              void* data)
  478: {
  479:   const char* inptr;
  480:   size_t inbytesleft;
  481:   char* outptr;
  482:   size_t outbytesleft;
  483:   sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
  484:   inptr = ilseq_unicode_subst_buffer;
  485:   inbytesleft = strlen(ilseq_unicode_subst_buffer);
  486:   outptr = subst_uc_to_mb_temp_buffer;
  487:   outbytesleft = ilseq_unicode_subst_size*4;
  488:   iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
  489:   if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  490:       == (size_t)(-1)
  491:       || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  492:          == (size_t)(-1))
  493:     error(EXIT_FAILURE,0,
  494:           /* TRANSLATORS: An error message.
  495:              The %s placeholder expands to a piece of text, specified through --unicode-subst.  */
  496:           _("cannot convert unicode substitution to target encoding: %s"),
  497:           ilseq_unicode_subst_buffer);
  498:   write_replacement(subst_uc_to_mb_temp_buffer,
  499:                     ilseq_unicode_subst_size*4-outbytesleft,
  500:                     callback_arg);
  501: }
  502: 
  503: #if HAVE_WCHAR_T
  504: 
  505: /* Auxiliary variables for subst_mb_to_wc_fallback. */
  506: /* Converter from locale encoding to wchar_t. */
  507: static iconv_t subst_mb_to_wc_cd;
  508: /* Buffer of size ilseq_byte_subst_size. */
  509: static wchar_t* subst_mb_to_wc_temp_buffer;
  510: 
  511: static void subst_mb_to_wc_fallback
  512:             (const char* inbuf, size_t inbufsize,
  513:              void (*write_replacement) (const wchar_t *buf, size_t buflen,
  514:                                         void* callback_arg),
  515:              void* callback_arg,
  516:              void* data)
  517: {
  518:   for (; inbufsize > 0; inbuf++, inbufsize--) {
  519:     const char* inptr;
  520:     size_t inbytesleft;
  521:     char* outptr;
  522:     size_t outbytesleft;
  523:     sprintf(ilseq_byte_subst_buffer,
  524:             ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  525:     inptr = ilseq_byte_subst_buffer;
  526:     inbytesleft = strlen(ilseq_byte_subst_buffer);
  527:     outptr = (char*)subst_mb_to_wc_temp_buffer;
  528:     outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
  529:     iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
  530:     if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  531:         == (size_t)(-1)
  532:         || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
  533:            == (size_t)(-1))
  534:       error(EXIT_FAILURE,0,
  535:             /* TRANSLATORS: An error message.
  536:                The %s placeholder expands to a piece of text, specified through --byte-subst.  */
  537:             _("cannot convert byte substitution to wide string: %s"),
  538:             ilseq_byte_subst_buffer);
  539:     if (!(outbytesleft%sizeof(wchar_t) == 0))
  540:       abort();
  541:     write_replacement(subst_mb_to_wc_temp_buffer,
  542:                       ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
  543:                       callback_arg);
  544:   }
  545: }
  546: 
  547: /* Auxiliary variables for subst_wc_to_mb_fallback. */
  548: /* Converter from locale encoding to target encoding. */
  549: static iconv_t subst_wc_to_mb_cd;
  550: /* Buffer of size ilseq_wchar_subst_size*4.
  551:    Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
  552: static char* subst_wc_to_mb_temp_buffer;
  553: 
  554: static void subst_wc_to_mb_fallback
  555:             (wchar_t code,
  556:              void (*write_replacement) (const char *buf, size_t buflen,
  557:                                         void* callback_arg),
  558:              void* callback_arg,
  559:              void* data)
  560: {
  561:   const char* inptr;
  562:   size_t inbytesleft;
  563:   char* outptr;
  564:   size_t outbytesleft;
  565:   sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
  566:   inptr = ilseq_wchar_subst_buffer;
  567:   inbytesleft = strlen(ilseq_wchar_subst_buffer);
  568:   outptr = subst_wc_to_mb_temp_buffer;
  569:   outbytesleft = ilseq_wchar_subst_size*4;
  570:   iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
  571:   if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  572:       == (size_t)(-1)
  573:       || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  574:          == (size_t)(-1))
  575:     error(EXIT_FAILURE,0,
  576:           /* TRANSLATORS: An error message.
  577:              The %s placeholder expands to a piece of text, specified through --widechar-subst.  */
  578:           _("cannot convert widechar substitution to target encoding: %s"),
  579:           ilseq_wchar_subst_buffer);
  580:   write_replacement(subst_wc_to_mb_temp_buffer,
  581:                     ilseq_wchar_subst_size*4-outbytesleft,
  582:                     callback_arg);
  583: }
  584: 
  585: #else
  586: 
  587: #define subst_mb_to_wc_fallback NULL
  588: #define subst_wc_to_mb_fallback NULL
  589: 
  590: #endif
  591: 
  592: /* Auxiliary variables for subst_mb_to_mb_fallback. */
  593: /* Converter from locale encoding to target encoding. */
  594: static iconv_t subst_mb_to_mb_cd;
  595: /* Buffer of size ilseq_byte_subst_size*4. */
  596: static char* subst_mb_to_mb_temp_buffer;
  597: 
  598: static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
  599: {
  600:   for (; inbufsize > 0; inbuf++, inbufsize--) {
  601:     const char* inptr;
  602:     size_t inbytesleft;
  603:     char* outptr;
  604:     size_t outbytesleft;
  605:     sprintf(ilseq_byte_subst_buffer,
  606:             ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  607:     inptr = ilseq_byte_subst_buffer;
  608:     inbytesleft = strlen(ilseq_byte_subst_buffer);
  609:     outptr = subst_mb_to_mb_temp_buffer;
  610:     outbytesleft = ilseq_byte_subst_size*4;
  611:     iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
  612:     if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  613:         == (size_t)(-1)
  614:         || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  615:            == (size_t)(-1))
  616:       error(EXIT_FAILURE,0,
  617:             /* TRANSLATORS: An error message.
  618:                The %s placeholder expands to a piece of text, specified through --byte-subst.  */
  619:             _("cannot convert byte substitution to target encoding: %s"),
  620:             ilseq_byte_subst_buffer);
  621:     fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
  622:            stdout);
  623:   }
  624: }
  625: 
  626: /* ========================================================================= */
  627: 
  628: /* Error messages during conversion.  */
  629: 
  630: static void conversion_error_EILSEQ (const char* infilename)
  631: {
  632:   fflush(stdout);
  633:   if (column > 0)
  634:     putc('\n',stderr);
  635:   error(0,0,
  636:         /* TRANSLATORS: An error message.
  637:            The placeholders expand to the input file name, a line number, and a column number.  */
  638:         _("%s:%u:%u: cannot convert"),
  639:         infilename,line,column);
  640: }
  641: 
  642: static void conversion_error_EINVAL (const char* infilename)
  643: {
  644:   fflush(stdout);
  645:   if (column > 0)
  646:     putc('\n',stderr);
  647:   error(0,0,
  648:         /* TRANSLATORS: An error message.
  649:            The placeholders expand to the input file name, a line number, and a column number.
  650:            A "shift sequence" is a sequence of bytes that changes the state of the converter;
  651:            this concept exists only for "stateful" encodings like ISO-2022-JP.  */
  652:         _("%s:%u:%u: incomplete character or shift sequence"),
  653:         infilename,line,column);
  654: }
  655: 
  656: static void conversion_error_other (int errnum, const char* infilename)
  657: {
  658:   fflush(stdout);
  659:   if (column > 0)
  660:     putc('\n',stderr);
  661:   error(0,errnum,
  662:         /* TRANSLATORS: The first part of an error message.
  663:            It is followed by a colon and a detail message.
  664:            The placeholders expand to the input file name, a line number, and a column number.  */
  665:         _("%s:%u:%u"),
  666:         infilename,line,column);
  667: }
  668: 
  669: /* Convert the input given in infile.  */
  670: 
  671: static int convert (iconv_t cd, int infile, const char* infilename)
  672: {
  673:   char inbuf[4096+4096];
  674:   size_t inbufrest = 0;
  675:   int infile_error = 0;
  676:   char initial_outbuf[4096];
  677:   char *outbuf = initial_outbuf;
  678:   size_t outbufsize = sizeof(initial_outbuf);
  679:   int status = 0;
  680: 
  681: #if O_BINARY
  682:   SET_BINARY(infile);
  683: #endif
  684:   line = 1; column = 0;
  685:   iconv(cd,NULL,NULL,NULL,NULL);
  686:   for (;;) {
  687:     size_t inbufsize;
  688:     /* Transfer the accumulated output to its destination, in case the
  689:        safe_read() call will block. */
  690:     fflush(stdout);
  691:     inbufsize = safe_read(infile,inbuf+4096,4096);
  692:     if (inbufsize == 0 || inbufsize == SAFE_READ_ERROR) {
  693:       infile_error = (inbufsize == SAFE_READ_ERROR ? errno : 0);
  694:       if (inbufrest == 0)
  695:         break;
  696:       else {
  697:         if (ilseq_byte_subst != NULL)
  698:           subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
  699:         if (!silent)
  700:           conversion_error_EINVAL(infilename);
  701:         status = 1;
  702:         goto done;
  703:       }
  704:     } else {
  705:       const char* inptr = inbuf+4096-inbufrest;
  706:       size_t insize = inbufrest+inbufsize;
  707:       inbufrest = 0;
  708:       while (insize > 0) {
  709:         char* outptr = outbuf;
  710:         size_t outsize = outbufsize;
  711:         size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
  712:         if (outptr != outbuf) {
  713:           int saved_errno = errno;
  714:           if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
  715:             status = 1;
  716:             goto done;
  717:           }
  718:           errno = saved_errno;
  719:         }
  720:         if (res == (size_t)(-1)) {
  721:           if (errno == EILSEQ) {
  722:             if (discard_unconvertible == 1) {
  723:               int one = 1;
  724:               iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
  725:               discard_unconvertible = 2;
  726:               status = 1;
  727:             } else {
  728:               if (!silent)
  729:                 conversion_error_EILSEQ(infilename);
  730:               status = 1;
  731:               goto done;
  732:             }
  733:           } else if (errno == EINVAL) {
  734:             if (inbufsize == 0 || insize > 4096) {
  735:               if (!silent)
  736:                 conversion_error_EINVAL(infilename);
  737:               status = 1;
  738:               goto done;
  739:             } else {
  740:               inbufrest = insize;
  741:               if (insize > 0) {
  742:                 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
  743:                    we cannot use memcpy here, because source and destination
  744:                    regions may overlap. */
  745:                 char* restptr = inbuf+4096-insize;
  746:                 do { *restptr++ = *inptr++; } while (--insize > 0);
  747:               }
  748:               break;
  749:             }
  750:           } else if (errno == E2BIG) {
  751:             if (outptr==outbuf) {
  752:               /* outbuf is too small. Double its size. */
  753:               if (outbuf != initial_outbuf)
  754:                 free(outbuf);
  755:               outbufsize = 2*outbufsize;
  756:               if (outbufsize==0) /* integer overflow? */
  757:                 xalloc_die();
  758:               outbuf = (char*)xmalloc(outbufsize);
  759:             }
  760:           } else {
  761:             if (!silent)
  762:               conversion_error_other(errno,infilename);
  763:             status = 1;
  764:             goto done;
  765:           }
  766:         }
  767:       }
  768:     }
  769:   }
  770:   for (;;) {
  771:     char* outptr = outbuf;
  772:     size_t outsize = outbufsize;
  773:     size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  774:     if (outptr != outbuf) {
  775:       int saved_errno = errno;
  776:       if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
  777:         status = 1;
  778:         goto done;
  779:       }
  780:       errno = saved_errno;
  781:     }
  782:     if (res == (size_t)(-1)) {
  783:       if (errno == EILSEQ) {
  784:         if (discard_unconvertible == 1) {
  785:           int one = 1;
  786:           iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
  787:           discard_unconvertible = 2;
  788:           status = 1;
  789:         } else {
  790:           if (!silent)
  791:             conversion_error_EILSEQ(infilename);
  792:           status = 1;
  793:           goto done;
  794:         }
  795:       } else if (errno == EINVAL) {
  796:         if (!silent)
  797:           conversion_error_EINVAL(infilename);
  798:         status = 1;
  799:         goto done;
  800:       } else if (errno == E2BIG) {
  801:         if (outptr==outbuf) {
  802:           /* outbuf is too small. Double its size. */
  803:           if (outbuf != initial_outbuf)
  804:             free(outbuf);
  805:           outbufsize = 2*outbufsize;
  806:           if (outbufsize==0) /* integer overflow? */
  807:             xalloc_die();
  808:           outbuf = (char*)xmalloc(outbufsize);
  809:         }
  810:       } else {
  811:         if (!silent)
  812:           conversion_error_other(errno,infilename);
  813:         status = 1;
  814:         goto done;
  815:       }
  816:     } else
  817:       break;
  818:   }
  819:   if (infile_error) {
  820:     fflush(stdout);
  821:     if (column > 0)
  822:       putc('\n',stderr);
  823:     error(0,infile_error,
  824:           /* TRANSLATORS: An error message.
  825:              The placeholder expands to the input file name.  */
  826:           _("%s: I/O error"),
  827:           infilename);
  828:     status = 1;
  829:     goto done;
  830:   }
  831:  done:
  832:   if (outbuf != initial_outbuf)
  833:     free(outbuf);
  834:   return status;
  835: }
  836: 
  837: /* ========================================================================= */
  838: 
  839: int main (int argc, char* argv[])
  840: {
         /* Entry point of the iconv command-line program.
            Parses the command-line options, then either lists the supported
            encodings (list option) or opens a conversion descriptor from
            fromcode to tocode and runs convert() on standard input or on each
            named input file in turn.
            The program ends through exit(status): status is set to 1 when an
            input file cannot be opened or when stdout reports an error, and
            otherwise comes from convert() (0 in list mode). */
  841:   const char* fromcode = NULL;
  842:   const char* tocode = NULL;
  843:   int do_list = 0;
  844:   iconv_t cd;
  845:   struct iconv_fallbacks fallbacks;
  846:   struct iconv_hooks hooks;
  847:   int i;
  848:   int status;
  849: 
  850:   set_program_name (argv[0]);
  851: #if HAVE_SETLOCALE
  852:   /* Needed for the locale dependent encodings, "char" and "wchar_t",
  853:      and for gettext. */
  854:   setlocale(LC_CTYPE,"");
  855: #if ENABLE_NLS
  856:   /* Needed for gettext. */
  857:   setlocale(LC_MESSAGES,"");
  858: #endif
  859: #endif
  860: #if ENABLE_NLS
  861:   bindtextdomain("libiconv",relocate(LOCALEDIR));
  862: #endif
  863:   textdomain("libiconv");
  864:   /* No need to invoke the gnulib function stdopen() here, because
  865:      (1) the only file descriptor allocations done by this program are
  866:          fopen(...,"r"),
  867:      (2) when such fopen() calls occur, stdin is not used,
  868:      hence
  869:      - when an fopen() call happens to open fd 0, it is harmless, by (2),
  870:      - when an fopen() call happens to open fd 1 or 2, writing to
  871:        stdout or stderr will produce an error, by (1). */
  872: 
         /* Option parsing.  i is the index of the next unexamined argument;
            the loop ends at the first argument that is not an option, or just
            after a "--" terminator.  Long options are compared with strncmp()
            over len bytes, so any prefix of the option name that has at least
            the stated minimum length is accepted as an abbreviation. */
  873:   for (i = 1; i < argc;) {
  874:     size_t len = strlen(argv[i]);
  875:     if (!strcmp(argv[i],"--")) {
  876:       i++;
  877:       break;
  878:     }
  879:     if (!strcmp(argv[i],"-f")
  880:         /* --f ... --from-code */
  881:         || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
  882:         /* --from-code=... */
  883:         || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
           /* len < 12 means the value is a separate argument, argv[i+1];
              otherwise it follows the 12-character "--from-code=" prefix.
              A second occurrence of the option is rejected.
              NOTE(review): usage() presumably does not return, otherwise
              fromcode would be taken from argv[argc] -- confirm. */
  884:       if (len < 12)
  885:         if (i == argc-1) usage(1);
  886:       if (fromcode != NULL) usage(1);
  887:       if (len < 12) {
  888:         fromcode = argv[i+1];
  889:         i += 2;
  890:       } else {
  891:         fromcode = argv[i]+12;
  892:         i++;
  893:       }
  894:       continue;
  895:     }
  896:     if (!strcmp(argv[i],"-t")
  897:         /* --t ... --to-code */
  898:         || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
  899:         /* --to-code=... */
  900:         || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
           /* Same scheme as for the from-code option, with a 10-character
              "--to-code=" prefix. */
  901:       if (len < 10)
  902:         if (i == argc-1) usage(1);
  903:       if (tocode != NULL) usage(1);
  904:       if (len < 10) {
  905:         tocode = argv[i+1];
  906:         i += 2;
  907:       } else {
  908:         tocode = argv[i]+10;
  909:         i++;
  910:       }
  911:       continue;
  912:     }
  913:     if (!strcmp(argv[i],"-l")
  914:         /* --l ... --list */
  915:         || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
  916:       do_list = 1;
  917:       i++;
  918:       continue;
  919:     }
         /* The three substitution options below store the format string and
            the size returned by check_subst_formatstring() for it.  Unlike
            the from-code and to-code options, a repeated occurrence is not
            rejected here; the last one given overwrites the earlier value. */
  920:     if (/* --by ... --byte-subst */
  921:         (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
  922:         /* --byte-subst=... */
  923:         || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
  924:       if (len < 13) {
  925:         if (i == argc-1) usage(1);
  926:         ilseq_byte_subst = argv[i+1];
  927:         i += 2;
  928:       } else {
  929:         ilseq_byte_subst = argv[i]+13;
  930:         i++;
  931:       }
  932:       ilseq_byte_subst_size =
  933:         check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
  934:       continue;
  935:     }
  936:     if (/* --w ... --widechar-subst */
  937:         (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
  938:         /* --widechar-subst=... */
  939:         || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
  940:       if (len < 17) {
  941:         if (i == argc-1) usage(1);
  942:         ilseq_wchar_subst = argv[i+1];
  943:         i += 2;
  944:       } else {
  945:         ilseq_wchar_subst = argv[i]+17;
  946:         i++;
  947:       }
  948:       ilseq_wchar_subst_size =
  949:         check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
  950:       continue;
  951:     }
  952:     if (/* --u ... --unicode-subst */
  953:         (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
  954:         /* --unicode-subst=... */
  955:         || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
  956:       if (len < 16) {
  957:         if (i == argc-1) usage(1);
  958:         ilseq_unicode_subst = argv[i+1];
  959:         i += 2;
  960:       } else {
  961:         ilseq_unicode_subst = argv[i]+16;
  962:         i++;
  963:       }
  964:       ilseq_unicode_subst_size =
  965:         check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
  966:       continue;
  967:     }
  968:     if /* --s ... --silent */
  969:        (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
  970:       silent = 1;
  971:       i++;
  972:       continue;
  973:     }
         /* NOTE(review): the --help and --version branches neither advance i
            nor continue, so usage() and print_version() presumably terminate
            the program -- confirm against their definitions. */
  974:     if /* --h ... --help */
  975:        (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
  976:       usage(0);
  977:     }
  978:     if /* --v ... --version */
  979:        (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
  980:       print_version();
  981:     }
  982: #if O_BINARY
  983:     /* Backward compatibility with iconv <= 1.9.1. */
         /* The option is accepted and otherwise ignored. */
  984:     if /* --bi ... --binary */
  985:        (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
  986:       i++;
  987:       continue;
  988:     }
  989: #endif
         /* Any other argument starting with '-' is a cluster of one-letter
            flags: 'c' requests discarding of unconvertible input, 's' sets
            silent mode.  A lone "-" and any other letter are usage errors. */
  990:     if (argv[i][0] == '-') {
  991:       const char *option = argv[i] + 1;
  992:       if (*option == '\0')
  993:         usage(1);
  994:       for (; *option; option++)
  995:         switch (*option) {
  996:           case 'c': discard_unconvertible = 1; break;
  997:           case 's': silent = 1; break;
  998:           default: usage(1);
  999:         }
 1000:       i++;
 1001:       continue;
 1002:     }
         /* First non-option argument: stop parsing; argv[i..argc-1] are the
            input file names. */
 1003:     break;
 1004:   }
 1005:   if (do_list) {
         /* Listing is accepted only when the list option is the sole
            command-line argument, i.e. i == 2 and argc == 2. */
 1006:     if (i != 2 || i != argc)
 1007:       usage(1);
 1008:     iconvlist(print_one,NULL);
 1009:     status = 0;
 1010:   } else {
 1011: #if O_BINARY
 1012:     SET_BINARY(fileno(stdout));
 1013: #endif
         /* An encoding that was not specified defaults to "char". */
 1014:     if (fromcode == NULL)
 1015:       fromcode = "char";
 1016:     if (tocode == NULL)
 1017:       tocode = "char";
 1018:     cd = iconv_open(tocode,fromcode);
 1019:     if (cd == (iconv_t)(-1)) {
           /* Probe each side separately against UCS-4 to report whether the
              source encoding, the target encoding, or only their combination
              is unsupported.
              NOTE(review): the final error() call is passed EXIT_FAILURE and
              is presumably fatal, so the invalid cd is never used below --
              confirm against the error() implementation in use. */
 1020:       if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
 1021:         error(0,0,
 1022:               /* TRANSLATORS: An error message.
 1023:                  The placeholder expands to the encoding name, specified through --from-code.  */
 1024:               _("conversion from %s unsupported"),
 1025:               fromcode);
 1026:       else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
 1027:         error(0,0,
 1028:               /* TRANSLATORS: An error message.
 1029:                  The placeholder expands to the encoding name, specified through --to-code.  */
 1030:               _("conversion to %s unsupported"),
 1031:               tocode);
 1032:       else
 1033:         error(0,0,
 1034:               /* TRANSLATORS: An error message.
 1035:                  The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively.  */
 1036:               _("conversion from %s to %s unsupported"),
 1037:               fromcode,tocode);
 1038:       error(EXIT_FAILURE,0,
 1039:             /* TRANSLATORS: Additional advice after an error message.
 1040:                The %s placeholder expands to the program name.  */
 1041:             _("try '%s -l' to get the list of supported encodings"),
 1042:             program_name);
 1043:     }
 1044:     /* Look at fromcode and tocode, to determine whether character widths
 1045:        should be determined according to legacy CJK conventions. */
 1046:     cjkcode = iconv_canonicalize(tocode);
 1047:     if (!is_cjk_encoding(cjkcode))
 1048:       cjkcode = iconv_canonicalize(fromcode);
 1049:     /* Set up fallback routines for handling impossible conversions. */
         /* The byte-substitution buffer is allocated whenever --byte-subst was
            given; everything else is set up only when the 'c' flag (discard
            unconvertible input) is not in effect. */
 1050:     if (ilseq_byte_subst != NULL)
 1051:       ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
 1052:     if (!discard_unconvertible) {
 1053:       #if HAVE_WCHAR_T
 1054:       if (ilseq_wchar_subst != NULL)
 1055:         ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
 1056:       #endif
 1057:       if (ilseq_unicode_subst != NULL)
 1058:         ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
           /* For each substitution option in use, open the auxiliary
              converters from "char" and allocate their temporary buffers.
              NOTE(review): the results of these iconv_open() calls are not
              checked here. */
 1059:       if (ilseq_byte_subst != NULL) {
 1060:         subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
 1061:         subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
 1062:         #if HAVE_WCHAR_T
 1063:         subst_mb_to_wc_cd = iconv_open("wchar_t","char");
 1064:         subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
 1065:         #endif
 1066:         subst_mb_to_mb_cd = iconv_open(tocode,"char");
 1067:         subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
 1068:       }
 1069:       #if HAVE_WCHAR_T
 1070:       if (ilseq_wchar_subst != NULL) {
 1071:         subst_wc_to_mb_cd = iconv_open(tocode,"char");
 1072:         subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
 1073:       }
 1074:       #endif
 1075:       if (ilseq_unicode_subst != NULL) {
 1076:         subst_uc_to_mb_cd = iconv_open(tocode,"char");
 1077:         subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
 1078:       }
           /* Install a fallback only for the substitution options that were
              given; the other entries stay NULL. */
 1079:       fallbacks.mb_to_uc_fallback =
 1080:         (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
 1081:       fallbacks.uc_to_mb_fallback =
 1082:         (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
 1083:       fallbacks.mb_to_wc_fallback =
 1084:         (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
 1085:       fallbacks.wc_to_mb_fallback =
 1086:         (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
 1087:       fallbacks.data = NULL;
 1088:       iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
 1089:     }
 1090:     /* Set up hooks for updating the line and column position. */
 1091:     hooks.uc_hook = update_line_column;
 1092:     hooks.wc_hook = NULL;
 1093:     hooks.data = NULL;
 1094:     iconvctl(cd, ICONV_SET_HOOKS, &hooks);
         /* No file operands left: convert standard input. */
 1095:     if (i == argc)
 1096:       status = convert(cd,fileno(stdin),
 1097:                        /* TRANSLATORS: A filename substitute denoting standard input.  */
 1098:                        _("(stdin)"));
 1099:     else {
           /* Convert every named file with the same descriptor cd.  A file
              that cannot be opened is reported and sets status to 1, but the
              remaining files are still processed; the results of convert()
              are accumulated with |=. */
 1100:       status = 0;
 1101:       for (; i < argc; i++) {
 1102:         const char* infilename = argv[i];
 1103:         FILE* infile = fopen(infilename,"r");
 1104:         if (infile == NULL) {
 1105:           int saved_errno = errno;
 1106:           error(0,saved_errno,
 1107:                 /* TRANSLATORS: The first part of an error message.
 1108:                    It is followed by a colon and a detail message.
 1109:                    The %s placeholder expands to the input file name.  */
 1110:                 _("%s"),
 1111:                 infilename);
 1112:           status = 1;
 1113:         } else {
 1114:           status |= convert(cd,fileno(infile),infilename);
 1115:           fclose(infile);
 1116:         }
 1117:       }
 1118:     }
 1119:     iconv_close(cd);
 1120:   }
         /* Report output failures: either the error indicator is already set
            on stdout, or closing stdout fails. */
 1121:   if (ferror(stdout) || fclose(stdout)) {
 1122:     error(0,0,
 1123:           /* TRANSLATORS: An error message.  */
 1124:           _("I/O error"));
 1125:     status = 1;
 1126:   }
 1127:   exit(status);
 1128: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>