File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / src / iconv.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:48 2012 UTC (12 years, 4 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_13_1, HEAD
libiconv

    1: /* Copyright (C) 2000-2009 Free Software Foundation, Inc.
    2:    This file is part of the GNU LIBICONV Library.
    3: 
    4:    This program is free software: you can redistribute it and/or modify
    5:    it under the terms of the GNU General Public License as published by
    6:    the Free Software Foundation; either version 3 of the License, or
    7:    (at your option) any later version.
    8: 
    9:    This program is distributed in the hope that it will be useful,
   10:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12:    GNU General Public License for more details.
   13: 
   14:    You should have received a copy of the GNU General Public License
   15:    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
   16: 
   17: #include "config.h"
   18: #ifndef ICONV_CONST
   19: # define ICONV_CONST
   20: #endif
   21: 
   22: #include <limits.h>
   23: #include <stddef.h>
   24: #include <stdio.h>
   25: #include <stdlib.h>
   26: #include <string.h>
   27: #include <iconv.h>
   28: #include <errno.h>
   29: #include <locale.h>
   30: #include <fcntl.h>
   31: 
   32: /* Ensure that iconv_no_i18n does not depend on libintl.  */
   33: #ifdef NO_I18N
   34: # undef ENABLE_NLS
   35: # undef ENABLE_RELOCATABLE
   36: #endif
   37: 
   38: #include "binary-io.h"
   39: #include "progname.h"
   40: #include "relocatable.h"
   41: #include "xalloc.h"
   42: #include "uniwidth.h"
   43: #include "uniwidth/cjk.h"
   44: 
   45: /* Ensure that iconv_no_i18n does not depend on libintl.  */
   46: #ifdef NO_I18N
   47: #include <stdarg.h>
   48: static void
   49: error (int status, int errnum, const char *message, ...)
   50: {
   51:   va_list args;
   52: 
   53:   fflush(stdout);
   54:   fprintf(stderr,"%s: ",program_name);
   55:   va_start(args,message);
   56:   vfprintf(stderr,message,args);
   57:   va_end(args);
   58:   if (errnum) {
   59:     const char *s = strerror(errnum);
   60:     if (s == NULL)
   61:       s = "Unknown system error";
   62:   }
   63:   putc('\n',stderr);
   64:   fflush(stderr);
   65:   if (status)
   66:     exit(status);
   67: }
   68: #else
   69: # include "error.h"
   70: #endif
   71: 
   72: #include "gettext.h"
   73: 
   74: #define _(str) gettext(str)
   75: 
   76: /* Ensure that iconv_no_i18n does not depend on libintl.  */
   77: #ifdef NO_I18N
   78: # define xmalloc malloc
   79: # define xalloc_die abort
   80: #endif
   81: 
   /* Locale independent test for a decimal digit.
      Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
      <ctype.h> isdigit must be an 'unsigned char'.)
      Subtracting '0' and comparing as unsigned folds both range checks into
      one: a value below '0' wraps around to a large unsigned number.  */
   #undef isdigit
   #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
   87: 
   /* Locale independent test for a printable character.
      Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
      <ctype.h> isprint must be an 'unsigned char'.)
      Accepts exactly the range ' ' through '~'.  Note that the macro
      evaluates its argument twice.  */
   #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
   92: 
   93: /* ========================================================================= */
   94: 
   /* Controls discarding of unconvertible characters.  convert() compares it
      against 1 when iconv() reports EILSEQ, and sets it to 2 once it has
      enabled ICONV_SET_DISCARD_ILSEQ on the conversion descriptor.  */
   static int discard_unconvertible = 0;
   /* Nonzero suppresses the conversion_error_* diagnostics in convert().  */
   static int silent = 0;
   97: 
   98: static void usage (int exitcode)
   99: {
  100:   if (exitcode != 0) {
  101:     const char* helpstring1 =
  102:       /* TRANSLATORS: The first line of the short usage message.  */
  103:       _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
  104:     const char* helpstring2 =
  105:       /* TRANSLATORS: The second line of the short usage message.
  106:          Align it correctly against the first line.  */
  107:       _("or:    iconv -l");
  108:     fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
  109:     fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name);
  110:   } else {
  111:     /* xgettext: no-wrap */
  112:     /* TRANSLATORS: The first line of the long usage message.
  113:        The %s placeholder expands to the program name.  */
  114:     printf(_("\
  115: Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
  116:            program_name);
  117:     /* xgettext: no-wrap */
  118:     /* TRANSLATORS: The second line of the long usage message.
  119:        Align it correctly against the first line.
  120:        The %s placeholder expands to the program name.  */
  121:     printf(_("\
  122: or:    %s -l\n"),
  123:            program_name);
  124:     printf("\n");
  125:     /* xgettext: no-wrap */
  126:     /* TRANSLATORS: Description of the iconv program.  */
  127:     printf(_("\
  128: Converts text from one encoding to another encoding.\n"));
  129:     printf("\n");
  130:     /* xgettext: no-wrap */
  131:     printf(_("\
  132: Options controlling the input and output format:\n"));
  133:     /* xgettext: no-wrap */
  134:     printf(_("\
  135:   -f ENCODING, --from-code=ENCODING\n\
  136:                               the encoding of the input\n"));
  137:     /* xgettext: no-wrap */
  138:     printf(_("\
  139:   -t ENCODING, --to-code=ENCODING\n\
  140:                               the encoding of the output\n"));
  141:     printf("\n");
  142:     /* xgettext: no-wrap */
  143:     printf(_("\
  144: Options controlling conversion problems:\n"));
  145:     /* xgettext: no-wrap */
  146:     printf(_("\
  147:   -c                          discard unconvertible characters\n"));
  148:     /* xgettext: no-wrap */
  149:     printf(_("\
  150:   --unicode-subst=FORMATSTRING\n\
  151:                               substitution for unconvertible Unicode characters\n"));
  152:     /* xgettext: no-wrap */
  153:     printf(_("\
  154:   --byte-subst=FORMATSTRING   substitution for unconvertible bytes\n"));
  155:     /* xgettext: no-wrap */
  156:     printf(_("\
  157:   --widechar-subst=FORMATSTRING\n\
  158:                               substitution for unconvertible wide characters\n"));
  159:     printf("\n");
  160:     /* xgettext: no-wrap */
  161:     printf(_("\
  162: Options controlling error output:\n"));
  163:     /* xgettext: no-wrap */
  164:     printf(_("\
  165:   -s, --silent                suppress error messages about conversion problems\n"));
  166:     printf("\n");
  167:     /* xgettext: no-wrap */
  168:     printf(_("\
  169: Informative output:\n"));
  170:     /* xgettext: no-wrap */
  171:     printf(_("\
  172:   -l, --list                  list the supported encodings\n"));
  173:     /* xgettext: no-wrap */
  174:     printf(_("\
  175:   --help                      display this help and exit\n"));
  176:     /* xgettext: no-wrap */
  177:     printf(_("\
  178:   --version                   output version information and exit\n"));
  179:     printf("\n");
  180:     /* TRANSLATORS: The placeholder indicates the bug-reporting address
  181:        for this package.  Please add _another line_ saying
  182:        "Report translation bugs to <...>\n" with the address for translation
  183:        bugs (typically your translation team's web or email address).  */
  184:     fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
  185:   }
  186:   exit(exitcode);
  187: }
  188: 
  189: static void print_version (void)
  190: {
  191:   printf("iconv (GNU libiconv %d.%d)\n",
  192:          _libiconv_version >> 8, _libiconv_version & 0xff);
  193:   printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2009");
  194:   /* xgettext: no-wrap */
  195:   fputs (_("\
  196: License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
  197: This is free software: you are free to change and redistribute it.\n\
  198: There is NO WARRANTY, to the extent permitted by law.\n\
  199: "),stdout);
  200:   /* TRANSLATORS: The %s placeholder expands to an author's name.  */
  201:   printf(_("Written by %s.\n"),"Bruno Haible");
  202:   exit(EXIT_SUCCESS);
  203: }
  204: 
  /* Write the NAMESCOUNT strings in NAMES to stdout on a single line,
     separated by single spaces and terminated by a newline.
     DATA is ignored.  Always returns 0.  */
  static int print_one (unsigned int namescount, const char * const * names,
                        void* data)
  {
    /* Empty before the first name, a single space before every later one. */
    const char* separator = "";
    unsigned int idx = 0;

    (void)data;
    while (idx < namescount) {
      fputs(separator,stdout);
      fputs(names[idx],stdout);
      separator = " ";
      idx++;
    }
    fputc('\n',stdout);
    return 0;
  }
  218: 
  219: /* ========================================================================= */
  220: 
  /* Line number and column position, reported in the conversion error
     messages.  convert() resets them to 1 and 0 before processing a file;
     update_line_column() advances them. */
  static unsigned int line;
  static unsigned int column;
  /* Passed as the encoding argument to uc_width() when computing how many
     columns a character occupies.
     NOTE(review): assigned outside this section; presumably names the
     encoding used for CJK width decisions -- confirm against the caller. */
  static const char* cjkcode;
  225: /* Update the line number and column position after a character was
  226:    successfully converted. */
  227: static void update_line_column (unsigned int uc, void* data)
  228: {
  229:   if (uc == 0x000A) {
  230:     line++;
  231:     column = 0;
  232:   } else {
  233:     int width = uc_width(uc, cjkcode);
  234:     if (width >= 0)
  235:       column += width;
  236:     else if (uc == 0x0009)
  237:       column += 8 - (column % 8);
  238:   }
  239: }
  240: 
  241: /* ========================================================================= */
  242: 
  243: /* Production of placeholder strings as fallback for unconvertible
  244:    characters. */
  245: 
  /* Parse the decimal digit sequence that starts at *FORMATP, which must
     point at a digit, and advance *FORMATP past it.  Returns the value.
     A value larger than INT_MAX is rejected with a fatal error that names
     PARAM_NAME: without this check the accumulation would wrap around
     silently and the size computed by the caller would no longer match what
     the printf family is asked to produce. */
  static unsigned int parse_subst_number (const char **formatp,
                                          const char *param_name)
  {
    const char *p = *formatp;
    unsigned int value = 0;

    do {
      unsigned int digit = (unsigned int) (*p - '0');
      if (value > ((unsigned int) INT_MAX - digit) / 10)
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a width or precision this large is not allowed here."),
              param_name);
      value = 10*value + digit;
      p++;
    } while (isdigit (*p));
    *formatp = p;
    return value;
  }

  /* Check that the argument is a format string taking either no argument
     or exactly one unsigned integer argument. Returns the maximum output
     size of the format string (not counting a terminating NUL).
     FORMAT is the format string to validate; PARAM_NAME is the command-line
     option it came from and is only used in error messages.
     Any violation is reported through error() with EXIT_FAILURE, which
     terminates the program. */
  static size_t check_subst_formatstring (const char *format, const char *param_name)
  {
    /* C format strings are described in POSIX (IEEE P1003.1 2001), section
       XSH 3 fprintf().  See also Linux fprintf(3) manual page.
       For simplicity, we don't accept
         - the '%m$' reordering syntax,
         - the 'I' flag,
         - width specifications referring to an argument,
         - precision specifications referring to an argument,
         - size specifiers,
         - format specifiers other than 'o', 'u', 'x', 'X'.
       What remains?
       A directive
         - starts with '%',
         - is optionally followed by any of the characters '#', '0', '-', ' ',
           '+', "'", each of which acts as a flag,
         - is optionally followed by a width specification: a nonempty digit
           sequence,
         - is optionally followed by '.' and a precision specification: a
           nonempty digit sequence,
         - is finished by a specifier
           - '%', that needs no argument,
           - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
     */
    size_t maxsize = 0;
    unsigned int unnumbered_arg_count = 0;

    for (; *format != '\0';) {
      if (*format++ == '%') {
        /* A directive. */
        unsigned int width = 0;
        unsigned int precision = 0;
        unsigned int length;
        /* Parse flags. */
        for (;;) {
          if (*format == ' ' || *format == '+' || *format == '-'
              || *format == '#' || *format == '0' || *format == '\'')
            format++;
          else
            break;
        }
        /* Parse width. */
        if (*format == '*')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a variable width is not allowed here."),
                param_name);
        if (isdigit (*format))
          width = parse_subst_number (&format, param_name);
        /* Parse precision. */
        if (*format == '.') {
          format++;
          if (*format == '*')
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: A format directive with a variable precision is not allowed here."),
                  param_name);
          if (isdigit (*format))
            precision = parse_subst_number (&format, param_name);
        }
        /* Parse size. */
        switch (*format) {
          case 'h': case 'l': case 'L': case 'q':
          case 'j': case 'z': case 'Z': case 't':
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: A format directive with a size is not allowed here."),
                  param_name);
        }
        /* Parse end of directive. */
        switch (*format) {
          case '%':
            length = 1;
            break;
          case 'u': case 'o': case 'x': case 'X':
            if (*format == 'u') {
              length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                       * 0.30103 /* binary -> decimal */
                                      )
                       + 1; /* turn floor into ceil */
              if (length < precision)
                length = precision;
              length *= 2; /* estimate for FLAG_GROUP */
              length += 1; /* account for leading sign */
            } else if (*format == 'o') {
              length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                       * 0.333334 /* binary -> octal */
                                      )
                       + 1; /* turn floor into ceil */
              if (length < precision)
                length = precision;
              length += 1; /* account for leading sign */
            } else { /* 'x', 'X' */
              length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                       * 0.25 /* binary -> hexadecimal */
                                      )
                       + 1; /* turn floor into ceil */
              if (length < precision)
                length = precision;
              length += 2; /* account for leading sign or alternate form */
            }
            unnumbered_arg_count++;
            break;
          default:
            if (*format == '\0')
              error(EXIT_FAILURE,0,
                    /* TRANSLATORS: An error message.
                       The %s placeholder expands to a command-line option.  */
                    _("%s argument: The string ends in the middle of a directive."),
                    param_name);
            else if (c_isprint(*format))
              error(EXIT_FAILURE,0,
                    /* TRANSLATORS: An error message.
                       The %s placeholder expands to a command-line option.
                       The %c placeholder expands to an unknown format directive.  */
                    _("%s argument: The character '%c' is not a valid conversion specifier."),
                    param_name,*format);
            else
              error(EXIT_FAILURE,0,
                    /* TRANSLATORS: An error message.
                       The %s placeholder expands to a command-line option.  */
                    _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                    param_name);
            abort(); /*NOTREACHED*/
        }
        format++;
        /* A field is never narrower than its width specification. */
        if (length < width)
          length = width;
        maxsize += length;
      } else
        /* An ordinary character is copied to the output as is. */
        maxsize++;
    }
    if (unnumbered_arg_count > 1)
      error(EXIT_FAILURE,0,
            /* TRANSLATORS: An error message.
               The %s placeholder expands to a command-line option.
               The %u placeholder expands to the number of arguments consumed by the format string.  */
            ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                     "%s argument: The format string consumes more than one argument: %u arguments.",
                     unnumbered_arg_count),
            param_name,unnumbered_arg_count);
    return maxsize;
  }
  403: 
  /* Format strings, used as the sprintf() format by the subst_*_fallback
     functions.  convert() tests ilseq_byte_subst against NULL before
     invoking the byte fallback. */
  static const char* ilseq_byte_subst;
  static const char* ilseq_wchar_subst;
  static const char* ilseq_unicode_subst;

  /* Maximum result size for each format string.
     NOTE(review): assigned outside this section; presumably the value
     returned by check_subst_formatstring() -- confirm against the caller. */
  static size_t ilseq_byte_subst_size;
  static size_t ilseq_wchar_subst_size;
  static size_t ilseq_unicode_subst_size;

  /* Buffer of size ilseq_byte_subst_size+1. */
  static char* ilseq_byte_subst_buffer;
  #if HAVE_WCHAR_T
  /* Buffer of size ilseq_wchar_subst_size+1. */
  static char* ilseq_wchar_subst_buffer;
  #endif
  /* Buffer of size ilseq_unicode_subst_size+1. */
  static char* ilseq_unicode_subst_buffer;
  422: 
  423: /* Auxiliary variables for subst_mb_to_uc_fallback. */
  424: /* Converter from locale encoding to UCS-4. */
  425: static iconv_t subst_mb_to_uc_cd;
  426: /* Buffer of size ilseq_byte_subst_size. */
  427: static unsigned int* subst_mb_to_uc_temp_buffer;
  428: 
  429: static void subst_mb_to_uc_fallback
  430:             (const char* inbuf, size_t inbufsize,
  431:              void (*write_replacement) (const unsigned int *buf, size_t buflen,
  432:                                         void* callback_arg),
  433:              void* callback_arg,
  434:              void* data)
  435: {
  436:   for (; inbufsize > 0; inbuf++, inbufsize--) {
  437:     const char* inptr;
  438:     size_t inbytesleft;
  439:     char* outptr;
  440:     size_t outbytesleft;
  441:     sprintf(ilseq_byte_subst_buffer,
  442:             ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  443:     inptr = ilseq_byte_subst_buffer;
  444:     inbytesleft = strlen(ilseq_byte_subst_buffer);
  445:     outptr = (char*)subst_mb_to_uc_temp_buffer;
  446:     outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
  447:     iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
  448:     if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  449:         == (size_t)(-1)
  450:         || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
  451:            == (size_t)(-1))
  452:       error(EXIT_FAILURE,0,
  453:             /* TRANSLATORS: An error message.
  454:                The %s placeholder expands to a piece of text, specified through --byte-subst.  */
  455:             _("cannot convert byte substitution to Unicode: %s"),
  456:             ilseq_byte_subst_buffer);
  457:     if (!(outbytesleft%sizeof(unsigned int) == 0))
  458:       abort();
  459:     write_replacement(subst_mb_to_uc_temp_buffer,
  460:                       ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
  461:                       callback_arg);
  462:   }
  463: }
  464: 
  465: /* Auxiliary variables for subst_uc_to_mb_fallback. */
  466: /* Converter from locale encoding to target encoding. */
  467: static iconv_t subst_uc_to_mb_cd;
  468: /* Buffer of size ilseq_unicode_subst_size*4. */
  469: static char* subst_uc_to_mb_temp_buffer;
  470: 
  471: static void subst_uc_to_mb_fallback
  472:             (unsigned int code,
  473:              void (*write_replacement) (const char *buf, size_t buflen,
  474:                                         void* callback_arg),
  475:              void* callback_arg,
  476:              void* data)
  477: {
  478:   const char* inptr;
  479:   size_t inbytesleft;
  480:   char* outptr;
  481:   size_t outbytesleft;
  482:   sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
  483:   inptr = ilseq_unicode_subst_buffer;
  484:   inbytesleft = strlen(ilseq_unicode_subst_buffer);
  485:   outptr = subst_uc_to_mb_temp_buffer;
  486:   outbytesleft = ilseq_unicode_subst_size*4;
  487:   iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
  488:   if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  489:       == (size_t)(-1)
  490:       || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  491:          == (size_t)(-1))
  492:     error(EXIT_FAILURE,0,
  493:           /* TRANSLATORS: An error message.
  494:              The %s placeholder expands to a piece of text, specified through --unicode-subst.  */
  495:           _("cannot convert unicode substitution to target encoding: %s"),
  496:           ilseq_unicode_subst_buffer);
  497:   write_replacement(subst_uc_to_mb_temp_buffer,
  498:                     ilseq_unicode_subst_size*4-outbytesleft,
  499:                     callback_arg);
  500: }
  501: 
  502: #if HAVE_WCHAR_T
  503: 
  504: /* Auxiliary variables for subst_mb_to_wc_fallback. */
  505: /* Converter from locale encoding to wchar_t. */
  506: static iconv_t subst_mb_to_wc_cd;
  507: /* Buffer of size ilseq_byte_subst_size. */
  508: static wchar_t* subst_mb_to_wc_temp_buffer;
  509: 
  510: static void subst_mb_to_wc_fallback
  511:             (const char* inbuf, size_t inbufsize,
  512:              void (*write_replacement) (const wchar_t *buf, size_t buflen,
  513:                                         void* callback_arg),
  514:              void* callback_arg,
  515:              void* data)
  516: {
  517:   for (; inbufsize > 0; inbuf++, inbufsize--) {
  518:     const char* inptr;
  519:     size_t inbytesleft;
  520:     char* outptr;
  521:     size_t outbytesleft;
  522:     sprintf(ilseq_byte_subst_buffer,
  523:             ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  524:     inptr = ilseq_byte_subst_buffer;
  525:     inbytesleft = strlen(ilseq_byte_subst_buffer);
  526:     outptr = (char*)subst_mb_to_wc_temp_buffer;
  527:     outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
  528:     iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
  529:     if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  530:         == (size_t)(-1)
  531:         || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
  532:            == (size_t)(-1))
  533:       error(EXIT_FAILURE,0,
  534:             /* TRANSLATORS: An error message.
  535:                The %s placeholder expands to a piece of text, specified through --byte-subst.  */
  536:             _("cannot convert byte substitution to wide string: %s"),
  537:             ilseq_byte_subst_buffer);
  538:     if (!(outbytesleft%sizeof(wchar_t) == 0))
  539:       abort();
  540:     write_replacement(subst_mb_to_wc_temp_buffer,
  541:                       ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
  542:                       callback_arg);
  543:   }
  544: }
  545: 
  546: /* Auxiliary variables for subst_wc_to_mb_fallback. */
  547: /* Converter from locale encoding to target encoding. */
  548: static iconv_t subst_wc_to_mb_cd;
  549: /* Buffer of size ilseq_wchar_subst_size*4.
  550:    Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
  551: static char* subst_wc_to_mb_temp_buffer;
  552: 
  553: static void subst_wc_to_mb_fallback
  554:             (wchar_t code,
  555:              void (*write_replacement) (const char *buf, size_t buflen,
  556:                                         void* callback_arg),
  557:              void* callback_arg,
  558:              void* data)
  559: {
  560:   const char* inptr;
  561:   size_t inbytesleft;
  562:   char* outptr;
  563:   size_t outbytesleft;
  564:   sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
  565:   inptr = ilseq_wchar_subst_buffer;
  566:   inbytesleft = strlen(ilseq_wchar_subst_buffer);
  567:   outptr = subst_wc_to_mb_temp_buffer;
  568:   outbytesleft = ilseq_wchar_subst_size*4;
  569:   iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
  570:   if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  571:       == (size_t)(-1)
  572:       || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  573:          == (size_t)(-1))
  574:     error(EXIT_FAILURE,0,
  575:           /* TRANSLATORS: An error message.
  576:              The %s placeholder expands to a piece of text, specified through --widechar-subst.  */
  577:           _("cannot convert widechar substitution to target encoding: %s"),
  578:           ilseq_wchar_subst_buffer);
  579:   write_replacement(subst_wc_to_mb_temp_buffer,
  580:                     ilseq_wchar_subst_size*4-outbytesleft,
  581:                     callback_arg);
  582: }
  583: 
  584: #else
  585: 
  586: #define subst_mb_to_wc_fallback NULL
  587: #define subst_wc_to_mb_fallback NULL
  588: 
  589: #endif
  590: 
  591: /* Auxiliary variables for subst_mb_to_mb_fallback. */
  592: /* Converter from locale encoding to target encoding. */
  593: static iconv_t subst_mb_to_mb_cd;
  594: /* Buffer of size ilseq_byte_subst_size*4. */
  595: static char* subst_mb_to_mb_temp_buffer;
  596: 
  597: static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
  598: {
  599:   for (; inbufsize > 0; inbuf++, inbufsize--) {
  600:     const char* inptr;
  601:     size_t inbytesleft;
  602:     char* outptr;
  603:     size_t outbytesleft;
  604:     sprintf(ilseq_byte_subst_buffer,
  605:             ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
  606:     inptr = ilseq_byte_subst_buffer;
  607:     inbytesleft = strlen(ilseq_byte_subst_buffer);
  608:     outptr = subst_mb_to_mb_temp_buffer;
  609:     outbytesleft = ilseq_byte_subst_size*4;
  610:     iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
  611:     if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
  612:         == (size_t)(-1)
  613:         || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
  614:            == (size_t)(-1))
  615:       error(EXIT_FAILURE,0,
  616:             /* TRANSLATORS: An error message.
  617:                The %s placeholder expands to a piece of text, specified through --byte-subst.  */
  618:             _("cannot convert byte substitution to target encoding: %s"),
  619:             ilseq_byte_subst_buffer);
  620:     fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
  621:            stdout);
  622:   }
  623: }
  624: 
  625: /* ========================================================================= */
  626: 
  627: /* Error messages during conversion.  */
  628: 
  629: static void conversion_error_EILSEQ (const char* infilename)
  630: {
  631:   fflush(stdout);
  632:   if (column > 0)
  633:     putc('\n',stderr);
  634:   error(0,0,
  635:         /* TRANSLATORS: An error message.
  636:            The placeholders expand to the input file name, a line number, and a column number.  */
  637:         _("%s:%u:%u: cannot convert"),
  638:         infilename,line,column);
  639: }
  640: 
  641: static void conversion_error_EINVAL (const char* infilename)
  642: {
  643:   fflush(stdout);
  644:   if (column > 0)
  645:     putc('\n',stderr);
  646:   error(0,0,
  647:         /* TRANSLATORS: An error message.
  648:            The placeholders expand to the input file name, a line number, and a column number.
  649:            A "shift sequence" is a sequence of bytes that changes the state of the converter;
  650:            this concept exists only for "stateful" encodings like ISO-2022-JP.  */
  651:         _("%s:%u:%u: incomplete character or shift sequence"),
  652:         infilename,line,column);
  653: }
  654: 
  655: static void conversion_error_other (int errnum, const char* infilename)
  656: {
  657:   fflush(stdout);
  658:   if (column > 0)
  659:     putc('\n',stderr);
  660:   error(0,errnum,
  661:         /* TRANSLATORS: The first part of an error message.
  662:            It is followed by a colon and a detail message.
  663:            The placeholders expand to the input file name, a line number, and a column number.  */
  664:         _("%s:%u:%u"),
  665:         infilename,line,column);
  666: }
  667: 
  668: /* Convert the input given in infile.  */
  669: 
  670: static int convert (iconv_t cd, FILE* infile, const char* infilename)
  671: {
  672:   char inbuf[4096+4096];
  673:   size_t inbufrest = 0;
  674:   char initial_outbuf[4096];
  675:   char *outbuf = initial_outbuf;
  676:   size_t outbufsize = sizeof(initial_outbuf);
  677:   int status = 0;
  678: 
  679: #if O_BINARY
  680:   SET_BINARY(fileno(infile));
  681: #endif
  682:   line = 1; column = 0;
  683:   iconv(cd,NULL,NULL,NULL,NULL);
  684:   for (;;) {
  685:     size_t inbufsize = fread(inbuf+4096,1,4096,infile);
  686:     if (inbufsize == 0) {
  687:       if (inbufrest == 0)
  688:         break;
  689:       else {
  690:         if (ilseq_byte_subst != NULL)
  691:           subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
  692:         if (!silent)
  693:           conversion_error_EINVAL(infilename);
  694:         status = 1;
  695:         goto done;
  696:       }
  697:     } else {
  698:       const char* inptr = inbuf+4096-inbufrest;
  699:       size_t insize = inbufrest+inbufsize;
  700:       inbufrest = 0;
  701:       while (insize > 0) {
  702:         char* outptr = outbuf;
  703:         size_t outsize = outbufsize;
  704:         size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
  705:         if (outptr != outbuf) {
  706:           int saved_errno = errno;
  707:           if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
  708:             status = 1;
  709:             goto done;
  710:           }
  711:           errno = saved_errno;
  712:         }
  713:         if (res == (size_t)(-1)) {
  714:           if (errno == EILSEQ) {
  715:             if (discard_unconvertible == 1) {
  716:               int one = 1;
  717:               iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
  718:               discard_unconvertible = 2;
  719:               status = 1;
  720:             } else {
  721:               if (!silent)
  722:                 conversion_error_EILSEQ(infilename);
  723:               status = 1;
  724:               goto done;
  725:             }
  726:           } else if (errno == EINVAL) {
  727:             if (inbufsize == 0 || insize > 4096) {
  728:               if (!silent)
  729:                 conversion_error_EINVAL(infilename);
  730:               status = 1;
  731:               goto done;
  732:             } else {
  733:               inbufrest = insize;
  734:               if (insize > 0) {
  735:                 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
  736:                    we cannot use memcpy here, because source and destination
  737:                    regions may overlap. */
  738:                 char* restptr = inbuf+4096-insize;
  739:                 do { *restptr++ = *inptr++; } while (--insize > 0);
  740:               }
  741:               break;
  742:             }
  743:           } else if (errno == E2BIG) {
  744:             if (outptr==outbuf) {
  745:               /* outbuf is too small. Double its size. */
  746:               if (outbuf != initial_outbuf)
  747:                 free(outbuf);
  748:               outbufsize = 2*outbufsize;
  749:               if (outbufsize==0) /* integer overflow? */
  750:                 xalloc_die();
  751:               outbuf = (char*)xmalloc(outbufsize);
  752:             }
  753:           } else {
  754:             if (!silent)
  755:               conversion_error_other(errno,infilename);
  756:             status = 1;
  757:             goto done;
  758:           }
  759:         }
  760:       }
  761:     }
  762:   }
  763:   for (;;) {
  764:     char* outptr = outbuf;
  765:     size_t outsize = outbufsize;
  766:     size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
  767:     if (outptr != outbuf) {
  768:       int saved_errno = errno;
  769:       if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
  770:         status = 1;
  771:         goto done;
  772:       }
  773:       errno = saved_errno;
  774:     }
  775:     if (res == (size_t)(-1)) {
  776:       if (errno == EILSEQ) {
  777:         if (discard_unconvertible == 1) {
  778:           int one = 1;
  779:           iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
  780:           discard_unconvertible = 2;
  781:           status = 1;
  782:         } else {
  783:           if (!silent)
  784:             conversion_error_EILSEQ(infilename);
  785:           status = 1;
  786:           goto done;
  787:         }
  788:       } else if (errno == EINVAL) {
  789:         if (!silent)
  790:           conversion_error_EINVAL(infilename);
  791:         status = 1;
  792:         goto done;
  793:       } else if (errno == E2BIG) {
  794:         if (outptr==outbuf) {
  795:           /* outbuf is too small. Double its size. */
  796:           if (outbuf != initial_outbuf)
  797:             free(outbuf);
  798:           outbufsize = 2*outbufsize;
  799:           if (outbufsize==0) /* integer overflow? */
  800:             xalloc_die();
  801:           outbuf = (char*)xmalloc(outbufsize);
  802:         }
  803:       } else {
  804:         if (!silent)
  805:           conversion_error_other(errno,infilename);
  806:         status = 1;
  807:         goto done;
  808:       }
  809:     } else
  810:       break;
  811:   }
  812:   if (ferror(infile)) {
  813:     fflush(stdout);
  814:     if (column > 0)
  815:       putc('\n',stderr);
  816:     error(0,0,
  817:           /* TRANSLATORS: An error message.
  818:              The placeholder expands to the input file name.  */
  819:           _("%s: I/O error"),
  820:           infilename);
  821:     status = 1;
  822:     goto done;
  823:   }
  824:  done:
  825:   if (outbuf != initial_outbuf)
  826:     free(outbuf);
  827:   return status;
  828: }
  829: 
  830: /* ========================================================================= */
  831: 
  832: int main (int argc, char* argv[])
  833: {
  834:   const char* fromcode = NULL;
  835:   const char* tocode = NULL;
  836:   int do_list = 0;
  837:   iconv_t cd;
  838:   struct iconv_fallbacks fallbacks;
  839:   struct iconv_hooks hooks;
  840:   int i;
  841:   int status;
  842: 
  843:   set_program_name (argv[0]);
  844: #if HAVE_SETLOCALE
  845:   /* Needed for the locale dependent encodings, "char" and "wchar_t",
  846:      and for gettext. */
  847:   setlocale(LC_CTYPE,"");
  848: #if ENABLE_NLS
  849:   /* Needed for gettext. */
  850:   setlocale(LC_MESSAGES,"");
  851: #endif
  852: #endif
  853: #if ENABLE_NLS
  854:   bindtextdomain("libiconv",relocate(LOCALEDIR));
  855: #endif
  856:   textdomain("libiconv");
  857:   for (i = 1; i < argc;) {
  858:     size_t len = strlen(argv[i]);
  859:     if (!strcmp(argv[i],"--")) {
  860:       i++;
  861:       break;
  862:     }
  863:     if (!strcmp(argv[i],"-f")
  864:         /* --f ... --from-code */
  865:         || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
  866:         /* --from-code=... */
  867:         || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
  868:       if (len < 12)
  869:         if (i == argc-1) usage(1);
  870:       if (fromcode != NULL) usage(1);
  871:       if (len < 12) {
  872:         fromcode = argv[i+1];
  873:         i += 2;
  874:       } else {
  875:         fromcode = argv[i]+12;
  876:         i++;
  877:       }
  878:       continue;
  879:     }
  880:     if (!strcmp(argv[i],"-t")
  881:         /* --t ... --to-code */
  882:         || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
  883:         /* --from-code=... */
  884:         || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
  885:       if (len < 10)
  886:         if (i == argc-1) usage(1);
  887:       if (tocode != NULL) usage(1);
  888:       if (len < 10) {
  889:         tocode = argv[i+1];
  890:         i += 2;
  891:       } else {
  892:         tocode = argv[i]+10;
  893:         i++;
  894:       }
  895:       continue;
  896:     }
  897:     if (!strcmp(argv[i],"-l")
  898:         /* --l ... --list */
  899:         || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
  900:       do_list = 1;
  901:       i++;
  902:       continue;
  903:     }
  904:     if (/* --by ... --byte-subst */
  905:         (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
  906:         /* --byte-subst=... */
  907:         || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
  908:       if (len < 13) {
  909:         if (i == argc-1) usage(1);
  910:         ilseq_byte_subst = argv[i+1];
  911:         i += 2;
  912:       } else {
  913:         ilseq_byte_subst = argv[i]+13;
  914:         i++;
  915:       }
  916:       ilseq_byte_subst_size =
  917:         check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
  918:       continue;
  919:     }
  920:     if (/* --w ... --widechar-subst */
  921:         (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
  922:         /* --widechar-subst=... */
  923:         || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
  924:       if (len < 17) {
  925:         if (i == argc-1) usage(1);
  926:         ilseq_wchar_subst = argv[i+1];
  927:         i += 2;
  928:       } else {
  929:         ilseq_wchar_subst = argv[i]+17;
  930:         i++;
  931:       }
  932:       ilseq_wchar_subst_size =
  933:         check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
  934:       continue;
  935:     }
  936:     if (/* --u ... --unicode-subst */
  937:         (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
  938:         /* --unicode-subst=... */
  939:         || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
  940:       if (len < 16) {
  941:         if (i == argc-1) usage(1);
  942:         ilseq_unicode_subst = argv[i+1];
  943:         i += 2;
  944:       } else {
  945:         ilseq_unicode_subst = argv[i]+16;
  946:         i++;
  947:       }
  948:       ilseq_unicode_subst_size =
  949:         check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
  950:       continue;
  951:     }
  952:     if /* --s ... --silent */
  953:        (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
  954:       silent = 1;
  955:       continue;
  956:     }
  957:     if /* --h ... --help */
  958:        (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
  959:       usage(0);
  960:     }
  961:     if /* --v ... --version */
  962:        (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
  963:       print_version();
  964:     }
  965: #if O_BINARY
  966:     /* Backward compatibility with iconv <= 1.9.1. */
  967:     if /* --bi ... --binary */
  968:        (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
  969:       i++;
  970:       continue;
  971:     }
  972: #endif
  973:     if (argv[i][0] == '-') {
  974:       const char *option = argv[i] + 1;
  975:       if (*option == '\0')
  976:         usage(1);
  977:       for (; *option; option++)
  978:         switch (*option) {
  979:           case 'c': discard_unconvertible = 1; break;
  980:           case 's': silent = 1; break;
  981:           default: usage(1);
  982:         }
  983:       i++;
  984:       continue;
  985:     }
  986:     break;
  987:   }
  988:   if (do_list) {
  989:     if (i != 2 || i != argc)
  990:       usage(1);
  991:     iconvlist(print_one,NULL);
  992:     status = 0;
  993:   } else {
  994: #if O_BINARY
  995:     SET_BINARY(fileno(stdout));
  996: #endif
  997:     if (fromcode == NULL)
  998:       fromcode = "char";
  999:     if (tocode == NULL)
 1000:       tocode = "char";
 1001:     cd = iconv_open(tocode,fromcode);
 1002:     if (cd == (iconv_t)(-1)) {
 1003:       if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
 1004:         error(0,0,
 1005:               /* TRANSLATORS: An error message.
 1006:                  The placeholder expands to the encoding name, specified through --from-code.  */
 1007:               _("conversion from %s unsupported"),
 1008:               fromcode);
 1009:       else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
 1010:         error(0,0,
 1011:               /* TRANSLATORS: An error message.
 1012:                  The placeholder expands to the encoding name, specified through --to-code.  */
 1013:               _("conversion to %s unsupported"),
 1014:               tocode);
 1015:       else
 1016:         error(0,0,
 1017:               /* TRANSLATORS: An error message.
 1018:                  The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively.  */
 1019:               _("conversion from %s to %s unsupported"),
 1020:               fromcode,tocode);
 1021:       error(EXIT_FAILURE,0,
 1022:             /* TRANSLATORS: Additional advice after an error message.
 1023:                The %s placeholder expands to the program name.  */
 1024:             _("try '%s -l' to get the list of supported encodings"),
 1025:             program_name);
 1026:     }
 1027:     /* Look at fromcode and tocode, to determine whether character widths
 1028:        should be determined according to legacy CJK conventions. */
 1029:     cjkcode = iconv_canonicalize(tocode);
 1030:     if (!is_cjk_encoding(cjkcode))
 1031:       cjkcode = iconv_canonicalize(fromcode);
 1032:     /* Set up fallback routines for handling impossible conversions. */
 1033:     if (ilseq_byte_subst != NULL)
 1034:       ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
 1035:     if (!discard_unconvertible) {
 1036:       #if HAVE_WCHAR_T
 1037:       if (ilseq_wchar_subst != NULL)
 1038:         ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
 1039:       #endif
 1040:       if (ilseq_unicode_subst != NULL)
 1041:         ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
 1042:       if (ilseq_byte_subst != NULL) {
 1043:         subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
 1044:         subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
 1045:         #if HAVE_WCHAR_T
 1046:         subst_mb_to_wc_cd = iconv_open("wchar_t","char");
 1047:         subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
 1048:         #endif
 1049:         subst_mb_to_mb_cd = iconv_open(tocode,"char");
 1050:         subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
 1051:       }
 1052:       #if HAVE_WCHAR_T
 1053:       if (ilseq_wchar_subst != NULL) {
 1054:         subst_wc_to_mb_cd = iconv_open(tocode,"char");
 1055:         subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
 1056:       }
 1057:       #endif
 1058:       if (ilseq_unicode_subst != NULL) {
 1059:         subst_uc_to_mb_cd = iconv_open(tocode,"char");
 1060:         subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
 1061:       }
 1062:       fallbacks.mb_to_uc_fallback =
 1063:         (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
 1064:       fallbacks.uc_to_mb_fallback =
 1065:         (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
 1066:       fallbacks.mb_to_wc_fallback =
 1067:         (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
 1068:       fallbacks.wc_to_mb_fallback =
 1069:         (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
 1070:       fallbacks.data = NULL;
 1071:       iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
 1072:     }
 1073:     /* Set up hooks for updating the line and column position. */
 1074:     hooks.uc_hook = update_line_column;
 1075:     hooks.wc_hook = NULL;
 1076:     hooks.data = NULL;
 1077:     iconvctl(cd, ICONV_SET_HOOKS, &hooks);
 1078:     if (i == argc)
 1079:       status = convert(cd,stdin,
 1080:                        /* TRANSLATORS: A filename substitute denoting standard input.  */
 1081:                        _("(stdin)"));
 1082:     else {
 1083:       status = 0;
 1084:       for (; i < argc; i++) {
 1085:         const char* infilename = argv[i];
 1086:         FILE* infile = fopen(infilename,"r");
 1087:         if (infile == NULL) {
 1088:           int saved_errno = errno;
 1089:           error(0,saved_errno,
 1090:                 /* TRANSLATORS: The first part of an error message.
 1091:                    It is followed by a colon and a detail message.
 1092:                    The %s placeholder expands to the input file name.  */
 1093:                 _("%s"),
 1094:                 infilename);
 1095:           status = 1;
 1096:         } else {
 1097:           status |= convert(cd,infile,infilename);
 1098:           fclose(infile);
 1099:         }
 1100:       }
 1101:     }
 1102:     iconv_close(cd);
 1103:   }
 1104:   if (ferror(stdout) || fclose(stdout)) {
 1105:     error(0,0,
 1106:           /* TRANSLATORS: An error message.  */
 1107:           _("I/O error"));
 1108:     status = 1;
 1109:   }
 1110:   exit(status);
 1111: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>