File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / tests / table-from.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 22:57:49 2012 UTC (12 years, 4 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_14p0, v1_14, v1_13_1, HEAD
libiconv

    1: /* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
    2:    This file is part of the GNU LIBICONV Library.
    3: 
    4:    The GNU LIBICONV Library is free software; you can redistribute it
    5:    and/or modify it under the terms of the GNU Library General Public
    6:    License as published by the Free Software Foundation; either version 2
    7:    of the License, or (at your option) any later version.
    8: 
    9:    The GNU LIBICONV Library is distributed in the hope that it will be
   10:    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   12:    Library General Public License for more details.
   13: 
   14:    You should have received a copy of the GNU Library General Public
   15:    License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   16:    If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   17:    Fifth Floor, Boston, MA 02110-1301, USA.  */
   18: 
   19: /* Create a table from CHARSET to Unicode. */
   20: 
   21: #include "config.h"
   22: 
   23: #include <stddef.h>
   24: #include <stdio.h>
   25: #include <stdlib.h>
   26: #include <string.h>
   27: #include <iconv.h>
   28: #include <errno.h>
   29: 
   30: #include "binary-io.h"
   31: 
   /* If nonzero, ignore conversions outside Unicode plane 0: ucs4_decode()
      then rejects any element that needs more than four hex digits.
      main() turns this on when the charset under test is UTF-8. */
   static int bmp_only;
   34: 
   /* Formats the BUFLEN bytes of BUF as "0x" followed by two uppercase hex
      digits per byte.  BUFLEN must be between 1 and 4; any other value
      aborts the program.  The result lives in a static buffer that the next
      call overwrites. */
   static const char* hexbuf (unsigned char buf[], unsigned int buflen)
   {
     static char msg[50];
     char* p = msg;
     unsigned int i;

     if (buflen < 1 || buflen > 4)
       abort();

     p += sprintf(p,"0x");
     for (i = 0; i < buflen; i++)
       p += sprintf(p,"%02X",buf[i]);
     return msg;
   }
   47: 
   48: static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
   49: {
   50:   const char* inbuf = (const char*) buf;
   51:   size_t inbytesleft = buflen;
   52:   char* outbuf = (char*) out;
   53:   size_t outbytesleft = 3*sizeof(unsigned int);
   54:   size_t result;
   55:   iconv(cd,NULL,NULL,NULL,NULL);
   56:   result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
   57:   if (result != (size_t)(-1))
   58:     result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
   59:   if (result == (size_t)(-1)) {
   60:     if (errno == EILSEQ) {
   61:       return -1;
   62:     } else if (errno == EINVAL) {
   63:       return 0;
   64:     } else {
   65:       int saved_errno = errno;
   66:       fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
   67:       errno = saved_errno;
   68:       perror("");
   69:       exit(1);
   70:     }
   71:   } else if (result > 0) /* ignore conversions with transliteration */ {
   72:     return -1;
   73:   } else {
   74:     if (inbytesleft != 0) {
   75:       fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
   76:       exit(1);
   77:     }
   78:     return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
   79:   }
   80: }
   81: 
   82: /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
   83: static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
   84: {
   85:   static char hexbuf[21];
   86:   char* p = hexbuf;
   87:   while (outlen > 0) {
   88:     if (p > hexbuf)
   89:       *p++ = ' ';
   90:     sprintf (p, "0x%04X", out[0]);
   91:     out += 1; outlen -= 1;
   92:     if (bmp_only && strlen(p) > 6)
   93:       return NULL;
   94:     p += strlen(p);
   95:   }
   96:   return hexbuf;
   97: }
   98: 
   99: int main (int argc, char* argv[])
  100: {
  101:   const char* charset;
  102:   iconv_t cd;
  103:   int search_depth;
  104: 
  105:   if (argc != 2) {
  106:     fprintf(stderr,"Usage: table-from charset\n");
  107:     exit(1);
  108:   }
  109:   charset = argv[1];
  110: 
  111: #if O_BINARY
  112:   SET_BINARY(fileno(stdout));
  113: #endif
  114: 
  115:   cd = iconv_open("UCS-4-INTERNAL",charset);
  116:   if (cd == (iconv_t)(-1)) {
  117:     perror("iconv_open");
  118:     exit(1);
  119:   }
  120: 
  121:   /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
  122:      big. */
  123:   bmp_only = (strcmp(charset,"UTF-8") == 0);
  124:   search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
  125: 
  126:   {
  127:     unsigned int out[3];
  128:     unsigned char buf[4];
  129:     unsigned int i0, i1, i2, i3;
  130:     int result;
  131:     for (i0 = 0; i0 < 0x100; i0++) {
  132:       buf[0] = i0;
  133:       result = try(cd,buf,1,out);
  134:       if (result < 0) {
  135:       } else if (result > 0) {
  136:         const char* unicode = ucs4_decode(out,result);
  137:         if (unicode != NULL)
  138:           printf("0x%02X\t%s\n",i0,unicode);
  139:       } else {
  140:         for (i1 = 0; i1 < 0x100; i1++) {
  141:           buf[1] = i1;
  142:           result = try(cd,buf,2,out);
  143:           if (result < 0) {
  144:           } else if (result > 0) {
  145:             const char* unicode = ucs4_decode(out,result);
  146:             if (unicode != NULL)
  147:               printf("0x%02X%02X\t%s\n",i0,i1,unicode);
  148:           } else {
  149:             for (i2 = 0; i2 < 0x100; i2++) {
  150:               buf[2] = i2;
  151:               result = try(cd,buf,3,out);
  152:               if (result < 0) {
  153:               } else if (result > 0) {
  154:                 const char* unicode = ucs4_decode(out,result);
  155:                 if (unicode != NULL)
  156:                   printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
  157:               } else if (search_depth > 3) {
  158:                 for (i3 = 0; i3 < 0x100; i3++) {
  159:                   buf[3] = i3;
  160:                   result = try(cd,buf,4,out);
  161:                   if (result < 0) {
  162:                   } else if (result > 0) {
  163:                     const char* unicode = ucs4_decode(out,result);
  164:                     if (unicode != NULL)
  165:                       printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
  166:                   } else {
  167:                     fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
  168:                     exit(1);
  169:                   }
  170:                 }
  171:               }
  172:             }
  173:           }
  174:         }
  175:       }
  176:     }
  177:   }
  178: 
  179:   if (iconv_close(cd) < 0) {
  180:     perror("iconv_close");
  181:     exit(1);
  182:   }
  183: 
  184:   if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
  185:     fprintf(stderr,"I/O error\n");
  186:     exit(1);
  187:   }
  188: 
  189:   exit(0);
  190: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>