File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / tests / table-from.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 9 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
    2:    This file is part of the GNU LIBICONV Library.
    3: 
    4:    The GNU LIBICONV Library is free software; you can redistribute it
    5:    and/or modify it under the terms of the GNU Library General Public
    6:    License as published by the Free Software Foundation; either version 2
    7:    of the License, or (at your option) any later version.
    8: 
    9:    The GNU LIBICONV Library is distributed in the hope that it will be
   10:    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   12:    Library General Public License for more details.
   13: 
   14:    You should have received a copy of the GNU Library General Public
   15:    License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   16:    If not, see <https://www.gnu.org/licenses/>.  */
   17: 
   18: /* Create a table from CHARSET to Unicode. */
   19: 
   20: #include "config.h"
   21: 
   22: #include <stddef.h>
   23: #include <stdio.h>
   24: #include <stdlib.h>
   25: #include <string.h>
   26: #include <iconv.h>
   27: #include <errno.h>
   28: 
   29: #include "binary-io.h"
   30: 
   31: /* If nonzero, ignore conversions outside Unicode plane 0. */
   32: static int bmp_only;
   33: 
   34: static const char* hexbuf (unsigned char buf[], unsigned int buflen)
   35: {
   36:   static char msg[50];
   37:   switch (buflen) {
   38:     case 1: sprintf(msg,"0x%02X",buf[0]); break;
   39:     case 2: sprintf(msg,"0x%02X%02X",buf[0],buf[1]); break;
   40:     case 3: sprintf(msg,"0x%02X%02X%02X",buf[0],buf[1],buf[2]); break;
   41:     case 4: sprintf(msg,"0x%02X%02X%02X%02X",buf[0],buf[1],buf[2],buf[3]); break;
   42:     default: abort();
   43:   }
   44:   return msg;
   45: }
   46: 
   47: static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
   48: {
   49:   const char* inbuf = (const char*) buf;
   50:   size_t inbytesleft = buflen;
   51:   char* outbuf = (char*) out;
   52:   size_t outbytesleft = 3*sizeof(unsigned int);
   53:   size_t result;
   54:   iconv(cd,NULL,NULL,NULL,NULL);
   55:   result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
   56:   if (result != (size_t)(-1))
   57:     result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
   58:   if (result == (size_t)(-1)) {
   59:     if (errno == EILSEQ) {
   60:       return -1;
   61:     } else if (errno == EINVAL) {
   62:       return 0;
   63:     } else {
   64:       int saved_errno = errno;
   65:       fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
   66:       errno = saved_errno;
   67:       perror("");
   68:       exit(1);
   69:     }
   70:   } else if (result > 0) /* ignore conversions with transliteration */ {
   71:     return -1;
   72:   } else {
   73:     if (inbytesleft != 0) {
   74:       fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
   75:       exit(1);
   76:     }
   77:     return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
   78:   }
   79: }
   80: 
   81: /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
   82: static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
   83: {
   84:   static char hexbuf[21];
   85:   char* p = hexbuf;
   86:   while (outlen > 0) {
   87:     if (p > hexbuf)
   88:       *p++ = ' ';
   89:     sprintf (p, "0x%04X", out[0]);
   90:     out += 1; outlen -= 1;
   91:     if (bmp_only && strlen(p) > 6)
   92:       return NULL;
   93:     p += strlen(p);
   94:   }
   95:   return hexbuf;
   96: }
   97: 
   98: int main (int argc, char* argv[])
   99: {
  100:   const char* charset;
  101:   iconv_t cd;
  102:   int search_depth;
  103: 
  104:   if (argc != 2) {
  105:     fprintf(stderr,"Usage: table-from charset\n");
  106:     exit(1);
  107:   }
  108:   charset = argv[1];
  109: 
  110: #if O_BINARY
  111:   SET_BINARY(fileno(stdout));
  112: #endif
  113: 
  114:   cd = iconv_open("UCS-4-INTERNAL",charset);
  115:   if (cd == (iconv_t)(-1)) {
  116:     perror("iconv_open");
  117:     exit(1);
  118:   }
  119: 
  120:   /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
  121:      big. */
  122:   bmp_only = (strcmp(charset,"UTF-8") == 0);
  123:   search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
  124: 
  125:   {
  126:     unsigned int out[3];
  127:     unsigned char buf[4];
  128:     unsigned int i0, i1, i2, i3;
  129:     int result;
  130:     for (i0 = 0; i0 < 0x100; i0++) {
  131:       buf[0] = i0;
  132:       result = try(cd,buf,1,out);
  133:       if (result < 0) {
  134:       } else if (result > 0) {
  135:         const char* unicode = ucs4_decode(out,result);
  136:         if (unicode != NULL)
  137:           printf("0x%02X\t%s\n",i0,unicode);
  138:       } else {
  139:         for (i1 = 0; i1 < 0x100; i1++) {
  140:           buf[1] = i1;
  141:           result = try(cd,buf,2,out);
  142:           if (result < 0) {
  143:           } else if (result > 0) {
  144:             const char* unicode = ucs4_decode(out,result);
  145:             if (unicode != NULL)
  146:               printf("0x%02X%02X\t%s\n",i0,i1,unicode);
  147:           } else {
  148:             for (i2 = 0; i2 < 0x100; i2++) {
  149:               buf[2] = i2;
  150:               result = try(cd,buf,3,out);
  151:               if (result < 0) {
  152:               } else if (result > 0) {
  153:                 const char* unicode = ucs4_decode(out,result);
  154:                 if (unicode != NULL)
  155:                   printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
  156:               } else if (search_depth > 3) {
  157:                 for (i3 = 0; i3 < 0x100; i3++) {
  158:                   buf[3] = i3;
  159:                   result = try(cd,buf,4,out);
  160:                   if (result < 0) {
  161:                   } else if (result > 0) {
  162:                     const char* unicode = ucs4_decode(out,result);
  163:                     if (unicode != NULL)
  164:                       printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
  165:                   } else {
  166:                     fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
  167:                     exit(1);
  168:                   }
  169:                 }
  170:               }
  171:             }
  172:           }
  173:         }
  174:       }
  175:     }
  176:   }
  177: 
  178:   if (iconv_close(cd) < 0) {
  179:     perror("iconv_close");
  180:     exit(1);
  181:   }
  182: 
  183:   if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
  184:     fprintf(stderr,"I/O error\n");
  185:     exit(1);
  186:   }
  187: 
  188:   exit(0);
  189: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>