Annotation of embedaddon/libiconv/tests/table-from.c, revision 1.1.1.1

1.1       misho       1: /* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
                      2:    This file is part of the GNU LIBICONV Library.
                      3: 
                      4:    The GNU LIBICONV Library is free software; you can redistribute it
                      5:    and/or modify it under the terms of the GNU Library General Public
                      6:    License as published by the Free Software Foundation; either version 2
                      7:    of the License, or (at your option) any later version.
                      8: 
                      9:    The GNU LIBICONV Library is distributed in the hope that it will be
                     10:    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     12:    Library General Public License for more details.
                     13: 
                     14:    You should have received a copy of the GNU Library General Public
                     15:    License along with the GNU LIBICONV Library; see the file COPYING.LIB.
                     16:    If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
                     17:    Fifth Floor, Boston, MA 02110-1301, USA.  */
                     18: 
                     19: /* Create a table from CHARSET to Unicode. */
                     20: 
                     21: #include "config.h"
                     22: 
                     23: #include <stddef.h>
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <string.h>
                     27: #include <iconv.h>
                     28: #include <errno.h>
                     29: 
                     30: #include "binary-io.h"
                     31: 
                     32: /* If nonzero, ignore conversions outside Unicode plane 0. */
                     33: static int bmp_only;
                     34: 
                     35: static const char* hexbuf (unsigned char buf[], unsigned int buflen)
                     36: {
                     37:   static char msg[50];
                     38:   switch (buflen) {
                     39:     case 1: sprintf(msg,"0x%02X",buf[0]); break;
                     40:     case 2: sprintf(msg,"0x%02X%02X",buf[0],buf[1]); break;
                     41:     case 3: sprintf(msg,"0x%02X%02X%02X",buf[0],buf[1],buf[2]); break;
                     42:     case 4: sprintf(msg,"0x%02X%02X%02X%02X",buf[0],buf[1],buf[2],buf[3]); break;
                     43:     default: abort();
                     44:   }
                     45:   return msg;
                     46: }
                     47: 
                     48: static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
                     49: {
                     50:   const char* inbuf = (const char*) buf;
                     51:   size_t inbytesleft = buflen;
                     52:   char* outbuf = (char*) out;
                     53:   size_t outbytesleft = 3*sizeof(unsigned int);
                     54:   size_t result;
                     55:   iconv(cd,NULL,NULL,NULL,NULL);
                     56:   result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
                     57:   if (result != (size_t)(-1))
                     58:     result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
                     59:   if (result == (size_t)(-1)) {
                     60:     if (errno == EILSEQ) {
                     61:       return -1;
                     62:     } else if (errno == EINVAL) {
                     63:       return 0;
                     64:     } else {
                     65:       int saved_errno = errno;
                     66:       fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
                     67:       errno = saved_errno;
                     68:       perror("");
                     69:       exit(1);
                     70:     }
                     71:   } else if (result > 0) /* ignore conversions with transliteration */ {
                     72:     return -1;
                     73:   } else {
                     74:     if (inbytesleft != 0) {
                     75:       fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
                     76:       exit(1);
                     77:     }
                     78:     return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
                     79:   }
                     80: }
                     81: 
                     82: /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
                     83: static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
                     84: {
                     85:   static char hexbuf[21];
                     86:   char* p = hexbuf;
                     87:   while (outlen > 0) {
                     88:     if (p > hexbuf)
                     89:       *p++ = ' ';
                     90:     sprintf (p, "0x%04X", out[0]);
                     91:     out += 1; outlen -= 1;
                     92:     if (bmp_only && strlen(p) > 6)
                     93:       return NULL;
                     94:     p += strlen(p);
                     95:   }
                     96:   return hexbuf;
                     97: }
                     98: 
                     99: int main (int argc, char* argv[])
                    100: {
                    101:   const char* charset;
                    102:   iconv_t cd;
                    103:   int search_depth;
                    104: 
                    105:   if (argc != 2) {
                    106:     fprintf(stderr,"Usage: table-from charset\n");
                    107:     exit(1);
                    108:   }
                    109:   charset = argv[1];
                    110: 
                    111: #if O_BINARY
                    112:   SET_BINARY(fileno(stdout));
                    113: #endif
                    114: 
                    115:   cd = iconv_open("UCS-4-INTERNAL",charset);
                    116:   if (cd == (iconv_t)(-1)) {
                    117:     perror("iconv_open");
                    118:     exit(1);
                    119:   }
                    120: 
                    121:   /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
                    122:      big. */
                    123:   bmp_only = (strcmp(charset,"UTF-8") == 0);
                    124:   search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
                    125: 
                    126:   {
                    127:     unsigned int out[3];
                    128:     unsigned char buf[4];
                    129:     unsigned int i0, i1, i2, i3;
                    130:     int result;
                    131:     for (i0 = 0; i0 < 0x100; i0++) {
                    132:       buf[0] = i0;
                    133:       result = try(cd,buf,1,out);
                    134:       if (result < 0) {
                    135:       } else if (result > 0) {
                    136:         const char* unicode = ucs4_decode(out,result);
                    137:         if (unicode != NULL)
                    138:           printf("0x%02X\t%s\n",i0,unicode);
                    139:       } else {
                    140:         for (i1 = 0; i1 < 0x100; i1++) {
                    141:           buf[1] = i1;
                    142:           result = try(cd,buf,2,out);
                    143:           if (result < 0) {
                    144:           } else if (result > 0) {
                    145:             const char* unicode = ucs4_decode(out,result);
                    146:             if (unicode != NULL)
                    147:               printf("0x%02X%02X\t%s\n",i0,i1,unicode);
                    148:           } else {
                    149:             for (i2 = 0; i2 < 0x100; i2++) {
                    150:               buf[2] = i2;
                    151:               result = try(cd,buf,3,out);
                    152:               if (result < 0) {
                    153:               } else if (result > 0) {
                    154:                 const char* unicode = ucs4_decode(out,result);
                    155:                 if (unicode != NULL)
                    156:                   printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
                    157:               } else if (search_depth > 3) {
                    158:                 for (i3 = 0; i3 < 0x100; i3++) {
                    159:                   buf[3] = i3;
                    160:                   result = try(cd,buf,4,out);
                    161:                   if (result < 0) {
                    162:                   } else if (result > 0) {
                    163:                     const char* unicode = ucs4_decode(out,result);
                    164:                     if (unicode != NULL)
                    165:                       printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
                    166:                   } else {
                    167:                     fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
                    168:                     exit(1);
                    169:                   }
                    170:                 }
                    171:               }
                    172:             }
                    173:           }
                    174:         }
                    175:       }
                    176:     }
                    177:   }
                    178: 
                    179:   if (iconv_close(cd) < 0) {
                    180:     perror("iconv_close");
                    181:     exit(1);
                    182:   }
                    183: 
                    184:   if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
                    185:     fprintf(stderr,"I/O error\n");
                    186:     exit(1);
                    187:   }
                    188: 
                    189:   exit(0);
                    190: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>