Annotation of embedaddon/libiconv/tests/table-from.c, revision 1.1
1.1 ! misho 1: /* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
! 2: This file is part of the GNU LIBICONV Library.
! 3:
! 4: The GNU LIBICONV Library is free software; you can redistribute it
! 5: and/or modify it under the terms of the GNU Library General Public
! 6: License as published by the Free Software Foundation; either version 2
! 7: of the License, or (at your option) any later version.
! 8:
! 9: The GNU LIBICONV Library is distributed in the hope that it will be
! 10: useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
! 11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 12: Library General Public License for more details.
! 13:
! 14: You should have received a copy of the GNU Library General Public
! 15: License along with the GNU LIBICONV Library; see the file COPYING.LIB.
! 16: If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
! 17: Fifth Floor, Boston, MA 02110-1301, USA. */
! 18:
! 19: /* Create a table from CHARSET to Unicode. */
! 20:
! 21: #include "config.h"
! 22:
! 23: #include <stddef.h>
! 24: #include <stdio.h>
! 25: #include <stdlib.h>
! 26: #include <string.h>
! 27: #include <iconv.h>
! 28: #include <errno.h>
! 29:
! 30: #include "binary-io.h"
! 31:
/* Plane-0 filter switch.  While this is nonzero, any conversion whose
   result contains a code point that needs more than four hex digits
   (i.e. lies outside the Basic Multilingual Plane) is left out of the
   generated table.  main() turns it on for the "UTF-8" charset only. */
static int bmp_only = 0;
! 34:
/* Renders the first BUFLEN bytes of BUF as "0x" followed by two uppercase
   hexadecimal digits per byte, e.g. { 0xA1, 0x40 } -> "0xA140".
   BUFLEN must be 1, 2, 3 or 4; any other value aborts the program.
   The returned string lives in a static buffer, so it is overwritten by
   the next call. */
static const char* hexbuf (unsigned char buf[], unsigned int buflen)
{
  static char msg[50];

  if (buflen == 1) {
    sprintf(msg,"0x%02X",buf[0]);
  } else if (buflen == 2) {
    sprintf(msg,"0x%02X%02X",buf[0],buf[1]);
  } else if (buflen == 3) {
    sprintf(msg,"0x%02X%02X%02X",buf[0],buf[1],buf[2]);
  } else if (buflen == 4) {
    sprintf(msg,"0x%02X%02X%02X%02X",buf[0],buf[1],buf[2],buf[3]);
  } else {
    /* Callers never probe sequences of any other length. */
    abort();
  }

  return msg;
}
! 47:
! 48: static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
! 49: {
! 50: const char* inbuf = (const char*) buf;
! 51: size_t inbytesleft = buflen;
! 52: char* outbuf = (char*) out;
! 53: size_t outbytesleft = 3*sizeof(unsigned int);
! 54: size_t result;
! 55: iconv(cd,NULL,NULL,NULL,NULL);
! 56: result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
! 57: if (result != (size_t)(-1))
! 58: result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
! 59: if (result == (size_t)(-1)) {
! 60: if (errno == EILSEQ) {
! 61: return -1;
! 62: } else if (errno == EINVAL) {
! 63: return 0;
! 64: } else {
! 65: int saved_errno = errno;
! 66: fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
! 67: errno = saved_errno;
! 68: perror("");
! 69: exit(1);
! 70: }
! 71: } else if (result > 0) /* ignore conversions with transliteration */ {
! 72: return -1;
! 73: } else {
! 74: if (inbytesleft != 0) {
! 75: fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
! 76: exit(1);
! 77: }
! 78: return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
! 79: }
! 80: }
! 81:
! 82: /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
! 83: static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
! 84: {
! 85: static char hexbuf[21];
! 86: char* p = hexbuf;
! 87: while (outlen > 0) {
! 88: if (p > hexbuf)
! 89: *p++ = ' ';
! 90: sprintf (p, "0x%04X", out[0]);
! 91: out += 1; outlen -= 1;
! 92: if (bmp_only && strlen(p) > 6)
! 93: return NULL;
! 94: p += strlen(p);
! 95: }
! 96: return hexbuf;
! 97: }
! 98:
! 99: int main (int argc, char* argv[])
! 100: {
! 101: const char* charset;
! 102: iconv_t cd;
! 103: int search_depth;
! 104:
! 105: if (argc != 2) {
! 106: fprintf(stderr,"Usage: table-from charset\n");
! 107: exit(1);
! 108: }
! 109: charset = argv[1];
! 110:
! 111: #if O_BINARY
! 112: SET_BINARY(fileno(stdout));
! 113: #endif
! 114:
! 115: cd = iconv_open("UCS-4-INTERNAL",charset);
! 116: if (cd == (iconv_t)(-1)) {
! 117: perror("iconv_open");
! 118: exit(1);
! 119: }
! 120:
! 121: /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
! 122: big. */
! 123: bmp_only = (strcmp(charset,"UTF-8") == 0);
! 124: search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
! 125:
! 126: {
! 127: unsigned int out[3];
! 128: unsigned char buf[4];
! 129: unsigned int i0, i1, i2, i3;
! 130: int result;
! 131: for (i0 = 0; i0 < 0x100; i0++) {
! 132: buf[0] = i0;
! 133: result = try(cd,buf,1,out);
! 134: if (result < 0) {
! 135: } else if (result > 0) {
! 136: const char* unicode = ucs4_decode(out,result);
! 137: if (unicode != NULL)
! 138: printf("0x%02X\t%s\n",i0,unicode);
! 139: } else {
! 140: for (i1 = 0; i1 < 0x100; i1++) {
! 141: buf[1] = i1;
! 142: result = try(cd,buf,2,out);
! 143: if (result < 0) {
! 144: } else if (result > 0) {
! 145: const char* unicode = ucs4_decode(out,result);
! 146: if (unicode != NULL)
! 147: printf("0x%02X%02X\t%s\n",i0,i1,unicode);
! 148: } else {
! 149: for (i2 = 0; i2 < 0x100; i2++) {
! 150: buf[2] = i2;
! 151: result = try(cd,buf,3,out);
! 152: if (result < 0) {
! 153: } else if (result > 0) {
! 154: const char* unicode = ucs4_decode(out,result);
! 155: if (unicode != NULL)
! 156: printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
! 157: } else if (search_depth > 3) {
! 158: for (i3 = 0; i3 < 0x100; i3++) {
! 159: buf[3] = i3;
! 160: result = try(cd,buf,4,out);
! 161: if (result < 0) {
! 162: } else if (result > 0) {
! 163: const char* unicode = ucs4_decode(out,result);
! 164: if (unicode != NULL)
! 165: printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
! 166: } else {
! 167: fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
! 168: exit(1);
! 169: }
! 170: }
! 171: }
! 172: }
! 173: }
! 174: }
! 175: }
! 176: }
! 177: }
! 178:
! 179: if (iconv_close(cd) < 0) {
! 180: perror("iconv_close");
! 181: exit(1);
! 182: }
! 183:
! 184: if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
! 185: fprintf(stderr,"I/O error\n");
! 186: exit(1);
! 187: }
! 188:
! 189: exit(0);
! 190: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>