Annotation of embedaddon/libiconv/tests/table-from.c, revision 1.1.1.1
1.1 misho 1: /* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
2: This file is part of the GNU LIBICONV Library.
3:
4: The GNU LIBICONV Library is free software; you can redistribute it
5: and/or modify it under the terms of the GNU Library General Public
6: License as published by the Free Software Foundation; either version 2
7: of the License, or (at your option) any later version.
8:
9: The GNU LIBICONV Library is distributed in the hope that it will be
10: useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12: Library General Public License for more details.
13:
14: You should have received a copy of the GNU Library General Public
15: License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16: If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
17: Fifth Floor, Boston, MA 02110-1301, USA. */
18:
19: /* Create a table from CHARSET to Unicode. */
20:
21: #include "config.h"
22:
23: #include <stddef.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <iconv.h>
28: #include <errno.h>
29:
30: #include "binary-io.h"
31:
32: /* If nonzero, ignore conversions outside Unicode plane 0. */
33: static int bmp_only;
34:
/* Formats the first BUFLEN bytes of BUF as "0x" followed by two uppercase
   hexadecimal digits per byte.  BUFLEN must be between 1 and 4; any other
   length aborts the program.  The result is stored in a static buffer that
   the next call overwrites. */
static const char* hexbuf (unsigned char buf[], unsigned int buflen)
{
  static char msg[50];
  char* cursor = msg;
  unsigned int idx;

  if (buflen < 1 || buflen > 4)
    abort();

  cursor += sprintf(cursor,"0x");
  for (idx = 0; idx < buflen; idx++)
    cursor += sprintf(cursor,"%02X",buf[idx]);
  return msg;
}
47:
48: static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
49: {
50: const char* inbuf = (const char*) buf;
51: size_t inbytesleft = buflen;
52: char* outbuf = (char*) out;
53: size_t outbytesleft = 3*sizeof(unsigned int);
54: size_t result;
55: iconv(cd,NULL,NULL,NULL,NULL);
56: result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
57: if (result != (size_t)(-1))
58: result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
59: if (result == (size_t)(-1)) {
60: if (errno == EILSEQ) {
61: return -1;
62: } else if (errno == EINVAL) {
63: return 0;
64: } else {
65: int saved_errno = errno;
66: fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
67: errno = saved_errno;
68: perror("");
69: exit(1);
70: }
71: } else if (result > 0) /* ignore conversions with transliteration */ {
72: return -1;
73: } else {
74: if (inbytesleft != 0) {
75: fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
76: exit(1);
77: }
78: return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
79: }
80: }
81:
82: /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
83: static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
84: {
85: static char hexbuf[21];
86: char* p = hexbuf;
87: while (outlen > 0) {
88: if (p > hexbuf)
89: *p++ = ' ';
90: sprintf (p, "0x%04X", out[0]);
91: out += 1; outlen -= 1;
92: if (bmp_only && strlen(p) > 6)
93: return NULL;
94: p += strlen(p);
95: }
96: return hexbuf;
97: }
98:
99: int main (int argc, char* argv[])
100: {
101: const char* charset;
102: iconv_t cd;
103: int search_depth;
104:
105: if (argc != 2) {
106: fprintf(stderr,"Usage: table-from charset\n");
107: exit(1);
108: }
109: charset = argv[1];
110:
111: #if O_BINARY
112: SET_BINARY(fileno(stdout));
113: #endif
114:
115: cd = iconv_open("UCS-4-INTERNAL",charset);
116: if (cd == (iconv_t)(-1)) {
117: perror("iconv_open");
118: exit(1);
119: }
120:
121: /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
122: big. */
123: bmp_only = (strcmp(charset,"UTF-8") == 0);
124: search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
125:
126: {
127: unsigned int out[3];
128: unsigned char buf[4];
129: unsigned int i0, i1, i2, i3;
130: int result;
131: for (i0 = 0; i0 < 0x100; i0++) {
132: buf[0] = i0;
133: result = try(cd,buf,1,out);
134: if (result < 0) {
135: } else if (result > 0) {
136: const char* unicode = ucs4_decode(out,result);
137: if (unicode != NULL)
138: printf("0x%02X\t%s\n",i0,unicode);
139: } else {
140: for (i1 = 0; i1 < 0x100; i1++) {
141: buf[1] = i1;
142: result = try(cd,buf,2,out);
143: if (result < 0) {
144: } else if (result > 0) {
145: const char* unicode = ucs4_decode(out,result);
146: if (unicode != NULL)
147: printf("0x%02X%02X\t%s\n",i0,i1,unicode);
148: } else {
149: for (i2 = 0; i2 < 0x100; i2++) {
150: buf[2] = i2;
151: result = try(cd,buf,3,out);
152: if (result < 0) {
153: } else if (result > 0) {
154: const char* unicode = ucs4_decode(out,result);
155: if (unicode != NULL)
156: printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
157: } else if (search_depth > 3) {
158: for (i3 = 0; i3 < 0x100; i3++) {
159: buf[3] = i3;
160: result = try(cd,buf,4,out);
161: if (result < 0) {
162: } else if (result > 0) {
163: const char* unicode = ucs4_decode(out,result);
164: if (unicode != NULL)
165: printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
166: } else {
167: fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
168: exit(1);
169: }
170: }
171: }
172: }
173: }
174: }
175: }
176: }
177: }
178:
179: if (iconv_close(cd) < 0) {
180: perror("iconv_close");
181: exit(1);
182: }
183:
184: if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
185: fprintf(stderr,"I/O error\n");
186: exit(1);
187: }
188:
189: exit(0);
190: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>