Annotation of embedaddon/libiconv/lib/gentranslit.c, revision 1.1.1.1
1.1 misho 1: /* Copyright (C) 1999-2003, 2005 Free Software Foundation, Inc.
2: This file is part of the GNU LIBICONV Library.
3:
4: The GNU LIBICONV Library is free software; you can redistribute it
5: and/or modify it under the terms of the GNU Library General Public
6: License as published by the Free Software Foundation; either version 2
7: of the License, or (at your option) any later version.
8:
9: The GNU LIBICONV Library is distributed in the hope that it will be
10: useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12: Library General Public License for more details.
13:
14: You should have received a copy of the GNU Library General Public
15: License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16: If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
17: Fifth Floor, Boston, MA 02110-1301, USA. */
18:
19: /*
20: * Generates a table of small strings, used for transliteration, from a table
21: * containing lines of the form
22: * Unicode <tab> utf-8 replacement <tab> # comment
23: */
24:
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <stdbool.h>
28:
/* Sizes of the tables built by this generator. */
enum {
  NUM_CODEPOINTS = 0x110000,                /* code points 0 .. 0x10FFFF */
  DATA_CAPACITY = 0x100000,                 /* slots in the translit_data array */
  LINE_WIDTH = 8,                           /* code points per printed table line */
  NUM_LINES = NUM_CODEPOINTS / LINE_WIDTH,  /* 0x22000 */
  LINES_PER_PAGE = 0x20,                    /* 32 lines = 256 code points */
  MAX_LINE_GAP = 8,                         /* widest hole bridged inside one page */
  SUFFIX_SIZE = 16                          /* room for "%02x_%d" plus the NUL */
};

/* A run of table lines that is emitted as one translit_pageXX array. */
struct table {
  int minline;               /* first line covered by the table */
  int maxline;               /* last line covered by the table */
  int usecount;              /* number of mapped code points inside it */
  char suffix[SUFFIX_SIZE];  /* array name suffix; empty if usecount <= 1 */
};

/* Reports a fatal error on stderr and terminates with exit status 1. */
static void fail (const char *message)
{
  fprintf(stderr, "gentranslit: %s\n", message);
  exit(1);
}

/* Allocates a zero-filled array of COUNT elements or terminates. */
static void *alloc_array (size_t count, size_t size)
{
  void *mem = calloc(count, size);
  if (mem == NULL)
    fail("out of memory");
  return mem;
}

/* Writes the license banner and title comment of the generated file. */
static void print_header (void)
{
  fputs("/*\n"
        " * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n"
        " * This file is part of the GNU LIBICONV Library.\n"
        " *\n"
        " * The GNU LIBICONV Library is free software; you can redistribute it\n"
        " * and/or modify it under the terms of the GNU Library General Public\n"
        " * License as published by the Free Software Foundation; either version 2\n"
        " * of the License, or (at your option) any later version.\n"
        " *\n"
        " * The GNU LIBICONV Library is distributed in the hope that it will be\n"
        " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
        " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
        " * Library General Public License for more details.\n"
        " *\n"
        " * You should have received a copy of the GNU Library General Public\n"
        " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n"
        " * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n"
        " * Fifth Floor, Boston, MA 02110-1301, USA.\n"
        " */\n"
        "\n"
        "/*\n"
        " * Transliteration table\n"
        " */\n"
        "\n",
        stdout);
}

/*
 * Reads the transliteration definitions from standard input and writes the
 * generated C tables to standard output.
 *
 * Input lines have the form
 *     <hex code point> TAB <UTF-8 replacement> TAB <rest of line, ignored>
 * and lines starting with '#' are skipped.
 *
 * Output: the array translit_data (for every mapped code point a length
 * slot followed by the replacement characters), one translit_pageXX array
 * per group of table lines holding more than one mapping, and the macro
 * translit_index(wc) which yields the offset into translit_data or -1.
 *
 * The program takes no command line arguments.  It exits with status 0 on
 * success and 1 on a usage, input, memory or output error; abort() is
 * called if an internal consistency check fails.
 */
int main (int argc, char *argv[])
{
  unsigned int *data;
  int *uni2index;
  int *line;
  struct table *tables;
  int index = 0;
  int tableno = 0;
  int i, j, j1, j2, t, p;
  bool any_page;

  (void) argv;
  if (argc != 1)
    fail("no arguments expected; the table is read from standard input");

  /* The work arrays take several megabytes, so they live on the heap
     rather than in main's stack frame. */
  data = alloc_array(DATA_CAPACITY, sizeof *data);
  uni2index = alloc_array(NUM_CODEPOINTS, sizeof *uni2index);
  line = alloc_array(NUM_LINES, sizeof *line);
  /* Every table owns at least one line, so NUM_LINES entries always suffice. */
  tables = alloc_array(NUM_LINES, sizeof *tables);

  print_header();

  for (j = 0; j < NUM_CODEPOINTS; j++)
    uni2index[j] = -1;

  /* Pass 1: parse the input into data[] and uni2index[].
     NOTE: a code point listed on more than one input line is not detected;
     its later replacement is appended at the current end of data[] and the
     stored length then spans everything in between. */
  for (;;) {
    unsigned int code;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
      continue;
    }
    ungetc(c, stdin);
    /* %x stores through an unsigned int pointer. */
    if (scanf("%x", &code) != 1)
      fail("expected a hexadecimal code point");
    if (code >= (unsigned int) NUM_CODEPOINTS)
      fail("code point out of range");
    if (getc(stdin) != '\t')
      fail("expected a tab after the code point");

    for (;;) {
      unsigned int ch;

      c = getc(stdin);
      if (c == EOF || c == '\n')
        fail("replacement is not terminated by a tab");
      if (c == '\t')
        break;
      if (uni2index[code] < 0) {
        /* First character of this entry: reserve the length slot. */
        if (index >= DATA_CAPACITY)
          fail("too much replacement data");
        uni2index[code] = index;
        data[index++] = 0;
      }
      ch = (unsigned int) c;
      if (ch >= 0x80) {
        /* Finish reading an UTF-8 character. */
        unsigned int count;

        /* 0x80..0xBF are continuation bytes; 0xFE and 0xFF never start
           a sequence. */
        if (ch < 0xc0 || ch >= 0xfe)
          fail("invalid UTF-8 lead byte");
        count = (ch < 0xe0 ? 2 : ch < 0xf0 ? 3 : ch < 0xf8 ? 4 : ch < 0xfc ? 5 : 6);
        ch &= (1u << (8 - count)) - 1;
        while (--count > 0) {
          int cc = getc(stdin);
          if (!(cc >= 0x80 && cc < 0xc0))
            fail("invalid UTF-8 continuation byte");
          ch = (ch << 6) | ((unsigned int) cc & 0x3f);
        }
      }
      if (index >= DATA_CAPACITY)
        fail("too much replacement data");
      data[index++] = ch;
    }
    if (uni2index[code] >= 0)
      data[uni2index[code]] = (unsigned int) (index - uni2index[code] - 1);
    /* Ignore the rest of the line (the comment column). */
    do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
  }
  if (ferror(stdin))
    fail("error while reading standard input");

  /* Emit the flat data array.  Values below 32 are the length slots and
     each one starts a new output line. */
  printf("static const unsigned int translit_data[%d] = {",index);
  for (i = 0; i < index; i++) {
    if (data[i] < 32)
      printf("\n %3d,", (int) data[i]);
    else if (data[i] == '\'')
      printf("'\\'',");
    else if (data[i] == '\\')
      printf("'\\\\',");
    else if (data[i] < 127)
      printf(" '%c',", (int) data[i]);
    else if (data[i] < 256)
      printf("0x%02X,", data[i]);
    else
      printf("0x%04X,", data[i]);
  }
  printf("\n};\n");
  printf("\n");

  /* Pass 2: mark each line of LINE_WIDTH code points as used (0) or
     unused (-1). */
  for (j1 = 0; j1 < NUM_LINES; j1++) {
    bool used = false;
    for (j2 = 0; j2 < LINE_WIDTH; j2++)
      if (uni2index[LINE_WIDTH*j1+j2] >= 0)
        used = true;
    line[j1] = used ? 0 : -1;
  }

  /* Pass 3: group used lines into tables.  A line joins the latest table
     if it directly follows one of that table's lines, or if it lies in the
     same page no more than MAX_LINE_GAP lines after the table's end. */
  for (j1 = 0; j1 < NUM_LINES; j1++) {
    if (line[j1] < 0)
      continue;
    if (tableno > 0
        && ((j1 > 0 && line[j1-1] == tableno-1)
            || (tables[tableno-1].maxline / LINES_PER_PAGE == j1 / LINES_PER_PAGE
                && j1 - tables[tableno-1].maxline <= MAX_LINE_GAP))) {
      tables[tableno-1].maxline = j1;
    } else {
      tableno++;
      tables[tableno-1].minline = tables[tableno-1].maxline = j1;
    }
    line[j1] = tableno-1;
  }

  /* Count the mapped code points covered by each table. */
  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    for (j = LINE_WIDTH*tables[t].minline; j < LINE_WIDTH*(tables[t].maxline+1); j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* Name the tables that get their own array: the page number in hex, plus
     a running "_N" when the previously named table is in the same page. */
  for (t = 0, p = -1, i = 0; t < tableno; t++) {
    tables[t].suffix[0] = '\0';
    if (tables[t].usecount > 1) {
      if (p == tables[t].minline / LINES_PER_PAGE) {
        snprintf(tables[t].suffix, sizeof tables[t].suffix,
                 "%02x_%d", (unsigned int) p, ++i);
      } else {
        p = tables[t].minline / LINES_PER_PAGE;
        snprintf(tables[t].suffix, sizeof tables[t].suffix,
                 "%02x", (unsigned int) p);
      }
    }
  }

  /* Emit one array per table with more than one mapping.
     NOTE(review): the arrays are declared `short', so offsets above 32767
     would not fit - confirm the input stays below that. */
  any_page = false;
  for (t = 0; t < tableno; t++) {
    if (tables[t].usecount <= 1)
      continue;
    any_page = true;
    printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, LINE_WIDTH*(tables[t].maxline-tables[t].minline+1));
    for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
      if ((j1 % LINES_PER_PAGE) == 0 && j1 > tables[t].minline)
        printf(" /* 0x%04x */\n", (unsigned int) (LINE_WIDTH*j1));
      printf(" ");
      for (j2 = 0; j2 < LINE_WIDTH; j2++)
        printf(" %4d,", uni2index[LINE_WIDTH*j1+j2]);
      printf(" /* 0x%02x-0x%02x */\n",
             (unsigned int) (LINE_WIDTH*(j1 % LINES_PER_PAGE)),
             (unsigned int) (LINE_WIDTH*(j1 % LINES_PER_PAGE)+7));
    }
    printf("};\n");
  }
  if (any_page)
    printf("\n");

  /* Emit the lookup macro: a chain of conditionals, one per table. */
  printf("#define translit_index(wc) \\\n (");
  for (j1 = 0; j1 < NUM_LINES;) {
    t = line[j1];
    /* Find the end of the run of lines carrying the same table number. */
    for (j2 = j1; j2 < NUM_LINES && line[j2] == t; j2++)
      ;
    if (t >= 0) {
      if (j1 != tables[t].minline) abort();
      if (j2 > tables[t].maxline+1) abort();
      /* A table may contain unused lines; skip to its real end. */
      j2 = tables[t].maxline+1;

      if (tables[t].usecount == 0) abort();
      if (tables[t].usecount == 1) {
        /* A lone mapping is tested directly instead of through an array. */
        if (j2 != j1+1) abort();
        for (j = LINE_WIDTH*j1; j < LINE_WIDTH*j2; j++)
          if (uni2index[j] >= 0) {
            printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          printf("wc < 0x%04x", (unsigned int) (LINE_WIDTH*j2));
        } else {
          printf("wc >= 0x%04x && wc < 0x%04x",
                 (unsigned int) (LINE_WIDTH*j1), (unsigned int) (LINE_WIDTH*j2));
        }
        printf(" ? translit_page%s[wc", tables[t].suffix);
        if (tables[t].minline > 0)
          printf("-0x%04x", (unsigned int) (LINE_WIDTH*j1));
        printf("]");
      }
      printf(" : \\\n ");
    }
    j1 = j2;
  }
  printf("-1)\n");

  free(tables);
  free(line);
  free(uni2index);
  free(data);

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  exit(0);
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>