Annotation of embedaddon/libiconv/lib/gentranslit.c, revision 1.1.1.3
1.1.1.3 ! misho 1: /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018 Free Software Foundation, Inc.
1.1 misho 2: This file is part of the GNU LIBICONV Library.
3:
4: The GNU LIBICONV Library is free software; you can redistribute it
5: and/or modify it under the terms of the GNU Library General Public
6: License as published by the Free Software Foundation; either version 2
7: of the License, or (at your option) any later version.
8:
9: The GNU LIBICONV Library is distributed in the hope that it will be
10: useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12: Library General Public License for more details.
13:
14: You should have received a copy of the GNU Library General Public
15: License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho 16: If not, see <https://www.gnu.org/licenses/>. */
1.1 misho 17:
18: /*
19: * Generates a table of small strings, used for transliteration, from a table
20: * containing lines of the form
21: * Unicode <tab> utf-8 replacement <tab> # comment
22: */
23:
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <stdbool.h>
27:
/* Prints MESSAGE followed by a newline to stderr and exits with status 1. */
static void die (const char *message)
{
  fprintf(stderr, "%s\n", message);
  exit(1);
}

/*
 * Reads the transliteration definition table from standard input and writes
 * the generated C source (translit_data[], the translit_page* arrays and the
 * translit_index() macro) to standard output.
 *
 * Each non-comment input line has the form
 *   hex code point <tab> UTF-8 replacement <tab> rest of line (ignored)
 * Lines starting with '#' are skipped.
 *
 * The program takes no command-line arguments.  It exits with status 1 on a
 * usage error, malformed input, a code point outside the table, exhausted
 * table capacity, allocation failure or output error, and with 0 otherwise.
 */
int main (int argc, char *argv[])
{
  enum {
    DATA_CAPACITY = 0x100000,          /* elements available in data[] */
    UNICODE_LIMIT = 0x110000,          /* elements in uni2index[] */
    LINE_COUNT    = UNICODE_LIMIT / 8, /* 0x22000 lines of 8 code points */
    MAX_TABLES    = 0x2000,            /* elements available in tables[] */
    SUFFIX_SIZE   = 32                 /* room for "%02x_%d" with any int */
  };
  struct table_info { int minline; int maxline; int usecount; const char* suffix; };

  unsigned int *data;
  int *uni2index;
  int *line;
  struct table_info *tables;
  int index;

  (void) argv;
  if (argc != 1)
    exit(1);

  /* All large work arrays live on the heap; together they are far too big
     to be placed safely on the stack of every platform. */
  data = malloc(DATA_CAPACITY * sizeof(*data));
  uni2index = malloc(UNICODE_LIMIT * sizeof(*uni2index));
  line = malloc(LINE_COUNT * sizeof(*line));
  tables = malloc(MAX_TABLES * sizeof(*tables));
  if (data == NULL || uni2index == NULL || line == NULL || tables == NULL)
    die("out of memory");

  printf("/*\n");
  printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  printf(" * Library General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Library General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  printf(" * If not, see <https://www.gnu.org/licenses/>.\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * Transliteration table\n");
  printf(" */\n");
  printf("\n");

  /* Pass 1: parse stdin.  For every code point with a replacement, data[]
     receives one length slot followed by the replacement's code points, and
     uni2index[] records the position of that length slot (-1 = no entry). */
  {
    int c;
    int j;
    for (j = 0; j < UNICODE_LIMIT; j++)
      uni2index[j] = -1;
    index = 0;
    for (;;) {
      unsigned int code;

      c = getc(stdin);
      if (c == EOF)
        break;
      if (c == '#') {
        /* Comment line: discard up to and including the newline. */
        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        continue;
      }
      ungetc(c,stdin);
      /* %x requires an unsigned int object. */
      if (scanf("%x",&code) != 1)
        exit(1);
      /* The value is used as an index into uni2index[]; reject anything
         that would fall outside it. */
      if (code >= (unsigned int) UNICODE_LIMIT)
        die("code point out of range");
      j = (int) code;
      c = getc(stdin);
      if (c != '\t')
        exit(1);
      /* NOTE(review): a code point that appears on two input lines gets its
         second replacement appended at the current end of data[] while the
         first length slot is rewritten; this looks unintended - confirm the
         input never contains duplicates. */
      for (;;) {
        unsigned int wc;

        c = getc(stdin);
        if (c == EOF || c == '\n')
          exit(1);
        if (c == '\t')
          break;
        if (uni2index[j] < 0) {
          if (index >= DATA_CAPACITY)
            die("transliteration data too large");
          uni2index[j] = index;
          data[index++] = 0;   /* length slot, filled in after the loop */
        }
        wc = (unsigned int) c;
        if (c >= 0x80) {
          /* Finish reading an UTF-8 character. */
          if (c < 0xc0)
            exit(1);
          else {
            unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
            /* Accumulate in an unsigned value so that the shifts below are
               well defined for every lead byte. */
            wc &= (1u << (8-i)) - 1;
            while (--i > 0) {
              int cc = getc(stdin);
              if (!(cc >= 0x80 && cc < 0xc0))
                exit(1);
              wc <<= 6; wc |= ((unsigned int) cc & 0x3f);
            }
          }
        }
        if (index >= DATA_CAPACITY)
          die("transliteration data too large");
        data[index++] = wc;
      }
      if (uni2index[j] >= 0)
        data[uni2index[j]] = index - uni2index[j] - 1;
      /* Ignore the trailing comment field. */
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
    }
  }

  /* Emit the flat data array. */
  printf("static const unsigned int translit_data[%d] = {",index);
  {
    int i;
    for (i = 0; i < index; i++) {
      if (data[i] < 32)
        printf("\n %3d,",(int) data[i]);
      else if (data[i] == '\'')
        printf("'\\'',");
      else if (data[i] == '\\')
        printf("'\\\\',");
      else if (data[i] < 127)
        printf(" '%c',",(int) data[i]);
      else if (data[i] < 256)
        printf("0x%02X,",data[i]);
      else
        printf("0x%04X,",data[i]);
    }
    printf("\n};\n");
  }
  printf("\n");

  /* Pass 2: group the code space into lines of 8 code points, merge
     neighbouring used lines into tables, and emit the page arrays plus the
     translit_index() lookup macro. */
  {
    int tableno;
    int i, j, p, j1, j2, t;

    /* line[j1] is -1 when none of its 8 code points has an entry. */
    for (j1 = 0; j1 < LINE_COUNT; j1++) {
      bool all_invalid = true;
      for (j2 = 0; j2 < 8; j2++) {
        j = 8*j1+j2;
        if (uni2index[j] >= 0)
          all_invalid = false;
      }
      if (all_invalid)
        line[j1] = -1;
      else
        line[j1] = 0;
    }

    /* Assign every used line to a table.  A line joins the previous table
       when it directly follows it, or when it lies in the same group of 32
       lines and at most 8 lines after that table's last line. */
    tableno = 0;
    for (j1 = 0; j1 < LINE_COUNT; j1++) {
      if (line[j1] >= 0) {
        if (tableno > 0
            && ((j1 > 0 && line[j1-1] == tableno-1)
                || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                    && j1 - tables[tableno-1].maxline <= 8))) {
          line[j1] = tableno-1;
          tables[tableno-1].maxline = j1;
        } else {
          if (tableno >= MAX_TABLES)
            die("too many transliteration tables");
          tableno++;
          line[j1] = tableno-1;
          tables[tableno-1].minline = tables[tableno-1].maxline = j1;
        }
      }
    }

    /* Count the entries covered by each table. */
    for (t = 0; t < tableno; t++) {
      tables[t].usecount = 0;
      j1 = 8*tables[t].minline;
      j2 = 8*(tables[t].maxline+1);
      for (j = j1; j < j2; j++)
        if (uni2index[j] >= 0)
          tables[t].usecount++;
    }

    /* Name the tables that get their own page array.  The running counter i
       is deliberately never reset, exactly as before, so the generated
       identifiers stay the same. */
    for (t = 0, p = -1, i = 0; t < tableno; t++) {
      if (tables[t].usecount > 1) {
        char* s = malloc(SUFFIX_SIZE);
        if (s == NULL)
          die("out of memory");
        if (p == tables[t].minline >> 5) {
          snprintf(s, SUFFIX_SIZE, "%02x_%d", (unsigned int) p, ++i);
        } else {
          p = tables[t].minline >> 5;
          snprintf(s, SUFFIX_SIZE, "%02x", (unsigned int) p);
        }
        tables[t].suffix = s;
      } else
        tables[t].suffix = NULL;
    }

    /* Emit one page array per table that holds more than one entry. */
    {
      p = -1;
      for (t = 0; t < tableno; t++)
        if (tables[t].usecount > 1) {
          p = 0;
          printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
          for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
            if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
              printf(" /* 0x%04x */\n", 8*j1);
            printf(" ");
            for (j2 = 0; j2 < 8; j2++) {
              j = 8*j1+j2;
              printf(" %4d,", uni2index[j]);
            }
            printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
          }
          printf("};\n");
        }
      if (p >= 0)
        printf("\n");
    }

    /* Emit the lookup macro: a chain of conditional expressions, one per
       table, ending in -1 for code points without an entry. */
    printf("#define translit_index(wc) \\\n (");
    for (j1 = 0; j1 < LINE_COUNT;) {
      t = line[j1];
      for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++);
      if (t >= 0) {
        /* Internal consistency checks on the table boundaries. */
        if (j1 != tables[t].minline) abort();
        if (j2 > tables[t].maxline+1) abort();
        j2 = tables[t].maxline+1;
      }
      if (t == -1) {
      } else {
        if (t >= 0 && tables[t].usecount == 0) abort();
        if (t >= 0 && tables[t].usecount == 1) {
          /* A single entry is tested directly instead of via a page. */
          if (j2 != j1+1) abort();
          for (j = 8*j1; j < 8*j2; j++)
            if (uni2index[j] >= 0) {
              printf("wc == 0x%04x ? %d", j, uni2index[j]);
              break;
            }
        } else {
          if (j1 == 0) {
            printf("wc < 0x%04x", 8*j2);
          } else {
            printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
          }
          printf(" ? translit_page%s[wc", tables[t].suffix);
          if (tables[t].minline > 0)
            printf("-0x%04x", 8*j1);
          printf("]");
        }
        printf(" : \\\n ");
      }
      j1 = j2;
    }
    printf("-1)\n");
  }

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  exit(0);
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>