/* Annotation of embedaddon/libiconv/lib/gentranslit.c, revision 1.1 */
/* Copyright (C) 1999-2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   Fifth Floor, Boston, MA 02110-1301, USA. */

/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 * Unicode <tab> utf-8 replacement <tab> # comment
 */

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Reads the transliteration source table from standard input and writes the
 * generated C tables to standard output:
 *   - translit_data[]    for every mapped code point, one length cell
 *                        followed by that many replacement code points;
 *   - translit_pageXX[]  lookup tables mapping code points to an index into
 *                        translit_data (or -1 when there is no mapping);
 *   - translit_index(wc) a macro chaining range tests over those tables.
 *
 * The program takes no command line arguments.
 *
 * Exit status: 0 on success; 1 when arguments are given, when the input is
 * malformed, when a code point is outside 0..0x10FFFF, when an internal table
 * capacity would be exceeded, or when writing to standard output fails.
 * abort() is called if an internal consistency check fails.
 */
int main (int argc, char *argv[])
{
  enum {
    UNICODE_LIMIT  = 0x110000,  /* number of slots in uni2index[] */
    DATA_CAPACITY  = 0x100000,  /* number of slots in data[] */
    LINE_COUNT     = 0x22000,   /* UNICODE_LIMIT / 8: a "line" is 8 code points */
    TABLE_CAPACITY = 0x2000     /* number of slots in tables[] */
  };
  /* These arrays are several megabytes in total; keep them in static storage
     rather than on the stack. */
  static unsigned int data[DATA_CAPACITY];
  static int uni2index[UNICODE_LIMIT];
  int index;

  (void) argv;
  if (argc != 1)
    exit(1);

  printf("/*\n");
  printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  printf(" * Library General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Library General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
  printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * Transliteration table\n");
  printf(" */\n");
  printf("\n");

  /* Pass 1: parse the input into data[] and uni2index[]. */
  {
    int c;
    int k;

    for (k = 0; k < UNICODE_LIMIT; k++)
      uni2index[k] = -1;
    index = 0;
    for (;;) {
      unsigned int code;

      c = getc(stdin);
      if (c == EOF)
        break;
      if (c == '#') {
        /* Comment line: discard it. */
        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        continue;
      }
      ungetc(c, stdin);
      /* %x stores through an unsigned int pointer. */
      if (scanf("%x", &code) != 1)
        exit(1);
      /* The value is used as an index into uni2index[]; reject anything that
         does not fit. */
      if (code >= (unsigned int) UNICODE_LIMIT)
        exit(1);
      c = getc(stdin);
      if (c != '\t')
        exit(1);
      /* Read the replacement string up to the next tab. */
      for (;;) {
        c = getc(stdin);
        if (c == EOF || c == '\n')
          exit(1);
        if (c == '\t')
          break;
        if (uni2index[code] < 0) {
          /* First replacement character: reserve the length cell. */
          if (index >= DATA_CAPACITY)
            exit(1);
          uni2index[code] = index;
          data[index++] = 0;
        }
        if (c >= 0x80) {
          /* Finish reading an UTF-8 character. */
          if (c < 0xc0)
            exit(1);
          else {
            unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
            c &= (1 << (8-i)) - 1;
            while (--i > 0) {
              int cc = getc(stdin);
              if (!(cc >= 0x80 && cc < 0xc0))
                exit(1);
              c <<= 6; c |= (cc & 0x3f);
            }
          }
        }
        if (index >= DATA_CAPACITY)
          exit(1);
        data[index++] = (unsigned int) c;
      }
      /* Fill in the length cell: number of cells stored after it. */
      if (uni2index[code] >= 0)
        data[uni2index[code]] = (unsigned int) (index - uni2index[code] - 1);
      /* Discard the rest of the line (the trailing comment). */
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
    }
  }

  /* Emit translit_data[]. Cells below 32 start a new output row. */
  printf("static const unsigned int translit_data[%d] = {",index);
  {
    int i;
    for (i = 0; i < index; i++) {
      if (data[i] < 32)
        printf("\n %3d,", (int) data[i]);
      else if (data[i] == '\'')
        printf("'\\'',");
      else if (data[i] == '\\')
        printf("'\\\\',");
      else if (data[i] < 127)
        printf(" '%c',", (int) data[i]);
      else if (data[i] < 256)
        printf("0x%02X,",data[i]);
      else
        printf("0x%04X,",data[i]);
    }
    printf("\n};\n");
  }
  printf("\n");

  /* Pass 2: group lines of 8 code points into tables and emit them. */
  {
    /* line[j1] is -1 when none of the 8 code points of line j1 is mapped,
       otherwise the number of the table the line belongs to. */
    static int line[LINE_COUNT];
    /* The suffix is stored inline; it is sized for a page number of up to
       four hex digits, an underscore, a decimal counter and the NUL. */
    static struct { int minline; int maxline; int usecount; char suffix[24]; } tables[TABLE_CAPACITY];
    int tableno;
    int i, j, p, j1, j2, t;
    bool any_page;

    for (j1 = 0; j1 < LINE_COUNT; j1++) {
      bool all_invalid = true;
      for (j2 = 0; j2 < 8; j2++) {
        j = 8*j1+j2;
        if (uni2index[j] >= 0)
          all_invalid = false;
      }
      if (all_invalid)
        line[j1] = -1;
      else
        line[j1] = 0;
    }

    /* A used line joins the previous table when it is adjacent to it, or
       when it is in the same group of 32 lines and at most 8 lines after
       the table's last line; otherwise it starts a new table. */
    tableno = 0;
    for (j1 = 0; j1 < LINE_COUNT; j1++) {
      if (line[j1] >= 0) {
        if (tableno > 0
            && ((j1 > 0 && line[j1-1] == tableno-1)
                || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                    && j1 - tables[tableno-1].maxline <= 8))) {
          line[j1] = tableno-1;
          tables[tableno-1].maxline = j1;
        } else {
          if (tableno >= TABLE_CAPACITY)
            exit(1);
          tableno++;
          line[j1] = tableno-1;
          tables[tableno-1].minline = tables[tableno-1].maxline = j1;
        }
      }
    }

    /* Count the mapped code points covered by each table. */
    for (t = 0; t < tableno; t++) {
      tables[t].usecount = 0;
      j1 = 8*tables[t].minline;
      j2 = 8*(tables[t].maxline+1);
      for (j = j1; j < j2; j++)
        if (uni2index[j] >= 0)
          tables[t].usecount++;
    }

    /* Name every table that has more than one mapping. The first table of a
       page is named after the page number; further tables of the same page
       get an additional running counter, which is not reset between pages. */
    for (t = 0, p = -1, i = 0; t < tableno; t++) {
      tables[t].suffix[0] = '\0';
      if (tables[t].usecount > 1) {
        if (p == tables[t].minline >> 5) {
          snprintf(tables[t].suffix, sizeof tables[t].suffix,
                   "%02x_%d", (unsigned int) p, ++i);
        } else {
          p = tables[t].minline >> 5;
          snprintf(tables[t].suffix, sizeof tables[t].suffix,
                   "%02x", (unsigned int) p);
        }
      }
    }

    /* Emit the translit_pageXX[] arrays. */
    any_page = false;
    for (t = 0; t < tableno; t++)
      if (tables[t].usecount > 1) {
        any_page = true;
        printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
        for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
          if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
            printf(" /* 0x%04x */\n", (unsigned int) (8*j1));
          printf(" ");
          for (j2 = 0; j2 < 8; j2++) {
            j = 8*j1+j2;
            printf(" %4d,", uni2index[j]);
          }
          printf(" /* 0x%02x-0x%02x */\n", (unsigned int) (8*(j1 % 0x20)), (unsigned int) (8*(j1 % 0x20)+7));
        }
        printf("};\n");
      }
    if (any_page)
      printf("\n");

    /* Emit the translit_index() macro: one conditional per table, falling
       through to -1. */
    printf("#define translit_index(wc) \\\n (");
    for (j1 = 0; j1 < LINE_COUNT;) {
      t = line[j1];
      /* Find the end of the run of lines carrying the same value. */
      for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
        ;
      if (t >= 0) {
        if (j1 != tables[t].minline) abort();
        if (j2 > tables[t].maxline+1) abort();
        /* A table may contain unused lines in its interior; skip to its end. */
        j2 = tables[t].maxline+1;

        if (tables[t].usecount == 0) abort();
        if (tables[t].usecount == 1) {
          /* Single mapping: compare against the code point directly. */
          if (j2 != j1+1) abort();
          for (j = 8*j1; j < 8*j2; j++)
            if (uni2index[j] >= 0) {
              printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
              break;
            }
        } else {
          if (j1 == 0) {
            printf("wc < 0x%04x", (unsigned int) (8*j2));
          } else {
            printf("wc >= 0x%04x && wc < 0x%04x", (unsigned int) (8*j1), (unsigned int) (8*j2));
          }
          printf(" ? translit_page%s[wc", tables[t].suffix);
          if (tables[t].minline > 0)
            printf("-0x%04x", (unsigned int) (8*j1));
          printf("]");
        }
        printf(" : \\\n ");
      }
      j1 = j2;
    }
    printf("-1)\n");
  }

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  exit(0);
}
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */