Annotation of embedaddon/libiconv/lib/gentranslit.c, revision 1.1

1.1     ! misho       1: /* Copyright (C) 1999-2003, 2005 Free Software Foundation, Inc.
        !             2:    This file is part of the GNU LIBICONV Library.
        !             3: 
        !             4:    The GNU LIBICONV Library is free software; you can redistribute it
        !             5:    and/or modify it under the terms of the GNU Library General Public
        !             6:    License as published by the Free Software Foundation; either version 2
        !             7:    of the License, or (at your option) any later version.
        !             8: 
        !             9:    The GNU LIBICONV Library is distributed in the hope that it will be
        !            10:    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            12:    Library General Public License for more details.
        !            13: 
        !            14:    You should have received a copy of the GNU Library General Public
        !            15:    License along with the GNU LIBICONV Library; see the file COPYING.LIB.
        !            16:    If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
        !            17:    Fifth Floor, Boston, MA 02110-1301, USA.  */
        !            18: 
        !            19: /*
        !            20:  * Generates a table of small strings, used for transliteration, from a table
        !            21:  * containing lines of the form
        !            22:  *   Unicode <tab> utf-8 replacement <tab> # comment
        !            23:  */
        !            24: 
        !            25: #include <stdio.h>
        !            26: #include <stdlib.h>
        !            27: #include <stdbool.h>
        !            28: 
        !            29: int main (int argc, char *argv[])
        !            30: {
        !            31:   unsigned int data[0x100000];
        !            32:   int uni2index[0x110000];
        !            33:   int index;
        !            34: 
        !            35:   if (argc != 1)
        !            36:     exit(1);
        !            37: 
        !            38:   printf("/*\n");
        !            39:   printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
        !            40:   printf(" * This file is part of the GNU LIBICONV Library.\n");
        !            41:   printf(" *\n");
        !            42:   printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
        !            43:   printf(" * and/or modify it under the terms of the GNU Library General Public\n");
        !            44:   printf(" * License as published by the Free Software Foundation; either version 2\n");
        !            45:   printf(" * of the License, or (at your option) any later version.\n");
        !            46:   printf(" *\n");
        !            47:   printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
        !            48:   printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
        !            49:   printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n");
        !            50:   printf(" * Library General Public License for more details.\n");
        !            51:   printf(" *\n");
        !            52:   printf(" * You should have received a copy of the GNU Library General Public\n");
        !            53:   printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
        !            54:   printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
        !            55:   printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
        !            56:   printf(" */\n");
        !            57:   printf("\n");
        !            58:   printf("/*\n");
        !            59:   printf(" * Transliteration table\n");
        !            60:   printf(" */\n");
        !            61:   printf("\n");
        !            62:   {
        !            63:     int c;
        !            64:     int j;
        !            65:     for (j = 0; j < 0x110000; j++)
        !            66:       uni2index[j] = -1;
        !            67:     index = 0;
        !            68:     for (;;) {
        !            69:       c = getc(stdin);
        !            70:       if (c == EOF)
        !            71:         break;
        !            72:       if (c == '#') {
        !            73:         do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        !            74:         continue;
        !            75:       }
        !            76:       ungetc(c,stdin);
        !            77:       if (scanf("%x",&j) != 1)
        !            78:         exit(1);
        !            79:       c = getc(stdin);
        !            80:       if (c != '\t')
        !            81:         exit(1);
        !            82:       for (;;) {
        !            83:         c = getc(stdin);
        !            84:         if (c == EOF || c == '\n')
        !            85:           exit(1);
        !            86:         if (c == '\t')
        !            87:           break;
        !            88:         if (uni2index[j] < 0) {
        !            89:           uni2index[j] = index;
        !            90:           data[index++] = 0;
        !            91:         }
        !            92:         if (c >= 0x80) {
        !            93:           /* Finish reading an UTF-8 character. */
        !            94:           if (c < 0xc0)
        !            95:             exit(1);
        !            96:           else {
        !            97:             unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
        !            98:             c &= (1 << (8-i)) - 1;
        !            99:             while (--i > 0) {
        !           100:               int cc = getc(stdin);
        !           101:               if (!(cc >= 0x80 && cc < 0xc0))
        !           102:                 exit(1);
        !           103:               c <<= 6; c |= (cc & 0x3f);
        !           104:             }
        !           105:           }
        !           106:         }
        !           107:         data[index++] = (unsigned int) c;
        !           108:       }
        !           109:       if (uni2index[j] >= 0)
        !           110:         data[uni2index[j]] = index - uni2index[j] - 1;
        !           111:       do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        !           112:     }
        !           113:   }
        !           114:   printf("static const unsigned int translit_data[%d] = {",index);
        !           115:   {
        !           116:     int i;
        !           117:     for (i = 0; i < index; i++) {
        !           118:       if (data[i] < 32)
        !           119:         printf("\n %3d,",data[i]);
        !           120:       else if (data[i] == '\'')
        !           121:         printf("'\\'',");
        !           122:       else if (data[i] == '\\')
        !           123:         printf("'\\\\',");
        !           124:       else if (data[i] < 127)
        !           125:         printf(" '%c',",data[i]);
        !           126:       else if (data[i] < 256)
        !           127:         printf("0x%02X,",data[i]);
        !           128:       else
        !           129:         printf("0x%04X,",data[i]);
        !           130:     }
        !           131:     printf("\n};\n");
        !           132:   }
        !           133:   printf("\n");
        !           134:   {
        !           135:     bool pages[0x1100];
        !           136:     int line[0x22000];
        !           137:     int tableno;
        !           138:     struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000];
        !           139:     int i, j, p, j1, j2, t;
        !           140: 
        !           141:     for (p = 0; p < 0x1100; p++)
        !           142:       pages[p] = false;
        !           143:     for (j = 0; j < 0x110000; j++)
        !           144:       if (uni2index[j] >= 0)
        !           145:         pages[j>>8] = true;
        !           146:     for (j1 = 0; j1 < 0x22000; j1++) {
        !           147:       bool all_invalid = true;
        !           148:       for (j2 = 0; j2 < 8; j2++) {
        !           149:         j = 8*j1+j2;
        !           150:         if (uni2index[j] >= 0)
        !           151:           all_invalid = false;
        !           152:       }
        !           153:       if (all_invalid)
        !           154:         line[j1] = -1;
        !           155:       else
        !           156:         line[j1] = 0;
        !           157:     }
        !           158:     tableno = 0;
        !           159:     for (j1 = 0; j1 < 0x22000; j1++) {
        !           160:       if (line[j1] >= 0) {
        !           161:         if (tableno > 0
        !           162:             && ((j1 > 0 && line[j1-1] == tableno-1)
        !           163:                 || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
        !           164:                     && j1 - tables[tableno-1].maxline <= 8))) {
        !           165:           line[j1] = tableno-1;
        !           166:           tables[tableno-1].maxline = j1;
        !           167:         } else {
        !           168:           tableno++;
        !           169:           line[j1] = tableno-1;
        !           170:           tables[tableno-1].minline = tables[tableno-1].maxline = j1;
        !           171:         }
        !           172:       }
        !           173:     }
        !           174:     for (t = 0; t < tableno; t++) {
        !           175:       tables[t].usecount = 0;
        !           176:       j1 = 8*tables[t].minline;
        !           177:       j2 = 8*(tables[t].maxline+1);
        !           178:       for (j = j1; j < j2; j++)
        !           179:         if (uni2index[j] >= 0)
        !           180:           tables[t].usecount++;
        !           181:     }
        !           182:     for (t = 0, p = -1, i = 0; t < tableno; t++) {
        !           183:       if (tables[t].usecount > 1) {
        !           184:         char* s;
        !           185:         if (p == tables[t].minline >> 5) {
        !           186:           s = (char*) malloc(5+1);
        !           187:           sprintf(s, "%02x_%d", p, ++i);
        !           188:         } else {
        !           189:           p = tables[t].minline >> 5;
        !           190:           s = (char*) malloc(2+1);
        !           191:           sprintf(s, "%02x", p);
        !           192:         }
        !           193:         tables[t].suffix = s;
        !           194:       } else
        !           195:         tables[t].suffix = NULL;
        !           196:     }
        !           197:     {
        !           198:       p = -1;
        !           199:       for (t = 0; t < tableno; t++)
        !           200:         if (tables[t].usecount > 1) {
        !           201:           p = 0;
        !           202:           printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
        !           203:           for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
        !           204:             if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
        !           205:               printf("  /* 0x%04x */\n", 8*j1);
        !           206:             printf(" ");
        !           207:             for (j2 = 0; j2 < 8; j2++) {
        !           208:               j = 8*j1+j2;
        !           209:               printf(" %4d,", uni2index[j]);
        !           210:             }
        !           211:             printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
        !           212:           }
        !           213:           printf("};\n");
        !           214:         }
        !           215:       if (p >= 0)
        !           216:         printf("\n");
        !           217:     }
        !           218:     printf("#define translit_index(wc) \\\n  (");
        !           219:     for (j1 = 0; j1 < 0x22000;) {
        !           220:       t = line[j1];
        !           221:       for (j2 = j1; j2 < 0x22000 && line[j2] == t; j2++);
        !           222:       if (t >= 0) {
        !           223:         if (j1 != tables[t].minline) abort();
        !           224:         if (j2 > tables[t].maxline+1) abort();
        !           225:         j2 = tables[t].maxline+1;
        !           226:       }
        !           227:       if (t == -1) {
        !           228:       } else {
        !           229:         if (t >= 0 && tables[t].usecount == 0) abort();
        !           230:         if (t >= 0 && tables[t].usecount == 1) {
        !           231:           if (j2 != j1+1) abort();
        !           232:           for (j = 8*j1; j < 8*j2; j++)
        !           233:             if (uni2index[j] >= 0) {
        !           234:               printf("wc == 0x%04x ? %d", j, uni2index[j]);
        !           235:               break;
        !           236:             }
        !           237:         } else {
        !           238:           if (j1 == 0) {
        !           239:             printf("wc < 0x%04x", 8*j2);
        !           240:           } else {
        !           241:             printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
        !           242:           }
        !           243:           printf(" ? translit_page%s[wc", tables[t].suffix);
        !           244:           if (tables[t].minline > 0)
        !           245:             printf("-0x%04x", 8*j1);
        !           246:           printf("]");
        !           247:         }
        !           248:         printf(" : \\\n   ");
        !           249:       }
        !           250:       j1 = j2;
        !           251:     }
        !           252:     printf("-1)\n");
        !           253:   }
        !           254: 
        !           255:   if (ferror(stdout) || fclose(stdout))
        !           256:     exit(1);
        !           257:   exit(0);
        !           258: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>