Annotation of embedaddon/libiconv/lib/gentranslit.c, revision 1.1.1.3

1.1.1.3 ! misho       1: /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018 Free Software Foundation, Inc.
1.1       misho       2:    This file is part of the GNU LIBICONV Library.
                      3: 
                      4:    The GNU LIBICONV Library is free software; you can redistribute it
                      5:    and/or modify it under the terms of the GNU Library General Public
                      6:    License as published by the Free Software Foundation; either version 2
                      7:    of the License, or (at your option) any later version.
                      8: 
                      9:    The GNU LIBICONV Library is distributed in the hope that it will be
                     10:    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
                     11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     12:    Library General Public License for more details.
                     13: 
                     14:    You should have received a copy of the GNU Library General Public
                     15:    License along with the GNU LIBICONV Library; see the file COPYING.LIB.
1.1.1.3 ! misho      16:    If not, see <https://www.gnu.org/licenses/>.  */
1.1       misho      17: 
                     18: /*
                     19:  * Generates a table of small strings, used for transliteration, from a table
                     20:  * containing lines of the form
                     21:  *   Unicode <tab> utf-8 replacement <tab> # comment
                     22:  */
                     23: 
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <stdbool.h>
                     27: 
                     28: int main (int argc, char *argv[])
                     29: {
1.1.1.3 ! misho      30:   unsigned int *data;
        !            31:   int *uni2index;
1.1       misho      32:   int index;
                     33: 
                     34:   if (argc != 1)
                     35:     exit(1);
                     36: 
1.1.1.3 ! misho      37:   data = malloc(0x100000 * sizeof(*data));
        !            38:   uni2index = malloc(0x110000 * sizeof(*uni2index));
        !            39:   if (data == NULL || uni2index == NULL) {
        !            40:     fprintf(stderr, "out of memory\n");
        !            41:     exit(1);
        !            42:   }
        !            43: 
1.1       misho      44:   printf("/*\n");
                     45:   printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
                     46:   printf(" * This file is part of the GNU LIBICONV Library.\n");
                     47:   printf(" *\n");
                     48:   printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
                     49:   printf(" * and/or modify it under the terms of the GNU Library General Public\n");
                     50:   printf(" * License as published by the Free Software Foundation; either version 2\n");
                     51:   printf(" * of the License, or (at your option) any later version.\n");
                     52:   printf(" *\n");
                     53:   printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
                     54:   printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
                     55:   printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n");
                     56:   printf(" * Library General Public License for more details.\n");
                     57:   printf(" *\n");
                     58:   printf(" * You should have received a copy of the GNU Library General Public\n");
                     59:   printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
1.1.1.3 ! misho      60:   printf(" * If not, see <https://www.gnu.org/licenses/>.\n");
1.1       misho      61:   printf(" */\n");
                     62:   printf("\n");
                     63:   printf("/*\n");
                     64:   printf(" * Transliteration table\n");
                     65:   printf(" */\n");
                     66:   printf("\n");
                     67:   {
                     68:     int c;
                     69:     int j;
                     70:     for (j = 0; j < 0x110000; j++)
                     71:       uni2index[j] = -1;
                     72:     index = 0;
                     73:     for (;;) {
                     74:       c = getc(stdin);
                     75:       if (c == EOF)
                     76:         break;
                     77:       if (c == '#') {
                     78:         do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
                     79:         continue;
                     80:       }
                     81:       ungetc(c,stdin);
                     82:       if (scanf("%x",&j) != 1)
                     83:         exit(1);
                     84:       c = getc(stdin);
                     85:       if (c != '\t')
                     86:         exit(1);
                     87:       for (;;) {
                     88:         c = getc(stdin);
                     89:         if (c == EOF || c == '\n')
                     90:           exit(1);
                     91:         if (c == '\t')
                     92:           break;
                     93:         if (uni2index[j] < 0) {
                     94:           uni2index[j] = index;
                     95:           data[index++] = 0;
                     96:         }
                     97:         if (c >= 0x80) {
                     98:           /* Finish reading an UTF-8 character. */
                     99:           if (c < 0xc0)
                    100:             exit(1);
                    101:           else {
                    102:             unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
                    103:             c &= (1 << (8-i)) - 1;
                    104:             while (--i > 0) {
                    105:               int cc = getc(stdin);
                    106:               if (!(cc >= 0x80 && cc < 0xc0))
                    107:                 exit(1);
                    108:               c <<= 6; c |= (cc & 0x3f);
                    109:             }
                    110:           }
                    111:         }
                    112:         data[index++] = (unsigned int) c;
                    113:       }
                    114:       if (uni2index[j] >= 0)
                    115:         data[uni2index[j]] = index - uni2index[j] - 1;
                    116:       do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
                    117:     }
                    118:   }
                    119:   printf("static const unsigned int translit_data[%d] = {",index);
                    120:   {
                    121:     int i;
                    122:     for (i = 0; i < index; i++) {
                    123:       if (data[i] < 32)
                    124:         printf("\n %3d,",data[i]);
                    125:       else if (data[i] == '\'')
                    126:         printf("'\\'',");
                    127:       else if (data[i] == '\\')
                    128:         printf("'\\\\',");
                    129:       else if (data[i] < 127)
                    130:         printf(" '%c',",data[i]);
                    131:       else if (data[i] < 256)
                    132:         printf("0x%02X,",data[i]);
                    133:       else
                    134:         printf("0x%04X,",data[i]);
                    135:     }
                    136:     printf("\n};\n");
                    137:   }
                    138:   printf("\n");
                    139:   {
                    140:     int line[0x22000];
                    141:     int tableno;
                    142:     struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000];
                    143:     int i, j, p, j1, j2, t;
                    144: 
                    145:     for (j1 = 0; j1 < 0x22000; j1++) {
                    146:       bool all_invalid = true;
                    147:       for (j2 = 0; j2 < 8; j2++) {
                    148:         j = 8*j1+j2;
                    149:         if (uni2index[j] >= 0)
                    150:           all_invalid = false;
                    151:       }
                    152:       if (all_invalid)
                    153:         line[j1] = -1;
                    154:       else
                    155:         line[j1] = 0;
                    156:     }
                    157:     tableno = 0;
                    158:     for (j1 = 0; j1 < 0x22000; j1++) {
                    159:       if (line[j1] >= 0) {
                    160:         if (tableno > 0
                    161:             && ((j1 > 0 && line[j1-1] == tableno-1)
                    162:                 || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                    163:                     && j1 - tables[tableno-1].maxline <= 8))) {
                    164:           line[j1] = tableno-1;
                    165:           tables[tableno-1].maxline = j1;
                    166:         } else {
                    167:           tableno++;
                    168:           line[j1] = tableno-1;
                    169:           tables[tableno-1].minline = tables[tableno-1].maxline = j1;
                    170:         }
                    171:       }
                    172:     }
                    173:     for (t = 0; t < tableno; t++) {
                    174:       tables[t].usecount = 0;
                    175:       j1 = 8*tables[t].minline;
                    176:       j2 = 8*(tables[t].maxline+1);
                    177:       for (j = j1; j < j2; j++)
                    178:         if (uni2index[j] >= 0)
                    179:           tables[t].usecount++;
                    180:     }
                    181:     for (t = 0, p = -1, i = 0; t < tableno; t++) {
                    182:       if (tables[t].usecount > 1) {
                    183:         char* s;
                    184:         if (p == tables[t].minline >> 5) {
1.1.1.2   misho     185:           s = (char*) malloc(4+1+2+1);
1.1       misho     186:           sprintf(s, "%02x_%d", p, ++i);
                    187:         } else {
                    188:           p = tables[t].minline >> 5;
1.1.1.2   misho     189:           s = (char*) malloc(4+1);
1.1       misho     190:           sprintf(s, "%02x", p);
                    191:         }
                    192:         tables[t].suffix = s;
                    193:       } else
                    194:         tables[t].suffix = NULL;
                    195:     }
                    196:     {
                    197:       p = -1;
                    198:       for (t = 0; t < tableno; t++)
                    199:         if (tables[t].usecount > 1) {
                    200:           p = 0;
                    201:           printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
                    202:           for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
                    203:             if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
                    204:               printf("  /* 0x%04x */\n", 8*j1);
                    205:             printf(" ");
                    206:             for (j2 = 0; j2 < 8; j2++) {
                    207:               j = 8*j1+j2;
                    208:               printf(" %4d,", uni2index[j]);
                    209:             }
                    210:             printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
                    211:           }
                    212:           printf("};\n");
                    213:         }
                    214:       if (p >= 0)
                    215:         printf("\n");
                    216:     }
                    217:     printf("#define translit_index(wc) \\\n  (");
                    218:     for (j1 = 0; j1 < 0x22000;) {
                    219:       t = line[j1];
                    220:       for (j2 = j1; j2 < 0x22000 && line[j2] == t; j2++);
                    221:       if (t >= 0) {
                    222:         if (j1 != tables[t].minline) abort();
                    223:         if (j2 > tables[t].maxline+1) abort();
                    224:         j2 = tables[t].maxline+1;
                    225:       }
                    226:       if (t == -1) {
                    227:       } else {
                    228:         if (t >= 0 && tables[t].usecount == 0) abort();
                    229:         if (t >= 0 && tables[t].usecount == 1) {
                    230:           if (j2 != j1+1) abort();
                    231:           for (j = 8*j1; j < 8*j2; j++)
                    232:             if (uni2index[j] >= 0) {
                    233:               printf("wc == 0x%04x ? %d", j, uni2index[j]);
                    234:               break;
                    235:             }
                    236:         } else {
                    237:           if (j1 == 0) {
                    238:             printf("wc < 0x%04x", 8*j2);
                    239:           } else {
                    240:             printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
                    241:           }
                    242:           printf(" ? translit_page%s[wc", tables[t].suffix);
                    243:           if (tables[t].minline > 0)
                    244:             printf("-0x%04x", 8*j1);
                    245:           printf("]");
                    246:         }
                    247:         printf(" : \\\n   ");
                    248:       }
                    249:       j1 = j2;
                    250:     }
                    251:     printf("-1)\n");
                    252:   }
                    253: 
                    254:   if (ferror(stdout) || fclose(stdout))
                    255:     exit(1);
                    256:   exit(0);
                    257: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>