File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libiconv / lib / gentranslit.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 13:38:46 2021 UTC (3 years, 3 months ago) by misho
Branches: libiconv, MAIN
CVS tags: v1_16p0, HEAD
libiconv 1.16

    1: /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018 Free Software Foundation, Inc.
    2:    This file is part of the GNU LIBICONV Library.
    3: 
    4:    The GNU LIBICONV Library is free software; you can redistribute it
    5:    and/or modify it under the terms of the GNU Library General Public
    6:    License as published by the Free Software Foundation; either version 2
    7:    of the License, or (at your option) any later version.
    8: 
    9:    The GNU LIBICONV Library is distributed in the hope that it will be
   10:    useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   11:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   12:    Library General Public License for more details.
   13: 
   14:    You should have received a copy of the GNU Library General Public
   15:    License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   16:    If not, see <https://www.gnu.org/licenses/>.  */
   17: 
   18: /*
   19:  * Generates a table of small strings, used for transliteration, from a table
   20:  * containing lines of the form
   21:  *   Unicode <tab> utf-8 replacement <tab> # comment
   22:  */
   23: 
   24: #include <stdio.h>
   25: #include <stdlib.h>
   26: #include <stdbool.h>
   27: 
   /* Capacities of the generator's work tables.  */
   enum {
     DATA_CAPACITY = 0x100000,           /* elements allocated for data[] */
     UNICODE_LIMIT = 0x110000,           /* elements allocated for uni2index[] */
     LINE_COUNT    = UNICODE_LIMIT / 8,  /* rows of 8 consecutive code points */
     MAX_TABLES    = 0x2000,             /* elements available in tables[] */
     SUFFIX_SIZE   = 24                  /* room for "%02x_%d" with any int values */
   };

   /*
    * Reads a transliteration table from standard input and writes generated
    * C source to standard output: the translit_data[] array, one
    * translit_page<suffix>[] array per multi-entry table, and the
    * translit_index(wc) macro.
    *
    * Input lines starting with '#' are skipped.  Every other line must be
    *   <hex code point> TAB <UTF-8 replacement> TAB <anything up to newline>
    *
    * The program takes no command-line arguments.  It exits with status 1 on
    * a usage error, malformed or out-of-range input, exhausted table
    * capacity, memory exhaustion, or a write error on standard output, and
    * with status 0 otherwise.  Internal consistency failures call abort().
    */
   int main (int argc, char *argv[])
   {
     unsigned int *data;   /* per entry: a length slot followed by that many code points */
     int *uni2index;       /* code point -> index of its length slot in data[], or -1 */
     int index;            /* number of data[] elements in use */

     (void) argv;
     if (argc != 1)
       exit(1);

     data = malloc(DATA_CAPACITY * sizeof(*data));
     uni2index = malloc(UNICODE_LIMIT * sizeof(*uni2index));
     if (data == NULL || uni2index == NULL) {
       fprintf(stderr, "out of memory\n");
       exit(1);
     }

     printf("/*\n");
     printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
     printf(" * This file is part of the GNU LIBICONV Library.\n");
     printf(" *\n");
     printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
     printf(" * and/or modify it under the terms of the GNU Library General Public\n");
     printf(" * License as published by the Free Software Foundation; either version 2\n");
     printf(" * of the License, or (at your option) any later version.\n");
     printf(" *\n");
     printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
     printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
     printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n");
     printf(" * Library General Public License for more details.\n");
     printf(" *\n");
     printf(" * You should have received a copy of the GNU Library General Public\n");
     printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
     printf(" * If not, see <https://www.gnu.org/licenses/>.\n");
     printf(" */\n");
     printf("\n");
     printf("/*\n");
     printf(" * Transliteration table\n");
     printf(" */\n");
     printf("\n");

     /* Pass 1: parse standard input into data[] and uni2index[]. */
     {
       int c;
       int j;
       for (j = 0; j < UNICODE_LIMIT; j++)
         uni2index[j] = -1;
       index = 0;
       for (;;) {
         unsigned int code;

         c = getc(stdin);
         if (c == EOF)
           break;
         if (c == '#') {
           /* Comment line: discard everything up to the newline. */
           do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
           continue;
         }
         ungetc(c,stdin);
         /* %x stores through an unsigned int pointer. */
         if (scanf("%x",&code) != 1)
           exit(1);
         /* A larger value would index past the end of uni2index[]. */
         if (code >= (unsigned int) UNICODE_LIMIT)
           exit(1);
         j = (int) code;
         c = getc(stdin);
         if (c != '\t')
           exit(1);
         /* Read the replacement string up to the second tab. */
         for (;;) {
           c = getc(stdin);
           if (c == EOF || c == '\n')
             exit(1);
           if (c == '\t')
             break;
           if (uni2index[j] < 0) {
             /* First character for this code point: reserve the length slot. */
             if (index >= DATA_CAPACITY)
               exit(1);
             uni2index[j] = index;
             data[index++] = 0;
           }
           if (c >= 0x80) {
             /* Finish reading an UTF-8 character. */
             /* 0x80..0xbf are continuation bytes; 0xfe and 0xff would make
                the shifts below overflow an int. */
             if (c < 0xc0 || c >= 0xfe)
               exit(1);
             else {
               unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
               c &= (1 << (8-i)) - 1;
               while (--i > 0) {
                 int cc = getc(stdin);
                 /* EOF (-1) fails this test as well. */
                 if (!(cc >= 0x80 && cc < 0xc0))
                   exit(1);
                 c <<= 6; c |= (cc & 0x3f);
               }
             }
           }
           if (index >= DATA_CAPACITY)
             exit(1);
           data[index++] = (unsigned int) c;
         }
         /* Fill in the length slot now that the string is complete. */
         if (uni2index[j] >= 0)
           data[uni2index[j]] = (unsigned int) (index - uni2index[j] - 1);
         /* Ignore the rest of the line. */
         do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
       }
     }

     /* Pass 2: emit translit_data[].  Values below 32 start a new output line. */
     printf("static const unsigned int translit_data[%d] = {",index);
     {
       int i;
       for (i = 0; i < index; i++) {
         if (data[i] < 32)
           printf("\n %3u,",data[i]);
         else if (data[i] == '\'')
           printf("'\\'',");
         else if (data[i] == '\\')
           printf("'\\\\',");
         else if (data[i] < 127)
           printf(" '%c',",(int) data[i]);
         else if (data[i] < 256)
           printf("0x%02X,",data[i]);
         else
           printf("0x%04X,",data[i]);
       }
       printf("\n};\n");
     }
     printf("\n");

     /* Pass 3: group rows of 8 code points into tables and emit the page
        arrays and the lookup macro. */
     {
       /* Static storage keeps these two large arrays off the stack. */
       static int line[LINE_COUNT];   /* row -> table number, or -1 if the row is empty */
       static struct { int minline; int maxline; int usecount; const char* suffix; } tables[MAX_TABLES];
       int tableno;
       int i, j, p, j1, j2, t;

       /* Mark each row as empty (-1) or used (0, replaced by a table number below). */
       for (j1 = 0; j1 < LINE_COUNT; j1++) {
         bool all_invalid = true;
         for (j2 = 0; j2 < 8; j2++) {
           j = 8*j1+j2;
           if (uni2index[j] >= 0)
             all_invalid = false;
         }
         if (all_invalid)
           line[j1] = -1;
         else
           line[j1] = 0;
       }

       /* Assign used rows to tables.  A row joins the previous table when it
          directly follows that table's last row, or when it lies in the same
          block of 32 rows and at most 8 rows after the table's last row. */
       tableno = 0;
       for (j1 = 0; j1 < LINE_COUNT; j1++) {
         if (line[j1] >= 0) {
           if (tableno > 0
               && ((j1 > 0 && line[j1-1] == tableno-1)
                   || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                       && j1 - tables[tableno-1].maxline <= 8))) {
             line[j1] = tableno-1;
             tables[tableno-1].maxline = j1;
           } else {
             if (tableno >= MAX_TABLES) {
               fprintf(stderr, "too many tables\n");
               exit(1);
             }
             tableno++;
             line[j1] = tableno-1;
             tables[tableno-1].minline = tables[tableno-1].maxline = j1;
           }
         }
       }

       /* Count the mapped code points covered by each table. */
       for (t = 0; t < tableno; t++) {
         tables[t].usecount = 0;
         j1 = 8*tables[t].minline;
         j2 = 8*(tables[t].maxline+1);
         for (j = j1; j < j2; j++)
           if (uni2index[j] >= 0)
             tables[t].usecount++;
       }

       /* Name the tables that get a page array.  The first table in a block
          of 32 rows is named after the block number; later tables in the same
          block get a "_<n>" suffix, where n keeps counting across blocks. */
       for (t = 0, p = -1, i = 0; t < tableno; t++) {
         if (tables[t].usecount > 1) {
           char* s = malloc(SUFFIX_SIZE);
           if (s == NULL) {
             fprintf(stderr, "out of memory\n");
             exit(1);
           }
           if (p == tables[t].minline >> 5) {
             snprintf(s, SUFFIX_SIZE, "%02x_%d", p, ++i);
           } else {
             p = tables[t].minline >> 5;
             snprintf(s, SUFFIX_SIZE, "%02x", p);
           }
           tables[t].suffix = s;
         } else
           tables[t].suffix = NULL;
       }

       /* Emit one translit_page array per table with more than one entry. */
       {
         p = -1;
         for (t = 0; t < tableno; t++)
           if (tables[t].usecount > 1) {
             p = 0;
             printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
             for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
               if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
                 printf("  /* 0x%04x */\n", 8*j1);
               printf(" ");
               for (j2 = 0; j2 < 8; j2++) {
                 j = 8*j1+j2;
                 printf(" %4d,", uni2index[j]);
               }
               printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
             }
             printf("};\n");
           }
         /* Blank separator line only if at least one page array was written. */
         if (p >= 0)
           printf("\n");
       }

       /* Emit the translit_index(wc) macro: a chain of conditionals, one per
          table, ending in -1. */
       printf("#define translit_index(wc) \\\n  (");
       for (j1 = 0; j1 < LINE_COUNT;) {
         t = line[j1];
         /* Find the end of the run of rows carrying the same table number. */
         for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++);
         if (t >= 0) {
           if (j1 != tables[t].minline) abort();
           if (j2 > tables[t].maxline+1) abort();
           /* A table may span empty rows; jump to its real end. */
           j2 = tables[t].maxline+1;
         }
         if (t != -1) {
           if (t >= 0 && tables[t].usecount == 0) abort();
           if (t >= 0 && tables[t].usecount == 1) {
             /* Single mapped code point: compare directly, no page array. */
             if (j2 != j1+1) abort();
             for (j = 8*j1; j < 8*j2; j++)
               if (uni2index[j] >= 0) {
                 printf("wc == 0x%04x ? %d", j, uni2index[j]);
                 break;
               }
           } else {
             if (j1 == 0) {
               printf("wc < 0x%04x", 8*j2);
             } else {
               printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
             }
             printf(" ? translit_page%s[wc", tables[t].suffix);
             if (tables[t].minline > 0)
               printf("-0x%04x", 8*j1);
             printf("]");
           }
           printf(" : \\\n   ");
         }
         j1 = j2;
       }
       printf("-1)\n");
     }

     /* Report buffered write failures through the exit status. */
     if (ferror(stdout) || fclose(stdout))
       exit(1);
     exit(0);
   }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>