Annotation of embedaddon/libiconv/tools/cjk_tab_to_h.c, revision 1.1
1.1 ! misho 1: /* Copyright (C) 1999-2004, 2006-2007 Free Software Foundation, Inc.
! 2: This file is part of the GNU LIBICONV Tools.
! 3:
! 4: This program is free software: you can redistribute it and/or modify
! 5: it under the terms of the GNU General Public License as published by
! 6: the Free Software Foundation; either version 3 of the License, or
! 7: (at your option) any later version.
! 8:
! 9: This program is distributed in the hope that it will be useful,
! 10: but WITHOUT ANY WARRANTY; without even the implied warranty of
! 11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 12: GNU General Public License for more details.
! 13:
! 14: You should have received a copy of the GNU General Public License
! 15: along with this program. If not, see
! 16: <http://www.gnu.org/licenses/>. */
! 17:
! 18: /*
! 19: * Generates a CJK character set table from a .TXT table as found on
! 20: * ftp.unicode.org or in the X nls directory.
! 21: * Examples:
! 22: *
! 23: * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
! 24: * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
! 25: * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
! 26: *
! 27: * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
! 28: * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
! 29: * ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
! 30: * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
! 31: * ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
! 32: *
! 33: * ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
! 34: *
! 35: * ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
! 36: *
! 37: * ./cjk_tab_to_h JISX0213:2004 jisx0213 > jisx0213.h < JISX0213.TXT
! 38: */
! 39:
! 40: #include <stdio.h>
! 41: #include <stdlib.h>
! 42: #include <stdbool.h>
! 43: #include <string.h>
! 44: #include <ctype.h>
! 45: #include <assert.h>
! 46:
/*
 * A half-open range [start, end) of indices into a table.
 */
typedef struct {
  int start;  /* first index that belongs to the range */
  int end;    /* first index past the range */
} Block;
! 51:
! 52: typedef struct {
! 53: int rows; /* number of possible values for the 1st byte */
! 54: int cols; /* number of possible values for the 2nd byte */
! 55: int (*row_byte) (int row); /* returns the 1st byte value for a given row */
! 56: int (*col_byte) (int col); /* returns the 2nd byte value for a given col */
! 57: int (*byte_row) (int byte); /* converts a 1st byte value to a row, else -1 */
! 58: int (*byte_col) (int byte); /* converts a 2nd byte value to a col, else -1 */
! 59: const char* check_row_expr; /* format string for 1st byte value checking */
! 60: const char* check_col_expr; /* format string for 2nd byte value checking */
! 61: const char* byte_row_expr; /* format string for 1st byte value to row */
! 62: const char* byte_col_expr; /* format string for 2nd byte value to col */
! 63: int** charset2uni; /* charset2uni[0..rows-1][0..cols-1] is valid */
! 64: /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
! 65: Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
! 66: int* charsetpage; /* charsetpage[0..rows]: how large is a page for a row */
! 67: int ncharsetblocks;
! 68: Block* charsetblocks; /* blocks[0..nblocks-1] */
! 69: int* uni2charset; /* uni2charset[0x0000..0xffff] */
! 70: int fffd; /* uni representation of the invalid character */
! 71: } Encoding;
! 72:
/*
 * Outputs the file title: the fixed license notice of the generated header,
 * followed by a comment that names the character set.
 */
static void output_title (const char *charsetname)
{
  /* The license notice, plus the opening line of the charset comment. */
  fputs("/*\n"
        " * Copyright (C) 1999-2007 Free Software Foundation, Inc.\n"
        " * This file is part of the GNU LIBICONV Library.\n"
        " *\n"
        " * The GNU LIBICONV Library is free software; you can redistribute it\n"
        " * and/or modify it under the terms of the GNU Library General Public\n"
        " * License as published by the Free Software Foundation; either version 2\n"
        " * of the License, or (at your option) any later version.\n"
        " *\n"
        " * The GNU LIBICONV Library is distributed in the hope that it will be\n"
        " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
        " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
        " * Library General Public License for more details.\n"
        " *\n"
        " * You should have received a copy of the GNU Library General Public\n"
        " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n"
        " * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n"
        " * Fifth Floor, Boston, MA 02110-1301, USA.\n"
        " */\n"
        "\n"
        "/*\n",
        stdout);

  /* The only variable part of the title. */
  printf(" * %s\n", charsetname);

  fputs(" */\n"
        "\n",
        stdout);
}
! 103:
! 104: /*
! 105: * Reads the charset2uni table from standard input.
! 106: */
! 107: static void read_table (Encoding* enc)
! 108: {
! 109: int row, col, i, i1, i2, c, j;
! 110:
! 111: enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
! 112: for (row = 0; row < enc->rows; row++)
! 113: enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
! 114:
! 115: for (row = 0; row < enc->rows; row++)
! 116: for (col = 0; col < enc->cols; col++)
! 117: enc->charset2uni[row][col] = 0xfffd;
! 118:
! 119: c = getc(stdin);
! 120: ungetc(c,stdin);
! 121: if (c == '#') {
! 122: /* Read a unicode.org style .TXT file. */
! 123: for (;;) {
! 124: c = getc(stdin);
! 125: if (c == EOF)
! 126: break;
! 127: if (c == '\n' || c == ' ' || c == '\t')
! 128: continue;
! 129: if (c == '#') {
! 130: do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
! 131: continue;
! 132: }
! 133: ungetc(c,stdin);
! 134: if (scanf("0x%x", &j) != 1)
! 135: exit(1);
! 136: i1 = j >> 8;
! 137: i2 = j & 0xff;
! 138: row = enc->byte_row(i1);
! 139: col = enc->byte_col(i2);
! 140: if (row < 0 || col < 0) {
! 141: fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
! 142: exit(1);
! 143: }
! 144: if (scanf(" 0x%x", &enc->charset2uni[row][col]) != 1)
! 145: exit(1);
! 146: }
! 147: } else {
! 148: /* Read a table of hexadecimal Unicode values. */
! 149: for (i1 = 32; i1 < 132; i1++)
! 150: for (i2 = 32; i2 < 132; i2++) {
! 151: i = scanf("%x", &j);
! 152: if (i == EOF)
! 153: goto read_done;
! 154: if (i != 1)
! 155: exit(1);
! 156: if (j < 0 || j == 0xffff)
! 157: j = 0xfffd;
! 158: if (j != 0xfffd) {
! 159: if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {
! 160: fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
! 161: exit (1);
! 162: }
! 163: enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;
! 164: }
! 165: }
! 166: read_done: ;
! 167: }
! 168: }
! 169:
! 170: /*
! 171: * Determine whether the Unicode range goes outside the BMP.
! 172: */
! 173: static bool is_charset2uni_large (Encoding* enc)
! 174: {
! 175: int row, col;
! 176:
! 177: for (row = 0; row < enc->rows; row++)
! 178: for (col = 0; col < enc->cols; col++)
! 179: if (enc->charset2uni[row][col] >= 0x10000)
! 180: return true;
! 181: return false;
! 182: }
! 183:
! 184: /*
! 185: * Compactify the Unicode range by use of an auxiliary table,
! 186: * so 16 bits suffice to store each value.
! 187: */
! 188: static int compact_large_charset2uni (Encoding* enc, unsigned int **urows, unsigned int *urowshift)
! 189: {
! 190: unsigned int shift;
! 191:
! 192: for (shift = 8; ; shift--) {
! 193: int *upages = (int *) malloc((0x110000>>shift) * sizeof(int));
! 194: int i, row, col, nurows;
! 195:
! 196: for (i = 0; i < 0x110000>>shift; i++)
! 197: upages[i] = -1;
! 198:
! 199: for (row = 0; row < enc->rows; row++)
! 200: for (col = 0; col < enc->cols; col++)
! 201: upages[enc->charset2uni[row][col] >> shift] = 0;
! 202:
! 203: nurows = 0;
! 204: for (i = 0; i < 0x110000>>shift; i++)
! 205: if (upages[i] == 0)
! 206: nurows++;
! 207:
! 208: /* We want all table entries to fit in an 'unsigned short'. */
! 209: if (nurows <= 1<<(16-shift)) {
! 210: int** old_charset2uni;
! 211:
! 212: *urows = (unsigned int *) malloc(nurows * sizeof(unsigned int));
! 213: *urowshift = shift;
! 214:
! 215: nurows = 0;
! 216: for (i = 0; i < 0x110000>>shift; i++)
! 217: if (upages[i] == 0) {
! 218: upages[i] = nurows;
! 219: (*urows)[nurows] = i;
! 220: nurows++;
! 221: }
! 222:
! 223: old_charset2uni = enc->charset2uni;
! 224: enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
! 225: for (row = 0; row < enc->rows; row++)
! 226: enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
! 227: for (row = 0; row < enc->rows; row++)
! 228: for (col = 0; col < enc->cols; col++) {
! 229: int u = old_charset2uni[row][col];
! 230: enc->charset2uni[row][col] =
! 231: (upages[u >> shift] << shift) | (u & ((1 << shift) - 1));
! 232: }
! 233: enc->fffd =
! 234: (upages[0xfffd >> shift] << shift) | (0xfffd & ((1 << shift) - 1));
! 235:
! 236: return nurows;
! 237: }
! 238: }
! 239: abort();
! 240: }
! 241:
! 242: /*
! 243: * Computes the charsetpage[0..rows] array.
! 244: */
! 245: static void find_charset2uni_pages (Encoding* enc)
! 246: {
! 247: int row, col;
! 248:
! 249: enc->charsetpage = (int*) malloc((enc->rows+1)*sizeof(int));
! 250:
! 251: for (row = 0; row <= enc->rows; row++)
! 252: enc->charsetpage[row] = 0;
! 253:
! 254: for (row = 0; row < enc->rows; row++) {
! 255: int used = 0;
! 256: for (col = 0; col < enc->cols; col++)
! 257: if (enc->charset2uni[row][col] != enc->fffd)
! 258: used = col+1;
! 259: enc->charsetpage[row] = used;
! 260: }
! 261: }
! 262:
! 263: /*
! 264: * Fills in nblocks and blocks.
! 265: */
! 266: static void find_charset2uni_blocks (Encoding* enc)
! 267: {
! 268: int n, row, lastrow;
! 269:
! 270: enc->charsetblocks = (Block*) malloc(enc->rows*sizeof(Block));
! 271:
! 272: n = 0;
! 273: for (row = 0; row < enc->rows; row++)
! 274: if (enc->charsetpage[row] > 0 && (row == 0 || enc->charsetpage[row-1] == 0)) {
! 275: for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
! 276: enc->charsetblocks[n].start = row * enc->cols;
! 277: enc->charsetblocks[n].end = lastrow * enc->cols + enc->charsetpage[lastrow];
! 278: n++;
! 279: }
! 280: enc->ncharsetblocks = n;
! 281: }
! 282:
! 283: /*
! 284: * Outputs the charset to unicode table and function.
! 285: */
/*
 * Writes to standard output:
 *   - one array "<name>_2uni_page<XX>" of unsigned short for every block of
 *     consecutive used rows, XX being the 1st byte value of its first row,
 *   - if some Unicode value is >= 0x10000, an array "<name>_2uni_upages",
 *   - the function "<name>_mbtowc" that looks up a byte pair in these arrays.
 *
 * name  prefix of the generated identifiers.
 * enc   the encoding; enc->charset2uni must have been filled in. If no value
 *       is >= 0x10000, enc->fffd, enc->charsetpage, enc->charsetblocks and
 *       enc->ncharsetblocks of the caller's structure are overwritten;
 *       otherwise these results go into a local copy of the structure.
 */
! 286: static void output_charset2uni (const char* name, Encoding* enc)
! 287: {
! 288: int nurows, row, col, lastrow, col_max, i, i1_min, i1_max;
! 289: bool is_large;
! 290: unsigned int* urows;
! 291: unsigned int urowshift;
! 292: Encoding tmpenc;
! 293:
! 294: is_large = is_charset2uni_large(enc);
! 295: if (is_large) {
! 296: /* Use a temporary copy of enc. */
/* The copy is shallow; compact_large_charset2uni gives it a new charset2uni
   table and leaves the caller's table alone. */
! 297: tmpenc = *enc;
! 298: enc = &tmpenc;
! 299: nurows = compact_large_charset2uni(enc,&urows,&urowshift);
! 300: } else {
! 301: nurows = 0; urows = NULL; urowshift = 0; enc->fffd = 0xfffd;
! 302: }
! 303:
! 304: find_charset2uni_pages(enc);
! 305:
! 306: find_charset2uni_blocks(enc);
! 307:
/* Emit the page arrays, one per block of consecutive used rows. */
! 308: for (row = 0; row < enc->rows; row++)
! 309: if (enc->charsetpage[row] > 0) {
! 310: if (row == 0 || enc->charsetpage[row-1] == 0) {
! 311: /* Start a new block. */
/* The array holds all rows of the block in full, except the last one, which
   ends after its last used cell. The loop relies on charsetpage[rows] == 0. */
! 312: for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
! 313: printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
! 314: name, enc->row_byte(row),
! 315: (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
! 316: }
/* Emit a C comment that shows the 1st byte value of this row. */
! 317: printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
/* A row followed by another used row is printed in full; the last row of a
   block is printed only up to its last used cell. */
! 318: col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
! 319: for (col = 0; col < col_max; col++) {
! 320: printf(" 0x%04x,", enc->charset2uni[row][col]);
! 321: if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
! 322: }
! 323: printf("\n");
! 324: if (enc->charsetpage[row+1] == 0) {
! 325: /* End a block. */
! 326: printf("};\n");
! 327: }
! 328: }
! 329: printf("\n");
! 330:
/* In the large case, emit the table that maps a compacted page number back
   to the first code point of the page. */
! 331: if (is_large) {
! 332: printf("static const ucs4_t %s_2uni_upages[%d] = {\n ", name, nurows);
! 333: for (i = 0; i < nurows; i++) {
! 334: printf(" 0x%05x,", urows[i] << urowshift);
! 335: if ((i % 8) == 7 && (i+1 < nurows)) printf("\n ");
! 336: }
! 337: printf("\n");
! 338: printf("};\n");
! 339: printf("\n");
! 340: }
! 341:
/* Emit the conversion function. */
! 342: printf("static int\n");
! 343: printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
! 344: printf("{\n");
! 345: printf(" unsigned char c1 = s[0];\n");
/* The test of the 1st byte is built from the 1st byte ranges of the blocks;
   enc->check_row_expr is not used here. */
! 346: printf(" if (");
! 347: for (i = 0; i < enc->ncharsetblocks; i++) {
! 348: i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
! 349: i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
! 350: if (i > 0)
! 351: printf(" || ");
! 352: if (i1_min == i1_max)
! 353: printf("(c1 == 0x%02x)", i1_min);
! 354: else
! 355: printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
! 356: }
! 357: printf(") {\n");
! 358: printf(" if (n >= 2) {\n");
! 359: printf(" unsigned char c2 = s[1];\n");
! 360: printf(" if (");
/* The *_expr members are themselves printf format strings; the argument is
   the name of the variable in the generated code that they apply to. */
! 361: printf(enc->check_col_expr, "c2");
! 362: printf(") {\n");
! 363: printf(" unsigned int i = %d * (", enc->cols);
! 364: printf(enc->byte_row_expr, "c1");
! 365: printf(") + (");
! 366: printf(enc->byte_col_expr, "c2");
! 367: printf(");\n");
! 368: printf(" %s wc = 0xfffd;\n", is_large ? "ucs4_t" : "unsigned short");
! 369: if (is_large) printf(" unsigned short swc;\n");
/* Emit one branch per block. Every block except the last is selected by
   "i < start of the next block"; inside the branch the lookup happens only
   if i is below the end of the block, otherwise wc stays 0xfffd. */
! 370: for (i = 0; i < enc->ncharsetblocks; i++) {
! 371: printf(" ");
! 372: if (i > 0)
! 373: printf("} else ");
! 374: if (i < enc->ncharsetblocks-1)
! 375: printf("if (i < %d) ", enc->charsetblocks[i+1].start);
! 376: printf("{\n");
! 377: printf(" if (i < %d)\n", enc->charsetblocks[i].end);
! 378: printf(" %s = ", is_large ? "swc" : "wc");
! 379: printf("%s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
! 380: if (enc->charsetblocks[i].start > 0)
! 381: printf("-%d", enc->charsetblocks[i].start);
! 382: printf("]");
/* In the large case the array yields a compacted value swc; the code point
   is the upages entry selected by swc's upper bits, combined with the low
   urowshift bits of swc. */
! 383: if (is_large) printf(",\n wc = %s_2uni_upages[swc>>%d] | (swc & 0x%x)", name, urowshift, (1 << urowshift) - 1);
! 384: printf(";\n");
! 385: }
! 386: printf(" }\n");
! 387: printf(" if (wc != 0xfffd) {\n");
! 388: printf(" *pwc = %swc;\n", is_large ? "" : "(ucs4_t) ");
! 389: printf(" return 2;\n");
! 390: printf(" }\n");
! 391: printf(" }\n");
! 392: printf(" return RET_ILSEQ;\n");
! 393: printf(" }\n");
! 394: printf(" return RET_TOOFEW(0);\n");
! 395: printf(" }\n");
! 396: printf(" return RET_ILSEQ;\n");
! 397: printf("}\n");
! 398: printf("\n");
! 399: }
! 400:
! 401: /*
! 402: * Outputs the charset to unicode table and function.
! 403: * (Suitable if the mapping function is well defined, i.e. has no holes, and
! 404: * is monotonically increasing with small gaps only.)
! 405: */
/*
 * Writes to standard output, for every block of consecutive used rows:
 *   - an array "<name>_2uni_main_page<XX>" of unsigned short that holds, for
 *     each row of the block, the table value at the first column of each of
 *     the steps_per_row steps of the row,
 *   - an array "<name>_2uni_page<XX>" of unsigned char that holds, for each
 *     cell, the difference between its value and the value at the first
 *     column of its step,
 * and then the function "<name>_mbtowc" that adds the two parts.
 * XX is the 1st byte value of the first row of the block.
 *
 * This function does not verify that the differences fit in an unsigned
 * char; the table must fulfil the condition stated in the comment below.
 * It sets enc->charsetpage, enc->charsetblocks and enc->ncharsetblocks, and
 * compares cells with enc->fffd, which it does not set itself.
 */
! 406: static void output_charset2uni_noholes_monotonic (const char* name, Encoding* enc)
! 407: {
! 408: int row, col, lastrow, r, col_max, i, i1_min, i1_max;
! 409:
! 410: /* Choose stepsize so that stepsize*steps_per_row >= enc->cols, and
! 411: enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]
! 412: is always < 0x100. */
! 413: int steps_per_row = 2;
! 414: int stepsize = (enc->cols + steps_per_row-1) / steps_per_row;
! 415:
! 416: find_charset2uni_pages(enc);
! 417:
! 418: find_charset2uni_blocks(enc);
! 419:
! 420: for (row = 0; row < enc->rows; row++)
! 421: if (enc->charsetpage[row] > 0) {
! 422: if (row == 0 || enc->charsetpage[row-1] == 0) {
! 423: /* Start a new block. */
/* Find the last row of the block; relies on charsetpage[rows] == 0. */
! 424: for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
/* First the array of base values: steps_per_row entries for each row. */
! 425: printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",
! 426: name, enc->row_byte(row),
! 427: steps_per_row*(lastrow-row+1));
! 428: for (r = row; r <= lastrow; r++) {
! 429: for (i = 0; i < steps_per_row; i++)
! 430: printf(" 0x%04x,", enc->charset2uni[r][i*stepsize]);
! 431: if (((r-row) % 4) == 3 && (r < lastrow)) printf("\n ");
! 432: }
! 433: printf("\n");
! 434: printf("};\n");
/* Then the opening of the array of differences; its rows are printed by the
   following iterations of the outer loop. */
! 435: printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",
! 436: name, enc->row_byte(row),
! 437: (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
! 438: }
! 439: printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
/* A row followed by another used row is printed in full; the last row of a
   block is printed only up to its last used cell. */
! 440: col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
! 441: for (col = 0; col < col_max; col++) {
/* col/stepsize*stepsize is the first column of the step that contains col. */
! 442: printf(" 0x%02x,", enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]);
! 443: if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
! 444: }
! 445: printf("\n");
! 446: if (enc->charsetpage[row+1] == 0) {
! 447: /* End a block. */
! 448: printf("};\n");
! 449: }
! 450: }
! 451: printf("\n");
! 452:
/* Emit the conversion function. */
! 453: printf("static int\n");
! 454: printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
! 455: printf("{\n");
! 456: printf(" unsigned char c1 = s[0];\n");
/* The test of the 1st byte is built from the 1st byte ranges of the blocks. */
! 457: printf(" if (");
! 458: for (i = 0; i < enc->ncharsetblocks; i++) {
! 459: i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
! 460: i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
! 461: if (i > 0)
! 462: printf(" || ");
! 463: if (i1_min == i1_max)
! 464: printf("(c1 == 0x%02x)", i1_min);
! 465: else
! 466: printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
! 467: }
! 468: printf(") {\n");
! 469: printf(" if (n >= 2) {\n");
! 470: printf(" unsigned char c2 = s[1];\n");
! 471: printf(" if (");
/* The *_expr members are themselves printf format strings; the argument is
   the name of the variable in the generated code that they apply to. */
! 472: printf(enc->check_col_expr, "c2");
! 473: printf(") {\n");
! 474: printf(" unsigned int row = ");
! 475: printf(enc->byte_row_expr, "c1");
! 476: printf(";\n");
! 477: printf(" unsigned int col = ");
! 478: printf(enc->byte_col_expr, "c2");
! 479: printf(";\n");
! 480: printf(" unsigned int i = %d * row + col;\n", enc->cols);
! 481: printf(" unsigned short wc = 0xfffd;\n");
/* Emit one branch per block. Every block except the last is selected by
   "i < start of the next block"; inside the branch the lookup happens only
   if i is below the end of the block, otherwise wc stays 0xfffd. */
! 482: for (i = 0; i < enc->ncharsetblocks; i++) {
! 483: printf(" ");
! 484: if (i > 0)
! 485: printf("} else ");
! 486: if (i < enc->ncharsetblocks-1)
! 487: printf("if (i < %d) ", enc->charsetblocks[i+1].start);
! 488: printf("{\n");
! 489: printf(" if (i < %d)\n", enc->charsetblocks[i].end);
/* Index into the main page: steps_per_row entries per row, counted from the
   first row of the block, plus the number of the step within the row. */
! 490: printf(" wc = %s_2uni_main_page%02x[%d*", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols), steps_per_row);
! 491: if (enc->charsetblocks[i].start > 0)
! 492: printf("(row-%d)", enc->charsetblocks[i].start / enc->cols);
! 493: else
! 494: printf("row");
! 495: printf("+");
/* With two steps per row, a comparison replaces the division. */
! 496: if (steps_per_row == 2)
! 497: printf("(col>=%d?1:0)", stepsize);
! 498: else
! 499: printf("col/%d", stepsize);
! 500: printf("] + %s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
! 501: if (enc->charsetblocks[i].start > 0)
! 502: printf("-%d", enc->charsetblocks[i].start);
! 503: printf("];\n");
! 504: }
! 505: printf(" }\n");
! 506: printf(" if (wc != 0xfffd) {\n");
! 507: printf(" *pwc = (ucs4_t) wc;\n");
! 508: printf(" return 2;\n");
! 509: printf(" }\n");
! 510: printf(" }\n");
! 511: printf(" return RET_ILSEQ;\n");
! 512: printf(" }\n");
! 513: printf(" return RET_TOOFEW(0);\n");
! 514: printf(" }\n");
! 515: printf(" return RET_ILSEQ;\n");
! 516: printf("}\n");
! 517: printf("\n");
! 518: }
! 519:
! 520: /*
! 521: * Computes the uni2charset[0x0000..0x2ffff] array.
! 522: */
! 523: static void invert (Encoding* enc)
! 524: {
! 525: int row, col, j;
! 526:
! 527: enc->uni2charset = (int*) malloc(0x30000*sizeof(int));
! 528:
! 529: for (j = 0; j < 0x30000; j++)
! 530: enc->uni2charset[j] = 0;
! 531:
! 532: for (row = 0; row < enc->rows; row++)
! 533: for (col = 0; col < enc->cols; col++) {
! 534: j = enc->charset2uni[row][col];
! 535: if (j != 0xfffd)
! 536: enc->uni2charset[j] = 0x100 * enc->row_byte(row) + enc->col_byte(col);
! 537: }
! 538: }
! 539:
! 540: /*
! 541: * Outputs the unicode to charset table and function, using a linear array.
! 542: * (Suitable if the table is dense.)
! 543: */
/*
 * Writes to standard output the arrays "<name>_page<XX>" of unsigned short
 * and the function "<name>_wctomb" that uses them.
 *
 * The code points 0..0x2ffff are handled in "lines" of 8. Lines that contain
 * at least one mapped code point are grouped into tables of consecutive
 * lines. A table with more than one mapped code point becomes an array that
 * is indexed by the code point minus the first code point of the table; a
 * table with a single mapped code point becomes a comparison with that code
 * point in the generated function.
 *
 * Requires enc->charset2uni and enc->uni2charset (see invert).
 *
 * NOTE(review): the array name is derived from tables[t].minline >> 5 only.
 * Two tables that start in the same group of 32 lines would get the same
 * name - confirm that this cannot happen for the intended inputs.
 */
! 544: static void output_uni2charset_dense (const char* name, Encoding* enc)
! 545: {
! 546: /* Like in 8bit_tab_to_h.c */
! 547: bool pages[0x300];
! 548: int line[0x6000];
! 549: int tableno;
! 550: struct { int minline; int maxline; int usecount; } tables[0x6000];
! 551: bool first;
! 552: int row, col, j, p, j1, j2, t;
! 553:
/* pages[] is filled in here, but it is not read anywhere in this function. */
! 554: for (p = 0; p < 0x300; p++)
! 555: pages[p] = false;
! 556: for (row = 0; row < enc->rows; row++)
! 557: for (col = 0; col < enc->cols; col++) {
! 558: j = enc->charset2uni[row][col];
! 559: if (j != 0xfffd)
! 560: pages[j>>8] = true;
! 561: }
/* Classify the lines: -1 if none of the 8 code points is mapped, else 0
   for now. */
! 562: for (j1 = 0; j1 < 0x6000; j1++) {
! 563: bool all_invalid = true;
! 564: for (j2 = 0; j2 < 8; j2++) {
! 565: j = 8*j1+j2;
! 566: if (enc->uni2charset[j] != 0)
! 567: all_invalid = false;
! 568: }
! 569: if (all_invalid)
! 570: line[j1] = -1;
! 571: else
! 572: line[j1] = 0;
! 573: }
/* Assign the used lines to tables. A used line joins the most recent table
   if the preceding line belongs to that table, or if the table ends in the
   same group of 32 lines at most 8 lines earlier. Otherwise it opens a new
   table. Afterwards line[j1] is the table number of a used line. */
! 574: tableno = 0;
! 575: for (j1 = 0; j1 < 0x6000; j1++) {
! 576: if (line[j1] >= 0) {
! 577: if (tableno > 0
! 578: && ((j1 > 0 && line[j1-1] == tableno-1)
! 579: || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
! 580: && j1 - tables[tableno-1].maxline <= 8))) {
! 581: line[j1] = tableno-1;
! 582: tables[tableno-1].maxline = j1;
! 583: } else {
! 584: tableno++;
! 585: line[j1] = tableno-1;
! 586: tables[tableno-1].minline = tables[tableno-1].maxline = j1;
! 587: }
! 588: }
! 589: }
/* Count the mapped code points of each table. */
! 590: for (t = 0; t < tableno; t++) {
! 591: tables[t].usecount = 0;
! 592: j1 = 8*tables[t].minline;
! 593: j2 = 8*(tables[t].maxline+1);
! 594: for (j = j1; j < j2; j++)
! 595: if (enc->uni2charset[j] != 0)
! 596: tables[t].usecount++;
! 597: }
/* Emit an array for each table with more than one mapped code point. */
! 598: {
! 599: p = -1;
! 600: for (t = 0; t < tableno; t++)
! 601: if (tables[t].usecount > 1) {
! 602: p = tables[t].minline >> 5;
! 603: printf("static const unsigned short %s_page%02x[%d] = {\n", name, p, 8*(tables[t].maxline-tables[t].minline+1));
! 604: for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
! 605: if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
! 606: printf(" /* 0x%04x */\n", 8*j1);
! 607: printf(" ");
! 608: for (j2 = 0; j2 < 8; j2++) {
! 609: j = 8*j1+j2;
! 610: printf(" 0x%04x,", enc->uni2charset[j]);
! 611: }
! 612: printf(" /*0x%02x-0x%02x*/\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
! 613: }
! 614: printf("};\n");
! 615: }
/* p >= 0 means that at least one array has been emitted. */
! 616: if (p >= 0)
! 617: printf("\n");
! 618: }
/* Emit the conversion function: a chain of "if ... else if ..." with one
   branch per table, in increasing order of code points. */
! 619: printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
! 620: printf("{\n");
! 621: printf(" if (n >= 2) {\n");
! 622: printf(" unsigned short c = 0;\n");
! 623: first = true;
! 624: for (j1 = 0; j1 < 0x6000;) {
! 625: t = line[j1];
/* j2 becomes the end of the run of lines that carry the same value as
   line j1. */
! 626: for (j2 = j1; j2 < 0x6000 && line[j2] == t; j2++);
! 627: if (t >= 0) {
/* A table may contain unused lines; therefore extend the run to the end of
   the table. The abort calls check internal consistency. */
! 628: if (j1 != tables[t].minline) abort();
! 629: if (j2 > tables[t].maxline+1) abort();
! 630: j2 = tables[t].maxline+1;
! 631: if (first)
! 632: printf(" ");
! 633: else
! 634: printf(" else ");
! 635: first = false;
! 636: if (tables[t].usecount == 0) abort();
! 637: if (tables[t].usecount == 1) {
/* A single mapped code point: compare with it directly, no array. */
! 638: if (j2 != j1+1) abort();
! 639: for (j = 8*j1; j < 8*j2; j++)
! 640: if (enc->uni2charset[j] != 0) {
! 641: printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j, enc->uni2charset[j]);
! 642: break;
! 643: }
! 644: } else {
! 645: if (j1 == 0) {
! 646: printf("if (wc < 0x%04x)", 8*j2);
! 647: } else {
! 648: printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
! 649: }
! 650: printf("\n c = %s_page%02x[wc", name, j1 >> 5);
! 651: if (tables[t].minline > 0)
! 652: printf("-0x%04x", 8*j1);
! 653: printf("];\n");
! 654: }
! 655: }
! 656: j1 = j2;
! 657: }
! 658: printf(" if (c != 0) {\n");
! 659: printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
! 660: printf(" return 2;\n");
! 661: printf(" }\n");
! 662: printf(" return RET_ILUNI;\n");
! 663: printf(" }\n");
! 664: printf(" return RET_TOOSMALL;\n");
! 665: printf("}\n");
! 666: }
! 667:
! 668: /*
! 669: * Outputs the unicode to charset table and function, using a packed array.
! 670: * (Suitable if the table is sparse.)
! 671: * The argument 'monotonic' may be set to true if the mapping is monotonically
! 672: * increasing with small gaps only.
! 673: */
! 674: static void output_uni2charset_sparse (const char* name, Encoding* enc, bool monotonic)
! 675: {
! 676: bool pages[0x300];
! 677: Block pageblocks[0x300]; int npageblocks;
! 678: int indx2charset[0x30000];
! 679: int summary_indx[0x3000];
! 680: int summary_used[0x3000];
! 681: int i, row, col, j, p, j1, j2, indx;
! 682: bool is_large;
! 683: /* for monotonic: */
! 684: int log2_stepsize = (!strcmp(name,"uhc_2") ? 6 : 7);
! 685: int stepsize = 1 << log2_stepsize;
! 686: int indxsteps;
! 687:
! 688: /* Fill pages[0x300]. */
! 689: for (p = 0; p < 0x300; p++)
! 690: pages[p] = false;
! 691: for (row = 0; row < enc->rows; row++)
! 692: for (col = 0; col < enc->cols; col++) {
! 693: j = enc->charset2uni[row][col];
! 694: if (j != 0xfffd)
! 695: pages[j>>8] = true;
! 696: }
! 697:
! 698: /* Determine whether two or three bytes are needed for each character. */
! 699: is_large = false;
! 700: for (j = 0; j < 0x30000; j++)
! 701: if (enc->uni2charset[j] >= 0x10000)
! 702: is_large = true;
! 703:
! 704: #if 0
! 705: for (p = 0; p < 0x300; p++)
! 706: if (pages[p]) {
! 707: printf("static const unsigned short %s_page%02x[256] = {\n", name, p);
! 708: for (j1 = 0; j1 < 32; j1++) {
! 709: printf(" ");
! 710: for (j2 = 0; j2 < 8; j2++)
! 711: printf("0x%04x, ", enc->uni2charset[256*p+8*j1+j2]);
! 712: printf("/""*0x%02x-0x%02x*""/\n", 8*j1, 8*j1+7);
! 713: }
! 714: printf("};\n");
! 715: }
! 716: printf("\n");
! 717: #endif
! 718:
! 719: /* Fill summary_indx[] and summary_used[]. */
! 720: indx = 0;
! 721: for (j1 = 0; j1 < 0x3000; j1++) {
! 722: summary_indx[j1] = indx;
! 723: summary_used[j1] = 0;
! 724: for (j2 = 0; j2 < 16; j2++) {
! 725: j = 16*j1+j2;
! 726: if (enc->uni2charset[j] != 0) {
! 727: indx2charset[indx++] = enc->uni2charset[j];
! 728: summary_used[j1] |= (1 << j2);
! 729: }
! 730: }
! 731: }
! 732:
! 733: /* Fill npageblocks and pageblocks[]. */
! 734: npageblocks = 0;
! 735: for (p = 0; p < 0x300; ) {
! 736: if (pages[p] && (p == 0 || !pages[p-1])) {
! 737: pageblocks[npageblocks].start = 16*p;
! 738: do p++; while (p < 0x300 && pages[p]);
! 739: j1 = 16*p;
! 740: while (summary_used[j1-1] == 0) j1--;
! 741: pageblocks[npageblocks].end = j1;
! 742: npageblocks++;
! 743: } else
! 744: p++;
! 745: }
! 746:
! 747: if (monotonic) {
! 748: indxsteps = (indx + stepsize-1) / stepsize;
! 749: printf("static const unsigned short %s_2charset_main[%d] = {\n", name, indxsteps);
! 750: for (i = 0; i < indxsteps; ) {
! 751: if ((i % 8) == 0) printf(" ");
! 752: printf(" 0x%04x,", indx2charset[i*stepsize]);
! 753: i++;
! 754: if ((i % 8) == 0 || i == indxsteps) printf("\n");
! 755: }
! 756: printf("};\n");
! 757: printf("static const unsigned char %s_2charset[%d] = {\n", name, indx);
! 758: for (i = 0; i < indx; ) {
! 759: if ((i % 8) == 0) printf(" ");
! 760: printf(" 0x%02x,", indx2charset[i] - indx2charset[i/stepsize*stepsize]);
! 761: i++;
! 762: if ((i % 8) == 0 || i == indx) printf("\n");
! 763: }
! 764: printf("};\n");
! 765: } else {
! 766: if (is_large) {
! 767: printf("static const unsigned char %s_2charset[3*%d] = {\n", name, indx);
! 768: for (i = 0; i < indx; ) {
! 769: if ((i % 4) == 0) printf(" ");
! 770: printf(" 0x%1x,0x%02x,0x%02x,", indx2charset[i] >> 16,
! 771: (indx2charset[i] >> 8) & 0xff, indx2charset[i] & 0xff);
! 772: i++;
! 773: if ((i % 4) == 0 || i == indx) printf("\n");
! 774: }
! 775: printf("};\n");
! 776: } else {
! 777: printf("static const unsigned short %s_2charset[%d] = {\n", name, indx);
! 778: for (i = 0; i < indx; ) {
! 779: if ((i % 8) == 0) printf(" ");
! 780: printf(" 0x%04x,", indx2charset[i]);
! 781: i++;
! 782: if ((i % 8) == 0 || i == indx) printf("\n");
! 783: }
! 784: printf("};\n");
! 785: }
! 786: }
! 787: printf("\n");
! 788: for (i = 0; i < npageblocks; i++) {
! 789: printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name,
! 790: pageblocks[i].start/16, pageblocks[i].end-pageblocks[i].start);
! 791: for (j1 = pageblocks[i].start; j1 < pageblocks[i].end; ) {
! 792: if (((16*j1) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1);
! 793: if ((j1 % 4) == 0) printf(" ");
! 794: printf(" { %4d, 0x%04x },", summary_indx[j1], summary_used[j1]);
! 795: j1++;
! 796: if ((j1 % 4) == 0 || j1 == pageblocks[i].end) printf("\n");
! 797: }
! 798: printf("};\n");
! 799: }
! 800: printf("\n");
! 801:
! 802: printf("static int\n");
! 803: printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
! 804: printf("{\n");
! 805: printf(" if (n >= 2) {\n");
! 806: printf(" const Summary16 *summary = NULL;\n");
! 807: for (i = 0; i < npageblocks; i++) {
! 808: printf(" ");
! 809: if (i > 0)
! 810: printf("else ");
! 811: printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
! 812: 16*pageblocks[i].start, 16*pageblocks[i].end);
! 813: printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name,
! 814: pageblocks[i].start/16);
! 815: if (pageblocks[i].start > 0)
! 816: printf("-0x%03x", pageblocks[i].start);
! 817: printf("];\n");
! 818: }
! 819: printf(" if (summary) {\n");
! 820: printf(" unsigned short used = summary->used;\n");
! 821: printf(" unsigned int i = wc & 0x0f;\n");
! 822: printf(" if (used & ((unsigned short) 1 << i)) {\n");
! 823: if (monotonic || !is_large)
! 824: printf(" unsigned short c;\n");
! 825: printf(" /* Keep in `used' only the bits 0..i-1. */\n");
! 826: printf(" used &= ((unsigned short) 1 << i) - 1;\n");
! 827: printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
! 828: printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
! 829: printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
! 830: printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
! 831: printf(" used = (used & 0x00ff) + (used >> 8);\n");
! 832: if (monotonic) {
! 833: printf(" used += summary->indx;\n");
! 834: printf(" c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name, log2_stepsize, name);
! 835: printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
! 836: printf(" return 2;\n");
! 837: } else {
! 838: if (is_large) {
! 839: printf(" used += summary->indx;\n");
! 840: printf(" r[0] = %s_2charset[3*used];\n", name);
! 841: printf(" r[1] = %s_2charset[3*used+1];\n", name);
! 842: printf(" r[2] = %s_2charset[3*used+2];\n", name);
! 843: printf(" return 3;\n");
! 844: } else {
! 845: printf(" c = %s_2charset[summary->indx + used];\n", name);
! 846: printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
! 847: printf(" return 2;\n");
! 848: }
! 849: }
! 850: printf(" }\n");
! 851: printf(" }\n");
! 852: printf(" return RET_ILUNI;\n");
! 853: printf(" }\n");
! 854: printf(" return RET_TOOSMALL;\n");
! 855: printf("}\n");
! 856: }
! 857:
! 858: /* ISO-2022/EUC specifics */
! 859:
/* In the 94x94 sets, row and col 0..93 correspond to bytes 0x21..0x7e. */

/* Returns the 1st byte value for a given row. */
static int row_byte_normal (int row)
{
  return row + 0x21;
}

/* Returns the 2nd byte value for a given col. */
static int col_byte_normal (int col)
{
  return col + 0x21;
}

/* Converts a 1st byte value to a row. No range check is done. */
static int byte_row_normal (int byte)
{
  return byte - 0x21;
}

/* Converts a 2nd byte value to a col. No range check is done. */
static int byte_col_normal (int byte)
{
  return byte - 0x21;
}
! 864:
! 865: static void do_normal (const char* name)
! 866: {
! 867: Encoding enc;
! 868:
! 869: enc.rows = 94;
! 870: enc.cols = 94;
! 871: enc.row_byte = row_byte_normal;
! 872: enc.col_byte = col_byte_normal;
! 873: enc.byte_row = byte_row_normal;
! 874: enc.byte_col = byte_col_normal;
! 875: enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 876: enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 877: enc.byte_row_expr = "%1$s - 0x21";
! 878: enc.byte_col_expr = "%1$s - 0x21";
! 879:
! 880: read_table(&enc);
! 881: output_charset2uni(name,&enc);
! 882: invert(&enc); output_uni2charset_sparse(name,&enc,false);
! 883: }
! 884:
! 885: /* Note: On first sight, the jisx0212_2charset[] table seems to be in order,
! 886: starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
! 887: order. There are 75 out-of-order values, scattered all throughout the table.
! 888: */
! 889:
! 890: static void do_normal_only_charset2uni (const char* name)
! 891: {
! 892: Encoding enc;
! 893:
! 894: enc.rows = 94;
! 895: enc.cols = 94;
! 896: enc.row_byte = row_byte_normal;
! 897: enc.col_byte = col_byte_normal;
! 898: enc.byte_row = byte_row_normal;
! 899: enc.byte_col = byte_col_normal;
! 900: enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 901: enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 902: enc.byte_row_expr = "%1$s - 0x21";
! 903: enc.byte_col_expr = "%1$s - 0x21";
! 904:
! 905: read_table(&enc);
! 906: output_charset2uni(name,&enc);
! 907: }
! 908:
! 909: /* CNS 11643 specifics - trick to put two tables into one */
! 910:
/* Converts a combined row index into a row code: the quotient by 94 goes
   into the bits above the low byte, the remainder plus 0x21 into the low
   byte. */
static int row_byte_cns11643 (int row)
{
  int hi = row / 94;
  int lo = row % 94;

  return hi * 0x100 + lo + 0x21;
}
/* Inverse of row_byte_cns11643: the bits above the low byte select a group
   of 94 rows, and the low byte minus 0x21 is the row within that group. */
static int byte_row_cns11643 (int byte)
{
  int hi = byte >> 8;
  int lo = byte & 0xff;

  return hi * 94 + lo - 0x21;
}
! 917:
! 918: static void do_cns11643_only_uni2charset (const char* name)
! 919: {
! 920: Encoding enc;
! 921:
! 922: enc.rows = 16*94;
! 923: enc.cols = 94;
! 924: enc.row_byte = row_byte_cns11643;
! 925: enc.col_byte = col_byte_normal;
! 926: enc.byte_row = byte_row_cns11643;
! 927: enc.byte_col = byte_col_normal;
! 928: enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 929: enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 930: enc.byte_row_expr = "%1$s - 0x21";
! 931: enc.byte_col_expr = "%1$s - 0x21";
! 932:
! 933: read_table(&enc);
! 934: invert(&enc);
! 935: output_uni2charset_sparse(name,&enc,false);
! 936: }
! 937:
! 938: /* GBK specifics */
! 939:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0x81). */
static int row_byte_gbk1 (int row)
{
  return row + 0x81;
}
/* Converts a 0-based column index into its trail byte. Columns below 0x3f
   start at 0x40; columns from 0x3f on are shifted by one more, so that the
   byte 0x7f is never produced. */
static int col_byte_gbk1 (int col)
{
  if (col >= 0x3f)
    return col + 0x41;
  return col + 0x40;
}
/* Converts a lead byte in 0x81..0xfe into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_gbk1 (int byte)
{
  if (byte < 0x81 || byte >= 0xff)
    return -1;
  return byte - 0x81;
}
/* Converts a trail byte into a 0-based column index. Bytes 0x40..0x7e map
   to columns 0..0x3e, bytes 0x80..0xfe to the columns after them.
   Returns -1 for any other byte. */
static int byte_col_gbk1 (int byte)
{
  int col = -1;

  if (byte >= 0x80) {
    if (byte < 0xff)
      col = byte - 0x41;
  } else if (byte >= 0x40 && byte < 0x7f) {
    col = byte - 0x40;
  }
  return col;
}
! 960:
! 961: static void do_gbk1 (const char* name)
! 962: {
! 963: Encoding enc;
! 964:
! 965: enc.rows = 126;
! 966: enc.cols = 190;
! 967: enc.row_byte = row_byte_gbk1;
! 968: enc.col_byte = col_byte_gbk1;
! 969: enc.byte_row = byte_row_gbk1;
! 970: enc.byte_col = byte_col_gbk1;
! 971: enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
! 972: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
! 973: enc.byte_row_expr = "%1$s - 0x81";
! 974: enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
! 975:
! 976: read_table(&enc);
! 977: output_charset2uni(name,&enc);
! 978: invert(&enc); output_uni2charset_dense(name,&enc);
! 979: }
! 980:
! 981: static void do_gbk1_only_charset2uni (const char* name)
! 982: {
! 983: Encoding enc;
! 984:
! 985: enc.rows = 126;
! 986: enc.cols = 190;
! 987: enc.row_byte = row_byte_gbk1;
! 988: enc.col_byte = col_byte_gbk1;
! 989: enc.byte_row = byte_row_gbk1;
! 990: enc.byte_col = byte_col_gbk1;
! 991: enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
! 992: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
! 993: enc.byte_row_expr = "%1$s - 0x81";
! 994: enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
! 995:
! 996: read_table(&enc);
! 997: output_charset2uni(name,&enc);
! 998: }
! 999:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0x81). */
static int row_byte_gbk2 (int row)
{
  return row + 0x81;
}
/* Converts a 0-based column index into its trail byte. Columns below 0x3f
   start at 0x40; columns from 0x3f on are shifted by one more, so that the
   byte 0x7f is never produced. */
static int col_byte_gbk2 (int col)
{
  int base = 0x40;

  if (col >= 0x3f)
    base = 0x41;
  return base + col;
}
/* Converts a lead byte in 0x81..0xfe into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_gbk2 (int byte)
{
  return (byte >= 0x81 && byte < 0xff) ? byte - 0x81 : -1;
}
/* Converts a trail byte into a 0-based column index. Bytes 0x40..0x7e map
   to columns 0..0x3e, bytes 0x80..0xa0 to the columns after them.
   Returns -1 for any other byte. */
static int byte_col_gbk2 (int byte)
{
  int col = -1;

  if (byte >= 0x80) {
    if (byte < 0xa1)
      col = byte - 0x41;
  } else if (byte >= 0x40 && byte < 0x7f) {
    col = byte - 0x40;
  }
  return col;
}
! 1020:
! 1021: static void do_gbk2_only_charset2uni (const char* name)
! 1022: {
! 1023: Encoding enc;
! 1024:
! 1025: enc.rows = 126;
! 1026: enc.cols = 96;
! 1027: enc.row_byte = row_byte_gbk2;
! 1028: enc.col_byte = col_byte_gbk2;
! 1029: enc.byte_row = byte_row_gbk2;
! 1030: enc.byte_col = byte_col_gbk2;
! 1031: enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
! 1032: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
! 1033: enc.byte_row_expr = "%1$s - 0x81";
! 1034: enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
! 1035:
! 1036: read_table(&enc);
! 1037: output_charset2uni(name,&enc);
! 1038: }
! 1039:
! 1040: static void do_gbk1_only_uni2charset (const char* name)
! 1041: {
! 1042: Encoding enc;
! 1043:
! 1044: enc.rows = 126;
! 1045: enc.cols = 190;
! 1046: enc.row_byte = row_byte_gbk1;
! 1047: enc.col_byte = col_byte_gbk1;
! 1048: enc.byte_row = byte_row_gbk1;
! 1049: enc.byte_col = byte_col_gbk1;
! 1050: enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
! 1051: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
! 1052: enc.byte_row_expr = "%1$s - 0x81";
! 1053: enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
! 1054:
! 1055: read_table(&enc);
! 1056: invert(&enc); output_uni2charset_sparse(name,&enc,false);
! 1057: }
! 1058:
! 1059: /* KSC 5601 specifics */
! 1060:
! 1061: /*
! 1062: * Reads the charset2uni table from standard input.
! 1063: */
! 1064: static void read_table_ksc5601 (Encoding* enc)
! 1065: {
! 1066: int row, col, i, i1, i2, c, j;
! 1067:
! 1068: enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
! 1069: for (row = 0; row < enc->rows; row++)
! 1070: enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
! 1071:
! 1072: for (row = 0; row < enc->rows; row++)
! 1073: for (col = 0; col < enc->cols; col++)
! 1074: enc->charset2uni[row][col] = 0xfffd;
! 1075:
! 1076: c = getc(stdin);
! 1077: ungetc(c,stdin);
! 1078: if (c == '#') {
! 1079: /* Read a unicode.org style .TXT file. */
! 1080: for (;;) {
! 1081: c = getc(stdin);
! 1082: if (c == EOF)
! 1083: break;
! 1084: if (c == '\n' || c == ' ' || c == '\t')
! 1085: continue;
! 1086: if (c == '#') {
! 1087: do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
! 1088: continue;
! 1089: }
! 1090: ungetc(c,stdin);
! 1091: if (scanf("0x%x", &j) != 1)
! 1092: exit(1);
! 1093: i1 = j >> 8;
! 1094: i2 = j & 0xff;
! 1095: if (scanf(" 0x%x", &j) != 1)
! 1096: exit(1);
! 1097: /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
! 1098: = KS X 1001.1992, ignore the rest. */
! 1099: if (!(i1 >= 128+33 && i1 < 128+127 && i2 >= 128+33 && i2 < 128+127))
! 1100: continue; /* KSC5601 specific */
! 1101: i1 &= 0x7f; /* KSC5601 specific */
! 1102: i2 &= 0x7f; /* KSC5601 specific */
! 1103: row = enc->byte_row(i1);
! 1104: col = enc->byte_col(i2);
! 1105: if (row < 0 || col < 0) {
! 1106: fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
! 1107: exit(1);
! 1108: }
! 1109: enc->charset2uni[row][col] = j;
! 1110: }
! 1111: } else {
! 1112: /* Read a table of hexadecimal Unicode values. */
! 1113: for (i1 = 33; i1 < 127; i1++)
! 1114: for (i2 = 33; i2 < 127; i2++) {
! 1115: i = scanf("%x", &j);
! 1116: if (i == EOF)
! 1117: goto read_done;
! 1118: if (i != 1)
! 1119: exit(1);
! 1120: if (j < 0 || j == 0xffff)
! 1121: j = 0xfffd;
! 1122: if (j != 0xfffd) {
! 1123: if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {
! 1124: fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
! 1125: exit (1);
! 1126: }
! 1127: enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;
! 1128: }
! 1129: }
! 1130: read_done: ;
! 1131: }
! 1132: }
! 1133:
! 1134: static void do_ksc5601 (const char* name)
! 1135: {
! 1136: Encoding enc;
! 1137:
! 1138: enc.rows = 94;
! 1139: enc.cols = 94;
! 1140: enc.row_byte = row_byte_normal;
! 1141: enc.col_byte = col_byte_normal;
! 1142: enc.byte_row = byte_row_normal;
! 1143: enc.byte_col = byte_col_normal;
! 1144: enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 1145: enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
! 1146: enc.byte_row_expr = "%1$s - 0x21";
! 1147: enc.byte_col_expr = "%1$s - 0x21";
! 1148:
! 1149: read_table_ksc5601(&enc);
! 1150: output_charset2uni(name,&enc);
! 1151: invert(&enc); output_uni2charset_sparse(name,&enc,false);
! 1152: }
! 1153:
! 1154: /* UHC specifics */
! 1155:
! 1156: /* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */
! 1157:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0x81). */
static int row_byte_uhc_1 (int row)
{
  return row + 0x81;
}
/* Converts a 0-based column index into its trail byte. Columns 0..0x19
   start at 0x41, columns 0x1a..0x33 at 0x61, and columns from 0x34 on at
   0x81. */
static int col_byte_uhc_1 (int col)
{
  if (col >= 0x34)
    return col + 0x4d;
  if (col >= 0x1a)
    return col + 0x47;
  return col + 0x41;
}
/* Converts a lead byte in 0x81..0xa0 into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_uhc_1 (int byte)
{
  if (byte < 0x81 || byte >= 0xa1)
    return -1;
  return byte - 0x81;
}
/* Converts a trail byte into a 0-based column index. The three accepted
   byte ranges 0x41..0x5a, 0x61..0x7a and 0x81..0xfe map to consecutive
   column ranges. Returns -1 for any other byte. */
static int byte_col_uhc_1 (int byte)
{
  int col = -1;

  if (byte >= 0x41 && byte < 0x5b)
    col = byte - 0x41;
  else if (byte >= 0x61 && byte < 0x7b)
    col = byte - 0x47;
  else if (byte >= 0x81 && byte < 0xff)
    col = byte - 0x4d;
  return col;
}
! 1180:
! 1181: static void do_uhc_1 (const char* name)
! 1182: {
! 1183: Encoding enc;
! 1184:
! 1185: enc.rows = 32;
! 1186: enc.cols = 178;
! 1187: enc.row_byte = row_byte_uhc_1;
! 1188: enc.col_byte = col_byte_uhc_1;
! 1189: enc.byte_row = byte_row_uhc_1;
! 1190: enc.byte_col = byte_col_uhc_1;
! 1191: enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa1)";
! 1192: enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";
! 1193: enc.byte_row_expr = "%1$s - 0x81";
! 1194: enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
! 1195:
! 1196: read_table(&enc);
! 1197: output_charset2uni_noholes_monotonic(name,&enc);
! 1198: invert(&enc); output_uni2charset_sparse(name,&enc,true);
! 1199: }
! 1200:
! 1201: /* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */
! 1202:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0xa1). */
static int row_byte_uhc_2 (int row)
{
  return row + 0xa1;
}
/* Converts a 0-based column index into its trail byte. Columns 0..0x19
   start at 0x41, columns 0x1a..0x33 at 0x61, and columns from 0x34 on at
   0x81. */
static int col_byte_uhc_2 (int col)
{
  int base;

  if (col >= 0x34)
    base = 0x4d;
  else if (col >= 0x1a)
    base = 0x47;
  else
    base = 0x41;
  return base + col;
}
/* Converts a lead byte in 0xa1..0xfe into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_uhc_2 (int byte)
{
  return (byte >= 0xa1 && byte < 0xff) ? byte - 0xa1 : -1;
}
/* Converts a trail byte into a 0-based column index. The three accepted
   byte ranges 0x41..0x5a, 0x61..0x7a and 0x81..0xa0 map to consecutive
   column ranges. Returns -1 for any other byte. */
static int byte_col_uhc_2 (int byte)
{
  int col = -1;

  if (byte >= 0x41 && byte < 0x5b)
    col = byte - 0x41;
  else if (byte >= 0x61 && byte < 0x7b)
    col = byte - 0x47;
  else if (byte >= 0x81 && byte < 0xa1)
    col = byte - 0x4d;
  return col;
}
! 1225:
! 1226: static void do_uhc_2 (const char* name)
! 1227: {
! 1228: Encoding enc;
! 1229:
! 1230: enc.rows = 94;
! 1231: enc.cols = 84;
! 1232: enc.row_byte = row_byte_uhc_2;
! 1233: enc.col_byte = col_byte_uhc_2;
! 1234: enc.byte_row = byte_row_uhc_2;
! 1235: enc.byte_col = byte_col_uhc_2;
! 1236: enc.check_row_expr = "(%1$s >= 0xa1 && %1$s < 0xff)";
! 1237: enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xa1)";
! 1238: enc.byte_row_expr = "%1$s - 0xa1";
! 1239: enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
! 1240:
! 1241: read_table(&enc);
! 1242: output_charset2uni_noholes_monotonic(name,&enc);
! 1243: invert(&enc); output_uni2charset_sparse(name,&enc,true);
! 1244: }
! 1245:
! 1246: /* Big5 specifics */
! 1247:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0xa1). */
static int row_byte_big5 (int row)
{
  return row + 0xa1;
}
/* Converts a 0-based column index into its trail byte. Columns below 0x3f
   start at 0x40; columns from 0x3f on start at 0xa1. */
static int col_byte_big5 (int col)
{
  if (col >= 0x3f)
    return col + 0x62;
  return col + 0x40;
}
/* Converts a lead byte in 0xa1..0xfe into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_big5 (int byte)
{
  if (byte < 0xa1 || byte >= 0xff)
    return -1;
  return byte - 0xa1;
}
/* Converts a trail byte into a 0-based column index. Bytes 0x40..0x7e map
   to columns 0..0x3e, bytes 0xa1..0xfe to the columns after them.
   Returns -1 for any other byte. */
static int byte_col_big5 (int byte)
{
  int col = -1;

  if (byte >= 0xa1) {
    if (byte < 0xff)
      col = byte - 0x62;
  } else if (byte >= 0x40 && byte < 0x7f) {
    col = byte - 0x40;
  }
  return col;
}
! 1268:
! 1269: static void do_big5 (const char* name)
! 1270: {
! 1271: Encoding enc;
! 1272:
! 1273: enc.rows = 94;
! 1274: enc.cols = 157;
! 1275: enc.row_byte = row_byte_big5;
! 1276: enc.col_byte = col_byte_big5;
! 1277: enc.byte_row = byte_row_big5;
! 1278: enc.byte_col = byte_col_big5;
! 1279: enc.check_row_expr = "%1$s >= 0xa1 && %1$s < 0xff";
! 1280: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
! 1281: enc.byte_row_expr = "%1$s - 0xa1";
! 1282: enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
! 1283:
! 1284: read_table(&enc);
! 1285: output_charset2uni(name,&enc);
! 1286: invert(&enc); output_uni2charset_sparse(name,&enc,false);
! 1287: }
! 1288:
! 1289: /* HKSCS specifics */
! 1290:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0x80). */
static int row_byte_hkscs (int row)
{
  return row + 0x80;
}
/* Converts a lead byte in 0x80..0xfe into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_hkscs (int byte)
{
  if (byte < 0x80 || byte >= 0xff)
    return -1;
  return byte - 0x80;
}
! 1300:
! 1301: static void do_hkscs (const char* name)
! 1302: {
! 1303: Encoding enc;
! 1304:
! 1305: enc.rows = 128;
! 1306: enc.cols = 157;
! 1307: enc.row_byte = row_byte_hkscs;
! 1308: enc.col_byte = col_byte_big5;
! 1309: enc.byte_row = byte_row_hkscs;
! 1310: enc.byte_col = byte_col_big5;
! 1311: enc.check_row_expr = "%1$s >= 0x80 && %1$s < 0xff";
! 1312: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
! 1313: enc.byte_row_expr = "%1$s - 0x80";
! 1314: enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
! 1315:
! 1316: read_table(&enc);
! 1317: output_charset2uni(name,&enc);
! 1318: invert(&enc); output_uni2charset_sparse(name,&enc,false);
! 1319: }
! 1320:
! 1321: /* Johab Hangul specifics */
! 1322:
/* Converts a 0-based row index into its lead byte (row 0 maps to 0x84). */
static int row_byte_johab_hangul (int row)
{
  return row + 0x84;
}
/* Converts a 0-based column index into its trail byte. Columns below 0x3e
   start at 0x41; columns from 0x3e on start at 0x81. */
static int col_byte_johab_hangul (int col)
{
  if (col >= 0x3e)
    return col + 0x43;
  return col + 0x41;
}
/* Converts a lead byte in 0x84..0xd3 into a 0-based row index.
   Returns -1 for any other byte. */
static int byte_row_johab_hangul (int byte)
{
  if (byte < 0x84 || byte >= 0xd4)
    return -1;
  return byte - 0x84;
}
/* Converts a trail byte into a 0-based column index. Bytes 0x41..0x7e map
   to columns 0..0x3d, bytes 0x81..0xfe to the columns after them.
   Returns -1 for any other byte. */
static int byte_col_johab_hangul (int byte)
{
  int col = -1;

  if (byte >= 0x81) {
    if (byte < 0xff)
      col = byte - 0x43;
  } else if (byte >= 0x41 && byte < 0x7f) {
    col = byte - 0x41;
  }
  return col;
}
! 1343:
! 1344: static void do_johab_hangul (const char* name)
! 1345: {
! 1346: Encoding enc;
! 1347:
! 1348: enc.rows = 80;
! 1349: enc.cols = 188;
! 1350: enc.row_byte = row_byte_johab_hangul;
! 1351: enc.col_byte = col_byte_johab_hangul;
! 1352: enc.byte_row = byte_row_johab_hangul;
! 1353: enc.byte_col = byte_col_johab_hangul;
! 1354: enc.check_row_expr = "%1$s >= 0x84 && %1$s < 0xd4";
! 1355: enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
! 1356: enc.byte_row_expr = "%1$s - 0x84";
! 1357: enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";
! 1358:
! 1359: read_table(&enc);
! 1360: output_charset2uni(name,&enc);
! 1361: invert(&enc); output_uni2charset_dense(name,&enc);
! 1362: }
! 1363:
! 1364: /* SJIS specifics */
! 1365:
/* Converts a 0-based row index into its lead byte. Rows below 0x1f start
   at 0x81; rows from 0x1f on start at 0xe0. */
static int row_byte_sjis (int row)
{
  if (row >= 0x1f)
    return row + 0xc1;
  return row + 0x81;
}
/* Converts a 0-based column index into its trail byte. Columns below 0x3f
   start at 0x40; columns from 0x3f on are shifted by one more, so that the
   byte 0x7f is never produced. */
static int col_byte_sjis (int col)
{
  if (col >= 0x3f)
    return col + 0x41;
  return col + 0x40;
}
/* Converts a lead byte into a 0-based row index. Bytes 0x81..0x9f map to
   rows 0..0x1e; bytes from 0xe0 upwards (no upper bound is checked) map to
   rows from 0x1f on. Returns -1 for any other byte. */
static int byte_row_sjis (int byte)
{
  if (byte >= 0xe0)
    return byte - 0xc1;
  if (byte >= 0x81 && byte < 0xa0)
    return byte - 0x81;
  return -1;
}
/* Converts a trail byte into a 0-based column index. Bytes 0x40..0x7e map
   to columns 0..0x3e, bytes 0x80..0xfc to the columns after them.
   Returns -1 for any other byte. */
static int byte_col_sjis (int byte)
{
  int col = -1;

  if (byte >= 0x80) {
    if (byte < 0xfd)
      col = byte - 0x41;
  } else if (byte >= 0x40 && byte < 0x7f) {
    col = byte - 0x40;
  }
  return col;
}
! 1388:
! 1389: static void do_sjis (const char* name)
! 1390: {
! 1391: Encoding enc;
! 1392:
! 1393: enc.rows = 94;
! 1394: enc.cols = 188;
! 1395: enc.row_byte = row_byte_sjis;
! 1396: enc.col_byte = col_byte_sjis;
! 1397: enc.byte_row = byte_row_sjis;
! 1398: enc.byte_col = byte_col_sjis;
! 1399: enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
! 1400: enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
! 1401: enc.byte_row_expr = "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
! 1402: enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
! 1403:
! 1404: read_table(&enc);
! 1405: output_charset2uni(name,&enc);
! 1406: invert(&enc); output_uni2charset_sparse(name,&enc,false);
! 1407: }
! 1408:
! 1409: /* GB18030 Unicode specifics */
! 1410:
/*
 * Generates the GB18030 four-byte <-> Unicode conversion code.
 *
 * Reads a unicode.org style .TXT file from standard input, each entry being
 * a four-byte code (bytes in 0x81..0x84, 0x30..0x39, 0x81..0xfe, 0x30..0x39)
 * followed by a Unicode value below 0x10000. Every code is turned into a
 * linear index i; the mapping i -> Unicode is then checked to be strictly
 * increasing with non-decreasing differences, and is compressed into at
 * most 256 ranges that each have a constant difference.
 *
 * Writes to standard output, all prefixed with NAME: the two range tables,
 * the per-range diff/bitmap_offset table, the bitmap, and the
 * NAME_mbtowc / NAME_wctomb functions that binary-search the ranges.
 *
 * Exits with status 1 on malformed or unexpected input, including input
 * without any mapping (the generated code needs at least one range).
 */
static void do_gb18030uni (const char* name)
{
  int c;
  unsigned int bytes;
  unsigned int ucs; /* scanf's %x conversion requires an unsigned int */
  int i1, i2, i3, i4, i, j, k;
  int charset2uni[4*10*126*10];
  struct { int low; int high; int diff; int total; } ranges[256];
  int ranges_count, ranges_total;

  /* 0 marks an index without a mapping. */
  for (i = 0; i < 4*10*126*10; i++)
    charset2uni[i] = 0;

  /* Read a unicode.org style .TXT file. */
  for (;;) {
    c = getc(stdin);
    if (c == EOF)
      break;
    if (c == '\n' || c == ' ' || c == '\t')
      continue;
    if (c == '#') {
      /* Skip the rest of a comment line. */
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
      continue;
    }
    ungetc(c,stdin);
    if (scanf("0x%x", &bytes) != 1)
      exit(1);
    i1 = (bytes >> 24) & 0xff;
    i2 = (bytes >> 16) & 0xff;
    i3 = (bytes >> 8) & 0xff;
    i4 = bytes & 0xff;
    if (!(i1 >= 0x81 && i1 <= 0x84
          && i2 >= 0x30 && i2 <= 0x39
          && i3 >= 0x81 && i3 <= 0xfe
          && i4 >= 0x30 && i4 <= 0x39)) {
      fprintf(stderr, "lost entry for %02x %02x %02x %02x\n", i1, i2, i3, i4);
      exit(1);
    }
    i = (((i1-0x81) * 10 + (i2-0x30)) * 126 + (i3-0x81)) * 10 + (i4-0x30);
    if (scanf(" 0x%x", &ucs) != 1)
      exit(1);
    if (!(ucs < 0x10000))
      exit(1);
    charset2uni[i] = (int) ucs;
  }

  /* Verify that the mapping i -> j is monotonically increasing and
     of the form
        low[k] <= i <= high[k]  =>  j = diff[k] + i
     with a set of disjoint intervals (low[k], high[k]). */
  ranges_count = 0;
  for (i = 0; i < 4*10*126*10; i++)
    if (charset2uni[i] != 0) {
      int diff;
      j = charset2uni[i];
      diff = j - i;
      if (ranges_count > 0) {
        if (!(i > ranges[ranges_count-1].high))
          exit(1);
        if (!(j > ranges[ranges_count-1].high + ranges[ranges_count-1].diff))
          exit(1);
        /* Additional property: The diffs are also increasing. */
        if (!(diff >= ranges[ranges_count-1].diff))
          exit(1);
      }
      if (ranges_count > 0 && diff == ranges[ranges_count-1].diff)
        ranges[ranges_count-1].high = i;
      else {
        if (ranges_count == 256)
          exit(1);
        ranges[ranges_count].low = i;
        ranges[ranges_count].high = i;
        ranges[ranges_count].diff = diff;
        ranges_count++;
      }
    }

  /* Everything below reads ranges[0] and ranges[ranges_count-1]; without
     any range that would be an out-of-bounds access. */
  if (ranges_count == 0) {
    fprintf(stderr, "no mappings found in input\n");
    exit(1);
  }

  /* Determine size of bitmap. */
  ranges_total = 0;
  for (k = 0; k < ranges_count; k++) {
    ranges[k].total = ranges_total;
    ranges_total += ranges[k].high - ranges[k].low + 1;
  }

  printf("static const unsigned short %s_charset2uni_ranges[%d] = {\n", name, 2*ranges_count);
  for (k = 0; k < ranges_count; k++) {
    printf(" 0x%04x, 0x%04x", ranges[k].low, ranges[k].high);
    if (k+1 < ranges_count) printf(",");
    if ((k % 4) == 3 && k+1 < ranges_count) printf("\n");
  }
  printf("\n");
  printf("};\n");

  printf("\n");

  printf("static const unsigned short %s_uni2charset_ranges[%d] = {\n", name, 2*ranges_count);
  for (k = 0; k < ranges_count; k++) {
    printf(" 0x%04x, 0x%04x", ranges[k].low + ranges[k].diff, ranges[k].high + ranges[k].diff);
    if (k+1 < ranges_count) printf(",");
    if ((k % 4) == 3 && k+1 < ranges_count) printf("\n");
  }
  printf("\n");
  printf("};\n");

  printf("\n");

  printf("static const struct { unsigned short diff; unsigned short bitmap_offset; } %s_ranges[%d] = {\n ", name, ranges_count);
  for (k = 0; k < ranges_count; k++) {
    printf(" { %5d, 0x%04x }", ranges[k].diff, ranges[k].total);
    if (k+1 < ranges_count) printf(",");
    if ((k % 4) == 3 && k+1 < ranges_count) printf("\n ");
  }
  printf("\n");
  printf("};\n");

  printf("\n");

  printf("static const unsigned char %s_bitmap[%d] = {\n ", name, (ranges_total + 7) / 8);
  {
    /* Bits are collected low-to-high into accu and flushed every 8 bits. */
    int accu = 0;
    for (k = 0; k < ranges_count; k++) {
      for (i = ranges[k].total; i <= ranges[k].total + (ranges[k].high - ranges[k].low);) {
        if (charset2uni[i - ranges[k].total + ranges[k].low] != 0)
          accu |= (1 << (i % 8));
        i++;
        if ((i % 8) == 0) {
          printf(" 0x%02x", accu);
          if ((i / 8) < (ranges_total + 7) / 8) printf(",");
          if (((i / 8) % 12) == 0)
            printf("\n ");
          accu = 0;
        }
      }
      /* Sanity check: the bit position must line up with the next range. */
      if (i != (k+1 < ranges_count ? ranges[k+1].total : ranges_total)) abort();
    }
    if ((ranges_total % 8) != 0)
      printf(" 0x%02x", accu);
    printf("\n");
  }
  printf("};\n");

  printf("\n");

  printf("static int\n");
  printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
  printf("{\n");
  printf(" unsigned char c1 = s[0];\n");
  printf(" if (c1 >= 0x81 && c1 <= 0x84) {\n");
  printf(" if (n >= 2) {\n");
  printf(" unsigned char c2 = s[1];\n");
  printf(" if (c2 >= 0x30 && c2 <= 0x39) {\n");
  printf(" if (n >= 3) {\n");
  printf(" unsigned char c3 = s[2];\n");
  printf(" if (c3 >= 0x81 && c3 <= 0xfe) {\n");
  printf(" if (n >= 4) {\n");
  printf(" unsigned char c4 = s[3];\n");
  printf(" if (c4 >= 0x30 && c4 <= 0x39) {\n");
  printf(" unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);\n");
  printf(" if (i >= %d && i <= %d) {\n", ranges[0].low, ranges[ranges_count-1].high);
  printf(" unsigned int k1 = 0;\n");
  printf(" unsigned int k2 = %d;\n", ranges_count-1);
  printf(" while (k1 < k2) {\n");
  printf(" unsigned int k = (k1 + k2) / 2;\n");
  printf(" if (i <= %s_charset2uni_ranges[2*k+1])\n", name);
  printf(" k2 = k;\n");
  printf(" else if (i >= %s_charset2uni_ranges[2*k+2])\n", name);
  printf(" k1 = k + 1;\n");
  printf(" else\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" {\n");
  printf(" unsigned int bitmap_index = i - %s_charset2uni_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name, name);
  printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name);
  printf(" unsigned int diff = %s_ranges[k1].diff;\n", name);
  printf(" *pwc = (ucs4_t) (i + diff);\n");
  printf(" return 4;\n");
  printf(" }\n");
  printf(" }\n");
  printf(" }\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf("}\n");

  printf("\n");

  printf("static int\n");
  printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
  printf("{\n");
  printf(" if (n >= 4) {\n");
  printf(" unsigned int i = wc;\n");
  printf(" if (i >= 0x%04x && i <= 0x%04x) {\n", ranges[0].low + ranges[0].diff, ranges[ranges_count-1].high + ranges[ranges_count-1].diff);
  printf(" unsigned int k1 = 0;\n");
  printf(" unsigned int k2 = %d;\n", ranges_count-1);
  printf(" while (k1 < k2) {\n");
  printf(" unsigned int k = (k1 + k2) / 2;\n");
  printf(" if (i <= %s_uni2charset_ranges[2*k+1])\n", name);
  printf(" k2 = k;\n");
  printf(" else if (i >= %s_uni2charset_ranges[2*k+2])\n", name);
  printf(" k1 = k + 1;\n");
  printf(" else\n");
  printf(" return RET_ILUNI;\n");
  printf(" }\n");
  printf(" {\n");
  printf(" unsigned int bitmap_index = i - %s_uni2charset_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name, name);
  printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name);
  printf(" unsigned int diff = %s_ranges[k1].diff;\n", name);
  printf(" i -= diff;\n");
  printf(" r[3] = (i %% 10) + 0x30; i = i / 10;\n");
  printf(" r[2] = (i %% 126) + 0x81; i = i / 126;\n");
  printf(" r[1] = (i %% 10) + 0x30; i = i / 10;\n");
  printf(" r[0] = i + 0x81;\n");
  printf(" return 4;\n");
  printf(" }\n");
  printf(" }\n");
  printf(" }\n");
  printf(" return RET_ILUNI;\n");
  printf(" }\n");
  printf(" return RET_TOOSMALL;\n");
  printf("}\n");
}
! 1646:
! 1647: /* JISX0213 specifics */
! 1648:
! 1649: static void do_jisx0213 (const char* name)
! 1650: {
! 1651: printf("#ifndef _JISX0213_H\n");
! 1652: printf("#define _JISX0213_H\n");
! 1653: printf("\n");
! 1654: printf("/* JISX0213 plane 1 (= ISO-IR-233) characters are in the range\n");
! 1655: printf(" 0x{21..7E}{21..7E}.\n");
! 1656: printf(" JISX0213 plane 2 (= ISO-IR-229) characters are in the range\n");
! 1657: printf(" 0x{21,23..25,28,2C..2F,6E..7E}{21..7E}.\n");
! 1658: printf(" Together this makes 120 rows of 94 characters.\n");
! 1659: printf("*/\n");
! 1660: printf("\n");
! 1661: {
! 1662: #define row_convert(row) \
! 1663: ((row) >= 0x121 && (row) <= 0x17E ? row-289 : /* 0..93 */ \
! 1664: (row) == 0x221 ? row-451 : /* 94 */ \
! 1665: (row) >= 0x223 && (row) <= 0x225 ? row-452 : /* 95..97 */ \
! 1666: (row) == 0x228 ? row-454 : /* 98 */ \
! 1667: (row) >= 0x22C && (row) <= 0x22F ? row-457 : /* 99..102 */ \
! 1668: (row) >= 0x26E && (row) <= 0x27E ? row-519 : /* 103..119 */ \
! 1669: -1)
! 1670: unsigned int table[120][94];
! 1671: int pagemin[0x1100];
! 1672: int pagemax[0x1100];
! 1673: int pageidx[0x1100];
! 1674: unsigned int pagestart[0x1100];
! 1675: unsigned int pagestart_len = 0;
! 1676: {
! 1677: unsigned int rowc, colc;
! 1678: for (rowc = 0; rowc < 120; rowc++)
! 1679: for (colc = 0; colc < 94; colc++)
! 1680: table[rowc][colc] = 0;
! 1681: }
! 1682: {
! 1683: unsigned int page;
! 1684: for (page = 0; page < 0x1100; page++)
! 1685: pagemin[page] = -1;
! 1686: for (page = 0; page < 0x1100; page++)
! 1687: pagemax[page] = -1;
! 1688: for (page = 0; page < 0x1100; page++)
! 1689: pageidx[page] = -1;
! 1690: }
! 1691: printf("static const unsigned short jisx0213_to_ucs_combining[][2] = {\n");
! 1692: {
! 1693: int private_use = 0x0001;
! 1694: for (;;) {
! 1695: char line[30];
! 1696: unsigned int row, col;
! 1697: unsigned int ucs;
! 1698: memset(line,0,sizeof(line));
! 1699: if (scanf("%[^\n]\n",line) < 1)
! 1700: break;
! 1701: assert(line[0]=='0');
! 1702: assert(line[1]=='x');
! 1703: assert(isxdigit(line[2]));
! 1704: assert(isxdigit(line[3]));
! 1705: assert(isxdigit(line[4]));
! 1706: assert(isxdigit(line[5]));
! 1707: assert(isxdigit(line[6]));
! 1708: assert(line[7]=='\t');
! 1709: line[7] = '\0';
! 1710: col = strtoul(&line[5],NULL,16);
! 1711: line[5] = '\0';
! 1712: row = strtoul(&line[2],NULL,16);
! 1713: if (line[20] != '\0' && line[21] == '\0') {
! 1714: unsigned int u1, u2;
! 1715: assert(line[8]=='0');
! 1716: assert(line[9]=='x');
! 1717: assert(isxdigit(line[10]));
! 1718: assert(isxdigit(line[11]));
! 1719: assert(isxdigit(line[12]));
! 1720: assert(isxdigit(line[13]));
! 1721: assert(line[14]==' ');
! 1722: assert(line[15]=='0');
! 1723: assert(line[16]=='x');
! 1724: assert(isxdigit(line[17]));
! 1725: assert(isxdigit(line[18]));
! 1726: assert(isxdigit(line[19]));
! 1727: assert(isxdigit(line[20]));
! 1728: u2 = strtoul(&line[17],NULL,16);
! 1729: line[14] = '\0';
! 1730: u1 = strtoul(&line[10],NULL,16);
! 1731: printf(" { 0x%04x, 0x%04x },\n", u1, u2);
! 1732: ucs = private_use++;
! 1733: } else {
! 1734: assert(line[8]=='0');
! 1735: assert(line[9]=='x');
! 1736: assert(isxdigit(line[10]));
! 1737: assert(isxdigit(line[11]));
! 1738: assert(isxdigit(line[12]));
! 1739: assert(isxdigit(line[13]));
! 1740: ucs = strtoul(&line[10],NULL,16);
! 1741: }
! 1742: assert((unsigned int) row_convert(row) < 120);
! 1743: assert((unsigned int) (col-0x21) < 94);
! 1744: table[row_convert(row)][col-0x21] = ucs;
! 1745: }
! 1746: }
! 1747: printf("};\n");
! 1748: printf("\n");
! 1749: {
! 1750: unsigned int rowc, colc;
! 1751: for (rowc = 0; rowc < 120; rowc++) {
! 1752: for (colc = 0; colc < 94; colc++) {
! 1753: unsigned int value = table[rowc][colc];
! 1754: unsigned int page = value >> 8;
! 1755: unsigned int rest = value & 0xff;
! 1756: if (pagemin[page] < 0 || pagemin[page] > rest) pagemin[page] = rest;
! 1757: if (pagemax[page] < 0 || pagemax[page] < rest) pagemax[page] = rest;
! 1758: }
! 1759: }
! 1760: }
! 1761: {
! 1762: unsigned int index = 0;
! 1763: unsigned int i;
! 1764: for (i = 0; i < 0x1100; ) {
! 1765: if (pagemin[i] >= 0) {
! 1766: if (pagemin[i+1] >= 0 && pagemin[i] >= 0x80 && pagemax[i+1] < 0x80) {
! 1767: /* Combine two pages into a single one. */
! 1768: assert(pagestart_len < sizeof(pagestart)/sizeof(pagestart[0]));
! 1769: pagestart[pagestart_len++] = (i<<8)+0x80;
! 1770: pageidx[i] = index;
! 1771: pageidx[i+1] = index;
! 1772: index++;
! 1773: i += 2;
! 1774: } else {
! 1775: /* A single page. */
! 1776: assert(pagestart_len < sizeof(pagestart)/sizeof(pagestart[0]));
! 1777: pagestart[pagestart_len++] = i<<8;
! 1778: pageidx[i] = index;
! 1779: index++;
! 1780: i += 1;
! 1781: }
! 1782: } else
! 1783: i++;
! 1784: }
! 1785: }
! 1786: printf("static const unsigned short jisx0213_to_ucs_main[120 * 94] = {\n");
! 1787: {
! 1788: unsigned int row;
! 1789: for (row = 0; row < 0x300; row++) {
! 1790: unsigned int rowc = row_convert(row);
! 1791: if (rowc != (unsigned int) (-1)) {
! 1792: printf(" /* 0x%X21..0x%X7E */\n",row,row);
! 1793: {
! 1794: unsigned int count = 0;
! 1795: unsigned int colc;
! 1796: for (colc = 0; colc < 94; colc++) {
! 1797: if ((count % 8) == 0) printf(" ");
! 1798: {
! 1799: unsigned int value = table[rowc][colc];
! 1800: unsigned int page = value >> 8;
! 1801: unsigned int index = pageidx[page];
! 1802: assert(value-pagestart[index] < 0x100);
! 1803: printf(" 0x%04x,",(index<<8)|(value-pagestart[index]));
! 1804: }
! 1805: count++;
! 1806: if ((count % 8) == 0) printf("\n");
! 1807: }
! 1808: }
! 1809: printf("\n");
! 1810: }
! 1811: }
! 1812: }
! 1813: printf("};\n");
! 1814: printf("\n");
! 1815: printf("static const ucs4_t jisx0213_to_ucs_pagestart[] = {\n");
! 1816: {
! 1817: unsigned int count = 0;
! 1818: unsigned int i;
! 1819: for (i = 0; i < pagestart_len; i++) {
! 1820: char buf[10];
! 1821: if ((count % 8) == 0) printf(" ");
! 1822: printf(" ");
! 1823: sprintf(buf,"0x%04x",pagestart[i]);
! 1824: if (strlen(buf) < 7) printf("%*s",7-strlen(buf),"");
! 1825: printf("%s,",buf);
! 1826: count++;
! 1827: if ((count % 8) == 0) printf("\n");
! 1828: }
! 1829: }
! 1830: printf("\n");
! 1831: printf("};\n");
! 1832: #undef row_convert
! 1833: }
! 1834: rewind(stdin);
! 1835: printf("\n");
! 1836: {
! 1837: int table[0x110000];
! 1838: bool pages[0x4400];
! 1839: int maxpage = -1;
! 1840: unsigned int combining_prefixes[100];
! 1841: unsigned int combining_prefixes_len = 0;
! 1842: {
! 1843: unsigned int i;
! 1844: for (i = 0; i < 0x110000; i++)
! 1845: table[i] = -1;
! 1846: for (i = 0; i < 0x4400; i++)
! 1847: pages[i] = false;
! 1848: }
! 1849: for (;;) {
! 1850: char line[30];
! 1851: unsigned int plane, row, col;
! 1852: memset(line,0,sizeof(line));
! 1853: if (scanf("%[^\n]\n",line) < 1)
! 1854: break;
! 1855: assert(line[0]=='0');
! 1856: assert(line[1]=='x');
! 1857: assert(isxdigit(line[2]));
! 1858: assert(isxdigit(line[3]));
! 1859: assert(isxdigit(line[4]));
! 1860: assert(isxdigit(line[5]));
! 1861: assert(isxdigit(line[6]));
! 1862: assert(line[7]=='\t');
! 1863: line[7] = '\0';
! 1864: col = strtoul(&line[5],NULL,16);
! 1865: line[5] = '\0';
! 1866: row = strtoul(&line[3],NULL,16);
! 1867: line[3] = '\0';
! 1868: plane = strtoul(&line[2],NULL,16) - 1;
! 1869: if (line[20] != '\0' && line[21] == '\0') {
! 1870: unsigned int u1, u2;
! 1871: assert(line[8]=='0');
! 1872: assert(line[9]=='x');
! 1873: assert(isxdigit(line[10]));
! 1874: assert(isxdigit(line[11]));
! 1875: assert(isxdigit(line[12]));
! 1876: assert(isxdigit(line[13]));
! 1877: assert(line[14]==' ');
! 1878: assert(line[15]=='0');
! 1879: assert(line[16]=='x');
! 1880: assert(isxdigit(line[17]));
! 1881: assert(isxdigit(line[18]));
! 1882: assert(isxdigit(line[19]));
! 1883: assert(isxdigit(line[20]));
! 1884: u2 = strtoul(&line[17],NULL,16);
! 1885: line[14] = '\0';
! 1886: u1 = strtoul(&line[10],NULL,16);
! 1887: assert(u2 == 0x02E5 || u2 == 0x02E9 || u2 == 0x0300 || u2 == 0x0301
! 1888: || u2 == 0x309A);
! 1889: assert(combining_prefixes_len < sizeof(combining_prefixes)/sizeof(combining_prefixes[0]));
! 1890: combining_prefixes[combining_prefixes_len++] = u1;
! 1891: } else {
! 1892: unsigned int ucs;
! 1893: assert(line[8]=='0');
! 1894: assert(line[9]=='x');
! 1895: assert(isxdigit(line[10]));
! 1896: assert(isxdigit(line[11]));
! 1897: assert(isxdigit(line[12]));
! 1898: assert(isxdigit(line[13]));
! 1899: ucs = strtoul(&line[10],NULL,16);
! 1900: /* Add an entry. */
! 1901: assert(plane <= 1);
! 1902: assert(row <= 0x7f);
! 1903: assert(col <= 0x7f);
! 1904: table[ucs] = (plane << 15) | (row << 8) | col;
! 1905: pages[ucs>>6] = true;
! 1906: if (maxpage < 0 || (ucs>>6) > maxpage) maxpage = ucs>>6;
! 1907: }
! 1908: }
! 1909: {
! 1910: unsigned int i;
! 1911: for (i = 0; i < combining_prefixes_len; i++) {
! 1912: unsigned int u1 = combining_prefixes[i];
! 1913: assert(table[u1] >= 0);
! 1914: table[u1] |= 0x0080;
! 1915: }
! 1916: }
! 1917: printf("static const short jisx0213_from_ucs_level1[%d] = {\n",maxpage+1);
! 1918: {
! 1919: unsigned int index = 0;
! 1920: unsigned int i;
! 1921: for (i = 0; i <= maxpage; i++) {
! 1922: if ((i % 8) == 0) printf(" ");
! 1923: if (pages[i]) {
! 1924: printf(" %3u,",index);
! 1925: index++;
! 1926: } else {
! 1927: printf(" %3d,",-1);
! 1928: }
! 1929: if (((i+1) % 8) == 0) printf("\n");
! 1930: }
! 1931: }
! 1932: printf("\n");
! 1933: printf("};\n");
! 1934: printf("\n");
! 1935: #if 0 /* Dense array */
! 1936: printf("static const unsigned short jisx0213_from_ucs_level2[] = {\n");
! 1937: {
! 1938: unsigned int i;
! 1939: for (i = 0; i <= maxpage; i++) {
! 1940: if (pages[i]) {
! 1941: printf(" /* 0x%04X */\n",i<<6);
! 1942: {
! 1943: unsigned int j;
! 1944: for (j = 0; j < 0x40; ) {
! 1945: unsigned int ucs = (i<<6)+j;
! 1946: int value = table[ucs];
! 1947: if (value < 0) value = 0;
! 1948: if ((j % 8) == 0) printf(" ");
! 1949: printf(" 0x%04x,",value);
! 1950: j++;
! 1951: if ((j % 8) == 0) printf("\n");
! 1952: }
! 1953: }
! 1954: }
! 1955: }
! 1956: }
! 1957: printf("};\n");
! 1958: #else /* Sparse array */
! 1959: {
! 1960: int summary_indx[0x11000];
! 1961: int summary_used[0x11000];
! 1962: unsigned int i, k, indx;
! 1963: printf("static const unsigned short jisx0213_from_ucs_level2_data[] = {\n");
! 1964: /* Fill summary_indx[] and summary_used[]. */
! 1965: indx = 0;
! 1966: for (i = 0, k = 0; i <= maxpage; i++) {
! 1967: if (pages[i]) {
! 1968: unsigned int j1, j2;
! 1969: unsigned int count = 0;
! 1970: printf(" /* 0x%04X */\n",i<<6);
! 1971: for (j1 = 0; j1 < 4; j1++) {
! 1972: summary_indx[4*k+j1] = indx;
! 1973: summary_used[4*k+j1] = 0;
! 1974: for (j2 = 0; j2 < 16; j2++) {
! 1975: unsigned int j = 16*j1+j2;
! 1976: unsigned int ucs = (i<<6)+j;
! 1977: int value = table[ucs];
! 1978: if (value < 0) value = 0;
! 1979: if (value > 0) {
! 1980: summary_used[4*k+j1] |= (1 << j2);
! 1981: if ((count % 8) == 0) printf(" ");
! 1982: printf(" 0x%04x,",value);
! 1983: count++;
! 1984: if ((count % 8) == 0) printf("\n");
! 1985: indx++;
! 1986: }
! 1987: }
! 1988: }
! 1989: if ((count % 8) > 0)
! 1990: printf("\n");
! 1991: k++;
! 1992: }
! 1993: }
! 1994: printf("};\n");
! 1995: printf("\n");
! 1996: printf("static const Summary16 jisx0213_from_ucs_level2_2indx[] = {\n");
! 1997: for (i = 0, k = 0; i <= maxpage; i++) {
! 1998: if (pages[i]) {
! 1999: unsigned int j1;
! 2000: printf(" /* 0x%04X */\n",i<<6);
! 2001: printf(" ");
! 2002: for (j1 = 0; j1 < 4; j1++) {
! 2003: printf(" { %4d, 0x%04x },", summary_indx[4*k+j1], summary_used[4*k+j1]);
! 2004: }
! 2005: printf("\n");
! 2006: k++;
! 2007: }
! 2008: }
! 2009: printf("};\n");
! 2010: }
! 2011: #endif
! 2012: printf("\n");
! 2013: }
! 2014: printf("#ifdef __GNUC__\n");
! 2015: printf("__inline\n");
! 2016: printf("#else\n");
! 2017: printf("#ifdef __cplusplus\n");
! 2018: printf("inline\n");
! 2019: printf("#endif\n");
! 2020: printf("#endif\n");
! 2021: printf("static ucs4_t jisx0213_to_ucs4 (unsigned int row, unsigned int col)\n");
! 2022: printf("{\n");
! 2023: printf(" ucs4_t val;\n");
! 2024: printf("\n");
! 2025: printf(" if (row >= 0x121 && row <= 0x17e)\n");
! 2026: printf(" row -= 289;\n");
! 2027: printf(" else if (row == 0x221)\n");
! 2028: printf(" row -= 451;\n");
! 2029: printf(" else if (row >= 0x223 && row <= 0x225)\n");
! 2030: printf(" row -= 452;\n");
! 2031: printf(" else if (row == 0x228)\n");
! 2032: printf(" row -= 454;\n");
! 2033: printf(" else if (row >= 0x22c && row <= 0x22f)\n");
! 2034: printf(" row -= 457;\n");
! 2035: printf(" else if (row >= 0x26e && row <= 0x27e)\n");
! 2036: printf(" row -= 519;\n");
! 2037: printf(" else\n");
! 2038: printf(" return 0x0000;\n");
! 2039: printf("\n");
! 2040: printf(" if (col >= 0x21 && col <= 0x7e)\n");
! 2041: printf(" col -= 0x21;\n");
! 2042: printf(" else\n");
! 2043: printf(" return 0x0000;\n");
! 2044: printf("\n");
! 2045: printf(" val = jisx0213_to_ucs_main[row * 94 + col];\n");
! 2046: printf(" val = jisx0213_to_ucs_pagestart[val >> 8] + (val & 0xff);\n");
! 2047: printf(" if (val == 0xfffd)\n");
! 2048: printf(" val = 0x0000;\n");
! 2049: printf(" return val;\n");
! 2050: printf("}\n");
! 2051: printf("\n");
! 2052: printf("#ifdef __GNUC__\n");
! 2053: printf("__inline\n");
! 2054: printf("#else\n");
! 2055: printf("#ifdef __cplusplus\n");
! 2056: printf("inline\n");
! 2057: printf("#endif\n");
! 2058: printf("#endif\n");
! 2059: printf("static unsigned short ucs4_to_jisx0213 (ucs4_t ucs)\n");
! 2060: printf("{\n");
! 2061: printf(" if (ucs < (sizeof(jisx0213_from_ucs_level1)/sizeof(jisx0213_from_ucs_level1[0])) << 6) {\n");
! 2062: printf(" int index1 = jisx0213_from_ucs_level1[ucs >> 6];\n");
! 2063: printf(" if (index1 >= 0)");
! 2064: #if 0 /* Dense array */
! 2065: printf("\n");
! 2066: printf(" return jisx0213_from_ucs_level2[(index1 << 6) + (ucs & 0x3f)];\n");
! 2067: #else /* Sparse array */
! 2068: printf(" {\n");
! 2069: printf(" const Summary16 *summary = &jisx0213_from_ucs_level2_2indx[((index1 << 6) + (ucs & 0x3f)) >> 4];\n");
! 2070: printf(" unsigned short used = summary->used;\n");
! 2071: printf(" unsigned int i = ucs & 0x0f;\n");
! 2072: printf(" if (used & ((unsigned short) 1 << i)) {\n");
! 2073: printf(" /* Keep in `used' only the bits 0..i-1. */\n");
! 2074: printf(" used &= ((unsigned short) 1 << i) - 1;\n");
! 2075: printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
! 2076: printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
! 2077: printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
! 2078: printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
! 2079: printf(" used = (used & 0x00ff) + (used >> 8);\n");
! 2080: printf(" return jisx0213_from_ucs_level2_data[summary->indx + used];\n");
! 2081: printf(" };\n");
! 2082: printf(" };\n");
! 2083: #endif
! 2084: printf(" }\n");
! 2085: printf(" return 0x0000;\n");
! 2086: printf("}\n");
! 2087: printf("\n");
! 2088: printf("#endif /* _JISX0213_H */\n");
! 2089: }
! 2090:
! 2091: /* Main program */
! 2092:
/*
 * Program entry point.
 *
 * Usage (see the examples at the top of this file):
 *
 *     cjk_tab_to_h CHARSETNAME NAME < TABLE.TXT > NAME.h
 *
 *   argv[1] (CHARSETNAME) is handed to output_title().
 *   argv[2] (NAME) selects which generator routine runs and is passed
 *                  on to that routine unchanged.
 *
 * Returns 0 after the selected generator has run.  Exits with status 1,
 * after writing a diagnostic to stderr, when the number of arguments is
 * wrong or NAME does not match any known table name.  Diagnostics go to
 * stderr because stdout is normally redirected into the generated header.
 */
int main (int argc, char *argv[])
{
  const char* charsetname;
  const char* name;

  if (argc != 3) {
    fprintf(stderr, "Usage: %s CHARSETNAME NAME < TABLE.TXT > NAME.h\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "cjk_tab_to_h");
    exit(1);
  }
  charsetname = argv[1];
  name = argv[2];

  output_title(charsetname);

  if (!strcmp(name,"gb2312")
      || !strcmp(name,"isoir165ext") || !strcmp(name,"gb12345ext")
      || !strcmp(name,"jisx0208") || !strcmp(name,"jisx0212"))
    do_normal(name);
  else if (!strcmp(name,"cns11643_1") || !strcmp(name,"cns11643_2")
           || !strcmp(name,"cns11643_3") || !strcmp(name,"cns11643_4a")
           || !strcmp(name,"cns11643_4b") || !strcmp(name,"cns11643_5")
           || !strcmp(name,"cns11643_6") || !strcmp(name,"cns11643_7")
           || !strcmp(name,"cns11643_15"))
    do_normal_only_charset2uni(name);
  else if (!strcmp(name,"cns11643_inv"))
    do_cns11643_only_uni2charset(name);
  else if (!strcmp(name,"gbkext1"))
    do_gbk1_only_charset2uni(name);
  else if (!strcmp(name,"gbkext2"))
    do_gbk2_only_charset2uni(name);
  else if (!strcmp(name,"gbkext_inv"))
    do_gbk1_only_uni2charset(name);
  else if (!strcmp(name,"cp936ext") || !strcmp(name,"gb18030ext"))
    do_gbk1(name);
  else if (!strcmp(name,"ksc5601"))
    do_ksc5601(name);
  else if (!strcmp(name,"uhc_1"))
    do_uhc_1(name);
  else if (!strcmp(name,"uhc_2"))
    do_uhc_2(name);
  else if (!strcmp(name,"big5") || !strcmp(name,"cp950ext"))
    do_big5(name);
  else if (!strcmp(name,"hkscs1999") || !strcmp(name,"hkscs2001")
           || !strcmp(name,"hkscs2004"))
    do_hkscs(name);
  else if (!strcmp(name,"johab_hangul"))
    do_johab_hangul(name);
  else if (!strcmp(name,"cp932ext"))
    do_sjis(name);
  else if (!strcmp(name,"gb18030uni"))
    do_gb18030uni(name);
  else if (!strcmp(name,"jisx0213"))
    do_jisx0213(name);
  else {
    /* output_title() has already run at this point, exactly as before;
       only the explanation on stderr is new. */
    fprintf(stderr, "%s: unknown table name '%s'\n", argv[0], name);
    exit(1);
  }

  return 0;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>