Annotation of embedaddon/php/ext/mbstring/libmbfl/filters/mk_emoji_tbl.pl, revision 1.1
1.1 ! misho 1: #!/usr/bin/perl
! 2: # script to generate Shift_JIS encoded Emoji to/from Unicode conversion table.
! 3: # Rui Hirokawa <hirokawa@php.net>
! 4: #
! 5: # usage: mk_emoji_tbl.pl EmojiSources.txt
! 6: #
! 7: # Input line format: Unicode;DoCoMo;KDDI;SoftBank
! 8:
# Carrier code -> Unicode lookups.  They are filled while the input is read
# and are keyed by the linear code that sjis2code() returns, so they must be
# hashes (the script only ever accesses them as %docomo, %kddi, %softbank).
%docomo   = ();
%kddi     = ();
%softbank = ();

# Unicode -> carrier code lookups, keyed by the first field of an input line.
%to_docomo = ();
%to_kddi   = ();
%to_sb     = ();

# The generated C tables are written to this file in the current directory.
# Three-argument open keeps the mode separate from the file name.
$fname = "emoji2uni.h";
open(OUT, '>', $fname) or die "cannot open $fname for writing: $!";
! 19:
# Convert a two-byte Shift_JIS code into a linear table index.
#
#   $_[0]    the code as a hex string such as "F89F"; pack("H4") consumes
#            only the first four hex digits
#   returns  (lead - 0x21) * 94 + (trail - 0x21), where lead/trail are the
#            bytes after the Shift_JIS -> JIS transformation below
#
# All working values are lexical, so no package variable is modified.
sub sjis2code {
    my ($hex) = @_;
    my ($lead, $trail) = unpack("C*", pack("H4", $hex));

    # Shift_JIS -> JIS.  Both expressions test the *original* trail byte.
    my $jis_lead  = (($lead - ($lead < 160 ? 112 : 176)) << 1) - ($trail < 159 ? 1 : 0);
    my $jis_trail = $trail - ($trail < 159 ? ($trail > 127 ? 32 : 31) : 126);

    return ($jis_lead - 0x21) * 94 + $jis_trail - 0x21;
}
! 31:
# Format a list of hex strings as the body of a C array initialiser.
#
#   @_       hex strings without the "0x" prefix; false entries (undef, 0,
#            empty string) stand for "no mapping"
#   returns  a string that starts with a tab and contains "0x<hex>, \t" per
#            entry ("0x0000, \t" for false entries), with "\n\t" appended
#            after every fourth entry
#
# When an entry holds several space-separated values only the first one is
# emitted; the entry's index and its first two values are printed to the
# currently selected output handle as a diagnostic.
#
# All working values are lexical, so no package variable is modified.
sub show_code {
    my @codes = @_;
    my $out   = "\t";

    for my $idx (0 .. $#codes) {
        if ($codes[$idx]) {
            my @parts = split(' ', $codes[$idx]);
            $out .= "0x$parts[0], \t";
            print "$idx $parts[0] $parts[1]\n" if $#parts > 0;
        }
        else {
            $out .= "0x0000, \t";
        }

        # four entries per output line
        $out .= "\n\t" if $idx % 4 == 3;
    }

    return $out;
}
! 51:
# Read the input named on the command line (or STDIN).  Data lines have the
# form "Unicode;DoCoMo;KDDI;SoftBank": the first field is the Unicode value as
# hex text, the other three are Shift_JIS codes as hex text and may be empty.
# Every other line (comments, blanks) is skipped.
#
# Each entry is: input column, code -> Unicode hash, Unicode -> code hash.
my @carrier_tables = (
    [ 1, \%docomo,   \%to_docomo ],
    [ 2, \%kddi,     \%to_kddi   ],
    [ 3, \%softbank, \%to_sb     ],
);

while (my $line = <>) {
    # Drop the line terminator so it cannot end up inside the last field.
    $line =~ s/\s+\z//;

    # Data lines start with a hex digit.
    next unless $line =~ /^[0-9A-F]/;

    my @field   = split(/;/, $line);
    my $unicode = $field[0];

    foreach my $table (@carrier_tables) {
        my ($column, $to_unicode, $to_code) = @$table;
        my $sjis = $field[$column];

        # sjis2code() consumes exactly four hex digits, so require them at
        # the start of the field instead of accepting any field that merely
        # contains a hex digit somewhere.
        next unless defined($sjis) && $sjis =~ /^[0-9A-F]{4}/;

        my $code = sjis2code($sjis);
        $to_unicode->{$code} = $unicode;
        $to_code->{$unicode} = $code;
    }
}
! 72:
# ---------------------------------------------------------------------------
# DoCoMo: emit the code -> Unicode table and the three Unicode -> code
# key/value tables to OUT.
# ---------------------------------------------------------------------------
print "DoCoMo\n";

# Range of linear codes (values returned by sjis2code) covered by the table.
$docomo_min = 10434;
$docomo_max = 10434+281;
@docomo_v = ();

# Store each Unicode value at its offset from $docomo_min.  Slots that are
# never assigned stay undef, which show_code() emits as 0x0000.  The order of
# assignment is irrelevant, so the keys are not sorted.
foreach my $code (keys(%docomo)) {
    if ($code < $docomo_min) {
        # A negative offset would index from the end of the array (or die
        # on an empty one) instead of being rejected.
        warn "DoCoMo code $code is below $docomo_min; skipped\n";
        next;
    }
    $docomo_v[$code - $docomo_min] = $docomo{$code};
}

# Not used by the emitted tables; it only fed a disabled debug trace.
$to_docomo_min = 10434;

# Unicode ranges of the three reverse tables.
$to_docomo_min1 = 0x0023;
$to_docomo_max1 = 0x00AE;
$to_docomo_min2 = 0x203C;
$to_docomo_max2 = 0x3299;
$to_docomo_min3 = 0x1F17F;
$to_docomo_max3 = 0x1F6BB;

@r_docomo1_key = ();
@r_docomo1_val = ();

@r_docomo2_key = ();
@r_docomo2_val = ();

@r_docomo3_key = ();
@r_docomo3_val = ();

# Distribute the Unicode keys, in ascending order, over the three ranges.
# hex() stops at the first non-hex character, so a multi-value key such as
# "0023 20E3" is classified by its first value.
foreach my $uni (sort {hex($a) <=> hex($b)} keys(%to_docomo)) {
    my $cp       = hex($uni);
    my $code_hex = sprintf("%x", $to_docomo{$uni});

    if ($cp <= $to_docomo_max1) {
        push(@r_docomo1_key, $uni);
        push(@r_docomo1_val, $code_hex);
    } elsif ($cp <= $to_docomo_max2) {
        push(@r_docomo2_key, $uni);
        push(@r_docomo2_val, $code_hex);
    } elsif ($cp >= $to_docomo_min3) {
        # The third range starts at its minimum.  Comparing against the
        # maximum here discarded every key below $to_docomo_max3.
        push(@r_docomo3_key, $uni);
        push(@r_docomo3_val, $code_hex);
    }
}

# Terminate every key/value list with a zero entry.
push(@r_docomo1_key, 0x00);
push(@r_docomo1_val, 0x00);
push(@r_docomo2_key, 0x00);
push(@r_docomo2_val, 0x00);
push(@r_docomo3_key, 0x00);
push(@r_docomo3_val, 0x00);

print OUT "int mb_tbl_code2uni_docomo_min = $docomo_min;\n";
print OUT "int mb_tbl_code2uni_docomo_max = $docomo_max;\n\n";

print OUT "int mb_tbl_code2uni_docomo[] = {\n";
print OUT &show_code(@docomo_v);
print OUT "};\n\n";

print OUT "int mb_tbl_uni_docomo2code_min1 = $to_docomo_min1;\n";
print OUT "int mb_tbl_uni_docomo2code_max1 = $to_docomo_max1;\n";
print OUT "int mb_tbl_uni_docomo2code_min2 = $to_docomo_min2;\n";
print OUT "int mb_tbl_uni_docomo2code_max2 = $to_docomo_max2;\n";
print OUT "int mb_tbl_uni_docomo2code_min3 = $to_docomo_min3;\n";
print OUT "int mb_tbl_uni_docomo2code_max3 = $to_docomo_max3;\n\n";

# DoCoMo reverse table 1
print OUT "int mb_tbl_uni_docomo2code_key1[] = {\n";
print OUT &show_code(@r_docomo1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val1[] = {\n";
print OUT &show_code(@r_docomo1_val),"\n";
print OUT "};\n\n";

# DoCoMo reverse table 2
print OUT "int mb_tbl_uni_docomo2code_key2[] = {\n";
print OUT &show_code(@r_docomo2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val2[] = {\n";
print OUT &show_code(@r_docomo2_val),"\n";
print OUT "};\n\n";

# DoCoMo reverse table 3
print "DOCOMO reverse 3\n";

print OUT "int mb_tbl_uni_docomo2code_key3[] = {\n";
print OUT &show_code(@r_docomo3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_docomo2code_val3[] = {\n";
print OUT &show_code(@r_docomo3_val),"\n";
print OUT "};\n\n";
! 175: #print "DOCOMO reverse end \n";
! 176:
# ---------------------------------------------------------------------------
# KDDI: emit the two code -> Unicode tables and the three Unicode -> code
# key/value tables to OUT.
# ---------------------------------------------------------------------------

# Ranges of linear codes (values returned by sjis2code) covered by the two
# code -> Unicode tables.
$kddi_min1 = 9400;
$kddi_max1 = 9400+264;
$kddi_min2 = 9400+564;
$kddi_max2 = 9400+939;

@kddi_v1 = ();
@kddi_v2 = ();

# Store each Unicode value at its offset inside the range that contains the
# code.  Slots that are never assigned stay undef, which show_code() emits as
# 0x0000.  The order of assignment is irrelevant, so the keys are not sorted.
foreach my $code (keys(%kddi)) {
    if ($code >= $kddi_min1 && $code <= $kddi_max1) {
        $kddi_v1[$code - $kddi_min1] = $kddi{$code};
    } elsif ($code >= $kddi_min2 && $code <= $kddi_max2) {
        $kddi_v2[$code - $kddi_min2] = $kddi{$code};
    } elsif ($code <= $kddi_max2) {
        # Below the first range or in the gap between the two: the offset
        # would be negative and index from the end of the array.
        warn "KDDI code $code lies outside both table ranges; skipped\n";
    }
    # Codes above $kddi_max2 are ignored, as before.
}

# Not used by the emitted tables; it only fed a disabled debug trace.
$to_kddi_min = 9660;

# Unicode ranges of the three reverse tables.
$to_kddi_min1 = 0x0030;
$to_kddi_max1 = 0x00AE;
$to_kddi_min2 = 0x2002;
$to_kddi_max2 = 0x3299;
$to_kddi_min3 = 0x1F004;
$to_kddi_max3 = 0x1F6C0;

@r_kddi1_key = (); @r_kddi1_val = ();
@r_kddi2_key = (); @r_kddi2_val = ();
@r_kddi3_key = (); @r_kddi3_val = ();

# Distribute the Unicode keys, in ascending order, over the three ranges.
# hex() stops at the first non-hex character, so a multi-value key is
# classified by its first value.
foreach my $uni (sort {hex($a) <=> hex($b)} keys(%to_kddi)) {
    my $cp       = hex($uni);
    my $code_hex = sprintf("%x", $to_kddi{$uni});

    if ($cp <= $to_kddi_max1) {
        push(@r_kddi1_key, $uni);
        push(@r_kddi1_val, $code_hex);
    } elsif ($cp <= $to_kddi_max2) {
        push(@r_kddi2_key, $uni);
        push(@r_kddi2_val, $code_hex);
    } else {
        push(@r_kddi3_key, $uni);
        push(@r_kddi3_val, $code_hex);
    }
}

# Terminate every key/value list with a zero entry.
push(@r_kddi1_key, 0x00);
push(@r_kddi1_val, 0x00);
push(@r_kddi2_key, 0x00);
push(@r_kddi2_val, 0x00);
push(@r_kddi3_key, 0x00);
push(@r_kddi3_val, 0x00);

print OUT "int mb_tbl_code2uni_kddi1_min = $kddi_min1;\n";
print OUT "int mb_tbl_code2uni_kddi1_max = $kddi_max1;\n";
print OUT "int mb_tbl_code2uni_kddi2_min = $kddi_min2;\n";
print OUT "int mb_tbl_code2uni_kddi2_max = $kddi_max2;\n\n";

# KDDI table 1
print OUT "int mb_tbl_code2uni_kddi1[] = {\n";
print OUT &show_code(@kddi_v1);
print OUT "};\n\n";

# KDDI table 2
print OUT "int mb_tbl_code2uni_kddi2[] = {\n";
print OUT &show_code(@kddi_v2);
print OUT "};\n\n";

print OUT "int mb_tbl_uni_kddi2code_min1 = $to_kddi_min1;\n";
print OUT "int mb_tbl_uni_kddi2code_max1 = $to_kddi_max1;\n";
print OUT "int mb_tbl_uni_kddi2code_min2 = $to_kddi_min2;\n";
print OUT "int mb_tbl_uni_kddi2code_max2 = $to_kddi_max2;\n";
print OUT "int mb_tbl_uni_kddi2code_min3 = $to_kddi_min3;\n";
print OUT "int mb_tbl_uni_kddi2code_max3 = $to_kddi_max3;\n\n";

# KDDI reverse table 1
print OUT "int mb_tbl_uni_kddi2code_key1[] = {\n";
print OUT &show_code(@r_kddi1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val1[] = {\n";
print OUT &show_code(@r_kddi1_val),"\n";
print OUT "};\n\n";

# KDDI reverse table 2
print OUT "int mb_tbl_uni_kddi2code_key2[] = {\n";
print OUT &show_code(@r_kddi2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val2[] = {\n";
print OUT &show_code(@r_kddi2_val),"\n";
print OUT "};\n\n";

# KDDI reverse table 3
print OUT "int mb_tbl_uni_kddi2code_key3[] = {\n";
print OUT &show_code(@r_kddi3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_kddi2code_val3[] = {\n";
print OUT &show_code(@r_kddi3_val),"\n";
print OUT "};\n\n";
! 291:
! 292:
# ---------------------------------------------------------------------------
# SoftBank: emit the three code -> Unicode tables and the three
# Unicode -> code key/value tables to OUT, then close the output file.
# ---------------------------------------------------------------------------

# Ranges of linear codes (values returned by sjis2code) covered by the three
# code -> Unicode tables.
$sb_min1 = 10153;
$sb_max1 = 10153+177;
$sb_min2 = 10153+376;
$sb_max2 = 10153+547;
$sb_min3 = 10153+752;
$sb_max3 = 10153+901;

@sb_v1 = ();
@sb_v2 = ();
@sb_v3 = ();

print "SoftBank\n";

# Store each Unicode value at its offset inside the range that contains the
# code.  Slots that are never assigned stay undef, which show_code() emits as
# 0x0000.  The order of assignment is irrelevant, so the keys are not sorted.
foreach my $code (keys(%softbank)) {
    if ($code >= $sb_min1 && $code <= $sb_max1) {
        $sb_v1[$code - $sb_min1] = $softbank{$code};
    } elsif ($code >= $sb_min2 && $code <= $sb_max2) {
        $sb_v2[$code - $sb_min2] = $softbank{$code};
    } elsif ($code >= $sb_min3 && $code <= $sb_max3) {
        $sb_v3[$code - $sb_min3] = $softbank{$code};
    } elsif ($code <= $sb_max3) {
        # Below the first range or in a gap between ranges: the offset
        # would be negative and index from the end of the array.
        warn "SoftBank code $code lies outside all table ranges; skipped\n";
    }
    # Codes above $sb_max3 are ignored, as before.
}

# Not used by the emitted tables; it only fed a disabled debug trace.
$to_sb_min = 10263;

# Unicode ranges of the three reverse tables.
$to_sb_min1 = 0x0023;
$to_sb_max1 = 0x00AE;
$to_sb_min2 = 0x2122;
$to_sb_max2 = 0x3299;
$to_sb_min3 = 0x1F004;
$to_sb_max3 = 0x1F6C0;

@r_sb1_key = (); @r_sb1_val = ();
@r_sb2_key = (); @r_sb2_val = ();
@r_sb3_key = (); @r_sb3_val = ();

# Distribute the Unicode keys, in ascending order, over the three ranges.
# hex() stops at the first non-hex character, so a multi-value key is
# classified by its first value.
foreach my $uni (sort {hex($a) <=> hex($b)} keys(%to_sb)) {
    my $cp       = hex($uni);
    my $code_hex = sprintf("%x", $to_sb{$uni});

    if ($cp <= $to_sb_max1) {
        push(@r_sb1_key, $uni);
        push(@r_sb1_val, $code_hex);
    } elsif ($cp >= $to_sb_min2 && $cp <= $to_sb_max2) {
        push(@r_sb2_key, $uni);
        push(@r_sb2_val, $code_hex);
    } else {
        push(@r_sb3_key, $uni);
        push(@r_sb3_val, $code_hex);
    }
}

# Terminate every key/value list with a zero entry.
push(@r_sb1_key, 0x00);
push(@r_sb1_val, 0x00);
push(@r_sb2_key, 0x00);
push(@r_sb2_val, 0x00);
push(@r_sb3_key, 0x00);
push(@r_sb3_val, 0x00);

print OUT "int mb_tbl_code2uni_sb1_min = $sb_min1;\n";
print OUT "int mb_tbl_code2uni_sb1_max = $sb_max1;\n";
print OUT "int mb_tbl_code2uni_sb2_min = $sb_min2;\n";
print OUT "int mb_tbl_code2uni_sb2_max = $sb_max2;\n";
print OUT "int mb_tbl_code2uni_sb3_min = $sb_min3;\n";
print OUT "int mb_tbl_code2uni_sb3_max = $sb_max3;\n\n";

# SoftBank table 1
print OUT "int mb_tbl_code2uni_sb1[] = {\n";
print OUT &show_code(@sb_v1);
print OUT "};\n\n";

# SoftBank table 2
print OUT "int mb_tbl_code2uni_sb2[] = {\n";
print OUT &show_code(@sb_v2);
print OUT "};\n\n";

# SoftBank table 3
print OUT "int mb_tbl_code2uni_sb3[] = {\n";
print OUT &show_code(@sb_v3);
print OUT "};\n\n";

print OUT "int mb_tbl_uni_sb2code_min1 = $to_sb_min1;\n";
print OUT "int mb_tbl_uni_sb2code_max1 = $to_sb_max1;\n";
print OUT "int mb_tbl_uni_sb2code_min2 = $to_sb_min2;\n";
print OUT "int mb_tbl_uni_sb2code_max2 = $to_sb_max2;\n";
print OUT "int mb_tbl_uni_sb2code_min3 = $to_sb_min3;\n";
print OUT "int mb_tbl_uni_sb2code_max3 = $to_sb_max3;\n\n";

# SoftBank reverse table 1
print OUT "int mb_tbl_uni_sb2code_key1[] = {\n";
print OUT &show_code(@r_sb1_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val1[] = {\n";
print OUT &show_code(@r_sb1_val),"\n";
print OUT "};\n\n";

# SoftBank reverse table 2
print OUT "int mb_tbl_uni_sb2code_key2[] = {\n";
print OUT &show_code(@r_sb2_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val2[] = {\n";
print OUT &show_code(@r_sb2_val),"\n";
print OUT "};\n\n";

# SoftBank reverse table 3
print OUT "int mb_tbl_uni_sb2code_key3[] = {\n";
print OUT &show_code(@r_sb3_key),"\n";
print OUT "};\n\n";
print OUT "int mb_tbl_uni_sb2code_val3[] = {\n";
print OUT &show_code(@r_sb3_val),"\n";
print OUT "};\n\n";

# Buffered write errors only surface when the handle is closed, so check it.
close(OUT) or die "cannot close $fname: $!";
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>