Annotation of embedaddon/php/ext/mbstring/tests/illformed_utf_sequences.phpt, revision 1.1.1.1

1.1       misho       1: --TEST--
                      2: Unicode standard conformance test (ill-formed UTF sequences.)
                      3: --SKIPIF--
                      4: <?php /* Skip this test when the mbstring extension is not loaded. */ if (!extension_loaded('mbstring')) { die('skip mbstring not available'); } ?>
                      5: --FILE--
                      6: <?php
                      7: echo "UTF-8 redundancy\n"; // Shortest-form UTF-8 must convert normally; overlong (non-shortest) forms are expected to yield an empty string.
                      8: var_dump(bin2hex(mb_convert_encoding("\x31\x32\x33", "UCS-4BE", "UTF-8")));
                      9: var_dump(bin2hex(mb_convert_encoding("\x41\x42\x43", "UCS-4BE", "UTF-8")));
                     10: var_dump(bin2hex(mb_convert_encoding("\xc0\xb1\xc0\xb2\xc0\xb3", "UCS-4BE", "UTF-8")));
                     11: var_dump(bin2hex(mb_convert_encoding("\xc1\x81\xc1\x82\xc1\x83", "UCS-4BE", "UTF-8")));
                     12: var_dump(bin2hex(mb_convert_encoding("\xe0\x80\xb1\xe0\x80\xb2\xe0\x80\xb3", "UCS-4BE", "UTF-8")));
                     13: var_dump(bin2hex(mb_convert_encoding("\xe0\x81\x81\xe0\x81\x82\xe0\x81\x83", "UCS-4BE", "UTF-8")));
                     14: var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x80\xb1\xf0\x80\x80\xb2\xf0\x80\x80\xb3", "UCS-4BE", "UTF-8")));
                     15: var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x81\x81\xf0\x80\x81\x82\xf0\x80\x81\x83", "UCS-4BE", "UTF-8"))); // 4-byte overlong forms of U+0041..U+0043
                     16: var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x80\xb1\xf8\x80\x80\x80\xb2\xf8\x80\x80\x80\xb3", "UCS-4BE", "UTF-8")));
                     17: var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x81\x81\xf8\x80\x80\x81\x82\xf8\x80\x80\x81\x83", "UCS-4BE", "UTF-8")));
                     18: var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x80\xb1\xfc\x80\x80\x80\x80\xb2\xfc\x80\x80\x80\x80\xb3", "UCS-4BE", "UTF-8")));
                     19: var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x81\x81\xfc\x80\x80\x80\x81\x82\xfc\x80\x80\x80\x81\x83", "UCS-4BE", "UTF-8")));
                     20: // U+00A2, U+00A3, U+00A5: the shortest (2-byte) form first, then overlong 3-, 4-, 5- and 6-byte forms.
                     21: var_dump(bin2hex(mb_convert_encoding("\xc2\xa2\xc2\xa3\xc2\xa5", "UCS-4BE", "UTF-8")));
                     22: var_dump(bin2hex(mb_convert_encoding("\xe0\x82\xa2\xe0\x82\xa3\xe0\x82\xa5", "UCS-4BE", "UTF-8")));
                     23: var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x82\xa2\xf0\x80\x82\xa3\xf0\x80\x82\xa5", "UCS-4BE", "UTF-8")));
                     24: var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x82\xa2\xf8\x80\x80\x82\xa3\xf8\x80\x80\x82\xa5", "UCS-4BE", "UTF-8")));
                     25: var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x82\xa2\xfc\x80\x80\x80\x82\xa3\xfc\x80\x80\x80\x82\xa5", "UCS-4BE", "UTF-8")));
                     26: // Length boundaries: per sequence length, the largest overlong form, then the smallest and a largest-range value (the 6-byte group ends with 0x6fffffff and 0x7fffffff).
                     27: var_dump(bin2hex(mb_convert_encoding("\xc1\xbf", "UCS-4BE", "UTF-8")));
                     28: var_dump(bin2hex(mb_convert_encoding("\xc2\x80", "UCS-4BE", "UTF-8")));
                     29: var_dump(bin2hex(mb_convert_encoding("\xdf\xbf", "UCS-4BE", "UTF-8")));
                     30: var_dump(bin2hex(mb_convert_encoding("\xe0\x9f\xbf", "UCS-4BE", "UTF-8"))); // 3-byte overlong form of U+07FF
                     31: var_dump(bin2hex(mb_convert_encoding("\xe0\xa0\x80", "UCS-4BE", "UTF-8")));
                     32: var_dump(bin2hex(mb_convert_encoding("\xef\xbf\xbf", "UCS-4BE", "UTF-8")));
                     33: var_dump(bin2hex(mb_convert_encoding("\xf0\x8f\xbf\xbf", "UCS-4BE", "UTF-8")));
                     34: var_dump(bin2hex(mb_convert_encoding("\xf0\x90\x80\x80", "UCS-4BE", "UTF-8")));
                     35: var_dump(bin2hex(mb_convert_encoding("\xf7\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
                     36: var_dump(bin2hex(mb_convert_encoding("\xf8\x87\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
                     37: var_dump(bin2hex(mb_convert_encoding("\xf8\x88\x80\x80\x80", "UCS-4BE", "UTF-8")));
                     38: var_dump(bin2hex(mb_convert_encoding("\xfb\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
                     39: var_dump(bin2hex(mb_convert_encoding("\xfc\x83\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
                     40: var_dump(bin2hex(mb_convert_encoding("\xfc\x84\x80\x80\x80\x80", "UCS-4BE", "UTF-8")));
                     41: var_dump(bin2hex(mb_convert_encoding("\xfd\xaf\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
                     42: var_dump(bin2hex(mb_convert_encoding("\xfd\xbf\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8")));
                     43: 
                     44: echo "UTF-8 and surrogates area\n"; // Converts the 3-byte encoding of every value in 0xd7ff..0xe000, one value per call.
                     45: $ucs4 = '';
                     46: for ($cp = 0xd7ff; $cp < 0xe001; $cp++) {
                     47:     $ucs4 .= mb_convert_encoding(chr(0xe0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3f)) . chr(0x80 | ($cp & 0x3f)), "UCS-4BE", "UTF-8");
                     48: }
                     49: var_dump(bin2hex($ucs4));
                     50: 
                     51: echo "UTF-32 code range\n"; // Feeds 0x110000 and 0x10ffff as UTF-32BE, UTF-32LE, BOM-less UTF-32 and BOM-prefixed UTF-32 ('N' = big-endian, 'V' = little-endian 32-bit).
                     52: var_dump(bin2hex(mb_convert_encoding(pack('N', 0x110000), "UCS-4BE", "UTF-32BE")));
                     53: var_dump(bin2hex(mb_convert_encoding(pack('N', 0x10ffff), "UCS-4BE", "UTF-32BE")));
                     54: var_dump(bin2hex(mb_convert_encoding(pack('V', 0x110000), "UCS-4BE", "UTF-32LE")));
                     55: var_dump(bin2hex(mb_convert_encoding(pack('V', 0x10ffff), "UCS-4BE", "UTF-32LE")));
                     56: var_dump(bin2hex(mb_convert_encoding(pack('N', 0x110000), "UCS-4BE", "UTF-32")));
                     57: var_dump(bin2hex(mb_convert_encoding(pack('N', 0x10ffff), "UCS-4BE", "UTF-32")));
                     58: var_dump(bin2hex(mb_convert_encoding(pack('N2', 0xfeff, 0x110000), "UCS-4BE", "UTF-32")));
                     59: var_dump(bin2hex(mb_convert_encoding(pack('N2', 0xfeff, 0x10ffff), "UCS-4BE", "UTF-32")));
                     60: var_dump(bin2hex(mb_convert_encoding(pack('V2', 0xfeff, 0x110000), "UCS-4BE", "UTF-32")));
                     61: var_dump(bin2hex(mb_convert_encoding(pack('V2', 0xfeff, 0x10ffff), "UCS-4BE", "UTF-32")));
                     62: 
                     63: echo "UTF-32 and surrogates area\n"; // Each loop converts every value in 0xd7ff..0xe000 individually and dumps the concatenated UCS-4BE result.
                     64: $ucs4 = '';
                     65: for ($cp = 0xd7ff; $cp < 0xe001; $cp++) {
                     66:     $ucs4 .= mb_convert_encoding(pack('N', $cp), "UCS-4BE", "UTF-32BE");
                     67: }
                     68: var_dump(bin2hex($ucs4));
                     69: // Same range, little-endian input.
                     70: $ucs4 = '';
                     71: for ($cp = 0xd7ff; $cp < 0xe001; $cp++) {
                     72:     $ucs4 .= mb_convert_encoding(pack('V', $cp), "UCS-4BE", "UTF-32LE");
                     73: }
                     74: var_dump(bin2hex($ucs4));
                     75: // Same range, big-endian bytes passed as "UTF-32" without a BOM.
                     76: $ucs4 = '';
                     77: for ($cp = 0xd7ff; $cp < 0xe001; $cp++) {
                     78:     $ucs4 .= mb_convert_encoding(pack('N', $cp), "UCS-4BE", "UTF-32");
                     79: }
                     80: var_dump(bin2hex($ucs4));
                     81: // Same range, prefixed with a big-endian BOM; the BOM is stripped from the output before dumping.
                     82: $ucs4 = '';
                     83: for ($cp = 0xd7ff; $cp < 0xe001; $cp++) {
                     84:     $ucs4 .= mb_convert_encoding("\x00\x00\xfe\xff" . pack('N', $cp), "UCS-4BE", "UTF-32");
                     85: }
                     86: $withoutBom = str_replace("\x00\x00\xfe\xff", "", $ucs4); var_dump(bin2hex($withoutBom));
                     87: 
                     88: // Same range, prefixed with a little-endian BOM; the output is UCS-4BE, so the big-endian BOM bytes are what get stripped.
                     89: $ucs4 = '';
                     90: for ($cp = 0xd7ff; $cp < 0xe001; $cp++) {
                     91:     $ucs4 .= mb_convert_encoding("\xff\xfe\x00\x00" . pack('V', $cp), "UCS-4BE", "UTF-32");
                     92: }
                     93: $withoutBom = str_replace("\x00\x00\xfe\xff", "", $ucs4); var_dump(bin2hex($withoutBom));
                     94: ?>
                     95: --EXPECT--
                     96: UTF-8 redundancy
                     97: string(24) "000000310000003200000033"
                     98: string(24) "000000410000004200000043"
                     99: string(0) ""
                    100: string(0) ""
                    101: string(0) ""
                    102: string(0) ""
                    103: string(0) ""
                    104: string(0) ""
                    105: string(0) ""
                    106: string(0) ""
                    107: string(0) ""
                    108: string(0) ""
                    109: string(24) "000000a2000000a3000000a5"
                    110: string(0) ""
                    111: string(0) ""
                    112: string(0) ""
                    113: string(0) ""
                    114: string(0) ""
                    115: string(8) "00000080"
                    116: string(8) "000007ff"
                    117: string(0) ""
                    118: string(8) "00000800"
                    119: string(8) "0000ffff"
                    120: string(0) ""
                    121: string(8) "00010000"
                    122: string(8) "001fffff"
                    123: string(0) ""
                    124: string(8) "00200000"
                    125: string(8) "03ffffff"
                    126: string(0) ""
                    127: string(8) "04000000"
                    128: string(8) "6fffffff"
                    129: string(0) ""
                    130: UTF-8 and surrogates area
                    131: string(16) "0000d7ff0000e000"
                    132: UTF-32 code range
                    133: string(0) ""
                    134: string(8) "0010ffff"
                    135: string(0) ""
                    136: string(8) "0010ffff"
                    137: string(0) ""
                    138: string(8) "0010ffff"
                    139: string(8) "0000feff"
                    140: string(16) "0000feff0010ffff"
                    141: string(8) "0000feff"
                    142: string(16) "0000feff0010ffff"
                    143: UTF-32 and surrogates area
                    144: string(16) "0000d7ff0000e000"
                    145: string(16) "0000d7ff0000e000"
                    146: string(16) "0000d7ff0000e000"
                    147: string(16) "0000d7ff0000e000"
                    148: string(16) "0000d7ff0000e000"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>