Annotation of embedaddon/php/ext/mbstring/tests/illformed_utf_sequences.phpt, revision 1.1

1.1     ! misho       1: --TEST--
        !             2: Unicode standard conformance test (ill-formed UTF sequences.)
        !             3: --SKIPIF--
        !             4: <?php /* Skip the whole test when the mbstring extension is not loaded. */ extension_loaded('mbstring') or die('skip mbstring not available'); ?>
        !             5: --FILE--
        !             6: <?php
        !             7: echo "UTF-8 redundancy\n"; // Each line converts UTF-8 input to UCS-4BE and dumps it as hex; non-shortest forms must yield "" per --EXPECT--.
        !             8: var_dump(bin2hex(mb_convert_encoding("\x31\x32\x33", "UCS-4BE", "UTF-8"))); // "123" in shortest (1-byte) form: accepted
        !             9: var_dump(bin2hex(mb_convert_encoding("\x41\x42\x43", "UCS-4BE", "UTF-8"))); // "ABC" in shortest (1-byte) form: accepted
        !            10: var_dump(bin2hex(mb_convert_encoding("\xc0\xb1\xc0\xb2\xc0\xb3", "UCS-4BE", "UTF-8"))); // overlong 2-byte form of "123"
        !            11: var_dump(bin2hex(mb_convert_encoding("\xc1\x81\xc1\x82\xc1\x83", "UCS-4BE", "UTF-8"))); // overlong 2-byte form of "ABC"
        !            12: var_dump(bin2hex(mb_convert_encoding("\xe0\x80\xb1\xe0\x80\xb2\xe0\x80\xb3", "UCS-4BE", "UTF-8"))); // overlong 3-byte form of "123"
        !            13: var_dump(bin2hex(mb_convert_encoding("\xe0\x81\x81\xe0\x81\x82\xe0\x81\x83", "UCS-4BE", "UTF-8"))); // overlong 3-byte form of "ABC"
        !            14: var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x80\xb1\xf0\x80\x80\xb2\xf0\x80\x80\xb3", "UCS-4BE", "UTF-8"))); // overlong 4-byte form of "123"
        !            15: var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x81\x81\xf0\x80\x81\x82\xf0\x80\x81\x83", "UCS-4BE", "UTF-8"))); // overlong 4-byte form of "ABC"; all three sequences are complete 4-byte units
        !            16: var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x80\xb1\xf8\x80\x80\x80\xb2\xf8\x80\x80\x80\xb3", "UCS-4BE", "UTF-8"))); // overlong 5-byte form of "123"
        !            17: var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x81\x81\xf8\x80\x80\x81\x82\xf8\x80\x80\x81\x83", "UCS-4BE", "UTF-8"))); // overlong 5-byte form of "ABC"
        !            18: var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x80\xb1\xfc\x80\x80\x80\x80\xb2\xfc\x80\x80\x80\x80\xb3", "UCS-4BE", "UTF-8"))); // overlong 6-byte form of "123"
        !            19: var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x81\x81\xfc\x80\x80\x80\x81\x82\xfc\x80\x80\x80\x81\x83", "UCS-4BE", "UTF-8"))); // overlong 6-byte form of "ABC"
        !            20: 
        !            21: var_dump(bin2hex(mb_convert_encoding("\xc2\xa2\xc2\xa3\xc2\xa5", "UCS-4BE", "UTF-8"))); // U+00A2 U+00A3 U+00A5 in shortest (2-byte) form: accepted
        !            22: var_dump(bin2hex(mb_convert_encoding("\xe0\x82\xa2\xe0\x82\xa3\xe0\x82\xa5", "UCS-4BE", "UTF-8"))); // same code points as overlong 3-byte sequences: "" expected
        !            23: var_dump(bin2hex(mb_convert_encoding("\xf0\x80\x82\xa2\xf0\x80\x82\xa3\xf0\x80\x82\xa5", "UCS-4BE", "UTF-8"))); // same code points as overlong 4-byte sequences: "" expected
        !            24: var_dump(bin2hex(mb_convert_encoding("\xf8\x80\x80\x82\xa2\xf8\x80\x80\x82\xa3\xf8\x80\x80\x82\xa5", "UCS-4BE", "UTF-8"))); // same code points as overlong 5-byte sequences: "" expected
        !            25: var_dump(bin2hex(mb_convert_encoding("\xfc\x80\x80\x80\x82\xa2\xfc\x80\x80\x80\x82\xa3\xfc\x80\x80\x80\x82\xa5", "UCS-4BE", "UTF-8"))); // same code points as overlong 6-byte sequences: "" expected
        !            26: 
        !            27: var_dump(bin2hex(mb_convert_encoding("\xc1\xbf", "UCS-4BE", "UTF-8"))); // overlong 2-byte form of 0x7f: "" expected
        !            28: var_dump(bin2hex(mb_convert_encoding("\xc2\x80", "UCS-4BE", "UTF-8"))); // 0x80, smallest value that needs 2 bytes
        !            29: var_dump(bin2hex(mb_convert_encoding("\xdf\xbf", "UCS-4BE", "UTF-8"))); // 0x7ff, largest 2-byte value
        !            30: var_dump(bin2hex(mb_convert_encoding("\xe0\x9f\xbf", "UCS-4BE", "UTF-8"))); // overlong 3-byte form of 0x7ff (trailing byte must be a continuation byte, \xbf): "" expected
        !            31: var_dump(bin2hex(mb_convert_encoding("\xe0\xa0\x80", "UCS-4BE", "UTF-8"))); // 0x800, smallest value that needs 3 bytes
        !            32: var_dump(bin2hex(mb_convert_encoding("\xef\xbf\xbf", "UCS-4BE", "UTF-8"))); // 0xffff, largest 3-byte value
        !            33: var_dump(bin2hex(mb_convert_encoding("\xf0\x8f\xbf\xbf", "UCS-4BE", "UTF-8"))); // overlong 4-byte form of 0xffff: "" expected
        !            34: var_dump(bin2hex(mb_convert_encoding("\xf0\x90\x80\x80", "UCS-4BE", "UTF-8"))); // 0x10000, smallest value that needs 4 bytes
        !            35: var_dump(bin2hex(mb_convert_encoding("\xf7\xbf\xbf\xbf", "UCS-4BE", "UTF-8"))); // 0x1fffff, largest 4-byte value
        !            36: var_dump(bin2hex(mb_convert_encoding("\xf8\x87\xbf\xbf\xbf", "UCS-4BE", "UTF-8"))); // overlong 5-byte form of 0x1fffff: "" expected
        !            37: var_dump(bin2hex(mb_convert_encoding("\xf8\x88\x80\x80\x80", "UCS-4BE", "UTF-8"))); // 0x200000, smallest value that needs 5 bytes
        !            38: var_dump(bin2hex(mb_convert_encoding("\xfb\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8"))); // 0x3ffffff, largest 5-byte value
        !            39: var_dump(bin2hex(mb_convert_encoding("\xfc\x83\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8"))); // overlong 6-byte form of 0x3ffffff: "" expected
        !            40: var_dump(bin2hex(mb_convert_encoding("\xfc\x84\x80\x80\x80\x80", "UCS-4BE", "UTF-8"))); // 0x4000000, smallest value that needs 6 bytes
        !            41: var_dump(bin2hex(mb_convert_encoding("\xfd\xaf\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8"))); // 0x6fffffff: still converted according to --EXPECT--
        !            42: var_dump(bin2hex(mb_convert_encoding("\xfd\xbf\xbf\xbf\xbf\xbf", "UCS-4BE", "UTF-8"))); // 0x7fffffff: "" expected
        !            43: 
        !            44: echo "UTF-8 and surrogates area\n";
        !            45: $out = ''; // collects the UCS-4BE bytes produced for every probed code point
        !            46: for ($i = 0xd7ff; $i <= 0xe000; ++$i) { // U+D800..U+DFFF plus one neighbouring code point on each side
        !            47:     $out .= mb_convert_encoding(pack('C3', 0xe0 | ($i >> 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), "UCS-4BE", "UTF-8"); // 3-byte UTF-8 form of $i; & binds tighter than |, so each continuation byte is 0x80 | (six payload bits)
        !            48: }
        !            49: var_dump(bin2hex($out)); // --EXPECT-- lists only 0000d7ff and 0000e000, i.e. every value in between converts to nothing
        !            50: 
        !            51: echo "UTF-32 code range\n"; // Probes 0x110000 and 0x10ffff in each UTF-32 flavour; --EXPECT-- wants "" for the former.
        !            52: var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32BE"))); // 0x110000, big-endian
        !            53: var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32BE"))); // 0x10ffff, big-endian
        !            54: var_dump(bin2hex(mb_convert_encoding("\x00\x00\x11\x00", "UCS-4BE", "UTF-32LE"))); // 0x110000, little-endian
        !            55: var_dump(bin2hex(mb_convert_encoding("\xff\xff\x10\x00", "UCS-4BE", "UTF-32LE"))); // 0x10ffff, little-endian
        !            56: var_dump(bin2hex(mb_convert_encoding("\x00\x11\x00\x00", "UCS-4BE", "UTF-32"))); // 0x110000 in big-endian byte order, no BOM
        !            57: var_dump(bin2hex(mb_convert_encoding("\x00\x10\xff\xff", "UCS-4BE", "UTF-32"))); // 0x10ffff in big-endian byte order, no BOM; --EXPECT-- shows it is read as big-endian
        !            58: var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x11\x00\x00", "UCS-4BE", "UTF-32"))); // big-endian BOM + 0x110000; only the BOM (0000feff) is expected in the output
        !            59: var_dump(bin2hex(mb_convert_encoding("\x00\x00\xfe\xff\x00\x10\xff\xff", "UCS-4BE", "UTF-32"))); // big-endian BOM + 0x10ffff
        !            60: var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\x00\x00\x11\x00", "UCS-4BE", "UTF-32"))); // little-endian BOM + 0x110000; only the BOM (0000feff) is expected in the output
        !            61: var_dump(bin2hex(mb_convert_encoding("\xff\xfe\x00\x00\xff\xff\x10\x00", "UCS-4BE", "UTF-32"))); // little-endian BOM + 0x10ffff
        !            62: 
        !            63: echo "UTF-32 and surrogates area\n";
        !            64: $out = ''; // collects the UCS-4BE bytes produced for every probed code point
        !            65: for ($i = 0xd7ff; $i <= 0xe000; ++$i) { // U+D800..U+DFFF plus one neighbouring code point on each side
        !            66:     $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32BE"); // $i packed most significant byte first
        !            67: }
        !            68: var_dump(bin2hex($out)); // --EXPECT-- lists only 0000d7ff and 0000e000
        !            69: 
        !            70: $out = ''; // reset the accumulator for the little-endian run
        !            71: for ($i = 0xd7ff; $i <= 0xe000; ++$i) { // same code point range as the big-endian loop
        !            72:     $out .= mb_convert_encoding(pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32LE"); // $i packed least significant byte first
        !            73: }
        !            74: var_dump(bin2hex($out)); // --EXPECT-- lists only 0000d7ff and 0000e000
        !            75: 
        !            76: $out = ''; // reset the accumulator for the BOM-less "UTF-32" run
        !            77: for ($i = 0xd7ff; $i <= 0xe000; ++$i) { // same code point range as the explicit-endianness loops
        !            78:     $out .= mb_convert_encoding(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32"); // big-endian byte order, no BOM in front
        !            79: }
        !            80: var_dump(bin2hex($out)); // --EXPECT-- lists only 0000d7ff and 0000e000
        !            81: 
        !            82: $out = ''; // reset the accumulator for the big-endian-BOM run
        !            83: for ($i = 0xd7ff; $i <= 0xe000; ++$i) { // same code point range as the loops without a BOM
        !            84:     $out .= mb_convert_encoding("\x00\x00\xfe\xff". pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), "UCS-4BE", "UTF-32"); // big-endian BOM followed by $i, most significant byte first
        !            85: }
        !            86: var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out))); // strip the BOM copied into each conversion result so only the code points remain
        !            87: 
        !            88: 
        !            89: $out = ''; // reset the accumulator for the little-endian-BOM run
        !            90: for ($i = 0xd7ff; $i <= 0xe000; ++$i) { // same code point range as the loops without a BOM
        !            91:     $out .= mb_convert_encoding("\xff\xfe\x00\x00". pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), "UCS-4BE", "UTF-32"); // little-endian BOM followed by $i, least significant byte first
        !            92: }
        !            93: var_dump(bin2hex(str_replace("\x00\x00\xfe\xff", "", $out))); // the result is UCS-4BE, so the BOM to strip is in big-endian form (0000feff) even though the input BOM was little-endian
        !            94: ?>
        !            95: --EXPECT--
        !            96: UTF-8 redundancy
        !            97: string(24) "000000310000003200000033"
        !            98: string(24) "000000410000004200000043"
        !            99: string(0) ""
        !           100: string(0) ""
        !           101: string(0) ""
        !           102: string(0) ""
        !           103: string(0) ""
        !           104: string(0) ""
        !           105: string(0) ""
        !           106: string(0) ""
        !           107: string(0) ""
        !           108: string(0) ""
        !           109: string(24) "000000a2000000a3000000a5"
        !           110: string(0) ""
        !           111: string(0) ""
        !           112: string(0) ""
        !           113: string(0) ""
        !           114: string(0) ""
        !           115: string(8) "00000080"
        !           116: string(8) "000007ff"
        !           117: string(0) ""
        !           118: string(8) "00000800"
        !           119: string(8) "0000ffff"
        !           120: string(0) ""
        !           121: string(8) "00010000"
        !           122: string(8) "001fffff"
        !           123: string(0) ""
        !           124: string(8) "00200000"
        !           125: string(8) "03ffffff"
        !           126: string(0) ""
        !           127: string(8) "04000000"
        !           128: string(8) "6fffffff"
        !           129: string(0) ""
        !           130: UTF-8 and surrogates area
        !           131: string(16) "0000d7ff0000e000"
        !           132: UTF-32 code range
        !           133: string(0) ""
        !           134: string(8) "0010ffff"
        !           135: string(0) ""
        !           136: string(8) "0010ffff"
        !           137: string(0) ""
        !           138: string(8) "0010ffff"
        !           139: string(8) "0000feff"
        !           140: string(16) "0000feff0010ffff"
        !           141: string(8) "0000feff"
        !           142: string(16) "0000feff0010ffff"
        !           143: UTF-32 and surrogates area
        !           144: string(16) "0000d7ff0000e000"
        !           145: string(16) "0000d7ff0000e000"
        !           146: string(16) "0000d7ff0000e000"
        !           147: string(16) "0000d7ff0000e000"
        !           148: string(16) "0000d7ff0000e000"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>