Annotation of embedaddon/sqlite3/test/enc.test, revision

1.1       misho       1: # 2002 May 24
                      2: #
                      3: # The author disclaims copyright to this source code.  In place of
                      4: # a legal notice, here is a blessing:
                      5: #
                      6: #    May you do good and not evil.
                      7: #    May you find forgiveness for yourself and forgive others.
                      8: #    May you share freely, never taking more than you give.
                      9: #
                     10: #***********************************************************************
                     11: # This file implements regression tests for SQLite library.  The focus of
                     12: # this file is testing the SQLite routines used for converting between the
                     13: # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
                     14: # UTF-16be).
                     15: #
                     16: # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
                     18: set testdir [file dirname $argv0]
                     19: source $testdir/tester.tcl
                     21: # Skip this test if the build does not support multiple encodings
                     22: # (the utf16 capability is absent): finish the test run and return early.
                     23: ifcapable {!utf16} {
                     24:   finish_test
                     25:   return
                     26: }
                     27: # Run do_test $testname, comparing $got with $expect as lists of byte values (binary scan c*).
                     28: proc do_bincmp_test {testname got expect} {
                     29:   binary scan $expect \c* expectvals
                     30:   binary scan $got \c* gotvals
                     31:   do_test $testname [list set dummy $gotvals] $expectvals
                     32: }
                     34: # $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
                     35: # to change the byte-order of the string.
                     36: proc swap_byte_order {utf16} {
                     37:   binary scan $utf16 \c* ints
                     38:   set ints2 {}   ;# start empty so zero-length input returns "" instead of an unset-variable error
                     39:   foreach {a b} $ints {
                     40:     lappend ints2 $b
                     41:     lappend ints2 $a
                     42:   }
                     44:   return [binary format \c* $ints2]
                     45: }
                     47: #
                     48: # Test that the SQLite routines for converting between UTF encodings
                     49: # produce the same results as their TCL counterparts.
                     50: #
                     51: # $testname is the prefix to be used for the test names.
                     52: # $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
                     53: #
                     54: # The test procedure is:
                     55: # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
                     56: #    SQLite routines produce the same results.
                     57: #
                     58: # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
                     59: #    SQLite routines produce the same results.
                     60: #
                     61: # 3. Use the SQLite routines to convert the native machine order UTF-16
                     62: #    representation back to the original UTF-8. Check that the result
                     63: #    matches the original representation.
                     64: #
                     65: # 4. Add a byte-order mark to each of the UTF-16 representations and
                     66: #    check that the SQLite routines can convert them back to UTF-8.  For
                     67: #    byte-order mark info, refer to section 3.10 of the unicode standard.
                     68: #
                     69: # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
                     70: #    that SQLite can convert them both to native byte order UTF-16 
                     71: #    strings, sans BOM.
                     72: #
                     73: # Coverage:
                     74: #
                     75: # sqlite_utf8to16be (step 2)
                     76: # sqlite_utf8to16le (step 1)
                     77: # sqlite_utf16to8 (steps 3, 4)
                     78: # sqlite_utf16to16le (step 5)
                     79: # sqlite_utf16to16be (step 5)
                     80: #
                     81: proc test_conversion {testname str} {
                     82:   # Every check is named $testname.<step>[.<source order>[.<target order>]] so failures are identifiable.
                     83:   # Step 1.
                     84:   set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
                     85:   set utf16le_tcl [encoding convertto unicode $str]
                     86:   append utf16le_tcl "\x00\x00"
                     87:   if { $::tcl_platform(byteOrder)!="littleEndian" } {
                     88:     set utf16le_tcl [swap_byte_order $utf16le_tcl]
                     89:   }
                     90:   do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
                     91:   set utf16le $utf16le_tcl
                     93:   # Step 2.
                     94:   set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
                     95:   set utf16be_tcl [encoding convertto unicode $str]
                     96:   append utf16be_tcl "\x00\x00"
                     97:   if { $::tcl_platform(byteOrder)=="littleEndian" } {
                     98:     set utf16be_tcl [swap_byte_order $utf16be_tcl]
                     99:   }
                    100:   do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
                    101:   set utf16be $utf16be_tcl
                    103:   # Step 3.
                    104:   if { $::tcl_platform(byteOrder)=="littleEndian" } {
                    105:     set utf16 $utf16le
                    106:   } else {
                    107:     set utf16 $utf16be
                    108:   }
                    109:   set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
                    110:   do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]
                    112:   # Step 4 (little endian).
                    113:   append utf16le_bom "\xFF\xFE" $utf16le
                    114:   set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
                    115:   do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]
                    117:   # Step 4 (big endian).
                    118:   append utf16be_bom "\xFE\xFF" $utf16be
                    119:   set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
                    120:   do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]
                    122:   # Step 5 (little endian to little endian).
                    123:   set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
                    124:   do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le
                    126:   # Step 5 (big endian to big endian).
                    127:   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
                    128:   do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be
                    130:   # Step 5 (big endian to little endian).
                    131:   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
                    132:   do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le
                    134:   # Step 5 (little endian to big endian).
                    135:   set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
                    136:   do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
                    137: }
                    138: # translate_selftest is defined outside this file; presumably a sanity check of the translation test harness - TODO confirm.
                    139: translate_selftest
                    140: # Run the conversion checks on ASCII, empty, and non-ASCII inputs, including long repeated strings.
                    141: test_conversion enc-1 "hello world"
                    142: test_conversion enc-2 "sqlite"
                    143: test_conversion enc-3 ""
                    144: test_conversion enc-X "\u0100"
                    145: test_conversion enc-4 "\u1234"
                    146: test_conversion enc-5 "\u4321abc"
                    147: test_conversion enc-6 "\u4321\u1234"
                    148: test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
                    149: test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
                    150: test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
                    151: test_conversion enc-10 [string repeat "\uE000" 100]
                    152: # Collation callback: orders $zLeft and $zRight with Tcl's [string compare]; $enc is not used.
                    153: proc test_collate {enc zLeft zRight} {
                    154:   return [string compare $zLeft $zRight]
                    155: }
                    156: add_test_collate $::DB 0 0 1
                    157: do_test enc-11.1 {
                    158:   execsql {
                    159:     CREATE TABLE ab(a COLLATE test_collate, b);
                    160:     INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
                    161:     INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
                    162:     CREATE INDEX ab_i ON ab(a, b);
                    163:   }
                    164: } {}
                    165: do_test enc-11.2 {
                    166:   set cp200 "\u00C8"
                    167:   execsql {
                    168:     SELECT count(*) FROM ab WHERE a = $::cp200;
                    169:   }
                    170: } {2}
                    171: # enc-11: X'C388' is the UTF-8 form of U+00C8; the expected count of 2 means the long non-canonical byte sequence must also compare equal to it.
                    172: finish_test

FreeBSD-CVSweb <>