1: # 2002 May 24
2: #
3: # The author disclaims copyright to this source code. In place of
4: # a legal notice, here is a blessing:
5: #
6: # May you do good and not evil.
7: # May you find forgiveness for yourself and forgive others.
8: # May you share freely, never taking more than you give.
9: #
10: #***********************************************************************
11: # This file implements regression tests for SQLite library. The focus of
12: # this file is testing the SQLite routines used for converting between the
13: # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
14: # UTF-16be).
15: #
16: # $Id: enc.test,v 1.1.1.1 2012/02/21 17:04:16 misho Exp $
17:
# Load the shared test harness from the directory holding this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Everything below exercises UTF-16 conversions. When the build was made
# without support for multiple encodings, end the script right away.
ifcapable !utf16 {
  finish_test
  return
}
27:
# Run a test case that compares two binary strings byte for byte.
#
# $testname is the name handed to do_test.
# $got is the binary string that was actually produced.
# $expect is the binary string it is required to match.
#
# Both strings are unpacked into lists of 8-bit integers. do_test is then
# given a script that evaluates to the byte list of $got, with the byte
# list of $expect as the expected result.
proc do_bincmp_test {testname got expect} {
  binary scan $got c* actual_bytes
  binary scan $expect c* wanted_bytes
  do_test $testname [list set dummy $actual_bytes] $wanted_bytes
}
33:
# Reverse the byte order of a UTF-16 encoded string.
#
# $utf16 is a binary string of UTF-16 code units. The two bytes of every
# pair are exchanged, which turns little-endian data into big-endian data
# and vice versa. The swapped data is returned as a new binary string.
#
# An empty input yields an empty result. The input is expected to hold an
# even number of bytes; odd-length input is not handled.
proc swap_byte_order {utf16} {
  binary scan $utf16 c* bytes

  # Start from an empty list so that empty input, for which the loop body
  # never runs, still leaves a defined value to format.
  set swapped {}
  foreach {first second} $bytes {
    lappend swapped $second $first
  }

  return [binary format c* $swapped]
}
46:
#
# Check that the SQLite routines for converting between UTF encodings
# agree with their TCL counterparts.
#
# $testname is the prefix used for the names of the generated test cases.
# $str is the string to test with (a normal TCL string).
#
# Reference data: [encoding convertto unicode $str] with two zero bytes
# appended, byte-swapped whenever the host byte order differs from the
# byte order wanted. This gives a UTF-16le and a UTF-16be reference.
#
# Test cases generated:
#
#   $testname.1        SQLite UTF-8 -> UTF-16le output equals the
#                      UTF-16le reference.
#   $testname.2        SQLite UTF-8 -> UTF-16be output equals the
#                      UTF-16be reference.
#   $testname.3        SQLite converts the reference in the host's byte
#                      order (as UTF16) to UTF-8; the output must equal
#                      [binarize $str].
#   $testname.4.le     As .3, with the UTF-16le reference prefixed by the
#   $testname.4.be     byte-order mark FF FE, respectively the UTF-16be
#                      reference prefixed by FE FF. For byte-order mark
#                      info, refer to section 3.10 of the unicode standard.
#   $testname.5.X.Y    The marked string in byte order X is converted by
#                      SQLite to UTF-16 in byte order Y; the output must
#                      equal the unmarked reference in byte order Y.
#
# Coverage:
#
#   sqlite_utf8to16be   (step 2)
#   sqlite_utf8to16le   (step 1)
#   sqlite_utf16to8     (steps 3, 4)
#   sqlite_utf16to16le  (step 5)
#   sqlite_utf16to16be  (step 5)
#
proc test_conversion {testname str} {
  # True when this machine stores multi-byte values low byte first.
  set host_is_le [expr {$::tcl_platform(byteOrder)=="littleEndian"}]

  # Step 1: UTF-8 to UTF-16le.
  set le_from_sqlite [test_translate $str UTF8 UTF16LE]
  set utf16le [encoding convertto unicode $str]
  append utf16le "\x00\x00"
  if {!$host_is_le} {
    set utf16le [swap_byte_order $utf16le]
  }
  do_bincmp_test $testname.1 $le_from_sqlite $utf16le

  # Step 2: UTF-8 to UTF-16be.
  set be_from_sqlite [test_translate $str UTF8 UTF16BE]
  set utf16be [encoding convertto unicode $str]
  append utf16be "\x00\x00"
  if {$host_is_le} {
    set utf16be [swap_byte_order $utf16be]
  }
  do_bincmp_test $testname.2 $be_from_sqlite $utf16be

  # Step 3: UTF-16 in the host's byte order back to UTF-8.
  if {$host_is_le} {
    set utf16_native $utf16le
  } else {
    set utf16_native $utf16be
  }
  set utf8_from_sqlite [test_translate $utf16_native UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_from_sqlite [binarize $str]

  # Step 4 (little endian): byte-order marked UTF-16le to UTF-8.
  # NOTE(review): this is the only call that passes a fourth argument (1)
  # to test_translate; its meaning is defined by that command - confirm.
  append bom_le "\xFF\xFE" $utf16le
  set utf8_from_sqlite [test_translate $bom_le UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $utf8_from_sqlite [binarize $str]

  # Step 4 (big endian): byte-order marked UTF-16be to UTF-8.
  append bom_be "\xFE\xFF" $utf16be
  set utf8_from_sqlite [test_translate $bom_be UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_from_sqlite [binarize $str]

  # Step 5 (little endian to little endian).
  set utf16_from_sqlite [test_translate $bom_le UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $utf16_from_sqlite $utf16le

  # Step 5 (big endian to big endian).
  set utf16_from_sqlite [test_translate $bom_be UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $utf16_from_sqlite $utf16be

  # Step 5 (big endian to little endian).
  set utf16_from_sqlite [test_translate $bom_be UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_from_sqlite $utf16le

  # Step 5 (little endian to big endian).
  set utf16_from_sqlite [test_translate $bom_le UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_from_sqlite $utf16be
}
138:
# Run the self-test provided by the test harness before the conversion
# cases below.
translate_selftest

# Plain ASCII text and the empty string.
test_conversion enc-1  "hello world"
test_conversion enc-2  "sqlite"
test_conversion enc-3  ""

# Short strings holding code points above U+007F, alone and mixed with
# ASCII.
test_conversion enc-X  "\u0100"
test_conversion enc-4  "\u1234"
test_conversion enc-5  "\u4321abc"
test_conversion enc-6  "\u4321\u1234"

# Long strings built by repeating a short pattern 100 times. enc-8 uses
# code points on either side of U+007F/U+0080 (1-byte to 2-byte UTF-8) and
# enc-9 on either side of U+07FF/U+0800 (2-byte to 3-byte UTF-8).
test_conversion enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion enc-10 [string repeat "\uE000" 100]
152:
# Comparison callback behind the "test_collate" collation used below.
#
# $enc is accepted but not used.
# $zLeft and $zRight are the two strings to compare.
#
# Returns the result of [string compare]: -1, 0 or 1 depending on whether
# $zLeft sorts before, equal to, or after $zRight.
proc test_collate {enc zLeft zRight} {
  string compare $zLeft $zRight
}
# Register the test_collate collation on the test database.
# NOTE(review): the three flags (0 0 1) presumably choose the text
# encodings the collation is registered for - confirm against the
# implementation of add_test_collate.
add_test_collate $::DB 0 0 1

# enc-11.1: create a table whose first column uses test_collate and
# insert two rows. X'C388' is the UTF-8 encoding of U+00C8. The second
# value is a non-canonical byte sequence: a 0xC0 lead byte followed by a
# long run of continuation bytes that ends in 0x83 0x88. An index is then
# built over both columns.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}

# enc-11.2: look the rows up by U+00C8. The expected count of 2 means
# that both rows inserted above must compare equal to that character.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

finish_test
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>