File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / test / enc.test
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:16 2012 UTC (12 years, 10 months ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: # 2002 May 24
    2: #
    3: # The author disclaims copyright to this source code.  In place of
    4: # a legal notice, here is a blessing:
    5: #
    6: #    May you do good and not evil.
    7: #    May you find forgiveness for yourself and forgive others.
    8: #    May you share freely, never taking more than you give.
    9: #
   10: #***********************************************************************
   11: # This file implements regression tests for SQLite library.  The focus of
   12: # this file is testing the SQLite routines used for converting between the
   13: # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
   14: # UTF-16be).
   15: #
   16: # $Id: enc.test,v 1.1.1.1 2012/02/21 17:04:16 misho Exp $
   17: 
   18: set testdir [file dirname $argv0]
   19: source $testdir/tester.tcl
   20: 
   21: # Skip this test if the build does not support multiple encodings
   22: # (the "utf16" capability is absent): finish at once and stop sourcing.
   23: ifcapable {!utf16} {
   24:   finish_test
   25:   return
   26: }
   27: # Run do_test $testname to check that $got and $expect hold the same bytes.
   28: proc do_bincmp_test {testname got expect} {
   29:   binary scan $got c* gotBytes
   30:   binary scan $expect c* wantBytes
   31:   do_test $testname [list set dummy $gotBytes] $wantBytes
   32: }
   33: 
   34: # $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
   35: # to change the byte-order of the string; returns the swapped string.
   36: proc swap_byte_order {utf16} {
   37:   binary scan $utf16 c* ints
   38:   # Pre-set the result list: with empty input the loop never runs, and
   39:   # the final "binary format" would otherwise read an unset variable.
   40:   set ints2 {}
   41:   foreach {a b} $ints {
   42:     lappend ints2 $b $a
   43:   }
   44:   return [binary format c* $ints2]
   45: }
   46: 
   47: #
   48: # Test that the SQLite routines for converting between UTF encodings
   49: # produce the same results as their TCL counterparts.
   50: #
   51: # $testname is the prefix to be used for the test names.
   52: # $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
   53: #
   54: # The test procedure is:
   55: # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
   56: #    SQLite routines produce the same results.
   57: #
   58: # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
   59: #    SQLite routines produce the same results.
   60: #
   61: # 3. Use the SQLite routines to convert the native machine order UTF-16
   62: #    representation back to the original UTF-8. Check that the result
   63: #    matches the original representation.
   64: #
   65: # 4. Add a byte-order mark to each of the UTF-16 representations and
   66: #    check that the SQLite routines can convert them back to UTF-8.  For
   67: #    byte-order mark info, refer to section 3.10 of the unicode standard.
   68: #
   69: # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
   70: #    that SQLite can convert them both to native byte order UTF-16 
   71: #    strings, sans BOM.
   72: #
   73: # Coverage:
   74: #
   75: # sqlite_utf8to16be (step 2)
   76: # sqlite_utf8to16le (step 1)
   77: # sqlite_utf16to8 (steps 3, 4)
   78: # sqlite_utf16to16le (step 5)
   79: # sqlite_utf16to16be (step 5)
   80: #
   81: proc test_conversion {testname str} {
   82:   # Runs steps 1-5 from the header comment on $str; tests are named $testname.<step>.
   83:   # Step 1: UTF-8 -> UTF-16LE via SQLite, checked against Tcl's conversion.
   84:   set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
   85:   set utf16le_tcl [encoding convertto unicode $str]
   86:   append utf16le_tcl "\x00\x00"
   87:   if { $::tcl_platform(byteOrder)!="littleEndian" } {
   88:     set utf16le_tcl [swap_byte_order $utf16le_tcl]
   89:   }
   90:   do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
   91:   set utf16le $utf16le_tcl
   92: 
   93:   # Step 2: UTF-8 -> UTF-16BE via SQLite, checked against Tcl's conversion.
   94:   set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
   95:   set utf16be_tcl [encoding convertto unicode $str]
   96:   append utf16be_tcl "\x00\x00"
   97:   if { $::tcl_platform(byteOrder)=="littleEndian" } {
   98:     set utf16be_tcl [swap_byte_order $utf16be_tcl]
   99:   }
  100:   do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
  101:   set utf16be $utf16be_tcl
  102:  
  103:   # Step 3: native byte order UTF-16 -> UTF-8, checked against [binarize $str].
  104:   if { $::tcl_platform(byteOrder)=="littleEndian" } {
  105:     set utf16 $utf16le
  106:   } else {
  107:     set utf16 $utf16be
  108:   }
  109:   set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  110:   do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]
  111: 
  112:   # Step 4 (little endian): add BOM FF FE, convert to UTF-8 (trailing "1" arg: TODO confirm meaning).
  113:   append utf16le_bom "\xFF\xFE" $utf16le
  114:   set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
  115:   do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]
  116: 
  117:   # Step 4 (big endian): add BOM FE FF, convert to UTF-8.
  118:   append utf16be_bom "\xFE\xFF" $utf16be
  119:   set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  120:   do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]
  121: 
  122:   # Step 5 (little endian to little endian): result must equal the BOM-less $utf16le.
  123:   set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
  124:   do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le
  125: 
  126:   # Step 5 (big endian to big endian): result must equal the BOM-less $utf16be.
  127:   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
  128:   do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be
  129: 
  130:   # Step 5 (big endian to little endian): result must equal the BOM-less $utf16le.
  131:   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
  132:   do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le
  133: 
  134:   # Step 5 (little endian to big endian): result must equal the BOM-less $utf16be.
  135:   set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
  136:   do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
  137: }
  138: 
  139: translate_selftest
  140: # Conversion cases: ASCII, the empty string, single/mixed non-ASCII characters, long repeats.
  141: test_conversion enc-1 "hello world"
  142: test_conversion enc-2 "sqlite"
  143: test_conversion enc-3 ""
  144: test_conversion enc-X "\u0100"
  145: test_conversion enc-4 "\u1234"
  146: test_conversion enc-5 "\u4321abc"
  147: test_conversion enc-6 "\u4321\u1234"
  148: test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
  149: test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
  150: test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
  151: test_conversion enc-10 [string repeat "\uE000" 100]
  152: # Orders two strings with [string compare]; $enc is unused. Presumably the callback behind add_test_collate.
  153: proc test_collate {enc zLeft zRight} {
  154:   string compare $zLeft $zRight
  155: }
  156: add_test_collate $::DB 0 0 1
  157: do_test enc-11.1 {
  158:   execsql {
  159:     CREATE TABLE ab(a COLLATE test_collate, b);
  160:     INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
  161:     INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
  162:     CREATE INDEX ab_i ON ab(a, b);
  163:   }
  164: } {}
  165: do_test enc-11.2 {
  166:   set cp200 "\u00C8"
  167:   execsql {
  168:     SELECT count(*) FROM ab WHERE a = $::cp200;
  169:   }
  170: } {2}
  171: 
  172: finish_test

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>