File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / test / enc2.test
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:16 2012 UTC (12 years, 10 months ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: # 2002 May 24
    2: #
    3: # The author disclaims copyright to this source code.  In place of
    4: # a legal notice, here is a blessing:
    5: #
    6: #    May you do good and not evil.
    7: #    May you find forgiveness for yourself and forgive others.
    8: #    May you share freely, never taking more than you give.
    9: #
   10: #***********************************************************************
   11: # This file implements regression tests for SQLite library.  The focus of
   12: # this file is testing the SQLite routines used for converting between the
   13: # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
   14: # UTF-16be).
   15: #
   16: # $Id: enc2.test,v 1.1.1.1 2012/02/21 17:04:16 misho Exp $
   17: 
   18: set testdir [file dirname $argv0]
   19: source $testdir/tester.tcl
   20: 
   21: # If UTF16 support is disabled, ignore the tests in this file
   22: #
   23: ifcapable {!utf16} {
   24:   finish_test
   25:   return
   26: }
   27: 
   28: # The rough organisation of tests in this file is:
   29: #
   30: # enc2.1.*: Simple tests with a UTF-8 db.
   31: # enc2.2.*: Simple tests with a UTF-16LE db.
   32: # enc2.3.*: Simple tests with a UTF-16BE db.
   33: # enc2.4.*: Test that attached databases must have the same text encoding
   34: #           as the main database.
   35: # enc2.5.*: Test the behaviour of the library when a collation sequence is
   36: #           not available for the most desirable text encoding.
   37: # enc2.6.*: Similar test for user functions.
   38: # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
   39: #           wrong text encoding for the database.
   40: # enc2.8.*: Test sqlite3_complete16()
   41: #
   42: 
   43: db close
   44: 
   # utf8 --
   #
   #   Decode a UTF-16 byte string into a Tcl string.
   #
   # Arguments:
   #   str   Byte string holding UTF-16 text, optionally followed by a
   #         two-byte 0x00 0x00 terminator.
   #
   # Results:
   #   The text obtained by decoding $str with Tcl's "unicode" encoding,
   #   after the terminator (if present) has been removed.
   proc utf8 {str} {
     # View the input as a list of 8-bit integers so that the last two
     # bytes can be inspected.
     binary scan $str c* bytes

     # A trailing pair of zero bytes is a terminator, not text: rebuild the
     # byte string without it before decoding.
     set last [lindex $bytes end]
     set prev [lindex $bytes end-1]
     if {$last==0 && $prev==0} {
       set str [binary format c* [lrange $bytes 0 end-2]]
     }

     return [encoding convertfrom unicode $str]
   }
   57: 
   58: #
   59: # This proc contains all the tests in this file. It is run
   60: # three times. Each time the file 'test.db' contains a database
   61: # with the following contents:
   62: set dbcontents {
   63:   CREATE TABLE t1(a PRIMARY KEY, b, c);
   64:   INSERT INTO t1 VALUES('one', 'I', 1);
   65: }
   66: # This proc tests that we can open and manipulate the test.db 
   67: # database, and that it is possible to retrieve values in
   68: # various text encodings.
   69: #
   # run_test_script --
   #
   #   Open test.db and exercise it: read the existing row, insert more
   #   rows, look rows up by the value of column a, fetch text through a
   #   prepared statement as both UTF-8 and UTF-16, and finally check the
   #   encoding that the database reports.
   #
   # Arguments:
   #   t     Prefix for the names of the tests run here ($t.1 .. $t.10).
   #   enc   Value that "PRAGMA encoding" is expected to return.
   #
   # The "db" connection opened by the first test is still open on return.
   proc run_test_script {t enc} {

     # Connect to the database and read back its single row.
     do_test $t.1 {
       sqlite3 db test.db
       set DB [sqlite3_connection_pointer db]
       execsql {SELECT * FROM t1}
     } {one I 1}

     # Add a second row.
     do_test $t.2 {
       execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
       execsql {SELECT * FROM t1}
     } {one I 1 two II 2}

     # Add three more rows in a single script.
     do_test $t.3 {
       execsql {
         INSERT INTO t1 VALUES('three','III',3);
         INSERT INTO t1 VALUES('four','IV',4);
         INSERT INTO t1 VALUES('five','V',5);
       }
       execsql {SELECT * FROM t1}
     } {one I 1 two II 2 three III 3 four IV 4 five V 5}

     # Equality and IN look-ups on column a.
     do_test $t.4 {
       execsql {
         SELECT * FROM t1 WHERE a = 'one';
       }
     } {one I 1}
     do_test $t.5 {
       execsql {
         SELECT * FROM t1 WHERE a = 'four';
       }
     } {four IV 4}
     ifcapable subquery {
       do_test $t.6 {
         execsql {
           SELECT * FROM t1 WHERE a IN ('one', 'two');
         }
       } {one I 1 two II 2}
     }

     # Step one prepared statement twice: the first row is fetched as
     # UTF-8 text, the second as UTF-16 text that is decoded with [utf8].
     do_test $t.7 {
       set STMT [sqlite3_prepare $DB {SELECT a FROM t1 WHERE c>3;} -1 TAIL]
       sqlite3_step $STMT
       sqlite3_column_text $STMT 0
     } {four}

     do_test $t.8 {
       sqlite3_step $STMT
       utf8 [sqlite3_column_text16 $STMT 0]
     } {five}

     do_test $t.9 {
       sqlite3_finalize $STMT
     } {SQLITE_OK}

     # VACUUM the database when the build supports it.
     ifcapable vacuum {
       execsql VACUUM
     }

     # The database must report the encoding the caller expects.
     do_test $t.10 {
       db eval {PRAGMA encoding}
     } $enc

   }
  138: 
  139: # The three unicode encodings understood by SQLite.
  140: set encodings [list UTF-8 UTF-16le UTF-16be]
  141: 
  142: set sqlite_os_trace 0
  143: set i 1
  144: foreach enc $encodings {
  145:   forcedelete test.db
  146:   sqlite3 db test.db
  147:   db eval "PRAGMA encoding = \"$enc\""
  148:   execsql $dbcontents
  149:   do_test enc2-$i.0.1 {
  150:     db eval {PRAGMA encoding}
  151:   } $enc
  152:   do_test enc2-$i.0.2 {
  153:     db eval {PRAGMA encoding=UTF8}
  154:     db eval {PRAGMA encoding}
  155:   } $enc
  156:   do_test enc2-$i.0.3 {
  157:     db eval {PRAGMA encoding=UTF16le}
  158:     db eval {PRAGMA encoding}
  159:   } $enc
  160:   do_test enc2-$i.0.4 {
  161:     db eval {PRAGMA encoding=UTF16be}
  162:     db eval {PRAGMA encoding}
  163:   } $enc
  164: 
  165:   db close
  166:   run_test_script enc2-$i $enc
  167:   db close
  168:   incr i
  169: }
  170: 
  171: # Test that it is an error to try to attach a database with a different
  172: # encoding to the main database.
  173: ifcapable attach {
  174:   do_test enc2-4.1 {
  175:     forcedelete test.db
  176:     sqlite3 db test.db
  177:     db eval "PRAGMA encoding = 'UTF-8'"
  178:     db eval "CREATE TABLE abc(a, b, c);"
  179:   } {}
  180:   do_test enc2-4.2 {
  181:     forcedelete test2.db
  182:     sqlite3 db2 test2.db
  183:     db2 eval "PRAGMA encoding = 'UTF-16'"
  184:     db2 eval "CREATE TABLE abc(a, b, c);"
  185:   } {}
  186:   do_test enc2-4.3 {
  187:     catchsql {
  188:       ATTACH 'test2.db' as aux;
  189:     }
  190:   } {1 {attached databases must use the same text encoding as main database}}
  191:   db2 close
  192:   db close
  193: }
  194: 
  195: # The following tests - enc2-5.* - test that SQLite selects the correct
  196: # collation sequence when more than one is available.
  197: 
  198: set ::values [list one two three four five]
  199: set ::test_collate_enc INVALID
  # test_collate --
  #
  #   Comparison callback that orders strings by their position in the
  #   global list ::values. The encoding label it is called with is stored
  #   in ::test_collate_enc so that a test can see which variant ran.
  #
  # Arguments:
  #   enc   Encoding label supplied by the caller.
  #   lhs   Left-hand string.
  #   rhs   Right-hand string.
  #
  # Results:
  #   Index of lhs in ::values minus index of rhs in ::values. A string
  #   that is not in ::values counts as index -1.
  proc test_collate {enc lhs rhs} {
    set ::test_collate_enc $enc
    set l [lsearch -exact $::values $lhs]
    set r [lsearch -exact $::values $rhs]
    # Braced so the expression is substituted once, by expr itself, and
    # can be byte-compiled.
    set res [expr {$l - $r}]
    # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
    return $res
  }
  208: 
  209: forcedelete test.db
  210: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  211: do_test enc2-5.0 {
  212:   execsql {
  213:     CREATE TABLE t5(a);
  214:     INSERT INTO t5 VALUES('one');
  215:     INSERT INTO t5 VALUES('two');
  216:     INSERT INTO t5 VALUES('five');
  217:     INSERT INTO t5 VALUES('three');
  218:     INSERT INTO t5 VALUES('four');
  219:   }
  220: } {}
  221: do_test enc2-5.1 {
  222:   add_test_collate $DB 1 1 1
  223:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
  224:   lappend res $::test_collate_enc
  225: } {one two three four five UTF-8}
  226: do_test enc2-5.2 {
  227:   add_test_collate $DB 0 1 0
  228:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  229:   lappend res $::test_collate_enc
  230: } {one two three four five UTF-16LE}
  231: do_test enc2-5.3 {
  232:   add_test_collate $DB 0 0 1
  233:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  234:   lappend res $::test_collate_enc
  235: } {one two three four five UTF-16BE}
  236: 
  237: db close
  238: forcedelete test.db
  239: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  240: execsql {pragma encoding = 'UTF-16LE'}
  241: do_test enc2-5.4 {
  242:   execsql {
  243:     CREATE TABLE t5(a);
  244:     INSERT INTO t5 VALUES('one');
  245:     INSERT INTO t5 VALUES('two');
  246:     INSERT INTO t5 VALUES('five');
  247:     INSERT INTO t5 VALUES('three');
  248:     INSERT INTO t5 VALUES('four');
  249:   }
  250: } {}
  251: do_test enc2-5.5 {
  252:   add_test_collate $DB 1 1 1
  253:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  254:   lappend res $::test_collate_enc
  255: } {one two three four five UTF-16LE}
  256: do_test enc2-5.6 {
  257:   add_test_collate $DB 1 0 1
  258:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  259:   lappend res $::test_collate_enc
  260: } {one two three four five UTF-16BE}
  261: do_test enc2-5.7 {
  262:   add_test_collate $DB 1 0 0
  263:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  264:   lappend res $::test_collate_enc
  265: } {one two three four five UTF-8}
  266: 
  267: db close
  268: forcedelete test.db
  269: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  270: execsql {pragma encoding = 'UTF-16BE'}
  271: do_test enc2-5.8 {
  272:   execsql {
  273:     CREATE TABLE t5(a);
  274:     INSERT INTO t5 VALUES('one');
  275:     INSERT INTO t5 VALUES('two');
  276:     INSERT INTO t5 VALUES('five');
  277:     INSERT INTO t5 VALUES('three');
  278:     INSERT INTO t5 VALUES('four');
  279:   }
  280: } {}
  281: do_test enc2-5.9 {
  282:   add_test_collate $DB 1 1 1
  283:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  284:   lappend res $::test_collate_enc
  285: } {one two three four five UTF-16BE}
  286: do_test enc2-5.10 {
  287:   add_test_collate $DB 1 1 0
  288:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  289:   lappend res $::test_collate_enc
  290: } {one two three four five UTF-16LE}
  291: do_test enc2-5.11 {
  292:   add_test_collate $DB 1 0 0
  293:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  294:   lappend res $::test_collate_enc
  295: } {one two three four five UTF-8}
  296: 
  297: # Also test that a UTF-16 collation factory works.
  # After add_test_collate is called with all three flags set to 0, a query
  # that names the test_collate collation must fail.
  # NOTE(review): the three flags appear to select the UTF-8, UTF-16LE and
  # UTF-16BE variants of the collation - confirm against add_test_collate.
  do_test enc2-5.12 {
    add_test_collate $DB 0 0 0
    catchsql {
      SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
    }
  } {1 {no such collation sequence: test_collate}}
  304: do_test enc2-5.13 {
  305:   add_test_collate_needed $DB 
  306:   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
  307:   lappend res $::test_collate_enc
  308: } {one two three four five UTF-16BE}
  309: do_test enc2-5.14 {
  310:   set ::sqlite_last_needed_collation
  311: } test_collate
  312: 
  313: db close
  314: forcedelete test.db
  315: 
  316: do_test enc2-5.15 {
  317:   sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
  318:   add_test_collate_needed $::DB
  319:   set ::sqlite_last_needed_collation
  320: } {}
  321: do_test enc2-5.16 {
  322:   execsql {CREATE TABLE t1(a varchar collate test_collate);}
  323: } {}
  324: do_test enc2-5.17 {
  325:   set ::sqlite_last_needed_collation
  326: } {test_collate}
  327: 
  328: # The following tests - enc2-6.* - test that SQLite selects the correct
  329: # user function when more than one is available.
  330: 
  # test_function --
  #
  #   Build the value returned by the SQL test function.
  #
  # Arguments:
  #   enc   Encoding label supplied by the caller.
  #   arg   Argument value supplied by the caller.
  #
  # Results:
  #   The label and the argument joined by a single space.
  proc test_function {enc arg} {
    return [format {%s %s} $enc $arg]
  }
  334: 
  335: db close
  336: forcedelete test.db
  337: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  338: execsql {pragma encoding = 'UTF-8'}
  339: do_test enc2-6.0 {
  340:   execsql {
  341:     CREATE TABLE t5(a);
  342:     INSERT INTO t5 VALUES('one');
  343:   }
  344: } {}
  345: do_test enc2-6.1 {
  346:   add_test_function $DB 1 1 1
  347:   execsql {
  348:     SELECT test_function('sqlite')
  349:   }
  350: } {{UTF-8 sqlite}}
  351: db close
  352: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  353: do_test enc2-6.2 {
  354:   add_test_function $DB 0 1 0
  355:   execsql {
  356:     SELECT test_function('sqlite')
  357:   }
  358: } {{UTF-16LE sqlite}}
  359: db close
  360: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  361: do_test enc2-6.3 {
  362:   add_test_function $DB 0 0 1
  363:   execsql {
  364:     SELECT test_function('sqlite')
  365:   }
  366: } {{UTF-16BE sqlite}}
  367: 
  368: db close
  369: forcedelete test.db
  370: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  371: execsql {pragma encoding = 'UTF-16LE'}
  # Populate the freshly created database. The id is enc2-6.3.1 rather than
  # enc2-6.3 because enc2-6.3 is already the id of the test just before the
  # database was recreated; a repeated id makes a failure report ambiguous.
  do_test enc2-6.3.1 {
    execsql {
      CREATE TABLE t5(a);
      INSERT INTO t5 VALUES('sqlite');
    }
  } {}
  378: do_test enc2-6.4 {
  379:   add_test_function $DB 1 1 1
  380:   execsql {
  381:     SELECT test_function('sqlite')
  382:   }
  383: } {{UTF-16LE sqlite}}
  384: db close
  385: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  386: do_test enc2-6.5 {
  387:   add_test_function $DB 0 1 0
  388:   execsql {
  389:     SELECT test_function('sqlite')
  390:   }
  391: } {{UTF-16LE sqlite}}
  392: db close
  393: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  394: do_test enc2-6.6 {
  395:   add_test_function $DB 0 0 1
  396:   execsql {
  397:     SELECT test_function('sqlite')
  398:   }
  399: } {{UTF-16BE sqlite}}
  400: 
  401: db close
  402: forcedelete test.db
  403: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  404: execsql {pragma encoding = 'UTF-16BE'}
  405: do_test enc2-6.7 {
  406:   execsql {
  407:     CREATE TABLE t5(a);
  408:     INSERT INTO t5 VALUES('sqlite');
  409:   }
  410: } {}
  411: do_test enc2-6.8 {
  412:   add_test_function $DB 1 1 1
  413:   execsql {
  414:     SELECT test_function('sqlite')
  415:   }
  416: } {{UTF-16BE sqlite}}
  417: db close
  418: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  419: do_test enc2-6.9 {
  420:   add_test_function $DB 0 1 0
  421:   execsql {
  422:     SELECT test_function('sqlite')
  423:   }
  424: } {{UTF-16LE sqlite}}
  425: db close
  426: sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
  427: do_test enc2-6.10 {
  428:   add_test_function $DB 0 0 1
  429:   execsql {
  430:     SELECT test_function('sqlite')
  431:   }
  432: } {{UTF-16BE sqlite}}
  433: 
  434: 
  435: db close
  436: forcedelete test.db
  437: 
  438: # The following tests - enc2-7.* - function as follows:
  439: #
  440: # 1: Open an empty database file assuming UTF-16 encoding.
  441: # 2: Open the same database with a different handle assuming UTF-8. Create
  442: #    a table using this handle.
  443: # 3: Read the sqlite_master table from the first handle. 
  444: # 4: Ensure the first handle recognises the database encoding is UTF-8.
  445: #
  446: do_test enc2-7.1 {
  447:   sqlite3 db test.db
  448:   execsql {
  449:     PRAGMA encoding = 'UTF-16';
  450:     SELECT * FROM sqlite_master;
  451:   }
  452: } {}
  453: do_test enc2-7.2 {
  454:   set enc [execsql {
  455:     PRAGMA encoding;
  456:   }]
  457:   string range $enc 0 end-2 ;# Chop off the "le" or "be"
  458: } {UTF-16}
  459: do_test enc2-7.3 {
  460:   sqlite3 db2 test.db
  461:   execsql {
  462:     PRAGMA encoding = 'UTF-8';
  463:     CREATE TABLE abc(a, b, c);
  464:   } db2
  465: } {}
  # Read the schema through the default handle; the row for table abc is
  # expected. The number in the expected row is 3 when $AUTOVACUUM is true
  # and 2 otherwise. The expression is braced so that expr performs the
  # substitution itself, exactly once.
  do_test enc2-7.4 {
    execsql {
      SELECT * FROM sqlite_master;
    }
  } "table abc abc [expr {$AUTOVACUUM ? 3 : 2}] {CREATE TABLE abc(a, b, c)}"
  471: do_test enc2-7.5 {
  472:   execsql {
  473:     PRAGMA encoding;
  474:   }
  475: } {UTF-8}
  476: 
  477: db close
  478: db2 close
  479: 
  # utf16 --
  #
  #   Encode a Tcl string with the "unicode" encoding and terminate it.
  #
  # Arguments:
  #   utf8   The string to encode.
  #
  # Results:
  #   The encoded bytes followed by a two-byte 0x00 0x00 terminator.
  proc utf16 {utf8} {
    set encoded [encoding convertto unicode $utf8]
    return "$encoded\x00\x00"
  }
  485: ifcapable {complete} {
  486:   do_test enc2-8.1 {
  487:     sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
  488:   } {1}
  489:   do_test enc2-8.2 {
  490:     sqlite3_complete16 [utf16 "SELECT * FROM"]
  491:   } {0}
  492: }
  493: 
  494: # Test that the encoding of an empty database may still be set after the
  495: # (empty) schema has been initialized.
  496: forcedelete test.db
  497: do_test enc2-9.1 {
  498:   sqlite3 db test.db
  499:   execsql {
  500:     PRAGMA encoding = 'UTF-8';
  501:     PRAGMA encoding;
  502:   }
  503: } {UTF-8}
  504: do_test enc2-9.2 {
  505:   sqlite3 db test.db
  506:   execsql {
  507:     PRAGMA encoding = 'UTF-16le';
  508:     PRAGMA encoding;
  509:   }
  510: } {UTF-16le}
  511: do_test enc2-9.3 {
  512:   sqlite3 db test.db
  513:   execsql {
  514:     SELECT * FROM sqlite_master;
  515:     PRAGMA encoding = 'UTF-8';
  516:     PRAGMA encoding;
  517:   }
  518: } {UTF-8}
  519: do_test enc2-9.4 {
  520:   sqlite3 db test.db
  521:   execsql {
  522:     PRAGMA encoding = 'UTF-16le';
  523:     CREATE TABLE abc(a, b, c);
  524:     PRAGMA encoding;
  525:   }
  526: } {UTF-16le}
  527: do_test enc2-9.5 {
  528:   sqlite3 db test.db
  529:   execsql {
  530:     PRAGMA encoding = 'UTF-8';
  531:     PRAGMA encoding;
  532:   }
  533: } {UTF-16le}
  534: 
  535: # Ticket #1987.
  536: # Disallow encoding changes once the encoding has been set.
  537: #
  538: do_test enc2-10.1 {
  539:   db close
  540:   forcedelete test.db test.db-journal
  541:   sqlite3 db test.db
  542:   db eval {
  543:     PRAGMA encoding=UTF16;
  544:     CREATE TABLE t1(a);
  545:     PRAGMA encoding=UTF8;
  546:     CREATE TABLE t2(b);
  547:   }
  548:   db close
  549:   sqlite3 db test.db
  550:   db eval {
  551:     SELECT name FROM sqlite_master
  552:   }
  553: } {t1 t2}
  554: 
  555: finish_test

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>