# 2007 April 26 # # The author disclaims copyright to this source code. # #************************************************************************* # This file implements tests for prefix-searching in the fts3 # component of the SQLite library. # # $Id: fts3an.test,v 1.1.1.1 2012/02/21 17:04:16 misho Exp $ # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts3 { finish_test return } # A large string to prime the pump with. set text { Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam sed turpis posuere placerat. Curabitur et lorem in lorem porttitor aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus, ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse potenti. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu, suscipit nec, consequat quis, risus. } db eval { CREATE VIRTUAL TABLE t1 USING fts3(c); INSERT INTO t1(rowid, c) VALUES(1, $text); INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row'); } # Exact match do_test fts3an-1.1 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'" } {1} # And a prefix do_test fts3an-1.2 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'" } {1} # Prefix includes exact match do_test fts3an-1.3 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'" } {1} # Make certain everything isn't considered a prefix! do_test fts3an-1.4 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'" } {} # Prefix across multiple rows. do_test fts3an-1.5 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'" } {1 2} # Likewise, with multiple hits in one document. do_test fts3an-1.6 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'" } {1 2} # Prefix which should only hit one document. do_test fts3an-1.7 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'" } {2} # * not at end is dropped. do_test fts3an-1.8 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'" } {} # Stand-alone * is dropped. do_test fts3an-1.9 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'" } {} # Phrase-query prefix. do_test fts3an-1.10 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'" } {2} do_test fts3an-1.11 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'" } {} # Phrase query with multiple prefix matches. do_test fts3an-1.12 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'" } {1 2} # Phrase query with multiple prefix matches. do_test fts3an-1.13 { execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'" } {2} # Test across updates (and, by implication, deletes). # Version of text without "lorem". regsub -all {[Ll]orem} $text '' ntext db eval { CREATE VIRTUAL TABLE t2 USING fts3(c); INSERT INTO t2(rowid, c) VALUES(1, $text); INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row'); UPDATE t2 SET c = $ntext WHERE rowid = 1; } # Can't see lorem as an exact match. do_test fts3an-2.1 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'" } {} # Can't see a prefix of lorem, either. do_test fts3an-2.2 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'" } {} # Can see lovely in the other document. do_test fts3an-2.3 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'" } {2} # Can still see other hits. do_test fts3an-2.4 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'" } {1 2} # Prefix which should only hit one document. do_test fts3an-2.5 { execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'" } {2} # Test with a segment which will have multiple levels in the tree. # Build a big document with lots of unique terms. set bigtext $text foreach c {a b c d e} { regsub -all {[A-Za-z]+} $bigtext "&$c" t append bigtext $t } # Populate a table with many copies of the big document, so that we # can test the number of hits found. Populate $ret with the expected # hit counts for each row. offsets() returns 4 elements for every # hit. We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for # $bigtext. set ret {6 1} db eval { BEGIN; CREATE VIRTUAL TABLE t3 USING fts3(c); INSERT INTO t3(rowid, c) VALUES(1, $text); INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row'); } for {set i 0} {$i<68} {incr i} { db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)} lappend ret 192 } db eval {COMMIT;} # Test that we get the expected number of hits. do_test fts3an-3.1 { set t {} db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} { set l [llength $o] lappend t [expr {$l/4}] } set t } $ret # Test a boundary condition: More than 2^16 terms that match a searched for # prefix in a single segment. # puts "This next test can take a little while (~ 30 seconds)..." do_test fts3an-4.1 { execsql { CREATE VIRTUAL TABLE ft USING fts3(x) } execsql BEGIN execsql { INSERT INTO ft VALUES(NULL) } execsql { INSERT INTO ft SELECT * FROM ft } ;# 2 execsql { INSERT INTO ft SELECT * FROM ft } ;# 4 execsql { INSERT INTO ft SELECT * FROM ft } ;# 8 execsql { INSERT INTO ft SELECT * FROM ft } ;# 16 execsql { INSERT INTO ft SELECT * FROM ft } ;# 32 execsql { INSERT INTO ft SELECT * FROM ft } ;# 64 execsql { INSERT INTO ft SELECT * FROM ft } ;# 128 execsql { INSERT INTO ft SELECT * FROM ft } ;# 256 execsql { INSERT INTO ft SELECT * FROM ft } ;# 512 execsql { INSERT INTO ft SELECT * FROM ft } ;# 1024 execsql { INSERT INTO ft SELECT * FROM ft } ;# 2048 execsql { INSERT INTO ft SELECT * FROM ft } ;# 4096 execsql { INSERT INTO ft SELECT * FROM ft } ;# 8192 execsql { INSERT INTO ft SELECT * FROM ft } ;# 16384 execsql { INSERT INTO ft SELECT * FROM ft } ;# 32768 execsql { INSERT INTO ft SELECT * FROM ft } ;# 65536 execsql { INSERT INTO ft SELECT * FROM ft } ;# 131072 execsql COMMIT execsql { UPDATE ft SET x = 'abc' || rowid } execsql { SELECT count(*) FROM ft WHERE x MATCH 'abc*' } } {131072} finish_test