Annotation of embedaddon/sqlite3/test/fts3rnd.test, revision 1.1
1.1 ! misho 1: # 2009 December 03
! 2: #
! 3: # May you do good and not evil.
! 4: # May you find forgiveness for yourself and forgive others.
! 5: # May you share freely, never taking more than you give.
! 6: #
! 7: #***********************************************************************
! 8: #
! 9: # Brute force (random data) tests for FTS3.
! 10: #
! 11:
! 12: #-------------------------------------------------------------------------
! 13: #
! 14: # The FTS3 tests implemented in this file focus on testing that FTS3
! 15: # returns the correct set of documents for various types of full-text
! 16: # query. This is done using pseudo-randomly generated data and queries.
! 17: # The expected result of each query is calculated using Tcl code.
! 18: #
! 19: # 1. The database is initialized to contain a single table with three
! 20: # columns. 100 rows are inserted into the table. Each of the three
! 21: # values in each row is a document consisting of between 0 and 100
! 22: # terms. Terms are selected from a vocabulary of $G(nVocab) terms.
! 23: #
! 24: # 2. The following is performed 100 times:
! 25: #
! 26: # a. A row is inserted into the database. The row contents are
! 27: # generated as in step 1. The docid is a pseudo-randomly selected
! 28: # value between 0 and 1000000.
! 29: #
! 30: # b. A pseudo-randomly selected row is updated. One of its columns is
! 31: # set to contain a new document generated in the same way as the
! 32: # documents in step 1.
! 33: #
! 34: # c. A pseudo-randomly selected row is deleted.
! 35: #
! 36: # d. For each of several types of fts3 queries, 10 SELECT queries
! 37: # of the form:
! 38: #
! 39: # SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'
! 40: #
! 41: # are evaluated. The results are compared to those calculated by
! 42: # Tcl code in this file. The patterns used for the different query
! 43: # types are:
! 44: #
! 45: # 1. query = <term>
! 46: # 2. query = <prefix>
! 47: # 3. query = "<term> <term>"
! 48: # 4. query = "<term> <term> <term>"
! 49: # 5. query = "<prefix> <prefix> <prefix>"
! 50: # 6. query = <term> NEAR <term>
! 51: # 7. query = <term> NEAR/11 <term> NEAR/11 <term>
! 52: # 8. query = <term> OR <term>
! 53: # 9. query = <term> NOT <term>
! 54: # 10. query = <term> AND <term>
! 55: # 11. query = <term> NEAR <term> OR <term> NEAR <term>
! 56: # 12. query = <term> NEAR <term> NOT <term> NEAR <term>
! 57: # 13. query = <term> NEAR <term> AND <term> NEAR <term>
! 58: #
! 59: # where <term> is a term pseudo-randomly selected from the vocabulary
! 60: # and <prefix> is such a term with its final character removed,
! 61: # followed by a "*" character.
! 62: #
! 63: # Every second iteration, steps (a) through (d) above are performed
! 64: # within a single transaction. This forces the queries in (d) to
! 65: # read data from both the database and the in-memory hash table
! 66: # that caches the full-text index entries created by steps (a), (b)
! 67: # and (c) until the transaction is committed.
! 68: #
! 69: # The procedure above is run 5 times: twice with an advisory fts3 node size
! 70: # of 50 bytes (once per docid order), then with 500, 1000 and 2000 bytes.
! 71: #
! 72: # After the test using an advisory node-size of 50, an OOM test is run using
! 73: # the database. This test is similar to step (d) above, except that it tests
! 74: # the effects of transient and persistent OOM conditions encountered while
! 75: # executing each query.
! 76: #
! 77:
! 78: set testdir [file dirname $argv0]
! 79: source $testdir/tester.tcl
! 80:
! 81: # If this build does not include FTS3, skip the tests in this file.
! 82: #
! 83: ifcapable !fts3 { finish_test ; return }
! 84: source $testdir/fts3_common.tcl
! 85: source $testdir/malloc_common.tcl
! 86:
# Size of the vocabulary. G(nVocab) is the name used by the header comment
# of this file; nVocab is the variable read by [random_term].
set G(nVocab) 100
set nVocab $G(nVocab)
set lVocab [list]

# Seed the PRNG with a fixed value so that the generated data (and therefore
# every test case in this file) is reproducible from run to run.
expr {srand(0)}

# Generate a vocabulary of nVocab words. Each word is between 2 and 4
# characters long and consists only of lower-case ASCII letters. Duplicate
# words are possible and are not filtered out.
#
# The order of the rand() calls (length first, then one call per character)
# determines the generated data, so it must not be changed.
#
set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
for {set i 0} {$i < $nVocab} {incr i} {
  set len [expr {int(rand()*3)+2}]
  set word ""
  for {set iChar 0} {$iChar < $len} {incr iChar} {
    append word [lindex $lChar [expr {int(rand()*26)}]]
  }
  lappend lVocab $word
}
! 105:
# Return one term picked pseudo-randomly from the $::lVocab list. The index
# is drawn from the range 0 .. $::nVocab-1.
#
proc random_term {} {
  set iTerm [expr {int(rand() * $::nVocab)}]
  return [lindex $::lVocab $iTerm]
}
! 109:
# Build and return a document: a Tcl list of $nWord terms, each one obtained
# from a separate call to [random_term] (so terms may repeat).
#
proc generate_doc {nWord} {
  set terms [list]
  while {[llength $terms] < $nWord} {
    lappend terms [random_term]
  }
  return $terms
}
! 120:
! 121:
! 122:
# Primitives to update the table. Each one applies the same change to the
# FTS table t1 and to the Tcl array ::t1, which maps a docid to the list
# {a b c} of that row's three documents.
#
unset -nocomplain t1

# Insert a new row with docid $rowid. Each of the three columns receives a
# freshly generated document of between 0 and 99 terms.
#
# The local variable names a, b, c and rowid are referenced by the SQL text
# below and therefore must not be renamed.
#
proc insert_row {rowid} {
  foreach col {a b c} {
    set $col [generate_doc [expr {int(rand() * 100)}]]
  }
  execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
  set ::t1($rowid) [list $a $b $c]
}
# Delete the row with docid $rowid from table t1 and drop the matching entry
# from the ::t1 array. A missing array entry is silently ignored.
#
proc delete_row {rowid} {
  set sql { DELETE FROM t1 WHERE rowid = $rowid }
  execsql $sql
  catch { unset ::t1($rowid) }
}
# Replace one pseudo-randomly chosen column (a, b or c) of the row with docid
# $rowid by a newly generated document of between 0 and 99 terms. The ::t1
# array is updated first, then the database.
#
# The local variable names doc and rowid are referenced by the SQL text and
# therefore must not be renamed.
#
proc update_row {rowid} {
  set iCol [expr {int(rand() * 3)}]
  set colName [lindex {a b c} $iCol]
  set doc [generate_doc [expr {int(rand() * 100)}]]
  lset ::t1($rowid) $iCol $doc
  execsql "UPDATE t1 SET $colName = \$doc WHERE rowid = \$rowid"
}
! 144:
# Return the list of docids (ascending integer order) of all rows in the
# ::t1 array for which at least one column matches the phrase $zPrefix.
#
# Despite its name, $zPrefix may be a single term, a single prefix ("ab*")
# or a space-separated sequence of terms and/or prefixes. Each "*" in the
# argument matches any run of non-space characters. The phrase matches a
# column if it occurs in the column's text delimited by spaces, i.e. as a
# sequence of whole, consecutive tokens.
#
# Each docid appears at most once in the result.
#
proc simple_phrase {zPrefix} {
  # Translate the phrase into a regular expression. The leading and trailing
  # spaces (together with the spaces wrapped around each column below) anchor
  # the match on token boundaries.
  set reg [string map {* {[^ ]*}} $zPrefix]
  set reg " $reg "

  set ret [list]
  foreach key [lsort -integer [array names ::t1]] {
    foreach col $::t1($key) {
      # One matching column is enough; stop so the docid is added only once.
      if {[regexp $reg " $col "]} { lappend ret $key ; break }
    }
  }

  return $ret
}
! 162:
# This [proc] is used to test the FTS3 matchinfo() function.
#
# It computes, from the ::t1 array, the result expected from a query for the
# single token $zToken that selects the docid and decoded matchinfo() of each
# matching row. The return value is a flat list:
#
#     <docid> <matchinfo> <docid> <matchinfo> ...
#
# ordered by docid, ascending if $bDesc is false and descending otherwise.
# Rows in which the token does not occur are omitted. Each <matchinfo> is
# the list
#
#     1 3  h0 H0 D0  h1 H1 D1  h2 H2 D2
#
# where, for column i, hi is the number of occurrences of the token in this
# row, Hi the number of occurrences over all rows, and Di the number of rows
# with at least one occurrence. The leading "1 3" is the number of phrases
# in the query and the number of columns in the table.
#
proc simple_token_matchinfo {zToken bDesc} {

  # Per-column totals over the whole table.
  foreach i {0 1 2} {
    set nDoc($i) 0
    set nHit($i) 0
  }

  set dir -increasing
  if {$bDesc} { set dir -decreasing }

  # First pass: count hits per row and per column, accumulating the totals.
  foreach key [array names ::t1] {
    set a($key) [list]
    foreach i {0 1 2} col $::t1($key) {
      # -exact is required: without it [lsearch] does glob matching, so a
      # token containing *, ?, [ or \ would be counted as a pattern.
      set hit [llength [lsearch -exact -all $col $zToken]]
      lappend a($key) $hit
      incr nHit($i) $hit
      if {$hit>0} { incr nDoc($i) }
    }
  }

  # Second pass: emit an entry for each row with a hit in at least one
  # column (the largest of the three per-column counts is non-zero).
  set ret [list]
  foreach docid [lsort -integer $dir [array names a]] {
    if { [lindex [lsort -integer $a($docid)] end] } {
      set matchinfo [list 1 3]
      foreach i {0 1 2} hit $a($docid) {
        lappend matchinfo $hit $nHit($i) $nDoc($i)
      }
      lappend ret $docid $matchinfo
    }
  }

  return $ret
}
! 201:
# Compute the expected result of a NEAR query from the ::t1 array.
#
# $termlist is the list of terms joined by NEAR operators and $nNear is the
# NEAR distance. Within a single column, the positions of the first term are
# taken as the starting set. For each following term, the set is replaced by
# the positions of that term lying within ($nNear + 1) positions of a member
# of the current set (a position never matches itself). The column matches
# if the set is non-empty after the last term.
#
# Returns the docids of all rows with at least one matching column, sorted
# as integers with duplicates removed.
#
proc simple_near {termlist nNear} {
  set matched [list]
  set firstTerm [lindex $termlist 0]
  set otherTerms [lrange $termlist 1 end]

  foreach {docid columns} [array get ::t1] {
    foreach tokens $columns {

      set anchors [lsearch -exact -all $tokens $firstTerm]
      foreach term $otherTerms {
        set next [list]
        foreach pos $anchors {
          # Window of token positions to search around $pos, clamped at the
          # start of the document. [lrange] clamps the far end by itself.
          set lo [expr {$pos - $nNear - 1}]
          set hi [expr {$pos + $nNear + 1}]
          if {$lo < 0} { set lo 0 }
          foreach off [lsearch -exact -all [lrange $tokens $lo $hi] $term] {
            # Convert the window-relative offset to a document position.
            set at [expr {$off + $lo}]
            if {$at != $pos} { lappend next $at }
          }
        }
        set anchors [lsort -unique -integer $next]
      }

      if {[llength $anchors]} {
        lappend matched $docid
      }
    }
  }

  lsort -unique -integer $matched
}
! 232:
! 233: # The following three procs:
! 234: #
! 235: # setop_not A B
! 236: # setop_or A B
! 237: # setop_and A B
! 238: #
! 239: # each take two arguments. Both arguments must be lists of integer values
! 240: # sorted by value. The return value is the list produced by evaluating
! 241: # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or
! 242: # AND.
! 243: #
# Return the elements of list A that do not appear in list B, in the order
# in which they occur in A (the equivalent of "A NOT B").
#
proc setop_not {A B} {
  foreach docid $B { set exclude($docid) {} }
  set out [list]
  foreach docid $A {
    if {[info exists exclude($docid)]} continue
    lappend out $docid
  }
  return $out
}
# Return the union of lists A and B, sorted as integers with duplicates
# removed (the equivalent of "A OR B").
#
proc setop_or {A B} {
  set merged [concat $A $B]
  return [lsort -unique -integer $merged]
}
# Return the elements of list A that also appear in list B, in the order in
# which they occur in A (the equivalent of "A AND B").
#
proc setop_and {A B} {
  foreach docid $B { set wanted($docid) {} }
  set out [list]
  foreach docid $A {
    if {![info exists wanted($docid)]} continue
    lappend out $docid
  }
  return $out
}
! 259:
# Decode $blob as a sequence of 32-bit signed integers stored in the byte
# order of the host platform and return them as a Tcl list.
#
proc mit {blob} {
  switch -exact -- $::tcl_platform(byteOrder) {
    littleEndian { set fmt i* }
    bigEndian    { set fmt I* }
  }
  binary scan $blob $fmt r
  return $r
}
# Register the Tcl proc [mit] as the SQL function mit() on the [db] handle.
# The queries in this file call it as mit(matchinfo(t1)).
db func mit mit

# NOTE(review): presumably a test-harness variable that switches FTS3 to its
# enhanced query syntax, which the AND / NOT queries in this file rely on -
# confirm against the test fixture.
set sqlite_fts3_enable_parentheses 1
! 268:
# Run the query $sql twice in the caller's scope via [do_select_test]: once
# with "ORDER BY docid ASC" appended, expecting $res, and once with
# "ORDER BY docid DESC" appended, expecting $res sorted in decreasing integer
# order. The two test cases are named $tn.asc and $tn.desc.
#
# $res must be a list of integers in ascending order.
#
proc do_orderbydocid_test {tn sql res} {
  set ascSql  "$sql ORDER BY docid ASC"
  set descSql "$sql ORDER BY docid DESC"
  uplevel [list do_select_test $tn.asc $ascSql $res]
  uplevel [list do_select_test $tn.desc $descSql \
      [lsort -integer -decreasing $res]
  ]
}
! 275:
# Number of insert/update/delete/query rounds run for each table
# configuration. The final round of each nodesize=50 configuration is also
# the OOM round (see DO_MALLOC_TEST below).
set NUM_TRIALS 100

# Run the whole procedure once for each {nodesize order} pair, where nodesize
# is the advisory FTS node size in bytes and order is the value passed to the
# fts4 "order=" option.
foreach {nodesize order} {
  50    DESC
  50    ASC
  500   ASC
  1000  DESC
  2000  ASC
} {
  catch { array unset ::t1 }
  set testname "$nodesize/$order"

  # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.
  #
  db transaction {
    catchsql { DROP TABLE t1 }
    execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)"
    execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
    for {set i 0} {$i < 100} {incr i} { insert_row $i }
  }

  for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} {
    # Close any transaction left open by the previous iteration.
    catchsql COMMIT

    # The last trial of each nodesize=50 run is the OOM trial. It uses fewer
    # repetitions per query type. The comparison is against $NUM_TRIALS, not
    # a literal, so that the OOM trial still runs (exactly once, at the end)
    # if NUM_TRIALS is changed.
    #
    # NOTE(review): DO_MALLOC_TEST is presumably read by do_select_test -
    # confirm in the sourced helper scripts.
    set DO_MALLOC_TEST 0
    set nRep 10
    if {$iTest==$NUM_TRIALS && $nodesize==50} {
      set DO_MALLOC_TEST 1
      set nRep 2
    }

    set ::testprefix fts3rnd-1.$testname.$iTest

    # Delete one row, update one row and insert one row. The row to delete
    # is always different from the row to update, and the docid to insert
    # is one not currently in use.
    #
    set rows [array names ::t1]
    set nRow [llength $rows]
    set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
    set iDelete $iUpdate
    while {$iDelete == $iUpdate} {
      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
    }
    set iInsert $iUpdate
    while {[info exists ::t1($iInsert)]} {
      set iInsert [expr {int(rand()*1000000)}]
    }

    # On even-numbered iterations the changes are committed before the
    # queries run. On odd-numbered iterations the transaction stays open
    # until the COMMIT at the end of the iteration, so the queries run
    # against uncommitted changes.
    execsql BEGIN
      insert_row $iInsert
      update_row $iUpdate
      delete_row $iDelete
    if {0==($iTest%2)} { execsql COMMIT }

    if {0==($iTest%2)} {
      #do_test 0 { fts3_integrity_check t1 } ok
    }

    # Pick 10 terms from the vocabulary. Check that the results of querying
    # the database for the set of documents containing each of these terms
    # is the same as the result obtained by scanning the contents of the Tcl
    # array for each term.
    #
    for {set i 0} {$i < 10} {incr i} {
      set term [random_term]
      do_select_test 1.$i.asc {
        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
        ORDER BY docid ASC
      } [simple_token_matchinfo $term 0]
      do_select_test 1.$i.desc {
        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
        ORDER BY docid DESC
      } [simple_token_matchinfo $term 1]
    }

    # This time, use each term with its final character removed as a term
    # prefix to query for. Test that querying the Tcl array produces the same
    # results as querying the FTS3 table for the prefix.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set prefix [string range [random_term] 0 end-1]
      set match "${prefix}*"
      do_orderbydocid_test 2.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $match]
    }

    # Similar to the above, except for phrase queries.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set term [list [random_term] [random_term]]
      set match "\"$term\""
      do_orderbydocid_test 3.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $term]
    }

    # Three word phrases.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set term [list [random_term] [random_term] [random_term]]
      set match "\"$term\""
      do_orderbydocid_test 4.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $term]
    }

    # Three word phrases made up of term-prefixes.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set query "[string range [random_term] 0 end-1]* "
      append query "[string range [random_term] 0 end-1]* "
      append query "[string range [random_term] 0 end-1]*"

      set match "\"$query\""
      do_orderbydocid_test 5.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $query]
    }

    # A NEAR query with terms as the arguments:
    #
    #     ... MATCH '$term1 NEAR $term2' ...
    #
    # The expected result is computed with a distance of 10.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set terms [list [random_term] [random_term]]
      set match [join $terms " NEAR "]
      do_orderbydocid_test 6.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_near $terms 10]
    }

    # A 3-way NEAR query with terms as the arguments.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set terms [list [random_term] [random_term] [random_term]]
      set nNear 11
      set match [join $terms " NEAR/$nNear "]
      do_orderbydocid_test 7.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_near $terms $nNear]
    }

    # Set operations on simple term queries.
    #
    foreach {tn op proc} {
      8  OR  setop_or
      9  NOT setop_not
      10 AND setop_and
    } {
      for {set i 0} {$i < $nRep} {incr i} {
        set term1 [random_term]
        set term2 [random_term]
        set match "$term1 $op $term2"
        do_orderbydocid_test $tn.$i {
          SELECT docid FROM t1 WHERE t1 MATCH $match
        } [$proc [simple_phrase $term1] [simple_phrase $term2]]
      }
    }

    # Set operations on NEAR queries.
    #
    foreach {tn op proc} {
      11 OR  setop_or
      12 NOT setop_not
      13 AND setop_and
    } {
      for {set i 0} {$i < $nRep} {incr i} {
        set term1 [random_term]
        set term2 [random_term]
        set term3 [random_term]
        set term4 [random_term]
        set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
        do_orderbydocid_test $tn.$i {
          SELECT docid FROM t1 WHERE t1 MATCH $match
        } [$proc                                  \
            [simple_near [list $term1 $term2] 10] \
            [simple_near [list $term3 $term4] 10]
        ]
      }
    }

    # Commit the transaction left open by odd-numbered iterations. On
    # even-numbered iterations there is none and the error is ignored.
    catchsql COMMIT
  }
}

finish_test
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>