Annotation of embedaddon/sqlite3/test/fts3rnd.test, revision 1.1

1.1     ! misho       1: # 2009 December 03
        !             2: #
        !             3: #    May you do good and not evil.
        !             4: #    May you find forgiveness for yourself and forgive others.
        !             5: #    May you share freely, never taking more than you give.
        !             6: #
        !             7: #***********************************************************************
        !             8: #
        !             9: # Brute force (random data) tests for FTS3.
        !            10: #
        !            11: 
        !            12: #-------------------------------------------------------------------------
        !            13: #
        !            14: # The FTS3 tests implemented in this file focus on testing that FTS3
        !            15: # returns the correct set of documents for various types of full-text
        !            16: # query. This is done using pseudo-randomly generated data and queries.
        !            17: # The expected result of each query is calculated using Tcl code.
        !            18: #
        !            19: #   1. The database is initialized to contain a single table with three
        !            20: #      columns. 100 rows are inserted into the table. Each of the three
        !            21: #      values in each row is a document consisting of between 0 and 100
        !            22: #      terms. Terms are selected from a vocabulary of $G(nVocab) terms.
        !            23: #
        !            24: #   2. The following is performed 100 times:
        !            25: #
        !            26: #      a. A row is inserted into the database. The row contents are 
        !            27: #         generated as in step 1. The docid is a pseudo-randomly selected
        !            28: #         value between 0 and 1000000.
        !            29: # 
        !            30: #      b. A psuedo-randomly selected row is updated. One of its columns is
        !            31: #         set to contain a new document generated in the same way as the
        !            32: #         documents in step 1.
        !            33: # 
        !            34: #      c. A psuedo-randomly selected row is deleted.
        !            35: # 
        !            36: #      d. For each of several types of fts3 queries, 10 SELECT queries
        !            37: #         of the form:
        !            38: # 
        !            39: #           SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'
        !            40: # 
        !            41: #         are evaluated. The results are compared to those calculated by
        !            42: #         Tcl code in this file. The patterns used for the different query
        !            43: #         types are:
        !            44: # 
        !            45: #           1.  query = <term>
        !            46: #           2.  query = <prefix>
        !            47: #           3.  query = "<term> <term>"
        !            48: #           4.  query = "<term> <term> <term>"
        !            49: #           5.  query = "<prefix> <prefix> <prefix>"
        !            50: #           6.  query = <term> NEAR <term>
        !            51: #           7.  query = <term> NEAR/11 <term> NEAR/11 <term>
        !            52: #           8.  query = <term> OR <term>
        !            53: #           9.  query = <term> NOT <term>
        !            54: #           10. query = <term> AND <term>
        !            55: #           11. query = <term> NEAR <term> OR <term> NEAR <term>
        !            56: #           12. query = <term> NEAR <term> NOT <term> NEAR <term>
        !            57: #           13. query = <term> NEAR <term> AND <term> NEAR <term>
        !            58: # 
        !            59: #         where <term> is a term psuedo-randomly selected from the vocabulary
        !            60: #         and prefix is the first 2 characters of such a term followed by
        !            61: #         a "*" character.
        !            62: #     
        !            63: #      Every second iteration, steps (a) through (d) above are performed
        !            64: #      within a single transaction. This forces the queries in (d) to
        !            65: #      read data from both the database and the in-memory hash table
        !            66: #      that caches the full-text index entries created by steps (a), (b)
        !            67: #      and (c) until the transaction is committed.
        !            68: #
        !            69: # The procedure above is run 5 times, using advisory fts3 node sizes of 50,
        !            70: # 500, 1000 and 2000 bytes.
        !            71: #
        !            72: # After the test using an advisory node-size of 50, an OOM test is run using
        !            73: # the database. This test is similar to step (d) above, except that it tests
        !            74: # the effects of transient and persistent OOM conditions encountered while
        !            75: # executing each query.
        !            76: #
        !            77: 
        !            78: set testdir [file dirname $argv0]
        !            79: source $testdir/tester.tcl
        !            80: 
        !            81: # If this build does not include FTS3, skip the tests in this file.
        !            82: #
        !            83: ifcapable !fts3 { finish_test ; return }
        !            84: source $testdir/fts3_common.tcl
        !            85: source $testdir/malloc_common.tcl
        !            86: 
        !            87: set G(nVocab) 100
        !            88: 
        !            89: set nVocab 100
        !            90: set lVocab [list]
        !            91: 
        !            92: expr srand(0)
        !            93: 
        !            94: # Generate a vocabulary of nVocab words. Each word is 3 characters long.
        !            95: #
        !            96: set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
        !            97: for {set i 0} {$i < $nVocab} {incr i} {
        !            98:   set len [expr int(rand()*3)+2]
        !            99:   set    word [lindex $lChar [expr int(rand()*26)]]
        !           100:   append word [lindex $lChar [expr int(rand()*26)]]
        !           101:   if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] }
        !           102:   if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] }
        !           103:   lappend lVocab $word
        !           104: }
        !           105: 
        !           106: proc random_term {} {
        !           107:   lindex $::lVocab [expr {int(rand()*$::nVocab)}]
        !           108: }
        !           109: 
        !           110: # Return a document consisting of $nWord arbitrarily selected terms
        !           111: # from the $::lVocab list.
        !           112: #
        !           113: proc generate_doc {nWord} {
        !           114:   set doc [list]
        !           115:   for {set i 0} {$i < $nWord} {incr i} {
        !           116:     lappend doc [random_term]
        !           117:   }
        !           118:   return $doc
        !           119: }
        !           120: 
        !           121: 
        !           122: 
        !           123: # Primitives to update the table.
        !           124: #
        !           125: unset -nocomplain t1
        !           126: proc insert_row {rowid} {
        !           127:   set a [generate_doc [expr int((rand()*100))]]
        !           128:   set b [generate_doc [expr int((rand()*100))]]
        !           129:   set c [generate_doc [expr int((rand()*100))]]
        !           130:   execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
        !           131:   set ::t1($rowid) [list $a $b $c]
        !           132: }
        !           133: proc delete_row {rowid} {
        !           134:   execsql { DELETE FROM t1 WHERE rowid = $rowid }
        !           135:   catch {unset ::t1($rowid)}
        !           136: }
        !           137: proc update_row {rowid} {
        !           138:   set cols {a b c}
        !           139:   set iCol [expr int(rand()*3)]
        !           140:   set doc  [generate_doc [expr int((rand()*100))]]
        !           141:   lset ::t1($rowid) $iCol $doc
        !           142:   execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid"
        !           143: }
        !           144: 
        !           145: proc simple_phrase {zPrefix} {
        !           146:   set ret [list]
        !           147: 
        !           148:   set reg [string map {* {[^ ]*}} $zPrefix]
        !           149:   set reg " $reg "
        !           150: 
        !           151:   foreach key [lsort -integer [array names ::t1]] {
        !           152:     set value $::t1($key)
        !           153:     set cnt [list]
        !           154:     foreach col $value {
        !           155:       if {[regexp $reg " $col "]} { lappend ret $key ; break }
        !           156:     }
        !           157:   }
        !           158: 
        !           159:   #lsort -uniq -integer $ret
        !           160:   set ret
        !           161: }
        !           162: 
        !           163: # This [proc] is used to test the FTS3 matchinfo() function.
        !           164: # 
        !           165: proc simple_token_matchinfo {zToken bDesc} {
        !           166: 
        !           167:   set nDoc(0) 0
        !           168:   set nDoc(1) 0
        !           169:   set nDoc(2) 0
        !           170:   set nHit(0) 0
        !           171:   set nHit(1) 0
        !           172:   set nHit(2) 0
        !           173: 
        !           174:   set dir -inc
        !           175:   if {$bDesc} { set dir -dec }
        !           176: 
        !           177:   foreach key [array names ::t1] {
        !           178:     set value $::t1($key)
        !           179:     set a($key) [list]
        !           180:     foreach i {0 1 2} col $value {
        !           181:       set hit [llength [lsearch -all $col $zToken]]
        !           182:       lappend a($key) $hit
        !           183:       incr nHit($i) $hit
        !           184:       if {$hit>0} { incr nDoc($i) }
        !           185:     }
        !           186:   }
        !           187: 
        !           188:   set ret [list]
        !           189:   foreach docid [lsort -integer $dir [array names a]] {
        !           190:     if { [lindex [lsort -integer $a($docid)] end] } {
        !           191:       set matchinfo [list 1 3]
        !           192:       foreach i {0 1 2} hit $a($docid) {
        !           193:         lappend matchinfo $hit $nHit($i) $nDoc($i)
        !           194:       }
        !           195:       lappend ret $docid $matchinfo
        !           196:     }
        !           197:   }
        !           198: 
        !           199:   set ret
        !           200: } 
        !           201: 
        !           202: proc simple_near {termlist nNear} {
        !           203:   set ret [list]
        !           204: 
        !           205:   foreach {key value} [array get ::t1] {
        !           206:     foreach v $value {
        !           207: 
        !           208:       set l [lsearch -exact -all $v [lindex $termlist 0]]
        !           209:       foreach T [lrange $termlist 1 end] {
        !           210:         set l2 [list]
        !           211:         foreach i $l {
        !           212:           set iStart [expr $i - $nNear - 1]
        !           213:           set iEnd [expr $i + $nNear + 1]
        !           214:           if {$iStart < 0} {set iStart 0}
        !           215:           foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] {
        !           216:             incr i2 $iStart
        !           217:             if {$i2 != $i} { lappend l2 $i2 } 
        !           218:           }
        !           219:         }
        !           220:         set l [lsort -uniq -integer $l2]
        !           221:       }
        !           222: 
        !           223:       if {[llength $l]} {
        !           224: #puts "MATCH($key): $v"
        !           225:         lappend ret $key
        !           226:       } 
        !           227:     }
        !           228:   }
        !           229: 
        !           230:   lsort -unique -integer $ret
        !           231: }
        !           232: 
        !           233: # The following three procs:
        !           234: # 
        !           235: #   setup_not A B
        !           236: #   setup_or  A B
        !           237: #   setup_and A B
        !           238: #
        !           239: # each take two arguments. Both arguments must be lists of integer values
        !           240: # sorted by value. The return value is the list produced by evaluating
        !           241: # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or
        !           242: # AND.
        !           243: #
        !           244: proc setop_not {A B} {
        !           245:   foreach b $B { set n($b) {} }
        !           246:   set ret [list]
        !           247:   foreach a $A { if {![info exists n($a)]} {lappend ret $a} }
        !           248:   return $ret
        !           249: }
        !           250: proc setop_or {A B} {
        !           251:   lsort -integer -uniq [concat $A $B]
        !           252: }
        !           253: proc setop_and {A B} {
        !           254:   foreach b $B { set n($b) {} }
        !           255:   set ret [list]
        !           256:   foreach a $A { if {[info exists n($a)]} {lappend ret $a} }
        !           257:   return $ret
        !           258: }
        !           259: 
        !           260: proc mit {blob} {
        !           261:   set scan(littleEndian) i*
        !           262:   set scan(bigEndian) I*
        !           263:   binary scan $blob $scan($::tcl_platform(byteOrder)) r
        !           264:   return $r
        !           265: }
        !           266: db func mit mit
        !           267: set sqlite_fts3_enable_parentheses 1
        !           268: 
        !           269: proc do_orderbydocid_test {tn sql res} {
        !           270:   uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res]
        !           271:   uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \
        !           272:     [lsort -int -dec $res]
        !           273:   ]
        !           274: }
        !           275: 
        !           276: set NUM_TRIALS 100
        !           277: 
        !           278: foreach {nodesize order} {
        !           279:   50    DESC
        !           280:   50    ASC
        !           281:   500   ASC
        !           282:   1000  DESC
        !           283:   2000  ASC
        !           284: } {
        !           285:   catch { array unset ::t1 }
        !           286:   set testname "$nodesize/$order"
        !           287: 
        !           288:   # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.
        !           289:   #
        !           290:   db transaction {
        !           291:     catchsql { DROP TABLE t1 }
        !           292:     execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)"
        !           293:     execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
        !           294:     for {set i 0} {$i < 100} {incr i} { insert_row $i }
        !           295:   }
        !           296:   
        !           297:   for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} {
        !           298:     catchsql COMMIT
        !           299: 
        !           300:     set DO_MALLOC_TEST 0
        !           301:     set nRep 10
        !           302:     if {$iTest==100 && $nodesize==50} { 
        !           303:       set DO_MALLOC_TEST 1 
        !           304:       set nRep 2
        !           305:     }
        !           306: 
        !           307:     set ::testprefix fts3rnd-1.$testname.$iTest
        !           308:   
        !           309:     # Delete one row, update one row and insert one row.
        !           310:     #
        !           311:     set rows [array names ::t1]
        !           312:     set nRow [llength $rows]
        !           313:     set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
        !           314:     set iDelete $iUpdate
        !           315:     while {$iDelete == $iUpdate} {
        !           316:       set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
        !           317:     }
        !           318:     set iInsert $iUpdate
        !           319:     while {[info exists ::t1($iInsert)]} {
        !           320:       set iInsert [expr {int(rand()*1000000)}]
        !           321:     }
        !           322:     execsql BEGIN
        !           323:       insert_row $iInsert
        !           324:       update_row $iUpdate
        !           325:       delete_row $iDelete
        !           326:     if {0==($iTest%2)} { execsql COMMIT }
        !           327: 
        !           328:     if {0==($iTest%2)} { 
        !           329:       #do_test 0 { fts3_integrity_check t1 } ok 
        !           330:     }
        !           331: 
        !           332:     # Pick 10 terms from the vocabulary. Check that the results of querying
        !           333:     # the database for the set of documents containing each of these terms
        !           334:     # is the same as the result obtained by scanning the contents of the Tcl 
        !           335:     # array for each term.
        !           336:     #
        !           337:     for {set i 0} {$i < 10} {incr i} {
        !           338:       set term [random_term]
        !           339:       do_select_test 1.$i.asc {
        !           340:         SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
        !           341:         ORDER BY docid ASC
        !           342:       } [simple_token_matchinfo $term 0]
        !           343:       do_select_test 1.$i.desc {
        !           344:         SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
        !           345:         ORDER BY docid DESC
        !           346:       } [simple_token_matchinfo $term 1]
        !           347:     }
        !           348: 
        !           349:     # This time, use the first two characters of each term as a term prefix
        !           350:     # to query for. Test that querying the Tcl array produces the same results
        !           351:     # as querying the FTS3 table for the prefix.
        !           352:     #
        !           353:     for {set i 0} {$i < $nRep} {incr i} {
        !           354:       set prefix [string range [random_term] 0 end-1]
        !           355:       set match "${prefix}*"
        !           356:       do_orderbydocid_test 2.$i {
        !           357:         SELECT docid FROM t1 WHERE t1 MATCH $match
        !           358:       } [simple_phrase $match]
        !           359:     }
        !           360: 
        !           361:     # Similar to the above, except for phrase queries.
        !           362:     #
        !           363:     for {set i 0} {$i < $nRep} {incr i} {
        !           364:       set term [list [random_term] [random_term]]
        !           365:       set match "\"$term\""
        !           366:       do_orderbydocid_test 3.$i {
        !           367:         SELECT docid FROM t1 WHERE t1 MATCH $match
        !           368:       } [simple_phrase $term]
        !           369:     }
        !           370: 
        !           371:     # Three word phrases.
        !           372:     #
        !           373:     for {set i 0} {$i < $nRep} {incr i} {
        !           374:       set term [list [random_term] [random_term] [random_term]]
        !           375:       set match "\"$term\""
        !           376:       do_orderbydocid_test 4.$i {
        !           377:         SELECT docid FROM t1 WHERE t1 MATCH $match
        !           378:       } [simple_phrase $term]
        !           379:     }
        !           380: 
        !           381:     # Three word phrases made up of term-prefixes.
        !           382:     #
        !           383:     for {set i 0} {$i < $nRep} {incr i} {
        !           384:       set    query "[string range [random_term] 0 end-1]* "
        !           385:       append query "[string range [random_term] 0 end-1]* "
        !           386:       append query "[string range [random_term] 0 end-1]*"
        !           387: 
        !           388:       set match "\"$query\""
        !           389:       do_orderbydocid_test 5.$i {
        !           390:         SELECT docid FROM t1 WHERE t1 MATCH $match
        !           391:       } [simple_phrase $query]
        !           392:     }
        !           393: 
        !           394:     # A NEAR query with terms as the arguments:
        !           395:     #
        !           396:     #     ... MATCH '$term1 NEAR $term2' ...
        !           397:     #
        !           398:     for {set i 0} {$i < $nRep} {incr i} {
        !           399:       set terms [list [random_term] [random_term]]
        !           400:       set match [join $terms " NEAR "]
        !           401:       do_orderbydocid_test 6.$i {
        !           402:         SELECT docid FROM t1 WHERE t1 MATCH $match 
        !           403:       } [simple_near $terms 10]
        !           404:     }
        !           405: 
        !           406:     # A 3-way NEAR query with terms as the arguments.
        !           407:     #
        !           408:     for {set i 0} {$i < $nRep} {incr i} {
        !           409:       set terms [list [random_term] [random_term] [random_term]]
        !           410:       set nNear 11
        !           411:       set match [join $terms " NEAR/$nNear "]
        !           412:       do_orderbydocid_test 7.$i {
        !           413:         SELECT docid FROM t1 WHERE t1 MATCH $match
        !           414:       } [simple_near $terms $nNear]
        !           415:     }
        !           416:     
        !           417:     # Set operations on simple term queries.
        !           418:     #
        !           419:     foreach {tn op proc} {
        !           420:       8  OR  setop_or
        !           421:       9  NOT setop_not
        !           422:       10 AND setop_and
        !           423:     } {
        !           424:       for {set i 0} {$i < $nRep} {incr i} {
        !           425:         set term1 [random_term]
        !           426:         set term2 [random_term]
        !           427:         set match "$term1 $op $term2"
        !           428:         do_orderbydocid_test $tn.$i {
        !           429:           SELECT docid FROM t1 WHERE t1 MATCH $match
        !           430:         } [$proc [simple_phrase $term1] [simple_phrase $term2]]
        !           431:       }
        !           432:     }
        !           433:  
        !           434:     # Set operations on NEAR queries.
        !           435:     #
        !           436:     foreach {tn op proc} {
        !           437:       11 OR  setop_or
        !           438:       12 NOT setop_not
        !           439:       13 AND setop_and
        !           440:     } {
        !           441:       for {set i 0} {$i < $nRep} {incr i} {
        !           442:         set term1 [random_term]
        !           443:         set term2 [random_term]
        !           444:         set term3 [random_term]
        !           445:         set term4 [random_term]
        !           446:         set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
        !           447:         do_orderbydocid_test $tn.$i {
        !           448:           SELECT docid FROM t1 WHERE t1 MATCH $match
        !           449:         } [$proc                                  \
        !           450:             [simple_near [list $term1 $term2] 10] \
        !           451:             [simple_near [list $term3 $term4] 10]
        !           452:           ]
        !           453:       }
        !           454:     }
        !           455: 
        !           456:     catchsql COMMIT
        !           457:   }
        !           458: }
        !           459: 
        !           460: finish_test

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>