Annotation of embedaddon/sqlite3/test/fts3_common.tcl, revision 1.1

1.1     ! misho       1: # 2009 November 04
        !             2: #
        !             3: # The author disclaims copyright to this source code.  In place of
        !             4: # a legal notice, here is a blessing:
        !             5: #
        !             6: #    May you do good and not evil.
        !             7: #    May you find forgiveness for yourself and forgive others.
        !             8: #    May you share freely, never taking more than you give.
        !             9: #
        !            10: #***********************************************************************
        !            11: #
        !            12: # This file contains common code used the fts3 tests. At one point
        !            13: # equivalent functionality was implemented in C code. But it is easier
        !            14: # to use Tcl.
        !            15: #
        !            16: 
        !            17: #-------------------------------------------------------------------------
        !            18: # USAGE: fts3_integrity_check TBL
        !            19: #
        !            20: # This proc is used to verify that the full-text index is consistent with
        !            21: # the contents of the fts3 table. In other words, it checks that the
        !            22: # data in the %_contents table matches that in the %_segdir and %_segments 
        !            23: # tables.
        !            24: #
        !            25: # This is not an efficient procedure. It uses a lot of memory and a lot
        !            26: # of CPU. But it is better than not checking at all.
        !            27: #
        !            28: # The procedure is:
        !            29: #
        !            30: #   1) Read the entire full-text index from the %_segdir and %_segments
        !            31: #      tables into memory. For each entry in the index, the following is
        !            32: #      done:
        !            33: #
        !            34: #          set C($iDocid,$iCol,$iPosition) $zTerm
        !            35: #
        !            36: #   2) Iterate through each column of each row of the %_content table. 
        !            37: #      Tokenize all documents, and check that for each token there is
        !            38: #      a corresponding entry in the $C array. After checking a token,
        !            39: #      [unset] the $C array entry.
        !            40: #
        !            41: #   3) Check that array $C is now empty.
        !            42: #      
        !            43: #
        !            44: proc fts3_integrity_check {tbl} {
        !            45: 
        !            46:   fts3_read2 $tbl 1 A
        !            47: 
        !            48:   foreach zTerm [array names A] {
        !            49:     foreach doclist $A($zTerm) {
        !            50:       set docid 0
        !            51:       while {[string length $doclist]>0} {
        !            52:         set iCol 0
        !            53:         set iPos 0
        !            54:         set lPos [list]
        !            55:         set lCol [list]
        !            56: 
        !            57:         # First varint of a doclist-entry is the docid. Delta-compressed
        !            58:         # with respect to the docid of the previous entry.
        !            59:         #
        !            60:         incr docid [gobble_varint doclist]
        !            61:         if {[info exists D($zTerm,$docid)]} {
        !            62:           while {[set iDelta [gobble_varint doclist]] != 0} {}
        !            63:           continue
        !            64:         }
        !            65:         set D($zTerm,$docid) 1
        !            66: 
        !            67:         # Gobble varints until the 0x00 that terminates the doclist-entry
        !            68:         # is found.
        !            69:         while {[set iDelta [gobble_varint doclist]] > 0} {
        !            70:           if {$iDelta == 1} {
        !            71:             set iCol [gobble_varint doclist]
        !            72:             set iPos 0
        !            73:           } else {
        !            74:             incr iPos $iDelta
        !            75:             incr iPos -2
        !            76:             set C($docid,$iCol,$iPos) $zTerm
        !            77:           }
        !            78:         }
        !            79:       }
        !            80:     }
        !            81:   }
        !            82: 
        !            83:   foreach key [array names C] {
        !            84:     #puts "$key -> $C($key)"
        !            85:   }
        !            86: 
        !            87: 
        !            88:   db eval "SELECT * FROM ${tbl}_content" E {
        !            89:     set iCol 0
        !            90:     set iDoc $E(docid)
        !            91:     foreach col [lrange $E(*) 1 end] {
        !            92:       set c $E($col)
        !            93:       set sql {SELECT fts3_tokenizer_test('simple', $c)}
        !            94: 
        !            95:       foreach {pos term dummy} [db one $sql] {
        !            96:         if {![info exists C($iDoc,$iCol,$pos)]} {
        !            97:           set es "Error at docid=$iDoc col=$iCol pos=$pos. Index is missing"
        !            98:           lappend errors $es
        !            99:         } else {
        !           100:           if {$C($iDoc,$iCol,$pos) != "$term"} {
        !           101:             set    es "Error at docid=$iDoc col=$iCol pos=$pos. Index "
        !           102:             append es "has \"$C($iDoc,$iCol,$pos)\", document has \"$term\""
        !           103:             lappend errors $es
        !           104:           }
        !           105:           unset C($iDoc,$iCol,$pos)
        !           106:         }
        !           107:       }
        !           108:       incr iCol
        !           109:     }
        !           110:   }
        !           111: 
        !           112:   foreach c [array names C] {
        !           113:     lappend errors "Bad index entry: $c -> $C($c)"
        !           114:   }
        !           115: 
        !           116:   if {[info exists errors]} { return [join $errors "\n"] }
        !           117:   return "ok"
        !           118: }
        !           119: 
        !           120: # USAGE: fts3_terms TBL WHERE
        !           121: #
        !           122: # Argument TBL must be the name of an FTS3 table. Argument WHERE is an
        !           123: # SQL expression that will be used as the WHERE clause when scanning
        !           124: # the %_segdir table. As in the following query:
        !           125: #
        !           126: #   "SELECT * FROM ${TBL}_segdir WHERE ${WHERE}"
        !           127: #
        !           128: # This function returns a list of all terms present in the segments
        !           129: # selected by the statement above.
        !           130: #
        !           131: proc fts3_terms {tbl where} {
        !           132:   fts3_read $tbl $where a
        !           133:   return [lsort [array names a]]
        !           134: }
        !           135: 
        !           136: 
        !           137: # USAGE: fts3_doclist TBL TERM WHERE
        !           138: #
        !           139: # Argument TBL must be the name of an FTS3 table. TERM is a term that may
        !           140: # or may not be present in the table. Argument WHERE is used to select a 
        !           141: # subset of the b-tree segments in the associated full-text index as 
        !           142: # described above for [fts3_terms].
        !           143: #
        !           144: # This function returns the results of merging the doclists associated
        !           145: # with TERM in the selected segments. Each doclist is an element of the
        !           146: # returned list. Each doclist is formatted as follows:
        !           147: #
        !           148: #   [$docid ?$col[$off1 $off2...]?...]
        !           149: #
        !           150: # The formatting is odd for a Tcl command in order to be compatible with
        !           151: # the original C-language implementation. If argument WHERE is "1", then 
        !           152: # any empty doclists are omitted from the returned list.
        !           153: #
        !           154: proc fts3_doclist {tbl term where} {
        !           155:   fts3_read $tbl $where a
        !           156: 
        !           157: 
        !           158:   foreach doclist $a($term) {
        !           159:     set docid 0
        !           160: 
        !           161:     while {[string length $doclist]>0} {
        !           162:       set iCol 0
        !           163:       set iPos 0
        !           164:       set lPos [list]
        !           165:       set lCol [list]
        !           166:       incr docid [gobble_varint doclist]
        !           167:   
        !           168:       while {[set iDelta [gobble_varint doclist]] > 0} {
        !           169:         if {$iDelta == 1} {
        !           170:           lappend lCol [list $iCol $lPos]
        !           171:           set iPos 0
        !           172:           set lPos [list]
        !           173:           set iCol [gobble_varint doclist]
        !           174:         } else {
        !           175:           incr iPos $iDelta
        !           176:           incr iPos -2
        !           177:           lappend lPos $iPos
        !           178:         }
        !           179:       }
        !           180:   
        !           181:       if {[llength $lPos]>0} {
        !           182:         lappend lCol [list $iCol $lPos]
        !           183:       }
        !           184:   
        !           185:       if {$where != "1" || [llength $lCol]>0} {
        !           186:         set ret($docid) $lCol
        !           187:       } else {
        !           188:         unset -nocomplain ret($docid)
        !           189:       }
        !           190:     }
        !           191:   }
        !           192: 
        !           193:   set lDoc [list]
        !           194:   foreach docid [lsort -integer [array names ret]] {
        !           195:     set lCol [list]
        !           196:     set cols ""
        !           197:     foreach col $ret($docid) {
        !           198:       foreach {iCol lPos} $col {}
        !           199:       append cols " $iCol\[[join $lPos { }]\]"
        !           200:     }
        !           201:     lappend lDoc "\[${docid}${cols}\]"
        !           202:   }
        !           203: 
        !           204:   join $lDoc " "
        !           205: }
        !           206: 
        !           207: ###########################################################################
        !           208: 
        !           209: proc gobble_varint {varname} {
        !           210:   upvar $varname blob
        !           211:   set n [read_fts3varint $blob ret]
        !           212:   set blob [string range $blob $n end]
        !           213:   return $ret
        !           214: }
        !           215: proc gobble_string {varname nLength} {
        !           216:   upvar $varname blob
        !           217:   set ret [string range $blob 0 [expr $nLength-1]]
        !           218:   set blob [string range $blob $nLength end]
        !           219:   return $ret
        !           220: }
        !           221: 
        !           222: # The argument is a blob of data representing an FTS3 segment leaf. 
        !           223: # Return a list consisting of alternating terms (strings) and doclists
        !           224: # (blobs of data).
        !           225: #
        !           226: proc fts3_readleaf {blob} {
        !           227:   set zPrev ""
        !           228:   set terms [list]
        !           229: 
        !           230:   while {[string length $blob] > 0} {
        !           231:     set nPrefix [gobble_varint blob]
        !           232:     set nSuffix [gobble_varint blob]
        !           233: 
        !           234:     set zTerm [string range $zPrev 0 [expr $nPrefix-1]]
        !           235:     append zTerm [gobble_string blob $nSuffix]
        !           236:     set doclist [gobble_string blob [gobble_varint blob]]
        !           237: 
        !           238:     lappend terms $zTerm $doclist
        !           239:     set zPrev $zTerm
        !           240:   }
        !           241: 
        !           242:   return $terms
        !           243: }
        !           244: 
        !           245: proc fts3_read2 {tbl where varname} {
        !           246:   upvar $varname a
        !           247:   array unset a
        !           248:   db eval " SELECT start_block, leaves_end_block, root 
        !           249:             FROM ${tbl}_segdir WHERE $where
        !           250:             ORDER BY level ASC, idx DESC
        !           251:   " {
        !           252:     if {$start_block == 0} {
        !           253:       foreach {t d} [fts3_readleaf $root] { lappend a($t) $d }
        !           254:     } else {
        !           255:       db eval " SELECT block 
        !           256:                 FROM ${tbl}_segments 
        !           257:                 WHERE blockid>=$start_block AND blockid<=$leaves_end_block
        !           258:                 ORDER BY blockid
        !           259:       " {
        !           260:         foreach {t d} [fts3_readleaf $block] { lappend a($t) $d }
        !           261: 
        !           262:       }
        !           263:     }
        !           264:   }
        !           265: }
        !           266: 
        !           267: proc fts3_read {tbl where varname} {
        !           268:   upvar $varname a
        !           269:   array unset a
        !           270:   db eval " SELECT start_block, leaves_end_block, root 
        !           271:             FROM ${tbl}_segdir WHERE $where
        !           272:             ORDER BY level DESC, idx ASC
        !           273:   " {
        !           274:     if {$start_block == 0} {
        !           275:       foreach {t d} [fts3_readleaf $root] { lappend a($t) $d }
        !           276:     } else {
        !           277:       db eval " SELECT block 
        !           278:                 FROM ${tbl}_segments 
        !           279:                 WHERE blockid>=$start_block AND blockid<$leaves_end_block
        !           280:                 ORDER BY blockid
        !           281:       " {
        !           282:         foreach {t d} [fts3_readleaf $block] { lappend a($t) $d }
        !           283: 
        !           284:       }
        !           285:     }
        !           286:   }
        !           287: }
        !           288: 
        !           289: ##########################################################################
        !           290: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>