Annotation of embedaddon/sqlite3/test/fts3_common.tcl, revision 1.1
1.1 ! misho 1: # 2009 November 04
! 2: #
! 3: # The author disclaims copyright to this source code. In place of
! 4: # a legal notice, here is a blessing:
! 5: #
! 6: # May you do good and not evil.
! 7: # May you find forgiveness for yourself and forgive others.
! 8: # May you share freely, never taking more than you give.
! 9: #
! 10: #***********************************************************************
! 11: #
! 12: # This file contains common code used the fts3 tests. At one point
! 13: # equivalent functionality was implemented in C code. But it is easier
! 14: # to use Tcl.
! 15: #
! 16:
! 17: #-------------------------------------------------------------------------
! 18: # USAGE: fts3_integrity_check TBL
! 19: #
! 20: # This proc is used to verify that the full-text index is consistent with
! 21: # the contents of the fts3 table. In other words, it checks that the
! 22: # data in the %_contents table matches that in the %_segdir and %_segments
! 23: # tables.
! 24: #
! 25: # This is not an efficient procedure. It uses a lot of memory and a lot
! 26: # of CPU. But it is better than not checking at all.
! 27: #
! 28: # The procedure is:
! 29: #
! 30: # 1) Read the entire full-text index from the %_segdir and %_segments
! 31: # tables into memory. For each entry in the index, the following is
! 32: # done:
! 33: #
! 34: # set C($iDocid,$iCol,$iPosition) $zTerm
! 35: #
! 36: # 2) Iterate through each column of each row of the %_content table.
! 37: # Tokenize all documents, and check that for each token there is
! 38: # a corresponding entry in the $C array. After checking a token,
! 39: # [unset] the $C array entry.
! 40: #
! 41: # 3) Check that array $C is now empty.
! 42: #
! 43: #
! 44: proc fts3_integrity_check {tbl} {
! 45:
! 46: fts3_read2 $tbl 1 A
! 47:
! 48: foreach zTerm [array names A] {
! 49: foreach doclist $A($zTerm) {
! 50: set docid 0
! 51: while {[string length $doclist]>0} {
! 52: set iCol 0
! 53: set iPos 0
! 54: set lPos [list]
! 55: set lCol [list]
! 56:
! 57: # First varint of a doclist-entry is the docid. Delta-compressed
! 58: # with respect to the docid of the previous entry.
! 59: #
! 60: incr docid [gobble_varint doclist]
! 61: if {[info exists D($zTerm,$docid)]} {
! 62: while {[set iDelta [gobble_varint doclist]] != 0} {}
! 63: continue
! 64: }
! 65: set D($zTerm,$docid) 1
! 66:
! 67: # Gobble varints until the 0x00 that terminates the doclist-entry
! 68: # is found.
! 69: while {[set iDelta [gobble_varint doclist]] > 0} {
! 70: if {$iDelta == 1} {
! 71: set iCol [gobble_varint doclist]
! 72: set iPos 0
! 73: } else {
! 74: incr iPos $iDelta
! 75: incr iPos -2
! 76: set C($docid,$iCol,$iPos) $zTerm
! 77: }
! 78: }
! 79: }
! 80: }
! 81: }
! 82:
! 83: foreach key [array names C] {
! 84: #puts "$key -> $C($key)"
! 85: }
! 86:
! 87:
! 88: db eval "SELECT * FROM ${tbl}_content" E {
! 89: set iCol 0
! 90: set iDoc $E(docid)
! 91: foreach col [lrange $E(*) 1 end] {
! 92: set c $E($col)
! 93: set sql {SELECT fts3_tokenizer_test('simple', $c)}
! 94:
! 95: foreach {pos term dummy} [db one $sql] {
! 96: if {![info exists C($iDoc,$iCol,$pos)]} {
! 97: set es "Error at docid=$iDoc col=$iCol pos=$pos. Index is missing"
! 98: lappend errors $es
! 99: } else {
! 100: if {$C($iDoc,$iCol,$pos) != "$term"} {
! 101: set es "Error at docid=$iDoc col=$iCol pos=$pos. Index "
! 102: append es "has \"$C($iDoc,$iCol,$pos)\", document has \"$term\""
! 103: lappend errors $es
! 104: }
! 105: unset C($iDoc,$iCol,$pos)
! 106: }
! 107: }
! 108: incr iCol
! 109: }
! 110: }
! 111:
! 112: foreach c [array names C] {
! 113: lappend errors "Bad index entry: $c -> $C($c)"
! 114: }
! 115:
! 116: if {[info exists errors]} { return [join $errors "\n"] }
! 117: return "ok"
! 118: }
! 119:
! 120: # USAGE: fts3_terms TBL WHERE
! 121: #
! 122: # Argument TBL must be the name of an FTS3 table. Argument WHERE is an
! 123: # SQL expression that will be used as the WHERE clause when scanning
! 124: # the %_segdir table. As in the following query:
! 125: #
! 126: # "SELECT * FROM ${TBL}_segdir WHERE ${WHERE}"
! 127: #
! 128: # This function returns a list of all terms present in the segments
! 129: # selected by the statement above.
! 130: #
! 131: proc fts3_terms {tbl where} {
! 132: fts3_read $tbl $where a
! 133: return [lsort [array names a]]
! 134: }
! 135:
! 136:
! 137: # USAGE: fts3_doclist TBL TERM WHERE
! 138: #
! 139: # Argument TBL must be the name of an FTS3 table. TERM is a term that may
! 140: # or may not be present in the table. Argument WHERE is used to select a
! 141: # subset of the b-tree segments in the associated full-text index as
! 142: # described above for [fts3_terms].
! 143: #
! 144: # This function returns the results of merging the doclists associated
! 145: # with TERM in the selected segments. Each doclist is an element of the
! 146: # returned list. Each doclist is formatted as follows:
! 147: #
! 148: # [$docid ?$col[$off1 $off2...]?...]
! 149: #
! 150: # The formatting is odd for a Tcl command in order to be compatible with
! 151: # the original C-language implementation. If argument WHERE is "1", then
! 152: # any empty doclists are omitted from the returned list.
! 153: #
! 154: proc fts3_doclist {tbl term where} {
! 155: fts3_read $tbl $where a
! 156:
! 157:
! 158: foreach doclist $a($term) {
! 159: set docid 0
! 160:
! 161: while {[string length $doclist]>0} {
! 162: set iCol 0
! 163: set iPos 0
! 164: set lPos [list]
! 165: set lCol [list]
! 166: incr docid [gobble_varint doclist]
! 167:
! 168: while {[set iDelta [gobble_varint doclist]] > 0} {
! 169: if {$iDelta == 1} {
! 170: lappend lCol [list $iCol $lPos]
! 171: set iPos 0
! 172: set lPos [list]
! 173: set iCol [gobble_varint doclist]
! 174: } else {
! 175: incr iPos $iDelta
! 176: incr iPos -2
! 177: lappend lPos $iPos
! 178: }
! 179: }
! 180:
! 181: if {[llength $lPos]>0} {
! 182: lappend lCol [list $iCol $lPos]
! 183: }
! 184:
! 185: if {$where != "1" || [llength $lCol]>0} {
! 186: set ret($docid) $lCol
! 187: } else {
! 188: unset -nocomplain ret($docid)
! 189: }
! 190: }
! 191: }
! 192:
! 193: set lDoc [list]
! 194: foreach docid [lsort -integer [array names ret]] {
! 195: set lCol [list]
! 196: set cols ""
! 197: foreach col $ret($docid) {
! 198: foreach {iCol lPos} $col {}
! 199: append cols " $iCol\[[join $lPos { }]\]"
! 200: }
! 201: lappend lDoc "\[${docid}${cols}\]"
! 202: }
! 203:
! 204: join $lDoc " "
! 205: }
! 206:
! 207: ###########################################################################
! 208:
! 209: proc gobble_varint {varname} {
! 210: upvar $varname blob
! 211: set n [read_fts3varint $blob ret]
! 212: set blob [string range $blob $n end]
! 213: return $ret
! 214: }
! 215: proc gobble_string {varname nLength} {
! 216: upvar $varname blob
! 217: set ret [string range $blob 0 [expr $nLength-1]]
! 218: set blob [string range $blob $nLength end]
! 219: return $ret
! 220: }
! 221:
! 222: # The argument is a blob of data representing an FTS3 segment leaf.
! 223: # Return a list consisting of alternating terms (strings) and doclists
! 224: # (blobs of data).
! 225: #
! 226: proc fts3_readleaf {blob} {
! 227: set zPrev ""
! 228: set terms [list]
! 229:
! 230: while {[string length $blob] > 0} {
! 231: set nPrefix [gobble_varint blob]
! 232: set nSuffix [gobble_varint blob]
! 233:
! 234: set zTerm [string range $zPrev 0 [expr $nPrefix-1]]
! 235: append zTerm [gobble_string blob $nSuffix]
! 236: set doclist [gobble_string blob [gobble_varint blob]]
! 237:
! 238: lappend terms $zTerm $doclist
! 239: set zPrev $zTerm
! 240: }
! 241:
! 242: return $terms
! 243: }
! 244:
! 245: proc fts3_read2 {tbl where varname} {
! 246: upvar $varname a
! 247: array unset a
! 248: db eval " SELECT start_block, leaves_end_block, root
! 249: FROM ${tbl}_segdir WHERE $where
! 250: ORDER BY level ASC, idx DESC
! 251: " {
! 252: if {$start_block == 0} {
! 253: foreach {t d} [fts3_readleaf $root] { lappend a($t) $d }
! 254: } else {
! 255: db eval " SELECT block
! 256: FROM ${tbl}_segments
! 257: WHERE blockid>=$start_block AND blockid<=$leaves_end_block
! 258: ORDER BY blockid
! 259: " {
! 260: foreach {t d} [fts3_readleaf $block] { lappend a($t) $d }
! 261:
! 262: }
! 263: }
! 264: }
! 265: }
! 266:
! 267: proc fts3_read {tbl where varname} {
! 268: upvar $varname a
! 269: array unset a
! 270: db eval " SELECT start_block, leaves_end_block, root
! 271: FROM ${tbl}_segdir WHERE $where
! 272: ORDER BY level DESC, idx ASC
! 273: " {
! 274: if {$start_block == 0} {
! 275: foreach {t d} [fts3_readleaf $root] { lappend a($t) $d }
! 276: } else {
! 277: db eval " SELECT block
! 278: FROM ${tbl}_segments
! 279: WHERE blockid>=$start_block AND blockid<$leaves_end_block
! 280: ORDER BY blockid
! 281: " {
! 282: foreach {t d} [fts3_readleaf $block] { lappend a($t) $d }
! 283:
! 284: }
! 285: }
! 286: }
! 287: }
! 288:
! 289: ##########################################################################
! 290:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>