File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / test / fts2n.test
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:16 2012 UTC (12 years, 10 months ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: # 2007 April 26
    2: #
    3: # The author disclaims copyright to this source code.
    4: #
    5: #*************************************************************************
    6: # This file implements tests for prefix-searching in the fts2
    7: # component of the SQLite library.
    8: #
    9: # $Id: fts2n.test,v 1.1.1.1 2012/02/21 17:04:16 misho Exp $
   10: #
   11: 
   12: set testdir [file dirname $argv0]
   13: source $testdir/tester.tcl
   14: 
   15: # If SQLITE_ENABLE_FTS2 is not defined, omit this file.
   16: ifcapable !fts2 {
   17:   finish_test
   18:   return
   19: }
   20: 
   21: # A large string to prime the pump with.
   22: set text {
   23:   Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
   24:   iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
   25:   sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
   26:   aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
   27:   ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
   28:   at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
   29:   ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
   30:   luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
   31:   lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
   32:   potenti. Cum sociis natoque penatibus et magnis dis parturient
   33:   montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
   34:   suscipit nec, consequat quis, risus.
   35: }
   36: 
   37: db eval {
   38:   CREATE VIRTUAL TABLE t1 USING fts2(c);
   39: 
   40:   INSERT INTO t1(rowid, c) VALUES(1, $text);
   41:   INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');
   42: }
   43: 
   44: # Exact match
   45: do_test fts2n-1.1 {
   46:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'"
   47: } {1}
   48: 
   49: # And a prefix
   50: do_test fts2n-1.2 {
   51:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'"
   52: } {1}
   53: 
   54: # Prefix includes exact match
   55: do_test fts2n-1.3 {
   56:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'"
   57: } {1}
   58: 
   59: # Make certain everything isn't considered a prefix!
   60: do_test fts2n-1.4 {
   61:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'"
   62: } {}
   63: 
   64: # Prefix across multiple rows.
   65: do_test fts2n-1.5 {
   66:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'"
   67: } {1 2}
   68: 
   69: # Likewise, with multiple hits in one document.
   70: do_test fts2n-1.6 {
   71:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'"
   72: } {1 2}
   73: 
   74: # Prefix which should only hit one document.
   75: do_test fts2n-1.7 {
   76:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'"
   77: } {2}
   78: 
   79: # * not at end is dropped.
   80: do_test fts2n-1.8 {
   81:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'"
   82: } {}
   83: 
   84: # Stand-alone * is dropped.
   85: do_test fts2n-1.9 {
   86:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'"
   87: } {}
   88: 
   89: # Phrase-query prefix.
   90: do_test fts2n-1.10 {
   91:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'"
   92: } {2}
   93: do_test fts2n-1.11 {
   94:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'"
   95: } {}
   96: 
   97: # Phrase query with multiple prefix matches.
   98: do_test fts2n-1.12 {
   99:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'"
  100: } {1 2}
  101: 
  102: # Phrase query with multiple prefix matches.
  103: do_test fts2n-1.13 {
  104:   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'"
  105: } {2}
  106: 
  107: 
  108: 
  109: 
  110: # Test across updates (and, by implication, deletes).
  111: 
  112: # Version of text without "lorem" (Tcl's empty string is {}, not '').
  113: regsub -all {[Ll]orem} $text {} ntext
  114: 
  115: db eval {
  116:   CREATE VIRTUAL TABLE t2 USING fts2(c);
  117: 
  118:   INSERT INTO t2(rowid, c) VALUES(1, $text);
  119:   INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
  120:   UPDATE t2 SET c = $ntext WHERE rowid = 1;
  121: }
  122: 
  123: # Can't see lorem as an exact match.
  124: do_test fts2n-2.1 {
  125:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'"
  126: } {}
  127: 
  128: # Can't see a prefix of lorem, either.
  129: do_test fts2n-2.2 {
  130:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'"
  131: } {}
  132: 
  133: # Can see lovely in the other document.
  134: do_test fts2n-2.3 {
  135:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'"
  136: } {2}
  137: 
  138: # Can still see other hits.
  139: do_test fts2n-2.4 {
  140:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'"
  141: } {1 2}
  142: 
  143: # Prefix which should only hit one document.
  144: do_test fts2n-2.5 {
  145:   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'"
  146: } {2}
  147: 
  148: 
  149: 
  150: # Test with a segment which will have multiple levels in the tree.
  151: 
  152: # Build a big document with lots of unique terms: each pass appends a copy of $bigtext with $c suffixed to every word, doubling its size.
  153: set bigtext $text
  154: foreach c {a b c d e} {
  155:   regsub -all {[A-Za-z]+} $bigtext "&$c" t
  156:   append bigtext $t
  157: }
  158: 
  159: # Populate a table with many copies of the big document, so that we
  160: # can test the number of hits found.  Populate $ret with the expected
  161: # hit counts for each row.  offsets() returns 4 elements for every
  162: # hit.  We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for
  163: # $bigtext.
  164: set ret {6 1}
  165: db eval {
  166:   BEGIN;
  167:   CREATE VIRTUAL TABLE t3 USING fts2(c);
  168: 
  169:   INSERT INTO t3(rowid, c) VALUES(1, $text);
  170:   INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
  171: }
  172: for {set i 0} {$i<100} {incr i} {
  173:   db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
  174:   lappend ret 192
  175: }
  176: db eval {COMMIT;}
  177: 
  178: # Test that we get the expected number of hits.
  179: do_test fts2n-3.1 {
  180:   set t {}
  181:   db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
  182:     set l [llength $o]
  183:     lappend t [expr {$l/4}]
  184:   }
  185:   set t
  186: } $ret
  187: 
  188: # TODO(shess) It would be useful to test a couple edge cases, but I
  189: # don't know if we have the precision to manage it from here at this
  190: # time.  Prefix hits can cross leaves, which the code above _should_
  191: # hit by virtue of size.  There are two variations on this.  If the
  192: # tree is 2 levels high, the code will find the leaf-node extent
  193: # directly, but if it's higher, the code will have to follow two
  194: # separate interior branches down the tree.  Both should be tested.
  195: 
  196: finish_test

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>