File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / test / fts3matchinfo.test
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:16 2012 UTC (12 years, 4 months ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: # 2010 November 02
    2: #
    3: # The author disclaims copyright to this source code.  In place of
    4: # a legal notice, here is a blessing:
    5: #
    6: #    May you do good and not evil.
    7: #    May you find forgiveness for yourself and forgive others.
    8: #    May you share freely, never taking more than you give.
    9: #
   10: #***********************************************************************
   11: # This file implements regression tests for the FTS3 module. The focus
   12: # of this file is tables created with the "matchinfo=fts3" option.
   13: #
   14: 
   15: set testdir [file dirname $argv0]
   16: source $testdir/tester.tcl
   17: 
   18: # If SQLITE_ENABLE_FTS3 is not defined, omit this file.
   19: ifcapable !fts3 { finish_test ; return }
   20: 
   21: set testprefix fts3matchinfo
   22: set sqlite_fts3_enable_parentheses 0
   23: 
   24: proc mit {blob} {
   25:   set scan(littleEndian) i*
   26:   set scan(bigEndian) I*
   27:   binary scan $blob $scan($::tcl_platform(byteOrder)) r
   28:   return $r
   29: }
   30: db func mit mit
   31: 
   32: do_execsql_test 1.0 {
   33:   CREATE VIRTUAL TABLE t1 USING fts4(matchinfo=fts3);
   34:   SELECT name FROM sqlite_master WHERE type = 'table';
   35: } {t1 t1_content t1_segments t1_segdir t1_stat}
   36: 
   37: do_execsql_test 1.1 {
   38:   INSERT INTO t1(content) VALUES('I wandered lonely as a cloud');
   39:   INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,');
   40:   INSERT INTO t1(content) VALUES('When all at once I saw a crowd,');
   41:   INSERT INTO t1(content) VALUES('A host, of golden daffodils,');
   42:   SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I';
   43: } {{1 1 1 2 2} {1 1 1 2 2}}
   44: 
   45: # Now create an FTS4 table that does not specify matchinfo=fts3.
   46: #
   47: do_execsql_test 1.2 {
   48:   CREATE VIRTUAL TABLE t2 USING fts4;
   49:   INSERT INTO t2 SELECT * FROM t1;
   50:   SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I';
   51: } {{1 1 1 2 2} {1 1 1 2 2}}
   52: 
   53: # Test some syntax-error handling.
   54: #
   55: do_catchsql_test 2.0 {
   56:   CREATE VIRTUAL TABLE x1 USING fts4(matchinfo=fs3);
   57: } {1 {unrecognized matchinfo: fs3}}
   58: do_catchsql_test 2.1 {
   59:   CREATE VIRTUAL TABLE x2 USING fts4(mtchinfo=fts3);
   60: } {1 {unrecognized parameter: mtchinfo=fts3}}
   61: do_catchsql_test 2.2 {
   62:   CREATE VIRTUAL TABLE x2 USING fts4(matchinfo=fts5);
   63: } {1 {unrecognized matchinfo: fts5}}
   64: 
   65: # Check that with fts3, the "=" character is permitted in column definitions.
   66: #
   67: do_execsql_test 3.1 {
   68:   CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
   69:   INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
   70:   SELECT mtchinfo FROM t3;
   71: } {{Beside the lake, beneath the trees}}
   72: 
   73: do_execsql_test 3.2 {
   74:   CREATE VIRTUAL TABLE xx USING FTS4;
   75: }
   76: do_execsql_test 3.3 {
   77:   SELECT * FROM xx WHERE xx MATCH 'abc';
   78: }
   79: do_execsql_test 3.4 {
   80:   SELECT * FROM xx WHERE xx MATCH 'a b c';
   81: }
   82: 
   83: 
   84: #--------------------------------------------------------------------------
   85: # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
   86: #
   87: # The first argument - $tn - is a test identifier. This may be either a
   88: # full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix
   89: # is set, just the numeric component (i.e. "1.1").
   90: #
   91: # The second argument is the name of an FTSX table. The third is the 
   92: # full text of a WHERE/MATCH expression to query the table for 
   93: # (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a
   94: # key-value list (serialized array) with matchinfo() format specifiers
   95: # as keys, and the results of executing the statement:
   96: #
   97: #   SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr
   98: #
   99: # For example:
  100: #
  101: #   CREATE VIRTUAL TABLE t1 USING fts4;
  102: #   INSERT INTO t1 VALUES('abc');
  103: #   INSERT INTO t1 VALUES('def');
  104: #   INSERT INTO t1 VALUES('abc abc');
  105: #
  106: #   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
  107: #     n {3 3}
  108: #     p {1 1}
  109: #     c {1 1}
  110: #     x {{1 3 2} {2 3 2}}
  111: #   }
  112: #
  113: # If the $results list contains keys mapped to "-" instead of a matchinfo()
  114: # result, then this command computes the expected results based on other
  115: # mappings to test the matchinfo() function. For example, the command above
  116: # could be changed to:
  117: #
  118: #   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
  119: #     n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}}
  120: #     pcx -
  121: #   }
  122: #
  123: # And this command would compute the expected results for matchinfo(t1, 'pcx')
  124: # based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and 
  125: # matchinfo(t1, 'x') in order to test 'pcx'.
  126: #
  127: proc do_matchinfo_test {tn tbl expr results} {
  128: 
  129:   foreach {fmt res} $results {
  130:     if {$res == "-"} continue
  131:     set resarray($fmt) $res
  132:   }
  133: 
  134:   set nRow 0
  135:   foreach {fmt res} [array get resarray] {
  136:     if {[llength $res]>$nRow} { set nRow [llength $res] }
  137:   }
  138: 
  139:   # Construct expected results for any formats for which the caller 
  140:   # supplied result is "-".
  141:   #
  142:   foreach {fmt res} $results {
  143:     if {$res == "-"} {
  144:       set res [list]
  145:       for {set iRow 0} {$iRow<$nRow} {incr iRow} {
  146:         set rowres [list]
  147:         foreach c [split $fmt ""] {
  148:           set rowres [concat $rowres [lindex $resarray($c) $iRow]]
  149:         }
  150:         lappend res $rowres
  151:       }
  152:       set resarray($fmt) $res
  153:     }
  154:   }
  155: 
  156:   # Test each matchinfo() request individually.
  157:   #
  158:   foreach {fmt res} [array get resarray] {
  159:     set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr"
  160:     do_execsql_test $tn.$fmt $sql [normalize2 $res]
  161:   }
  162: 
  163:   # Test them all executed together (multiple invocations of matchinfo()).
  164:   #
  165:   set exprlist [list]
  166:   foreach {format res} [array get resarray] {
  167:     lappend exprlist "mit(matchinfo($tbl, '$format'))"
  168:   }
  169:   set allres [list]
  170:   for {set iRow 0} {$iRow<$nRow} {incr iRow} {
  171:     foreach {format res} [array get resarray] {
  172:       lappend allres [lindex $res $iRow]
  173:     }
  174:   }
  175:   set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr"
  176:   do_execsql_test $tn.multi $sql [normalize2 $allres]
  177: }
  178: proc normalize2 {list_of_lists} {
  179:   set res [list]
  180:   foreach elem $list_of_lists {
  181:     lappend res [list {*}$elem]
  182:   }
  183:   return $res
  184: }
  185: 
  186: 
  187: do_execsql_test 4.1.0 {
  188:   CREATE VIRTUAL TABLE t4 USING fts4(x, y);
  189:   INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
  190:   INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
  191: }
  192: 
  193: do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
  194:   p {3 3}
  195:   c {2 2}
  196:   x {
  197:     {1 1 1   0 1 1   1 1 1   0 1 1   1 1 1   0 1 1}
  198:     {0 1 1   1 1 1   0 1 1   1 1 1   0 1 1   1 1 1}
  199:   }
  200:   n {2 2}
  201:   l {{5 5} {5 5}}
  202:   a {{5 5} {5 5}}
  203: 
  204:   s {{3 0} {0 3}}
  205: 
  206:   xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
  207:   xpxsscplax -
  208: }
  209: 
  210: do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
  211:   p {1 1}
  212:   c {2 2}
  213:   x {
  214:     {0 1 1   1 1 1}
  215:     {1 1 1   0 1 1}
  216:   }
  217:   n {2 2}
  218:   l {{5 5} {5 5}}
  219:   a {{5 5} {5 5}}
  220: 
  221:   s {{0 1} {1 0}}
  222: 
  223:   xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
  224:   sxsxs -
  225: }
  226: 
  227: do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'}     { s {{2 0} {0 2}} }
  228: do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
  229: do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
  230: do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'}     { s {{1 0} {0 1}} }
  231: do_matchinfo_test 4.1.7 t4 {t4 MATCH 'f OR abcd'} {
  232:   x { 
  233:     {0 1 1  1 1 1  0 0 0  0 0 0} 
  234:     {1 1 1  0 1 1  0 0 0  0 0 0}
  235:   }
  236: }
  237: do_matchinfo_test 4.1.8 t4 {t4 MATCH 'f -abcd'} {
  238:   x { 
  239:     {0 1 1  1 1 1}
  240:     {1 1 1  0 1 1}
  241:   }
  242: }
  243: 
  244: do_execsql_test 4.2.0 {
  245:   CREATE VIRTUAL TABLE t5 USING fts4;
  246:   INSERT INTO t5 VALUES('a a a a a');
  247:   INSERT INTO t5 VALUES('a b a b a');
  248:   INSERT INTO t5 VALUES('c b c b c');
  249:   INSERT INTO t5 VALUES('x x x x x');
  250: }
  251: do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'}         { 
  252:   x {{5 8 2   5 8 2} {3 8 2   3 8 2}}
  253:   s {2 1} 
  254: }
  255: do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'}         { s {2} }
  256: do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'}       { s {3} }
  257: do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
  258: do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
  259: do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1} }
  260: 
  261: do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
  262: 
  263: # It used to be that the second 'a' token would be deferred. That doesn't
  264: # work any longer.
  265: if 0 {
  266:   do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { 
  267:     x {{5 8 2   5 5 5} {3 8 2   3 5 5}}
  268:     s {2 1} 
  269:   }
  270: }
  271: 
  272: do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'}         { s {2} }
  273: do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'}       { s {3} }
  274: do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
  275: do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
  276: do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1 1} }
  277: 
  278: do_execsql_test 4.4.0 {
  279:   INSERT INTO t5(t5) VALUES('optimize');
  280:   UPDATE t5_segments 
  281:   SET block = zeroblob(length(block)) 
  282:   WHERE length(block)>10000;
  283: }
  284: 
  285: do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
  286: do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
  287: do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
  288: do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'}       { s {3} }
  289: do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
  290: do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
  291: 
  292: do_execsql_test 4.5.0 {
  293:   CREATE VIRTUAL TABLE t6 USING fts4(a, b, c);
  294:   INSERT INTO t6 VALUES('a', 'b', 'c');
  295: }
  296: do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'}       { s {{1 1 1}} }
  297: 
  298: 
  299: #-------------------------------------------------------------------------
  300: # Check the following restrictions:
  301: #
  302: #   + Matchinfo flags 'a', 'l' and 'n' can only be used with fts4, not fts3.
  303: #   + Matchinfo flag 'l' cannot be used with matchinfo=fts3.
  304: #
  305: do_execsql_test 5.1 {
  306:   CREATE VIRTUAL TABLE t7 USING fts3(a, b);
  307:   INSERT INTO t7 VALUES('u v w', 'x y z');
  308: 
  309:   CREATE VIRTUAL TABLE t8 USING fts4(a, b, matchinfo=fts3);
  310:   INSERT INTO t8 VALUES('u v w', 'x y z');
  311: }
  312: 
  313: do_catchsql_test 5.2.1 { 
  314:   SELECT matchinfo(t7, 'a') FROM t7 WHERE t7 MATCH 'x y'
  315: } {1 {unrecognized matchinfo request: a}}
  316: do_catchsql_test 5.2.2 { 
  317:   SELECT matchinfo(t7, 'l') FROM t7 WHERE t7 MATCH 'x y'
  318: } {1 {unrecognized matchinfo request: l}}
  319: do_catchsql_test 5.2.3 { 
  320:   SELECT matchinfo(t7, 'n') FROM t7 WHERE t7 MATCH 'x y'
  321: } {1 {unrecognized matchinfo request: n}}
  322: 
  323: do_catchsql_test 5.3.1 { 
  324:   SELECT matchinfo(t8, 'l') FROM t8 WHERE t8 MATCH 'x y'
  325: } {1 {unrecognized matchinfo request: l}}
  326: 
  327: #-------------------------------------------------------------------------
  328: # Test that the offsets() function handles corruption in the %_content
  329: # table correctly.
  330: #
  331: do_execsql_test 6.1 {
  332:   CREATE VIRTUAL TABLE t9 USING fts4;
  333:   INSERT INTO t9 VALUES(
  334:     'this record is used to try to dectect corruption'
  335:   );
  336:   SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
  337: } {{0 0 20 2 0 0 27 2}}
  338: 
  339: do_catchsql_test 6.2 {
  340:   UPDATE t9_content SET c0content = 'this record is used to'; 
  341:   SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
  342: } {1 {database disk image is malformed}}
  343: 
  344: #-------------------------------------------------------------------------
  345: # Test the outcome of matchinfo() when used within a query that does not
  346: # use the full-text index (i.e. lookup by rowid or full-table scan).
  347: #
  348: do_execsql_test 7.1 {
  349:   CREATE VIRTUAL TABLE t10 USING fts4;
  350:   INSERT INTO t10 VALUES('first record');
  351:   INSERT INTO t10 VALUES('second record');
  352: }
  353: do_execsql_test 7.2 {
  354:   SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10;
  355: } {blob 0 blob 0}
  356: do_execsql_test 7.3 {
  357:   SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10 WHERE docid=1;
  358: } {blob 0}
  359: do_execsql_test 7.4 {
  360:   SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) 
  361:   FROM t10 WHERE t10 MATCH 'record'
  362: } {blob 20 blob 20}
  363: 
  364: #-------------------------------------------------------------------------
  365: # Test a special case - matchinfo('nxa') with many zero length documents. 
  366: # Special because "x" internally uses a statement used by both "n" and "a". 
  367: # This was causing a problem at one point in the obscure case where the
  368: # total number of bytes of data stored in an fts3 table was greater than
  369: # the number of rows. i.e. when the following query returns true:
  370: #
  371: #   SELECT sum(length(content)) < count(*) FROM fts4table;
  372: #
  373: do_execsql_test 8.1 {
  374:   CREATE VIRTUAL TABLE t11 USING fts4;
  375:   INSERT INTO t11(t11) VALUES('nodesize=24');
  376:   INSERT INTO t11 VALUES('quitealongstringoftext');
  377:   INSERT INTO t11 VALUES('anotherquitealongstringoftext');
  378:   INSERT INTO t11 VALUES('athirdlongstringoftext');
  379:   INSERT INTO t11 VALUES('andonemoreforgoodluck');
  380: }
  381: do_test 8.2 {
  382:   for {set i 0} {$i < 200} {incr i} {
  383:     execsql { INSERT INTO t11 VALUES('') }
  384:   }
  385:   execsql { INSERT INTO t11(t11) VALUES('optimize') }
  386: } {}
  387: do_execsql_test 8.3 {
  388:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  389: } {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}}
  390: 
  391: # Corruption related tests.
  392: do_execsql_test  8.4.1.1 { UPDATE t11_stat SET value = X'0000'; }
  393: do_catchsql_test 8.5.1.2 {
  394:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  395: } {1 {database disk image is malformed}}
  396: 
  397: do_execsql_test  8.4.2.1 { UPDATE t11_stat SET value = X'00'; }
  398: do_catchsql_test 8.5.2.2 {
  399:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  400: } {1 {database disk image is malformed}}
  401: 
  402: do_execsql_test  8.4.3.1 { UPDATE t11_stat SET value = NULL; }
  403: do_catchsql_test 8.5.3.2 {
  404:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
  405: } {1 {database disk image is malformed}}
  406: 
  407: finish_test
  408: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>