Annotation of embedaddon/sqlite3/test/fts3matchinfo.test, revision 1.1.1.1

1.1       misho       1: # 2010 November 02
                      2: #
                      3: # The author disclaims copyright to this source code.  In place of
                      4: # a legal notice, here is a blessing:
                      5: #
                      6: #    May you do good and not evil.
                      7: #    May you find forgiveness for yourself and forgive others.
                      8: #    May you share freely, never taking more than you give.
                      9: #
                     10: #***********************************************************************
                     11: # This file implements regression tests for the FTS3 module. The focus
                     12: # of this file is tables created with the "matchinfo=fts3" option.
                     13: #
                     14: 
                     15: set testdir [file dirname $argv0]
                     16: source $testdir/tester.tcl
                     17: 
                     18: # If SQLITE_ENABLE_FTS3 is not defined, omit this file.
                     19: ifcapable !fts3 { finish_test ; return }
                     20: 
                     21: set testprefix fts3matchinfo
                     22: set sqlite_fts3_enable_parentheses 0
                     23: 
                     24: proc mit {blob} {
                     25:   set scan(littleEndian) i*
                     26:   set scan(bigEndian) I*
                     27:   binary scan $blob $scan($::tcl_platform(byteOrder)) r
                     28:   return $r
                     29: }
                     30: db func mit mit
                     31: 
                     32: do_execsql_test 1.0 {
                     33:   CREATE VIRTUAL TABLE t1 USING fts4(matchinfo=fts3);
                     34:   SELECT name FROM sqlite_master WHERE type = 'table';
                     35: } {t1 t1_content t1_segments t1_segdir t1_stat}
                     36: 
                     37: do_execsql_test 1.1 {
                     38:   INSERT INTO t1(content) VALUES('I wandered lonely as a cloud');
                     39:   INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,');
                     40:   INSERT INTO t1(content) VALUES('When all at once I saw a crowd,');
                     41:   INSERT INTO t1(content) VALUES('A host, of golden daffodils,');
                     42:   SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I';
                     43: } {{1 1 1 2 2} {1 1 1 2 2}}
                     44: 
                     45: # Now create an FTS4 table that does not specify matchinfo=fts3.
                     46: #
                     47: do_execsql_test 1.2 {
                     48:   CREATE VIRTUAL TABLE t2 USING fts4;
                     49:   INSERT INTO t2 SELECT * FROM t1;
                     50:   SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I';
                     51: } {{1 1 1 2 2} {1 1 1 2 2}}
                     52: 
                     53: # Test some syntax-error handling.
                     54: #
                     55: do_catchsql_test 2.0 {
                     56:   CREATE VIRTUAL TABLE x1 USING fts4(matchinfo=fs3);
                     57: } {1 {unrecognized matchinfo: fs3}}
                     58: do_catchsql_test 2.1 {
                     59:   CREATE VIRTUAL TABLE x2 USING fts4(mtchinfo=fts3);
                     60: } {1 {unrecognized parameter: mtchinfo=fts3}}
                     61: do_catchsql_test 2.2 {
                     62:   CREATE VIRTUAL TABLE x2 USING fts4(matchinfo=fts5);
                     63: } {1 {unrecognized matchinfo: fts5}}
                     64: 
                     65: # Check that with fts3, the "=" character is permitted in column definitions.
                     66: #
                     67: do_execsql_test 3.1 {
                     68:   CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
                     69:   INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
                     70:   SELECT mtchinfo FROM t3;
                     71: } {{Beside the lake, beneath the trees}}
                     72: 
                     73: do_execsql_test 3.2 {
                     74:   CREATE VIRTUAL TABLE xx USING FTS4;
                     75: }
                     76: do_execsql_test 3.3 {
                     77:   SELECT * FROM xx WHERE xx MATCH 'abc';
                     78: }
                     79: do_execsql_test 3.4 {
                     80:   SELECT * FROM xx WHERE xx MATCH 'a b c';
                     81: }
                     82: 
                     83: 
                     84: #--------------------------------------------------------------------------
                     85: # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
                     86: #
                     87: # The first argument - $tn - is a test identifier. This may be either a
                     88: # full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix
                     89: # is set, just the numeric component (i.e. "1.1").
                     90: #
                     91: # The second argument is the name of an FTSX table. The third is the 
                     92: # full text of a WHERE/MATCH expression to query the table for 
                     93: # (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a
                     94: # key-value list (serialized array) with matchinfo() format specifiers
                     95: # as keys, and the results of executing the statement:
                     96: #
                     97: #   SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr
                     98: #
                     99: # For example:
                    100: #
                    101: #   CREATE VIRTUAL TABLE t1 USING fts4;
                    102: #   INSERT INTO t1 VALUES('abc');
                    103: #   INSERT INTO t1 VALUES('def');
                    104: #   INSERT INTO t1 VALUES('abc abc');
                    105: #
                    106: #   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
                    107: #     n {3 3}
                    108: #     p {1 1}
                    109: #     c {1 1}
                    110: #     x {{1 3 2} {2 3 2}}
                    111: #   }
                    112: #
                    113: # If the $results list contains keys mapped to "-" instead of a matchinfo()
                    114: # result, then this command computes the expected results based on other
                    115: # mappings to test the matchinfo() function. For example, the command above
                    116: # could be changed to:
                    117: #
                    118: #   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
                    119: #     n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}}
                    120: #     pcx -
                    121: #   }
                    122: #
                    123: # And this command would compute the expected results for matchinfo(t1, 'pcx')
                    124: # based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and 
                    125: # matchinfo(t1, 'x') in order to test 'pcx'.
                    126: #
                    127: proc do_matchinfo_test {tn tbl expr results} {
                    128: 
                    129:   foreach {fmt res} $results {
                    130:     if {$res == "-"} continue
                    131:     set resarray($fmt) $res
                    132:   }
                    133: 
                    134:   set nRow 0
                    135:   foreach {fmt res} [array get resarray] {
                    136:     if {[llength $res]>$nRow} { set nRow [llength $res] }
                    137:   }
                    138: 
                    139:   # Construct expected results for any formats for which the caller 
                    140:   # supplied result is "-".
                    141:   #
                    142:   foreach {fmt res} $results {
                    143:     if {$res == "-"} {
                    144:       set res [list]
                    145:       for {set iRow 0} {$iRow<$nRow} {incr iRow} {
                    146:         set rowres [list]
                    147:         foreach c [split $fmt ""] {
                    148:           set rowres [concat $rowres [lindex $resarray($c) $iRow]]
                    149:         }
                    150:         lappend res $rowres
                    151:       }
                    152:       set resarray($fmt) $res
                    153:     }
                    154:   }
                    155: 
                    156:   # Test each matchinfo() request individually.
                    157:   #
                    158:   foreach {fmt res} [array get resarray] {
                    159:     set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr"
                    160:     do_execsql_test $tn.$fmt $sql [normalize2 $res]
                    161:   }
                    162: 
                    163:   # Test them all executed together (multiple invocations of matchinfo()).
                    164:   #
                    165:   set exprlist [list]
                    166:   foreach {format res} [array get resarray] {
                    167:     lappend exprlist "mit(matchinfo($tbl, '$format'))"
                    168:   }
                    169:   set allres [list]
                    170:   for {set iRow 0} {$iRow<$nRow} {incr iRow} {
                    171:     foreach {format res} [array get resarray] {
                    172:       lappend allres [lindex $res $iRow]
                    173:     }
                    174:   }
                    175:   set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr"
                    176:   do_execsql_test $tn.multi $sql [normalize2 $allres]
                    177: }
                    178: proc normalize2 {list_of_lists} {
                    179:   set res [list]
                    180:   foreach elem $list_of_lists {
                    181:     lappend res [list {*}$elem]
                    182:   }
                    183:   return $res
                    184: }
                    185: 
                    186: 
                    187: do_execsql_test 4.1.0 {
                    188:   CREATE VIRTUAL TABLE t4 USING fts4(x, y);
                    189:   INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
                    190:   INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
                    191: }
                    192: 
                    193: do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
                    194:   p {3 3}
                    195:   c {2 2}
                    196:   x {
                    197:     {1 1 1   0 1 1   1 1 1   0 1 1   1 1 1   0 1 1}
                    198:     {0 1 1   1 1 1   0 1 1   1 1 1   0 1 1   1 1 1}
                    199:   }
                    200:   n {2 2}
                    201:   l {{5 5} {5 5}}
                    202:   a {{5 5} {5 5}}
                    203: 
                    204:   s {{3 0} {0 3}}
                    205: 
                    206:   xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
                    207:   xpxsscplax -
                    208: }
                    209: 
                    210: do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
                    211:   p {1 1}
                    212:   c {2 2}
                    213:   x {
                    214:     {0 1 1   1 1 1}
                    215:     {1 1 1   0 1 1}
                    216:   }
                    217:   n {2 2}
                    218:   l {{5 5} {5 5}}
                    219:   a {{5 5} {5 5}}
                    220: 
                    221:   s {{0 1} {1 0}}
                    222: 
                    223:   xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
                    224:   sxsxs -
                    225: }
                    226: 
                    227: do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'}     { s {{2 0} {0 2}} }
                    228: do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
                    229: do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
                    230: do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'}     { s {{1 0} {0 1}} }
                    231: do_matchinfo_test 4.1.7 t4 {t4 MATCH 'f OR abcd'} {
                    232:   x { 
                    233:     {0 1 1  1 1 1  0 0 0  0 0 0} 
                    234:     {1 1 1  0 1 1  0 0 0  0 0 0}
                    235:   }
                    236: }
                    237: do_matchinfo_test 4.1.8 t4 {t4 MATCH 'f -abcd'} {
                    238:   x { 
                    239:     {0 1 1  1 1 1}
                    240:     {1 1 1  0 1 1}
                    241:   }
                    242: }
                    243: 
                    244: do_execsql_test 4.2.0 {
                    245:   CREATE VIRTUAL TABLE t5 USING fts4;
                    246:   INSERT INTO t5 VALUES('a a a a a');
                    247:   INSERT INTO t5 VALUES('a b a b a');
                    248:   INSERT INTO t5 VALUES('c b c b c');
                    249:   INSERT INTO t5 VALUES('x x x x x');
                    250: }
                    251: do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'}         { 
                    252:   x {{5 8 2   5 8 2} {3 8 2   3 8 2}}
                    253:   s {2 1} 
                    254: }
                    255: do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'}         { s {2} }
                    256: do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'}       { s {3} }
                    257: do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
                    258: do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
                    259: do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1} }
                    260: 
                    261: do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
                    262: 
                    263: # It used to be that the second 'a' token would be deferred. That doesn't
                    264: # work any longer.
                    265: if 0 {
                    266:   do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { 
                    267:     x {{5 8 2   5 5 5} {3 8 2   3 5 5}}
                    268:     s {2 1} 
                    269:   }
                    270: }
                    271: 
                    272: do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'}         { s {2} }
                    273: do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'}       { s {3} }
                    274: do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
                    275: do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
                    276: do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1 1} }
                    277: 
                    278: do_execsql_test 4.4.0 {
                    279:   INSERT INTO t5(t5) VALUES('optimize');
                    280:   UPDATE t5_segments 
                    281:   SET block = zeroblob(length(block)) 
                    282:   WHERE length(block)>10000;
                    283: }
                    284: 
                    285: do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
                    286: do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
                    287: do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
                    288: do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'}       { s {3} }
                    289: do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
                    290: do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
                    291: 
                    292: do_execsql_test 4.5.0 {
                    293:   CREATE VIRTUAL TABLE t6 USING fts4(a, b, c);
                    294:   INSERT INTO t6 VALUES('a', 'b', 'c');
                    295: }
                    296: do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'}       { s {{1 1 1}} }
                    297: 
                    298: 
                    299: #-------------------------------------------------------------------------
                    300: # Check the following restrictions:
                    301: #
                    302: #   + Matchinfo flags 'a', 'l' and 'n' can only be used with fts4, not fts3.
                    303: #   + Matchinfo flag 'l' cannot be used with matchinfo=fts3.
                    304: #
                    305: do_execsql_test 5.1 {
                    306:   CREATE VIRTUAL TABLE t7 USING fts3(a, b);
                    307:   INSERT INTO t7 VALUES('u v w', 'x y z');
                    308: 
                    309:   CREATE VIRTUAL TABLE t8 USING fts4(a, b, matchinfo=fts3);
                    310:   INSERT INTO t8 VALUES('u v w', 'x y z');
                    311: }
                    312: 
                    313: do_catchsql_test 5.2.1 { 
                    314:   SELECT matchinfo(t7, 'a') FROM t7 WHERE t7 MATCH 'x y'
                    315: } {1 {unrecognized matchinfo request: a}}
                    316: do_catchsql_test 5.2.2 { 
                    317:   SELECT matchinfo(t7, 'l') FROM t7 WHERE t7 MATCH 'x y'
                    318: } {1 {unrecognized matchinfo request: l}}
                    319: do_catchsql_test 5.2.3 { 
                    320:   SELECT matchinfo(t7, 'n') FROM t7 WHERE t7 MATCH 'x y'
                    321: } {1 {unrecognized matchinfo request: n}}
                    322: 
                    323: do_catchsql_test 5.3.1 { 
                    324:   SELECT matchinfo(t8, 'l') FROM t8 WHERE t8 MATCH 'x y'
                    325: } {1 {unrecognized matchinfo request: l}}
                    326: 
                    327: #-------------------------------------------------------------------------
                    328: # Test that the offsets() function handles corruption in the %_content
                    329: # table correctly.
                    330: #
                    331: do_execsql_test 6.1 {
                    332:   CREATE VIRTUAL TABLE t9 USING fts4;
                    333:   INSERT INTO t9 VALUES(
                    334:     'this record is used to try to dectect corruption'
                    335:   );
                    336:   SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
                    337: } {{0 0 20 2 0 0 27 2}}
                    338: 
                    339: do_catchsql_test 6.2 {
                    340:   UPDATE t9_content SET c0content = 'this record is used to'; 
                    341:   SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
                    342: } {1 {database disk image is malformed}}
                    343: 
                    344: #-------------------------------------------------------------------------
                    345: # Test the outcome of matchinfo() when used within a query that does not
                    346: # use the full-text index (i.e. lookup by rowid or full-table scan).
                    347: #
                    348: do_execsql_test 7.1 {
                    349:   CREATE VIRTUAL TABLE t10 USING fts4;
                    350:   INSERT INTO t10 VALUES('first record');
                    351:   INSERT INTO t10 VALUES('second record');
                    352: }
                    353: do_execsql_test 7.2 {
                    354:   SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10;
                    355: } {blob 0 blob 0}
                    356: do_execsql_test 7.3 {
                    357:   SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10 WHERE docid=1;
                    358: } {blob 0}
                    359: do_execsql_test 7.4 {
                    360:   SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) 
                    361:   FROM t10 WHERE t10 MATCH 'record'
                    362: } {blob 20 blob 20}
                    363: 
                    364: #-------------------------------------------------------------------------
                    365: # Test a special case - matchinfo('nxa') with many zero length documents. 
                    366: # Special because "x" internally uses a statement used by both "n" and "a". 
                    367: # This was causing a problem at one point in the obscure case where the
                    368: # total number of bytes of data stored in an fts3 table was greater than
                    369: # the number of rows. i.e. when the following query returns true:
                    370: #
                    371: #   SELECT sum(length(content)) < count(*) FROM fts4table;
                    372: #
                    373: do_execsql_test 8.1 {
                    374:   CREATE VIRTUAL TABLE t11 USING fts4;
                    375:   INSERT INTO t11(t11) VALUES('nodesize=24');
                    376:   INSERT INTO t11 VALUES('quitealongstringoftext');
                    377:   INSERT INTO t11 VALUES('anotherquitealongstringoftext');
                    378:   INSERT INTO t11 VALUES('athirdlongstringoftext');
                    379:   INSERT INTO t11 VALUES('andonemoreforgoodluck');
                    380: }
                    381: do_test 8.2 {
                    382:   for {set i 0} {$i < 200} {incr i} {
                    383:     execsql { INSERT INTO t11 VALUES('') }
                    384:   }
                    385:   execsql { INSERT INTO t11(t11) VALUES('optimize') }
                    386: } {}
                    387: do_execsql_test 8.3 {
                    388:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
                    389: } {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}}
                    390: 
                    391: # Corruption related tests.
                    392: do_execsql_test  8.4.1.1 { UPDATE t11_stat SET value = X'0000'; }
                    393: do_catchsql_test 8.5.1.2 {
                    394:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
                    395: } {1 {database disk image is malformed}}
                    396: 
                    397: do_execsql_test  8.4.2.1 { UPDATE t11_stat SET value = X'00'; }
                    398: do_catchsql_test 8.5.2.2 {
                    399:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
                    400: } {1 {database disk image is malformed}}
                    401: 
                    402: do_execsql_test  8.4.3.1 { UPDATE t11_stat SET value = NULL; }
                    403: do_catchsql_test 8.5.3.2 {
                    404:   SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
                    405: } {1 {database disk image is malformed}}
                    406: 
                    407: finish_test
                    408: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>