1: # 2010 January 07
2: #
3: # The author disclaims copyright to this source code. In place of
4: # a legal notice, here is a blessing:
5: #
6: # May you do good and not evil.
7: # May you find forgiveness for yourself and forgive others.
8: # May you share freely, never taking more than you give.
9: #
10: #*************************************************************************
11: #
12: # The tests in this file test that the FTS3 auxiliary functions offsets(),
13: # snippet() and matchinfo() work. At time of writing, running this file
14: # provides full coverage of fts3_snippet.c.
15: #
16:
# Locate the directory holding this script (taken from $argv0) and load the
# shared test harness from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }
# NOTE(review): do_select_test and do_error_test, used throughout this file,
# are not defined here; presumably they come from this file or tester.tcl.
source $testdir/fts3_common.tcl

# NOTE(review): presumably switches the FTS3 query parser to the syntax that
# accepts parentheses (used by the "NOT (...)" query in this file); confirm.
# It is reset to 0 at the end of the file.
set sqlite_fts3_enable_parentheses 1
# Initial value only: the main foreach loop in this file assigns
# DO_MALLOC_TEST again on every iteration.
set DO_MALLOC_TEST 0
26:
27: # Transform the list $L to its "normal" form, so that it can be compared to
28: # another list with the same set of elements using [string compare].
29: #
proc normalize {L} {
  # Copy the elements of L one at a time into a new list. The result has
  # the canonical list string form, so two lists with the same elements
  # compare equal as strings however they were originally spaced.
  set canonical [list]
  set count [llength $L]
  for {set idx 0} {$idx < $count} {incr idx} {
    lappend canonical [lindex $L $idx]
  }
  return $canonical
}
35:
proc do_offsets_test {name expr args} {
  # Run an offsets() query for the FTS3 expression in expr against table ft
  # and compare against the expected rows.
  #
  #   name  - test case name passed to do_select_test
  #   expr  - full-text query; referenced as $expr by the SQL text below
  #   args  - one expected offsets() value per matching row, each a list
  #           that is normalized before comparison
  #
  # NOTE(review): the SQL refers to $expr by name, so do_select_test is
  # presumably evaluating it in this proc's scope; do not rename expr.
  set expected [list]
  foreach rowOffsets $args {
    lappend expected [normalize $rowOffsets]
  }
  set sql {
    SELECT offsets(ft) FROM ft WHERE ft MATCH $expr
  }
  do_select_test $name $sql $expected
}
45:
46: # Document text used by a few tests. Contains the English names of all
47: # integers between 1 and 300.
48: #
# Return a list of the run-together English names of the integers 1 to 300
# (one, two, ..., twentyone, ..., onehundredone, ..., threehundred).
proc fts3snippet_number_names {} {
  set units {
    one two three four five six seven eight nine ten eleven twelve thirteen
    fourteen fifteen sixteen seventeen eighteen nineteen
  }
  set tens {twenty thirty forty fifty sixty seventy eighty ninety}

  # Names for 1 through 99: the nineteen irregular names first, then each
  # decade name followed by that decade combined with one through nine.
  set below100 [list]
  foreach word $units { lappend below100 $word }
  foreach decade $tens {
    lappend below100 $decade
    foreach digit [lrange $units 0 8] {
      lappend below100 $decade$digit
    }
  }

  # 100 through 299: each hundred on its own, then prefixed to every name
  # below one hundred. The single name for 300 finishes the list.
  set names $below100
  foreach hundred {onehundred twohundred} {
    lappend names $hundred
    foreach word $below100 { lappend names $hundred$word }
  }
  lappend names threehundred
  return $names
}

# The list is built with lappend, so its string form is already the
# canonical single-space-separated text.
set numbers [fts3snippet_number_names]
126:
127: foreach {DO_MALLOC_TEST enc} {
128: 0 utf8
129: 1 utf8
130: 1 utf16
131: } {
132:
# Start every iteration of the loop from a newly created test.db: close the
# current handle, delete the file, and reopen it.
db close
forcedelete test.db
sqlite3 db test.db
# NOTE(review): presumably turns off the lookaside allocator for this
# connection so allocations go through the normal malloc path; confirm.
sqlite3_db_config_lookaside db 0 0 0
# Use the text encoding selected for this iteration.
db eval "PRAGMA encoding = \"$enc\""

# Set variable $T to the test name prefix for this iteration of the loop.
#
# NOTE(review): the prefix depends only on $enc, so the two utf8 iterations
# (DO_MALLOC_TEST 0 and 1) produce identical test names.
set T "fts3snippet-$enc"
142:
##########################################################################
# Test the offset function.
#
# A single-column table with one row in which the term xxx occurs four
# times, at byte positions 0, 4, 8 and 12.
do_test $T.1.1 {
  execsql {
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('xxx xxx xxx xxx');
  }
} {}
# NOTE(review): per the FTS3 documentation each group of four integers in
# an offsets() result is (column, term number, byte offset, byte size);
# the values below are consistent with that reading.
do_offsets_test $T.1.2 {xxx} {0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}
do_offsets_test $T.1.3 {"xxx xxx"} {
  0 0 0 3 0 0 4 3 0 1 4 3 0 0 8 3
  0 1 8 3 0 1 12 3
}
# The next two cases use the same phrase and term in opposite orders.
do_offsets_test $T.1.4 {"xxx xxx" xxx} {
  0 0 0 3 0 2 0 3 0 0 4 3 0 1 4 3
  0 2 4 3 0 0 8 3 0 1 8 3 0 2 8 3
  0 1 12 3 0 2 12 3
}
do_offsets_test $T.1.5 {xxx "xxx xxx"} {
  0 0 0 3 0 1 0 3 0 0 4 3 0 1 4 3
  0 2 4 3 0 0 8 3 0 1 8 3 0 2 8 3
  0 0 12 3 0 2 12 3
}
167:
# Populate a two-column table. Both rows hold the first 100 number names in
# column a; only the first row has the full $numbers text in column b (the
# second row has NULL there).
#
# This setup step is named .2.0 so that it does not share a name with the
# first offsets check below, which is .2.1.
do_test $T.2.0 {
  set v1 [lrange $numbers 0 99]
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(a, b);
    INSERT INTO ft VALUES($v1, $numbers);
    INSERT INTO ft VALUES($v1, NULL);
  }
} {}

# twohundred lies beyond the first 100 names, so it is expected only in
# column 1 of the first row. The trailing space in the search string keeps
# the match from landing on a longer name that starts with the same text.
set off [string first "twohundred " $numbers]
do_offsets_test $T.2.1 {twohundred} [list 1 0 $off 10]

# onehundred is the 100th name, so it is expected in column 0 of both rows
# and also in column 1 of the first row.
set off [string first "onehundred " $numbers]
do_offsets_test $T.2.2 {onehundred} \
    [list 0 0 $off 10 1 0 $off 10] [list 0 0 $off 10]

# Test a corruption case: overwrite the stored column b text directly in
# the ft_content table, then check that offsets() reports an error.
execsql { UPDATE ft_content SET c1b = 'hello world' WHERE c1b = $numbers }
do_error_test $T.2.3 {
  SELECT offsets(ft) FROM ft WHERE ft MATCH 'onehundred'
} {database disk image is malformed}
190:
191: ##########################################################################
192: # Test the snippet function.
193: #
proc do_snippet_test {name expr iCol nTok args} {
  # Run a snippet() query against table ft and compare against the expected
  # snippets.
  #
  #   name  - test case name passed to do_select_test
  #   expr  - full-text query, referenced as $expr by the SQL text below
  #   iCol  - column argument handed to snippet(), referenced as $iCol
  #   nTok  - token-count argument handed to snippet(), referenced as $nTok
  #   args  - one expected snippet per matching row; surrounding whitespace
  #           is trimmed so callers may write them on their own lines
  #
  # NOTE(review): the SQL refers to the parameters by name, so they must
  # keep their current names; do_select_test presumably evaluates the SQL
  # in this proc's scope.
  set expected [list]
  foreach snippetText $args {
    lappend expected [string trim $snippetText]
  }
  set sql {
    SELECT snippet(ft,'{','}','...',$iCol,$nTok) FROM ft WHERE ft MATCH $expr
  }
  do_select_test $name $sql $expected
}
# One ten-word row. Each test below highlights a single word and requests
# a snippet with nTok = 5.
do_test $T.3.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
  }
} {}
# The expected values show the five-word window moving through the text,
# with an ellipsis on each side where the document text is cut off.
do_snippet_test $T.3.2 one 0 5 "{one} two three four five..."
do_snippet_test $T.3.3 two 0 5 "one {two} three four five..."
do_snippet_test $T.3.4 three 0 5 "one two {three} four five..."
do_snippet_test $T.3.5 four 0 5 "...two three {four} five six..."
do_snippet_test $T.3.6 five 0 5 "...three four {five} six seven..."
do_snippet_test $T.3.7 six 0 5 "...four five {six} seven eight..."
do_snippet_test $T.3.8 seven 0 5 "...five six {seven} eight nine..."
do_snippet_test $T.3.9 eight 0 5 "...six seven {eight} nine ten"
do_snippet_test $T.3.10 nine 0 5 "...six seven eight {nine} ten"
do_snippet_test $T.3.11 ten 0 5 "...six seven eight nine {ten}"
218:
# Add a second, longer row: the names one through twenty repeated twice.
# The ten-word row inserted by the previous group of tests is still in the
# table, so queries matching both rows list two expected snippets.
do_test $T.4.1 {
  execsql {
    INSERT INTO ft VALUES(
      'one two three four five '
      || 'six seven eight nine ten '
      || 'eleven twelve thirteen fourteen fifteen '
      || 'sixteen seventeen eighteen nineteen twenty '
      || 'one two three four five '
      || 'six seven eight nine ten '
      || 'eleven twelve thirteen fourteen fifteen '
      || 'sixteen seventeen eighteen nineteen twenty'
    );
  }
} {}

do_snippet_test $T.4.2 {one nine} 0 5 {
  {one} two three...eight {nine} ten
} {
  {one} two three...eight {nine} ten...
}

# The remaining tests alternate between nTok = 5 and nTok = -5 for the same
# kinds of query; compare the expected values to see how the two differ.
do_snippet_test $T.4.3 {one nine} 0 -5 {
  {one} two three four five...six seven eight {nine} ten
} {
  {one} two three four five...seven eight {nine} ten eleven...
}
# Named 4.3b because this case previously reused the name 4.3, which made
# the two results indistinguishable in the test output.
do_snippet_test $T.4.3b {one nineteen} 0 -5 {
  ...eighteen {nineteen} twenty {one} two...
}
do_snippet_test $T.4.4 {two nineteen} 0 -5 {
  ...eighteen {nineteen} twenty one {two}...
}
do_snippet_test $T.4.5 {three nineteen} 0 -5 {
  ...{nineteen} twenty one two {three}...
}

do_snippet_test $T.4.6 {four nineteen} 0 -5 {
  ...two three {four} five six...seventeen eighteen {nineteen} twenty one...
}
do_snippet_test $T.4.7 {four NEAR nineteen} 0 -5 {
  ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
}

do_snippet_test $T.4.8 {four nineteen} 0 5 {
  ...three {four} five...eighteen {nineteen} twenty...
}
do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 {
  ...eighteen {nineteen} twenty...three {four} five...
}
# Same query and expected value as 4.7.
do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 {
  ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
}
# twentyone occurs in neither row, so only the term four contributes to the
# snippets of the next two queries.
do_snippet_test $T.4.11 {four NOT (nineteen twentyone)} 0 5 {
  ...two three {four} five six...
} {
  ...two three {four} five six...
}
do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 {
  ...two three {four} five six...
} {
  ...two three {four} five six...
}
281:
# Three-column table with one row. The term five appears in columns a and
# b but not in column c.
do_test $T.5.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(a, b, c);
    INSERT INTO ft VALUES(
      'one two three four five',
      'four five six seven eight',
      'seven eight nine ten eleven'
    );
  }
} {}

# Vary the iCol argument: -1, then each column index in turn. For column 2,
# which has no match, the expected snippet is unhighlighted leading text.
do_snippet_test $T.5.2 {five} -1 3 {...three four {five}}
do_snippet_test $T.5.3 {five} 0 3 {...three four {five}}
do_snippet_test $T.5.4 {five} 1 3 {four {five} six...}
do_snippet_test $T.5.5 {five} 2 3 {seven eight nine...}

# Repeat the same four queries with column b set to NULL; the column 1
# snippet is then expected to be empty.
do_test $T.5.6 {
  execsql { UPDATE ft SET b = NULL }
} {}

do_snippet_test $T.5.7 {five} -1 3 {...three four {five}}
do_snippet_test $T.5.8 {five} 0 3 {...three four {five}}
do_snippet_test $T.5.9 {five} 1 3 {}
do_snippet_test $T.5.10 {five} 2 3 {seven eight nine...}

# A term from column a combined with a phrase from column c; each word of
# the phrase is highlighted separately in the expected value.
do_snippet_test $T.5.11 {one "seven eight nine"} -1 -3 {
  {one} two three...{seven} {eight} {nine}...
}
311:
# One row holding all 300 number names. The query below has six terms
# spread far apart in that text, but both expected snippets highlight only
# the first four of them.
do_test $T.6.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(x);
    INSERT INTO ft VALUES($numbers);
  }
} {}
do_snippet_test $T.6.2 {
  one fifty onehundred onehundredfifty twohundredfifty threehundred
} -1 4 {
  {one}...{fifty}...{onehundred}...{onehundredfifty}...
}
# Same query with nTok = -4: each of the four fragments is four words long.
do_snippet_test $T.6.3 {
  one fifty onehundred onehundredfifty twohundredfifty threehundred
} -1 -4 {
  {one} two three four...fortyeight fortynine {fifty} fiftyone...ninetyeight ninetynine {onehundred} onehundredone...onehundredfortyeight onehundredfortynine {onehundredfifty} onehundredfiftyone...
}
329:
# Insert 149 rows of the form one,,,two in which the number of commas
# between the two words runs from 1 to 149, all inside one transaction.
# The expected snippet for each row is collected in testresults as the
# rows are inserted.
do_test $T.7.1 {
  execsql {
    BEGIN;
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(x);
  }
  set testresults [list]
  for {set i 1} {$i < 150} {incr i} {
    set commas [string repeat , $i]
    execsql {INSERT INTO ft VALUES('one' || $commas || 'two')}
    lappend testresults "{one}$commas{two}"
  }
  execsql COMMIT
} {}
# Expand testresults so that each element is passed to do_snippet_test as
# a separate expected-row argument.
eval [list do_snippet_test $T.7.2 {one two} -1 3] $testresults
345:
346: ##########################################################################
347: # Test the matchinfo function.
348: #
proc mit {blob} {
  # Decode a binary blob into a Tcl list of 32-bit integers, reading the
  # bytes in the byte order of the machine running the test.
  #
  #   blob    - binary string to decode
  #   returns - list of the integers found in blob
  switch -exact -- $::tcl_platform(byteOrder) {
    littleEndian { set fmt i* }
    bigEndian    { set fmt I* }
  }
  binary scan $blob $fmt values
  return $values
}
# Make the Tcl proc mit callable from SQL as the function mit() on the
# current database connection.
db func mit mit
proc do_matchinfo_test {name expr args} {
  # Run a matchinfo() query against table ft, decode each result blob with
  # mit(), and compare against the expected rows.
  #
  #   name  - test case name passed to do_select_test
  #   expr  - full-text query, referenced as $expr by the SQL text below
  #   args  - one expected integer list per matching row, each normalized
  #           before comparison
  #
  # NOTE(review): the SQL refers to $expr by name, so do_select_test is
  # presumably evaluating it in this proc's scope; do not rename expr.
  set expected [list]
  foreach rowInfo $args {
    lappend expected [normalize $rowInfo]
  }
  set sql {
    SELECT mit(matchinfo(ft)) FROM ft WHERE ft MATCH $expr
  }
  do_select_test $name $sql $expected
}
# Two rows in a single-column table: the ten names one through ten, and the
# same ten names written twice.
do_test $T.8.1 {
  set ten {one two three four five six seven eight nine ten}
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES($ten);
    INSERT INTO ft VALUES($ten || ' ' || $ten);
  }
} {}

# NOTE(review): per the FTS3 documentation a matchinfo() result starts with
# the number of phrases and the number of columns, followed by three
# integers for each phrase and column pair; confirm against the docs.
do_matchinfo_test $T.8.2 "one" {1 1 1 3 2} {1 1 2 3 2}
# The NEAR queries below differ in their distance limits; some list only
# one expected row even though the table has two.
do_matchinfo_test $T.8.3 "one NEAR/3 ten" {2 1 1 1 1 1 1 1}
do_matchinfo_test $T.8.4 "five NEAR/4 ten" \
    {2 1 1 3 2 1 3 2} {2 1 2 3 2 2 3 2}
do_matchinfo_test $T.8.5 "six NEAR/3 ten NEAR/3 two" \
    {3 1 1 1 1 1 1 1 1 1 1}
do_matchinfo_test $T.8.6 "five NEAR/4 ten NEAR/3 two" \
    {3 1 2 2 1 1 1 1 1 1 1}
381:
# Build a two-column table with three rows, for n = 1, 2 and 3:
#   docid = n * 1000000
#   x     = the first n*100 + 1 number names (lrange includes both ends)
#   y     = the full number text repeated n times
do_test $T.9.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(x, y);
  }
  foreach n {1 2 3} {
    # The expr arguments are braced so that each is substituted only once
    # and can be byte-compiled.
    set v1 [lrange $numbers 0 [expr {$n*100}]]
    set v2 [string trim [string repeat "$numbers " $n]]
    set docid [expr {$n * 1000000}]
    execsql { INSERT INTO ft(docid, x, y) VALUES($docid, $v1, $v2) }
  }
} {}
# Each test below lists one expected matchinfo() row per matching document.
# Every row starts with the phrase count followed by 2, the number of
# columns in the table. There is no test numbered 9.3.
do_matchinfo_test $T.9.2 {two*} \
    { 1 2 1 105 3 101 606 3} \
    { 1 2 3 105 3 202 606 3} \
    { 1 2 101 105 3 303 606 3}

do_matchinfo_test $T.9.4 {"one* two*"} \
    { 1 2 1 5 3 2 12 3} \
    { 1 2 2 5 3 4 12 3} \
    { 1 2 2 5 3 6 12 3}

do_matchinfo_test $T.9.5 {twohundredfifty} \
    { 1 2 0 1 1 1 6 3} \
    { 1 2 0 1 1 2 6 3} \
    { 1 2 1 1 1 3 6 3}

# Only two rows are expected here: the phrase spans the point where one
# copy of the number text ends and the next begins, which needs at least
# two copies in column y.
do_matchinfo_test $T.9.6 {"threehundred one"} \
    { 1 2 0 0 0 1 3 2} \
    { 1 2 0 0 0 2 3 2}

# fivehundred occurs nowhere in the table, so every value for the second
# phrase is zero.
do_matchinfo_test $T.9.7 {one OR fivehundred} \
    { 2 2 1 3 3 1 6 3 0 0 0 0 0 0 } \
    { 2 2 1 3 3 2 6 3 0 0 0 0 0 0 } \
    { 2 2 1 3 3 3 6 3 0 0 0 0 0 0 }

do_matchinfo_test $T.9.8 {two OR "threehundred one"} \
    { 2 2 1 3 3 1 6 3 0 0 0 0 3 2 } \
    { 2 2 1 3 3 2 6 3 0 0 0 1 3 2 } \
    { 2 2 1 3 3 3 6 3 0 0 0 2 3 2 }

# Call matchinfo() twice in the same result row; both calls are expected to
# return the same value.
do_select_test $T.9.9 {
  SELECT mit(matchinfo(ft)), mit(matchinfo(ft))
  FROM ft WHERE ft MATCH 'two OR "threehundred one"'
} [normalize {
  {2 2 1 3 3 1 6 3 0 0 0 0 3 2}
  {2 2 1 3 3 1 6 3 0 0 0 0 3 2}
  {2 2 1 3 3 2 6 3 0 0 0 1 3 2}
  {2 2 1 3 3 2 6 3 0 0 0 1 3 2}
  {2 2 1 3 3 3 6 3 0 0 0 2 3 2}
  {2 2 1 3 3 3 6 3 0 0 0 2 3 2}
}]
434:
# EVIDENCE-OF: R-40630-02268 If used within a SELECT that uses the
# "query by rowid" or "linear scan" strategies, then the snippet and
# offsets both return an empty string, and the matchinfo function
# returns a blob value zero bytes in size.
#
# The table still holds the three rows inserted by test 9.1. For each of
# the three functions, the odd-numbered test runs a query with no WHERE
# clause and the even-numbered test looks up a single rowid.
set r 1000000 ;# A rowid that exists in table ft
do_select_test $T.10.0 { SELECT rowid FROM ft WHERE rowid = $r } $r
do_select_test $T.10.1 {
  SELECT length(offsets(ft)), typeof(offsets(ft)) FROM ft;
} {0 text 0 text 0 text}
do_select_test $T.10.2 {
  SELECT length(offsets(ft)), typeof(offsets(ft)) FROM ft WHERE rowid = $r
} {0 text}
do_select_test $T.10.3 {
  SELECT length(snippet(ft)), typeof(snippet(ft)) FROM ft;
} {0 text 0 text 0 text}
do_select_test $T.10.4 {
  SELECT length(snippet(ft)), typeof(snippet(ft)) FROM ft WHERE rowid = $r;
} {0 text}
do_select_test $T.10.5 {
  SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft;
} {0 blob 0 blob 0 blob}
do_select_test $T.10.6 {
  SELECT length(matchinfo(ft)), typeof(matchinfo(ft)) FROM ft WHERE rowid = $r
} {0 blob}
460: }
461:
# Reset the parentheses setting that was set to 1 near the top of this
# file, then hand control back to the test harness.
set sqlite_fts3_enable_parentheses 0
finish_test
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>