Annotation of embedaddon/sqlite3/ext/fts3/fts3_test.c, revision 1.1.1.1
1.1 misho 1: /*
2: ** 2011 Jun 13
3: **
4: ** The author disclaims copyright to this source code. In place of
5: ** a legal notice, here is a blessing:
6: **
7: ** May you do good and not evil.
8: ** May you find forgiveness for yourself and forgive others.
9: ** May you share freely, never taking more than you give.
10: **
11: ******************************************************************************
12: **
13: ** This file is not part of the production FTS code. It is only used for
14: ** testing. It contains a Tcl command that can be used to test if a document
15: ** matches an FTS NEAR expression.
16: */
17:
18: #include <tcl.h>
19: #include <string.h>
20: #include <assert.h>
21:
22: #ifdef SQLITE_TEST
23:
24: /* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
25: #include "fts3Int.h"
26:
/*
** Maximum number of tokens that a single phrase of a NEAR expression may
** contain. This is the fixed capacity of NearPhrase.aToken[].
*/
#define NM_MAX_TOKEN 12

typedef struct NearPhrase NearPhrase;
typedef struct NearDocument NearDocument;
typedef struct NearToken NearToken;

/*
** A tokenized document: an array of nToken tokens.
*/
struct NearDocument {
  int nToken;                     /* Number of tokens in aToken[] */
  NearToken *aToken;              /* Token array */
};

/*
** A single token. The matching code only ever examines the first n bytes
** of z.
*/
struct NearToken {
  int n;                          /* Length of token in bytes */
  const char *z;                  /* Pointer to token string */
};

/*
** One phrase of a NEAR expression, together with the NEAR distance that
** separates it from the phrase before it.
*/
struct NearPhrase {
  int nNear;                      /* Preceding NEAR value */
  int nToken;                     /* Number of tokens in this phrase */
  NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
};
48:
49: static int nm_phrase_match(
50: NearPhrase *p,
51: NearToken *aToken
52: ){
53: int ii;
54:
55: for(ii=0; ii<p->nToken; ii++){
56: NearToken *pToken = &p->aToken[ii];
57: if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
58: if( aToken[ii].n<(pToken->n-1) ) return 0;
59: if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
60: }else{
61: if( aToken[ii].n!=pToken->n ) return 0;
62: if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
63: }
64: }
65:
66: return 1;
67: }
68:
69: static int nm_near_chain(
70: int iDir, /* Direction to iterate through aPhrase[] */
71: NearDocument *pDoc, /* Document to match against */
72: int iPos, /* Position at which iPhrase was found */
73: int nPhrase, /* Size of phrase array */
74: NearPhrase *aPhrase, /* Phrase array */
75: int iPhrase /* Index of phrase found */
76: ){
77: int iStart;
78: int iStop;
79: int ii;
80: int nNear;
81: int iPhrase2;
82: NearPhrase *p;
83: NearPhrase *pPrev;
84:
85: assert( iDir==1 || iDir==-1 );
86:
87: if( iDir==1 ){
88: if( (iPhrase+1)==nPhrase ) return 1;
89: nNear = aPhrase[iPhrase+1].nNear;
90: }else{
91: if( iPhrase==0 ) return 1;
92: nNear = aPhrase[iPhrase].nNear;
93: }
94: pPrev = &aPhrase[iPhrase];
95: iPhrase2 = iPhrase+iDir;
96: p = &aPhrase[iPhrase2];
97:
98: iStart = iPos - nNear - p->nToken;
99: iStop = iPos + nNear + pPrev->nToken;
100:
101: if( iStart<0 ) iStart = 0;
102: if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
103:
104: for(ii=iStart; ii<=iStop; ii++){
105: if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
106: if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
107: }
108: }
109:
110: return 0;
111: }
112:
113: static int nm_match_count(
114: NearDocument *pDoc, /* Document to match against */
115: int nPhrase, /* Size of phrase array */
116: NearPhrase *aPhrase, /* Phrase array */
117: int iPhrase /* Index of phrase to count matches for */
118: ){
119: int nOcc = 0;
120: int ii;
121: NearPhrase *p = &aPhrase[iPhrase];
122:
123: for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
124: if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
125: /* Test forward NEAR chain (i>iPhrase) */
126: if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
127:
128: /* Test reverse NEAR chain (i<iPhrase) */
129: if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
130:
131: /* This is a real match. Increment the counter. */
132: nOcc++;
133: }
134: }
135:
136: return nOcc;
137: }
138:
139: /*
140: ** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
141: */
142: static int fts3_near_match_cmd(
143: ClientData clientData,
144: Tcl_Interp *interp,
145: int objc,
146: Tcl_Obj *CONST objv[]
147: ){
148: int nTotal = 0;
149: int rc;
150: int ii;
151: int nPhrase;
152: NearPhrase *aPhrase = 0;
153: NearDocument doc = {0, 0};
154: Tcl_Obj **apDocToken;
155: Tcl_Obj *pRet;
156: Tcl_Obj *pPhrasecount = 0;
157:
158: Tcl_Obj **apExprToken;
159: int nExprToken;
160:
161: /* Must have 3 or more arguments. */
162: if( objc<3 || (objc%2)==0 ){
163: Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
164: rc = TCL_ERROR;
165: goto near_match_out;
166: }
167:
168: for(ii=3; ii<objc; ii+=2){
169: enum NM_enum { NM_PHRASECOUNTS };
170: struct TestnmSubcmd {
171: char *zName;
172: enum NM_enum eOpt;
173: } aOpt[] = {
174: { "-phrasecountvar", NM_PHRASECOUNTS },
175: { 0, 0 }
176: };
177: int iOpt;
178: if( Tcl_GetIndexFromObjStruct(
179: interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
180: ){
181: return TCL_ERROR;
182: }
183:
184: switch( aOpt[iOpt].eOpt ){
185: case NM_PHRASECOUNTS:
186: pPhrasecount = objv[ii+1];
187: break;
188: }
189: }
190:
191: rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
192: if( rc!=TCL_OK ) goto near_match_out;
193: doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
194: for(ii=0; ii<doc.nToken; ii++){
195: doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
196: }
197:
198: rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
199: if( rc!=TCL_OK ) goto near_match_out;
200:
201: nPhrase = (nExprToken + 1) / 2;
202: aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
203: memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
204: for(ii=0; ii<nPhrase; ii++){
205: Tcl_Obj *pPhrase = apExprToken[ii*2];
206: Tcl_Obj **apToken;
207: int nToken;
208: int jj;
209:
210: rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
211: if( rc!=TCL_OK ) goto near_match_out;
212: if( nToken>NM_MAX_TOKEN ){
213: Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
214: rc = TCL_ERROR;
215: goto near_match_out;
216: }
217: for(jj=0; jj<nToken; jj++){
218: NearToken *pT = &aPhrase[ii].aToken[jj];
219: pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
220: }
221: aPhrase[ii].nToken = nToken;
222: }
223: for(ii=1; ii<nPhrase; ii++){
224: Tcl_Obj *pNear = apExprToken[2*ii-1];
225: int nNear;
226: rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
227: if( rc!=TCL_OK ) goto near_match_out;
228: aPhrase[ii].nNear = nNear;
229: }
230:
231: pRet = Tcl_NewObj();
232: Tcl_IncrRefCount(pRet);
233: for(ii=0; ii<nPhrase; ii++){
234: int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
235: Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
236: nTotal += nOcc;
237: }
238: if( pPhrasecount ){
239: Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
240: }
241: Tcl_DecrRefCount(pRet);
242: Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
243:
244: near_match_out:
245: ckfree((char *)aPhrase);
246: ckfree((char *)doc.aToken);
247: return rc;
248: }
249:
250: /*
251: ** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
252: **
253: ** Normally, FTS uses hard-coded values to determine the minimum doclist
254: ** size eligible for incremental loading, and the size of the chunks loaded
255: ** when a doclist is incrementally loaded. This command allows the built-in
256: ** values to be overridden for testing purposes.
257: **
258: ** If present, the first argument is the chunksize in bytes to load doclists
259: ** in. The second argument is the minimum doclist size in bytes to use
260: ** incremental loading with.
261: **
262: ** Whether or not the arguments are present, this command returns a list of
263: ** two integers - the initial chunksize and threshold when the command is
264: ** invoked. This can be used to restore the default behaviour after running
265: ** tests. For example:
266: **
267: ** # Override incr-load settings for testing:
268: ** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
269: **
270: ** .... run tests ....
271: **
272: ** # Restore initial incr-load settings:
273: ** eval fts3_configure_incr_load $cfg
274: */
275: static int fts3_configure_incr_load_cmd(
276: ClientData clientData,
277: Tcl_Interp *interp,
278: int objc,
279: Tcl_Obj *CONST objv[]
280: ){
281: #ifdef SQLITE_ENABLE_FTS3
282: extern int test_fts3_node_chunksize;
283: extern int test_fts3_node_chunk_threshold;
284: Tcl_Obj *pRet;
285:
286: if( objc!=1 && objc!=3 ){
287: Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
288: return TCL_ERROR;
289: }
290:
291: pRet = Tcl_NewObj();
292: Tcl_IncrRefCount(pRet);
293: Tcl_ListObjAppendElement(
294: interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
295: Tcl_ListObjAppendElement(
296: interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
297:
298: if( objc==3 ){
299: int iArg1;
300: int iArg2;
301: if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
302: || Tcl_GetIntFromObj(interp, objv[2], &iArg2)
303: ){
304: Tcl_DecrRefCount(pRet);
305: return TCL_ERROR;
306: }
307: test_fts3_node_chunksize = iArg1;
308: test_fts3_node_chunk_threshold = iArg2;
309: }
310:
311: Tcl_SetObjResult(interp, pRet);
312: Tcl_DecrRefCount(pRet);
313: #endif
314: return TCL_OK;
315: }
316:
317: int Sqlitetestfts3_Init(Tcl_Interp *interp){
318: Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
319: Tcl_CreateObjCommand(interp,
320: "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
321: );
322: return TCL_OK;
323: }
324: #endif /* ifdef SQLITE_TEST */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>