Annotation of embedaddon/sqlite3/ext/fts3/fts3_test.c, revision 1.1
1.1 ! misho 1: /*
! 2: ** 2011 Jun 13
! 3: **
! 4: ** The author disclaims copyright to this source code. In place of
! 5: ** a legal notice, here is a blessing:
! 6: **
! 7: ** May you do good and not evil.
! 8: ** May you find forgiveness for yourself and forgive others.
! 9: ** May you share freely, never taking more than you give.
! 10: **
! 11: ******************************************************************************
! 12: **
! 13: ** This file is not part of the production FTS code. It is only used for
! 14: ** testing. It contains a Tcl command that can be used to test if a document
! 15: ** matches an FTS NEAR expression.
! 16: */
! 17:
! 18: #include <tcl.h>
! 19: #include <string.h>
! 20: #include <assert.h>
! 21:
! 22: #ifdef SQLITE_TEST
! 23:
! 24: /* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
! 25: #include "fts3Int.h"
! 26:
! 27: #define NM_MAX_TOKEN 12 /* Maximum number of tokens in one phrase (size of NearPhrase.aToken[]) */
! 28:
! 29: typedef struct NearPhrase NearPhrase; /* One phrase of a NEAR expression */
! 30: typedef struct NearDocument NearDocument; /* Tokenized document to match against */
! 31: typedef struct NearToken NearToken; /* A single (pointer, length) token */
! 32:
! 33: struct NearDocument { /* The document being searched, as an array of tokens */
! 34: int nToken; /* Number of tokens in the document (entries in aToken[]) */
! 35: NearToken *aToken; /* Token array */
! 36: };
! 37:
! 38: struct NearToken { /* A token, compared using memcmp() over n bytes */
! 39: int n; /* Length of token in bytes */
! 40: const char *z; /* Pointer to token string */
! 41: };
! 42:
! 43: struct NearPhrase { /* One phrase of a NEAR expression */
! 44: int nNear; /* Preceding NEAR value (left as 0 for the first phrase) */
! 45: int nToken; /* Number of tokens in this phrase */
! 46: NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase; a trailing '*' marks a prefix token */
! 47: };
! 48:
! 49: static int nm_phrase_match(
! 50: NearPhrase *p,
! 51: NearToken *aToken
! 52: ){
! 53: int ii;
! 54:
! 55: for(ii=0; ii<p->nToken; ii++){
! 56: NearToken *pToken = &p->aToken[ii];
! 57: if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
! 58: if( aToken[ii].n<(pToken->n-1) ) return 0;
! 59: if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
! 60: }else{
! 61: if( aToken[ii].n!=pToken->n ) return 0;
! 62: if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
! 63: }
! 64: }
! 65:
! 66: return 1;
! 67: }
! 68:
! 69: static int nm_near_chain(
! 70: int iDir, /* Direction to iterate through aPhrase[] */
! 71: NearDocument *pDoc, /* Document to match against */
! 72: int iPos, /* Position at which iPhrase was found */
! 73: int nPhrase, /* Size of phrase array */
! 74: NearPhrase *aPhrase, /* Phrase array */
! 75: int iPhrase /* Index of phrase found */
! 76: ){
! 77: int iStart;
! 78: int iStop;
! 79: int ii;
! 80: int nNear;
! 81: int iPhrase2;
! 82: NearPhrase *p;
! 83: NearPhrase *pPrev;
! 84:
! 85: assert( iDir==1 || iDir==-1 );
! 86:
! 87: if( iDir==1 ){
! 88: if( (iPhrase+1)==nPhrase ) return 1;
! 89: nNear = aPhrase[iPhrase+1].nNear;
! 90: }else{
! 91: if( iPhrase==0 ) return 1;
! 92: nNear = aPhrase[iPhrase].nNear;
! 93: }
! 94: pPrev = &aPhrase[iPhrase];
! 95: iPhrase2 = iPhrase+iDir;
! 96: p = &aPhrase[iPhrase2];
! 97:
! 98: iStart = iPos - nNear - p->nToken;
! 99: iStop = iPos + nNear + pPrev->nToken;
! 100:
! 101: if( iStart<0 ) iStart = 0;
! 102: if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
! 103:
! 104: for(ii=iStart; ii<=iStop; ii++){
! 105: if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
! 106: if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
! 107: }
! 108: }
! 109:
! 110: return 0;
! 111: }
! 112:
! 113: static int nm_match_count(
! 114: NearDocument *pDoc, /* Document to match against */
! 115: int nPhrase, /* Size of phrase array */
! 116: NearPhrase *aPhrase, /* Phrase array */
! 117: int iPhrase /* Index of phrase to count matches for */
! 118: ){
! 119: int nOcc = 0;
! 120: int ii;
! 121: NearPhrase *p = &aPhrase[iPhrase];
! 122:
! 123: for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
! 124: if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
! 125: /* Test forward NEAR chain (i>iPhrase) */
! 126: if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
! 127:
! 128: /* Test reverse NEAR chain (i<iPhrase) */
! 129: if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
! 130:
! 131: /* This is a real match. Increment the counter. */
! 132: nOcc++;
! 133: }
! 134: }
! 135:
! 136: return nOcc;
! 137: }
! 138:
! 139: /*
! 140: ** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
! 141: */
! 142: static int fts3_near_match_cmd(
! 143: ClientData clientData,
! 144: Tcl_Interp *interp,
! 145: int objc,
! 146: Tcl_Obj *CONST objv[]
! 147: ){
! 148: int nTotal = 0;
! 149: int rc;
! 150: int ii;
! 151: int nPhrase;
! 152: NearPhrase *aPhrase = 0;
! 153: NearDocument doc = {0, 0};
! 154: Tcl_Obj **apDocToken;
! 155: Tcl_Obj *pRet;
! 156: Tcl_Obj *pPhrasecount = 0;
! 157:
! 158: Tcl_Obj **apExprToken;
! 159: int nExprToken;
! 160:
! 161: /* Must have 3 or more arguments. */
! 162: if( objc<3 || (objc%2)==0 ){
! 163: Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
! 164: rc = TCL_ERROR;
! 165: goto near_match_out;
! 166: }
! 167:
! 168: for(ii=3; ii<objc; ii+=2){
! 169: enum NM_enum { NM_PHRASECOUNTS };
! 170: struct TestnmSubcmd {
! 171: char *zName;
! 172: enum NM_enum eOpt;
! 173: } aOpt[] = {
! 174: { "-phrasecountvar", NM_PHRASECOUNTS },
! 175: { 0, 0 }
! 176: };
! 177: int iOpt;
! 178: if( Tcl_GetIndexFromObjStruct(
! 179: interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
! 180: ){
! 181: return TCL_ERROR;
! 182: }
! 183:
! 184: switch( aOpt[iOpt].eOpt ){
! 185: case NM_PHRASECOUNTS:
! 186: pPhrasecount = objv[ii+1];
! 187: break;
! 188: }
! 189: }
! 190:
! 191: rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
! 192: if( rc!=TCL_OK ) goto near_match_out;
! 193: doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
! 194: for(ii=0; ii<doc.nToken; ii++){
! 195: doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
! 196: }
! 197:
! 198: rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
! 199: if( rc!=TCL_OK ) goto near_match_out;
! 200:
! 201: nPhrase = (nExprToken + 1) / 2;
! 202: aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
! 203: memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
! 204: for(ii=0; ii<nPhrase; ii++){
! 205: Tcl_Obj *pPhrase = apExprToken[ii*2];
! 206: Tcl_Obj **apToken;
! 207: int nToken;
! 208: int jj;
! 209:
! 210: rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
! 211: if( rc!=TCL_OK ) goto near_match_out;
! 212: if( nToken>NM_MAX_TOKEN ){
! 213: Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
! 214: rc = TCL_ERROR;
! 215: goto near_match_out;
! 216: }
! 217: for(jj=0; jj<nToken; jj++){
! 218: NearToken *pT = &aPhrase[ii].aToken[jj];
! 219: pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
! 220: }
! 221: aPhrase[ii].nToken = nToken;
! 222: }
! 223: for(ii=1; ii<nPhrase; ii++){
! 224: Tcl_Obj *pNear = apExprToken[2*ii-1];
! 225: int nNear;
! 226: rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
! 227: if( rc!=TCL_OK ) goto near_match_out;
! 228: aPhrase[ii].nNear = nNear;
! 229: }
! 230:
! 231: pRet = Tcl_NewObj();
! 232: Tcl_IncrRefCount(pRet);
! 233: for(ii=0; ii<nPhrase; ii++){
! 234: int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
! 235: Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
! 236: nTotal += nOcc;
! 237: }
! 238: if( pPhrasecount ){
! 239: Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
! 240: }
! 241: Tcl_DecrRefCount(pRet);
! 242: Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
! 243:
! 244: near_match_out:
! 245: ckfree((char *)aPhrase);
! 246: ckfree((char *)doc.aToken);
! 247: return rc;
! 248: }
! 249:
! 250: /*
! 251: ** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
! 252: **
! 253: ** Normally, FTS uses hard-coded values to determine the minimum doclist
! 254: ** size eligible for incremental loading, and the size of the chunks loaded
! 255: ** when a doclist is incrementally loaded. This command allows the built-in
! 256: ** values to be overridden for testing purposes.
! 257: **
! 258: ** If present, the first argument is the chunksize in bytes to load doclists
! 259: ** in. The second argument is the minimum doclist size in bytes to use
! 260: ** incremental loading with.
! 261: **
! 262: ** Whether or not the arguments are present, this command returns a list of
! 263: ** two integers - the initial chunksize and threshold when the command is
! 264: ** invoked. This can be used to restore the default behaviour after running
! 265: ** tests. For example:
! 266: **
! 267: ** # Override incr-load settings for testing:
! 268: ** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
! 269: **
! 270: ** .... run tests ....
! 271: **
! 272: ** # Restore initial incr-load settings:
! 273: ** eval fts3_configure_incr_load $cfg
! 274: */
! 275: static int fts3_configure_incr_load_cmd(
! 276: ClientData clientData,
! 277: Tcl_Interp *interp,
! 278: int objc,
! 279: Tcl_Obj *CONST objv[]
! 280: ){
! 281: #ifdef SQLITE_ENABLE_FTS3
! 282: extern int test_fts3_node_chunksize;
! 283: extern int test_fts3_node_chunk_threshold;
! 284: Tcl_Obj *pRet;
! 285:
! 286: if( objc!=1 && objc!=3 ){
! 287: Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
! 288: return TCL_ERROR;
! 289: }
! 290:
! 291: pRet = Tcl_NewObj();
! 292: Tcl_IncrRefCount(pRet);
! 293: Tcl_ListObjAppendElement(
! 294: interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
! 295: Tcl_ListObjAppendElement(
! 296: interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
! 297:
! 298: if( objc==3 ){
! 299: int iArg1;
! 300: int iArg2;
! 301: if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
! 302: || Tcl_GetIntFromObj(interp, objv[2], &iArg2)
! 303: ){
! 304: Tcl_DecrRefCount(pRet);
! 305: return TCL_ERROR;
! 306: }
! 307: test_fts3_node_chunksize = iArg1;
! 308: test_fts3_node_chunk_threshold = iArg2;
! 309: }
! 310:
! 311: Tcl_SetObjResult(interp, pRet);
! 312: Tcl_DecrRefCount(pRet);
! 313: #endif
! 314: return TCL_OK;
! 315: }
! 316:
! 317: int Sqlitetestfts3_Init(Tcl_Interp *interp){
! 318: Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
! 319: Tcl_CreateObjCommand(interp,
! 320: "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
! 321: );
! 322: return TCL_OK;
! 323: }
! 324: #endif /* ifdef SQLITE_TEST */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>