Annotation of embedaddon/sqlite3/ext/fts3/fts3_test.c, revision 1.1

1.1     ! misho       1: /*
        !             2: ** 2011 Jun 13
        !             3: **
        !             4: ** The author disclaims copyright to this source code.  In place of
        !             5: ** a legal notice, here is a blessing:
        !             6: **
        !             7: **    May you do good and not evil.
        !             8: **    May you find forgiveness for yourself and forgive others.
        !             9: **    May you share freely, never taking more than you give.
        !            10: **
        !            11: ******************************************************************************
        !            12: **
        !            13: ** This file is not part of the production FTS code. It is only used for
        !            14: ** testing. It contains a Tcl command that can be used to test if a document
        !            15: ** matches an FTS NEAR expression, and a second Tcl command that overrides
                         ** the FTS incremental-load settings for testing.
        !            16: */
        !            17: 
        !            18: #include <tcl.h>
        !            19: #include <string.h>
        !            20: #include <assert.h>
        !            21: 
        !            22: #ifdef SQLITE_TEST
        !            23: 
        !            24: /* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
        !            25: #include "fts3Int.h"
        !            26: 
                         /*
                         ** Upper bound on the number of tokens in a single phrase. It sizes the
                         ** NearPhrase.aToken[] array, and fts3_near_match_cmd() rejects any phrase
                         ** that has more tokens than this.
                         */
        !            27: #define NM_MAX_TOKEN 12
        !            28: 
                         /* Forward declarations for the structures defined below. */
        !            29: typedef struct NearPhrase NearPhrase;
        !            30: typedef struct NearDocument NearDocument;
        !            31: typedef struct NearToken NearToken;
        !            32: 
                         /*
                         ** The document that NEAR expressions are matched against, held as an
                         ** array of tokens.
                         */
        !            33: struct NearDocument {
        !            34:   int nToken;                     /* Number of tokens in aToken[] */
        !            35:   NearToken *aToken;              /* Token array */
        !            36: };
        !            37: 
                         /*
                         ** A single token, stored as a pointer plus a byte count. The matching
                         ** code in this file compares tokens with memcmp() over n bytes and does
                         ** not depend on a nul terminator.
                         **
                         ** When a token belongs to a phrase and its last byte is an asterisk,
                         ** nm_phrase_match() treats it as a prefix token.
                         */
        !            38: struct NearToken {
        !            39:   int n;                          /* Length of token in bytes */
        !            40:   const char *z;                  /* Pointer to token string */
        !            41: };
        !            42: 
                         /*
                         ** One phrase of a NEAR expression.
                         **
                         ** The NEAR value that links two adjacent phrases is stored in nNear of
                         ** the later (higher-indexed) phrase. fts3_near_match_cmd() zeroes the
                         ** whole array and only assigns nNear for phrases 1 and up, so nNear of
                         ** phrase 0 stays 0 and is never read by nm_near_chain().
                         */
        !            43: struct NearPhrase {
        !            44:   int nNear;                      /* Preceding NEAR value */
        !            45:   int nToken;                     /* Number of tokens in this phrase */
        !            46:   NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
        !            47: };
        !            48: 
                         /*
                         ** Return 1 if every token of phrase p matches the corresponding entry of
                         ** aToken[] (p->aToken[i] is compared against aToken[i]). Return 0 as soon
                         ** as one token fails to match. A phrase with zero tokens returns 1.
                         **
                         ** aToken[] must hold at least p->nToken entries. This function cannot
                         ** check that, so the caller has to limit the offsets it passes in.
                         */
        !            49: static int nm_phrase_match(
        !            50:   NearPhrase *p,                  /* Phrase to look for */
        !            51:   NearToken *aToken               /* First document token to compare */
        !            52: ){
        !            53:   int ii;
        !            54: 
        !            55:   for(ii=0; ii<p->nToken; ii++){
        !            56:     NearToken *pToken = &p->aToken[ii];
        !            57:     if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
                               /* Prefix token. The document token must be at least as long as
                               ** the text before the asterisk and must begin with that text. */
        !            58:       if( aToken[ii].n<(pToken->n-1) ) return 0;
        !            59:       if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
        !            60:     }else{
                               /* Ordinary token. Lengths and bytes must both be identical. */
        !            61:       if( aToken[ii].n!=pToken->n ) return 0;
        !            62:       if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
        !            63:     }
        !            64:   }
        !            65: 
        !            66:   return 1;
        !            67: }
        !            68: 
                         /*
                         ** Phrase aPhrase[iPhrase] has been found at token offset iPos of pDoc.
                         ** Starting from there, walk the phrase array in direction iDir (+1 moves
                         ** to higher indexes, -1 to lower indexes) and test whether each further
                         ** phrase can be found inside the NEAR window of the phrase before it in
                         ** the walk.
                         **
                         ** Return 1 if the end of the array is reached in that direction, or 0 if
                         ** some phrase cannot be placed. The function calls itself once for each
                         ** candidate position of the neighbouring phrase.
                         */
        !            69: static int nm_near_chain(
        !            70:   int iDir,                       /* Direction to iterate through aPhrase[] */
        !            71:   NearDocument *pDoc,             /* Document to match against */
        !            72:   int iPos,                       /* Position at which iPhrase was found */
        !            73:   int nPhrase,                    /* Size of phrase array */
        !            74:   NearPhrase *aPhrase,            /* Phrase array */
        !            75:   int iPhrase                     /* Index of phrase found */
        !            76: ){
        !            77:   int iStart;
        !            78:   int iStop;
        !            79:   int ii;
        !            80:   int nNear;
        !            81:   int iPhrase2;
        !            82:   NearPhrase *p;
        !            83:   NearPhrase *pPrev;
        !            84: 
        !            85:   assert( iDir==1 || iDir==-1 );
        !            86: 
                           /* Stop with success when there is no further phrase in this direction.
                           ** Otherwise fetch the NEAR value for the pair of phrases involved. It
                           ** is stored on whichever of the two has the higher index. */
        !            87:   if( iDir==1 ){
        !            88:     if( (iPhrase+1)==nPhrase ) return 1;
        !            89:     nNear = aPhrase[iPhrase+1].nNear;
        !            90:   }else{
        !            91:     if( iPhrase==0 ) return 1;
        !            92:     nNear = aPhrase[iPhrase].nNear;
        !            93:   }
                           /* pPrev is the phrase already located at iPos. p is the neighbouring
                           ** phrase that must now be located. */
        !            94:   pPrev = &aPhrase[iPhrase];
        !            95:   iPhrase2 = iPhrase+iDir;
        !            96:   p = &aPhrase[iPhrase2];
        !            97: 
                           /* Range of start offsets to try for p. The lowest puts p before pPrev
                           ** with nNear tokens between the end of p and iPos. The highest puts p
                           ** after pPrev with nNear tokens between the end of pPrev and the start
                           ** of p. Every offset in between is tried as well. */
        !            98:   iStart = iPos - nNear - p->nToken;
        !            99:   iStop = iPos + nNear + pPrev->nToken;
        !           100: 
                           /* Clamp the range so that p lies entirely inside the document. If this
                           ** leaves iStop below iStart the loop below does not run. */
        !           101:   if( iStart<0 ) iStart = 0;
        !           102:   if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
        !           103: 
        !           104:   for(ii=iStart; ii<=iStop; ii++){
        !           105:     if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
                               /* p fits here. Continue the walk from p in the same direction. */
        !           106:       if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
        !           107:     }
        !           108:   }
        !           109: 
        !           110:   return 0;
        !           111: }
        !           112: 
                         /*
                         ** Return the number of token offsets in pDoc at which aPhrase[iPhrase]
                         ** matches and at which both the forward and the reverse NEAR chains, as
                         ** tested by nm_near_chain(), can be satisfied.
                         */
        !           113: static int nm_match_count(
        !           114:   NearDocument *pDoc,             /* Document to match against */
        !           115:   int nPhrase,                    /* Size of phrase array */
        !           116:   NearPhrase *aPhrase,            /* Phrase array */
        !           117:   int iPhrase                     /* Index of phrase to count matches for */
        !           118: ){
        !           119:   int nOcc = 0;
        !           120:   int ii;
        !           121:   NearPhrase *p = &aPhrase[iPhrase];
        !           122: 
                           /* Visit each offset at which the whole phrase still fits inside the
                           ** document. */
        !           123:   for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
        !           124:     if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
        !           125:       /* Test forward NEAR chain (i>iPhrase) */
        !           126:       if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
        !           127: 
        !           128:       /* Test reverse NEAR chain (i<iPhrase) */
        !           129:       if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
        !           130: 
        !           131:       /* This is a real match. Increment the counter. */
        !           132:       nOcc++;
        !           133:     }
        !           134:   } 
        !           135: 
        !           136:   return nOcc;
        !           137: }
        !           138: 
        !           139: /*
        !           140: ** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
                         **
                         ** DOCUMENT is a Tcl list of tokens. EXPR is a Tcl list in which the
                         ** elements at even indexes are phrases (each one a list of at most
                         ** NM_MAX_TOKEN tokens) and the elements at odd indexes are the integer
                         ** NEAR values linking the phrases on either side. A trailing NEAR value
                         ** that has no phrase after it is never read.
                         **
                         ** The only option is -phrasecountvar VARNAME. When it is given, variable
                         ** VARNAME is set to a list holding, for each phrase of EXPR in order,
                         ** the count returned by nm_match_count() for that phrase.
                         **
                         ** The command result is a boolean that is true when the sum of all the
                         ** per-phrase counts is greater than zero.
        !           141: */
        !           142: static int fts3_near_match_cmd(
        !           143:   ClientData clientData,          /* Not used */
        !           144:   Tcl_Interp *interp,             /* Interpreter for results and errors */
        !           145:   int objc,                       /* Number of entries in objv[] */
        !           146:   Tcl_Obj *CONST objv[]           /* Command name followed by arguments */
        !           147: ){
        !           148:   int nTotal = 0;
        !           149:   int rc;
        !           150:   int ii;
        !           151:   int nPhrase;
        !           152:   NearPhrase *aPhrase = 0;
        !           153:   NearDocument doc = {0, 0};
        !           154:   Tcl_Obj **apDocToken;
        !           155:   Tcl_Obj *pRet;
        !           156:   Tcl_Obj *pPhrasecount = 0;
        !           157:   
        !           158:   Tcl_Obj **apExprToken;
        !           159:   int nExprToken;
        !           160: 
        !           161:   /* Must have 3 or more arguments. */
                           /* objv[] includes the command name, so a valid call has an odd objc:
                           ** the name, DOCUMENT, EXPR, then zero or more OPTION VALUE pairs. */
        !           162:   if( objc<3 || (objc%2)==0 ){
        !           163:     Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
        !           164:     rc = TCL_ERROR;
        !           165:     goto near_match_out;
        !           166:   }
        !           167: 
                           /* Process the OPTION VALUE pairs that follow EXPR. */
        !           168:   for(ii=3; ii<objc; ii+=2){
        !           169:     enum NM_enum { NM_PHRASECOUNTS };
        !           170:     struct TestnmSubcmd {
        !           171:       char *zName;
        !           172:       enum NM_enum eOpt;
        !           173:     } aOpt[] = {
        !           174:       { "-phrasecountvar", NM_PHRASECOUNTS },
        !           175:       { 0, 0 }
        !           176:     };
        !           177:     int iOpt;
                             /* A nonzero return is treated as failure. This path returns
                             ** directly instead of jumping to near_match_out. Nothing has been
                             ** allocated yet at this point, so there is nothing to free. */
        !           178:     if( Tcl_GetIndexFromObjStruct(
        !           179:         interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt) 
        !           180:     ){
        !           181:       return TCL_ERROR;
        !           182:     }
        !           183: 
        !           184:     switch( aOpt[iOpt].eOpt ){
        !           185:       case NM_PHRASECOUNTS:
        !           186:         pPhrasecount = objv[ii+1];
        !           187:         break;
        !           188:     }
        !           189:   }
        !           190: 
                           /* Split DOCUMENT into tokens. Each doc.aToken[] entry records the
                           ** pointer and byte length reported by Tcl_GetStringFromObj(). */
        !           191:   rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
        !           192:   if( rc!=TCL_OK ) goto near_match_out;
        !           193:   doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
        !           194:   for(ii=0; ii<doc.nToken; ii++){
        !           195:     doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
        !           196:   }
        !           197: 
        !           198:   rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
        !           199:   if( rc!=TCL_OK ) goto near_match_out;
        !           200: 
                           /* Phrases sit at the even indexes of EXPR, hence the rounding up.
                           ** Zeroing the array leaves aPhrase[0].nNear at 0. */
        !           201:   nPhrase = (nExprToken + 1) / 2;
        !           202:   aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
        !           203:   memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
        !           204:   for(ii=0; ii<nPhrase; ii++){
        !           205:     Tcl_Obj *pPhrase = apExprToken[ii*2];
        !           206:     Tcl_Obj **apToken;
        !           207:     int nToken;
        !           208:     int jj;
        !           209: 
        !           210:     rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
        !           211:     if( rc!=TCL_OK ) goto near_match_out;
                             /* aPhrase[ii].aToken[] has room for NM_MAX_TOKEN entries only. */
        !           212:     if( nToken>NM_MAX_TOKEN ){
        !           213:       Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
        !           214:       rc = TCL_ERROR;
        !           215:       goto near_match_out;
        !           216:     }
        !           217:     for(jj=0; jj<nToken; jj++){
        !           218:       NearToken *pT = &aPhrase[ii].aToken[jj];
        !           219:       pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
        !           220:     }
        !           221:     aPhrase[ii].nToken = nToken;
        !           222:   }
                           /* The NEAR value at odd index 2*ii-1 links phrase ii-1 to phrase ii
                           ** and is stored on phrase ii. */
        !           223:   for(ii=1; ii<nPhrase; ii++){
        !           224:     Tcl_Obj *pNear = apExprToken[2*ii-1];
        !           225:     int nNear;
        !           226:     rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
        !           227:     if( rc!=TCL_OK ) goto near_match_out;
        !           228:     aPhrase[ii].nNear = nNear;
        !           229:   }
        !           230: 
                           /* Build the list of per-phrase counts and add them up. The reference
                           ** taken here is dropped again by Tcl_DecrRefCount() below, after the
                           ** list has been offered to the -phrasecountvar variable. */
        !           231:   pRet = Tcl_NewObj();
        !           232:   Tcl_IncrRefCount(pRet);
        !           233:   for(ii=0; ii<nPhrase; ii++){
        !           234:     int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
        !           235:     Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
        !           236:     nTotal += nOcc;
        !           237:   }
        !           238:   if( pPhrasecount ){
                             /* NOTE(review): the result of Tcl_ObjSetVar2() is not checked, so a
                             ** failure to set the variable goes unreported. */
        !           239:     Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
        !           240:   }
        !           241:   Tcl_DecrRefCount(pRet);
        !           242:   Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
        !           243: 
                           /* Common exit. On the success path rc is TCL_OK, because every earlier
                           ** failure jumped straight here. Either pointer may still be 0 when an
                           ** error path arrives. NOTE(review): this assumes ckfree() accepts a
                           ** null pointer - confirm. */
        !           244:  near_match_out: 
        !           245:   ckfree((char *)aPhrase);
        !           246:   ckfree((char *)doc.aToken);
        !           247:   return rc;
        !           248: }
        !           249: 
        !           250: /*
        !           251: **   Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
        !           252: **
        !           253: ** Normally, FTS uses hard-coded values to determine the minimum doclist
        !           254: ** size eligible for incremental loading, and the size of the chunks loaded
        !           255: ** when a doclist is incrementally loaded. This command allows the built-in
        !           256: ** values to be overridden for testing purposes.
        !           257: **
        !           258: ** If present, the first argument is the chunksize in bytes to load doclists
        !           259: ** in. The second argument is the minimum doclist size in bytes to use
        !           260: ** incremental loading with.
        !           261: **
        !           262: ** Whether or not the arguments are present, this command returns a list of
        !           263: ** two integers - the initial chunksize and threshold when the command is
        !           264: ** invoked. This can be used to restore the default behaviour after running
        !           265: ** tests. For example:
        !           266: **
        !           267: **    # Override incr-load settings for testing:
        !           268: **    set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
        !           269: **
        !           270: **    .... run tests ....
        !           271: **
        !           272: **    # Restore initial incr-load settings:
        !           273: **    eval fts3_configure_incr_load $cfg
                         **
                         ** If SQLITE_ENABLE_FTS3 is not defined, the command ignores its arguments,
                         ** sets no result and returns TCL_OK.
        !           274: */
        !           275: static int fts3_configure_incr_load_cmd(
        !           276:   ClientData clientData,
        !           277:   Tcl_Interp *interp,
        !           278:   int objc,
        !           279:   Tcl_Obj *CONST objv[]
        !           280: ){
        !           281: #ifdef SQLITE_ENABLE_FTS3
                           /* The two settings are globals defined outside this file. */
        !           282:   extern int test_fts3_node_chunksize;
        !           283:   extern int test_fts3_node_chunk_threshold;
        !           284:   Tcl_Obj *pRet;
        !           285: 
                           /* Either no arguments at all, or both CHUNKSIZE and THRESHOLD. */
        !           286:   if( objc!=1 && objc!=3 ){
        !           287:     Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
        !           288:     return TCL_ERROR;
        !           289:   }
        !           290: 
                           /* Capture the current settings before they can be overwritten below. */
        !           291:   pRet = Tcl_NewObj();
        !           292:   Tcl_IncrRefCount(pRet);
        !           293:   Tcl_ListObjAppendElement(
        !           294:       interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
        !           295:   Tcl_ListObjAppendElement(
        !           296:       interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
        !           297: 
        !           298:   if( objc==3 ){
        !           299:     int iArg1;
        !           300:     int iArg2;
                             /* Both arguments are parsed before either global is written, so a
                             ** bad argument leaves the current settings unchanged. */
        !           301:     if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
        !           302:      || Tcl_GetIntFromObj(interp, objv[2], &iArg2)
        !           303:     ){
        !           304:       Tcl_DecrRefCount(pRet);
        !           305:       return TCL_ERROR;
        !           306:     }
        !           307:     test_fts3_node_chunksize = iArg1;
        !           308:     test_fts3_node_chunk_threshold = iArg2;
        !           309:   }
        !           310: 
                           /* Hand the saved settings to the interpreter as the command result,
                           ** then drop the reference taken above. */
        !           311:   Tcl_SetObjResult(interp, pRet);
        !           312:   Tcl_DecrRefCount(pRet);
        !           313: #endif
        !           314:   return TCL_OK;
        !           315: }
        !           316: 
                         /*
                         ** Register the two Tcl commands implemented in this file,
                         ** fts3_near_match and fts3_configure_incr_load, with interpreter interp.
                         ** No client data or delete callback is supplied for either command.
                         ** Always returns TCL_OK.
                         */
        !           317: int Sqlitetestfts3_Init(Tcl_Interp *interp){
        !           318:   Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
        !           319:   Tcl_CreateObjCommand(interp, 
        !           320:       "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
        !           321:   );
        !           322:   return TCL_OK;
        !           323: }
        !           324: #endif                  /* ifdef SQLITE_TEST */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>