Annotation of embedaddon/sqlite3/ext/fts3/fts3_test.c, revision 1.1.1.1

1.1       misho       1: /*
                      2: ** 2011 Jun 13
                      3: **
                      4: ** The author disclaims copyright to this source code.  In place of
                      5: ** a legal notice, here is a blessing:
                      6: **
                      7: **    May you do good and not evil.
                      8: **    May you find forgiveness for yourself and forgive others.
                      9: **    May you share freely, never taking more than you give.
                     10: **
                     11: ******************************************************************************
                     12: **
                     13: ** This file is not part of the production FTS code. It is only used for
                     14: ** testing. It contains a Tcl command that can be used to test if a document
                     15: ** matches an FTS NEAR expression.
                     16: */
                     17: 
                     18: #include <tcl.h>
                     19: #include <string.h>
                     20: #include <assert.h>
                     21: 
                     22: #ifdef SQLITE_TEST
                     23: 
                     24: /* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
                     25: #include "fts3Int.h"
                     26: 
                     27: #define NM_MAX_TOKEN 12           /* Max tokens in one NearPhrase.aToken[] */
                     28: 
                     29: typedef struct NearPhrase NearPhrase;     /* One phrase of a NEAR expression */
                     30: typedef struct NearDocument NearDocument; /* Document as an array of tokens */
                     31: typedef struct NearToken NearToken;       /* A single (length, pointer) token */
                     32: 
                     33: struct NearDocument {
                     34:   int nToken;                     /* Number of entries in aToken[] */
                     35:   NearToken *aToken;              /* Token array */
                     36: };
                     37: 
                     38: struct NearToken {
                     39:   int n;                          /* Length of token in bytes */
                     40:   const char *z;                  /* Token text; compared over n bytes. In a
                                                             phrase, a final '*' means prefix match */
                     41: };
                     42: 
                     43: struct NearPhrase {
                     44:   int nNear;                      /* Preceding NEAR value (0 for first phrase) */
                     45:   int nToken;                     /* Number of tokens in this phrase; the Tcl
                                                             command rejects more than NM_MAX_TOKEN */
                     46:   NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
                     47: };
                     48: 
                     49: static int nm_phrase_match(
                     50:   NearPhrase *p,
                     51:   NearToken *aToken
                     52: ){
                     53:   int ii;
                     54: 
                     55:   for(ii=0; ii<p->nToken; ii++){
                     56:     NearToken *pToken = &p->aToken[ii];
                     57:     if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
                     58:       if( aToken[ii].n<(pToken->n-1) ) return 0;
                     59:       if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
                     60:     }else{
                     61:       if( aToken[ii].n!=pToken->n ) return 0;
                     62:       if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
                     63:     }
                     64:   }
                     65: 
                     66:   return 1;
                     67: }
                     68: 
                     69: static int nm_near_chain(
                     70:   int iDir,                       /* Direction to iterate through aPhrase[] */
                     71:   NearDocument *pDoc,             /* Document to match against */
                     72:   int iPos,                       /* Position at which iPhrase was found */
                     73:   int nPhrase,                    /* Size of phrase array */
                     74:   NearPhrase *aPhrase,            /* Phrase array */
                     75:   int iPhrase                     /* Index of phrase found */
                     76: ){
                     77:   int iStart;
                     78:   int iStop;
                     79:   int ii;
                     80:   int nNear;
                     81:   int iPhrase2;
                     82:   NearPhrase *p;
                     83:   NearPhrase *pPrev;
                     84: 
                     85:   assert( iDir==1 || iDir==-1 );
                     86: 
                     87:   if( iDir==1 ){
                     88:     if( (iPhrase+1)==nPhrase ) return 1;
                     89:     nNear = aPhrase[iPhrase+1].nNear;
                     90:   }else{
                     91:     if( iPhrase==0 ) return 1;
                     92:     nNear = aPhrase[iPhrase].nNear;
                     93:   }
                     94:   pPrev = &aPhrase[iPhrase];
                     95:   iPhrase2 = iPhrase+iDir;
                     96:   p = &aPhrase[iPhrase2];
                     97: 
                     98:   iStart = iPos - nNear - p->nToken;
                     99:   iStop = iPos + nNear + pPrev->nToken;
                    100: 
                    101:   if( iStart<0 ) iStart = 0;
                    102:   if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
                    103: 
                    104:   for(ii=iStart; ii<=iStop; ii++){
                    105:     if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
                    106:       if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
                    107:     }
                    108:   }
                    109: 
                    110:   return 0;
                    111: }
                    112: 
                    113: static int nm_match_count(
                    114:   NearDocument *pDoc,             /* Document to match against */
                    115:   int nPhrase,                    /* Size of phrase array */
                    116:   NearPhrase *aPhrase,            /* Phrase array */
                    117:   int iPhrase                     /* Index of phrase to count matches for */
                    118: ){
                    119:   int nOcc = 0;
                    120:   int ii;
                    121:   NearPhrase *p = &aPhrase[iPhrase];
                    122: 
                    123:   for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
                    124:     if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
                    125:       /* Test forward NEAR chain (i>iPhrase) */
                    126:       if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
                    127: 
                    128:       /* Test reverse NEAR chain (i<iPhrase) */
                    129:       if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
                    130: 
                    131:       /* This is a real match. Increment the counter. */
                    132:       nOcc++;
                    133:     }
                    134:   } 
                    135: 
                    136:   return nOcc;
                    137: }
                    138: 
                    139: /*
                    140: ** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
                    141: */
                    142: static int fts3_near_match_cmd(
                    143:   ClientData clientData,
                    144:   Tcl_Interp *interp,
                    145:   int objc,
                    146:   Tcl_Obj *CONST objv[]
                    147: ){
                    148:   int nTotal = 0;
                    149:   int rc;
                    150:   int ii;
                    151:   int nPhrase;
                    152:   NearPhrase *aPhrase = 0;
                    153:   NearDocument doc = {0, 0};
                    154:   Tcl_Obj **apDocToken;
                    155:   Tcl_Obj *pRet;
                    156:   Tcl_Obj *pPhrasecount = 0;
                    157:   
                    158:   Tcl_Obj **apExprToken;
                    159:   int nExprToken;
                    160: 
                    161:   /* Must have 3 or more arguments. */
                    162:   if( objc<3 || (objc%2)==0 ){
                    163:     Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
                    164:     rc = TCL_ERROR;
                    165:     goto near_match_out;
                    166:   }
                    167: 
                    168:   for(ii=3; ii<objc; ii+=2){
                    169:     enum NM_enum { NM_PHRASECOUNTS };
                    170:     struct TestnmSubcmd {
                    171:       char *zName;
                    172:       enum NM_enum eOpt;
                    173:     } aOpt[] = {
                    174:       { "-phrasecountvar", NM_PHRASECOUNTS },
                    175:       { 0, 0 }
                    176:     };
                    177:     int iOpt;
                    178:     if( Tcl_GetIndexFromObjStruct(
                    179:         interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt) 
                    180:     ){
                    181:       return TCL_ERROR;
                    182:     }
                    183: 
                    184:     switch( aOpt[iOpt].eOpt ){
                    185:       case NM_PHRASECOUNTS:
                    186:         pPhrasecount = objv[ii+1];
                    187:         break;
                    188:     }
                    189:   }
                    190: 
                    191:   rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
                    192:   if( rc!=TCL_OK ) goto near_match_out;
                    193:   doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
                    194:   for(ii=0; ii<doc.nToken; ii++){
                    195:     doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
                    196:   }
                    197: 
                    198:   rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
                    199:   if( rc!=TCL_OK ) goto near_match_out;
                    200: 
                    201:   nPhrase = (nExprToken + 1) / 2;
                    202:   aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
                    203:   memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
                    204:   for(ii=0; ii<nPhrase; ii++){
                    205:     Tcl_Obj *pPhrase = apExprToken[ii*2];
                    206:     Tcl_Obj **apToken;
                    207:     int nToken;
                    208:     int jj;
                    209: 
                    210:     rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
                    211:     if( rc!=TCL_OK ) goto near_match_out;
                    212:     if( nToken>NM_MAX_TOKEN ){
                    213:       Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
                    214:       rc = TCL_ERROR;
                    215:       goto near_match_out;
                    216:     }
                    217:     for(jj=0; jj<nToken; jj++){
                    218:       NearToken *pT = &aPhrase[ii].aToken[jj];
                    219:       pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
                    220:     }
                    221:     aPhrase[ii].nToken = nToken;
                    222:   }
                    223:   for(ii=1; ii<nPhrase; ii++){
                    224:     Tcl_Obj *pNear = apExprToken[2*ii-1];
                    225:     int nNear;
                    226:     rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
                    227:     if( rc!=TCL_OK ) goto near_match_out;
                    228:     aPhrase[ii].nNear = nNear;
                    229:   }
                    230: 
                    231:   pRet = Tcl_NewObj();
                    232:   Tcl_IncrRefCount(pRet);
                    233:   for(ii=0; ii<nPhrase; ii++){
                    234:     int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
                    235:     Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
                    236:     nTotal += nOcc;
                    237:   }
                    238:   if( pPhrasecount ){
                    239:     Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
                    240:   }
                    241:   Tcl_DecrRefCount(pRet);
                    242:   Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
                    243: 
                    244:  near_match_out: 
                    245:   ckfree((char *)aPhrase);
                    246:   ckfree((char *)doc.aToken);
                    247:   return rc;
                    248: }
                    249: 
                    250: /*
                    251: **   Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
                    252: **
                    253: ** Normally, FTS uses hard-coded values to determine the minimum doclist
                    254: ** size eligible for incremental loading, and the size of the chunks loaded
                    255: ** when a doclist is incrementally loaded. This command allows the built-in
                    256: ** values to be overridden for testing purposes.
                    257: **
                    258: ** If present, the first argument is the chunksize in bytes to load doclists
                    259: ** in. The second argument is the minimum doclist size in bytes to use
                    260: ** incremental loading with.
                    261: **
                    262: ** Whether or not the arguments are present, this command returns a list of
                    263: ** two integers - the initial chunksize and threshold when the command is
                    264: ** invoked. This can be used to restore the default behaviour after running
                    265: ** tests. For example:
                    266: **
                    267: **    # Override incr-load settings for testing:
                    268: **    set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
                    269: **
                    270: **    .... run tests ....
                    271: **
                    272: **    # Restore initial incr-load settings:
                    273: **    eval fts3_configure_incr_load $cfg
                    274: */
                    275: static int fts3_configure_incr_load_cmd(
                    276:   ClientData clientData,
                    277:   Tcl_Interp *interp,
                    278:   int objc,
                    279:   Tcl_Obj *CONST objv[]
                    280: ){
                    281: #ifdef SQLITE_ENABLE_FTS3
                    282:   extern int test_fts3_node_chunksize;
                    283:   extern int test_fts3_node_chunk_threshold;
                    284:   Tcl_Obj *pRet;
                    285: 
                    286:   if( objc!=1 && objc!=3 ){
                    287:     Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
                    288:     return TCL_ERROR;
                    289:   }
                    290: 
                    291:   pRet = Tcl_NewObj();
                    292:   Tcl_IncrRefCount(pRet);
                    293:   Tcl_ListObjAppendElement(
                    294:       interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
                    295:   Tcl_ListObjAppendElement(
                    296:       interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
                    297: 
                    298:   if( objc==3 ){
                    299:     int iArg1;
                    300:     int iArg2;
                    301:     if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
                    302:      || Tcl_GetIntFromObj(interp, objv[2], &iArg2)
                    303:     ){
                    304:       Tcl_DecrRefCount(pRet);
                    305:       return TCL_ERROR;
                    306:     }
                    307:     test_fts3_node_chunksize = iArg1;
                    308:     test_fts3_node_chunk_threshold = iArg2;
                    309:   }
                    310: 
                    311:   Tcl_SetObjResult(interp, pRet);
                    312:   Tcl_DecrRefCount(pRet);
                    313: #endif
                    314:   return TCL_OK;
                    315: }
                    316: 
                    317: int Sqlitetestfts3_Init(Tcl_Interp *interp){
                    318:   Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
                    319:   Tcl_CreateObjCommand(interp, 
                    320:       "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
                    321:   );
                    322:   return TCL_OK;
                    323: }
                    324: #endif                  /* ifdef SQLITE_TEST */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>