Annotation of embedaddon/sqlite3/src/test_fuzzer.c, revision 1.1

1.1     ! misho       1: /*
        !             2: ** 2011 March 24
        !             3: **
        !             4: ** The author disclaims copyright to this source code.  In place of
        !             5: ** a legal notice, here is a blessing:
        !             6: **
        !             7: **    May you do good and not evil.
        !             8: **    May you find forgiveness for yourself and forgive others.
        !             9: **    May you share freely, never taking more than you give.
        !            10: **
        !            11: *************************************************************************
        !            12: **
        !            13: ** Code for demonstartion virtual table that generates variations
        !            14: ** on an input word at increasing edit distances from the original.
        !            15: **
        !            16: ** A fuzzer virtual table is created like this:
        !            17: **
        !            18: **     CREATE VIRTUAL TABLE temp.f USING fuzzer;
        !            19: **
        !            20: ** The name of the new virtual table in the example above is "f".
        !            21: ** Note that all fuzzer virtual tables must be TEMP tables.  The
        !            22: ** "temp." prefix in front of the table name is required when the
        !            23: ** table is being created.  The "temp." prefix can be omitted when
        !            24: ** using the table as long as the name is unambiguous.
        !            25: **
        !            26: ** Before being used, the fuzzer needs to be programmed by giving it
        !            27: ** character transformations and a cost associated with each transformation.
        !            28: ** Examples:
        !            29: **
        !            30: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('','a',100);
        !            31: **
        !            32: ** The above statement says that the cost of inserting a letter 'a' is
        !            33: ** 100.  (All costs are integers.  We recommend that costs be scaled so
        !            34: ** that the average cost is around 100.)
        !            35: **
        !            36: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('b','',87);
        !            37: **
        !            38: ** The above statement says that the cost of deleting a single letter
        !            39: ** 'b' is 87.
        !            40: **
        !            41: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('o','oe',38);
        !            42: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('oe','o',40);
        !            43: **
        !            44: ** This third example says that the cost of transforming the single
        !            45: ** letter "o" into the two-letter sequence "oe" is 38 and that the
        !            46: ** cost of transforming "oe" back into "o" is 40.
        !            47: **
        !            48: ** After all the transformation costs have been set, the fuzzer table
        !            49: ** can be queried as follows:
        !            50: **
        !            51: **    SELECT word, distance FROM f
        !            52: **     WHERE word MATCH 'abcdefg'
        !            53: **       AND distance<200;
        !            54: **
        !            55: ** This first query outputs the string "abcdefg" and all strings that
        !            56: ** can be derived from that string by appling the specified transformations.
        !            57: ** The strings are output together with their total transformation cost
        !            58: ** (called "distance") and appear in order of increasing cost.  No string
        !            59: ** is output more than once.  If there are multiple ways to transform the
        !            60: ** target string into the output string then the lowest cost transform is
        !            61: ** the one that is returned.  In the example, the search is limited to 
        !            62: ** strings with a total distance of less than 200.
        !            63: **
        !            64: ** It is important to put some kind of a limit on the fuzzer output.  This
        !            65: ** can be either in the form of a LIMIT clause at the end of the query,
        !            66: ** or better, a "distance<NNN" constraint where NNN is some number.  The
        !            67: ** running time and memory requirement is exponential in the value of NNN 
        !            68: ** so you want to make sure that NNN is not too big.  A value of NNN that
        !            69: ** is about twice the average transformation cost seems to give good results.
        !            70: **
        !            71: ** The fuzzer table can be useful for tasks such as spelling correction.
        !            72: ** Suppose there is a second table vocabulary(w) where the w column contains
        !            73: ** all correctly spelled words.   Let $word be a word you want to look up.
        !            74: **
        !            75: **   SELECT vocabulary.w FROM f, vocabulary
        !            76: **    WHERE f.word MATCH $word
        !            77: **      AND f.distance<=200
        !            78: **      AND f.word=vocabulary.w
        !            79: **    LIMIT 20
        !            80: **
        !            81: ** The query above gives the 20 closest words to the $word being tested.
        !            82: ** (Note that for good performance, the vocubulary.w column should be
        !            83: ** indexed.)
        !            84: **
        !            85: ** A similar query can be used to find all words in the dictionary that
        !            86: ** begin with some prefix $prefix:
        !            87: **
        !            88: **   SELECT vocabulary.w FROM f, vocabulary
        !            89: **    WHERE f.word MATCH $prefix
        !            90: **      AND f.distance<=200
        !            91: **      AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
        !            92: **    LIMIT 50
        !            93: **
        !            94: ** This last query will show up to 50 words out of the vocabulary that
        !            95: ** match or nearly match the $prefix.
        !            96: */
        !            97: #include "sqlite3.h"
        !            98: #include <stdlib.h>
        !            99: #include <string.h>
        !           100: #include <assert.h>
        !           101: #include <stdio.h>
        !           102: 
        !           103: #ifndef SQLITE_OMIT_VIRTUALTABLE
        !           104: 
        !           105: /*
        !           106: ** Forward declaration of objects used by this implementation
        !           107: */
        !           108: typedef struct fuzzer_vtab fuzzer_vtab;
        !           109: typedef struct fuzzer_cursor fuzzer_cursor;
        !           110: typedef struct fuzzer_rule fuzzer_rule;
        !           111: typedef struct fuzzer_seen fuzzer_seen;
        !           112: typedef struct fuzzer_stem fuzzer_stem;
        !           113: 
        !           114: /*
        !           115: ** Type of the "cost" of an edit operation.  Might be changed to
        !           116: ** "float" or "double" or "sqlite3_int64" in the future.
        !           117: */
        !           118: typedef int fuzzer_cost;
        !           119: 
        !           120: 
        !           121: /*
        !           122: ** Each transformation rule is stored as an instance of this object.
        !           123: ** All rules are kept on a linked list sorted by rCost.
        !           124: */
        !           125: struct fuzzer_rule {
        !           126:   fuzzer_rule *pNext;        /* Next rule in order of increasing rCost */
        !           127:   fuzzer_cost rCost;         /* Cost of this transformation */
        !           128:   int nFrom, nTo;            /* Length of the zFrom and zTo strings */
        !           129:   char *zFrom;               /* Transform from */
        !           130:   char zTo[4];               /* Transform to (extra space appended) */
        !           131: };
        !           132: 
        !           133: /*
        !           134: ** A stem object is used to generate variants.  It is also used to record
        !           135: ** previously generated outputs.
        !           136: **
        !           137: ** Every stem is added to a hash table as it is output.  Generation of
        !           138: ** duplicate stems is suppressed.
        !           139: **
        !           140: ** Active stems (those that might generate new outputs) are kepts on a linked
        !           141: ** list sorted by increasing cost.  The cost is the sum of rBaseCost and
        !           142: ** pRule->rCost.
        !           143: */
        !           144: struct fuzzer_stem {
        !           145:   char *zBasis;              /* Word being fuzzed */
        !           146:   int nBasis;                /* Length of the zBasis string */
        !           147:   const fuzzer_rule *pRule;  /* Current rule to apply */
        !           148:   int n;                     /* Apply pRule at this character offset */
        !           149:   fuzzer_cost rBaseCost;     /* Base cost of getting to zBasis */
        !           150:   fuzzer_cost rCostX;        /* Precomputed rBaseCost + pRule->rCost */
        !           151:   fuzzer_stem *pNext;        /* Next stem in rCost order */
        !           152:   fuzzer_stem *pHash;        /* Next stem with same hash on zBasis */
        !           153: };
        !           154: 
        !           155: /* 
        !           156: ** A fuzzer virtual-table object 
        !           157: */
        !           158: struct fuzzer_vtab {
        !           159:   sqlite3_vtab base;         /* Base class - must be first */
        !           160:   char *zClassName;          /* Name of this class.  Default: "fuzzer" */
        !           161:   fuzzer_rule *pRule;        /* All active rules in this fuzzer */
        !           162:   fuzzer_rule *pNewRule;     /* New rules to add when last cursor expires */
        !           163:   int nCursor;               /* Number of active cursors */
        !           164: };
        !           165: 
        !           166: #define FUZZER_HASH  4001    /* Hash table size */
        !           167: #define FUZZER_NQUEUE  20    /* Number of slots on the stem queue */
        !           168: 
        !           169: /* A fuzzer cursor object */
        !           170: struct fuzzer_cursor {
        !           171:   sqlite3_vtab_cursor base;  /* Base class - must be first */
        !           172:   sqlite3_int64 iRowid;      /* The rowid of the current word */
        !           173:   fuzzer_vtab *pVtab;        /* The virtual table this cursor belongs to */
        !           174:   fuzzer_cost rLimit;        /* Maximum cost of any term */
        !           175:   fuzzer_stem *pStem;        /* Stem with smallest rCostX */
        !           176:   fuzzer_stem *pDone;        /* Stems already processed to completion */
        !           177:   fuzzer_stem *aQueue[FUZZER_NQUEUE];  /* Queue of stems with higher rCostX */
        !           178:   int mxQueue;               /* Largest used index in aQueue[] */
        !           179:   char *zBuf;                /* Temporary use buffer */
        !           180:   int nBuf;                  /* Bytes allocated for zBuf */
        !           181:   int nStem;                 /* Number of stems allocated */
        !           182:   fuzzer_rule nullRule;      /* Null rule used first */
        !           183:   fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
        !           184: };
        !           185: 
        !           186: /* Methods for the fuzzer module */
        !           187: static int fuzzerConnect(
        !           188:   sqlite3 *db,
        !           189:   void *pAux,
        !           190:   int argc, const char *const*argv,
        !           191:   sqlite3_vtab **ppVtab,
        !           192:   char **pzErr
        !           193: ){
        !           194:   fuzzer_vtab *pNew;
        !           195:   int n;
        !           196:   if( strcmp(argv[1],"temp")!=0 ){
        !           197:     *pzErr = sqlite3_mprintf("%s virtual tables must be TEMP", argv[0]);
        !           198:     return SQLITE_ERROR;
        !           199:   }
        !           200:   n = strlen(argv[0]) + 1;
        !           201:   pNew = sqlite3_malloc( sizeof(*pNew) + n );
        !           202:   if( pNew==0 ) return SQLITE_NOMEM;
        !           203:   pNew->zClassName = (char*)&pNew[1];
        !           204:   memcpy(pNew->zClassName, argv[0], n);
        !           205:   sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)");
        !           206:   memset(pNew, 0, sizeof(*pNew));
        !           207:   *ppVtab = &pNew->base;
        !           208:   return SQLITE_OK;
        !           209: }
        !           210: /* Note that for this virtual table, the xCreate and xConnect
        !           211: ** methods are identical. */
        !           212: 
        !           213: static int fuzzerDisconnect(sqlite3_vtab *pVtab){
        !           214:   fuzzer_vtab *p = (fuzzer_vtab*)pVtab;
        !           215:   assert( p->nCursor==0 );
        !           216:   do{
        !           217:     while( p->pRule ){
        !           218:       fuzzer_rule *pRule = p->pRule;
        !           219:       p->pRule = pRule->pNext;
        !           220:       sqlite3_free(pRule);
        !           221:     }
        !           222:     p->pRule = p->pNewRule;
        !           223:     p->pNewRule = 0;
        !           224:   }while( p->pRule );
        !           225:   sqlite3_free(p);
        !           226:   return SQLITE_OK;
        !           227: }
        !           228: /* The xDisconnect and xDestroy methods are also the same */
        !           229: 
        !           230: /*
        !           231: ** The two input rule lists are both sorted in order of increasing
        !           232: ** cost.  Merge them together into a single list, sorted by cost, and
        !           233: ** return a pointer to the head of that list.
        !           234: */
        !           235: static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){
        !           236:   fuzzer_rule head;
        !           237:   fuzzer_rule *pTail;
        !           238: 
        !           239:   pTail =  &head;
        !           240:   while( pA && pB ){
        !           241:     if( pA->rCost<=pB->rCost ){
        !           242:       pTail->pNext = pA;
        !           243:       pTail = pA;
        !           244:       pA = pA->pNext;
        !           245:     }else{
        !           246:       pTail->pNext = pB;
        !           247:       pTail = pB;
        !           248:       pB = pB->pNext;
        !           249:     }
        !           250:   }
        !           251:   if( pA==0 ){
        !           252:     pTail->pNext = pB;
        !           253:   }else{
        !           254:     pTail->pNext = pA;
        !           255:   }
        !           256:   return head.pNext;
        !           257: }
        !           258: 
        !           259: 
        !           260: /*
        !           261: ** Open a new fuzzer cursor.
        !           262: */
        !           263: static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
        !           264:   fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
        !           265:   fuzzer_cursor *pCur;
        !           266:   pCur = sqlite3_malloc( sizeof(*pCur) );
        !           267:   if( pCur==0 ) return SQLITE_NOMEM;
        !           268:   memset(pCur, 0, sizeof(*pCur));
        !           269:   pCur->pVtab = p;
        !           270:   *ppCursor = &pCur->base;
        !           271:   if( p->nCursor==0 && p->pNewRule ){
        !           272:     unsigned int i;
        !           273:     fuzzer_rule *pX;
        !           274:     fuzzer_rule *a[15];
        !           275:     for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0;
        !           276:     while( (pX = p->pNewRule)!=0 ){
        !           277:       p->pNewRule = pX->pNext;
        !           278:       pX->pNext = 0;
        !           279:       for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){
        !           280:         pX = fuzzerMergeRules(a[i], pX);
        !           281:         a[i] = 0;
        !           282:       }
        !           283:       a[i] = fuzzerMergeRules(a[i], pX);
        !           284:     }
        !           285:     for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){
        !           286:       pX = fuzzerMergeRules(a[i], pX);
        !           287:     }
        !           288:     p->pRule = fuzzerMergeRules(p->pRule, pX);
        !           289:   }
        !           290:   p->nCursor++;
        !           291:   return SQLITE_OK;
        !           292: }
        !           293: 
        !           294: /*
        !           295: ** Free all stems in a list.
        !           296: */
        !           297: static void fuzzerClearStemList(fuzzer_stem *pStem){
        !           298:   while( pStem ){
        !           299:     fuzzer_stem *pNext = pStem->pNext;
        !           300:     sqlite3_free(pStem);
        !           301:     pStem = pNext;
        !           302:   }
        !           303: }
        !           304: 
        !           305: /*
        !           306: ** Free up all the memory allocated by a cursor.  Set it rLimit to 0
        !           307: ** to indicate that it is at EOF.
        !           308: */
        !           309: static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
        !           310:   int i;
        !           311:   fuzzerClearStemList(pCur->pStem);
        !           312:   fuzzerClearStemList(pCur->pDone);
        !           313:   for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
        !           314:   pCur->rLimit = (fuzzer_cost)0;
        !           315:   if( clearHash && pCur->nStem ){
        !           316:     pCur->mxQueue = 0;
        !           317:     pCur->pStem = 0;
        !           318:     pCur->pDone = 0;
        !           319:     memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
        !           320:     memset(pCur->apHash, 0, sizeof(pCur->apHash));
        !           321:   }
        !           322:   pCur->nStem = 0;
        !           323: }
        !           324: 
        !           325: /*
        !           326: ** Close a fuzzer cursor.
        !           327: */
        !           328: static int fuzzerClose(sqlite3_vtab_cursor *cur){
        !           329:   fuzzer_cursor *pCur = (fuzzer_cursor *)cur;
        !           330:   fuzzerClearCursor(pCur, 0);
        !           331:   sqlite3_free(pCur->zBuf);
        !           332:   pCur->pVtab->nCursor--;
        !           333:   sqlite3_free(pCur);
        !           334:   return SQLITE_OK;
        !           335: }
        !           336: 
        !           337: /*
        !           338: ** Compute the current output term for a fuzzer_stem.
        !           339: */
        !           340: static int fuzzerRender(
        !           341:   fuzzer_stem *pStem,   /* The stem to be rendered */
        !           342:   char **pzBuf,         /* Write results into this buffer.  realloc if needed */
        !           343:   int *pnBuf            /* Size of the buffer */
        !           344: ){
        !           345:   const fuzzer_rule *pRule = pStem->pRule;
        !           346:   int n;
        !           347:   char *z;
        !           348: 
        !           349:   n = pStem->nBasis + pRule->nTo - pRule->nFrom;
        !           350:   if( (*pnBuf)<n+1 ){
        !           351:     (*pzBuf) = sqlite3_realloc((*pzBuf), n+100);
        !           352:     if( (*pzBuf)==0 ) return SQLITE_NOMEM;
        !           353:     (*pnBuf) = n+100;
        !           354:   }
        !           355:   n = pStem->n;
        !           356:   z = *pzBuf;
        !           357:   if( n<0 ){
        !           358:     memcpy(z, pStem->zBasis, pStem->nBasis+1);
        !           359:   }else{
        !           360:     memcpy(z, pStem->zBasis, n);
        !           361:     memcpy(&z[n], pRule->zTo, pRule->nTo);
        !           362:     memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom], 
        !           363:            pStem->nBasis-n-pRule->nFrom+1);
        !           364:   }
        !           365:   return SQLITE_OK;
        !           366: }
        !           367: 
        !           368: /*
        !           369: ** Compute a hash on zBasis.
        !           370: */
        !           371: static unsigned int fuzzerHash(const char *z){
        !           372:   unsigned int h = 0;
        !           373:   while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); }
        !           374:   return h % FUZZER_HASH;
        !           375: }
        !           376: 
        !           377: /*
        !           378: ** Current cost of a stem
        !           379: */
        !           380: static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
        !           381:   return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
        !           382: }
        !           383: 
        !           384: #if 0
        !           385: /*
        !           386: ** Print a description of a fuzzer_stem on stderr.
        !           387: */
        !           388: static void fuzzerStemPrint(
        !           389:   const char *zPrefix,
        !           390:   fuzzer_stem *pStem,
        !           391:   const char *zSuffix
        !           392: ){
        !           393:   if( pStem->n<0 ){
        !           394:     fprintf(stderr, "%s[%s](%d)-->self%s",
        !           395:        zPrefix,
        !           396:        pStem->zBasis, pStem->rBaseCost,
        !           397:        zSuffix
        !           398:     );
        !           399:   }else{
        !           400:     char *zBuf = 0;
        !           401:     int nBuf = 0;
        !           402:     if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
        !           403:     fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
        !           404:       zPrefix,
        !           405:       pStem->zBasis, pStem->rBaseCost, zBuf, pStem->,
        !           406:       zSuffix
        !           407:     );
        !           408:     sqlite3_free(zBuf);
        !           409:   }
        !           410: }
        !           411: #endif
        !           412: 
        !           413: /*
        !           414: ** Return 1 if the string to which the cursor is point has already
        !           415: ** been emitted.  Return 0 if not.  Return -1 on a memory allocation
        !           416: ** failures.
        !           417: */
        !           418: static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){
        !           419:   unsigned int h;
        !           420:   fuzzer_stem *pLookup;
        !           421: 
        !           422:   if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
        !           423:     return -1;
        !           424:   }
        !           425:   h = fuzzerHash(pCur->zBuf);
        !           426:   pLookup = pCur->apHash[h];
        !           427:     while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
        !           428:     pLookup = pLookup->pHash;
        !           429:   }
        !           430:   return pLookup!=0;
        !           431: }
        !           432: 
        !           433: /*
        !           434: ** Advance a fuzzer_stem to its next value.   Return 0 if there are
        !           435: ** no more values that can be generated by this fuzzer_stem.  Return
        !           436: ** -1 on a memory allocation failure.
        !           437: */
        !           438: static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
        !           439:   const fuzzer_rule *pRule;
        !           440:   while( (pRule = pStem->pRule)!=0 ){
        !           441:     while( pStem->n < pStem->nBasis - pRule->nFrom ){
        !           442:       pStem->n++;
        !           443:       if( pRule->nFrom==0
        !           444:        || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0
        !           445:       ){
        !           446:         /* Found a rewrite case.  Make sure it is not a duplicate */
        !           447:         int rc = fuzzerSeen(pCur, pStem);
        !           448:         if( rc<0 ) return -1;
        !           449:         if( rc==0 ){
        !           450:           fuzzerCost(pStem);
        !           451:           return 1;
        !           452:         }
        !           453:       }
        !           454:     }
        !           455:     pStem->n = -1;
        !           456:     pStem->pRule = pRule->pNext;
        !           457:     if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
        !           458:   }
        !           459:   return 0;
        !           460: }
        !           461: 
        !           462: /*
        !           463: ** The two input stem lists are both sorted in order of increasing
        !           464: ** rCostX.  Merge them together into a single list, sorted by rCostX, and
        !           465: ** return a pointer to the head of that new list.
        !           466: */
        !           467: static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
        !           468:   fuzzer_stem head;
        !           469:   fuzzer_stem *pTail;
        !           470: 
        !           471:   pTail =  &head;
        !           472:   while( pA && pB ){
        !           473:     if( pA->rCostX<=pB->rCostX ){
        !           474:       pTail->pNext = pA;
        !           475:       pTail = pA;
        !           476:       pA = pA->pNext;
        !           477:     }else{
        !           478:       pTail->pNext = pB;
        !           479:       pTail = pB;
        !           480:       pB = pB->pNext;
        !           481:     }
        !           482:   }
        !           483:   if( pA==0 ){
        !           484:     pTail->pNext = pB;
        !           485:   }else{
        !           486:     pTail->pNext = pA;
        !           487:   }
        !           488:   return head.pNext;
        !           489: }
        !           490: 
        !           491: /*
        !           492: ** Load pCur->pStem with the lowest-cost stem.  Return a pointer
        !           493: ** to the lowest-cost stem.
        !           494: */
        !           495: static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
        !           496:   fuzzer_stem *pBest, *pX;
        !           497:   int iBest;
        !           498:   int i;
        !           499: 
        !           500:   if( pCur->pStem==0 ){
        !           501:     iBest = -1;
        !           502:     pBest = 0;
        !           503:     for(i=0; i<=pCur->mxQueue; i++){
        !           504:       pX = pCur->aQueue[i];
        !           505:       if( pX==0 ) continue;
        !           506:       if( pBest==0 || pBest->rCostX>pX->rCostX ){
        !           507:         pBest = pX;
        !           508:         iBest = i;
        !           509:       }
        !           510:     } 
        !           511:     if( pBest ){
        !           512:       pCur->aQueue[iBest] = pBest->pNext;
        !           513:       pBest->pNext = 0;
        !           514:       pCur->pStem = pBest;
        !           515:     }
        !           516:   }
        !           517:   return pCur->pStem;
        !           518: }
        !           519: 
        !           520: /*
        !           521: ** Insert pNew into queue of pending stems.  Then find the stem
        !           522: ** with the lowest rCostX and move it into pCur->pStem.
        !           523: ** list.  The insert is done such the pNew is in the correct order
        !           524: ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
        !           525: */
        !           526: static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
        !           527:   fuzzer_stem *pX;
        !           528:   int i;
        !           529: 
        !           530:   /* If pCur->pStem exists and is greater than pNew, then make pNew
        !           531:   ** the new pCur->pStem and insert the old pCur->pStem instead.
        !           532:   */
        !           533:   if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
        !           534:     pNew->pNext = 0;
        !           535:     pCur->pStem = pNew;
        !           536:     pNew = pX;
        !           537:   }
        !           538: 
        !           539:   /* Insert the new value */
        !           540:   pNew->pNext = 0;
        !           541:   pX = pNew;
        !           542:   for(i=0; i<=pCur->mxQueue; i++){
        !           543:     if( pCur->aQueue[i] ){
        !           544:       pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
        !           545:       pCur->aQueue[i] = 0;
        !           546:     }else{
        !           547:       pCur->aQueue[i] = pX;
        !           548:       break;
        !           549:     }
        !           550:   }
        !           551:   if( i>pCur->mxQueue ){
        !           552:     if( i<FUZZER_NQUEUE ){
        !           553:       pCur->mxQueue = i;
        !           554:       pCur->aQueue[i] = pX;
        !           555:     }else{
        !           556:       assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
        !           557:       pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
        !           558:       pCur->aQueue[FUZZER_NQUEUE-1] = pX;
        !           559:     }
        !           560:   }
        !           561: 
        !           562:   return fuzzerLowestCostStem(pCur);
        !           563: }
        !           564: 
        !           565: /*
        !           566: ** Allocate a new fuzzer_stem.  Add it to the hash table but do not
        !           567: ** link it into either the pCur->pStem or pCur->pDone lists.
        !           568: */
        !           569: static fuzzer_stem *fuzzerNewStem(
        !           570:   fuzzer_cursor *pCur,
        !           571:   const char *zWord,
        !           572:   fuzzer_cost rBaseCost
        !           573: ){
        !           574:   fuzzer_stem *pNew;
        !           575:   unsigned int h;
        !           576: 
        !           577:   pNew = sqlite3_malloc( sizeof(*pNew) + strlen(zWord) + 1 );
        !           578:   if( pNew==0 ) return 0;
        !           579:   memset(pNew, 0, sizeof(*pNew));
        !           580:   pNew->zBasis = (char*)&pNew[1];
        !           581:   pNew->nBasis = strlen(zWord);
        !           582:   memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
        !           583:   pNew->pRule = pCur->pVtab->pRule;
        !           584:   pNew->n = -1;
        !           585:   pNew->rBaseCost = pNew->rCostX = rBaseCost;
        !           586:   h = fuzzerHash(pNew->zBasis);
        !           587:   pNew->pHash = pCur->apHash[h];
        !           588:   pCur->apHash[h] = pNew;
        !           589:   pCur->nStem++;
        !           590:   return pNew;
        !           591: }
        !           592: 
        !           593: 
        !           594: /*
        !           595: ** Advance a cursor to its next row of output
        !           596: */
        !           597: static int fuzzerNext(sqlite3_vtab_cursor *cur){
        !           598:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
        !           599:   int rc;
        !           600:   fuzzer_stem *pStem, *pNew;
        !           601: 
        !           602:   pCur->iRowid++;
        !           603: 
        !           604:   /* Use the element the cursor is currently point to to create
        !           605:   ** a new stem and insert the new stem into the priority queue.
        !           606:   */
        !           607:   pStem = pCur->pStem;
        !           608:   if( pStem->rCostX>0 ){
        !           609:     rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
        !           610:     if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
        !           611:     pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
        !           612:     if( pNew ){
        !           613:       if( fuzzerAdvance(pCur, pNew)==0 ){
        !           614:         pNew->pNext = pCur->pDone;
        !           615:         pCur->pDone = pNew;
        !           616:       }else{
        !           617:         if( fuzzerInsert(pCur, pNew)==pNew ){
        !           618:           return SQLITE_OK;
        !           619:         }
        !           620:       }
        !           621:     }else{
        !           622:       return SQLITE_NOMEM;
        !           623:     }
        !           624:   }
        !           625: 
        !           626:   /* Adjust the priority queue so that the first element of the
        !           627:   ** stem list is the next lowest cost word.
        !           628:   */
        !           629:   while( (pStem = pCur->pStem)!=0 ){
        !           630:     if( fuzzerAdvance(pCur, pStem) ){
        !           631:       pCur->pStem = 0;
        !           632:       pStem = fuzzerInsert(pCur, pStem);
        !           633:       if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
        !           634:         if( rc<0 ) return SQLITE_NOMEM;
        !           635:         continue;
        !           636:       }
        !           637:       return SQLITE_OK;  /* New word found */
        !           638:     }
        !           639:     pCur->pStem = 0;
        !           640:     pStem->pNext = pCur->pDone;
        !           641:     pCur->pDone = pStem;
        !           642:     if( fuzzerLowestCostStem(pCur) ){
        !           643:       rc = fuzzerSeen(pCur, pCur->pStem);
        !           644:       if( rc<0 ) return SQLITE_NOMEM;
        !           645:       if( rc==0 ){
        !           646:         return SQLITE_OK;
        !           647:       }
        !           648:     }
        !           649:   }
        !           650: 
        !           651:   /* Reach this point only if queue has been exhausted and there is
        !           652:   ** nothing left to be output. */
        !           653:   pCur->rLimit = (fuzzer_cost)0;
        !           654:   return SQLITE_OK;
        !           655: }
        !           656: 
        !           657: /*
        !           658: ** Called to "rewind" a cursor back to the beginning so that
        !           659: ** it starts its output over again.  Always called at least once
        !           660: ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
        !           661: */
        !           662: static int fuzzerFilter(
        !           663:   sqlite3_vtab_cursor *pVtabCursor, 
        !           664:   int idxNum, const char *idxStr,
        !           665:   int argc, sqlite3_value **argv
        !           666: ){
        !           667:   fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor;
        !           668:   const char *zWord = 0;
        !           669:   fuzzer_stem *pStem;
        !           670: 
        !           671:   fuzzerClearCursor(pCur, 1);
        !           672:   pCur->rLimit = 2147483647;
        !           673:   if( idxNum==1 ){
        !           674:     zWord = (const char*)sqlite3_value_text(argv[0]);
        !           675:   }else if( idxNum==2 ){
        !           676:     pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]);
        !           677:   }else if( idxNum==3 ){
        !           678:     zWord = (const char*)sqlite3_value_text(argv[0]);
        !           679:     pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]);
        !           680:   }
        !           681:   if( zWord==0 ) zWord = "";
        !           682:   pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0);
        !           683:   if( pStem==0 ) return SQLITE_NOMEM;
        !           684:   pCur->nullRule.pNext = pCur->pVtab->pRule;
        !           685:   pCur->nullRule.rCost = 0;
        !           686:   pCur->nullRule.nFrom = 0;
        !           687:   pCur->nullRule.nTo = 0;
        !           688:   pCur->nullRule.zFrom = "";
        !           689:   pStem->pRule = &pCur->nullRule;
        !           690:   pStem->n = pStem->nBasis;
        !           691:   pCur->iRowid = 1;
        !           692:   return SQLITE_OK;
        !           693: }
        !           694: 
        !           695: /*
        !           696: ** Only the word and distance columns have values.  All other columns
        !           697: ** return NULL
        !           698: */
        !           699: static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
        !           700:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
        !           701:   if( i==0 ){
        !           702:     /* the "word" column */
        !           703:     if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
        !           704:       return SQLITE_NOMEM;
        !           705:     }
        !           706:     sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
        !           707:   }else if( i==1 ){
        !           708:     /* the "distance" column */
        !           709:     sqlite3_result_int(ctx, pCur->pStem->rCostX);
        !           710:   }else{
        !           711:     /* All other columns are NULL */
        !           712:     sqlite3_result_null(ctx);
        !           713:   }
        !           714:   return SQLITE_OK;
        !           715: }
        !           716: 
        !           717: /*
        !           718: ** The rowid.
        !           719: */
        !           720: static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
        !           721:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
        !           722:   *pRowid = pCur->iRowid;
        !           723:   return SQLITE_OK;
        !           724: }
        !           725: 
        !           726: /*
        !           727: ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
        !           728: ** that the cursor has nothing more to output.
        !           729: */
        !           730: static int fuzzerEof(sqlite3_vtab_cursor *cur){
        !           731:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
        !           732:   return pCur->rLimit<=(fuzzer_cost)0;
        !           733: }
        !           734: 
        !           735: /*
        !           736: ** Search for terms of these forms:
        !           737: **
        !           738: **       word MATCH $str
        !           739: **       distance < $value
        !           740: **       distance <= $value
        !           741: **
        !           742: ** The distance< and distance<= are both treated as distance<=.
        !           743: ** The query plan number is as follows:
        !           744: **
        !           745: **   0:    None of the terms above are found
        !           746: **   1:    There is a "word MATCH" term with $str in filter.argv[0].
        !           747: **   2:    There is a "distance<" term with $value in filter.argv[0].
        !           748: **   3:    Both "word MATCH" and "distance<" with $str in argv[0] and
        !           749: **         $value in argv[1].
        !           750: */
        !           751: static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
        !           752:   int iPlan = 0;
        !           753:   int iDistTerm = -1;
        !           754:   int i;
        !           755:   const struct sqlite3_index_constraint *pConstraint;
        !           756:   pConstraint = pIdxInfo->aConstraint;
        !           757:   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
        !           758:     if( pConstraint->usable==0 ) continue;
        !           759:     if( (iPlan & 1)==0 
        !           760:      && pConstraint->iColumn==0
        !           761:      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
        !           762:     ){
        !           763:       iPlan |= 1;
        !           764:       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
        !           765:       pIdxInfo->aConstraintUsage[i].omit = 1;
        !           766:     }
        !           767:     if( (iPlan & 2)==0
        !           768:      && pConstraint->iColumn==1
        !           769:      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
        !           770:            || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
        !           771:     ){
        !           772:       iPlan |= 2;
        !           773:       iDistTerm = i;
        !           774:     }
        !           775:   }
        !           776:   if( iPlan==2 ){
        !           777:     pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1;
        !           778:   }else if( iPlan==3 ){
        !           779:     pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2;
        !           780:   }
        !           781:   pIdxInfo->idxNum = iPlan;
        !           782:   if( pIdxInfo->nOrderBy==1
        !           783:    && pIdxInfo->aOrderBy[0].iColumn==1
        !           784:    && pIdxInfo->aOrderBy[0].desc==0
        !           785:   ){
        !           786:     pIdxInfo->orderByConsumed = 1;
        !           787:   }
        !           788:   pIdxInfo->estimatedCost = (double)10000;
        !           789:    
        !           790:   return SQLITE_OK;
        !           791: }
        !           792: 
        !           793: /*
        !           794: ** Disallow all attempts to DELETE or UPDATE.  Only INSERTs are allowed.
        !           795: **
        !           796: ** On an insert, the cFrom, cTo, and cost columns are used to construct
        !           797: ** a new rule.   All other columns are ignored.  The rule is ignored
        !           798: ** if cFrom and cTo are identical.  A NULL value for cFrom or cTo is
        !           799: ** interpreted as an empty string.  The cost must be positive.
        !           800: */
        !           801: static int fuzzerUpdate(
        !           802:   sqlite3_vtab *pVTab,
        !           803:   int argc,
        !           804:   sqlite3_value **argv,
        !           805:   sqlite_int64 *pRowid
        !           806: ){
        !           807:   fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
        !           808:   fuzzer_rule *pRule;
        !           809:   const char *zFrom;
        !           810:   int nFrom;
        !           811:   const char *zTo;
        !           812:   int nTo;
        !           813:   fuzzer_cost rCost;
        !           814:   if( argc!=7 ){
        !           815:     sqlite3_free(pVTab->zErrMsg);
        !           816:     pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table",
        !           817:                                      p->zClassName);
        !           818:     return SQLITE_CONSTRAINT;
        !           819:   }
        !           820:   if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){
        !           821:     sqlite3_free(pVTab->zErrMsg);
        !           822:     pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table",
        !           823:                                      p->zClassName);
        !           824:     return SQLITE_CONSTRAINT;
        !           825:   }
        !           826:   zFrom = (char*)sqlite3_value_text(argv[4]);
        !           827:   if( zFrom==0 ) zFrom = "";
        !           828:   zTo = (char*)sqlite3_value_text(argv[5]);
        !           829:   if( zTo==0 ) zTo = "";
        !           830:   if( strcmp(zFrom,zTo)==0 ){
        !           831:     /* Silently ignore null transformations */
        !           832:     return SQLITE_OK;
        !           833:   }
        !           834:   rCost = sqlite3_value_int(argv[6]);
        !           835:   if( rCost<=0 ){
        !           836:     sqlite3_free(pVTab->zErrMsg);
        !           837:     pVTab->zErrMsg = sqlite3_mprintf("cost must be positive");
        !           838:     return SQLITE_CONSTRAINT;    
        !           839:   }
        !           840:   nFrom = strlen(zFrom);
        !           841:   nTo = strlen(zTo);
        !           842:   pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo );
        !           843:   if( pRule==0 ){
        !           844:     return SQLITE_NOMEM;
        !           845:   }
        !           846:   pRule->zFrom = &pRule->zTo[nTo+1];
        !           847:   pRule->nFrom = nFrom;
        !           848:   memcpy(pRule->zFrom, zFrom, nFrom+1);
        !           849:   memcpy(pRule->zTo, zTo, nTo+1);
        !           850:   pRule->nTo = nTo;
        !           851:   pRule->rCost = rCost;
        !           852:   pRule->pNext = p->pNewRule;
        !           853:   p->pNewRule = pRule;
        !           854:   return SQLITE_OK;
        !           855: }
        !           856: 
        !           857: /*
        !           858: ** A virtual table module that provides read-only access to a
        !           859: ** Tcl global variable namespace.
        !           860: */
        !           861: static sqlite3_module fuzzerModule = {
        !           862:   0,                           /* iVersion */
        !           863:   fuzzerConnect,
        !           864:   fuzzerConnect,
        !           865:   fuzzerBestIndex,
        !           866:   fuzzerDisconnect, 
        !           867:   fuzzerDisconnect,
        !           868:   fuzzerOpen,                  /* xOpen - open a cursor */
        !           869:   fuzzerClose,                 /* xClose - close a cursor */
        !           870:   fuzzerFilter,                /* xFilter - configure scan constraints */
        !           871:   fuzzerNext,                  /* xNext - advance a cursor */
        !           872:   fuzzerEof,                   /* xEof - check for end of scan */
        !           873:   fuzzerColumn,                /* xColumn - read data */
        !           874:   fuzzerRowid,                 /* xRowid - read data */
        !           875:   fuzzerUpdate,                /* xUpdate - INSERT */
        !           876:   0,                           /* xBegin */
        !           877:   0,                           /* xSync */
        !           878:   0,                           /* xCommit */
        !           879:   0,                           /* xRollback */
        !           880:   0,                           /* xFindMethod */
        !           881:   0,                           /* xRename */
        !           882: };
        !           883: 
        !           884: #endif /* SQLITE_OMIT_VIRTUALTABLE */
        !           885: 
        !           886: 
        !           887: /*
        !           888: ** Register the fuzzer virtual table
        !           889: */
        !           890: int fuzzer_register(sqlite3 *db){
        !           891:   int rc = SQLITE_OK;
        !           892: #ifndef SQLITE_OMIT_VIRTUALTABLE
        !           893:   rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0);
        !           894: #endif
        !           895:   return rc;
        !           896: }
        !           897: 
        !           898: #ifdef SQLITE_TEST
        !           899: #include <tcl.h>
        !           900: /*
        !           901: ** Decode a pointer to an sqlite3 object.
        !           902: */
        !           903: extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite3 **ppDb);
        !           904: 
        !           905: /*
        !           906: ** Register the echo virtual table module.
        !           907: */
        !           908: static int register_fuzzer_module(
        !           909:   ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
        !           910:   Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
        !           911:   int objc,              /* Number of arguments */
        !           912:   Tcl_Obj *CONST objv[]  /* Command arguments */
        !           913: ){
        !           914:   sqlite3 *db;
        !           915:   if( objc!=2 ){
        !           916:     Tcl_WrongNumArgs(interp, 1, objv, "DB");
        !           917:     return TCL_ERROR;
        !           918:   }
        !           919:   if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
        !           920:   fuzzer_register(db);
        !           921:   return TCL_OK;
        !           922: }
        !           923: 
        !           924: 
        !           925: /*
        !           926: ** Register commands with the TCL interpreter.
        !           927: */
        !           928: int Sqlitetestfuzzer_Init(Tcl_Interp *interp){
        !           929:   static struct {
        !           930:      char *zName;
        !           931:      Tcl_ObjCmdProc *xProc;
        !           932:      void *clientData;
        !           933:   } aObjCmd[] = {
        !           934:      { "register_fuzzer_module",   register_fuzzer_module, 0 },
        !           935:   };
        !           936:   int i;
        !           937:   for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
        !           938:     Tcl_CreateObjCommand(interp, aObjCmd[i].zName, 
        !           939:         aObjCmd[i].xProc, aObjCmd[i].clientData, 0);
        !           940:   }
        !           941:   return TCL_OK;
        !           942: }
        !           943: 
        !           944: #endif /* SQLITE_TEST */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>