embedaddon/sqlite3/src/test_fuzzer.c - annotate

Return to test_fuzzer.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / src
Annotation of embedaddon/sqlite3/src/test_fuzzer.c, revision 1.1.1.1

1.1       misho       1: /*
                      2: ** 2011 March 24
                      3: **
                      4: ** The author disclaims copyright to this source code.  In place of
                      5: ** a legal notice, here is a blessing:
                      6: **
                      7: **    May you do good and not evil.
                      8: **    May you find forgiveness for yourself and forgive others.
                      9: **    May you share freely, never taking more than you give.
                     10: **
                     11: *************************************************************************
                     12: **
                     13: ** Code for demonstartion virtual table that generates variations
                     14: ** on an input word at increasing edit distances from the original.
                     15: **
                     16: ** A fuzzer virtual table is created like this:
                     17: **
                     18: **     CREATE VIRTUAL TABLE temp.f USING fuzzer;
                     19: **
                     20: ** The name of the new virtual table in the example above is "f".
                     21: ** Note that all fuzzer virtual tables must be TEMP tables.  The
                     22: ** "temp." prefix in front of the table name is required when the
                     23: ** table is being created.  The "temp." prefix can be omitted when
                     24: ** using the table as long as the name is unambiguous.
                     25: **
                     26: ** Before being used, the fuzzer needs to be programmed by giving it
                     27: ** character transformations and a cost associated with each transformation.
                     28: ** Examples:
                     29: **
                     30: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('','a',100);
                     31: **
                     32: ** The above statement says that the cost of inserting a letter 'a' is
                     33: ** 100.  (All costs are integers.  We recommend that costs be scaled so
                     34: ** that the average cost is around 100.)
                     35: **
                     36: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('b','',87);
                     37: **
                     38: ** The above statement says that the cost of deleting a single letter
                     39: ** 'b' is 87.
                     40: **
                     41: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('o','oe',38);
                     42: **    INSERT INTO f(cFrom,cTo,Cost) VALUES('oe','o',40);
                     43: **
                     44: ** This third example says that the cost of transforming the single
                     45: ** letter "o" into the two-letter sequence "oe" is 38 and that the
                     46: ** cost of transforming "oe" back into "o" is 40.
                     47: **
                     48: ** After all the transformation costs have been set, the fuzzer table
                     49: ** can be queried as follows:
                     50: **
                     51: **    SELECT word, distance FROM f
                     52: **     WHERE word MATCH 'abcdefg'
                     53: **       AND distance<200;
                     54: **
                     55: ** This first query outputs the string "abcdefg" and all strings that
                     56: ** can be derived from that string by appling the specified transformations.
                     57: ** The strings are output together with their total transformation cost
                     58: ** (called "distance") and appear in order of increasing cost.  No string
                     59: ** is output more than once.  If there are multiple ways to transform the
                     60: ** target string into the output string then the lowest cost transform is
                     61: ** the one that is returned.  In the example, the search is limited to 
                     62: ** strings with a total distance of less than 200.
                     63: **
                     64: ** It is important to put some kind of a limit on the fuzzer output.  This
                     65: ** can be either in the form of a LIMIT clause at the end of the query,
                     66: ** or better, a "distance<NNN" constraint where NNN is some number.  The
                     67: ** running time and memory requirement is exponential in the value of NNN 
                     68: ** so you want to make sure that NNN is not too big.  A value of NNN that
                     69: ** is about twice the average transformation cost seems to give good results.
                     70: **
                     71: ** The fuzzer table can be useful for tasks such as spelling correction.
                     72: ** Suppose there is a second table vocabulary(w) where the w column contains
                     73: ** all correctly spelled words.   Let $word be a word you want to look up.
                     74: **
                     75: **   SELECT vocabulary.w FROM f, vocabulary
                     76: **    WHERE f.word MATCH $word
                     77: **      AND f.distance<=200
                     78: **      AND f.word=vocabulary.w
                     79: **    LIMIT 20
                     80: **
                     81: ** The query above gives the 20 closest words to the $word being tested.
                     82: ** (Note that for good performance, the vocubulary.w column should be
                     83: ** indexed.)
                     84: **
                     85: ** A similar query can be used to find all words in the dictionary that
                     86: ** begin with some prefix $prefix:
                     87: **
                     88: **   SELECT vocabulary.w FROM f, vocabulary
                     89: **    WHERE f.word MATCH $prefix
                     90: **      AND f.distance<=200
                     91: **      AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
                     92: **    LIMIT 50
                     93: **
                     94: ** This last query will show up to 50 words out of the vocabulary that
                     95: ** match or nearly match the $prefix.
                     96: */
                     97: #include "sqlite3.h"
                     98: #include <stdlib.h>
                     99: #include <string.h>
                    100: #include <assert.h>
                    101: #include <stdio.h>
                    102: 
                    103: #ifndef SQLITE_OMIT_VIRTUALTABLE
                    104: 
                    105: /*
                    106: ** Forward declaration of objects used by this implementation
                    107: */
                    108: typedef struct fuzzer_vtab fuzzer_vtab;
                    109: typedef struct fuzzer_cursor fuzzer_cursor;
                    110: typedef struct fuzzer_rule fuzzer_rule;
                    111: typedef struct fuzzer_seen fuzzer_seen;
                    112: typedef struct fuzzer_stem fuzzer_stem;
                    113: 
                    114: /*
                    115: ** Type of the "cost" of an edit operation.  Might be changed to
                    116: ** "float" or "double" or "sqlite3_int64" in the future.
                    117: */
                    118: typedef int fuzzer_cost;
                    119: 
                    120: 
                    121: /*
                    122: ** Each transformation rule is stored as an instance of this object.
                    123: ** All rules are kept on a linked list sorted by rCost.
                    124: */
                    125: struct fuzzer_rule {
                    126:   fuzzer_rule *pNext;        /* Next rule in order of increasing rCost */
                    127:   fuzzer_cost rCost;         /* Cost of this transformation */
                    128:   int nFrom, nTo;            /* Length of the zFrom and zTo strings */
                    129:   char *zFrom;               /* Transform from */
                    130:   char zTo[4];               /* Transform to (extra space appended) */
                    131: };
                    132: 
                    133: /*
                    134: ** A stem object is used to generate variants.  It is also used to record
                    135: ** previously generated outputs.
                    136: **
                    137: ** Every stem is added to a hash table as it is output.  Generation of
                    138: ** duplicate stems is suppressed.
                    139: **
                    140: ** Active stems (those that might generate new outputs) are kepts on a linked
                    141: ** list sorted by increasing cost.  The cost is the sum of rBaseCost and
                    142: ** pRule->rCost.
                    143: */
                    144: struct fuzzer_stem {
                    145:   char *zBasis;              /* Word being fuzzed */
                    146:   int nBasis;                /* Length of the zBasis string */
                    147:   const fuzzer_rule *pRule;  /* Current rule to apply */
                    148:   int n;                     /* Apply pRule at this character offset */
                    149:   fuzzer_cost rBaseCost;     /* Base cost of getting to zBasis */
                    150:   fuzzer_cost rCostX;        /* Precomputed rBaseCost + pRule->rCost */
                    151:   fuzzer_stem *pNext;        /* Next stem in rCost order */
                    152:   fuzzer_stem *pHash;        /* Next stem with same hash on zBasis */
                    153: };
                    154: 
                    155: /* 
                    156: ** A fuzzer virtual-table object 
                    157: */
                    158: struct fuzzer_vtab {
                    159:   sqlite3_vtab base;         /* Base class - must be first */
                    160:   char *zClassName;          /* Name of this class.  Default: "fuzzer" */
                    161:   fuzzer_rule *pRule;        /* All active rules in this fuzzer */
                    162:   fuzzer_rule *pNewRule;     /* New rules to add when last cursor expires */
                    163:   int nCursor;               /* Number of active cursors */
                    164: };
                    165: 
                    166: #define FUZZER_HASH  4001    /* Hash table size */
                    167: #define FUZZER_NQUEUE  20    /* Number of slots on the stem queue */
                    168: 
                    169: /* A fuzzer cursor object */
                    170: struct fuzzer_cursor {
                    171:   sqlite3_vtab_cursor base;  /* Base class - must be first */
                    172:   sqlite3_int64 iRowid;      /* The rowid of the current word */
                    173:   fuzzer_vtab *pVtab;        /* The virtual table this cursor belongs to */
                    174:   fuzzer_cost rLimit;        /* Maximum cost of any term */
                    175:   fuzzer_stem *pStem;        /* Stem with smallest rCostX */
                    176:   fuzzer_stem *pDone;        /* Stems already processed to completion */
                    177:   fuzzer_stem *aQueue[FUZZER_NQUEUE];  /* Queue of stems with higher rCostX */
                    178:   int mxQueue;               /* Largest used index in aQueue[] */
                    179:   char *zBuf;                /* Temporary use buffer */
                    180:   int nBuf;                  /* Bytes allocated for zBuf */
                    181:   int nStem;                 /* Number of stems allocated */
                    182:   fuzzer_rule nullRule;      /* Null rule used first */
                    183:   fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
                    184: };
                    185: 
                    186: /* Methods for the fuzzer module */
                    187: static int fuzzerConnect(
                    188:   sqlite3 *db,
                    189:   void *pAux,
                    190:   int argc, const char *const*argv,
                    191:   sqlite3_vtab **ppVtab,
                    192:   char **pzErr
                    193: ){
                    194:   fuzzer_vtab *pNew;
                    195:   int n;
                    196:   if( strcmp(argv[1],"temp")!=0 ){
                    197:     *pzErr = sqlite3_mprintf("%s virtual tables must be TEMP", argv[0]);
                    198:     return SQLITE_ERROR;
                    199:   }
                    200:   n = strlen(argv[0]) + 1;
                    201:   pNew = sqlite3_malloc( sizeof(*pNew) + n );
                    202:   if( pNew==0 ) return SQLITE_NOMEM;
                    203:   pNew->zClassName = (char*)&pNew[1];
                    204:   memcpy(pNew->zClassName, argv[0], n);
                    205:   sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)");
                    206:   memset(pNew, 0, sizeof(*pNew));
                    207:   *ppVtab = &pNew->base;
                    208:   return SQLITE_OK;
                    209: }
                    210: /* Note that for this virtual table, the xCreate and xConnect
                    211: ** methods are identical. */
                    212: 
                    213: static int fuzzerDisconnect(sqlite3_vtab *pVtab){
                    214:   fuzzer_vtab *p = (fuzzer_vtab*)pVtab;
                    215:   assert( p->nCursor==0 );
                    216:   do{
                    217:     while( p->pRule ){
                    218:       fuzzer_rule *pRule = p->pRule;
                    219:       p->pRule = pRule->pNext;
                    220:       sqlite3_free(pRule);
                    221:     }
                    222:     p->pRule = p->pNewRule;
                    223:     p->pNewRule = 0;
                    224:   }while( p->pRule );
                    225:   sqlite3_free(p);
                    226:   return SQLITE_OK;
                    227: }
                    228: /* The xDisconnect and xDestroy methods are also the same */
                    229: 
                    230: /*
                    231: ** The two input rule lists are both sorted in order of increasing
                    232: ** cost.  Merge them together into a single list, sorted by cost, and
                    233: ** return a pointer to the head of that list.
                    234: */
                    235: static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){
                    236:   fuzzer_rule head;
                    237:   fuzzer_rule *pTail;
                    238: 
                    239:   pTail =  &head;
                    240:   while( pA && pB ){
                    241:     if( pA->rCost<=pB->rCost ){
                    242:       pTail->pNext = pA;
                    243:       pTail = pA;
                    244:       pA = pA->pNext;
                    245:     }else{
                    246:       pTail->pNext = pB;
                    247:       pTail = pB;
                    248:       pB = pB->pNext;
                    249:     }
                    250:   }
                    251:   if( pA==0 ){
                    252:     pTail->pNext = pB;
                    253:   }else{
                    254:     pTail->pNext = pA;
                    255:   }
                    256:   return head.pNext;
                    257: }
                    258: 
                    259: 
                    260: /*
                    261: ** Open a new fuzzer cursor.
                    262: */
                    263: static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
                    264:   fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
                    265:   fuzzer_cursor *pCur;
                    266:   pCur = sqlite3_malloc( sizeof(*pCur) );
                    267:   if( pCur==0 ) return SQLITE_NOMEM;
                    268:   memset(pCur, 0, sizeof(*pCur));
                    269:   pCur->pVtab = p;
                    270:   *ppCursor = &pCur->base;
                    271:   if( p->nCursor==0 && p->pNewRule ){
                    272:     unsigned int i;
                    273:     fuzzer_rule *pX;
                    274:     fuzzer_rule *a[15];
                    275:     for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0;
                    276:     while( (pX = p->pNewRule)!=0 ){
                    277:       p->pNewRule = pX->pNext;
                    278:       pX->pNext = 0;
                    279:       for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){
                    280:         pX = fuzzerMergeRules(a[i], pX);
                    281:         a[i] = 0;
                    282:       }
                    283:       a[i] = fuzzerMergeRules(a[i], pX);
                    284:     }
                    285:     for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){
                    286:       pX = fuzzerMergeRules(a[i], pX);
                    287:     }
                    288:     p->pRule = fuzzerMergeRules(p->pRule, pX);
                    289:   }
                    290:   p->nCursor++;
                    291:   return SQLITE_OK;
                    292: }
                    293: 
                    294: /*
                    295: ** Free all stems in a list.
                    296: */
                    297: static void fuzzerClearStemList(fuzzer_stem *pStem){
                    298:   while( pStem ){
                    299:     fuzzer_stem *pNext = pStem->pNext;
                    300:     sqlite3_free(pStem);
                    301:     pStem = pNext;
                    302:   }
                    303: }
                    304: 
                    305: /*
                    306: ** Free up all the memory allocated by a cursor.  Set it rLimit to 0
                    307: ** to indicate that it is at EOF.
                    308: */
                    309: static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
                    310:   int i;
                    311:   fuzzerClearStemList(pCur->pStem);
                    312:   fuzzerClearStemList(pCur->pDone);
                    313:   for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
                    314:   pCur->rLimit = (fuzzer_cost)0;
                    315:   if( clearHash && pCur->nStem ){
                    316:     pCur->mxQueue = 0;
                    317:     pCur->pStem = 0;
                    318:     pCur->pDone = 0;
                    319:     memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
                    320:     memset(pCur->apHash, 0, sizeof(pCur->apHash));
                    321:   }
                    322:   pCur->nStem = 0;
                    323: }
                    324: 
                    325: /*
                    326: ** Close a fuzzer cursor.
                    327: */
                    328: static int fuzzerClose(sqlite3_vtab_cursor *cur){
                    329:   fuzzer_cursor *pCur = (fuzzer_cursor *)cur;
                    330:   fuzzerClearCursor(pCur, 0);
                    331:   sqlite3_free(pCur->zBuf);
                    332:   pCur->pVtab->nCursor--;
                    333:   sqlite3_free(pCur);
                    334:   return SQLITE_OK;
                    335: }
                    336: 
                    337: /*
                    338: ** Compute the current output term for a fuzzer_stem.
                    339: */
                    340: static int fuzzerRender(
                    341:   fuzzer_stem *pStem,   /* The stem to be rendered */
                    342:   char **pzBuf,         /* Write results into this buffer.  realloc if needed */
                    343:   int *pnBuf            /* Size of the buffer */
                    344: ){
                    345:   const fuzzer_rule *pRule = pStem->pRule;
                    346:   int n;
                    347:   char *z;
                    348: 
                    349:   n = pStem->nBasis + pRule->nTo - pRule->nFrom;
                    350:   if( (*pnBuf)<n+1 ){
                    351:     (*pzBuf) = sqlite3_realloc((*pzBuf), n+100);
                    352:     if( (*pzBuf)==0 ) return SQLITE_NOMEM;
                    353:     (*pnBuf) = n+100;
                    354:   }
                    355:   n = pStem->n;
                    356:   z = *pzBuf;
                    357:   if( n<0 ){
                    358:     memcpy(z, pStem->zBasis, pStem->nBasis+1);
                    359:   }else{
                    360:     memcpy(z, pStem->zBasis, n);
                    361:     memcpy(&z[n], pRule->zTo, pRule->nTo);
                    362:     memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom], 
                    363:            pStem->nBasis-n-pRule->nFrom+1);
                    364:   }
                    365:   return SQLITE_OK;
                    366: }
                    367: 
                    368: /*
                    369: ** Compute a hash on zBasis.
                    370: */
                    371: static unsigned int fuzzerHash(const char *z){
                    372:   unsigned int h = 0;
                    373:   while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); }
                    374:   return h % FUZZER_HASH;
                    375: }
                    376: 
                    377: /*
                    378: ** Current cost of a stem
                    379: */
                    380: static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
                    381:   return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
                    382: }
                    383: 
                    384: #if 0
                    385: /*
                    386: ** Print a description of a fuzzer_stem on stderr.
                    387: */
                    388: static void fuzzerStemPrint(
                    389:   const char *zPrefix,
                    390:   fuzzer_stem *pStem,
                    391:   const char *zSuffix
                    392: ){
                    393:   if( pStem->n<0 ){
                    394:     fprintf(stderr, "%s[%s](%d)-->self%s",
                    395:        zPrefix,
                    396:        pStem->zBasis, pStem->rBaseCost,
                    397:        zSuffix
                    398:     );
                    399:   }else{
                    400:     char *zBuf = 0;
                    401:     int nBuf = 0;
                    402:     if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
                    403:     fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
                    404:       zPrefix,
                    405:       pStem->zBasis, pStem->rBaseCost, zBuf, pStem->,
                    406:       zSuffix
                    407:     );
                    408:     sqlite3_free(zBuf);
                    409:   }
                    410: }
                    411: #endif
                    412: 
                    413: /*
                    414: ** Return 1 if the string to which the cursor is point has already
                    415: ** been emitted.  Return 0 if not.  Return -1 on a memory allocation
                    416: ** failures.
                    417: */
                    418: static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){
                    419:   unsigned int h;
                    420:   fuzzer_stem *pLookup;
                    421: 
                    422:   if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
                    423:     return -1;
                    424:   }
                    425:   h = fuzzerHash(pCur->zBuf);
                    426:   pLookup = pCur->apHash[h];
                    427:     while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
                    428:     pLookup = pLookup->pHash;
                    429:   }
                    430:   return pLookup!=0;
                    431: }
                    432: 
                    433: /*
                    434: ** Advance a fuzzer_stem to its next value.   Return 0 if there are
                    435: ** no more values that can be generated by this fuzzer_stem.  Return
                    436: ** -1 on a memory allocation failure.
                    437: */
                    438: static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
                    439:   const fuzzer_rule *pRule;
                    440:   while( (pRule = pStem->pRule)!=0 ){
                    441:     while( pStem->n < pStem->nBasis - pRule->nFrom ){
                    442:       pStem->n++;
                    443:       if( pRule->nFrom==0
                    444:        || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0
                    445:       ){
                    446:         /* Found a rewrite case.  Make sure it is not a duplicate */
                    447:         int rc = fuzzerSeen(pCur, pStem);
                    448:         if( rc<0 ) return -1;
                    449:         if( rc==0 ){
                    450:           fuzzerCost(pStem);
                    451:           return 1;
                    452:         }
                    453:       }
                    454:     }
                    455:     pStem->n = -1;
                    456:     pStem->pRule = pRule->pNext;
                    457:     if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
                    458:   }
                    459:   return 0;
                    460: }
                    461: 
                    462: /*
                    463: ** The two input stem lists are both sorted in order of increasing
                    464: ** rCostX.  Merge them together into a single list, sorted by rCostX, and
                    465: ** return a pointer to the head of that new list.
                    466: */
                    467: static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
                    468:   fuzzer_stem head;
                    469:   fuzzer_stem *pTail;
                    470: 
                    471:   pTail =  &head;
                    472:   while( pA && pB ){
                    473:     if( pA->rCostX<=pB->rCostX ){
                    474:       pTail->pNext = pA;
                    475:       pTail = pA;
                    476:       pA = pA->pNext;
                    477:     }else{
                    478:       pTail->pNext = pB;
                    479:       pTail = pB;
                    480:       pB = pB->pNext;
                    481:     }
                    482:   }
                    483:   if( pA==0 ){
                    484:     pTail->pNext = pB;
                    485:   }else{
                    486:     pTail->pNext = pA;
                    487:   }
                    488:   return head.pNext;
                    489: }
                    490: 
                    491: /*
                    492: ** Load pCur->pStem with the lowest-cost stem.  Return a pointer
                    493: ** to the lowest-cost stem.
                    494: */
                    495: static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
                    496:   fuzzer_stem *pBest, *pX;
                    497:   int iBest;
                    498:   int i;
                    499: 
                    500:   if( pCur->pStem==0 ){
                    501:     iBest = -1;
                    502:     pBest = 0;
                    503:     for(i=0; i<=pCur->mxQueue; i++){
                    504:       pX = pCur->aQueue[i];
                    505:       if( pX==0 ) continue;
                    506:       if( pBest==0 || pBest->rCostX>pX->rCostX ){
                    507:         pBest = pX;
                    508:         iBest = i;
                    509:       }
                    510:     } 
                    511:     if( pBest ){
                    512:       pCur->aQueue[iBest] = pBest->pNext;
                    513:       pBest->pNext = 0;
                    514:       pCur->pStem = pBest;
                    515:     }
                    516:   }
                    517:   return pCur->pStem;
                    518: }
                    519: 
                    520: /*
                    521: ** Insert pNew into queue of pending stems.  Then find the stem
                    522: ** with the lowest rCostX and move it into pCur->pStem.
                    523: ** list.  The insert is done such the pNew is in the correct order
                    524: ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
                    525: */
                    526: static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
                    527:   fuzzer_stem *pX;
                    528:   int i;
                    529: 
                    530:   /* If pCur->pStem exists and is greater than pNew, then make pNew
                    531:   ** the new pCur->pStem and insert the old pCur->pStem instead.
                    532:   */
                    533:   if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
                    534:     pNew->pNext = 0;
                    535:     pCur->pStem = pNew;
                    536:     pNew = pX;
                    537:   }
                    538: 
                    539:   /* Insert the new value */
                    540:   pNew->pNext = 0;
                    541:   pX = pNew;
                    542:   for(i=0; i<=pCur->mxQueue; i++){
                    543:     if( pCur->aQueue[i] ){
                    544:       pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
                    545:       pCur->aQueue[i] = 0;
                    546:     }else{
                    547:       pCur->aQueue[i] = pX;
                    548:       break;
                    549:     }
                    550:   }
                    551:   if( i>pCur->mxQueue ){
                    552:     if( i<FUZZER_NQUEUE ){
                    553:       pCur->mxQueue = i;
                    554:       pCur->aQueue[i] = pX;
                    555:     }else{
                    556:       assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
                    557:       pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
                    558:       pCur->aQueue[FUZZER_NQUEUE-1] = pX;
                    559:     }
                    560:   }
                    561: 
                    562:   return fuzzerLowestCostStem(pCur);
                    563: }
                    564: 
                    565: /*
                    566: ** Allocate a new fuzzer_stem.  Add it to the hash table but do not
                    567: ** link it into either the pCur->pStem or pCur->pDone lists.
                    568: */
                    569: static fuzzer_stem *fuzzerNewStem(
                    570:   fuzzer_cursor *pCur,
                    571:   const char *zWord,
                    572:   fuzzer_cost rBaseCost
                    573: ){
                    574:   fuzzer_stem *pNew;
                    575:   unsigned int h;
                    576: 
                    577:   pNew = sqlite3_malloc( sizeof(*pNew) + strlen(zWord) + 1 );
                    578:   if( pNew==0 ) return 0;
                    579:   memset(pNew, 0, sizeof(*pNew));
                    580:   pNew->zBasis = (char*)&pNew[1];
                    581:   pNew->nBasis = strlen(zWord);
                    582:   memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
                    583:   pNew->pRule = pCur->pVtab->pRule;
                    584:   pNew->n = -1;
                    585:   pNew->rBaseCost = pNew->rCostX = rBaseCost;
                    586:   h = fuzzerHash(pNew->zBasis);
                    587:   pNew->pHash = pCur->apHash[h];
                    588:   pCur->apHash[h] = pNew;
                    589:   pCur->nStem++;
                    590:   return pNew;
                    591: }
                    592: 
                    593: 
                    594: /*
                    595: ** Advance a cursor to its next row of output
                    596: */
                    597: static int fuzzerNext(sqlite3_vtab_cursor *cur){
                    598:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
                    599:   int rc;
                    600:   fuzzer_stem *pStem, *pNew;
                    601: 
                    602:   pCur->iRowid++;
                    603: 
                    604:   /* Use the element the cursor is currently point to to create
                    605:   ** a new stem and insert the new stem into the priority queue.
                    606:   */
                    607:   pStem = pCur->pStem;
                    608:   if( pStem->rCostX>0 ){
                    609:     rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
                    610:     if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
                    611:     pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
                    612:     if( pNew ){
                    613:       if( fuzzerAdvance(pCur, pNew)==0 ){
                    614:         pNew->pNext = pCur->pDone;
                    615:         pCur->pDone = pNew;
                    616:       }else{
                    617:         if( fuzzerInsert(pCur, pNew)==pNew ){
                    618:           return SQLITE_OK;
                    619:         }
                    620:       }
                    621:     }else{
                    622:       return SQLITE_NOMEM;
                    623:     }
                    624:   }
                    625: 
                    626:   /* Adjust the priority queue so that the first element of the
                    627:   ** stem list is the next lowest cost word.
                    628:   */
                    629:   while( (pStem = pCur->pStem)!=0 ){
                    630:     if( fuzzerAdvance(pCur, pStem) ){
                    631:       pCur->pStem = 0;
                    632:       pStem = fuzzerInsert(pCur, pStem);
                    633:       if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
                    634:         if( rc<0 ) return SQLITE_NOMEM;
                    635:         continue;
                    636:       }
                    637:       return SQLITE_OK;  /* New word found */
                    638:     }
                    639:     pCur->pStem = 0;
                    640:     pStem->pNext = pCur->pDone;
                    641:     pCur->pDone = pStem;
                    642:     if( fuzzerLowestCostStem(pCur) ){
                    643:       rc = fuzzerSeen(pCur, pCur->pStem);
                    644:       if( rc<0 ) return SQLITE_NOMEM;
                    645:       if( rc==0 ){
                    646:         return SQLITE_OK;
                    647:       }
                    648:     }
                    649:   }
                    650: 
                    651:   /* Reach this point only if queue has been exhausted and there is
                    652:   ** nothing left to be output. */
                    653:   pCur->rLimit = (fuzzer_cost)0;
                    654:   return SQLITE_OK;
                    655: }
                    656: 
                    657: /*
                    658: ** Called to "rewind" a cursor back to the beginning so that
                    659: ** it starts its output over again.  Always called at least once
                    660: ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
                    661: */
                    662: static int fuzzerFilter(
                    663:   sqlite3_vtab_cursor *pVtabCursor, 
                    664:   int idxNum, const char *idxStr,
                    665:   int argc, sqlite3_value **argv
                    666: ){
                    667:   fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor;
                    668:   const char *zWord = 0;
                    669:   fuzzer_stem *pStem;
                    670: 
                    671:   fuzzerClearCursor(pCur, 1);
                    672:   pCur->rLimit = 2147483647;
                    673:   if( idxNum==1 ){
                    674:     zWord = (const char*)sqlite3_value_text(argv[0]);
                    675:   }else if( idxNum==2 ){
                    676:     pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]);
                    677:   }else if( idxNum==3 ){
                    678:     zWord = (const char*)sqlite3_value_text(argv[0]);
                    679:     pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]);
                    680:   }
                    681:   if( zWord==0 ) zWord = "";
                    682:   pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0);
                    683:   if( pStem==0 ) return SQLITE_NOMEM;
                    684:   pCur->nullRule.pNext = pCur->pVtab->pRule;
                    685:   pCur->nullRule.rCost = 0;
                    686:   pCur->nullRule.nFrom = 0;
                    687:   pCur->nullRule.nTo = 0;
                    688:   pCur->nullRule.zFrom = "";
                    689:   pStem->pRule = &pCur->nullRule;
                    690:   pStem->n = pStem->nBasis;
                    691:   pCur->iRowid = 1;
                    692:   return SQLITE_OK;
                    693: }
                    694: 
                    695: /*
                    696: ** Only the word and distance columns have values.  All other columns
                    697: ** return NULL
                    698: */
                    699: static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
                    700:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
                    701:   if( i==0 ){
                    702:     /* the "word" column */
                    703:     if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
                    704:       return SQLITE_NOMEM;
                    705:     }
                    706:     sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
                    707:   }else if( i==1 ){
                    708:     /* the "distance" column */
                    709:     sqlite3_result_int(ctx, pCur->pStem->rCostX);
                    710:   }else{
                    711:     /* All other columns are NULL */
                    712:     sqlite3_result_null(ctx);
                    713:   }
                    714:   return SQLITE_OK;
                    715: }
                    716: 
                    717: /*
                    718: ** The rowid.
                    719: */
                    720: static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
                    721:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
                    722:   *pRowid = pCur->iRowid;
                    723:   return SQLITE_OK;
                    724: }
                    725: 
                    726: /*
                    727: ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
                    728: ** that the cursor has nothing more to output.
                    729: */
                    730: static int fuzzerEof(sqlite3_vtab_cursor *cur){
                    731:   fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
                    732:   return pCur->rLimit<=(fuzzer_cost)0;
                    733: }
                    734: 
                    735: /*
                    736: ** Search for terms of these forms:
                    737: **
                    738: **       word MATCH $str
                    739: **       distance < $value
                    740: **       distance <= $value
                    741: **
                    742: ** The distance< and distance<= are both treated as distance<=.
                    743: ** The query plan number is as follows:
                    744: **
                    745: **   0:    None of the terms above are found
                    746: **   1:    There is a "word MATCH" term with $str in filter.argv[0].
                    747: **   2:    There is a "distance<" term with $value in filter.argv[0].
                    748: **   3:    Both "word MATCH" and "distance<" with $str in argv[0] and
                    749: **         $value in argv[1].
                    750: */
                    751: static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
                    752:   int iPlan = 0;
                    753:   int iDistTerm = -1;
                    754:   int i;
                    755:   const struct sqlite3_index_constraint *pConstraint;
                    756:   pConstraint = pIdxInfo->aConstraint;
                    757:   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
                    758:     if( pConstraint->usable==0 ) continue;
                    759:     if( (iPlan & 1)==0 
                    760:      && pConstraint->iColumn==0
                    761:      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
                    762:     ){
                    763:       iPlan |= 1;
                    764:       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
                    765:       pIdxInfo->aConstraintUsage[i].omit = 1;
                    766:     }
                    767:     if( (iPlan & 2)==0
                    768:      && pConstraint->iColumn==1
                    769:      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
                    770:            || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
                    771:     ){
                    772:       iPlan |= 2;
                    773:       iDistTerm = i;
                    774:     }
                    775:   }
                    776:   if( iPlan==2 ){
                    777:     pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1;
                    778:   }else if( iPlan==3 ){
                    779:     pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2;
                    780:   }
                    781:   pIdxInfo->idxNum = iPlan;
                    782:   if( pIdxInfo->nOrderBy==1
                    783:    && pIdxInfo->aOrderBy[0].iColumn==1
                    784:    && pIdxInfo->aOrderBy[0].desc==0
                    785:   ){
                    786:     pIdxInfo->orderByConsumed = 1;
                    787:   }
                    788:   pIdxInfo->estimatedCost = (double)10000;
                    789:    
                    790:   return SQLITE_OK;
                    791: }
                    792: 
                    793: /*
                    794: ** Disallow all attempts to DELETE or UPDATE.  Only INSERTs are allowed.
                    795: **
                    796: ** On an insert, the cFrom, cTo, and cost columns are used to construct
                    797: ** a new rule.   All other columns are ignored.  The rule is ignored
                    798: ** if cFrom and cTo are identical.  A NULL value for cFrom or cTo is
                    799: ** interpreted as an empty string.  The cost must be positive.
                    800: */
                    801: static int fuzzerUpdate(
                    802:   sqlite3_vtab *pVTab,
                    803:   int argc,
                    804:   sqlite3_value **argv,
                    805:   sqlite_int64 *pRowid
                    806: ){
                    807:   fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
                    808:   fuzzer_rule *pRule;
                    809:   const char *zFrom;
                    810:   int nFrom;
                    811:   const char *zTo;
                    812:   int nTo;
                    813:   fuzzer_cost rCost;
                    814:   if( argc!=7 ){
                    815:     sqlite3_free(pVTab->zErrMsg);
                    816:     pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table",
                    817:                                      p->zClassName);
                    818:     return SQLITE_CONSTRAINT;
                    819:   }
                    820:   if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){
                    821:     sqlite3_free(pVTab->zErrMsg);
                    822:     pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table",
                    823:                                      p->zClassName);
                    824:     return SQLITE_CONSTRAINT;
                    825:   }
                    826:   zFrom = (char*)sqlite3_value_text(argv[4]);
                    827:   if( zFrom==0 ) zFrom = "";
                    828:   zTo = (char*)sqlite3_value_text(argv[5]);
                    829:   if( zTo==0 ) zTo = "";
                    830:   if( strcmp(zFrom,zTo)==0 ){
                    831:     /* Silently ignore null transformations */
                    832:     return SQLITE_OK;
                    833:   }
                    834:   rCost = sqlite3_value_int(argv[6]);
                    835:   if( rCost<=0 ){
                    836:     sqlite3_free(pVTab->zErrMsg);
                    837:     pVTab->zErrMsg = sqlite3_mprintf("cost must be positive");
                    838:     return SQLITE_CONSTRAINT;    
                    839:   }
                    840:   nFrom = strlen(zFrom);
                    841:   nTo = strlen(zTo);
                    842:   pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo );
                    843:   if( pRule==0 ){
                    844:     return SQLITE_NOMEM;
                    845:   }
                    846:   pRule->zFrom = &pRule->zTo[nTo+1];
                    847:   pRule->nFrom = nFrom;
                    848:   memcpy(pRule->zFrom, zFrom, nFrom+1);
                    849:   memcpy(pRule->zTo, zTo, nTo+1);
                    850:   pRule->nTo = nTo;
                    851:   pRule->rCost = rCost;
                    852:   pRule->pNext = p->pNewRule;
                    853:   p->pNewRule = pRule;
                    854:   return SQLITE_OK;
                    855: }
                    856: 
                    857: /*
                    858: ** A virtual table module that provides read-only access to a
                    859: ** Tcl global variable namespace.
                    860: */
                    861: static sqlite3_module fuzzerModule = {
                    862:   0,                           /* iVersion */
                    863:   fuzzerConnect,
                    864:   fuzzerConnect,
                    865:   fuzzerBestIndex,
                    866:   fuzzerDisconnect, 
                    867:   fuzzerDisconnect,
                    868:   fuzzerOpen,                  /* xOpen - open a cursor */
                    869:   fuzzerClose,                 /* xClose - close a cursor */
                    870:   fuzzerFilter,                /* xFilter - configure scan constraints */
                    871:   fuzzerNext,                  /* xNext - advance a cursor */
                    872:   fuzzerEof,                   /* xEof - check for end of scan */
                    873:   fuzzerColumn,                /* xColumn - read data */
                    874:   fuzzerRowid,                 /* xRowid - read data */
                    875:   fuzzerUpdate,                /* xUpdate - INSERT */
                    876:   0,                           /* xBegin */
                    877:   0,                           /* xSync */
                    878:   0,                           /* xCommit */
                    879:   0,                           /* xRollback */
                    880:   0,                           /* xFindMethod */
                    881:   0,                           /* xRename */
                    882: };
                    883: 
                    884: #endif /* SQLITE_OMIT_VIRTUALTABLE */
                    885: 
                    886: 
                    887: /*
                    888: ** Register the fuzzer virtual table
                    889: */
                    890: int fuzzer_register(sqlite3 *db){
                    891:   int rc = SQLITE_OK;
                    892: #ifndef SQLITE_OMIT_VIRTUALTABLE
                    893:   rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0);
                    894: #endif
                    895:   return rc;
                    896: }
                    897: 
                    898: #ifdef SQLITE_TEST
                    899: #include <tcl.h>
                    900: /*
                    901: ** Decode a pointer to an sqlite3 object.
                    902: */
                    903: extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite3 **ppDb);
                    904: 
                    905: /*
                    906: ** Register the echo virtual table module.
                    907: */
                    908: static int register_fuzzer_module(
                    909:   ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
                    910:   Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
                    911:   int objc,              /* Number of arguments */
                    912:   Tcl_Obj *CONST objv[]  /* Command arguments */
                    913: ){
                    914:   sqlite3 *db;
                    915:   if( objc!=2 ){
                    916:     Tcl_WrongNumArgs(interp, 1, objv, "DB");
                    917:     return TCL_ERROR;
                    918:   }
                    919:   if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
                    920:   fuzzer_register(db);
                    921:   return TCL_OK;
                    922: }
                    923: 
                    924: 
                    925: /*
                    926: ** Register commands with the TCL interpreter.
                    927: */
                    928: int Sqlitetestfuzzer_Init(Tcl_Interp *interp){
                    929:   static struct {
                    930:      char *zName;
                    931:      Tcl_ObjCmdProc *xProc;
                    932:      void *clientData;
                    933:   } aObjCmd[] = {
                    934:      { "register_fuzzer_module",   register_fuzzer_module, 0 },
                    935:   };
                    936:   int i;
                    937:   for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
                    938:     Tcl_CreateObjCommand(interp, aObjCmd[i].zName, 
                    939:         aObjCmd[i].xProc, aObjCmd[i].clientData, 0);
                    940:   }
                    941:   return TCL_OK;
                    942: }
                    943: 
                    944: #endif /* SQLITE_TEST */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>