Annotation of embedaddon/sqlite3/src/test_fuzzer.c, revision 1.1
1.1 ! misho 1: /*
! 2: ** 2011 March 24
! 3: **
! 4: ** The author disclaims copyright to this source code. In place of
! 5: ** a legal notice, here is a blessing:
! 6: **
! 7: ** May you do good and not evil.
! 8: ** May you find forgiveness for yourself and forgive others.
! 9: ** May you share freely, never taking more than you give.
! 10: **
! 11: *************************************************************************
! 12: **
! 13: ** Code for demonstartion virtual table that generates variations
! 14: ** on an input word at increasing edit distances from the original.
! 15: **
! 16: ** A fuzzer virtual table is created like this:
! 17: **
! 18: ** CREATE VIRTUAL TABLE temp.f USING fuzzer;
! 19: **
! 20: ** The name of the new virtual table in the example above is "f".
! 21: ** Note that all fuzzer virtual tables must be TEMP tables. The
! 22: ** "temp." prefix in front of the table name is required when the
! 23: ** table is being created. The "temp." prefix can be omitted when
! 24: ** using the table as long as the name is unambiguous.
! 25: **
! 26: ** Before being used, the fuzzer needs to be programmed by giving it
! 27: ** character transformations and a cost associated with each transformation.
! 28: ** Examples:
! 29: **
! 30: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('','a',100);
! 31: **
! 32: ** The above statement says that the cost of inserting a letter 'a' is
! 33: ** 100. (All costs are integers. We recommend that costs be scaled so
! 34: ** that the average cost is around 100.)
! 35: **
! 36: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('b','',87);
! 37: **
! 38: ** The above statement says that the cost of deleting a single letter
! 39: ** 'b' is 87.
! 40: **
! 41: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('o','oe',38);
! 42: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('oe','o',40);
! 43: **
! 44: ** This third example says that the cost of transforming the single
! 45: ** letter "o" into the two-letter sequence "oe" is 38 and that the
! 46: ** cost of transforming "oe" back into "o" is 40.
! 47: **
! 48: ** After all the transformation costs have been set, the fuzzer table
! 49: ** can be queried as follows:
! 50: **
! 51: ** SELECT word, distance FROM f
! 52: ** WHERE word MATCH 'abcdefg'
! 53: ** AND distance<200;
! 54: **
! 55: ** This first query outputs the string "abcdefg" and all strings that
! 56: ** can be derived from that string by appling the specified transformations.
! 57: ** The strings are output together with their total transformation cost
! 58: ** (called "distance") and appear in order of increasing cost. No string
! 59: ** is output more than once. If there are multiple ways to transform the
! 60: ** target string into the output string then the lowest cost transform is
! 61: ** the one that is returned. In the example, the search is limited to
! 62: ** strings with a total distance of less than 200.
! 63: **
! 64: ** It is important to put some kind of a limit on the fuzzer output. This
! 65: ** can be either in the form of a LIMIT clause at the end of the query,
! 66: ** or better, a "distance<NNN" constraint where NNN is some number. The
! 67: ** running time and memory requirement is exponential in the value of NNN
! 68: ** so you want to make sure that NNN is not too big. A value of NNN that
! 69: ** is about twice the average transformation cost seems to give good results.
! 70: **
! 71: ** The fuzzer table can be useful for tasks such as spelling correction.
! 72: ** Suppose there is a second table vocabulary(w) where the w column contains
! 73: ** all correctly spelled words. Let $word be a word you want to look up.
! 74: **
! 75: ** SELECT vocabulary.w FROM f, vocabulary
! 76: ** WHERE f.word MATCH $word
! 77: ** AND f.distance<=200
! 78: ** AND f.word=vocabulary.w
! 79: ** LIMIT 20
! 80: **
! 81: ** The query above gives the 20 closest words to the $word being tested.
! 82: ** (Note that for good performance, the vocubulary.w column should be
! 83: ** indexed.)
! 84: **
! 85: ** A similar query can be used to find all words in the dictionary that
! 86: ** begin with some prefix $prefix:
! 87: **
! 88: ** SELECT vocabulary.w FROM f, vocabulary
! 89: ** WHERE f.word MATCH $prefix
! 90: ** AND f.distance<=200
! 91: ** AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
! 92: ** LIMIT 50
! 93: **
! 94: ** This last query will show up to 50 words out of the vocabulary that
! 95: ** match or nearly match the $prefix.
! 96: */
! 97: #include "sqlite3.h"
! 98: #include <stdlib.h>
! 99: #include <string.h>
! 100: #include <assert.h>
! 101: #include <stdio.h>
! 102:
! 103: #ifndef SQLITE_OMIT_VIRTUALTABLE
! 104:
! 105: /*
! 106: ** Forward declaration of objects used by this implementation
! 107: */
! 108: typedef struct fuzzer_vtab fuzzer_vtab;
! 109: typedef struct fuzzer_cursor fuzzer_cursor;
! 110: typedef struct fuzzer_rule fuzzer_rule;
! 111: typedef struct fuzzer_seen fuzzer_seen;
! 112: typedef struct fuzzer_stem fuzzer_stem;
! 113:
! 114: /*
! 115: ** Type of the "cost" of an edit operation. Might be changed to
! 116: ** "float" or "double" or "sqlite3_int64" in the future.
! 117: */
! 118: typedef int fuzzer_cost;
! 119:
! 120:
! 121: /*
! 122: ** Each transformation rule is stored as an instance of this object.
! 123: ** All rules are kept on a linked list sorted by rCost.
! 124: */
! 125: struct fuzzer_rule {
! 126: fuzzer_rule *pNext; /* Next rule in order of increasing rCost */
! 127: fuzzer_cost rCost; /* Cost of this transformation */
! 128: int nFrom, nTo; /* Length of the zFrom and zTo strings */
! 129: char *zFrom; /* Transform from */
! 130: char zTo[4]; /* Transform to (extra space appended) */
! 131: };
! 132:
! 133: /*
! 134: ** A stem object is used to generate variants. It is also used to record
! 135: ** previously generated outputs.
! 136: **
! 137: ** Every stem is added to a hash table as it is output. Generation of
! 138: ** duplicate stems is suppressed.
! 139: **
! 140: ** Active stems (those that might generate new outputs) are kepts on a linked
! 141: ** list sorted by increasing cost. The cost is the sum of rBaseCost and
! 142: ** pRule->rCost.
! 143: */
! 144: struct fuzzer_stem {
! 145: char *zBasis; /* Word being fuzzed */
! 146: int nBasis; /* Length of the zBasis string */
! 147: const fuzzer_rule *pRule; /* Current rule to apply */
! 148: int n; /* Apply pRule at this character offset */
! 149: fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */
! 150: fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */
! 151: fuzzer_stem *pNext; /* Next stem in rCost order */
! 152: fuzzer_stem *pHash; /* Next stem with same hash on zBasis */
! 153: };
! 154:
! 155: /*
! 156: ** A fuzzer virtual-table object
! 157: */
! 158: struct fuzzer_vtab {
! 159: sqlite3_vtab base; /* Base class - must be first */
! 160: char *zClassName; /* Name of this class. Default: "fuzzer" */
! 161: fuzzer_rule *pRule; /* All active rules in this fuzzer */
! 162: fuzzer_rule *pNewRule; /* New rules to add when last cursor expires */
! 163: int nCursor; /* Number of active cursors */
! 164: };
! 165:
! 166: #define FUZZER_HASH 4001 /* Hash table size */
! 167: #define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */
! 168:
! 169: /* A fuzzer cursor object */
! 170: struct fuzzer_cursor {
! 171: sqlite3_vtab_cursor base; /* Base class - must be first */
! 172: sqlite3_int64 iRowid; /* The rowid of the current word */
! 173: fuzzer_vtab *pVtab; /* The virtual table this cursor belongs to */
! 174: fuzzer_cost rLimit; /* Maximum cost of any term */
! 175: fuzzer_stem *pStem; /* Stem with smallest rCostX */
! 176: fuzzer_stem *pDone; /* Stems already processed to completion */
! 177: fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */
! 178: int mxQueue; /* Largest used index in aQueue[] */
! 179: char *zBuf; /* Temporary use buffer */
! 180: int nBuf; /* Bytes allocated for zBuf */
! 181: int nStem; /* Number of stems allocated */
! 182: fuzzer_rule nullRule; /* Null rule used first */
! 183: fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
! 184: };
! 185:
! 186: /* Methods for the fuzzer module */
! 187: static int fuzzerConnect(
! 188: sqlite3 *db,
! 189: void *pAux,
! 190: int argc, const char *const*argv,
! 191: sqlite3_vtab **ppVtab,
! 192: char **pzErr
! 193: ){
! 194: fuzzer_vtab *pNew;
! 195: int n;
! 196: if( strcmp(argv[1],"temp")!=0 ){
! 197: *pzErr = sqlite3_mprintf("%s virtual tables must be TEMP", argv[0]);
! 198: return SQLITE_ERROR;
! 199: }
! 200: n = strlen(argv[0]) + 1;
! 201: pNew = sqlite3_malloc( sizeof(*pNew) + n );
! 202: if( pNew==0 ) return SQLITE_NOMEM;
! 203: pNew->zClassName = (char*)&pNew[1];
! 204: memcpy(pNew->zClassName, argv[0], n);
! 205: sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)");
! 206: memset(pNew, 0, sizeof(*pNew));
! 207: *ppVtab = &pNew->base;
! 208: return SQLITE_OK;
! 209: }
! 210: /* Note that for this virtual table, the xCreate and xConnect
! 211: ** methods are identical. */
! 212:
! 213: static int fuzzerDisconnect(sqlite3_vtab *pVtab){
! 214: fuzzer_vtab *p = (fuzzer_vtab*)pVtab;
! 215: assert( p->nCursor==0 );
! 216: do{
! 217: while( p->pRule ){
! 218: fuzzer_rule *pRule = p->pRule;
! 219: p->pRule = pRule->pNext;
! 220: sqlite3_free(pRule);
! 221: }
! 222: p->pRule = p->pNewRule;
! 223: p->pNewRule = 0;
! 224: }while( p->pRule );
! 225: sqlite3_free(p);
! 226: return SQLITE_OK;
! 227: }
! 228: /* The xDisconnect and xDestroy methods are also the same */
! 229:
! 230: /*
! 231: ** The two input rule lists are both sorted in order of increasing
! 232: ** cost. Merge them together into a single list, sorted by cost, and
! 233: ** return a pointer to the head of that list.
! 234: */
! 235: static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){
! 236: fuzzer_rule head;
! 237: fuzzer_rule *pTail;
! 238:
! 239: pTail = &head;
! 240: while( pA && pB ){
! 241: if( pA->rCost<=pB->rCost ){
! 242: pTail->pNext = pA;
! 243: pTail = pA;
! 244: pA = pA->pNext;
! 245: }else{
! 246: pTail->pNext = pB;
! 247: pTail = pB;
! 248: pB = pB->pNext;
! 249: }
! 250: }
! 251: if( pA==0 ){
! 252: pTail->pNext = pB;
! 253: }else{
! 254: pTail->pNext = pA;
! 255: }
! 256: return head.pNext;
! 257: }
! 258:
! 259:
! 260: /*
! 261: ** Open a new fuzzer cursor.
! 262: */
! 263: static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
! 264: fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
! 265: fuzzer_cursor *pCur;
! 266: pCur = sqlite3_malloc( sizeof(*pCur) );
! 267: if( pCur==0 ) return SQLITE_NOMEM;
! 268: memset(pCur, 0, sizeof(*pCur));
! 269: pCur->pVtab = p;
! 270: *ppCursor = &pCur->base;
! 271: if( p->nCursor==0 && p->pNewRule ){
! 272: unsigned int i;
! 273: fuzzer_rule *pX;
! 274: fuzzer_rule *a[15];
! 275: for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0;
! 276: while( (pX = p->pNewRule)!=0 ){
! 277: p->pNewRule = pX->pNext;
! 278: pX->pNext = 0;
! 279: for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){
! 280: pX = fuzzerMergeRules(a[i], pX);
! 281: a[i] = 0;
! 282: }
! 283: a[i] = fuzzerMergeRules(a[i], pX);
! 284: }
! 285: for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){
! 286: pX = fuzzerMergeRules(a[i], pX);
! 287: }
! 288: p->pRule = fuzzerMergeRules(p->pRule, pX);
! 289: }
! 290: p->nCursor++;
! 291: return SQLITE_OK;
! 292: }
! 293:
! 294: /*
! 295: ** Free all stems in a list.
! 296: */
! 297: static void fuzzerClearStemList(fuzzer_stem *pStem){
! 298: while( pStem ){
! 299: fuzzer_stem *pNext = pStem->pNext;
! 300: sqlite3_free(pStem);
! 301: pStem = pNext;
! 302: }
! 303: }
! 304:
! 305: /*
! 306: ** Free up all the memory allocated by a cursor. Set it rLimit to 0
! 307: ** to indicate that it is at EOF.
! 308: */
! 309: static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
! 310: int i;
! 311: fuzzerClearStemList(pCur->pStem);
! 312: fuzzerClearStemList(pCur->pDone);
! 313: for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
! 314: pCur->rLimit = (fuzzer_cost)0;
! 315: if( clearHash && pCur->nStem ){
! 316: pCur->mxQueue = 0;
! 317: pCur->pStem = 0;
! 318: pCur->pDone = 0;
! 319: memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
! 320: memset(pCur->apHash, 0, sizeof(pCur->apHash));
! 321: }
! 322: pCur->nStem = 0;
! 323: }
! 324:
! 325: /*
! 326: ** Close a fuzzer cursor.
! 327: */
! 328: static int fuzzerClose(sqlite3_vtab_cursor *cur){
! 329: fuzzer_cursor *pCur = (fuzzer_cursor *)cur;
! 330: fuzzerClearCursor(pCur, 0);
! 331: sqlite3_free(pCur->zBuf);
! 332: pCur->pVtab->nCursor--;
! 333: sqlite3_free(pCur);
! 334: return SQLITE_OK;
! 335: }
! 336:
! 337: /*
! 338: ** Compute the current output term for a fuzzer_stem.
! 339: */
! 340: static int fuzzerRender(
! 341: fuzzer_stem *pStem, /* The stem to be rendered */
! 342: char **pzBuf, /* Write results into this buffer. realloc if needed */
! 343: int *pnBuf /* Size of the buffer */
! 344: ){
! 345: const fuzzer_rule *pRule = pStem->pRule;
! 346: int n;
! 347: char *z;
! 348:
! 349: n = pStem->nBasis + pRule->nTo - pRule->nFrom;
! 350: if( (*pnBuf)<n+1 ){
! 351: (*pzBuf) = sqlite3_realloc((*pzBuf), n+100);
! 352: if( (*pzBuf)==0 ) return SQLITE_NOMEM;
! 353: (*pnBuf) = n+100;
! 354: }
! 355: n = pStem->n;
! 356: z = *pzBuf;
! 357: if( n<0 ){
! 358: memcpy(z, pStem->zBasis, pStem->nBasis+1);
! 359: }else{
! 360: memcpy(z, pStem->zBasis, n);
! 361: memcpy(&z[n], pRule->zTo, pRule->nTo);
! 362: memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom],
! 363: pStem->nBasis-n-pRule->nFrom+1);
! 364: }
! 365: return SQLITE_OK;
! 366: }
! 367:
! 368: /*
! 369: ** Compute a hash on zBasis.
! 370: */
! 371: static unsigned int fuzzerHash(const char *z){
! 372: unsigned int h = 0;
! 373: while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); }
! 374: return h % FUZZER_HASH;
! 375: }
! 376:
! 377: /*
! 378: ** Current cost of a stem
! 379: */
! 380: static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
! 381: return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
! 382: }
! 383:
! 384: #if 0
! 385: /*
! 386: ** Print a description of a fuzzer_stem on stderr.
! 387: */
! 388: static void fuzzerStemPrint(
! 389: const char *zPrefix,
! 390: fuzzer_stem *pStem,
! 391: const char *zSuffix
! 392: ){
! 393: if( pStem->n<0 ){
! 394: fprintf(stderr, "%s[%s](%d)-->self%s",
! 395: zPrefix,
! 396: pStem->zBasis, pStem->rBaseCost,
! 397: zSuffix
! 398: );
! 399: }else{
! 400: char *zBuf = 0;
! 401: int nBuf = 0;
! 402: if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
! 403: fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
! 404: zPrefix,
! 405: pStem->zBasis, pStem->rBaseCost, zBuf, pStem->,
! 406: zSuffix
! 407: );
! 408: sqlite3_free(zBuf);
! 409: }
! 410: }
! 411: #endif
! 412:
! 413: /*
! 414: ** Return 1 if the string to which the cursor is point has already
! 415: ** been emitted. Return 0 if not. Return -1 on a memory allocation
! 416: ** failures.
! 417: */
! 418: static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){
! 419: unsigned int h;
! 420: fuzzer_stem *pLookup;
! 421:
! 422: if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
! 423: return -1;
! 424: }
! 425: h = fuzzerHash(pCur->zBuf);
! 426: pLookup = pCur->apHash[h];
! 427: while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
! 428: pLookup = pLookup->pHash;
! 429: }
! 430: return pLookup!=0;
! 431: }
! 432:
! 433: /*
! 434: ** Advance a fuzzer_stem to its next value. Return 0 if there are
! 435: ** no more values that can be generated by this fuzzer_stem. Return
! 436: ** -1 on a memory allocation failure.
! 437: */
! 438: static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
! 439: const fuzzer_rule *pRule;
! 440: while( (pRule = pStem->pRule)!=0 ){
! 441: while( pStem->n < pStem->nBasis - pRule->nFrom ){
! 442: pStem->n++;
! 443: if( pRule->nFrom==0
! 444: || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0
! 445: ){
! 446: /* Found a rewrite case. Make sure it is not a duplicate */
! 447: int rc = fuzzerSeen(pCur, pStem);
! 448: if( rc<0 ) return -1;
! 449: if( rc==0 ){
! 450: fuzzerCost(pStem);
! 451: return 1;
! 452: }
! 453: }
! 454: }
! 455: pStem->n = -1;
! 456: pStem->pRule = pRule->pNext;
! 457: if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
! 458: }
! 459: return 0;
! 460: }
! 461:
! 462: /*
! 463: ** The two input stem lists are both sorted in order of increasing
! 464: ** rCostX. Merge them together into a single list, sorted by rCostX, and
! 465: ** return a pointer to the head of that new list.
! 466: */
! 467: static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
! 468: fuzzer_stem head;
! 469: fuzzer_stem *pTail;
! 470:
! 471: pTail = &head;
! 472: while( pA && pB ){
! 473: if( pA->rCostX<=pB->rCostX ){
! 474: pTail->pNext = pA;
! 475: pTail = pA;
! 476: pA = pA->pNext;
! 477: }else{
! 478: pTail->pNext = pB;
! 479: pTail = pB;
! 480: pB = pB->pNext;
! 481: }
! 482: }
! 483: if( pA==0 ){
! 484: pTail->pNext = pB;
! 485: }else{
! 486: pTail->pNext = pA;
! 487: }
! 488: return head.pNext;
! 489: }
! 490:
! 491: /*
! 492: ** Load pCur->pStem with the lowest-cost stem. Return a pointer
! 493: ** to the lowest-cost stem.
! 494: */
! 495: static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
! 496: fuzzer_stem *pBest, *pX;
! 497: int iBest;
! 498: int i;
! 499:
! 500: if( pCur->pStem==0 ){
! 501: iBest = -1;
! 502: pBest = 0;
! 503: for(i=0; i<=pCur->mxQueue; i++){
! 504: pX = pCur->aQueue[i];
! 505: if( pX==0 ) continue;
! 506: if( pBest==0 || pBest->rCostX>pX->rCostX ){
! 507: pBest = pX;
! 508: iBest = i;
! 509: }
! 510: }
! 511: if( pBest ){
! 512: pCur->aQueue[iBest] = pBest->pNext;
! 513: pBest->pNext = 0;
! 514: pCur->pStem = pBest;
! 515: }
! 516: }
! 517: return pCur->pStem;
! 518: }
! 519:
! 520: /*
! 521: ** Insert pNew into queue of pending stems. Then find the stem
! 522: ** with the lowest rCostX and move it into pCur->pStem.
! 523: ** list. The insert is done such the pNew is in the correct order
! 524: ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
! 525: */
! 526: static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
! 527: fuzzer_stem *pX;
! 528: int i;
! 529:
! 530: /* If pCur->pStem exists and is greater than pNew, then make pNew
! 531: ** the new pCur->pStem and insert the old pCur->pStem instead.
! 532: */
! 533: if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
! 534: pNew->pNext = 0;
! 535: pCur->pStem = pNew;
! 536: pNew = pX;
! 537: }
! 538:
! 539: /* Insert the new value */
! 540: pNew->pNext = 0;
! 541: pX = pNew;
! 542: for(i=0; i<=pCur->mxQueue; i++){
! 543: if( pCur->aQueue[i] ){
! 544: pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
! 545: pCur->aQueue[i] = 0;
! 546: }else{
! 547: pCur->aQueue[i] = pX;
! 548: break;
! 549: }
! 550: }
! 551: if( i>pCur->mxQueue ){
! 552: if( i<FUZZER_NQUEUE ){
! 553: pCur->mxQueue = i;
! 554: pCur->aQueue[i] = pX;
! 555: }else{
! 556: assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
! 557: pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
! 558: pCur->aQueue[FUZZER_NQUEUE-1] = pX;
! 559: }
! 560: }
! 561:
! 562: return fuzzerLowestCostStem(pCur);
! 563: }
! 564:
! 565: /*
! 566: ** Allocate a new fuzzer_stem. Add it to the hash table but do not
! 567: ** link it into either the pCur->pStem or pCur->pDone lists.
! 568: */
! 569: static fuzzer_stem *fuzzerNewStem(
! 570: fuzzer_cursor *pCur,
! 571: const char *zWord,
! 572: fuzzer_cost rBaseCost
! 573: ){
! 574: fuzzer_stem *pNew;
! 575: unsigned int h;
! 576:
! 577: pNew = sqlite3_malloc( sizeof(*pNew) + strlen(zWord) + 1 );
! 578: if( pNew==0 ) return 0;
! 579: memset(pNew, 0, sizeof(*pNew));
! 580: pNew->zBasis = (char*)&pNew[1];
! 581: pNew->nBasis = strlen(zWord);
! 582: memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
! 583: pNew->pRule = pCur->pVtab->pRule;
! 584: pNew->n = -1;
! 585: pNew->rBaseCost = pNew->rCostX = rBaseCost;
! 586: h = fuzzerHash(pNew->zBasis);
! 587: pNew->pHash = pCur->apHash[h];
! 588: pCur->apHash[h] = pNew;
! 589: pCur->nStem++;
! 590: return pNew;
! 591: }
! 592:
! 593:
! 594: /*
! 595: ** Advance a cursor to its next row of output
! 596: */
! 597: static int fuzzerNext(sqlite3_vtab_cursor *cur){
! 598: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
! 599: int rc;
! 600: fuzzer_stem *pStem, *pNew;
! 601:
! 602: pCur->iRowid++;
! 603:
! 604: /* Use the element the cursor is currently point to to create
! 605: ** a new stem and insert the new stem into the priority queue.
! 606: */
! 607: pStem = pCur->pStem;
! 608: if( pStem->rCostX>0 ){
! 609: rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
! 610: if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
! 611: pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
! 612: if( pNew ){
! 613: if( fuzzerAdvance(pCur, pNew)==0 ){
! 614: pNew->pNext = pCur->pDone;
! 615: pCur->pDone = pNew;
! 616: }else{
! 617: if( fuzzerInsert(pCur, pNew)==pNew ){
! 618: return SQLITE_OK;
! 619: }
! 620: }
! 621: }else{
! 622: return SQLITE_NOMEM;
! 623: }
! 624: }
! 625:
! 626: /* Adjust the priority queue so that the first element of the
! 627: ** stem list is the next lowest cost word.
! 628: */
! 629: while( (pStem = pCur->pStem)!=0 ){
! 630: if( fuzzerAdvance(pCur, pStem) ){
! 631: pCur->pStem = 0;
! 632: pStem = fuzzerInsert(pCur, pStem);
! 633: if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
! 634: if( rc<0 ) return SQLITE_NOMEM;
! 635: continue;
! 636: }
! 637: return SQLITE_OK; /* New word found */
! 638: }
! 639: pCur->pStem = 0;
! 640: pStem->pNext = pCur->pDone;
! 641: pCur->pDone = pStem;
! 642: if( fuzzerLowestCostStem(pCur) ){
! 643: rc = fuzzerSeen(pCur, pCur->pStem);
! 644: if( rc<0 ) return SQLITE_NOMEM;
! 645: if( rc==0 ){
! 646: return SQLITE_OK;
! 647: }
! 648: }
! 649: }
! 650:
! 651: /* Reach this point only if queue has been exhausted and there is
! 652: ** nothing left to be output. */
! 653: pCur->rLimit = (fuzzer_cost)0;
! 654: return SQLITE_OK;
! 655: }
! 656:
! 657: /*
! 658: ** Called to "rewind" a cursor back to the beginning so that
! 659: ** it starts its output over again. Always called at least once
! 660: ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
! 661: */
! 662: static int fuzzerFilter(
! 663: sqlite3_vtab_cursor *pVtabCursor,
! 664: int idxNum, const char *idxStr,
! 665: int argc, sqlite3_value **argv
! 666: ){
! 667: fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor;
! 668: const char *zWord = 0;
! 669: fuzzer_stem *pStem;
! 670:
! 671: fuzzerClearCursor(pCur, 1);
! 672: pCur->rLimit = 2147483647;
! 673: if( idxNum==1 ){
! 674: zWord = (const char*)sqlite3_value_text(argv[0]);
! 675: }else if( idxNum==2 ){
! 676: pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]);
! 677: }else if( idxNum==3 ){
! 678: zWord = (const char*)sqlite3_value_text(argv[0]);
! 679: pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]);
! 680: }
! 681: if( zWord==0 ) zWord = "";
! 682: pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0);
! 683: if( pStem==0 ) return SQLITE_NOMEM;
! 684: pCur->nullRule.pNext = pCur->pVtab->pRule;
! 685: pCur->nullRule.rCost = 0;
! 686: pCur->nullRule.nFrom = 0;
! 687: pCur->nullRule.nTo = 0;
! 688: pCur->nullRule.zFrom = "";
! 689: pStem->pRule = &pCur->nullRule;
! 690: pStem->n = pStem->nBasis;
! 691: pCur->iRowid = 1;
! 692: return SQLITE_OK;
! 693: }
! 694:
! 695: /*
! 696: ** Only the word and distance columns have values. All other columns
! 697: ** return NULL
! 698: */
! 699: static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
! 700: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
! 701: if( i==0 ){
! 702: /* the "word" column */
! 703: if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
! 704: return SQLITE_NOMEM;
! 705: }
! 706: sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
! 707: }else if( i==1 ){
! 708: /* the "distance" column */
! 709: sqlite3_result_int(ctx, pCur->pStem->rCostX);
! 710: }else{
! 711: /* All other columns are NULL */
! 712: sqlite3_result_null(ctx);
! 713: }
! 714: return SQLITE_OK;
! 715: }
! 716:
! 717: /*
! 718: ** The rowid.
! 719: */
! 720: static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
! 721: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
! 722: *pRowid = pCur->iRowid;
! 723: return SQLITE_OK;
! 724: }
! 725:
! 726: /*
! 727: ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
! 728: ** that the cursor has nothing more to output.
! 729: */
! 730: static int fuzzerEof(sqlite3_vtab_cursor *cur){
! 731: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
! 732: return pCur->rLimit<=(fuzzer_cost)0;
! 733: }
! 734:
! 735: /*
! 736: ** Search for terms of these forms:
! 737: **
! 738: ** word MATCH $str
! 739: ** distance < $value
! 740: ** distance <= $value
! 741: **
! 742: ** The distance< and distance<= are both treated as distance<=.
! 743: ** The query plan number is as follows:
! 744: **
! 745: ** 0: None of the terms above are found
! 746: ** 1: There is a "word MATCH" term with $str in filter.argv[0].
! 747: ** 2: There is a "distance<" term with $value in filter.argv[0].
! 748: ** 3: Both "word MATCH" and "distance<" with $str in argv[0] and
! 749: ** $value in argv[1].
! 750: */
! 751: static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
! 752: int iPlan = 0;
! 753: int iDistTerm = -1;
! 754: int i;
! 755: const struct sqlite3_index_constraint *pConstraint;
! 756: pConstraint = pIdxInfo->aConstraint;
! 757: for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
! 758: if( pConstraint->usable==0 ) continue;
! 759: if( (iPlan & 1)==0
! 760: && pConstraint->iColumn==0
! 761: && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
! 762: ){
! 763: iPlan |= 1;
! 764: pIdxInfo->aConstraintUsage[i].argvIndex = 1;
! 765: pIdxInfo->aConstraintUsage[i].omit = 1;
! 766: }
! 767: if( (iPlan & 2)==0
! 768: && pConstraint->iColumn==1
! 769: && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
! 770: || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
! 771: ){
! 772: iPlan |= 2;
! 773: iDistTerm = i;
! 774: }
! 775: }
! 776: if( iPlan==2 ){
! 777: pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1;
! 778: }else if( iPlan==3 ){
! 779: pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2;
! 780: }
! 781: pIdxInfo->idxNum = iPlan;
! 782: if( pIdxInfo->nOrderBy==1
! 783: && pIdxInfo->aOrderBy[0].iColumn==1
! 784: && pIdxInfo->aOrderBy[0].desc==0
! 785: ){
! 786: pIdxInfo->orderByConsumed = 1;
! 787: }
! 788: pIdxInfo->estimatedCost = (double)10000;
! 789:
! 790: return SQLITE_OK;
! 791: }
! 792:
! 793: /*
! 794: ** Disallow all attempts to DELETE or UPDATE. Only INSERTs are allowed.
! 795: **
! 796: ** On an insert, the cFrom, cTo, and cost columns are used to construct
! 797: ** a new rule. All other columns are ignored. The rule is ignored
! 798: ** if cFrom and cTo are identical. A NULL value for cFrom or cTo is
! 799: ** interpreted as an empty string. The cost must be positive.
! 800: */
! 801: static int fuzzerUpdate(
! 802: sqlite3_vtab *pVTab,
! 803: int argc,
! 804: sqlite3_value **argv,
! 805: sqlite_int64 *pRowid
! 806: ){
! 807: fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
! 808: fuzzer_rule *pRule;
! 809: const char *zFrom;
! 810: int nFrom;
! 811: const char *zTo;
! 812: int nTo;
! 813: fuzzer_cost rCost;
! 814: if( argc!=7 ){
! 815: sqlite3_free(pVTab->zErrMsg);
! 816: pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table",
! 817: p->zClassName);
! 818: return SQLITE_CONSTRAINT;
! 819: }
! 820: if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){
! 821: sqlite3_free(pVTab->zErrMsg);
! 822: pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table",
! 823: p->zClassName);
! 824: return SQLITE_CONSTRAINT;
! 825: }
! 826: zFrom = (char*)sqlite3_value_text(argv[4]);
! 827: if( zFrom==0 ) zFrom = "";
! 828: zTo = (char*)sqlite3_value_text(argv[5]);
! 829: if( zTo==0 ) zTo = "";
! 830: if( strcmp(zFrom,zTo)==0 ){
! 831: /* Silently ignore null transformations */
! 832: return SQLITE_OK;
! 833: }
! 834: rCost = sqlite3_value_int(argv[6]);
! 835: if( rCost<=0 ){
! 836: sqlite3_free(pVTab->zErrMsg);
! 837: pVTab->zErrMsg = sqlite3_mprintf("cost must be positive");
! 838: return SQLITE_CONSTRAINT;
! 839: }
! 840: nFrom = strlen(zFrom);
! 841: nTo = strlen(zTo);
! 842: pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo );
! 843: if( pRule==0 ){
! 844: return SQLITE_NOMEM;
! 845: }
! 846: pRule->zFrom = &pRule->zTo[nTo+1];
! 847: pRule->nFrom = nFrom;
! 848: memcpy(pRule->zFrom, zFrom, nFrom+1);
! 849: memcpy(pRule->zTo, zTo, nTo+1);
! 850: pRule->nTo = nTo;
! 851: pRule->rCost = rCost;
! 852: pRule->pNext = p->pNewRule;
! 853: p->pNewRule = pRule;
! 854: return SQLITE_OK;
! 855: }
! 856:
! 857: /*
! 858: ** A virtual table module that provides read-only access to a
! 859: ** Tcl global variable namespace.
! 860: */
! 861: static sqlite3_module fuzzerModule = {
! 862: 0, /* iVersion */
! 863: fuzzerConnect,
! 864: fuzzerConnect,
! 865: fuzzerBestIndex,
! 866: fuzzerDisconnect,
! 867: fuzzerDisconnect,
! 868: fuzzerOpen, /* xOpen - open a cursor */
! 869: fuzzerClose, /* xClose - close a cursor */
! 870: fuzzerFilter, /* xFilter - configure scan constraints */
! 871: fuzzerNext, /* xNext - advance a cursor */
! 872: fuzzerEof, /* xEof - check for end of scan */
! 873: fuzzerColumn, /* xColumn - read data */
! 874: fuzzerRowid, /* xRowid - read data */
! 875: fuzzerUpdate, /* xUpdate - INSERT */
! 876: 0, /* xBegin */
! 877: 0, /* xSync */
! 878: 0, /* xCommit */
! 879: 0, /* xRollback */
! 880: 0, /* xFindMethod */
! 881: 0, /* xRename */
! 882: };
! 883:
! 884: #endif /* SQLITE_OMIT_VIRTUALTABLE */
! 885:
! 886:
! 887: /*
! 888: ** Register the fuzzer virtual table
! 889: */
! 890: int fuzzer_register(sqlite3 *db){
! 891: int rc = SQLITE_OK;
! 892: #ifndef SQLITE_OMIT_VIRTUALTABLE
! 893: rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0);
! 894: #endif
! 895: return rc;
! 896: }
! 897:
! 898: #ifdef SQLITE_TEST
! 899: #include <tcl.h>
! 900: /*
! 901: ** Decode a pointer to an sqlite3 object.
! 902: */
! 903: extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite3 **ppDb);
! 904:
! 905: /*
! 906: ** Register the echo virtual table module.
! 907: */
! 908: static int register_fuzzer_module(
! 909: ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
! 910: Tcl_Interp *interp, /* The TCL interpreter that invoked this command */
! 911: int objc, /* Number of arguments */
! 912: Tcl_Obj *CONST objv[] /* Command arguments */
! 913: ){
! 914: sqlite3 *db;
! 915: if( objc!=2 ){
! 916: Tcl_WrongNumArgs(interp, 1, objv, "DB");
! 917: return TCL_ERROR;
! 918: }
! 919: if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
! 920: fuzzer_register(db);
! 921: return TCL_OK;
! 922: }
! 923:
! 924:
! 925: /*
! 926: ** Register commands with the TCL interpreter.
! 927: */
! 928: int Sqlitetestfuzzer_Init(Tcl_Interp *interp){
! 929: static struct {
! 930: char *zName;
! 931: Tcl_ObjCmdProc *xProc;
! 932: void *clientData;
! 933: } aObjCmd[] = {
! 934: { "register_fuzzer_module", register_fuzzer_module, 0 },
! 935: };
! 936: int i;
! 937: for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
! 938: Tcl_CreateObjCommand(interp, aObjCmd[i].zName,
! 939: aObjCmd[i].xProc, aObjCmd[i].clientData, 0);
! 940: }
! 941: return TCL_OK;
! 942: }
! 943:
! 944: #endif /* SQLITE_TEST */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>