Annotation of embedaddon/sqlite3/src/test_fuzzer.c, revision 1.1.1.1
1.1 misho 1: /*
2: ** 2011 March 24
3: **
4: ** The author disclaims copyright to this source code. In place of
5: ** a legal notice, here is a blessing:
6: **
7: ** May you do good and not evil.
8: ** May you find forgiveness for yourself and forgive others.
9: ** May you share freely, never taking more than you give.
10: **
11: *************************************************************************
12: **
13: ** Code for demonstartion virtual table that generates variations
14: ** on an input word at increasing edit distances from the original.
15: **
16: ** A fuzzer virtual table is created like this:
17: **
18: ** CREATE VIRTUAL TABLE temp.f USING fuzzer;
19: **
20: ** The name of the new virtual table in the example above is "f".
21: ** Note that all fuzzer virtual tables must be TEMP tables. The
22: ** "temp." prefix in front of the table name is required when the
23: ** table is being created. The "temp." prefix can be omitted when
24: ** using the table as long as the name is unambiguous.
25: **
26: ** Before being used, the fuzzer needs to be programmed by giving it
27: ** character transformations and a cost associated with each transformation.
28: ** Examples:
29: **
30: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('','a',100);
31: **
32: ** The above statement says that the cost of inserting a letter 'a' is
33: ** 100. (All costs are integers. We recommend that costs be scaled so
34: ** that the average cost is around 100.)
35: **
36: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('b','',87);
37: **
38: ** The above statement says that the cost of deleting a single letter
39: ** 'b' is 87.
40: **
41: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('o','oe',38);
42: ** INSERT INTO f(cFrom,cTo,Cost) VALUES('oe','o',40);
43: **
44: ** This third example says that the cost of transforming the single
45: ** letter "o" into the two-letter sequence "oe" is 38 and that the
46: ** cost of transforming "oe" back into "o" is 40.
47: **
48: ** After all the transformation costs have been set, the fuzzer table
49: ** can be queried as follows:
50: **
51: ** SELECT word, distance FROM f
52: ** WHERE word MATCH 'abcdefg'
53: ** AND distance<200;
54: **
55: ** This first query outputs the string "abcdefg" and all strings that
56: ** can be derived from that string by appling the specified transformations.
57: ** The strings are output together with their total transformation cost
58: ** (called "distance") and appear in order of increasing cost. No string
59: ** is output more than once. If there are multiple ways to transform the
60: ** target string into the output string then the lowest cost transform is
61: ** the one that is returned. In the example, the search is limited to
62: ** strings with a total distance of less than 200.
63: **
64: ** It is important to put some kind of a limit on the fuzzer output. This
65: ** can be either in the form of a LIMIT clause at the end of the query,
66: ** or better, a "distance<NNN" constraint where NNN is some number. The
67: ** running time and memory requirement is exponential in the value of NNN
68: ** so you want to make sure that NNN is not too big. A value of NNN that
69: ** is about twice the average transformation cost seems to give good results.
70: **
71: ** The fuzzer table can be useful for tasks such as spelling correction.
72: ** Suppose there is a second table vocabulary(w) where the w column contains
73: ** all correctly spelled words. Let $word be a word you want to look up.
74: **
75: ** SELECT vocabulary.w FROM f, vocabulary
76: ** WHERE f.word MATCH $word
77: ** AND f.distance<=200
78: ** AND f.word=vocabulary.w
79: ** LIMIT 20
80: **
81: ** The query above gives the 20 closest words to the $word being tested.
82: ** (Note that for good performance, the vocubulary.w column should be
83: ** indexed.)
84: **
85: ** A similar query can be used to find all words in the dictionary that
86: ** begin with some prefix $prefix:
87: **
88: ** SELECT vocabulary.w FROM f, vocabulary
89: ** WHERE f.word MATCH $prefix
90: ** AND f.distance<=200
91: ** AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
92: ** LIMIT 50
93: **
94: ** This last query will show up to 50 words out of the vocabulary that
95: ** match or nearly match the $prefix.
96: */
97: #include "sqlite3.h"
98: #include <stdlib.h>
99: #include <string.h>
100: #include <assert.h>
101: #include <stdio.h>
102:
103: #ifndef SQLITE_OMIT_VIRTUALTABLE
104:
105: /*
106: ** Forward declaration of objects used by this implementation
107: */
108: typedef struct fuzzer_vtab fuzzer_vtab;
109: typedef struct fuzzer_cursor fuzzer_cursor;
110: typedef struct fuzzer_rule fuzzer_rule;
111: typedef struct fuzzer_seen fuzzer_seen;
112: typedef struct fuzzer_stem fuzzer_stem;
113:
114: /*
115: ** Type of the "cost" of an edit operation. Might be changed to
116: ** "float" or "double" or "sqlite3_int64" in the future.
117: */
118: typedef int fuzzer_cost;
119:
120:
121: /*
122: ** Each transformation rule is stored as an instance of this object.
123: ** All rules are kept on a linked list sorted by rCost.
124: */
125: struct fuzzer_rule {
126: fuzzer_rule *pNext; /* Next rule in order of increasing rCost */
127: fuzzer_cost rCost; /* Cost of this transformation */
128: int nFrom, nTo; /* Length of the zFrom and zTo strings */
129: char *zFrom; /* Transform from */
130: char zTo[4]; /* Transform to (extra space appended) */
131: };
132:
133: /*
134: ** A stem object is used to generate variants. It is also used to record
135: ** previously generated outputs.
136: **
137: ** Every stem is added to a hash table as it is output. Generation of
138: ** duplicate stems is suppressed.
139: **
140: ** Active stems (those that might generate new outputs) are kepts on a linked
141: ** list sorted by increasing cost. The cost is the sum of rBaseCost and
142: ** pRule->rCost.
143: */
144: struct fuzzer_stem {
145: char *zBasis; /* Word being fuzzed */
146: int nBasis; /* Length of the zBasis string */
147: const fuzzer_rule *pRule; /* Current rule to apply */
148: int n; /* Apply pRule at this character offset */
149: fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */
150: fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */
151: fuzzer_stem *pNext; /* Next stem in rCost order */
152: fuzzer_stem *pHash; /* Next stem with same hash on zBasis */
153: };
154:
155: /*
156: ** A fuzzer virtual-table object
157: */
158: struct fuzzer_vtab {
159: sqlite3_vtab base; /* Base class - must be first */
160: char *zClassName; /* Name of this class. Default: "fuzzer" */
161: fuzzer_rule *pRule; /* All active rules in this fuzzer */
162: fuzzer_rule *pNewRule; /* New rules to add when last cursor expires */
163: int nCursor; /* Number of active cursors */
164: };
165:
166: #define FUZZER_HASH 4001 /* Hash table size */
167: #define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */
168:
169: /* A fuzzer cursor object */
170: struct fuzzer_cursor {
171: sqlite3_vtab_cursor base; /* Base class - must be first */
172: sqlite3_int64 iRowid; /* The rowid of the current word */
173: fuzzer_vtab *pVtab; /* The virtual table this cursor belongs to */
174: fuzzer_cost rLimit; /* Maximum cost of any term */
175: fuzzer_stem *pStem; /* Stem with smallest rCostX */
176: fuzzer_stem *pDone; /* Stems already processed to completion */
177: fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */
178: int mxQueue; /* Largest used index in aQueue[] */
179: char *zBuf; /* Temporary use buffer */
180: int nBuf; /* Bytes allocated for zBuf */
181: int nStem; /* Number of stems allocated */
182: fuzzer_rule nullRule; /* Null rule used first */
183: fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */
184: };
185:
186: /* Methods for the fuzzer module */
187: static int fuzzerConnect(
188: sqlite3 *db,
189: void *pAux,
190: int argc, const char *const*argv,
191: sqlite3_vtab **ppVtab,
192: char **pzErr
193: ){
194: fuzzer_vtab *pNew;
195: int n;
196: if( strcmp(argv[1],"temp")!=0 ){
197: *pzErr = sqlite3_mprintf("%s virtual tables must be TEMP", argv[0]);
198: return SQLITE_ERROR;
199: }
200: n = strlen(argv[0]) + 1;
201: pNew = sqlite3_malloc( sizeof(*pNew) + n );
202: if( pNew==0 ) return SQLITE_NOMEM;
203: pNew->zClassName = (char*)&pNew[1];
204: memcpy(pNew->zClassName, argv[0], n);
205: sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)");
206: memset(pNew, 0, sizeof(*pNew));
207: *ppVtab = &pNew->base;
208: return SQLITE_OK;
209: }
210: /* Note that for this virtual table, the xCreate and xConnect
211: ** methods are identical. */
212:
213: static int fuzzerDisconnect(sqlite3_vtab *pVtab){
214: fuzzer_vtab *p = (fuzzer_vtab*)pVtab;
215: assert( p->nCursor==0 );
216: do{
217: while( p->pRule ){
218: fuzzer_rule *pRule = p->pRule;
219: p->pRule = pRule->pNext;
220: sqlite3_free(pRule);
221: }
222: p->pRule = p->pNewRule;
223: p->pNewRule = 0;
224: }while( p->pRule );
225: sqlite3_free(p);
226: return SQLITE_OK;
227: }
228: /* The xDisconnect and xDestroy methods are also the same */
229:
230: /*
231: ** The two input rule lists are both sorted in order of increasing
232: ** cost. Merge them together into a single list, sorted by cost, and
233: ** return a pointer to the head of that list.
234: */
235: static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){
236: fuzzer_rule head;
237: fuzzer_rule *pTail;
238:
239: pTail = &head;
240: while( pA && pB ){
241: if( pA->rCost<=pB->rCost ){
242: pTail->pNext = pA;
243: pTail = pA;
244: pA = pA->pNext;
245: }else{
246: pTail->pNext = pB;
247: pTail = pB;
248: pB = pB->pNext;
249: }
250: }
251: if( pA==0 ){
252: pTail->pNext = pB;
253: }else{
254: pTail->pNext = pA;
255: }
256: return head.pNext;
257: }
258:
259:
260: /*
261: ** Open a new fuzzer cursor.
262: */
263: static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
264: fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
265: fuzzer_cursor *pCur;
266: pCur = sqlite3_malloc( sizeof(*pCur) );
267: if( pCur==0 ) return SQLITE_NOMEM;
268: memset(pCur, 0, sizeof(*pCur));
269: pCur->pVtab = p;
270: *ppCursor = &pCur->base;
271: if( p->nCursor==0 && p->pNewRule ){
272: unsigned int i;
273: fuzzer_rule *pX;
274: fuzzer_rule *a[15];
275: for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0;
276: while( (pX = p->pNewRule)!=0 ){
277: p->pNewRule = pX->pNext;
278: pX->pNext = 0;
279: for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){
280: pX = fuzzerMergeRules(a[i], pX);
281: a[i] = 0;
282: }
283: a[i] = fuzzerMergeRules(a[i], pX);
284: }
285: for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){
286: pX = fuzzerMergeRules(a[i], pX);
287: }
288: p->pRule = fuzzerMergeRules(p->pRule, pX);
289: }
290: p->nCursor++;
291: return SQLITE_OK;
292: }
293:
294: /*
295: ** Free all stems in a list.
296: */
297: static void fuzzerClearStemList(fuzzer_stem *pStem){
298: while( pStem ){
299: fuzzer_stem *pNext = pStem->pNext;
300: sqlite3_free(pStem);
301: pStem = pNext;
302: }
303: }
304:
305: /*
306: ** Free up all the memory allocated by a cursor. Set it rLimit to 0
307: ** to indicate that it is at EOF.
308: */
309: static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){
310: int i;
311: fuzzerClearStemList(pCur->pStem);
312: fuzzerClearStemList(pCur->pDone);
313: for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]);
314: pCur->rLimit = (fuzzer_cost)0;
315: if( clearHash && pCur->nStem ){
316: pCur->mxQueue = 0;
317: pCur->pStem = 0;
318: pCur->pDone = 0;
319: memset(pCur->aQueue, 0, sizeof(pCur->aQueue));
320: memset(pCur->apHash, 0, sizeof(pCur->apHash));
321: }
322: pCur->nStem = 0;
323: }
324:
325: /*
326: ** Close a fuzzer cursor.
327: */
328: static int fuzzerClose(sqlite3_vtab_cursor *cur){
329: fuzzer_cursor *pCur = (fuzzer_cursor *)cur;
330: fuzzerClearCursor(pCur, 0);
331: sqlite3_free(pCur->zBuf);
332: pCur->pVtab->nCursor--;
333: sqlite3_free(pCur);
334: return SQLITE_OK;
335: }
336:
337: /*
338: ** Compute the current output term for a fuzzer_stem.
339: */
340: static int fuzzerRender(
341: fuzzer_stem *pStem, /* The stem to be rendered */
342: char **pzBuf, /* Write results into this buffer. realloc if needed */
343: int *pnBuf /* Size of the buffer */
344: ){
345: const fuzzer_rule *pRule = pStem->pRule;
346: int n;
347: char *z;
348:
349: n = pStem->nBasis + pRule->nTo - pRule->nFrom;
350: if( (*pnBuf)<n+1 ){
351: (*pzBuf) = sqlite3_realloc((*pzBuf), n+100);
352: if( (*pzBuf)==0 ) return SQLITE_NOMEM;
353: (*pnBuf) = n+100;
354: }
355: n = pStem->n;
356: z = *pzBuf;
357: if( n<0 ){
358: memcpy(z, pStem->zBasis, pStem->nBasis+1);
359: }else{
360: memcpy(z, pStem->zBasis, n);
361: memcpy(&z[n], pRule->zTo, pRule->nTo);
362: memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom],
363: pStem->nBasis-n-pRule->nFrom+1);
364: }
365: return SQLITE_OK;
366: }
367:
368: /*
369: ** Compute a hash on zBasis.
370: */
371: static unsigned int fuzzerHash(const char *z){
372: unsigned int h = 0;
373: while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); }
374: return h % FUZZER_HASH;
375: }
376:
377: /*
378: ** Current cost of a stem
379: */
380: static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){
381: return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost;
382: }
383:
384: #if 0
385: /*
386: ** Print a description of a fuzzer_stem on stderr.
387: */
388: static void fuzzerStemPrint(
389: const char *zPrefix,
390: fuzzer_stem *pStem,
391: const char *zSuffix
392: ){
393: if( pStem->n<0 ){
394: fprintf(stderr, "%s[%s](%d)-->self%s",
395: zPrefix,
396: pStem->zBasis, pStem->rBaseCost,
397: zSuffix
398: );
399: }else{
400: char *zBuf = 0;
401: int nBuf = 0;
402: if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return;
403: fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s",
404: zPrefix,
405: pStem->zBasis, pStem->rBaseCost, zBuf, pStem->,
406: zSuffix
407: );
408: sqlite3_free(zBuf);
409: }
410: }
411: #endif
412:
413: /*
414: ** Return 1 if the string to which the cursor is point has already
415: ** been emitted. Return 0 if not. Return -1 on a memory allocation
416: ** failures.
417: */
418: static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){
419: unsigned int h;
420: fuzzer_stem *pLookup;
421:
422: if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
423: return -1;
424: }
425: h = fuzzerHash(pCur->zBuf);
426: pLookup = pCur->apHash[h];
427: while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){
428: pLookup = pLookup->pHash;
429: }
430: return pLookup!=0;
431: }
432:
433: /*
434: ** Advance a fuzzer_stem to its next value. Return 0 if there are
435: ** no more values that can be generated by this fuzzer_stem. Return
436: ** -1 on a memory allocation failure.
437: */
438: static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){
439: const fuzzer_rule *pRule;
440: while( (pRule = pStem->pRule)!=0 ){
441: while( pStem->n < pStem->nBasis - pRule->nFrom ){
442: pStem->n++;
443: if( pRule->nFrom==0
444: || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0
445: ){
446: /* Found a rewrite case. Make sure it is not a duplicate */
447: int rc = fuzzerSeen(pCur, pStem);
448: if( rc<0 ) return -1;
449: if( rc==0 ){
450: fuzzerCost(pStem);
451: return 1;
452: }
453: }
454: }
455: pStem->n = -1;
456: pStem->pRule = pRule->pNext;
457: if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0;
458: }
459: return 0;
460: }
461:
462: /*
463: ** The two input stem lists are both sorted in order of increasing
464: ** rCostX. Merge them together into a single list, sorted by rCostX, and
465: ** return a pointer to the head of that new list.
466: */
467: static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){
468: fuzzer_stem head;
469: fuzzer_stem *pTail;
470:
471: pTail = &head;
472: while( pA && pB ){
473: if( pA->rCostX<=pB->rCostX ){
474: pTail->pNext = pA;
475: pTail = pA;
476: pA = pA->pNext;
477: }else{
478: pTail->pNext = pB;
479: pTail = pB;
480: pB = pB->pNext;
481: }
482: }
483: if( pA==0 ){
484: pTail->pNext = pB;
485: }else{
486: pTail->pNext = pA;
487: }
488: return head.pNext;
489: }
490:
491: /*
492: ** Load pCur->pStem with the lowest-cost stem. Return a pointer
493: ** to the lowest-cost stem.
494: */
495: static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){
496: fuzzer_stem *pBest, *pX;
497: int iBest;
498: int i;
499:
500: if( pCur->pStem==0 ){
501: iBest = -1;
502: pBest = 0;
503: for(i=0; i<=pCur->mxQueue; i++){
504: pX = pCur->aQueue[i];
505: if( pX==0 ) continue;
506: if( pBest==0 || pBest->rCostX>pX->rCostX ){
507: pBest = pX;
508: iBest = i;
509: }
510: }
511: if( pBest ){
512: pCur->aQueue[iBest] = pBest->pNext;
513: pBest->pNext = 0;
514: pCur->pStem = pBest;
515: }
516: }
517: return pCur->pStem;
518: }
519:
520: /*
521: ** Insert pNew into queue of pending stems. Then find the stem
522: ** with the lowest rCostX and move it into pCur->pStem.
523: ** list. The insert is done such the pNew is in the correct order
524: ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
525: */
526: static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
527: fuzzer_stem *pX;
528: int i;
529:
530: /* If pCur->pStem exists and is greater than pNew, then make pNew
531: ** the new pCur->pStem and insert the old pCur->pStem instead.
532: */
533: if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){
534: pNew->pNext = 0;
535: pCur->pStem = pNew;
536: pNew = pX;
537: }
538:
539: /* Insert the new value */
540: pNew->pNext = 0;
541: pX = pNew;
542: for(i=0; i<=pCur->mxQueue; i++){
543: if( pCur->aQueue[i] ){
544: pX = fuzzerMergeStems(pX, pCur->aQueue[i]);
545: pCur->aQueue[i] = 0;
546: }else{
547: pCur->aQueue[i] = pX;
548: break;
549: }
550: }
551: if( i>pCur->mxQueue ){
552: if( i<FUZZER_NQUEUE ){
553: pCur->mxQueue = i;
554: pCur->aQueue[i] = pX;
555: }else{
556: assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
557: pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]);
558: pCur->aQueue[FUZZER_NQUEUE-1] = pX;
559: }
560: }
561:
562: return fuzzerLowestCostStem(pCur);
563: }
564:
565: /*
566: ** Allocate a new fuzzer_stem. Add it to the hash table but do not
567: ** link it into either the pCur->pStem or pCur->pDone lists.
568: */
569: static fuzzer_stem *fuzzerNewStem(
570: fuzzer_cursor *pCur,
571: const char *zWord,
572: fuzzer_cost rBaseCost
573: ){
574: fuzzer_stem *pNew;
575: unsigned int h;
576:
577: pNew = sqlite3_malloc( sizeof(*pNew) + strlen(zWord) + 1 );
578: if( pNew==0 ) return 0;
579: memset(pNew, 0, sizeof(*pNew));
580: pNew->zBasis = (char*)&pNew[1];
581: pNew->nBasis = strlen(zWord);
582: memcpy(pNew->zBasis, zWord, pNew->nBasis+1);
583: pNew->pRule = pCur->pVtab->pRule;
584: pNew->n = -1;
585: pNew->rBaseCost = pNew->rCostX = rBaseCost;
586: h = fuzzerHash(pNew->zBasis);
587: pNew->pHash = pCur->apHash[h];
588: pCur->apHash[h] = pNew;
589: pCur->nStem++;
590: return pNew;
591: }
592:
593:
594: /*
595: ** Advance a cursor to its next row of output
596: */
597: static int fuzzerNext(sqlite3_vtab_cursor *cur){
598: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
599: int rc;
600: fuzzer_stem *pStem, *pNew;
601:
602: pCur->iRowid++;
603:
604: /* Use the element the cursor is currently point to to create
605: ** a new stem and insert the new stem into the priority queue.
606: */
607: pStem = pCur->pStem;
608: if( pStem->rCostX>0 ){
609: rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf);
610: if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM;
611: pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX);
612: if( pNew ){
613: if( fuzzerAdvance(pCur, pNew)==0 ){
614: pNew->pNext = pCur->pDone;
615: pCur->pDone = pNew;
616: }else{
617: if( fuzzerInsert(pCur, pNew)==pNew ){
618: return SQLITE_OK;
619: }
620: }
621: }else{
622: return SQLITE_NOMEM;
623: }
624: }
625:
626: /* Adjust the priority queue so that the first element of the
627: ** stem list is the next lowest cost word.
628: */
629: while( (pStem = pCur->pStem)!=0 ){
630: if( fuzzerAdvance(pCur, pStem) ){
631: pCur->pStem = 0;
632: pStem = fuzzerInsert(pCur, pStem);
633: if( (rc = fuzzerSeen(pCur, pStem))!=0 ){
634: if( rc<0 ) return SQLITE_NOMEM;
635: continue;
636: }
637: return SQLITE_OK; /* New word found */
638: }
639: pCur->pStem = 0;
640: pStem->pNext = pCur->pDone;
641: pCur->pDone = pStem;
642: if( fuzzerLowestCostStem(pCur) ){
643: rc = fuzzerSeen(pCur, pCur->pStem);
644: if( rc<0 ) return SQLITE_NOMEM;
645: if( rc==0 ){
646: return SQLITE_OK;
647: }
648: }
649: }
650:
651: /* Reach this point only if queue has been exhausted and there is
652: ** nothing left to be output. */
653: pCur->rLimit = (fuzzer_cost)0;
654: return SQLITE_OK;
655: }
656:
657: /*
658: ** Called to "rewind" a cursor back to the beginning so that
659: ** it starts its output over again. Always called at least once
660: ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
661: */
662: static int fuzzerFilter(
663: sqlite3_vtab_cursor *pVtabCursor,
664: int idxNum, const char *idxStr,
665: int argc, sqlite3_value **argv
666: ){
667: fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor;
668: const char *zWord = 0;
669: fuzzer_stem *pStem;
670:
671: fuzzerClearCursor(pCur, 1);
672: pCur->rLimit = 2147483647;
673: if( idxNum==1 ){
674: zWord = (const char*)sqlite3_value_text(argv[0]);
675: }else if( idxNum==2 ){
676: pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]);
677: }else if( idxNum==3 ){
678: zWord = (const char*)sqlite3_value_text(argv[0]);
679: pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]);
680: }
681: if( zWord==0 ) zWord = "";
682: pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0);
683: if( pStem==0 ) return SQLITE_NOMEM;
684: pCur->nullRule.pNext = pCur->pVtab->pRule;
685: pCur->nullRule.rCost = 0;
686: pCur->nullRule.nFrom = 0;
687: pCur->nullRule.nTo = 0;
688: pCur->nullRule.zFrom = "";
689: pStem->pRule = &pCur->nullRule;
690: pStem->n = pStem->nBasis;
691: pCur->iRowid = 1;
692: return SQLITE_OK;
693: }
694:
695: /*
696: ** Only the word and distance columns have values. All other columns
697: ** return NULL
698: */
699: static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
700: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
701: if( i==0 ){
702: /* the "word" column */
703: if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){
704: return SQLITE_NOMEM;
705: }
706: sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT);
707: }else if( i==1 ){
708: /* the "distance" column */
709: sqlite3_result_int(ctx, pCur->pStem->rCostX);
710: }else{
711: /* All other columns are NULL */
712: sqlite3_result_null(ctx);
713: }
714: return SQLITE_OK;
715: }
716:
717: /*
718: ** The rowid.
719: */
720: static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
721: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
722: *pRowid = pCur->iRowid;
723: return SQLITE_OK;
724: }
725:
726: /*
727: ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
728: ** that the cursor has nothing more to output.
729: */
730: static int fuzzerEof(sqlite3_vtab_cursor *cur){
731: fuzzer_cursor *pCur = (fuzzer_cursor*)cur;
732: return pCur->rLimit<=(fuzzer_cost)0;
733: }
734:
735: /*
736: ** Search for terms of these forms:
737: **
738: ** word MATCH $str
739: ** distance < $value
740: ** distance <= $value
741: **
742: ** The distance< and distance<= are both treated as distance<=.
743: ** The query plan number is as follows:
744: **
745: ** 0: None of the terms above are found
746: ** 1: There is a "word MATCH" term with $str in filter.argv[0].
747: ** 2: There is a "distance<" term with $value in filter.argv[0].
748: ** 3: Both "word MATCH" and "distance<" with $str in argv[0] and
749: ** $value in argv[1].
750: */
751: static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
752: int iPlan = 0;
753: int iDistTerm = -1;
754: int i;
755: const struct sqlite3_index_constraint *pConstraint;
756: pConstraint = pIdxInfo->aConstraint;
757: for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
758: if( pConstraint->usable==0 ) continue;
759: if( (iPlan & 1)==0
760: && pConstraint->iColumn==0
761: && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
762: ){
763: iPlan |= 1;
764: pIdxInfo->aConstraintUsage[i].argvIndex = 1;
765: pIdxInfo->aConstraintUsage[i].omit = 1;
766: }
767: if( (iPlan & 2)==0
768: && pConstraint->iColumn==1
769: && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
770: || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
771: ){
772: iPlan |= 2;
773: iDistTerm = i;
774: }
775: }
776: if( iPlan==2 ){
777: pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1;
778: }else if( iPlan==3 ){
779: pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2;
780: }
781: pIdxInfo->idxNum = iPlan;
782: if( pIdxInfo->nOrderBy==1
783: && pIdxInfo->aOrderBy[0].iColumn==1
784: && pIdxInfo->aOrderBy[0].desc==0
785: ){
786: pIdxInfo->orderByConsumed = 1;
787: }
788: pIdxInfo->estimatedCost = (double)10000;
789:
790: return SQLITE_OK;
791: }
792:
793: /*
794: ** Disallow all attempts to DELETE or UPDATE. Only INSERTs are allowed.
795: **
796: ** On an insert, the cFrom, cTo, and cost columns are used to construct
797: ** a new rule. All other columns are ignored. The rule is ignored
798: ** if cFrom and cTo are identical. A NULL value for cFrom or cTo is
799: ** interpreted as an empty string. The cost must be positive.
800: */
801: static int fuzzerUpdate(
802: sqlite3_vtab *pVTab,
803: int argc,
804: sqlite3_value **argv,
805: sqlite_int64 *pRowid
806: ){
807: fuzzer_vtab *p = (fuzzer_vtab*)pVTab;
808: fuzzer_rule *pRule;
809: const char *zFrom;
810: int nFrom;
811: const char *zTo;
812: int nTo;
813: fuzzer_cost rCost;
814: if( argc!=7 ){
815: sqlite3_free(pVTab->zErrMsg);
816: pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table",
817: p->zClassName);
818: return SQLITE_CONSTRAINT;
819: }
820: if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){
821: sqlite3_free(pVTab->zErrMsg);
822: pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table",
823: p->zClassName);
824: return SQLITE_CONSTRAINT;
825: }
826: zFrom = (char*)sqlite3_value_text(argv[4]);
827: if( zFrom==0 ) zFrom = "";
828: zTo = (char*)sqlite3_value_text(argv[5]);
829: if( zTo==0 ) zTo = "";
830: if( strcmp(zFrom,zTo)==0 ){
831: /* Silently ignore null transformations */
832: return SQLITE_OK;
833: }
834: rCost = sqlite3_value_int(argv[6]);
835: if( rCost<=0 ){
836: sqlite3_free(pVTab->zErrMsg);
837: pVTab->zErrMsg = sqlite3_mprintf("cost must be positive");
838: return SQLITE_CONSTRAINT;
839: }
840: nFrom = strlen(zFrom);
841: nTo = strlen(zTo);
842: pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo );
843: if( pRule==0 ){
844: return SQLITE_NOMEM;
845: }
846: pRule->zFrom = &pRule->zTo[nTo+1];
847: pRule->nFrom = nFrom;
848: memcpy(pRule->zFrom, zFrom, nFrom+1);
849: memcpy(pRule->zTo, zTo, nTo+1);
850: pRule->nTo = nTo;
851: pRule->rCost = rCost;
852: pRule->pNext = p->pNewRule;
853: p->pNewRule = pRule;
854: return SQLITE_OK;
855: }
856:
857: /*
858: ** A virtual table module that provides read-only access to a
859: ** Tcl global variable namespace.
860: */
861: static sqlite3_module fuzzerModule = {
862: 0, /* iVersion */
863: fuzzerConnect,
864: fuzzerConnect,
865: fuzzerBestIndex,
866: fuzzerDisconnect,
867: fuzzerDisconnect,
868: fuzzerOpen, /* xOpen - open a cursor */
869: fuzzerClose, /* xClose - close a cursor */
870: fuzzerFilter, /* xFilter - configure scan constraints */
871: fuzzerNext, /* xNext - advance a cursor */
872: fuzzerEof, /* xEof - check for end of scan */
873: fuzzerColumn, /* xColumn - read data */
874: fuzzerRowid, /* xRowid - read data */
875: fuzzerUpdate, /* xUpdate - INSERT */
876: 0, /* xBegin */
877: 0, /* xSync */
878: 0, /* xCommit */
879: 0, /* xRollback */
880: 0, /* xFindMethod */
881: 0, /* xRename */
882: };
883:
884: #endif /* SQLITE_OMIT_VIRTUALTABLE */
885:
886:
887: /*
888: ** Register the fuzzer virtual table
889: */
890: int fuzzer_register(sqlite3 *db){
891: int rc = SQLITE_OK;
892: #ifndef SQLITE_OMIT_VIRTUALTABLE
893: rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0);
894: #endif
895: return rc;
896: }
897:
898: #ifdef SQLITE_TEST
899: #include <tcl.h>
900: /*
901: ** Decode a pointer to an sqlite3 object.
902: */
903: extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite3 **ppDb);
904:
905: /*
906: ** Register the echo virtual table module.
907: */
908: static int register_fuzzer_module(
909: ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
910: Tcl_Interp *interp, /* The TCL interpreter that invoked this command */
911: int objc, /* Number of arguments */
912: Tcl_Obj *CONST objv[] /* Command arguments */
913: ){
914: sqlite3 *db;
915: if( objc!=2 ){
916: Tcl_WrongNumArgs(interp, 1, objv, "DB");
917: return TCL_ERROR;
918: }
919: if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
920: fuzzer_register(db);
921: return TCL_OK;
922: }
923:
924:
925: /*
926: ** Register commands with the TCL interpreter.
927: */
928: int Sqlitetestfuzzer_Init(Tcl_Interp *interp){
929: static struct {
930: char *zName;
931: Tcl_ObjCmdProc *xProc;
932: void *clientData;
933: } aObjCmd[] = {
934: { "register_fuzzer_module", register_fuzzer_module, 0 },
935: };
936: int i;
937: for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
938: Tcl_CreateObjCommand(interp, aObjCmd[i].zName,
939: aObjCmd[i].xProc, aObjCmd[i].clientData, 0);
940: }
941: return TCL_OK;
942: }
943:
944: #endif /* SQLITE_TEST */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>