Annotation of embedaddon/sqlite3/ext/fts3/fts3_expr.c, revision 1.1.1.1
1.1 misho 1: /*
2: ** 2008 Nov 28
3: **
4: ** The author disclaims copyright to this source code. In place of
5: ** a legal notice, here is a blessing:
6: **
7: ** May you do good and not evil.
8: ** May you find forgiveness for yourself and forgive others.
9: ** May you share freely, never taking more than you give.
10: **
11: ******************************************************************************
12: **
13: ** This module contains code that implements a parser for fts3 query strings
14: ** (the right-hand argument to the MATCH operator). Because the supported
15: ** syntax is relatively simple, the whole tokenizer/parser system is
16: ** hand-coded.
17: */
18: #include "fts3Int.h"
19: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
20:
21: /*
22: ** By default, this module parses the legacy syntax that has been
23: ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
24: ** is defined, then it uses the new syntax. The differences between
25: ** the new and the old syntaxes are:
26: **
27: ** a) The new syntax supports parenthesis. The old does not.
28: **
29: ** b) The new syntax supports the AND and NOT operators. The old does not.
30: **
31: ** c) The old syntax supports the "-" token qualifier. This is not
32: ** supported by the new syntax (it is replaced by the NOT operator).
33: **
34: ** d) When using the old syntax, the OR operator has a greater precedence
35: ** than an implicit AND. When using the new, both implicity and explicit
36: ** AND operators have a higher precedence than OR.
37: **
38: ** If compiled with SQLITE_TEST defined, then this module exports the
39: ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
40: ** to zero causes the module to use the old syntax. If it is set to
41: ** non-zero the new syntax is activated. This is so both syntaxes can
42: ** be tested using a single build of testfixture.
43: **
44: ** The following describes the syntax supported by the fts3 MATCH
45: ** operator in a similar format to that used by the lemon parser
46: ** generator. This module does not use actually lemon, it uses a
47: ** custom parser.
48: **
49: ** query ::= andexpr (OR andexpr)*.
50: **
51: ** andexpr ::= notexpr (AND? notexpr)*.
52: **
53: ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
54: ** notexpr ::= LP query RP.
55: **
56: ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
57: **
58: ** distance_opt ::= .
59: ** distance_opt ::= / INTEGER.
60: **
61: ** phrase ::= TOKEN.
62: ** phrase ::= COLUMN:TOKEN.
63: ** phrase ::= "TOKEN TOKEN TOKEN...".
64: */
65:
66: #ifdef SQLITE_TEST
67: int sqlite3_fts3_enable_parentheses = 0;
68: #else
69: # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
70: # define sqlite3_fts3_enable_parentheses 1
71: # else
72: # define sqlite3_fts3_enable_parentheses 0
73: # endif
74: #endif
75:
76: /*
77: ** Default span for NEAR operators.
78: */
79: #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
80:
81: #include <string.h>
82: #include <assert.h>
83:
84: /*
85: ** isNot:
86: ** This variable is used by function getNextNode(). When getNextNode() is
87: ** called, it sets ParseContext.isNot to true if the 'next node' is a
88: ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
89: ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
90: ** zero.
91: */
92: typedef struct ParseContext ParseContext;
93: struct ParseContext {
94: sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
95: const char **azCol; /* Array of column names for fts3 table */
96: int bFts4; /* True to allow FTS4-only syntax */
97: int nCol; /* Number of entries in azCol[] */
98: int iDefaultCol; /* Default column to query */
99: int isNot; /* True if getNextNode() sees a unary - */
100: sqlite3_context *pCtx; /* Write error message here */
101: int nNest; /* Number of nested brackets */
102: };
103:
104: /*
105: ** This function is equivalent to the standard isspace() function.
106: **
107: ** The standard isspace() can be awkward to use safely, because although it
108: ** is defined to accept an argument of type int, its behaviour when passed
109: ** an integer that falls outside of the range of the unsigned char type
110: ** is undefined (and sometimes, "undefined" means segfault). This wrapper
111: ** is defined to accept an argument of type char, and always returns 0 for
112: ** any values that fall outside of the range of the unsigned char type (i.e.
113: ** negative values).
114: */
115: static int fts3isspace(char c){
116: return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
117: }
118:
119: /*
120: ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
121: ** zero the memory before returning a pointer to it. If unsuccessful,
122: ** return NULL.
123: */
124: static void *fts3MallocZero(int nByte){
125: void *pRet = sqlite3_malloc(nByte);
126: if( pRet ) memset(pRet, 0, nByte);
127: return pRet;
128: }
129:
130:
131: /*
132: ** Extract the next token from buffer z (length n) using the tokenizer
133: ** and other information (column names etc.) in pParse. Create an Fts3Expr
134: ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
135: ** single token and set *ppExpr to point to it. If the end of the buffer is
136: ** reached before a token is found, set *ppExpr to zero. It is the
137: ** responsibility of the caller to eventually deallocate the allocated
138: ** Fts3Expr structure (if any) by passing it to sqlite3_free().
139: **
140: ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
141: ** fails.
142: */
143: static int getNextToken(
144: ParseContext *pParse, /* fts3 query parse context */
145: int iCol, /* Value for Fts3Phrase.iColumn */
146: const char *z, int n, /* Input string */
147: Fts3Expr **ppExpr, /* OUT: expression */
148: int *pnConsumed /* OUT: Number of bytes consumed */
149: ){
150: sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
151: sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
152: int rc;
153: sqlite3_tokenizer_cursor *pCursor;
154: Fts3Expr *pRet = 0;
155: int nConsumed = 0;
156:
157: rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
158: if( rc==SQLITE_OK ){
159: const char *zToken;
160: int nToken, iStart, iEnd, iPosition;
161: int nByte; /* total space to allocate */
162:
163: pCursor->pTokenizer = pTokenizer;
164: rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
165:
166: if( rc==SQLITE_OK ){
167: nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
168: pRet = (Fts3Expr *)fts3MallocZero(nByte);
169: if( !pRet ){
170: rc = SQLITE_NOMEM;
171: }else{
172: pRet->eType = FTSQUERY_PHRASE;
173: pRet->pPhrase = (Fts3Phrase *)&pRet[1];
174: pRet->pPhrase->nToken = 1;
175: pRet->pPhrase->iColumn = iCol;
176: pRet->pPhrase->aToken[0].n = nToken;
177: pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
178: memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
179:
180: if( iEnd<n && z[iEnd]=='*' ){
181: pRet->pPhrase->aToken[0].isPrefix = 1;
182: iEnd++;
183: }
184:
185: while( 1 ){
186: if( !sqlite3_fts3_enable_parentheses
187: && iStart>0 && z[iStart-1]=='-'
188: ){
189: pParse->isNot = 1;
190: iStart--;
191: }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
192: pRet->pPhrase->aToken[0].bFirst = 1;
193: iStart--;
194: }else{
195: break;
196: }
197: }
198:
199: }
200: nConsumed = iEnd;
201: }
202:
203: pModule->xClose(pCursor);
204: }
205:
206: *pnConsumed = nConsumed;
207: *ppExpr = pRet;
208: return rc;
209: }
210:
211:
212: /*
213: ** Enlarge a memory allocation. If an out-of-memory allocation occurs,
214: ** then free the old allocation.
215: */
216: static void *fts3ReallocOrFree(void *pOrig, int nNew){
217: void *pRet = sqlite3_realloc(pOrig, nNew);
218: if( !pRet ){
219: sqlite3_free(pOrig);
220: }
221: return pRet;
222: }
223:
224: /*
225: ** Buffer zInput, length nInput, contains the contents of a quoted string
226: ** that appeared as part of an fts3 query expression. Neither quote character
227: ** is included in the buffer. This function attempts to tokenize the entire
228: ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
229: ** containing the results.
230: **
231: ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
232: ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
233: ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
234: ** to 0.
235: */
236: static int getNextString(
237: ParseContext *pParse, /* fts3 query parse context */
238: const char *zInput, int nInput, /* Input string */
239: Fts3Expr **ppExpr /* OUT: expression */
240: ){
241: sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
242: sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
243: int rc;
244: Fts3Expr *p = 0;
245: sqlite3_tokenizer_cursor *pCursor = 0;
246: char *zTemp = 0;
247: int nTemp = 0;
248:
249: const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
250: int nToken = 0;
251:
252: /* The final Fts3Expr data structure, including the Fts3Phrase,
253: ** Fts3PhraseToken structures token buffers are all stored as a single
254: ** allocation so that the expression can be freed with a single call to
255: ** sqlite3_free(). Setting this up requires a two pass approach.
256: **
257: ** The first pass, in the block below, uses a tokenizer cursor to iterate
258: ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
259: ** to assemble data in two dynamic buffers:
260: **
261: ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
262: ** structure, followed by the array of Fts3PhraseToken
263: ** structures. This pass only populates the Fts3PhraseToken array.
264: **
265: ** Buffer zTemp: Contains copies of all tokens.
266: **
267: ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
268: ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
269: ** structures.
270: */
271: rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
272: if( rc==SQLITE_OK ){
273: int ii;
274: pCursor->pTokenizer = pTokenizer;
275: for(ii=0; rc==SQLITE_OK; ii++){
276: const char *zByte;
277: int nByte, iBegin, iEnd, iPos;
278: rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
279: if( rc==SQLITE_OK ){
280: Fts3PhraseToken *pToken;
281:
282: p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
283: if( !p ) goto no_mem;
284:
285: zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
286: if( !zTemp ) goto no_mem;
287:
288: assert( nToken==ii );
289: pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
290: memset(pToken, 0, sizeof(Fts3PhraseToken));
291:
292: memcpy(&zTemp[nTemp], zByte, nByte);
293: nTemp += nByte;
294:
295: pToken->n = nByte;
296: pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
297: pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
298: nToken = ii+1;
299: }
300: }
301:
302: pModule->xClose(pCursor);
303: pCursor = 0;
304: }
305:
306: if( rc==SQLITE_DONE ){
307: int jj;
308: char *zBuf = 0;
309:
310: p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
311: if( !p ) goto no_mem;
312: memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
313: p->eType = FTSQUERY_PHRASE;
314: p->pPhrase = (Fts3Phrase *)&p[1];
315: p->pPhrase->iColumn = pParse->iDefaultCol;
316: p->pPhrase->nToken = nToken;
317:
318: zBuf = (char *)&p->pPhrase->aToken[nToken];
319: if( zTemp ){
320: memcpy(zBuf, zTemp, nTemp);
321: sqlite3_free(zTemp);
322: }else{
323: assert( nTemp==0 );
324: }
325:
326: for(jj=0; jj<p->pPhrase->nToken; jj++){
327: p->pPhrase->aToken[jj].z = zBuf;
328: zBuf += p->pPhrase->aToken[jj].n;
329: }
330: rc = SQLITE_OK;
331: }
332:
333: *ppExpr = p;
334: return rc;
335: no_mem:
336:
337: if( pCursor ){
338: pModule->xClose(pCursor);
339: }
340: sqlite3_free(zTemp);
341: sqlite3_free(p);
342: *ppExpr = 0;
343: return SQLITE_NOMEM;
344: }
345:
346: /*
347: ** Function getNextNode(), which is called by fts3ExprParse(), may itself
348: ** call fts3ExprParse(). So this forward declaration is required.
349: */
350: static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
351:
352: /*
353: ** The output variable *ppExpr is populated with an allocated Fts3Expr
354: ** structure, or set to 0 if the end of the input buffer is reached.
355: **
356: ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
357: ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
358: ** If SQLITE_ERROR is returned, pContext is populated with an error message.
359: */
360: static int getNextNode(
361: ParseContext *pParse, /* fts3 query parse context */
362: const char *z, int n, /* Input string */
363: Fts3Expr **ppExpr, /* OUT: expression */
364: int *pnConsumed /* OUT: Number of bytes consumed */
365: ){
366: static const struct Fts3Keyword {
367: char *z; /* Keyword text */
368: unsigned char n; /* Length of the keyword */
369: unsigned char parenOnly; /* Only valid in paren mode */
370: unsigned char eType; /* Keyword code */
371: } aKeyword[] = {
372: { "OR" , 2, 0, FTSQUERY_OR },
373: { "AND", 3, 1, FTSQUERY_AND },
374: { "NOT", 3, 1, FTSQUERY_NOT },
375: { "NEAR", 4, 0, FTSQUERY_NEAR }
376: };
377: int ii;
378: int iCol;
379: int iColLen;
380: int rc;
381: Fts3Expr *pRet = 0;
382:
383: const char *zInput = z;
384: int nInput = n;
385:
386: pParse->isNot = 0;
387:
388: /* Skip over any whitespace before checking for a keyword, an open or
389: ** close bracket, or a quoted string.
390: */
391: while( nInput>0 && fts3isspace(*zInput) ){
392: nInput--;
393: zInput++;
394: }
395: if( nInput==0 ){
396: return SQLITE_DONE;
397: }
398:
399: /* See if we are dealing with a keyword. */
400: for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
401: const struct Fts3Keyword *pKey = &aKeyword[ii];
402:
403: if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
404: continue;
405: }
406:
407: if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
408: int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
409: int nKey = pKey->n;
410: char cNext;
411:
412: /* If this is a "NEAR" keyword, check for an explicit nearness. */
413: if( pKey->eType==FTSQUERY_NEAR ){
414: assert( nKey==4 );
415: if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
416: nNear = 0;
417: for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
418: nNear = nNear * 10 + (zInput[nKey] - '0');
419: }
420: }
421: }
422:
423: /* At this point this is probably a keyword. But for that to be true,
424: ** the next byte must contain either whitespace, an open or close
425: ** parenthesis, a quote character, or EOF.
426: */
427: cNext = zInput[nKey];
428: if( fts3isspace(cNext)
429: || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
430: ){
431: pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
432: if( !pRet ){
433: return SQLITE_NOMEM;
434: }
435: pRet->eType = pKey->eType;
436: pRet->nNear = nNear;
437: *ppExpr = pRet;
438: *pnConsumed = (int)((zInput - z) + nKey);
439: return SQLITE_OK;
440: }
441:
442: /* Turns out that wasn't a keyword after all. This happens if the
443: ** user has supplied a token such as "ORacle". Continue.
444: */
445: }
446: }
447:
448: /* Check for an open bracket. */
449: if( sqlite3_fts3_enable_parentheses ){
450: if( *zInput=='(' ){
451: int nConsumed;
452: pParse->nNest++;
453: rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
454: if( rc==SQLITE_OK && !*ppExpr ){
455: rc = SQLITE_DONE;
456: }
457: *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
458: return rc;
459: }
460:
461: /* Check for a close bracket. */
462: if( *zInput==')' ){
463: pParse->nNest--;
464: *pnConsumed = (int)((zInput - z) + 1);
465: return SQLITE_DONE;
466: }
467: }
468:
469: /* See if we are dealing with a quoted phrase. If this is the case, then
470: ** search for the closing quote and pass the whole string to getNextString()
471: ** for processing. This is easy to do, as fts3 has no syntax for escaping
472: ** a quote character embedded in a string.
473: */
474: if( *zInput=='"' ){
475: for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
476: *pnConsumed = (int)((zInput - z) + ii + 1);
477: if( ii==nInput ){
478: return SQLITE_ERROR;
479: }
480: return getNextString(pParse, &zInput[1], ii-1, ppExpr);
481: }
482:
483:
484: /* If control flows to this point, this must be a regular token, or
485: ** the end of the input. Read a regular token using the sqlite3_tokenizer
486: ** interface. Before doing so, figure out if there is an explicit
487: ** column specifier for the token.
488: **
489: ** TODO: Strangely, it is not possible to associate a column specifier
490: ** with a quoted phrase, only with a single token. Not sure if this was
491: ** an implementation artifact or an intentional decision when fts3 was
492: ** first implemented. Whichever it was, this module duplicates the
493: ** limitation.
494: */
495: iCol = pParse->iDefaultCol;
496: iColLen = 0;
497: for(ii=0; ii<pParse->nCol; ii++){
498: const char *zStr = pParse->azCol[ii];
499: int nStr = (int)strlen(zStr);
500: if( nInput>nStr && zInput[nStr]==':'
501: && sqlite3_strnicmp(zStr, zInput, nStr)==0
502: ){
503: iCol = ii;
504: iColLen = (int)((zInput - z) + nStr + 1);
505: break;
506: }
507: }
508: rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
509: *pnConsumed += iColLen;
510: return rc;
511: }
512:
513: /*
514: ** The argument is an Fts3Expr structure for a binary operator (any type
515: ** except an FTSQUERY_PHRASE). Return an integer value representing the
516: ** precedence of the operator. Lower values have a higher precedence (i.e.
517: ** group more tightly). For example, in the C language, the == operator
518: ** groups more tightly than ||, and would therefore have a higher precedence.
519: **
520: ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
521: ** is defined), the order of the operators in precedence from highest to
522: ** lowest is:
523: **
524: ** NEAR
525: ** NOT
526: ** AND (including implicit ANDs)
527: ** OR
528: **
529: ** Note that when using the old query syntax, the OR operator has a higher
530: ** precedence than the AND operator.
531: */
532: static int opPrecedence(Fts3Expr *p){
533: assert( p->eType!=FTSQUERY_PHRASE );
534: if( sqlite3_fts3_enable_parentheses ){
535: return p->eType;
536: }else if( p->eType==FTSQUERY_NEAR ){
537: return 1;
538: }else if( p->eType==FTSQUERY_OR ){
539: return 2;
540: }
541: assert( p->eType==FTSQUERY_AND );
542: return 3;
543: }
544:
545: /*
546: ** Argument ppHead contains a pointer to the current head of a query
547: ** expression tree being parsed. pPrev is the expression node most recently
548: ** inserted into the tree. This function adds pNew, which is always a binary
549: ** operator node, into the expression tree based on the relative precedence
550: ** of pNew and the existing nodes of the tree. This may result in the head
551: ** of the tree changing, in which case *ppHead is set to the new root node.
552: */
553: static void insertBinaryOperator(
554: Fts3Expr **ppHead, /* Pointer to the root node of a tree */
555: Fts3Expr *pPrev, /* Node most recently inserted into the tree */
556: Fts3Expr *pNew /* New binary node to insert into expression tree */
557: ){
558: Fts3Expr *pSplit = pPrev;
559: while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
560: pSplit = pSplit->pParent;
561: }
562:
563: if( pSplit->pParent ){
564: assert( pSplit->pParent->pRight==pSplit );
565: pSplit->pParent->pRight = pNew;
566: pNew->pParent = pSplit->pParent;
567: }else{
568: *ppHead = pNew;
569: }
570: pNew->pLeft = pSplit;
571: pSplit->pParent = pNew;
572: }
573:
574: /*
575: ** Parse the fts3 query expression found in buffer z, length n. This function
576: ** returns either when the end of the buffer is reached or an unmatched
577: ** closing bracket - ')' - is encountered.
578: **
579: ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
580: ** parsed form of the expression and *pnConsumed is set to the number of
581: ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
582: ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
583: */
584: static int fts3ExprParse(
585: ParseContext *pParse, /* fts3 query parse context */
586: const char *z, int n, /* Text of MATCH query */
587: Fts3Expr **ppExpr, /* OUT: Parsed query structure */
588: int *pnConsumed /* OUT: Number of bytes consumed */
589: ){
590: Fts3Expr *pRet = 0;
591: Fts3Expr *pPrev = 0;
592: Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */
593: int nIn = n;
594: const char *zIn = z;
595: int rc = SQLITE_OK;
596: int isRequirePhrase = 1;
597:
598: while( rc==SQLITE_OK ){
599: Fts3Expr *p = 0;
600: int nByte = 0;
601: rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
602: if( rc==SQLITE_OK ){
603: int isPhrase;
604:
605: if( !sqlite3_fts3_enable_parentheses
606: && p->eType==FTSQUERY_PHRASE && pParse->isNot
607: ){
608: /* Create an implicit NOT operator. */
609: Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
610: if( !pNot ){
611: sqlite3Fts3ExprFree(p);
612: rc = SQLITE_NOMEM;
613: goto exprparse_out;
614: }
615: pNot->eType = FTSQUERY_NOT;
616: pNot->pRight = p;
617: if( pNotBranch ){
618: pNot->pLeft = pNotBranch;
619: }
620: pNotBranch = pNot;
621: p = pPrev;
622: }else{
623: int eType = p->eType;
624: isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
625:
626: /* The isRequirePhrase variable is set to true if a phrase or
627: ** an expression contained in parenthesis is required. If a
628: ** binary operator (AND, OR, NOT or NEAR) is encounted when
629: ** isRequirePhrase is set, this is a syntax error.
630: */
631: if( !isPhrase && isRequirePhrase ){
632: sqlite3Fts3ExprFree(p);
633: rc = SQLITE_ERROR;
634: goto exprparse_out;
635: }
636:
637: if( isPhrase && !isRequirePhrase ){
638: /* Insert an implicit AND operator. */
639: Fts3Expr *pAnd;
640: assert( pRet && pPrev );
641: pAnd = fts3MallocZero(sizeof(Fts3Expr));
642: if( !pAnd ){
643: sqlite3Fts3ExprFree(p);
644: rc = SQLITE_NOMEM;
645: goto exprparse_out;
646: }
647: pAnd->eType = FTSQUERY_AND;
648: insertBinaryOperator(&pRet, pPrev, pAnd);
649: pPrev = pAnd;
650: }
651:
652: /* This test catches attempts to make either operand of a NEAR
653: ** operator something other than a phrase. For example, either of
654: ** the following:
655: **
656: ** (bracketed expression) NEAR phrase
657: ** phrase NEAR (bracketed expression)
658: **
659: ** Return an error in either case.
660: */
661: if( pPrev && (
662: (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
663: || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
664: )){
665: sqlite3Fts3ExprFree(p);
666: rc = SQLITE_ERROR;
667: goto exprparse_out;
668: }
669:
670: if( isPhrase ){
671: if( pRet ){
672: assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
673: pPrev->pRight = p;
674: p->pParent = pPrev;
675: }else{
676: pRet = p;
677: }
678: }else{
679: insertBinaryOperator(&pRet, pPrev, p);
680: }
681: isRequirePhrase = !isPhrase;
682: }
683: assert( nByte>0 );
684: }
685: assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
686: nIn -= nByte;
687: zIn += nByte;
688: pPrev = p;
689: }
690:
691: if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
692: rc = SQLITE_ERROR;
693: }
694:
695: if( rc==SQLITE_DONE ){
696: rc = SQLITE_OK;
697: if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
698: if( !pRet ){
699: rc = SQLITE_ERROR;
700: }else{
701: Fts3Expr *pIter = pNotBranch;
702: while( pIter->pLeft ){
703: pIter = pIter->pLeft;
704: }
705: pIter->pLeft = pRet;
706: pRet = pNotBranch;
707: }
708: }
709: }
710: *pnConsumed = n - nIn;
711:
712: exprparse_out:
713: if( rc!=SQLITE_OK ){
714: sqlite3Fts3ExprFree(pRet);
715: sqlite3Fts3ExprFree(pNotBranch);
716: pRet = 0;
717: }
718: *ppExpr = pRet;
719: return rc;
720: }
721:
722: /*
723: ** Parameters z and n contain a pointer to and length of a buffer containing
724: ** an fts3 query expression, respectively. This function attempts to parse the
725: ** query expression and create a tree of Fts3Expr structures representing the
726: ** parsed expression. If successful, *ppExpr is set to point to the head
727: ** of the parsed expression tree and SQLITE_OK is returned. If an error
728: ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
729: ** error) is returned and *ppExpr is set to 0.
730: **
731: ** If parameter n is a negative number, then z is assumed to point to a
732: ** nul-terminated string and the length is determined using strlen().
733: **
734: ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
735: ** use to normalize query tokens while parsing the expression. The azCol[]
736: ** array, which is assumed to contain nCol entries, should contain the names
737: ** of each column in the target fts3 table, in order from left to right.
738: ** Column names must be nul-terminated strings.
739: **
740: ** The iDefaultCol parameter should be passed the index of the table column
741: ** that appears on the left-hand-side of the MATCH operator (the default
742: ** column to match against for tokens for which a column name is not explicitly
743: ** specified as part of the query string), or -1 if tokens may by default
744: ** match any table column.
745: */
746: int sqlite3Fts3ExprParse(
747: sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
748: char **azCol, /* Array of column names for fts3 table */
749: int bFts4, /* True to allow FTS4-only syntax */
750: int nCol, /* Number of entries in azCol[] */
751: int iDefaultCol, /* Default column to query */
752: const char *z, int n, /* Text of MATCH query */
753: Fts3Expr **ppExpr /* OUT: Parsed query structure */
754: ){
755: int nParsed;
756: int rc;
757: ParseContext sParse;
758: sParse.pTokenizer = pTokenizer;
759: sParse.azCol = (const char **)azCol;
760: sParse.nCol = nCol;
761: sParse.iDefaultCol = iDefaultCol;
762: sParse.nNest = 0;
763: sParse.bFts4 = bFts4;
764: if( z==0 ){
765: *ppExpr = 0;
766: return SQLITE_OK;
767: }
768: if( n<0 ){
769: n = (int)strlen(z);
770: }
771: rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
772:
773: /* Check for mismatched parenthesis */
774: if( rc==SQLITE_OK && sParse.nNest ){
775: rc = SQLITE_ERROR;
776: sqlite3Fts3ExprFree(*ppExpr);
777: *ppExpr = 0;
778: }
779:
780: return rc;
781: }
782:
783: /*
784: ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
785: */
786: void sqlite3Fts3ExprFree(Fts3Expr *p){
787: if( p ){
788: assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
789: sqlite3Fts3ExprFree(p->pLeft);
790: sqlite3Fts3ExprFree(p->pRight);
791: sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
792: sqlite3_free(p->aMI);
793: sqlite3_free(p);
794: }
795: }
796:
797: /****************************************************************************
798: *****************************************************************************
799: ** Everything after this point is just test code.
800: */
801:
802: #ifdef SQLITE_TEST
803:
804: #include <stdio.h>
805:
806: /*
807: ** Function to query the hash-table of tokenizers (see README.tokenizers).
808: */
809: static int queryTestTokenizer(
810: sqlite3 *db,
811: const char *zName,
812: const sqlite3_tokenizer_module **pp
813: ){
814: int rc;
815: sqlite3_stmt *pStmt;
816: const char zSql[] = "SELECT fts3_tokenizer(?)";
817:
818: *pp = 0;
819: rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
820: if( rc!=SQLITE_OK ){
821: return rc;
822: }
823:
824: sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
825: if( SQLITE_ROW==sqlite3_step(pStmt) ){
826: if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
827: memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
828: }
829: }
830:
831: return sqlite3_finalize(pStmt);
832: }
833:
834: /*
835: ** Return a pointer to a buffer containing a text representation of the
836: ** expression passed as the first argument. The buffer is obtained from
837: ** sqlite3_malloc(). It is the responsibility of the caller to use
838: ** sqlite3_free() to release the memory. If an OOM condition is encountered,
839: ** NULL is returned.
840: **
841: ** If the second argument is not NULL, then its contents are prepended to
842: ** the returned expression text and then freed using sqlite3_free().
843: */
844: static char *exprToString(Fts3Expr *pExpr, char *zBuf){
845: switch( pExpr->eType ){
846: case FTSQUERY_PHRASE: {
847: Fts3Phrase *pPhrase = pExpr->pPhrase;
848: int i;
849: zBuf = sqlite3_mprintf(
850: "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
851: for(i=0; zBuf && i<pPhrase->nToken; i++){
852: zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
853: pPhrase->aToken[i].n, pPhrase->aToken[i].z,
854: (pPhrase->aToken[i].isPrefix?"+":"")
855: );
856: }
857: return zBuf;
858: }
859:
860: case FTSQUERY_NEAR:
861: zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
862: break;
863: case FTSQUERY_NOT:
864: zBuf = sqlite3_mprintf("%zNOT ", zBuf);
865: break;
866: case FTSQUERY_AND:
867: zBuf = sqlite3_mprintf("%zAND ", zBuf);
868: break;
869: case FTSQUERY_OR:
870: zBuf = sqlite3_mprintf("%zOR ", zBuf);
871: break;
872: }
873:
874: if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
875: if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
876: if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
877:
878: if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
879: if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
880:
881: return zBuf;
882: }
883:
884: /*
885: ** This is the implementation of a scalar SQL function used to test the
886: ** expression parser. It should be called as follows:
887: **
888: ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
889: **
890: ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
891: ** to parse the query expression (see README.tokenizers). The second argument
892: ** is the query expression to parse. Each subsequent argument is the name
893: ** of a column of the fts3 table that the query expression may refer to.
894: ** For example:
895: **
896: ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
897: */
898: static void fts3ExprTest(
899: sqlite3_context *context,
900: int argc,
901: sqlite3_value **argv
902: ){
903: sqlite3_tokenizer_module const *pModule = 0;
904: sqlite3_tokenizer *pTokenizer = 0;
905: int rc;
906: char **azCol = 0;
907: const char *zExpr;
908: int nExpr;
909: int nCol;
910: int ii;
911: Fts3Expr *pExpr;
912: char *zBuf = 0;
913: sqlite3 *db = sqlite3_context_db_handle(context);
914:
915: if( argc<3 ){
916: sqlite3_result_error(context,
917: "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
918: );
919: return;
920: }
921:
922: rc = queryTestTokenizer(db,
923: (const char *)sqlite3_value_text(argv[0]), &pModule);
924: if( rc==SQLITE_NOMEM ){
925: sqlite3_result_error_nomem(context);
926: goto exprtest_out;
927: }else if( !pModule ){
928: sqlite3_result_error(context, "No such tokenizer module", -1);
929: goto exprtest_out;
930: }
931:
932: rc = pModule->xCreate(0, 0, &pTokenizer);
933: assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
934: if( rc==SQLITE_NOMEM ){
935: sqlite3_result_error_nomem(context);
936: goto exprtest_out;
937: }
938: pTokenizer->pModule = pModule;
939:
940: zExpr = (const char *)sqlite3_value_text(argv[1]);
941: nExpr = sqlite3_value_bytes(argv[1]);
942: nCol = argc-2;
943: azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
944: if( !azCol ){
945: sqlite3_result_error_nomem(context);
946: goto exprtest_out;
947: }
948: for(ii=0; ii<nCol; ii++){
949: azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
950: }
951:
952: rc = sqlite3Fts3ExprParse(
953: pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
954: );
955: if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
956: sqlite3_result_error(context, "Error parsing expression", -1);
957: }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
958: sqlite3_result_error_nomem(context);
959: }else{
960: sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
961: sqlite3_free(zBuf);
962: }
963:
964: sqlite3Fts3ExprFree(pExpr);
965:
966: exprtest_out:
967: if( pModule && pTokenizer ){
968: rc = pModule->xDestroy(pTokenizer);
969: }
970: sqlite3_free(azCol);
971: }
972:
973: /*
974: ** Register the query expression parser test function fts3_exprtest()
975: ** with database connection db.
976: */
977: int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
978: return sqlite3_create_function(
979: db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
980: );
981: }
982:
983: #endif
984: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>