Annotation of embedaddon/sqlite3/ext/fts3/fts3_expr.c, revision 1.1
1.1 ! misho 1: /*
! 2: ** 2008 Nov 28
! 3: **
! 4: ** The author disclaims copyright to this source code. In place of
! 5: ** a legal notice, here is a blessing:
! 6: **
! 7: ** May you do good and not evil.
! 8: ** May you find forgiveness for yourself and forgive others.
! 9: ** May you share freely, never taking more than you give.
! 10: **
! 11: ******************************************************************************
! 12: **
! 13: ** This module contains code that implements a parser for fts3 query strings
! 14: ** (the right-hand argument to the MATCH operator). Because the supported
! 15: ** syntax is relatively simple, the whole tokenizer/parser system is
! 16: ** hand-coded.
! 17: */
! 18: #include "fts3Int.h"
! 19: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
! 20:
! 21: /*
! 22: ** By default, this module parses the legacy syntax that has been
! 23: ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
! 24: ** is defined, then it uses the new syntax. The differences between
! 25: ** the new and the old syntaxes are:
! 26: **
! 27: ** a) The new syntax supports parenthesis. The old does not.
! 28: **
! 29: ** b) The new syntax supports the AND and NOT operators. The old does not.
! 30: **
! 31: ** c) The old syntax supports the "-" token qualifier. This is not
! 32: ** supported by the new syntax (it is replaced by the NOT operator).
! 33: **
! 34: ** d) When using the old syntax, the OR operator has a greater precedence
! 35: ** than an implicit AND. When using the new, both implicity and explicit
! 36: ** AND operators have a higher precedence than OR.
! 37: **
! 38: ** If compiled with SQLITE_TEST defined, then this module exports the
! 39: ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
! 40: ** to zero causes the module to use the old syntax. If it is set to
! 41: ** non-zero the new syntax is activated. This is so both syntaxes can
! 42: ** be tested using a single build of testfixture.
! 43: **
! 44: ** The following describes the syntax supported by the fts3 MATCH
! 45: ** operator in a similar format to that used by the lemon parser
! 46: ** generator. This module does not use actually lemon, it uses a
! 47: ** custom parser.
! 48: **
! 49: ** query ::= andexpr (OR andexpr)*.
! 50: **
! 51: ** andexpr ::= notexpr (AND? notexpr)*.
! 52: **
! 53: ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
! 54: ** notexpr ::= LP query RP.
! 55: **
! 56: ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
! 57: **
! 58: ** distance_opt ::= .
! 59: ** distance_opt ::= / INTEGER.
! 60: **
! 61: ** phrase ::= TOKEN.
! 62: ** phrase ::= COLUMN:TOKEN.
! 63: ** phrase ::= "TOKEN TOKEN TOKEN...".
! 64: */
! 65:
! 66: #ifdef SQLITE_TEST
! 67: int sqlite3_fts3_enable_parentheses = 0;
! 68: #else
! 69: # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
! 70: # define sqlite3_fts3_enable_parentheses 1
! 71: # else
! 72: # define sqlite3_fts3_enable_parentheses 0
! 73: # endif
! 74: #endif
! 75:
! 76: /*
! 77: ** Default span for NEAR operators.
! 78: */
! 79: #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
! 80:
! 81: #include <string.h>
! 82: #include <assert.h>
! 83:
! 84: /*
! 85: ** isNot:
! 86: ** This variable is used by function getNextNode(). When getNextNode() is
! 87: ** called, it sets ParseContext.isNot to true if the 'next node' is a
! 88: ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
! 89: ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
! 90: ** zero.
! 91: */
! 92: typedef struct ParseContext ParseContext;
! 93: struct ParseContext {
! 94: sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
! 95: const char **azCol; /* Array of column names for fts3 table */
! 96: int bFts4; /* True to allow FTS4-only syntax */
! 97: int nCol; /* Number of entries in azCol[] */
! 98: int iDefaultCol; /* Default column to query */
! 99: int isNot; /* True if getNextNode() sees a unary - */
! 100: sqlite3_context *pCtx; /* Write error message here */
! 101: int nNest; /* Number of nested brackets */
! 102: };
! 103:
! 104: /*
! 105: ** This function is equivalent to the standard isspace() function.
! 106: **
! 107: ** The standard isspace() can be awkward to use safely, because although it
! 108: ** is defined to accept an argument of type int, its behaviour when passed
! 109: ** an integer that falls outside of the range of the unsigned char type
! 110: ** is undefined (and sometimes, "undefined" means segfault). This wrapper
! 111: ** is defined to accept an argument of type char, and always returns 0 for
! 112: ** any values that fall outside of the range of the unsigned char type (i.e.
! 113: ** negative values).
! 114: */
! 115: static int fts3isspace(char c){
! 116: return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
! 117: }
! 118:
! 119: /*
! 120: ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
! 121: ** zero the memory before returning a pointer to it. If unsuccessful,
! 122: ** return NULL.
! 123: */
! 124: static void *fts3MallocZero(int nByte){
! 125: void *pRet = sqlite3_malloc(nByte);
! 126: if( pRet ) memset(pRet, 0, nByte);
! 127: return pRet;
! 128: }
! 129:
! 130:
! 131: /*
! 132: ** Extract the next token from buffer z (length n) using the tokenizer
! 133: ** and other information (column names etc.) in pParse. Create an Fts3Expr
! 134: ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
! 135: ** single token and set *ppExpr to point to it. If the end of the buffer is
! 136: ** reached before a token is found, set *ppExpr to zero. It is the
! 137: ** responsibility of the caller to eventually deallocate the allocated
! 138: ** Fts3Expr structure (if any) by passing it to sqlite3_free().
! 139: **
! 140: ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
! 141: ** fails.
! 142: */
! 143: static int getNextToken(
! 144: ParseContext *pParse, /* fts3 query parse context */
! 145: int iCol, /* Value for Fts3Phrase.iColumn */
! 146: const char *z, int n, /* Input string */
! 147: Fts3Expr **ppExpr, /* OUT: expression */
! 148: int *pnConsumed /* OUT: Number of bytes consumed */
! 149: ){
! 150: sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
! 151: sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
! 152: int rc;
! 153: sqlite3_tokenizer_cursor *pCursor;
! 154: Fts3Expr *pRet = 0;
! 155: int nConsumed = 0;
! 156:
! 157: rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
! 158: if( rc==SQLITE_OK ){
! 159: const char *zToken;
! 160: int nToken, iStart, iEnd, iPosition;
! 161: int nByte; /* total space to allocate */
! 162:
! 163: pCursor->pTokenizer = pTokenizer;
! 164: rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
! 165:
! 166: if( rc==SQLITE_OK ){
! 167: nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
! 168: pRet = (Fts3Expr *)fts3MallocZero(nByte);
! 169: if( !pRet ){
! 170: rc = SQLITE_NOMEM;
! 171: }else{
! 172: pRet->eType = FTSQUERY_PHRASE;
! 173: pRet->pPhrase = (Fts3Phrase *)&pRet[1];
! 174: pRet->pPhrase->nToken = 1;
! 175: pRet->pPhrase->iColumn = iCol;
! 176: pRet->pPhrase->aToken[0].n = nToken;
! 177: pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
! 178: memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
! 179:
! 180: if( iEnd<n && z[iEnd]=='*' ){
! 181: pRet->pPhrase->aToken[0].isPrefix = 1;
! 182: iEnd++;
! 183: }
! 184:
! 185: while( 1 ){
! 186: if( !sqlite3_fts3_enable_parentheses
! 187: && iStart>0 && z[iStart-1]=='-'
! 188: ){
! 189: pParse->isNot = 1;
! 190: iStart--;
! 191: }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
! 192: pRet->pPhrase->aToken[0].bFirst = 1;
! 193: iStart--;
! 194: }else{
! 195: break;
! 196: }
! 197: }
! 198:
! 199: }
! 200: nConsumed = iEnd;
! 201: }
! 202:
! 203: pModule->xClose(pCursor);
! 204: }
! 205:
! 206: *pnConsumed = nConsumed;
! 207: *ppExpr = pRet;
! 208: return rc;
! 209: }
! 210:
! 211:
! 212: /*
! 213: ** Enlarge a memory allocation. If an out-of-memory allocation occurs,
! 214: ** then free the old allocation.
! 215: */
! 216: static void *fts3ReallocOrFree(void *pOrig, int nNew){
! 217: void *pRet = sqlite3_realloc(pOrig, nNew);
! 218: if( !pRet ){
! 219: sqlite3_free(pOrig);
! 220: }
! 221: return pRet;
! 222: }
! 223:
! 224: /*
! 225: ** Buffer zInput, length nInput, contains the contents of a quoted string
! 226: ** that appeared as part of an fts3 query expression. Neither quote character
! 227: ** is included in the buffer. This function attempts to tokenize the entire
! 228: ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
! 229: ** containing the results.
! 230: **
! 231: ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
! 232: ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
! 233: ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
! 234: ** to 0.
! 235: */
! 236: static int getNextString(
! 237: ParseContext *pParse, /* fts3 query parse context */
! 238: const char *zInput, int nInput, /* Input string */
! 239: Fts3Expr **ppExpr /* OUT: expression */
! 240: ){
! 241: sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
! 242: sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
! 243: int rc;
! 244: Fts3Expr *p = 0;
! 245: sqlite3_tokenizer_cursor *pCursor = 0;
! 246: char *zTemp = 0;
! 247: int nTemp = 0;
! 248:
! 249: const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
! 250: int nToken = 0;
! 251:
! 252: /* The final Fts3Expr data structure, including the Fts3Phrase,
! 253: ** Fts3PhraseToken structures token buffers are all stored as a single
! 254: ** allocation so that the expression can be freed with a single call to
! 255: ** sqlite3_free(). Setting this up requires a two pass approach.
! 256: **
! 257: ** The first pass, in the block below, uses a tokenizer cursor to iterate
! 258: ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
! 259: ** to assemble data in two dynamic buffers:
! 260: **
! 261: ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
! 262: ** structure, followed by the array of Fts3PhraseToken
! 263: ** structures. This pass only populates the Fts3PhraseToken array.
! 264: **
! 265: ** Buffer zTemp: Contains copies of all tokens.
! 266: **
! 267: ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
! 268: ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
! 269: ** structures.
! 270: */
! 271: rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
! 272: if( rc==SQLITE_OK ){
! 273: int ii;
! 274: pCursor->pTokenizer = pTokenizer;
! 275: for(ii=0; rc==SQLITE_OK; ii++){
! 276: const char *zByte;
! 277: int nByte, iBegin, iEnd, iPos;
! 278: rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
! 279: if( rc==SQLITE_OK ){
! 280: Fts3PhraseToken *pToken;
! 281:
! 282: p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
! 283: if( !p ) goto no_mem;
! 284:
! 285: zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
! 286: if( !zTemp ) goto no_mem;
! 287:
! 288: assert( nToken==ii );
! 289: pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
! 290: memset(pToken, 0, sizeof(Fts3PhraseToken));
! 291:
! 292: memcpy(&zTemp[nTemp], zByte, nByte);
! 293: nTemp += nByte;
! 294:
! 295: pToken->n = nByte;
! 296: pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
! 297: pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
! 298: nToken = ii+1;
! 299: }
! 300: }
! 301:
! 302: pModule->xClose(pCursor);
! 303: pCursor = 0;
! 304: }
! 305:
! 306: if( rc==SQLITE_DONE ){
! 307: int jj;
! 308: char *zBuf = 0;
! 309:
! 310: p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
! 311: if( !p ) goto no_mem;
! 312: memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
! 313: p->eType = FTSQUERY_PHRASE;
! 314: p->pPhrase = (Fts3Phrase *)&p[1];
! 315: p->pPhrase->iColumn = pParse->iDefaultCol;
! 316: p->pPhrase->nToken = nToken;
! 317:
! 318: zBuf = (char *)&p->pPhrase->aToken[nToken];
! 319: if( zTemp ){
! 320: memcpy(zBuf, zTemp, nTemp);
! 321: sqlite3_free(zTemp);
! 322: }else{
! 323: assert( nTemp==0 );
! 324: }
! 325:
! 326: for(jj=0; jj<p->pPhrase->nToken; jj++){
! 327: p->pPhrase->aToken[jj].z = zBuf;
! 328: zBuf += p->pPhrase->aToken[jj].n;
! 329: }
! 330: rc = SQLITE_OK;
! 331: }
! 332:
! 333: *ppExpr = p;
! 334: return rc;
! 335: no_mem:
! 336:
! 337: if( pCursor ){
! 338: pModule->xClose(pCursor);
! 339: }
! 340: sqlite3_free(zTemp);
! 341: sqlite3_free(p);
! 342: *ppExpr = 0;
! 343: return SQLITE_NOMEM;
! 344: }
! 345:
! 346: /*
! 347: ** Function getNextNode(), which is called by fts3ExprParse(), may itself
! 348: ** call fts3ExprParse(). So this forward declaration is required.
! 349: */
! 350: static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
! 351:
! 352: /*
! 353: ** The output variable *ppExpr is populated with an allocated Fts3Expr
! 354: ** structure, or set to 0 if the end of the input buffer is reached.
! 355: **
! 356: ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
! 357: ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
! 358: ** If SQLITE_ERROR is returned, pContext is populated with an error message.
! 359: */
! 360: static int getNextNode(
! 361: ParseContext *pParse, /* fts3 query parse context */
! 362: const char *z, int n, /* Input string */
! 363: Fts3Expr **ppExpr, /* OUT: expression */
! 364: int *pnConsumed /* OUT: Number of bytes consumed */
! 365: ){
! 366: static const struct Fts3Keyword {
! 367: char *z; /* Keyword text */
! 368: unsigned char n; /* Length of the keyword */
! 369: unsigned char parenOnly; /* Only valid in paren mode */
! 370: unsigned char eType; /* Keyword code */
! 371: } aKeyword[] = {
! 372: { "OR" , 2, 0, FTSQUERY_OR },
! 373: { "AND", 3, 1, FTSQUERY_AND },
! 374: { "NOT", 3, 1, FTSQUERY_NOT },
! 375: { "NEAR", 4, 0, FTSQUERY_NEAR }
! 376: };
! 377: int ii;
! 378: int iCol;
! 379: int iColLen;
! 380: int rc;
! 381: Fts3Expr *pRet = 0;
! 382:
! 383: const char *zInput = z;
! 384: int nInput = n;
! 385:
! 386: pParse->isNot = 0;
! 387:
! 388: /* Skip over any whitespace before checking for a keyword, an open or
! 389: ** close bracket, or a quoted string.
! 390: */
! 391: while( nInput>0 && fts3isspace(*zInput) ){
! 392: nInput--;
! 393: zInput++;
! 394: }
! 395: if( nInput==0 ){
! 396: return SQLITE_DONE;
! 397: }
! 398:
! 399: /* See if we are dealing with a keyword. */
! 400: for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
! 401: const struct Fts3Keyword *pKey = &aKeyword[ii];
! 402:
! 403: if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
! 404: continue;
! 405: }
! 406:
! 407: if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
! 408: int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
! 409: int nKey = pKey->n;
! 410: char cNext;
! 411:
! 412: /* If this is a "NEAR" keyword, check for an explicit nearness. */
! 413: if( pKey->eType==FTSQUERY_NEAR ){
! 414: assert( nKey==4 );
! 415: if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
! 416: nNear = 0;
! 417: for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
! 418: nNear = nNear * 10 + (zInput[nKey] - '0');
! 419: }
! 420: }
! 421: }
! 422:
! 423: /* At this point this is probably a keyword. But for that to be true,
! 424: ** the next byte must contain either whitespace, an open or close
! 425: ** parenthesis, a quote character, or EOF.
! 426: */
! 427: cNext = zInput[nKey];
! 428: if( fts3isspace(cNext)
! 429: || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
! 430: ){
! 431: pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
! 432: if( !pRet ){
! 433: return SQLITE_NOMEM;
! 434: }
! 435: pRet->eType = pKey->eType;
! 436: pRet->nNear = nNear;
! 437: *ppExpr = pRet;
! 438: *pnConsumed = (int)((zInput - z) + nKey);
! 439: return SQLITE_OK;
! 440: }
! 441:
! 442: /* Turns out that wasn't a keyword after all. This happens if the
! 443: ** user has supplied a token such as "ORacle". Continue.
! 444: */
! 445: }
! 446: }
! 447:
! 448: /* Check for an open bracket. */
! 449: if( sqlite3_fts3_enable_parentheses ){
! 450: if( *zInput=='(' ){
! 451: int nConsumed;
! 452: pParse->nNest++;
! 453: rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
! 454: if( rc==SQLITE_OK && !*ppExpr ){
! 455: rc = SQLITE_DONE;
! 456: }
! 457: *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
! 458: return rc;
! 459: }
! 460:
! 461: /* Check for a close bracket. */
! 462: if( *zInput==')' ){
! 463: pParse->nNest--;
! 464: *pnConsumed = (int)((zInput - z) + 1);
! 465: return SQLITE_DONE;
! 466: }
! 467: }
! 468:
! 469: /* See if we are dealing with a quoted phrase. If this is the case, then
! 470: ** search for the closing quote and pass the whole string to getNextString()
! 471: ** for processing. This is easy to do, as fts3 has no syntax for escaping
! 472: ** a quote character embedded in a string.
! 473: */
! 474: if( *zInput=='"' ){
! 475: for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
! 476: *pnConsumed = (int)((zInput - z) + ii + 1);
! 477: if( ii==nInput ){
! 478: return SQLITE_ERROR;
! 479: }
! 480: return getNextString(pParse, &zInput[1], ii-1, ppExpr);
! 481: }
! 482:
! 483:
! 484: /* If control flows to this point, this must be a regular token, or
! 485: ** the end of the input. Read a regular token using the sqlite3_tokenizer
! 486: ** interface. Before doing so, figure out if there is an explicit
! 487: ** column specifier for the token.
! 488: **
! 489: ** TODO: Strangely, it is not possible to associate a column specifier
! 490: ** with a quoted phrase, only with a single token. Not sure if this was
! 491: ** an implementation artifact or an intentional decision when fts3 was
! 492: ** first implemented. Whichever it was, this module duplicates the
! 493: ** limitation.
! 494: */
! 495: iCol = pParse->iDefaultCol;
! 496: iColLen = 0;
! 497: for(ii=0; ii<pParse->nCol; ii++){
! 498: const char *zStr = pParse->azCol[ii];
! 499: int nStr = (int)strlen(zStr);
! 500: if( nInput>nStr && zInput[nStr]==':'
! 501: && sqlite3_strnicmp(zStr, zInput, nStr)==0
! 502: ){
! 503: iCol = ii;
! 504: iColLen = (int)((zInput - z) + nStr + 1);
! 505: break;
! 506: }
! 507: }
! 508: rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
! 509: *pnConsumed += iColLen;
! 510: return rc;
! 511: }
! 512:
! 513: /*
! 514: ** The argument is an Fts3Expr structure for a binary operator (any type
! 515: ** except an FTSQUERY_PHRASE). Return an integer value representing the
! 516: ** precedence of the operator. Lower values have a higher precedence (i.e.
! 517: ** group more tightly). For example, in the C language, the == operator
! 518: ** groups more tightly than ||, and would therefore have a higher precedence.
! 519: **
! 520: ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
! 521: ** is defined), the order of the operators in precedence from highest to
! 522: ** lowest is:
! 523: **
! 524: ** NEAR
! 525: ** NOT
! 526: ** AND (including implicit ANDs)
! 527: ** OR
! 528: **
! 529: ** Note that when using the old query syntax, the OR operator has a higher
! 530: ** precedence than the AND operator.
! 531: */
! 532: static int opPrecedence(Fts3Expr *p){
! 533: assert( p->eType!=FTSQUERY_PHRASE );
! 534: if( sqlite3_fts3_enable_parentheses ){
! 535: return p->eType;
! 536: }else if( p->eType==FTSQUERY_NEAR ){
! 537: return 1;
! 538: }else if( p->eType==FTSQUERY_OR ){
! 539: return 2;
! 540: }
! 541: assert( p->eType==FTSQUERY_AND );
! 542: return 3;
! 543: }
! 544:
! 545: /*
! 546: ** Argument ppHead contains a pointer to the current head of a query
! 547: ** expression tree being parsed. pPrev is the expression node most recently
! 548: ** inserted into the tree. This function adds pNew, which is always a binary
! 549: ** operator node, into the expression tree based on the relative precedence
! 550: ** of pNew and the existing nodes of the tree. This may result in the head
! 551: ** of the tree changing, in which case *ppHead is set to the new root node.
! 552: */
! 553: static void insertBinaryOperator(
! 554: Fts3Expr **ppHead, /* Pointer to the root node of a tree */
! 555: Fts3Expr *pPrev, /* Node most recently inserted into the tree */
! 556: Fts3Expr *pNew /* New binary node to insert into expression tree */
! 557: ){
! 558: Fts3Expr *pSplit = pPrev;
! 559: while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
! 560: pSplit = pSplit->pParent;
! 561: }
! 562:
! 563: if( pSplit->pParent ){
! 564: assert( pSplit->pParent->pRight==pSplit );
! 565: pSplit->pParent->pRight = pNew;
! 566: pNew->pParent = pSplit->pParent;
! 567: }else{
! 568: *ppHead = pNew;
! 569: }
! 570: pNew->pLeft = pSplit;
! 571: pSplit->pParent = pNew;
! 572: }
! 573:
! 574: /*
! 575: ** Parse the fts3 query expression found in buffer z, length n. This function
! 576: ** returns either when the end of the buffer is reached or an unmatched
! 577: ** closing bracket - ')' - is encountered.
! 578: **
! 579: ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
! 580: ** parsed form of the expression and *pnConsumed is set to the number of
! 581: ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
! 582: ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
! 583: */
! 584: static int fts3ExprParse(
! 585: ParseContext *pParse, /* fts3 query parse context */
! 586: const char *z, int n, /* Text of MATCH query */
! 587: Fts3Expr **ppExpr, /* OUT: Parsed query structure */
! 588: int *pnConsumed /* OUT: Number of bytes consumed */
! 589: ){
! 590: Fts3Expr *pRet = 0;
! 591: Fts3Expr *pPrev = 0;
! 592: Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */
! 593: int nIn = n;
! 594: const char *zIn = z;
! 595: int rc = SQLITE_OK;
! 596: int isRequirePhrase = 1;
! 597:
! 598: while( rc==SQLITE_OK ){
! 599: Fts3Expr *p = 0;
! 600: int nByte = 0;
! 601: rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
! 602: if( rc==SQLITE_OK ){
! 603: int isPhrase;
! 604:
! 605: if( !sqlite3_fts3_enable_parentheses
! 606: && p->eType==FTSQUERY_PHRASE && pParse->isNot
! 607: ){
! 608: /* Create an implicit NOT operator. */
! 609: Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
! 610: if( !pNot ){
! 611: sqlite3Fts3ExprFree(p);
! 612: rc = SQLITE_NOMEM;
! 613: goto exprparse_out;
! 614: }
! 615: pNot->eType = FTSQUERY_NOT;
! 616: pNot->pRight = p;
! 617: if( pNotBranch ){
! 618: pNot->pLeft = pNotBranch;
! 619: }
! 620: pNotBranch = pNot;
! 621: p = pPrev;
! 622: }else{
! 623: int eType = p->eType;
! 624: isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
! 625:
! 626: /* The isRequirePhrase variable is set to true if a phrase or
! 627: ** an expression contained in parenthesis is required. If a
! 628: ** binary operator (AND, OR, NOT or NEAR) is encounted when
! 629: ** isRequirePhrase is set, this is a syntax error.
! 630: */
! 631: if( !isPhrase && isRequirePhrase ){
! 632: sqlite3Fts3ExprFree(p);
! 633: rc = SQLITE_ERROR;
! 634: goto exprparse_out;
! 635: }
! 636:
! 637: if( isPhrase && !isRequirePhrase ){
! 638: /* Insert an implicit AND operator. */
! 639: Fts3Expr *pAnd;
! 640: assert( pRet && pPrev );
! 641: pAnd = fts3MallocZero(sizeof(Fts3Expr));
! 642: if( !pAnd ){
! 643: sqlite3Fts3ExprFree(p);
! 644: rc = SQLITE_NOMEM;
! 645: goto exprparse_out;
! 646: }
! 647: pAnd->eType = FTSQUERY_AND;
! 648: insertBinaryOperator(&pRet, pPrev, pAnd);
! 649: pPrev = pAnd;
! 650: }
! 651:
! 652: /* This test catches attempts to make either operand of a NEAR
! 653: ** operator something other than a phrase. For example, either of
! 654: ** the following:
! 655: **
! 656: ** (bracketed expression) NEAR phrase
! 657: ** phrase NEAR (bracketed expression)
! 658: **
! 659: ** Return an error in either case.
! 660: */
! 661: if( pPrev && (
! 662: (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
! 663: || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
! 664: )){
! 665: sqlite3Fts3ExprFree(p);
! 666: rc = SQLITE_ERROR;
! 667: goto exprparse_out;
! 668: }
! 669:
! 670: if( isPhrase ){
! 671: if( pRet ){
! 672: assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
! 673: pPrev->pRight = p;
! 674: p->pParent = pPrev;
! 675: }else{
! 676: pRet = p;
! 677: }
! 678: }else{
! 679: insertBinaryOperator(&pRet, pPrev, p);
! 680: }
! 681: isRequirePhrase = !isPhrase;
! 682: }
! 683: assert( nByte>0 );
! 684: }
! 685: assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
! 686: nIn -= nByte;
! 687: zIn += nByte;
! 688: pPrev = p;
! 689: }
! 690:
! 691: if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
! 692: rc = SQLITE_ERROR;
! 693: }
! 694:
! 695: if( rc==SQLITE_DONE ){
! 696: rc = SQLITE_OK;
! 697: if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
! 698: if( !pRet ){
! 699: rc = SQLITE_ERROR;
! 700: }else{
! 701: Fts3Expr *pIter = pNotBranch;
! 702: while( pIter->pLeft ){
! 703: pIter = pIter->pLeft;
! 704: }
! 705: pIter->pLeft = pRet;
! 706: pRet = pNotBranch;
! 707: }
! 708: }
! 709: }
! 710: *pnConsumed = n - nIn;
! 711:
! 712: exprparse_out:
! 713: if( rc!=SQLITE_OK ){
! 714: sqlite3Fts3ExprFree(pRet);
! 715: sqlite3Fts3ExprFree(pNotBranch);
! 716: pRet = 0;
! 717: }
! 718: *ppExpr = pRet;
! 719: return rc;
! 720: }
! 721:
! 722: /*
! 723: ** Parameters z and n contain a pointer to and length of a buffer containing
! 724: ** an fts3 query expression, respectively. This function attempts to parse the
! 725: ** query expression and create a tree of Fts3Expr structures representing the
! 726: ** parsed expression. If successful, *ppExpr is set to point to the head
! 727: ** of the parsed expression tree and SQLITE_OK is returned. If an error
! 728: ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
! 729: ** error) is returned and *ppExpr is set to 0.
! 730: **
! 731: ** If parameter n is a negative number, then z is assumed to point to a
! 732: ** nul-terminated string and the length is determined using strlen().
! 733: **
! 734: ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
! 735: ** use to normalize query tokens while parsing the expression. The azCol[]
! 736: ** array, which is assumed to contain nCol entries, should contain the names
! 737: ** of each column in the target fts3 table, in order from left to right.
! 738: ** Column names must be nul-terminated strings.
! 739: **
! 740: ** The iDefaultCol parameter should be passed the index of the table column
! 741: ** that appears on the left-hand-side of the MATCH operator (the default
! 742: ** column to match against for tokens for which a column name is not explicitly
! 743: ** specified as part of the query string), or -1 if tokens may by default
! 744: ** match any table column.
! 745: */
! 746: int sqlite3Fts3ExprParse(
! 747: sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
! 748: char **azCol, /* Array of column names for fts3 table */
! 749: int bFts4, /* True to allow FTS4-only syntax */
! 750: int nCol, /* Number of entries in azCol[] */
! 751: int iDefaultCol, /* Default column to query */
! 752: const char *z, int n, /* Text of MATCH query */
! 753: Fts3Expr **ppExpr /* OUT: Parsed query structure */
! 754: ){
! 755: int nParsed;
! 756: int rc;
! 757: ParseContext sParse;
! 758: sParse.pTokenizer = pTokenizer;
! 759: sParse.azCol = (const char **)azCol;
! 760: sParse.nCol = nCol;
! 761: sParse.iDefaultCol = iDefaultCol;
! 762: sParse.nNest = 0;
! 763: sParse.bFts4 = bFts4;
! 764: if( z==0 ){
! 765: *ppExpr = 0;
! 766: return SQLITE_OK;
! 767: }
! 768: if( n<0 ){
! 769: n = (int)strlen(z);
! 770: }
! 771: rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
! 772:
! 773: /* Check for mismatched parenthesis */
! 774: if( rc==SQLITE_OK && sParse.nNest ){
! 775: rc = SQLITE_ERROR;
! 776: sqlite3Fts3ExprFree(*ppExpr);
! 777: *ppExpr = 0;
! 778: }
! 779:
! 780: return rc;
! 781: }
! 782:
! 783: /*
! 784: ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
! 785: */
! 786: void sqlite3Fts3ExprFree(Fts3Expr *p){
! 787: if( p ){
! 788: assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
! 789: sqlite3Fts3ExprFree(p->pLeft);
! 790: sqlite3Fts3ExprFree(p->pRight);
! 791: sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
! 792: sqlite3_free(p->aMI);
! 793: sqlite3_free(p);
! 794: }
! 795: }
! 796:
! 797: /****************************************************************************
! 798: *****************************************************************************
! 799: ** Everything after this point is just test code.
! 800: */
! 801:
! 802: #ifdef SQLITE_TEST
! 803:
! 804: #include <stdio.h>
! 805:
! 806: /*
! 807: ** Function to query the hash-table of tokenizers (see README.tokenizers).
! 808: */
! 809: static int queryTestTokenizer(
! 810: sqlite3 *db,
! 811: const char *zName,
! 812: const sqlite3_tokenizer_module **pp
! 813: ){
! 814: int rc;
! 815: sqlite3_stmt *pStmt;
! 816: const char zSql[] = "SELECT fts3_tokenizer(?)";
! 817:
! 818: *pp = 0;
! 819: rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
! 820: if( rc!=SQLITE_OK ){
! 821: return rc;
! 822: }
! 823:
! 824: sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
! 825: if( SQLITE_ROW==sqlite3_step(pStmt) ){
! 826: if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
! 827: memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
! 828: }
! 829: }
! 830:
! 831: return sqlite3_finalize(pStmt);
! 832: }
! 833:
! 834: /*
! 835: ** Return a pointer to a buffer containing a text representation of the
! 836: ** expression passed as the first argument. The buffer is obtained from
! 837: ** sqlite3_malloc(). It is the responsibility of the caller to use
! 838: ** sqlite3_free() to release the memory. If an OOM condition is encountered,
! 839: ** NULL is returned.
! 840: **
! 841: ** If the second argument is not NULL, then its contents are prepended to
! 842: ** the returned expression text and then freed using sqlite3_free().
! 843: */
! 844: static char *exprToString(Fts3Expr *pExpr, char *zBuf){
! 845: switch( pExpr->eType ){
! 846: case FTSQUERY_PHRASE: {
! 847: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 848: int i;
! 849: zBuf = sqlite3_mprintf(
! 850: "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
! 851: for(i=0; zBuf && i<pPhrase->nToken; i++){
! 852: zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
! 853: pPhrase->aToken[i].n, pPhrase->aToken[i].z,
! 854: (pPhrase->aToken[i].isPrefix?"+":"")
! 855: );
! 856: }
! 857: return zBuf;
! 858: }
! 859:
! 860: case FTSQUERY_NEAR:
! 861: zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
! 862: break;
! 863: case FTSQUERY_NOT:
! 864: zBuf = sqlite3_mprintf("%zNOT ", zBuf);
! 865: break;
! 866: case FTSQUERY_AND:
! 867: zBuf = sqlite3_mprintf("%zAND ", zBuf);
! 868: break;
! 869: case FTSQUERY_OR:
! 870: zBuf = sqlite3_mprintf("%zOR ", zBuf);
! 871: break;
! 872: }
! 873:
! 874: if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
! 875: if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
! 876: if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
! 877:
! 878: if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
! 879: if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
! 880:
! 881: return zBuf;
! 882: }
! 883:
! 884: /*
! 885: ** This is the implementation of a scalar SQL function used to test the
! 886: ** expression parser. It should be called as follows:
! 887: **
! 888: ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
! 889: **
! 890: ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
! 891: ** to parse the query expression (see README.tokenizers). The second argument
! 892: ** is the query expression to parse. Each subsequent argument is the name
! 893: ** of a column of the fts3 table that the query expression may refer to.
! 894: ** For example:
! 895: **
! 896: ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
! 897: */
! 898: static void fts3ExprTest(
! 899: sqlite3_context *context,
! 900: int argc,
! 901: sqlite3_value **argv
! 902: ){
! 903: sqlite3_tokenizer_module const *pModule = 0;
! 904: sqlite3_tokenizer *pTokenizer = 0;
! 905: int rc;
! 906: char **azCol = 0;
! 907: const char *zExpr;
! 908: int nExpr;
! 909: int nCol;
! 910: int ii;
! 911: Fts3Expr *pExpr;
! 912: char *zBuf = 0;
! 913: sqlite3 *db = sqlite3_context_db_handle(context);
! 914:
! 915: if( argc<3 ){
! 916: sqlite3_result_error(context,
! 917: "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
! 918: );
! 919: return;
! 920: }
! 921:
! 922: rc = queryTestTokenizer(db,
! 923: (const char *)sqlite3_value_text(argv[0]), &pModule);
! 924: if( rc==SQLITE_NOMEM ){
! 925: sqlite3_result_error_nomem(context);
! 926: goto exprtest_out;
! 927: }else if( !pModule ){
! 928: sqlite3_result_error(context, "No such tokenizer module", -1);
! 929: goto exprtest_out;
! 930: }
! 931:
! 932: rc = pModule->xCreate(0, 0, &pTokenizer);
! 933: assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
! 934: if( rc==SQLITE_NOMEM ){
! 935: sqlite3_result_error_nomem(context);
! 936: goto exprtest_out;
! 937: }
! 938: pTokenizer->pModule = pModule;
! 939:
! 940: zExpr = (const char *)sqlite3_value_text(argv[1]);
! 941: nExpr = sqlite3_value_bytes(argv[1]);
! 942: nCol = argc-2;
! 943: azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
! 944: if( !azCol ){
! 945: sqlite3_result_error_nomem(context);
! 946: goto exprtest_out;
! 947: }
! 948: for(ii=0; ii<nCol; ii++){
! 949: azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
! 950: }
! 951:
! 952: rc = sqlite3Fts3ExprParse(
! 953: pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
! 954: );
! 955: if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
! 956: sqlite3_result_error(context, "Error parsing expression", -1);
! 957: }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
! 958: sqlite3_result_error_nomem(context);
! 959: }else{
! 960: sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
! 961: sqlite3_free(zBuf);
! 962: }
! 963:
! 964: sqlite3Fts3ExprFree(pExpr);
! 965:
! 966: exprtest_out:
! 967: if( pModule && pTokenizer ){
! 968: rc = pModule->xDestroy(pTokenizer);
! 969: }
! 970: sqlite3_free(azCol);
! 971: }
! 972:
! 973: /*
! 974: ** Register the query expression parser test function fts3_exprtest()
! 975: ** with database connection db.
! 976: */
! 977: int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
! 978: return sqlite3_create_function(
! 979: db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
! 980: );
! 981: }
! 982:
! 983: #endif
! 984: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>