/* Return to fts3_expr.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / ext / fts3 | */
1.1 ! misho 1: /* ! 2: ** 2008 Nov 28 ! 3: ** ! 4: ** The author disclaims copyright to this source code. In place of ! 5: ** a legal notice, here is a blessing: ! 6: ** ! 7: ** May you do good and not evil. ! 8: ** May you find forgiveness for yourself and forgive others. ! 9: ** May you share freely, never taking more than you give. ! 10: ** ! 11: ****************************************************************************** ! 12: ** ! 13: ** This module contains code that implements a parser for fts3 query strings ! 14: ** (the right-hand argument to the MATCH operator). Because the supported ! 15: ** syntax is relatively simple, the whole tokenizer/parser system is ! 16: ** hand-coded. ! 17: */ ! 18: #include "fts3Int.h" ! 19: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) ! 20: ! 21: /* ! 22: ** By default, this module parses the legacy syntax that has been ! 23: ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS ! 24: ** is defined, then it uses the new syntax. The differences between ! 25: ** the new and the old syntaxes are: ! 26: ** ! 27: ** a) The new syntax supports parenthesis. The old does not. ! 28: ** ! 29: ** b) The new syntax supports the AND and NOT operators. The old does not. ! 30: ** ! 31: ** c) The old syntax supports the "-" token qualifier. This is not ! 32: ** supported by the new syntax (it is replaced by the NOT operator). ! 33: ** ! 34: ** d) When using the old syntax, the OR operator has a greater precedence ! 35: ** than an implicit AND. When using the new, both implicity and explicit ! 36: ** AND operators have a higher precedence than OR. ! 37: ** ! 38: ** If compiled with SQLITE_TEST defined, then this module exports the ! 39: ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable ! 40: ** to zero causes the module to use the old syntax. If it is set to ! 41: ** non-zero the new syntax is activated. This is so both syntaxes can ! 42: ** be tested using a single build of testfixture. ! 
43: ** ! 44: ** The following describes the syntax supported by the fts3 MATCH ! 45: ** operator in a similar format to that used by the lemon parser ! 46: ** generator. This module does not use actually lemon, it uses a ! 47: ** custom parser. ! 48: ** ! 49: ** query ::= andexpr (OR andexpr)*. ! 50: ** ! 51: ** andexpr ::= notexpr (AND? notexpr)*. ! 52: ** ! 53: ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. ! 54: ** notexpr ::= LP query RP. ! 55: ** ! 56: ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. ! 57: ** ! 58: ** distance_opt ::= . ! 59: ** distance_opt ::= / INTEGER. ! 60: ** ! 61: ** phrase ::= TOKEN. ! 62: ** phrase ::= COLUMN:TOKEN. ! 63: ** phrase ::= "TOKEN TOKEN TOKEN...". ! 64: */ ! 65: ! 66: #ifdef SQLITE_TEST ! 67: int sqlite3_fts3_enable_parentheses = 0; ! 68: #else ! 69: # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS ! 70: # define sqlite3_fts3_enable_parentheses 1 ! 71: # else ! 72: # define sqlite3_fts3_enable_parentheses 0 ! 73: # endif ! 74: #endif ! 75: ! 76: /* ! 77: ** Default span for NEAR operators. ! 78: */ ! 79: #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 ! 80: ! 81: #include <string.h> ! 82: #include <assert.h> ! 83: ! 84: /* ! 85: ** isNot: ! 86: ** This variable is used by function getNextNode(). When getNextNode() is ! 87: ** called, it sets ParseContext.isNot to true if the 'next node' is a ! 88: ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the ! 89: ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to ! 90: ** zero. ! 91: */ ! 92: typedef struct ParseContext ParseContext; ! 93: struct ParseContext { ! 94: sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ ! 95: const char **azCol; /* Array of column names for fts3 table */ ! 96: int bFts4; /* True to allow FTS4-only syntax */ ! 97: int nCol; /* Number of entries in azCol[] */ ! 98: int iDefaultCol; /* Default column to query */ ! 99: int isNot; /* True if getNextNode() sees a unary - */ ! 
100: sqlite3_context *pCtx; /* Write error message here */ ! 101: int nNest; /* Number of nested brackets */ ! 102: }; ! 103: ! 104: /* ! 105: ** This function is equivalent to the standard isspace() function. ! 106: ** ! 107: ** The standard isspace() can be awkward to use safely, because although it ! 108: ** is defined to accept an argument of type int, its behaviour when passed ! 109: ** an integer that falls outside of the range of the unsigned char type ! 110: ** is undefined (and sometimes, "undefined" means segfault). This wrapper ! 111: ** is defined to accept an argument of type char, and always returns 0 for ! 112: ** any values that fall outside of the range of the unsigned char type (i.e. ! 113: ** negative values). ! 114: */ ! 115: static int fts3isspace(char c){ ! 116: return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; ! 117: } ! 118: ! 119: /* ! 120: ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, ! 121: ** zero the memory before returning a pointer to it. If unsuccessful, ! 122: ** return NULL. ! 123: */ ! 124: static void *fts3MallocZero(int nByte){ ! 125: void *pRet = sqlite3_malloc(nByte); ! 126: if( pRet ) memset(pRet, 0, nByte); ! 127: return pRet; ! 128: } ! 129: ! 130: ! 131: /* ! 132: ** Extract the next token from buffer z (length n) using the tokenizer ! 133: ** and other information (column names etc.) in pParse. Create an Fts3Expr ! 134: ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this ! 135: ** single token and set *ppExpr to point to it. If the end of the buffer is ! 136: ** reached before a token is found, set *ppExpr to zero. It is the ! 137: ** responsibility of the caller to eventually deallocate the allocated ! 138: ** Fts3Expr structure (if any) by passing it to sqlite3_free(). ! 139: ** ! 140: ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation ! 141: ** fails. ! 142: */ ! 143: static int getNextToken( ! 
144: ParseContext *pParse, /* fts3 query parse context */ ! 145: int iCol, /* Value for Fts3Phrase.iColumn */ ! 146: const char *z, int n, /* Input string */ ! 147: Fts3Expr **ppExpr, /* OUT: expression */ ! 148: int *pnConsumed /* OUT: Number of bytes consumed */ ! 149: ){ ! 150: sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; ! 151: sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; ! 152: int rc; ! 153: sqlite3_tokenizer_cursor *pCursor; ! 154: Fts3Expr *pRet = 0; ! 155: int nConsumed = 0; ! 156: ! 157: rc = pModule->xOpen(pTokenizer, z, n, &pCursor); ! 158: if( rc==SQLITE_OK ){ ! 159: const char *zToken; ! 160: int nToken, iStart, iEnd, iPosition; ! 161: int nByte; /* total space to allocate */ ! 162: ! 163: pCursor->pTokenizer = pTokenizer; ! 164: rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); ! 165: ! 166: if( rc==SQLITE_OK ){ ! 167: nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; ! 168: pRet = (Fts3Expr *)fts3MallocZero(nByte); ! 169: if( !pRet ){ ! 170: rc = SQLITE_NOMEM; ! 171: }else{ ! 172: pRet->eType = FTSQUERY_PHRASE; ! 173: pRet->pPhrase = (Fts3Phrase *)&pRet[1]; ! 174: pRet->pPhrase->nToken = 1; ! 175: pRet->pPhrase->iColumn = iCol; ! 176: pRet->pPhrase->aToken[0].n = nToken; ! 177: pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; ! 178: memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); ! 179: ! 180: if( iEnd<n && z[iEnd]=='*' ){ ! 181: pRet->pPhrase->aToken[0].isPrefix = 1; ! 182: iEnd++; ! 183: } ! 184: ! 185: while( 1 ){ ! 186: if( !sqlite3_fts3_enable_parentheses ! 187: && iStart>0 && z[iStart-1]=='-' ! 188: ){ ! 189: pParse->isNot = 1; ! 190: iStart--; ! 191: }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ ! 192: pRet->pPhrase->aToken[0].bFirst = 1; ! 193: iStart--; ! 194: }else{ ! 195: break; ! 196: } ! 197: } ! 198: ! 199: } ! 200: nConsumed = iEnd; ! 201: } ! 202: ! 203: pModule->xClose(pCursor); ! 204: } ! 205: ! 206: *pnConsumed = nConsumed; ! 207: *ppExpr = pRet; ! 
208: return rc; ! 209: } ! 210: ! 211: ! 212: /* ! 213: ** Enlarge a memory allocation. If an out-of-memory allocation occurs, ! 214: ** then free the old allocation. ! 215: */ ! 216: static void *fts3ReallocOrFree(void *pOrig, int nNew){ ! 217: void *pRet = sqlite3_realloc(pOrig, nNew); ! 218: if( !pRet ){ ! 219: sqlite3_free(pOrig); ! 220: } ! 221: return pRet; ! 222: } ! 223: ! 224: /* ! 225: ** Buffer zInput, length nInput, contains the contents of a quoted string ! 226: ** that appeared as part of an fts3 query expression. Neither quote character ! 227: ** is included in the buffer. This function attempts to tokenize the entire ! 228: ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE ! 229: ** containing the results. ! 230: ** ! 231: ** If successful, SQLITE_OK is returned and *ppExpr set to point at the ! 232: ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory ! 233: ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set ! 234: ** to 0. ! 235: */ ! 236: static int getNextString( ! 237: ParseContext *pParse, /* fts3 query parse context */ ! 238: const char *zInput, int nInput, /* Input string */ ! 239: Fts3Expr **ppExpr /* OUT: expression */ ! 240: ){ ! 241: sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; ! 242: sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; ! 243: int rc; ! 244: Fts3Expr *p = 0; ! 245: sqlite3_tokenizer_cursor *pCursor = 0; ! 246: char *zTemp = 0; ! 247: int nTemp = 0; ! 248: ! 249: const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); ! 250: int nToken = 0; ! 251: ! 252: /* The final Fts3Expr data structure, including the Fts3Phrase, ! 253: ** Fts3PhraseToken structures token buffers are all stored as a single ! 254: ** allocation so that the expression can be freed with a single call to ! 255: ** sqlite3_free(). Setting this up requires a two pass approach. ! 256: ** ! 
257: ** The first pass, in the block below, uses a tokenizer cursor to iterate ! 258: ** through the tokens in the expression. This pass uses fts3ReallocOrFree() ! 259: ** to assemble data in two dynamic buffers: ! 260: ** ! 261: ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase ! 262: ** structure, followed by the array of Fts3PhraseToken ! 263: ** structures. This pass only populates the Fts3PhraseToken array. ! 264: ** ! 265: ** Buffer zTemp: Contains copies of all tokens. ! 266: ** ! 267: ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, ! 268: ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase ! 269: ** structures. ! 270: */ ! 271: rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); ! 272: if( rc==SQLITE_OK ){ ! 273: int ii; ! 274: pCursor->pTokenizer = pTokenizer; ! 275: for(ii=0; rc==SQLITE_OK; ii++){ ! 276: const char *zByte; ! 277: int nByte, iBegin, iEnd, iPos; ! 278: rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); ! 279: if( rc==SQLITE_OK ){ ! 280: Fts3PhraseToken *pToken; ! 281: ! 282: p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); ! 283: if( !p ) goto no_mem; ! 284: ! 285: zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); ! 286: if( !zTemp ) goto no_mem; ! 287: ! 288: assert( nToken==ii ); ! 289: pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; ! 290: memset(pToken, 0, sizeof(Fts3PhraseToken)); ! 291: ! 292: memcpy(&zTemp[nTemp], zByte, nByte); ! 293: nTemp += nByte; ! 294: ! 295: pToken->n = nByte; ! 296: pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); ! 297: pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); ! 298: nToken = ii+1; ! 299: } ! 300: } ! 301: ! 302: pModule->xClose(pCursor); ! 303: pCursor = 0; ! 304: } ! 305: ! 306: if( rc==SQLITE_DONE ){ ! 307: int jj; ! 308: char *zBuf = 0; ! 309: ! 310: p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); ! 311: if( !p ) goto no_mem; ! 
312: memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); ! 313: p->eType = FTSQUERY_PHRASE; ! 314: p->pPhrase = (Fts3Phrase *)&p[1]; ! 315: p->pPhrase->iColumn = pParse->iDefaultCol; ! 316: p->pPhrase->nToken = nToken; ! 317: ! 318: zBuf = (char *)&p->pPhrase->aToken[nToken]; ! 319: if( zTemp ){ ! 320: memcpy(zBuf, zTemp, nTemp); ! 321: sqlite3_free(zTemp); ! 322: }else{ ! 323: assert( nTemp==0 ); ! 324: } ! 325: ! 326: for(jj=0; jj<p->pPhrase->nToken; jj++){ ! 327: p->pPhrase->aToken[jj].z = zBuf; ! 328: zBuf += p->pPhrase->aToken[jj].n; ! 329: } ! 330: rc = SQLITE_OK; ! 331: } ! 332: ! 333: *ppExpr = p; ! 334: return rc; ! 335: no_mem: ! 336: ! 337: if( pCursor ){ ! 338: pModule->xClose(pCursor); ! 339: } ! 340: sqlite3_free(zTemp); ! 341: sqlite3_free(p); ! 342: *ppExpr = 0; ! 343: return SQLITE_NOMEM; ! 344: } ! 345: ! 346: /* ! 347: ** Function getNextNode(), which is called by fts3ExprParse(), may itself ! 348: ** call fts3ExprParse(). So this forward declaration is required. ! 349: */ ! 350: static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); ! 351: ! 352: /* ! 353: ** The output variable *ppExpr is populated with an allocated Fts3Expr ! 354: ** structure, or set to 0 if the end of the input buffer is reached. ! 355: ** ! 356: ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM ! 357: ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. ! 358: ** If SQLITE_ERROR is returned, pContext is populated with an error message. ! 359: */ ! 360: static int getNextNode( ! 361: ParseContext *pParse, /* fts3 query parse context */ ! 362: const char *z, int n, /* Input string */ ! 363: Fts3Expr **ppExpr, /* OUT: expression */ ! 364: int *pnConsumed /* OUT: Number of bytes consumed */ ! 365: ){ ! 366: static const struct Fts3Keyword { ! 367: char *z; /* Keyword text */ ! 368: unsigned char n; /* Length of the keyword */ ! 
369: unsigned char parenOnly; /* Only valid in paren mode */ ! 370: unsigned char eType; /* Keyword code */ ! 371: } aKeyword[] = { ! 372: { "OR" , 2, 0, FTSQUERY_OR }, ! 373: { "AND", 3, 1, FTSQUERY_AND }, ! 374: { "NOT", 3, 1, FTSQUERY_NOT }, ! 375: { "NEAR", 4, 0, FTSQUERY_NEAR } ! 376: }; ! 377: int ii; ! 378: int iCol; ! 379: int iColLen; ! 380: int rc; ! 381: Fts3Expr *pRet = 0; ! 382: ! 383: const char *zInput = z; ! 384: int nInput = n; ! 385: ! 386: pParse->isNot = 0; ! 387: ! 388: /* Skip over any whitespace before checking for a keyword, an open or ! 389: ** close bracket, or a quoted string. ! 390: */ ! 391: while( nInput>0 && fts3isspace(*zInput) ){ ! 392: nInput--; ! 393: zInput++; ! 394: } ! 395: if( nInput==0 ){ ! 396: return SQLITE_DONE; ! 397: } ! 398: ! 399: /* See if we are dealing with a keyword. */ ! 400: for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ ! 401: const struct Fts3Keyword *pKey = &aKeyword[ii]; ! 402: ! 403: if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ ! 404: continue; ! 405: } ! 406: ! 407: if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ ! 408: int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; ! 409: int nKey = pKey->n; ! 410: char cNext; ! 411: ! 412: /* If this is a "NEAR" keyword, check for an explicit nearness. */ ! 413: if( pKey->eType==FTSQUERY_NEAR ){ ! 414: assert( nKey==4 ); ! 415: if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ ! 416: nNear = 0; ! 417: for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ ! 418: nNear = nNear * 10 + (zInput[nKey] - '0'); ! 419: } ! 420: } ! 421: } ! 422: ! 423: /* At this point this is probably a keyword. But for that to be true, ! 424: ** the next byte must contain either whitespace, an open or close ! 425: ** parenthesis, a quote character, or EOF. ! 426: */ ! 427: cNext = zInput[nKey]; ! 428: if( fts3isspace(cNext) ! 429: || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 ! 430: ){ ! 
431: pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); ! 432: if( !pRet ){ ! 433: return SQLITE_NOMEM; ! 434: } ! 435: pRet->eType = pKey->eType; ! 436: pRet->nNear = nNear; ! 437: *ppExpr = pRet; ! 438: *pnConsumed = (int)((zInput - z) + nKey); ! 439: return SQLITE_OK; ! 440: } ! 441: ! 442: /* Turns out that wasn't a keyword after all. This happens if the ! 443: ** user has supplied a token such as "ORacle". Continue. ! 444: */ ! 445: } ! 446: } ! 447: ! 448: /* Check for an open bracket. */ ! 449: if( sqlite3_fts3_enable_parentheses ){ ! 450: if( *zInput=='(' ){ ! 451: int nConsumed; ! 452: pParse->nNest++; ! 453: rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); ! 454: if( rc==SQLITE_OK && !*ppExpr ){ ! 455: rc = SQLITE_DONE; ! 456: } ! 457: *pnConsumed = (int)((zInput - z) + 1 + nConsumed); ! 458: return rc; ! 459: } ! 460: ! 461: /* Check for a close bracket. */ ! 462: if( *zInput==')' ){ ! 463: pParse->nNest--; ! 464: *pnConsumed = (int)((zInput - z) + 1); ! 465: return SQLITE_DONE; ! 466: } ! 467: } ! 468: ! 469: /* See if we are dealing with a quoted phrase. If this is the case, then ! 470: ** search for the closing quote and pass the whole string to getNextString() ! 471: ** for processing. This is easy to do, as fts3 has no syntax for escaping ! 472: ** a quote character embedded in a string. ! 473: */ ! 474: if( *zInput=='"' ){ ! 475: for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); ! 476: *pnConsumed = (int)((zInput - z) + ii + 1); ! 477: if( ii==nInput ){ ! 478: return SQLITE_ERROR; ! 479: } ! 480: return getNextString(pParse, &zInput[1], ii-1, ppExpr); ! 481: } ! 482: ! 483: ! 484: /* If control flows to this point, this must be a regular token, or ! 485: ** the end of the input. Read a regular token using the sqlite3_tokenizer ! 486: ** interface. Before doing so, figure out if there is an explicit ! 487: ** column specifier for the token. ! 488: ** ! 489: ** TODO: Strangely, it is not possible to associate a column specifier ! 
490: ** with a quoted phrase, only with a single token. Not sure if this was ! 491: ** an implementation artifact or an intentional decision when fts3 was ! 492: ** first implemented. Whichever it was, this module duplicates the ! 493: ** limitation. ! 494: */ ! 495: iCol = pParse->iDefaultCol; ! 496: iColLen = 0; ! 497: for(ii=0; ii<pParse->nCol; ii++){ ! 498: const char *zStr = pParse->azCol[ii]; ! 499: int nStr = (int)strlen(zStr); ! 500: if( nInput>nStr && zInput[nStr]==':' ! 501: && sqlite3_strnicmp(zStr, zInput, nStr)==0 ! 502: ){ ! 503: iCol = ii; ! 504: iColLen = (int)((zInput - z) + nStr + 1); ! 505: break; ! 506: } ! 507: } ! 508: rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); ! 509: *pnConsumed += iColLen; ! 510: return rc; ! 511: } ! 512: ! 513: /* ! 514: ** The argument is an Fts3Expr structure for a binary operator (any type ! 515: ** except an FTSQUERY_PHRASE). Return an integer value representing the ! 516: ** precedence of the operator. Lower values have a higher precedence (i.e. ! 517: ** group more tightly). For example, in the C language, the == operator ! 518: ** groups more tightly than ||, and would therefore have a higher precedence. ! 519: ** ! 520: ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS ! 521: ** is defined), the order of the operators in precedence from highest to ! 522: ** lowest is: ! 523: ** ! 524: ** NEAR ! 525: ** NOT ! 526: ** AND (including implicit ANDs) ! 527: ** OR ! 528: ** ! 529: ** Note that when using the old query syntax, the OR operator has a higher ! 530: ** precedence than the AND operator. ! 531: */ ! 532: static int opPrecedence(Fts3Expr *p){ ! 533: assert( p->eType!=FTSQUERY_PHRASE ); ! 534: if( sqlite3_fts3_enable_parentheses ){ ! 535: return p->eType; ! 536: }else if( p->eType==FTSQUERY_NEAR ){ ! 537: return 1; ! 538: }else if( p->eType==FTSQUERY_OR ){ ! 539: return 2; ! 540: } ! 541: assert( p->eType==FTSQUERY_AND ); ! 542: return 3; ! 543: } ! 
544: ! 545: /* ! 546: ** Argument ppHead contains a pointer to the current head of a query ! 547: ** expression tree being parsed. pPrev is the expression node most recently ! 548: ** inserted into the tree. This function adds pNew, which is always a binary ! 549: ** operator node, into the expression tree based on the relative precedence ! 550: ** of pNew and the existing nodes of the tree. This may result in the head ! 551: ** of the tree changing, in which case *ppHead is set to the new root node. ! 552: */ ! 553: static void insertBinaryOperator( ! 554: Fts3Expr **ppHead, /* Pointer to the root node of a tree */ ! 555: Fts3Expr *pPrev, /* Node most recently inserted into the tree */ ! 556: Fts3Expr *pNew /* New binary node to insert into expression tree */ ! 557: ){ ! 558: Fts3Expr *pSplit = pPrev; ! 559: while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ ! 560: pSplit = pSplit->pParent; ! 561: } ! 562: ! 563: if( pSplit->pParent ){ ! 564: assert( pSplit->pParent->pRight==pSplit ); ! 565: pSplit->pParent->pRight = pNew; ! 566: pNew->pParent = pSplit->pParent; ! 567: }else{ ! 568: *ppHead = pNew; ! 569: } ! 570: pNew->pLeft = pSplit; ! 571: pSplit->pParent = pNew; ! 572: } ! 573: ! 574: /* ! 575: ** Parse the fts3 query expression found in buffer z, length n. This function ! 576: ** returns either when the end of the buffer is reached or an unmatched ! 577: ** closing bracket - ')' - is encountered. ! 578: ** ! 579: ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the ! 580: ** parsed form of the expression and *pnConsumed is set to the number of ! 581: ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM ! 582: ** (out of memory error) or SQLITE_ERROR (parse error) is returned. ! 583: */ ! 584: static int fts3ExprParse( ! 585: ParseContext *pParse, /* fts3 query parse context */ ! 586: const char *z, int n, /* Text of MATCH query */ ! 
587: Fts3Expr **ppExpr, /* OUT: Parsed query structure */ ! 588: int *pnConsumed /* OUT: Number of bytes consumed */ ! 589: ){ ! 590: Fts3Expr *pRet = 0; ! 591: Fts3Expr *pPrev = 0; ! 592: Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ ! 593: int nIn = n; ! 594: const char *zIn = z; ! 595: int rc = SQLITE_OK; ! 596: int isRequirePhrase = 1; ! 597: ! 598: while( rc==SQLITE_OK ){ ! 599: Fts3Expr *p = 0; ! 600: int nByte = 0; ! 601: rc = getNextNode(pParse, zIn, nIn, &p, &nByte); ! 602: if( rc==SQLITE_OK ){ ! 603: int isPhrase; ! 604: ! 605: if( !sqlite3_fts3_enable_parentheses ! 606: && p->eType==FTSQUERY_PHRASE && pParse->isNot ! 607: ){ ! 608: /* Create an implicit NOT operator. */ ! 609: Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); ! 610: if( !pNot ){ ! 611: sqlite3Fts3ExprFree(p); ! 612: rc = SQLITE_NOMEM; ! 613: goto exprparse_out; ! 614: } ! 615: pNot->eType = FTSQUERY_NOT; ! 616: pNot->pRight = p; ! 617: if( pNotBranch ){ ! 618: pNot->pLeft = pNotBranch; ! 619: } ! 620: pNotBranch = pNot; ! 621: p = pPrev; ! 622: }else{ ! 623: int eType = p->eType; ! 624: isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); ! 625: ! 626: /* The isRequirePhrase variable is set to true if a phrase or ! 627: ** an expression contained in parenthesis is required. If a ! 628: ** binary operator (AND, OR, NOT or NEAR) is encounted when ! 629: ** isRequirePhrase is set, this is a syntax error. ! 630: */ ! 631: if( !isPhrase && isRequirePhrase ){ ! 632: sqlite3Fts3ExprFree(p); ! 633: rc = SQLITE_ERROR; ! 634: goto exprparse_out; ! 635: } ! 636: ! 637: if( isPhrase && !isRequirePhrase ){ ! 638: /* Insert an implicit AND operator. */ ! 639: Fts3Expr *pAnd; ! 640: assert( pRet && pPrev ); ! 641: pAnd = fts3MallocZero(sizeof(Fts3Expr)); ! 642: if( !pAnd ){ ! 643: sqlite3Fts3ExprFree(p); ! 644: rc = SQLITE_NOMEM; ! 645: goto exprparse_out; ! 646: } ! 647: pAnd->eType = FTSQUERY_AND; ! 648: insertBinaryOperator(&pRet, pPrev, pAnd); ! 649: pPrev = pAnd; ! 650: } ! 651: ! 
652: /* This test catches attempts to make either operand of a NEAR ! 653: ** operator something other than a phrase. For example, either of ! 654: ** the following: ! 655: ** ! 656: ** (bracketed expression) NEAR phrase ! 657: ** phrase NEAR (bracketed expression) ! 658: ** ! 659: ** Return an error in either case. ! 660: */ ! 661: if( pPrev && ( ! 662: (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) ! 663: || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) ! 664: )){ ! 665: sqlite3Fts3ExprFree(p); ! 666: rc = SQLITE_ERROR; ! 667: goto exprparse_out; ! 668: } ! 669: ! 670: if( isPhrase ){ ! 671: if( pRet ){ ! 672: assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); ! 673: pPrev->pRight = p; ! 674: p->pParent = pPrev; ! 675: }else{ ! 676: pRet = p; ! 677: } ! 678: }else{ ! 679: insertBinaryOperator(&pRet, pPrev, p); ! 680: } ! 681: isRequirePhrase = !isPhrase; ! 682: } ! 683: assert( nByte>0 ); ! 684: } ! 685: assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); ! 686: nIn -= nByte; ! 687: zIn += nByte; ! 688: pPrev = p; ! 689: } ! 690: ! 691: if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ ! 692: rc = SQLITE_ERROR; ! 693: } ! 694: ! 695: if( rc==SQLITE_DONE ){ ! 696: rc = SQLITE_OK; ! 697: if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ ! 698: if( !pRet ){ ! 699: rc = SQLITE_ERROR; ! 700: }else{ ! 701: Fts3Expr *pIter = pNotBranch; ! 702: while( pIter->pLeft ){ ! 703: pIter = pIter->pLeft; ! 704: } ! 705: pIter->pLeft = pRet; ! 706: pRet = pNotBranch; ! 707: } ! 708: } ! 709: } ! 710: *pnConsumed = n - nIn; ! 711: ! 712: exprparse_out: ! 713: if( rc!=SQLITE_OK ){ ! 714: sqlite3Fts3ExprFree(pRet); ! 715: sqlite3Fts3ExprFree(pNotBranch); ! 716: pRet = 0; ! 717: } ! 718: *ppExpr = pRet; ! 719: return rc; ! 720: } ! 721: ! 722: /* ! 723: ** Parameters z and n contain a pointer to and length of a buffer containing ! 724: ** an fts3 query expression, respectively. This function attempts to parse the ! 
725: ** query expression and create a tree of Fts3Expr structures representing the ! 726: ** parsed expression. If successful, *ppExpr is set to point to the head ! 727: ** of the parsed expression tree and SQLITE_OK is returned. If an error ! 728: ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse ! 729: ** error) is returned and *ppExpr is set to 0. ! 730: ** ! 731: ** If parameter n is a negative number, then z is assumed to point to a ! 732: ** nul-terminated string and the length is determined using strlen(). ! 733: ** ! 734: ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to ! 735: ** use to normalize query tokens while parsing the expression. The azCol[] ! 736: ** array, which is assumed to contain nCol entries, should contain the names ! 737: ** of each column in the target fts3 table, in order from left to right. ! 738: ** Column names must be nul-terminated strings. ! 739: ** ! 740: ** The iDefaultCol parameter should be passed the index of the table column ! 741: ** that appears on the left-hand-side of the MATCH operator (the default ! 742: ** column to match against for tokens for which a column name is not explicitly ! 743: ** specified as part of the query string), or -1 if tokens may by default ! 744: ** match any table column. ! 745: */ ! 746: int sqlite3Fts3ExprParse( ! 747: sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ ! 748: char **azCol, /* Array of column names for fts3 table */ ! 749: int bFts4, /* True to allow FTS4-only syntax */ ! 750: int nCol, /* Number of entries in azCol[] */ ! 751: int iDefaultCol, /* Default column to query */ ! 752: const char *z, int n, /* Text of MATCH query */ ! 753: Fts3Expr **ppExpr /* OUT: Parsed query structure */ ! 754: ){ ! 755: int nParsed; ! 756: int rc; ! 757: ParseContext sParse; ! 758: sParse.pTokenizer = pTokenizer; ! 759: sParse.azCol = (const char **)azCol; ! 760: sParse.nCol = nCol; ! 761: sParse.iDefaultCol = iDefaultCol; ! 
762: sParse.nNest = 0; ! 763: sParse.bFts4 = bFts4; ! 764: if( z==0 ){ ! 765: *ppExpr = 0; ! 766: return SQLITE_OK; ! 767: } ! 768: if( n<0 ){ ! 769: n = (int)strlen(z); ! 770: } ! 771: rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); ! 772: ! 773: /* Check for mismatched parenthesis */ ! 774: if( rc==SQLITE_OK && sParse.nNest ){ ! 775: rc = SQLITE_ERROR; ! 776: sqlite3Fts3ExprFree(*ppExpr); ! 777: *ppExpr = 0; ! 778: } ! 779: ! 780: return rc; ! 781: } ! 782: ! 783: /* ! 784: ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). ! 785: */ ! 786: void sqlite3Fts3ExprFree(Fts3Expr *p){ ! 787: if( p ){ ! 788: assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); ! 789: sqlite3Fts3ExprFree(p->pLeft); ! 790: sqlite3Fts3ExprFree(p->pRight); ! 791: sqlite3Fts3EvalPhraseCleanup(p->pPhrase); ! 792: sqlite3_free(p->aMI); ! 793: sqlite3_free(p); ! 794: } ! 795: } ! 796: ! 797: /**************************************************************************** ! 798: ***************************************************************************** ! 799: ** Everything after this point is just test code. ! 800: */ ! 801: ! 802: #ifdef SQLITE_TEST ! 803: ! 804: #include <stdio.h> ! 805: ! 806: /* ! 807: ** Function to query the hash-table of tokenizers (see README.tokenizers). ! 808: */ ! 809: static int queryTestTokenizer( ! 810: sqlite3 *db, ! 811: const char *zName, ! 812: const sqlite3_tokenizer_module **pp ! 813: ){ ! 814: int rc; ! 815: sqlite3_stmt *pStmt; ! 816: const char zSql[] = "SELECT fts3_tokenizer(?)"; ! 817: ! 818: *pp = 0; ! 819: rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); ! 820: if( rc!=SQLITE_OK ){ ! 821: return rc; ! 822: } ! 823: ! 824: sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); ! 825: if( SQLITE_ROW==sqlite3_step(pStmt) ){ ! 826: if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ ! 827: memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); ! 828: } ! 829: } ! 830: ! 831: return sqlite3_finalize(pStmt); ! 832: } ! 
833: ! 834: /* ! 835: ** Return a pointer to a buffer containing a text representation of the ! 836: ** expression passed as the first argument. The buffer is obtained from ! 837: ** sqlite3_malloc(). It is the responsibility of the caller to use ! 838: ** sqlite3_free() to release the memory. If an OOM condition is encountered, ! 839: ** NULL is returned. ! 840: ** ! 841: ** If the second argument is not NULL, then its contents are prepended to ! 842: ** the returned expression text and then freed using sqlite3_free(). ! 843: */ ! 844: static char *exprToString(Fts3Expr *pExpr, char *zBuf){ ! 845: switch( pExpr->eType ){ ! 846: case FTSQUERY_PHRASE: { ! 847: Fts3Phrase *pPhrase = pExpr->pPhrase; ! 848: int i; ! 849: zBuf = sqlite3_mprintf( ! 850: "%zPHRASE %d 0", zBuf, pPhrase->iColumn); ! 851: for(i=0; zBuf && i<pPhrase->nToken; i++){ ! 852: zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, ! 853: pPhrase->aToken[i].n, pPhrase->aToken[i].z, ! 854: (pPhrase->aToken[i].isPrefix?"+":"") ! 855: ); ! 856: } ! 857: return zBuf; ! 858: } ! 859: ! 860: case FTSQUERY_NEAR: ! 861: zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); ! 862: break; ! 863: case FTSQUERY_NOT: ! 864: zBuf = sqlite3_mprintf("%zNOT ", zBuf); ! 865: break; ! 866: case FTSQUERY_AND: ! 867: zBuf = sqlite3_mprintf("%zAND ", zBuf); ! 868: break; ! 869: case FTSQUERY_OR: ! 870: zBuf = sqlite3_mprintf("%zOR ", zBuf); ! 871: break; ! 872: } ! 873: ! 874: if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); ! 875: if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); ! 876: if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); ! 877: ! 878: if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); ! 879: if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); ! 880: ! 881: return zBuf; ! 882: } ! 883: ! 884: /* ! 885: ** This is the implementation of a scalar SQL function used to test the ! 886: ** expression parser. It should be called as follows: ! 887: ** ! 888: ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); ! 889: ** ! 
890: ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used ! 891: ** to parse the query expression (see README.tokenizers). The second argument ! 892: ** is the query expression to parse. Each subsequent argument is the name ! 893: ** of a column of the fts3 table that the query expression may refer to. ! 894: ** For example: ! 895: ** ! 896: ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); ! 897: */ ! 898: static void fts3ExprTest( ! 899: sqlite3_context *context, ! 900: int argc, ! 901: sqlite3_value **argv ! 902: ){ ! 903: sqlite3_tokenizer_module const *pModule = 0; ! 904: sqlite3_tokenizer *pTokenizer = 0; ! 905: int rc; ! 906: char **azCol = 0; ! 907: const char *zExpr; ! 908: int nExpr; ! 909: int nCol; ! 910: int ii; ! 911: Fts3Expr *pExpr; ! 912: char *zBuf = 0; ! 913: sqlite3 *db = sqlite3_context_db_handle(context); ! 914: ! 915: if( argc<3 ){ ! 916: sqlite3_result_error(context, ! 917: "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 ! 918: ); ! 919: return; ! 920: } ! 921: ! 922: rc = queryTestTokenizer(db, ! 923: (const char *)sqlite3_value_text(argv[0]), &pModule); ! 924: if( rc==SQLITE_NOMEM ){ ! 925: sqlite3_result_error_nomem(context); ! 926: goto exprtest_out; ! 927: }else if( !pModule ){ ! 928: sqlite3_result_error(context, "No such tokenizer module", -1); ! 929: goto exprtest_out; ! 930: } ! 931: ! 932: rc = pModule->xCreate(0, 0, &pTokenizer); ! 933: assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); ! 934: if( rc==SQLITE_NOMEM ){ ! 935: sqlite3_result_error_nomem(context); ! 936: goto exprtest_out; ! 937: } ! 938: pTokenizer->pModule = pModule; ! 939: ! 940: zExpr = (const char *)sqlite3_value_text(argv[1]); ! 941: nExpr = sqlite3_value_bytes(argv[1]); ! 942: nCol = argc-2; ! 943: azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); ! 944: if( !azCol ){ ! 945: sqlite3_result_error_nomem(context); ! 946: goto exprtest_out; ! 947: } ! 948: for(ii=0; ii<nCol; ii++){ ! 
949: azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); ! 950: } ! 951: ! 952: rc = sqlite3Fts3ExprParse( ! 953: pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr ! 954: ); ! 955: if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ ! 956: sqlite3_result_error(context, "Error parsing expression", -1); ! 957: }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ ! 958: sqlite3_result_error_nomem(context); ! 959: }else{ ! 960: sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); ! 961: sqlite3_free(zBuf); ! 962: } ! 963: ! 964: sqlite3Fts3ExprFree(pExpr); ! 965: ! 966: exprtest_out: ! 967: if( pModule && pTokenizer ){ ! 968: rc = pModule->xDestroy(pTokenizer); ! 969: } ! 970: sqlite3_free(azCol); ! 971: } ! 972: ! 973: /* ! 974: ** Register the query expression parser test function fts3_exprtest() ! 975: ** with database connection db. ! 976: */ ! 977: int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ ! 978: return sqlite3_create_function( ! 979: db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 ! 980: ); ! 981: } ! 982: ! 983: #endif ! 984: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */