Annotation of embedaddon/sqlite3/ext/fts3/fts3_expr.c, revision 1.1.1.1

1.1       misho       1: /*
                      2: ** 2008 Nov 28
                      3: **
                      4: ** The author disclaims copyright to this source code.  In place of
                      5: ** a legal notice, here is a blessing:
                      6: **
                      7: **    May you do good and not evil.
                      8: **    May you find forgiveness for yourself and forgive others.
                      9: **    May you share freely, never taking more than you give.
                     10: **
                     11: ******************************************************************************
                     12: **
                     13: ** This module contains code that implements a parser for fts3 query strings
                     14: ** (the right-hand argument to the MATCH operator). Because the supported 
                     15: ** syntax is relatively simple, the whole tokenizer/parser system is
                     16: ** hand-coded. 
                     17: */
                     18: #include "fts3Int.h"
                     19: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
                     20: 
                     21: /*
                     22: ** By default, this module parses the legacy syntax that has been 
                     23: ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
                     24: ** is defined, then it uses the new syntax. The differences between
                     25: ** the new and the old syntaxes are:
                     26: **
                     27: **  a) The new syntax supports parenthesis. The old does not.
                     28: **
                     29: **  b) The new syntax supports the AND and NOT operators. The old does not.
                     30: **
                     31: **  c) The old syntax supports the "-" token qualifier. This is not 
                     32: **     supported by the new syntax (it is replaced by the NOT operator).
                     33: **
                     34: **  d) When using the old syntax, the OR operator has a greater precedence
                     35: **     than an implicit AND. When using the new, both implicity and explicit
                     36: **     AND operators have a higher precedence than OR.
                     37: **
                     38: ** If compiled with SQLITE_TEST defined, then this module exports the
                     39: ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
                     40: ** to zero causes the module to use the old syntax. If it is set to 
                     41: ** non-zero the new syntax is activated. This is so both syntaxes can
                     42: ** be tested using a single build of testfixture.
                     43: **
                     44: ** The following describes the syntax supported by the fts3 MATCH
                     45: ** operator in a similar format to that used by the lemon parser
                     46: ** generator. This module does not use actually lemon, it uses a
                     47: ** custom parser.
                     48: **
                     49: **   query ::= andexpr (OR andexpr)*.
                     50: **
                     51: **   andexpr ::= notexpr (AND? notexpr)*.
                     52: **
                     53: **   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
                     54: **   notexpr ::= LP query RP.
                     55: **
                     56: **   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
                     57: **
                     58: **   distance_opt ::= .
                     59: **   distance_opt ::= / INTEGER.
                     60: **
                     61: **   phrase ::= TOKEN.
                     62: **   phrase ::= COLUMN:TOKEN.
                     63: **   phrase ::= "TOKEN TOKEN TOKEN...".
                     64: */
                     65: 
                     66: #ifdef SQLITE_TEST
                     67: int sqlite3_fts3_enable_parentheses = 0;
                     68: #else
                     69: # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS 
                     70: #  define sqlite3_fts3_enable_parentheses 1
                     71: # else
                     72: #  define sqlite3_fts3_enable_parentheses 0
                     73: # endif
                     74: #endif
                     75: 
                     76: /*
                     77: ** Default span for NEAR operators.
                     78: */
                     79: #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
                     80: 
                     81: #include <string.h>
                     82: #include <assert.h>
                     83: 
                     84: /*
                     85: ** isNot:
                     86: **   This variable is used by function getNextNode(). When getNextNode() is
                     87: **   called, it sets ParseContext.isNot to true if the 'next node' is a 
                     88: **   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
                     89: **   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
                     90: **   zero.
                     91: */
                     92: typedef struct ParseContext ParseContext;
                     93: struct ParseContext {
                     94:   sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
                     95:   const char **azCol;                 /* Array of column names for fts3 table */
                     96:   int bFts4;                          /* True to allow FTS4-only syntax */
                     97:   int nCol;                           /* Number of entries in azCol[] */
                     98:   int iDefaultCol;                    /* Default column to query */
                     99:   int isNot;                          /* True if getNextNode() sees a unary - */
                    100:   sqlite3_context *pCtx;              /* Write error message here */
                    101:   int nNest;                          /* Number of nested brackets */
                    102: };
                    103: 
                    104: /*
                    105: ** This function is equivalent to the standard isspace() function. 
                    106: **
                    107: ** The standard isspace() can be awkward to use safely, because although it
                    108: ** is defined to accept an argument of type int, its behaviour when passed
                    109: ** an integer that falls outside of the range of the unsigned char type
                    110: ** is undefined (and sometimes, "undefined" means segfault). This wrapper
                    111: ** is defined to accept an argument of type char, and always returns 0 for
                    112: ** any values that fall outside of the range of the unsigned char type (i.e.
                    113: ** negative values).
                    114: */
                    115: static int fts3isspace(char c){
                    116:   return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
                    117: }
                    118: 
                    119: /*
                    120: ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
                    121: ** zero the memory before returning a pointer to it. If unsuccessful, 
                    122: ** return NULL.
                    123: */
                    124: static void *fts3MallocZero(int nByte){
                    125:   void *pRet = sqlite3_malloc(nByte);
                    126:   if( pRet ) memset(pRet, 0, nByte);
                    127:   return pRet;
                    128: }
                    129: 
                    130: 
                    131: /*
                    132: ** Extract the next token from buffer z (length n) using the tokenizer
                    133: ** and other information (column names etc.) in pParse. Create an Fts3Expr
                    134: ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
                    135: ** single token and set *ppExpr to point to it. If the end of the buffer is
                    136: ** reached before a token is found, set *ppExpr to zero. It is the
                    137: ** responsibility of the caller to eventually deallocate the allocated 
                    138: ** Fts3Expr structure (if any) by passing it to sqlite3_free().
                    139: **
                    140: ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
                    141: ** fails.
                    142: */
                    143: static int getNextToken(
                    144:   ParseContext *pParse,                   /* fts3 query parse context */
                    145:   int iCol,                               /* Value for Fts3Phrase.iColumn */
                    146:   const char *z, int n,                   /* Input string */
                    147:   Fts3Expr **ppExpr,                      /* OUT: expression */
                    148:   int *pnConsumed                         /* OUT: Number of bytes consumed */
                    149: ){
                    150:   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
                    151:   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
                    152:   int rc;
                    153:   sqlite3_tokenizer_cursor *pCursor;
                    154:   Fts3Expr *pRet = 0;
                    155:   int nConsumed = 0;
                    156: 
                    157:   rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
                    158:   if( rc==SQLITE_OK ){
                    159:     const char *zToken;
                    160:     int nToken, iStart, iEnd, iPosition;
                    161:     int nByte;                               /* total space to allocate */
                    162: 
                    163:     pCursor->pTokenizer = pTokenizer;
                    164:     rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
                    165: 
                    166:     if( rc==SQLITE_OK ){
                    167:       nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
                    168:       pRet = (Fts3Expr *)fts3MallocZero(nByte);
                    169:       if( !pRet ){
                    170:         rc = SQLITE_NOMEM;
                    171:       }else{
                    172:         pRet->eType = FTSQUERY_PHRASE;
                    173:         pRet->pPhrase = (Fts3Phrase *)&pRet[1];
                    174:         pRet->pPhrase->nToken = 1;
                    175:         pRet->pPhrase->iColumn = iCol;
                    176:         pRet->pPhrase->aToken[0].n = nToken;
                    177:         pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
                    178:         memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
                    179: 
                    180:         if( iEnd<n && z[iEnd]=='*' ){
                    181:           pRet->pPhrase->aToken[0].isPrefix = 1;
                    182:           iEnd++;
                    183:         }
                    184: 
                    185:         while( 1 ){
                    186:           if( !sqlite3_fts3_enable_parentheses 
                    187:            && iStart>0 && z[iStart-1]=='-' 
                    188:           ){
                    189:             pParse->isNot = 1;
                    190:             iStart--;
                    191:           }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
                    192:             pRet->pPhrase->aToken[0].bFirst = 1;
                    193:             iStart--;
                    194:           }else{
                    195:             break;
                    196:           }
                    197:         }
                    198: 
                    199:       }
                    200:       nConsumed = iEnd;
                    201:     }
                    202: 
                    203:     pModule->xClose(pCursor);
                    204:   }
                    205:   
                    206:   *pnConsumed = nConsumed;
                    207:   *ppExpr = pRet;
                    208:   return rc;
                    209: }
                    210: 
                    211: 
                    212: /*
                    213: ** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
                    214: ** then free the old allocation.
                    215: */
                    216: static void *fts3ReallocOrFree(void *pOrig, int nNew){
                    217:   void *pRet = sqlite3_realloc(pOrig, nNew);
                    218:   if( !pRet ){
                    219:     sqlite3_free(pOrig);
                    220:   }
                    221:   return pRet;
                    222: }
                    223: 
                    224: /*
                    225: ** Buffer zInput, length nInput, contains the contents of a quoted string
                    226: ** that appeared as part of an fts3 query expression. Neither quote character
                    227: ** is included in the buffer. This function attempts to tokenize the entire
                    228: ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE 
                    229: ** containing the results.
                    230: **
                    231: ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
                    232: ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
                    233: ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
                    234: ** to 0.
                    235: */
                    236: static int getNextString(
                    237:   ParseContext *pParse,                   /* fts3 query parse context */
                    238:   const char *zInput, int nInput,         /* Input string */
                    239:   Fts3Expr **ppExpr                       /* OUT: expression */
                    240: ){
                    241:   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
                    242:   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
                    243:   int rc;
                    244:   Fts3Expr *p = 0;
                    245:   sqlite3_tokenizer_cursor *pCursor = 0;
                    246:   char *zTemp = 0;
                    247:   int nTemp = 0;
                    248: 
                    249:   const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
                    250:   int nToken = 0;
                    251: 
                    252:   /* The final Fts3Expr data structure, including the Fts3Phrase,
                    253:   ** Fts3PhraseToken structures token buffers are all stored as a single 
                    254:   ** allocation so that the expression can be freed with a single call to
                    255:   ** sqlite3_free(). Setting this up requires a two pass approach.
                    256:   **
                    257:   ** The first pass, in the block below, uses a tokenizer cursor to iterate
                    258:   ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
                    259:   ** to assemble data in two dynamic buffers:
                    260:   **
                    261:   **   Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
                    262:   **             structure, followed by the array of Fts3PhraseToken 
                    263:   **             structures. This pass only populates the Fts3PhraseToken array.
                    264:   **
                    265:   **   Buffer zTemp: Contains copies of all tokens.
                    266:   **
                    267:   ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
                    268:   ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
                    269:   ** structures.
                    270:   */
                    271:   rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
                    272:   if( rc==SQLITE_OK ){
                    273:     int ii;
                    274:     pCursor->pTokenizer = pTokenizer;
                    275:     for(ii=0; rc==SQLITE_OK; ii++){
                    276:       const char *zByte;
                    277:       int nByte, iBegin, iEnd, iPos;
                    278:       rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
                    279:       if( rc==SQLITE_OK ){
                    280:         Fts3PhraseToken *pToken;
                    281: 
                    282:         p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
                    283:         if( !p ) goto no_mem;
                    284: 
                    285:         zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
                    286:         if( !zTemp ) goto no_mem;
                    287: 
                    288:         assert( nToken==ii );
                    289:         pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
                    290:         memset(pToken, 0, sizeof(Fts3PhraseToken));
                    291: 
                    292:         memcpy(&zTemp[nTemp], zByte, nByte);
                    293:         nTemp += nByte;
                    294: 
                    295:         pToken->n = nByte;
                    296:         pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
                    297:         pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
                    298:         nToken = ii+1;
                    299:       }
                    300:     }
                    301: 
                    302:     pModule->xClose(pCursor);
                    303:     pCursor = 0;
                    304:   }
                    305: 
                    306:   if( rc==SQLITE_DONE ){
                    307:     int jj;
                    308:     char *zBuf = 0;
                    309: 
                    310:     p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
                    311:     if( !p ) goto no_mem;
                    312:     memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
                    313:     p->eType = FTSQUERY_PHRASE;
                    314:     p->pPhrase = (Fts3Phrase *)&p[1];
                    315:     p->pPhrase->iColumn = pParse->iDefaultCol;
                    316:     p->pPhrase->nToken = nToken;
                    317: 
                    318:     zBuf = (char *)&p->pPhrase->aToken[nToken];
                    319:     if( zTemp ){
                    320:       memcpy(zBuf, zTemp, nTemp);
                    321:       sqlite3_free(zTemp);
                    322:     }else{
                    323:       assert( nTemp==0 );
                    324:     }
                    325: 
                    326:     for(jj=0; jj<p->pPhrase->nToken; jj++){
                    327:       p->pPhrase->aToken[jj].z = zBuf;
                    328:       zBuf += p->pPhrase->aToken[jj].n;
                    329:     }
                    330:     rc = SQLITE_OK;
                    331:   }
                    332: 
                    333:   *ppExpr = p;
                    334:   return rc;
                    335: no_mem:
                    336: 
                    337:   if( pCursor ){
                    338:     pModule->xClose(pCursor);
                    339:   }
                    340:   sqlite3_free(zTemp);
                    341:   sqlite3_free(p);
                    342:   *ppExpr = 0;
                    343:   return SQLITE_NOMEM;
                    344: }
                    345: 
                    346: /*
                    347: ** Function getNextNode(), which is called by fts3ExprParse(), may itself
                    348: ** call fts3ExprParse(). So this forward declaration is required.
                    349: */
                    350: static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
                    351: 
                    352: /*
                    353: ** The output variable *ppExpr is populated with an allocated Fts3Expr 
                    354: ** structure, or set to 0 if the end of the input buffer is reached.
                    355: **
                    356: ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
                    357: ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
                    358: ** If SQLITE_ERROR is returned, pContext is populated with an error message.
                    359: */
                    360: static int getNextNode(
                    361:   ParseContext *pParse,                   /* fts3 query parse context */
                    362:   const char *z, int n,                   /* Input string */
                    363:   Fts3Expr **ppExpr,                      /* OUT: expression */
                    364:   int *pnConsumed                         /* OUT: Number of bytes consumed */
                    365: ){
                    366:   static const struct Fts3Keyword {
                    367:     char *z;                              /* Keyword text */
                    368:     unsigned char n;                      /* Length of the keyword */
                    369:     unsigned char parenOnly;              /* Only valid in paren mode */
                    370:     unsigned char eType;                  /* Keyword code */
                    371:   } aKeyword[] = {
                    372:     { "OR" ,  2, 0, FTSQUERY_OR   },
                    373:     { "AND",  3, 1, FTSQUERY_AND  },
                    374:     { "NOT",  3, 1, FTSQUERY_NOT  },
                    375:     { "NEAR", 4, 0, FTSQUERY_NEAR }
                    376:   };
                    377:   int ii;
                    378:   int iCol;
                    379:   int iColLen;
                    380:   int rc;
                    381:   Fts3Expr *pRet = 0;
                    382: 
                    383:   const char *zInput = z;
                    384:   int nInput = n;
                    385: 
                    386:   pParse->isNot = 0;
                    387: 
                    388:   /* Skip over any whitespace before checking for a keyword, an open or
                    389:   ** close bracket, or a quoted string. 
                    390:   */
                    391:   while( nInput>0 && fts3isspace(*zInput) ){
                    392:     nInput--;
                    393:     zInput++;
                    394:   }
                    395:   if( nInput==0 ){
                    396:     return SQLITE_DONE;
                    397:   }
                    398: 
                    399:   /* See if we are dealing with a keyword. */
                    400:   for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
                    401:     const struct Fts3Keyword *pKey = &aKeyword[ii];
                    402: 
                    403:     if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
                    404:       continue;
                    405:     }
                    406: 
                    407:     if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
                    408:       int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
                    409:       int nKey = pKey->n;
                    410:       char cNext;
                    411: 
                    412:       /* If this is a "NEAR" keyword, check for an explicit nearness. */
                    413:       if( pKey->eType==FTSQUERY_NEAR ){
                    414:         assert( nKey==4 );
                    415:         if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
                    416:           nNear = 0;
                    417:           for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
                    418:             nNear = nNear * 10 + (zInput[nKey] - '0');
                    419:           }
                    420:         }
                    421:       }
                    422: 
                    423:       /* At this point this is probably a keyword. But for that to be true,
                    424:       ** the next byte must contain either whitespace, an open or close
                    425:       ** parenthesis, a quote character, or EOF. 
                    426:       */
                    427:       cNext = zInput[nKey];
                    428:       if( fts3isspace(cNext) 
                    429:        || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
                    430:       ){
                    431:         pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
                    432:         if( !pRet ){
                    433:           return SQLITE_NOMEM;
                    434:         }
                    435:         pRet->eType = pKey->eType;
                    436:         pRet->nNear = nNear;
                    437:         *ppExpr = pRet;
                    438:         *pnConsumed = (int)((zInput - z) + nKey);
                    439:         return SQLITE_OK;
                    440:       }
                    441: 
                    442:       /* Turns out that wasn't a keyword after all. This happens if the
                    443:       ** user has supplied a token such as "ORacle". Continue.
                    444:       */
                    445:     }
                    446:   }
                    447: 
                    448:   /* Check for an open bracket. */
                    449:   if( sqlite3_fts3_enable_parentheses ){
                    450:     if( *zInput=='(' ){
                    451:       int nConsumed;
                    452:       pParse->nNest++;
                    453:       rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
                    454:       if( rc==SQLITE_OK && !*ppExpr ){
                    455:         rc = SQLITE_DONE;
                    456:       }
                    457:       *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
                    458:       return rc;
                    459:     }
                    460:   
                    461:     /* Check for a close bracket. */
                    462:     if( *zInput==')' ){
                    463:       pParse->nNest--;
                    464:       *pnConsumed = (int)((zInput - z) + 1);
                    465:       return SQLITE_DONE;
                    466:     }
                    467:   }
                    468: 
                    469:   /* See if we are dealing with a quoted phrase. If this is the case, then
                    470:   ** search for the closing quote and pass the whole string to getNextString()
                    471:   ** for processing. This is easy to do, as fts3 has no syntax for escaping
                    472:   ** a quote character embedded in a string.
                    473:   */
                    474:   if( *zInput=='"' ){
                    475:     for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
                    476:     *pnConsumed = (int)((zInput - z) + ii + 1);
                    477:     if( ii==nInput ){
                    478:       return SQLITE_ERROR;
                    479:     }
                    480:     return getNextString(pParse, &zInput[1], ii-1, ppExpr);
                    481:   }
                    482: 
                    483: 
                    484:   /* If control flows to this point, this must be a regular token, or 
                    485:   ** the end of the input. Read a regular token using the sqlite3_tokenizer
                    486:   ** interface. Before doing so, figure out if there is an explicit
                    487:   ** column specifier for the token. 
                    488:   **
                    489:   ** TODO: Strangely, it is not possible to associate a column specifier
                    490:   ** with a quoted phrase, only with a single token. Not sure if this was
                    491:   ** an implementation artifact or an intentional decision when fts3 was
                    492:   ** first implemented. Whichever it was, this module duplicates the 
                    493:   ** limitation.
                    494:   */
                    495:   iCol = pParse->iDefaultCol;
                    496:   iColLen = 0;
                    497:   for(ii=0; ii<pParse->nCol; ii++){
                    498:     const char *zStr = pParse->azCol[ii];
                    499:     int nStr = (int)strlen(zStr);
                    500:     if( nInput>nStr && zInput[nStr]==':' 
                    501:      && sqlite3_strnicmp(zStr, zInput, nStr)==0 
                    502:     ){
                    503:       iCol = ii;
                    504:       iColLen = (int)((zInput - z) + nStr + 1);
                    505:       break;
                    506:     }
                    507:   }
                    508:   rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
                    509:   *pnConsumed += iColLen;
                    510:   return rc;
                    511: }
                    512: 
                    513: /*
                    514: ** The argument is an Fts3Expr structure for a binary operator (any type
                    515: ** except an FTSQUERY_PHRASE). Return an integer value representing the
                    516: ** precedence of the operator. Lower values have a higher precedence (i.e.
                    517: ** group more tightly). For example, in the C language, the == operator
                    518: ** groups more tightly than ||, and would therefore have a higher precedence.
                    519: **
                    520: ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
                    521: ** is defined), the order of the operators in precedence from highest to
                    522: ** lowest is:
                    523: **
                    524: **   NEAR
                    525: **   NOT
                    526: **   AND (including implicit ANDs)
                    527: **   OR
                    528: **
                    529: ** Note that when using the old query syntax, the OR operator has a higher
                    530: ** precedence than the AND operator.
                    531: */
                    532: static int opPrecedence(Fts3Expr *p){
                    533:   assert( p->eType!=FTSQUERY_PHRASE );
                    534:   if( sqlite3_fts3_enable_parentheses ){
                    535:     return p->eType;
                    536:   }else if( p->eType==FTSQUERY_NEAR ){
                    537:     return 1;
                    538:   }else if( p->eType==FTSQUERY_OR ){
                    539:     return 2;
                    540:   }
                    541:   assert( p->eType==FTSQUERY_AND );
                    542:   return 3;
                    543: }
                    544: 
                    545: /*
                    546: ** Argument ppHead contains a pointer to the current head of a query 
                    547: ** expression tree being parsed. pPrev is the expression node most recently
                    548: ** inserted into the tree. This function adds pNew, which is always a binary
                    549: ** operator node, into the expression tree based on the relative precedence
                    550: ** of pNew and the existing nodes of the tree. This may result in the head
                    551: ** of the tree changing, in which case *ppHead is set to the new root node.
                    552: */
                    553: static void insertBinaryOperator(
                    554:   Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
                    555:   Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
                    556:   Fts3Expr *pNew           /* New binary node to insert into expression tree */
                    557: ){
                    558:   Fts3Expr *pSplit = pPrev;
                    559:   while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
                    560:     pSplit = pSplit->pParent;
                    561:   }
                    562: 
                    563:   if( pSplit->pParent ){
                    564:     assert( pSplit->pParent->pRight==pSplit );
                    565:     pSplit->pParent->pRight = pNew;
                    566:     pNew->pParent = pSplit->pParent;
                    567:   }else{
                    568:     *ppHead = pNew;
                    569:   }
                    570:   pNew->pLeft = pSplit;
                    571:   pSplit->pParent = pNew;
                    572: }
                    573: 
                    574: /*
                    575: ** Parse the fts3 query expression found in buffer z, length n. This function
                    576: ** returns either when the end of the buffer is reached or an unmatched 
                    577: ** closing bracket - ')' - is encountered.
                    578: **
                    579: ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
                    580: ** parsed form of the expression and *pnConsumed is set to the number of
                    581: ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
                    582: ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
                    583: */
                    584: static int fts3ExprParse(
                    585:   ParseContext *pParse,                   /* fts3 query parse context */
                    586:   const char *z, int n,                   /* Text of MATCH query */
                    587:   Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
                    588:   int *pnConsumed                         /* OUT: Number of bytes consumed */
                    589: ){
                    590:   Fts3Expr *pRet = 0;
                    591:   Fts3Expr *pPrev = 0;
                    592:   Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
                    593:   int nIn = n;
                    594:   const char *zIn = z;
                    595:   int rc = SQLITE_OK;
                    596:   int isRequirePhrase = 1;
                    597: 
                    598:   while( rc==SQLITE_OK ){
                    599:     Fts3Expr *p = 0;
                    600:     int nByte = 0;
                    601:     rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
                    602:     if( rc==SQLITE_OK ){
                    603:       int isPhrase;
                    604: 
                    605:       if( !sqlite3_fts3_enable_parentheses 
                    606:        && p->eType==FTSQUERY_PHRASE && pParse->isNot 
                    607:       ){
                    608:         /* Create an implicit NOT operator. */
                    609:         Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
                    610:         if( !pNot ){
                    611:           sqlite3Fts3ExprFree(p);
                    612:           rc = SQLITE_NOMEM;
                    613:           goto exprparse_out;
                    614:         }
                    615:         pNot->eType = FTSQUERY_NOT;
                    616:         pNot->pRight = p;
                    617:         if( pNotBranch ){
                    618:           pNot->pLeft = pNotBranch;
                    619:         }
                    620:         pNotBranch = pNot;
                    621:         p = pPrev;
                    622:       }else{
                    623:         int eType = p->eType;
                    624:         isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
                    625: 
                    626:         /* The isRequirePhrase variable is set to true if a phrase or
                    627:         ** an expression contained in parenthesis is required. If a
                    628:         ** binary operator (AND, OR, NOT or NEAR) is encounted when
                    629:         ** isRequirePhrase is set, this is a syntax error.
                    630:         */
                    631:         if( !isPhrase && isRequirePhrase ){
                    632:           sqlite3Fts3ExprFree(p);
                    633:           rc = SQLITE_ERROR;
                    634:           goto exprparse_out;
                    635:         }
                    636:   
                    637:         if( isPhrase && !isRequirePhrase ){
                    638:           /* Insert an implicit AND operator. */
                    639:           Fts3Expr *pAnd;
                    640:           assert( pRet && pPrev );
                    641:           pAnd = fts3MallocZero(sizeof(Fts3Expr));
                    642:           if( !pAnd ){
                    643:             sqlite3Fts3ExprFree(p);
                    644:             rc = SQLITE_NOMEM;
                    645:             goto exprparse_out;
                    646:           }
                    647:           pAnd->eType = FTSQUERY_AND;
                    648:           insertBinaryOperator(&pRet, pPrev, pAnd);
                    649:           pPrev = pAnd;
                    650:         }
                    651: 
                    652:         /* This test catches attempts to make either operand of a NEAR
                    653:         ** operator something other than a phrase. For example, either of
                    654:         ** the following:
                    655:         **
                    656:         **    (bracketed expression) NEAR phrase
                    657:         **    phrase NEAR (bracketed expression)
                    658:         **
                    659:         ** Return an error in either case.
                    660:         */
                    661:         if( pPrev && (
                    662:             (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
                    663:          || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
                    664:         )){
                    665:           sqlite3Fts3ExprFree(p);
                    666:           rc = SQLITE_ERROR;
                    667:           goto exprparse_out;
                    668:         }
                    669:   
                    670:         if( isPhrase ){
                    671:           if( pRet ){
                    672:             assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
                    673:             pPrev->pRight = p;
                    674:             p->pParent = pPrev;
                    675:           }else{
                    676:             pRet = p;
                    677:           }
                    678:         }else{
                    679:           insertBinaryOperator(&pRet, pPrev, p);
                    680:         }
                    681:         isRequirePhrase = !isPhrase;
                    682:       }
                    683:       assert( nByte>0 );
                    684:     }
                    685:     assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
                    686:     nIn -= nByte;
                    687:     zIn += nByte;
                    688:     pPrev = p;
                    689:   }
                    690: 
                    691:   if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
                    692:     rc = SQLITE_ERROR;
                    693:   }
                    694: 
                    695:   if( rc==SQLITE_DONE ){
                    696:     rc = SQLITE_OK;
                    697:     if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
                    698:       if( !pRet ){
                    699:         rc = SQLITE_ERROR;
                    700:       }else{
                    701:         Fts3Expr *pIter = pNotBranch;
                    702:         while( pIter->pLeft ){
                    703:           pIter = pIter->pLeft;
                    704:         }
                    705:         pIter->pLeft = pRet;
                    706:         pRet = pNotBranch;
                    707:       }
                    708:     }
                    709:   }
                    710:   *pnConsumed = n - nIn;
                    711: 
                    712: exprparse_out:
                    713:   if( rc!=SQLITE_OK ){
                    714:     sqlite3Fts3ExprFree(pRet);
                    715:     sqlite3Fts3ExprFree(pNotBranch);
                    716:     pRet = 0;
                    717:   }
                    718:   *ppExpr = pRet;
                    719:   return rc;
                    720: }
                    721: 
                    722: /*
                    723: ** Parameters z and n contain a pointer to and length of a buffer containing
                    724: ** an fts3 query expression, respectively. This function attempts to parse the
                    725: ** query expression and create a tree of Fts3Expr structures representing the
                    726: ** parsed expression. If successful, *ppExpr is set to point to the head
                    727: ** of the parsed expression tree and SQLITE_OK is returned. If an error
                    728: ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
                    729: ** error) is returned and *ppExpr is set to 0.
                    730: **
                    731: ** If parameter n is a negative number, then z is assumed to point to a
                    732: ** nul-terminated string and the length is determined using strlen().
                    733: **
                    734: ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
                    735: ** use to normalize query tokens while parsing the expression. The azCol[]
                    736: ** array, which is assumed to contain nCol entries, should contain the names
                    737: ** of each column in the target fts3 table, in order from left to right. 
                    738: ** Column names must be nul-terminated strings.
                    739: **
                    740: ** The iDefaultCol parameter should be passed the index of the table column
                    741: ** that appears on the left-hand-side of the MATCH operator (the default
                    742: ** column to match against for tokens for which a column name is not explicitly
                    743: ** specified as part of the query string), or -1 if tokens may by default
                    744: ** match any table column.
                    745: */
                    746: int sqlite3Fts3ExprParse(
                    747:   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
                    748:   char **azCol,                       /* Array of column names for fts3 table */
                    749:   int bFts4,                          /* True to allow FTS4-only syntax */
                    750:   int nCol,                           /* Number of entries in azCol[] */
                    751:   int iDefaultCol,                    /* Default column to query */
                    752:   const char *z, int n,               /* Text of MATCH query */
                    753:   Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
                    754: ){
                    755:   int nParsed;
                    756:   int rc;
                    757:   ParseContext sParse;
                    758:   sParse.pTokenizer = pTokenizer;
                    759:   sParse.azCol = (const char **)azCol;
                    760:   sParse.nCol = nCol;
                    761:   sParse.iDefaultCol = iDefaultCol;
                    762:   sParse.nNest = 0;
                    763:   sParse.bFts4 = bFts4;
                    764:   if( z==0 ){
                    765:     *ppExpr = 0;
                    766:     return SQLITE_OK;
                    767:   }
                    768:   if( n<0 ){
                    769:     n = (int)strlen(z);
                    770:   }
                    771:   rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
                    772: 
                    773:   /* Check for mismatched parenthesis */
                    774:   if( rc==SQLITE_OK && sParse.nNest ){
                    775:     rc = SQLITE_ERROR;
                    776:     sqlite3Fts3ExprFree(*ppExpr);
                    777:     *ppExpr = 0;
                    778:   }
                    779: 
                    780:   return rc;
                    781: }
                    782: 
                    783: /*
                    784: ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
                    785: */
                    786: void sqlite3Fts3ExprFree(Fts3Expr *p){
                    787:   if( p ){
                    788:     assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
                    789:     sqlite3Fts3ExprFree(p->pLeft);
                    790:     sqlite3Fts3ExprFree(p->pRight);
                    791:     sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
                    792:     sqlite3_free(p->aMI);
                    793:     sqlite3_free(p);
                    794:   }
                    795: }
                    796: 
                    797: /****************************************************************************
                    798: *****************************************************************************
                    799: ** Everything after this point is just test code.
                    800: */
                    801: 
                    802: #ifdef SQLITE_TEST
                    803: 
                    804: #include <stdio.h>
                    805: 
                    806: /*
                    807: ** Function to query the hash-table of tokenizers (see README.tokenizers).
                    808: */
                    809: static int queryTestTokenizer(
                    810:   sqlite3 *db, 
                    811:   const char *zName,  
                    812:   const sqlite3_tokenizer_module **pp
                    813: ){
                    814:   int rc;
                    815:   sqlite3_stmt *pStmt;
                    816:   const char zSql[] = "SELECT fts3_tokenizer(?)";
                    817: 
                    818:   *pp = 0;
                    819:   rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
                    820:   if( rc!=SQLITE_OK ){
                    821:     return rc;
                    822:   }
                    823: 
                    824:   sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
                    825:   if( SQLITE_ROW==sqlite3_step(pStmt) ){
                    826:     if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
                    827:       memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
                    828:     }
                    829:   }
                    830: 
                    831:   return sqlite3_finalize(pStmt);
                    832: }
                    833: 
                    834: /*
                    835: ** Return a pointer to a buffer containing a text representation of the
                    836: ** expression passed as the first argument. The buffer is obtained from
                    837: ** sqlite3_malloc(). It is the responsibility of the caller to use 
                    838: ** sqlite3_free() to release the memory. If an OOM condition is encountered,
                    839: ** NULL is returned.
                    840: **
                    841: ** If the second argument is not NULL, then its contents are prepended to 
                    842: ** the returned expression text and then freed using sqlite3_free().
                    843: */
                    844: static char *exprToString(Fts3Expr *pExpr, char *zBuf){
                    845:   switch( pExpr->eType ){
                    846:     case FTSQUERY_PHRASE: {
                    847:       Fts3Phrase *pPhrase = pExpr->pPhrase;
                    848:       int i;
                    849:       zBuf = sqlite3_mprintf(
                    850:           "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
                    851:       for(i=0; zBuf && i<pPhrase->nToken; i++){
                    852:         zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, 
                    853:             pPhrase->aToken[i].n, pPhrase->aToken[i].z,
                    854:             (pPhrase->aToken[i].isPrefix?"+":"")
                    855:         );
                    856:       }
                    857:       return zBuf;
                    858:     }
                    859: 
                    860:     case FTSQUERY_NEAR:
                    861:       zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
                    862:       break;
                    863:     case FTSQUERY_NOT:
                    864:       zBuf = sqlite3_mprintf("%zNOT ", zBuf);
                    865:       break;
                    866:     case FTSQUERY_AND:
                    867:       zBuf = sqlite3_mprintf("%zAND ", zBuf);
                    868:       break;
                    869:     case FTSQUERY_OR:
                    870:       zBuf = sqlite3_mprintf("%zOR ", zBuf);
                    871:       break;
                    872:   }
                    873: 
                    874:   if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
                    875:   if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
                    876:   if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
                    877: 
                    878:   if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
                    879:   if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
                    880: 
                    881:   return zBuf;
                    882: }
                    883: 
                    884: /*
                    885: ** This is the implementation of a scalar SQL function used to test the 
                    886: ** expression parser. It should be called as follows:
                    887: **
                    888: **   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
                    889: **
                    890: ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
                    891: ** to parse the query expression (see README.tokenizers). The second argument
                    892: ** is the query expression to parse. Each subsequent argument is the name
                    893: ** of a column of the fts3 table that the query expression may refer to.
                    894: ** For example:
                    895: **
                    896: **   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
                    897: */
                    898: static void fts3ExprTest(
                    899:   sqlite3_context *context,
                    900:   int argc,
                    901:   sqlite3_value **argv
                    902: ){
                    903:   sqlite3_tokenizer_module const *pModule = 0;
                    904:   sqlite3_tokenizer *pTokenizer = 0;
                    905:   int rc;
                    906:   char **azCol = 0;
                    907:   const char *zExpr;
                    908:   int nExpr;
                    909:   int nCol;
                    910:   int ii;
                    911:   Fts3Expr *pExpr;
                    912:   char *zBuf = 0;
                    913:   sqlite3 *db = sqlite3_context_db_handle(context);
                    914: 
                    915:   if( argc<3 ){
                    916:     sqlite3_result_error(context, 
                    917:         "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
                    918:     );
                    919:     return;
                    920:   }
                    921: 
                    922:   rc = queryTestTokenizer(db,
                    923:                           (const char *)sqlite3_value_text(argv[0]), &pModule);
                    924:   if( rc==SQLITE_NOMEM ){
                    925:     sqlite3_result_error_nomem(context);
                    926:     goto exprtest_out;
                    927:   }else if( !pModule ){
                    928:     sqlite3_result_error(context, "No such tokenizer module", -1);
                    929:     goto exprtest_out;
                    930:   }
                    931: 
                    932:   rc = pModule->xCreate(0, 0, &pTokenizer);
                    933:   assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
                    934:   if( rc==SQLITE_NOMEM ){
                    935:     sqlite3_result_error_nomem(context);
                    936:     goto exprtest_out;
                    937:   }
                    938:   pTokenizer->pModule = pModule;
                    939: 
                    940:   zExpr = (const char *)sqlite3_value_text(argv[1]);
                    941:   nExpr = sqlite3_value_bytes(argv[1]);
                    942:   nCol = argc-2;
                    943:   azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
                    944:   if( !azCol ){
                    945:     sqlite3_result_error_nomem(context);
                    946:     goto exprtest_out;
                    947:   }
                    948:   for(ii=0; ii<nCol; ii++){
                    949:     azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
                    950:   }
                    951: 
                    952:   rc = sqlite3Fts3ExprParse(
                    953:       pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
                    954:   );
                    955:   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
                    956:     sqlite3_result_error(context, "Error parsing expression", -1);
                    957:   }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
                    958:     sqlite3_result_error_nomem(context);
                    959:   }else{
                    960:     sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
                    961:     sqlite3_free(zBuf);
                    962:   }
                    963: 
                    964:   sqlite3Fts3ExprFree(pExpr);
                    965: 
                    966: exprtest_out:
                    967:   if( pModule && pTokenizer ){
                    968:     rc = pModule->xDestroy(pTokenizer);
                    969:   }
                    970:   sqlite3_free(azCol);
                    971: }
                    972: 
                    973: /*
                    974: ** Register the query expression parser test function fts3_exprtest() 
                    975: ** with database connection db. 
                    976: */
                    977: int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
                    978:   return sqlite3_create_function(
                    979:       db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
                    980:   );
                    981: }
                    982: 
                    983: #endif
                    984: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>