Annotation of embedaddon/sqlite3/ext/fts3/fts3_expr.c, revision 1.1

1.1     ! misho       1: /*
        !             2: ** 2008 Nov 28
        !             3: **
        !             4: ** The author disclaims copyright to this source code.  In place of
        !             5: ** a legal notice, here is a blessing:
        !             6: **
        !             7: **    May you do good and not evil.
        !             8: **    May you find forgiveness for yourself and forgive others.
        !             9: **    May you share freely, never taking more than you give.
        !            10: **
        !            11: ******************************************************************************
        !            12: **
        !            13: ** This module contains code that implements a parser for fts3 query strings
        !            14: ** (the right-hand argument to the MATCH operator). Because the supported 
        !            15: ** syntax is relatively simple, the whole tokenizer/parser system is
        !            16: ** hand-coded. 
        !            17: */
        !            18: #include "fts3Int.h"
        !            19: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
        !            20: 
        !            21: /*
        !            22: ** By default, this module parses the legacy syntax that has been 
        !            23: ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
        !            24: ** is defined, then it uses the new syntax. The differences between
        !            25: ** the new and the old syntaxes are:
        !            26: **
        !            27: **  a) The new syntax supports parenthesis. The old does not.
        !            28: **
        !            29: **  b) The new syntax supports the AND and NOT operators. The old does not.
        !            30: **
        !            31: **  c) The old syntax supports the "-" token qualifier. This is not 
        !            32: **     supported by the new syntax (it is replaced by the NOT operator).
        !            33: **
        !            34: **  d) When using the old syntax, the OR operator has a greater precedence
        !            35: **     than an implicit AND. When using the new, both implicit and explicit
        !            36: **     AND operators have a higher precedence than OR.
        !            37: **
        !            38: ** If compiled with SQLITE_TEST defined, then this module exports the
        !            39: ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
        !            40: ** to zero causes the module to use the old syntax. If it is set to 
        !            41: ** non-zero the new syntax is activated. This is so both syntaxes can
        !            42: ** be tested using a single build of testfixture.
        !            43: **
        !            44: ** The following describes the syntax supported by the fts3 MATCH
        !            45: ** operator in a similar format to that used by the lemon parser
        !            46: ** generator. This module does not actually use lemon; it uses a
        !            47: ** custom parser.
        !            48: **
        !            49: **   query ::= andexpr (OR andexpr)*.
        !            50: **
        !            51: **   andexpr ::= notexpr (AND? notexpr)*.
        !            52: **
        !            53: **   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
        !            54: **   notexpr ::= LP query RP.
        !            55: **
        !            56: **   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
        !            57: **
        !            58: **   distance_opt ::= .
        !            59: **   distance_opt ::= / INTEGER.
        !            60: **
        !            61: **   phrase ::= TOKEN.
        !            62: **   phrase ::= COLUMN:TOKEN.
        !            63: **   phrase ::= "TOKEN TOKEN TOKEN...".
        !            64: */
        !            65: 
/*
** Query-syntax selector used throughout this file.
**
** In SQLITE_TEST builds it is a real global variable, initially 0, so a
** single test binary can switch between the two syntaxes at run time.
** In all other builds it is a compile-time constant: 1 when
** SQLITE_ENABLE_FTS3_PARENTHESIS is defined, 0 otherwise.
*/
#ifdef SQLITE_TEST
int sqlite3_fts3_enable_parentheses = 0;
#else
# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS 
#  define sqlite3_fts3_enable_parentheses 1
# else
#  define sqlite3_fts3_enable_parentheses 0
# endif
#endif
        !            75: 
/*
** Default span for NEAR operators. This is the distance recorded for a
** NEAR keyword that is not followed by an explicit "/N" suffix.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
        !            80: 
        !            81: #include <string.h>
        !            82: #include <assert.h>
        !            83: 
/*
** State shared by the query-parsing routines in this file.
**
** isNot:
**   This variable is used by function getNextNode(). When getNextNode() is
**   called, it sets ParseContext.isNot to true if the 'next node' is a 
**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
**   zero. The flag is cleared at the start of every getNextNode() call and
**   is only ever set when the old (non-parenthesis) syntax is in use.
**
** nNest:
**   Incremented by getNextNode() for each '(' and decremented for each ')'
**   it consumes.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  const char **azCol;                 /* Array of column names for fts3 table */
  int bFts4;                          /* True to allow FTS4-only syntax ("^") */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  int isNot;                          /* True if getNextNode() sees a unary - */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
};
        !           103: 
        !           104: /*
        !           105: ** This function is equivalent to the standard isspace() function. 
        !           106: **
        !           107: ** The standard isspace() can be awkward to use safely, because although it
        !           108: ** is defined to accept an argument of type int, its behaviour when passed
        !           109: ** an integer that falls outside of the range of the unsigned char type
        !           110: ** is undefined (and sometimes, "undefined" means segfault). This wrapper
        !           111: ** is defined to accept an argument of type char, and always returns 0 for
        !           112: ** any values that fall outside of the range of the unsigned char type (i.e.
        !           113: ** negative values).
        !           114: */
        !           115: static int fts3isspace(char c){
        !           116:   return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
        !           117: }
        !           118: 
        !           119: /*
        !           120: ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
        !           121: ** zero the memory before returning a pointer to it. If unsuccessful, 
        !           122: ** return NULL.
        !           123: */
        !           124: static void *fts3MallocZero(int nByte){
        !           125:   void *pRet = sqlite3_malloc(nByte);
        !           126:   if( pRet ) memset(pRet, 0, nByte);
        !           127:   return pRet;
        !           128: }
        !           129: 
        !           130: 
        !           131: /*
        !           132: ** Extract the next token from buffer z (length n) using the tokenizer
        !           133: ** and other information (column names etc.) in pParse. Create an Fts3Expr
        !           134: ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
        !           135: ** single token and set *ppExpr to point to it. If the end of the buffer is
        !           136: ** reached before a token is found, set *ppExpr to zero. It is the
        !           137: ** responsibility of the caller to eventually deallocate the allocated 
        !           138: ** Fts3Expr structure (if any) by passing it to sqlite3_free().
        !           139: **
        !           140: ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
        !           141: ** fails.
        !           142: */
        !           143: static int getNextToken(
        !           144:   ParseContext *pParse,                   /* fts3 query parse context */
        !           145:   int iCol,                               /* Value for Fts3Phrase.iColumn */
        !           146:   const char *z, int n,                   /* Input string */
        !           147:   Fts3Expr **ppExpr,                      /* OUT: expression */
        !           148:   int *pnConsumed                         /* OUT: Number of bytes consumed */
        !           149: ){
        !           150:   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
        !           151:   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
        !           152:   int rc;
        !           153:   sqlite3_tokenizer_cursor *pCursor;
        !           154:   Fts3Expr *pRet = 0;
        !           155:   int nConsumed = 0;
        !           156: 
        !           157:   rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
        !           158:   if( rc==SQLITE_OK ){
        !           159:     const char *zToken;
        !           160:     int nToken, iStart, iEnd, iPosition;
        !           161:     int nByte;                               /* total space to allocate */
        !           162: 
        !           163:     pCursor->pTokenizer = pTokenizer;
        !           164:     rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
        !           165: 
        !           166:     if( rc==SQLITE_OK ){
        !           167:       nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
        !           168:       pRet = (Fts3Expr *)fts3MallocZero(nByte);
        !           169:       if( !pRet ){
        !           170:         rc = SQLITE_NOMEM;
        !           171:       }else{
        !           172:         pRet->eType = FTSQUERY_PHRASE;
        !           173:         pRet->pPhrase = (Fts3Phrase *)&pRet[1];
        !           174:         pRet->pPhrase->nToken = 1;
        !           175:         pRet->pPhrase->iColumn = iCol;
        !           176:         pRet->pPhrase->aToken[0].n = nToken;
        !           177:         pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
        !           178:         memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
        !           179: 
        !           180:         if( iEnd<n && z[iEnd]=='*' ){
        !           181:           pRet->pPhrase->aToken[0].isPrefix = 1;
        !           182:           iEnd++;
        !           183:         }
        !           184: 
        !           185:         while( 1 ){
        !           186:           if( !sqlite3_fts3_enable_parentheses 
        !           187:            && iStart>0 && z[iStart-1]=='-' 
        !           188:           ){
        !           189:             pParse->isNot = 1;
        !           190:             iStart--;
        !           191:           }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
        !           192:             pRet->pPhrase->aToken[0].bFirst = 1;
        !           193:             iStart--;
        !           194:           }else{
        !           195:             break;
        !           196:           }
        !           197:         }
        !           198: 
        !           199:       }
        !           200:       nConsumed = iEnd;
        !           201:     }
        !           202: 
        !           203:     pModule->xClose(pCursor);
        !           204:   }
        !           205:   
        !           206:   *pnConsumed = nConsumed;
        !           207:   *ppExpr = pRet;
        !           208:   return rc;
        !           209: }
        !           210: 
        !           211: 
        !           212: /*
        !           213: ** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
        !           214: ** then free the old allocation.
        !           215: */
        !           216: static void *fts3ReallocOrFree(void *pOrig, int nNew){
        !           217:   void *pRet = sqlite3_realloc(pOrig, nNew);
        !           218:   if( !pRet ){
        !           219:     sqlite3_free(pOrig);
        !           220:   }
        !           221:   return pRet;
        !           222: }
        !           223: 
        !           224: /*
        !           225: ** Buffer zInput, length nInput, contains the contents of a quoted string
        !           226: ** that appeared as part of an fts3 query expression. Neither quote character
        !           227: ** is included in the buffer. This function attempts to tokenize the entire
        !           228: ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE 
        !           229: ** containing the results.
        !           230: **
        !           231: ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
        !           232: ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
        !           233: ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
        !           234: ** to 0.
        !           235: */
        !           236: static int getNextString(
        !           237:   ParseContext *pParse,                   /* fts3 query parse context */
        !           238:   const char *zInput, int nInput,         /* Input string */
        !           239:   Fts3Expr **ppExpr                       /* OUT: expression */
        !           240: ){
        !           241:   sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
        !           242:   sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
        !           243:   int rc;
        !           244:   Fts3Expr *p = 0;
        !           245:   sqlite3_tokenizer_cursor *pCursor = 0;
        !           246:   char *zTemp = 0;
        !           247:   int nTemp = 0;
        !           248: 
        !           249:   const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
        !           250:   int nToken = 0;
        !           251: 
        !           252:   /* The final Fts3Expr data structure, including the Fts3Phrase,
        !           253:   ** Fts3PhraseToken structures token buffers are all stored as a single 
        !           254:   ** allocation so that the expression can be freed with a single call to
        !           255:   ** sqlite3_free(). Setting this up requires a two pass approach.
        !           256:   **
        !           257:   ** The first pass, in the block below, uses a tokenizer cursor to iterate
        !           258:   ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
        !           259:   ** to assemble data in two dynamic buffers:
        !           260:   **
        !           261:   **   Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
        !           262:   **             structure, followed by the array of Fts3PhraseToken 
        !           263:   **             structures. This pass only populates the Fts3PhraseToken array.
        !           264:   **
        !           265:   **   Buffer zTemp: Contains copies of all tokens.
        !           266:   **
        !           267:   ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
        !           268:   ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
        !           269:   ** structures.
        !           270:   */
        !           271:   rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
        !           272:   if( rc==SQLITE_OK ){
        !           273:     int ii;
        !           274:     pCursor->pTokenizer = pTokenizer;
        !           275:     for(ii=0; rc==SQLITE_OK; ii++){
        !           276:       const char *zByte;
        !           277:       int nByte, iBegin, iEnd, iPos;
        !           278:       rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
        !           279:       if( rc==SQLITE_OK ){
        !           280:         Fts3PhraseToken *pToken;
        !           281: 
        !           282:         p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
        !           283:         if( !p ) goto no_mem;
        !           284: 
        !           285:         zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
        !           286:         if( !zTemp ) goto no_mem;
        !           287: 
        !           288:         assert( nToken==ii );
        !           289:         pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
        !           290:         memset(pToken, 0, sizeof(Fts3PhraseToken));
        !           291: 
        !           292:         memcpy(&zTemp[nTemp], zByte, nByte);
        !           293:         nTemp += nByte;
        !           294: 
        !           295:         pToken->n = nByte;
        !           296:         pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
        !           297:         pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
        !           298:         nToken = ii+1;
        !           299:       }
        !           300:     }
        !           301: 
        !           302:     pModule->xClose(pCursor);
        !           303:     pCursor = 0;
        !           304:   }
        !           305: 
        !           306:   if( rc==SQLITE_DONE ){
        !           307:     int jj;
        !           308:     char *zBuf = 0;
        !           309: 
        !           310:     p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
        !           311:     if( !p ) goto no_mem;
        !           312:     memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
        !           313:     p->eType = FTSQUERY_PHRASE;
        !           314:     p->pPhrase = (Fts3Phrase *)&p[1];
        !           315:     p->pPhrase->iColumn = pParse->iDefaultCol;
        !           316:     p->pPhrase->nToken = nToken;
        !           317: 
        !           318:     zBuf = (char *)&p->pPhrase->aToken[nToken];
        !           319:     if( zTemp ){
        !           320:       memcpy(zBuf, zTemp, nTemp);
        !           321:       sqlite3_free(zTemp);
        !           322:     }else{
        !           323:       assert( nTemp==0 );
        !           324:     }
        !           325: 
        !           326:     for(jj=0; jj<p->pPhrase->nToken; jj++){
        !           327:       p->pPhrase->aToken[jj].z = zBuf;
        !           328:       zBuf += p->pPhrase->aToken[jj].n;
        !           329:     }
        !           330:     rc = SQLITE_OK;
        !           331:   }
        !           332: 
        !           333:   *ppExpr = p;
        !           334:   return rc;
        !           335: no_mem:
        !           336: 
        !           337:   if( pCursor ){
        !           338:     pModule->xClose(pCursor);
        !           339:   }
        !           340:   sqlite3_free(zTemp);
        !           341:   sqlite3_free(p);
        !           342:   *ppExpr = 0;
        !           343:   return SQLITE_NOMEM;
        !           344: }
        !           345: 
        !           346: /*
        !           347: ** Function getNextNode(), which is called by fts3ExprParse(), may itself
        !           348: ** call fts3ExprParse(). So this forward declaration is required.
        !           349: */
        !           350: static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
        !           351: 
        !           352: /*
        !           353: ** The output variable *ppExpr is populated with an allocated Fts3Expr 
        !           354: ** structure, or set to 0 if the end of the input buffer is reached.
        !           355: **
        !           356: ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
        !           357: ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
        !           358: ** If SQLITE_ERROR is returned, pContext is populated with an error message.
        !           359: */
        !           360: static int getNextNode(
        !           361:   ParseContext *pParse,                   /* fts3 query parse context */
        !           362:   const char *z, int n,                   /* Input string */
        !           363:   Fts3Expr **ppExpr,                      /* OUT: expression */
        !           364:   int *pnConsumed                         /* OUT: Number of bytes consumed */
        !           365: ){
        !           366:   static const struct Fts3Keyword {
        !           367:     char *z;                              /* Keyword text */
        !           368:     unsigned char n;                      /* Length of the keyword */
        !           369:     unsigned char parenOnly;              /* Only valid in paren mode */
        !           370:     unsigned char eType;                  /* Keyword code */
        !           371:   } aKeyword[] = {
        !           372:     { "OR" ,  2, 0, FTSQUERY_OR   },
        !           373:     { "AND",  3, 1, FTSQUERY_AND  },
        !           374:     { "NOT",  3, 1, FTSQUERY_NOT  },
        !           375:     { "NEAR", 4, 0, FTSQUERY_NEAR }
        !           376:   };
        !           377:   int ii;
        !           378:   int iCol;
        !           379:   int iColLen;
        !           380:   int rc;
        !           381:   Fts3Expr *pRet = 0;
        !           382: 
        !           383:   const char *zInput = z;
        !           384:   int nInput = n;
        !           385: 
        !           386:   pParse->isNot = 0;
        !           387: 
        !           388:   /* Skip over any whitespace before checking for a keyword, an open or
        !           389:   ** close bracket, or a quoted string. 
        !           390:   */
        !           391:   while( nInput>0 && fts3isspace(*zInput) ){
        !           392:     nInput--;
        !           393:     zInput++;
        !           394:   }
        !           395:   if( nInput==0 ){
        !           396:     return SQLITE_DONE;
        !           397:   }
        !           398: 
        !           399:   /* See if we are dealing with a keyword. */
        !           400:   for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
        !           401:     const struct Fts3Keyword *pKey = &aKeyword[ii];
        !           402: 
        !           403:     if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
        !           404:       continue;
        !           405:     }
        !           406: 
        !           407:     if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
        !           408:       int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
        !           409:       int nKey = pKey->n;
        !           410:       char cNext;
        !           411: 
        !           412:       /* If this is a "NEAR" keyword, check for an explicit nearness. */
        !           413:       if( pKey->eType==FTSQUERY_NEAR ){
        !           414:         assert( nKey==4 );
        !           415:         if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
        !           416:           nNear = 0;
        !           417:           for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
        !           418:             nNear = nNear * 10 + (zInput[nKey] - '0');
        !           419:           }
        !           420:         }
        !           421:       }
        !           422: 
        !           423:       /* At this point this is probably a keyword. But for that to be true,
        !           424:       ** the next byte must contain either whitespace, an open or close
        !           425:       ** parenthesis, a quote character, or EOF. 
        !           426:       */
        !           427:       cNext = zInput[nKey];
        !           428:       if( fts3isspace(cNext) 
        !           429:        || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
        !           430:       ){
        !           431:         pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
        !           432:         if( !pRet ){
        !           433:           return SQLITE_NOMEM;
        !           434:         }
        !           435:         pRet->eType = pKey->eType;
        !           436:         pRet->nNear = nNear;
        !           437:         *ppExpr = pRet;
        !           438:         *pnConsumed = (int)((zInput - z) + nKey);
        !           439:         return SQLITE_OK;
        !           440:       }
        !           441: 
        !           442:       /* Turns out that wasn't a keyword after all. This happens if the
        !           443:       ** user has supplied a token such as "ORacle". Continue.
        !           444:       */
        !           445:     }
        !           446:   }
        !           447: 
        !           448:   /* Check for an open bracket. */
        !           449:   if( sqlite3_fts3_enable_parentheses ){
        !           450:     if( *zInput=='(' ){
        !           451:       int nConsumed;
        !           452:       pParse->nNest++;
        !           453:       rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
        !           454:       if( rc==SQLITE_OK && !*ppExpr ){
        !           455:         rc = SQLITE_DONE;
        !           456:       }
        !           457:       *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
        !           458:       return rc;
        !           459:     }
        !           460:   
        !           461:     /* Check for a close bracket. */
        !           462:     if( *zInput==')' ){
        !           463:       pParse->nNest--;
        !           464:       *pnConsumed = (int)((zInput - z) + 1);
        !           465:       return SQLITE_DONE;
        !           466:     }
        !           467:   }
        !           468: 
        !           469:   /* See if we are dealing with a quoted phrase. If this is the case, then
        !           470:   ** search for the closing quote and pass the whole string to getNextString()
        !           471:   ** for processing. This is easy to do, as fts3 has no syntax for escaping
        !           472:   ** a quote character embedded in a string.
        !           473:   */
        !           474:   if( *zInput=='"' ){
        !           475:     for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
        !           476:     *pnConsumed = (int)((zInput - z) + ii + 1);
        !           477:     if( ii==nInput ){
        !           478:       return SQLITE_ERROR;
        !           479:     }
        !           480:     return getNextString(pParse, &zInput[1], ii-1, ppExpr);
        !           481:   }
        !           482: 
        !           483: 
        !           484:   /* If control flows to this point, this must be a regular token, or 
        !           485:   ** the end of the input. Read a regular token using the sqlite3_tokenizer
        !           486:   ** interface. Before doing so, figure out if there is an explicit
        !           487:   ** column specifier for the token. 
        !           488:   **
        !           489:   ** TODO: Strangely, it is not possible to associate a column specifier
        !           490:   ** with a quoted phrase, only with a single token. Not sure if this was
        !           491:   ** an implementation artifact or an intentional decision when fts3 was
        !           492:   ** first implemented. Whichever it was, this module duplicates the 
        !           493:   ** limitation.
        !           494:   */
        !           495:   iCol = pParse->iDefaultCol;
        !           496:   iColLen = 0;
        !           497:   for(ii=0; ii<pParse->nCol; ii++){
        !           498:     const char *zStr = pParse->azCol[ii];
        !           499:     int nStr = (int)strlen(zStr);
        !           500:     if( nInput>nStr && zInput[nStr]==':' 
        !           501:      && sqlite3_strnicmp(zStr, zInput, nStr)==0 
        !           502:     ){
        !           503:       iCol = ii;
        !           504:       iColLen = (int)((zInput - z) + nStr + 1);
        !           505:       break;
        !           506:     }
        !           507:   }
        !           508:   rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
        !           509:   *pnConsumed += iColLen;
        !           510:   return rc;
        !           511: }
        !           512: 
        !           513: /*
        !           514: ** The argument is an Fts3Expr structure for a binary operator (any type
        !           515: ** except an FTSQUERY_PHRASE). Return an integer value representing the
        !           516: ** precedence of the operator. Lower values have a higher precedence (i.e.
        !           517: ** group more tightly). For example, in the C language, the == operator
        !           518: ** groups more tightly than ||, and would therefore have a higher precedence.
        !           519: **
        !           520: ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
        !           521: ** is defined), the order of the operators in precedence from highest to
        !           522: ** lowest is:
        !           523: **
        !           524: **   NEAR
        !           525: **   NOT
        !           526: **   AND (including implicit ANDs)
        !           527: **   OR
        !           528: **
        !           529: ** Note that when using the old query syntax, the OR operator has a higher
        !           530: ** precedence than the AND operator.
        !           531: */
        !           532: static int opPrecedence(Fts3Expr *p){
        !           533:   assert( p->eType!=FTSQUERY_PHRASE );
        !           534:   if( sqlite3_fts3_enable_parentheses ){
        !           535:     return p->eType;
        !           536:   }else if( p->eType==FTSQUERY_NEAR ){
        !           537:     return 1;
        !           538:   }else if( p->eType==FTSQUERY_OR ){
        !           539:     return 2;
        !           540:   }
        !           541:   assert( p->eType==FTSQUERY_AND );
        !           542:   return 3;
        !           543: }
        !           544: 
        !           545: /*
        !           546: ** Argument ppHead contains a pointer to the current head of a query 
        !           547: ** expression tree being parsed. pPrev is the expression node most recently
        !           548: ** inserted into the tree. This function adds pNew, which is always a binary
        !           549: ** operator node, into the expression tree based on the relative precedence
        !           550: ** of pNew and the existing nodes of the tree. This may result in the head
        !           551: ** of the tree changing, in which case *ppHead is set to the new root node.
        !           552: */
        !           553: static void insertBinaryOperator(
        !           554:   Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
        !           555:   Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
        !           556:   Fts3Expr *pNew           /* New binary node to insert into expression tree */
        !           557: ){
        !           558:   Fts3Expr *pSplit = pPrev;
        !           559:   while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
        !           560:     pSplit = pSplit->pParent;
        !           561:   }
        !           562: 
        !           563:   if( pSplit->pParent ){
        !           564:     assert( pSplit->pParent->pRight==pSplit );
        !           565:     pSplit->pParent->pRight = pNew;
        !           566:     pNew->pParent = pSplit->pParent;
        !           567:   }else{
        !           568:     *ppHead = pNew;
        !           569:   }
        !           570:   pNew->pLeft = pSplit;
        !           571:   pSplit->pParent = pNew;
        !           572: }
        !           573: 
        !           574: /*
        !           575: ** Parse the fts3 query expression found in buffer z, length n. This function
        !           576: ** returns either when the end of the buffer is reached or an unmatched 
        !           577: ** closing bracket - ')' - is encountered.
        !           578: **
        !           579: ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
        !           580: ** parsed form of the expression and *pnConsumed is set to the number of
        !           581: ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
        !           582: ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
        !           583: */
        !           584: static int fts3ExprParse(
        !           585:   ParseContext *pParse,                   /* fts3 query parse context */
        !           586:   const char *z, int n,                   /* Text of MATCH query */
        !           587:   Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
        !           588:   int *pnConsumed                         /* OUT: Number of bytes consumed */
        !           589: ){
        !           590:   Fts3Expr *pRet = 0;
        !           591:   Fts3Expr *pPrev = 0;
        !           592:   Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
        !           593:   int nIn = n;
        !           594:   const char *zIn = z;
        !           595:   int rc = SQLITE_OK;
        !           596:   int isRequirePhrase = 1;
        !           597: 
        !           598:   while( rc==SQLITE_OK ){
        !           599:     Fts3Expr *p = 0;
        !           600:     int nByte = 0;
        !           601:     rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
        !           602:     if( rc==SQLITE_OK ){
        !           603:       int isPhrase;
        !           604: 
        !           605:       if( !sqlite3_fts3_enable_parentheses 
        !           606:        && p->eType==FTSQUERY_PHRASE && pParse->isNot 
        !           607:       ){
        !           608:         /* Create an implicit NOT operator. */
        !           609:         Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
        !           610:         if( !pNot ){
        !           611:           sqlite3Fts3ExprFree(p);
        !           612:           rc = SQLITE_NOMEM;
        !           613:           goto exprparse_out;
        !           614:         }
        !           615:         pNot->eType = FTSQUERY_NOT;
        !           616:         pNot->pRight = p;
        !           617:         if( pNotBranch ){
        !           618:           pNot->pLeft = pNotBranch;
        !           619:         }
        !           620:         pNotBranch = pNot;
        !           621:         p = pPrev;
        !           622:       }else{
        !           623:         int eType = p->eType;
        !           624:         isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
        !           625: 
        !           626:         /* The isRequirePhrase variable is set to true if a phrase or
        !           627:         ** an expression contained in parenthesis is required. If a
        !           628:         ** binary operator (AND, OR, NOT or NEAR) is encounted when
        !           629:         ** isRequirePhrase is set, this is a syntax error.
        !           630:         */
        !           631:         if( !isPhrase && isRequirePhrase ){
        !           632:           sqlite3Fts3ExprFree(p);
        !           633:           rc = SQLITE_ERROR;
        !           634:           goto exprparse_out;
        !           635:         }
        !           636:   
        !           637:         if( isPhrase && !isRequirePhrase ){
        !           638:           /* Insert an implicit AND operator. */
        !           639:           Fts3Expr *pAnd;
        !           640:           assert( pRet && pPrev );
        !           641:           pAnd = fts3MallocZero(sizeof(Fts3Expr));
        !           642:           if( !pAnd ){
        !           643:             sqlite3Fts3ExprFree(p);
        !           644:             rc = SQLITE_NOMEM;
        !           645:             goto exprparse_out;
        !           646:           }
        !           647:           pAnd->eType = FTSQUERY_AND;
        !           648:           insertBinaryOperator(&pRet, pPrev, pAnd);
        !           649:           pPrev = pAnd;
        !           650:         }
        !           651: 
        !           652:         /* This test catches attempts to make either operand of a NEAR
        !           653:         ** operator something other than a phrase. For example, either of
        !           654:         ** the following:
        !           655:         **
        !           656:         **    (bracketed expression) NEAR phrase
        !           657:         **    phrase NEAR (bracketed expression)
        !           658:         **
        !           659:         ** Return an error in either case.
        !           660:         */
        !           661:         if( pPrev && (
        !           662:             (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
        !           663:          || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
        !           664:         )){
        !           665:           sqlite3Fts3ExprFree(p);
        !           666:           rc = SQLITE_ERROR;
        !           667:           goto exprparse_out;
        !           668:         }
        !           669:   
        !           670:         if( isPhrase ){
        !           671:           if( pRet ){
        !           672:             assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
        !           673:             pPrev->pRight = p;
        !           674:             p->pParent = pPrev;
        !           675:           }else{
        !           676:             pRet = p;
        !           677:           }
        !           678:         }else{
        !           679:           insertBinaryOperator(&pRet, pPrev, p);
        !           680:         }
        !           681:         isRequirePhrase = !isPhrase;
        !           682:       }
        !           683:       assert( nByte>0 );
        !           684:     }
        !           685:     assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
        !           686:     nIn -= nByte;
        !           687:     zIn += nByte;
        !           688:     pPrev = p;
        !           689:   }
        !           690: 
        !           691:   if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
        !           692:     rc = SQLITE_ERROR;
        !           693:   }
        !           694: 
        !           695:   if( rc==SQLITE_DONE ){
        !           696:     rc = SQLITE_OK;
        !           697:     if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
        !           698:       if( !pRet ){
        !           699:         rc = SQLITE_ERROR;
        !           700:       }else{
        !           701:         Fts3Expr *pIter = pNotBranch;
        !           702:         while( pIter->pLeft ){
        !           703:           pIter = pIter->pLeft;
        !           704:         }
        !           705:         pIter->pLeft = pRet;
        !           706:         pRet = pNotBranch;
        !           707:       }
        !           708:     }
        !           709:   }
        !           710:   *pnConsumed = n - nIn;
        !           711: 
        !           712: exprparse_out:
        !           713:   if( rc!=SQLITE_OK ){
        !           714:     sqlite3Fts3ExprFree(pRet);
        !           715:     sqlite3Fts3ExprFree(pNotBranch);
        !           716:     pRet = 0;
        !           717:   }
        !           718:   *ppExpr = pRet;
        !           719:   return rc;
        !           720: }
        !           721: 
        !           722: /*
        !           723: ** Parameters z and n contain a pointer to and length of a buffer containing
        !           724: ** an fts3 query expression, respectively. This function attempts to parse the
        !           725: ** query expression and create a tree of Fts3Expr structures representing the
        !           726: ** parsed expression. If successful, *ppExpr is set to point to the head
        !           727: ** of the parsed expression tree and SQLITE_OK is returned. If an error
        !           728: ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
        !           729: ** error) is returned and *ppExpr is set to 0.
        !           730: **
        !           731: ** If parameter n is a negative number, then z is assumed to point to a
        !           732: ** nul-terminated string and the length is determined using strlen().
        !           733: **
        !           734: ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
        !           735: ** use to normalize query tokens while parsing the expression. The azCol[]
        !           736: ** array, which is assumed to contain nCol entries, should contain the names
        !           737: ** of each column in the target fts3 table, in order from left to right. 
        !           738: ** Column names must be nul-terminated strings.
        !           739: **
        !           740: ** The iDefaultCol parameter should be passed the index of the table column
        !           741: ** that appears on the left-hand-side of the MATCH operator (the default
        !           742: ** column to match against for tokens for which a column name is not explicitly
        !           743: ** specified as part of the query string), or -1 if tokens may by default
        !           744: ** match any table column.
        !           745: */
        !           746: int sqlite3Fts3ExprParse(
        !           747:   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
        !           748:   char **azCol,                       /* Array of column names for fts3 table */
        !           749:   int bFts4,                          /* True to allow FTS4-only syntax */
        !           750:   int nCol,                           /* Number of entries in azCol[] */
        !           751:   int iDefaultCol,                    /* Default column to query */
        !           752:   const char *z, int n,               /* Text of MATCH query */
        !           753:   Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
        !           754: ){
        !           755:   int nParsed;
        !           756:   int rc;
        !           757:   ParseContext sParse;
        !           758:   sParse.pTokenizer = pTokenizer;
        !           759:   sParse.azCol = (const char **)azCol;
        !           760:   sParse.nCol = nCol;
        !           761:   sParse.iDefaultCol = iDefaultCol;
        !           762:   sParse.nNest = 0;
        !           763:   sParse.bFts4 = bFts4;
        !           764:   if( z==0 ){
        !           765:     *ppExpr = 0;
        !           766:     return SQLITE_OK;
        !           767:   }
        !           768:   if( n<0 ){
        !           769:     n = (int)strlen(z);
        !           770:   }
        !           771:   rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
        !           772: 
        !           773:   /* Check for mismatched parenthesis */
        !           774:   if( rc==SQLITE_OK && sParse.nNest ){
        !           775:     rc = SQLITE_ERROR;
        !           776:     sqlite3Fts3ExprFree(*ppExpr);
        !           777:     *ppExpr = 0;
        !           778:   }
        !           779: 
        !           780:   return rc;
        !           781: }
        !           782: 
        !           783: /*
        !           784: ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
        !           785: */
        !           786: void sqlite3Fts3ExprFree(Fts3Expr *p){
        !           787:   if( p ){
        !           788:     assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
        !           789:     sqlite3Fts3ExprFree(p->pLeft);
        !           790:     sqlite3Fts3ExprFree(p->pRight);
        !           791:     sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
        !           792:     sqlite3_free(p->aMI);
        !           793:     sqlite3_free(p);
        !           794:   }
        !           795: }
        !           796: 
        !           797: /****************************************************************************
        !           798: *****************************************************************************
        !           799: ** Everything after this point is just test code.
        !           800: */
        !           801: 
        !           802: #ifdef SQLITE_TEST
        !           803: 
        !           804: #include <stdio.h>
        !           805: 
        !           806: /*
        !           807: ** Function to query the hash-table of tokenizers (see README.tokenizers).
        !           808: */
        !           809: static int queryTestTokenizer(
        !           810:   sqlite3 *db, 
        !           811:   const char *zName,  
        !           812:   const sqlite3_tokenizer_module **pp
        !           813: ){
        !           814:   int rc;
        !           815:   sqlite3_stmt *pStmt;
        !           816:   const char zSql[] = "SELECT fts3_tokenizer(?)";
        !           817: 
        !           818:   *pp = 0;
        !           819:   rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
        !           820:   if( rc!=SQLITE_OK ){
        !           821:     return rc;
        !           822:   }
        !           823: 
        !           824:   sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
        !           825:   if( SQLITE_ROW==sqlite3_step(pStmt) ){
        !           826:     if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
        !           827:       memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
        !           828:     }
        !           829:   }
        !           830: 
        !           831:   return sqlite3_finalize(pStmt);
        !           832: }
        !           833: 
        !           834: /*
        !           835: ** Return a pointer to a buffer containing a text representation of the
        !           836: ** expression passed as the first argument. The buffer is obtained from
        !           837: ** sqlite3_malloc(). It is the responsibility of the caller to use 
        !           838: ** sqlite3_free() to release the memory. If an OOM condition is encountered,
        !           839: ** NULL is returned.
        !           840: **
        !           841: ** If the second argument is not NULL, then its contents are prepended to 
        !           842: ** the returned expression text and then freed using sqlite3_free().
        !           843: */
        !           844: static char *exprToString(Fts3Expr *pExpr, char *zBuf){
        !           845:   switch( pExpr->eType ){
        !           846:     case FTSQUERY_PHRASE: {
        !           847:       Fts3Phrase *pPhrase = pExpr->pPhrase;
        !           848:       int i;
        !           849:       zBuf = sqlite3_mprintf(
        !           850:           "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
        !           851:       for(i=0; zBuf && i<pPhrase->nToken; i++){
        !           852:         zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, 
        !           853:             pPhrase->aToken[i].n, pPhrase->aToken[i].z,
        !           854:             (pPhrase->aToken[i].isPrefix?"+":"")
        !           855:         );
        !           856:       }
        !           857:       return zBuf;
        !           858:     }
        !           859: 
        !           860:     case FTSQUERY_NEAR:
        !           861:       zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
        !           862:       break;
        !           863:     case FTSQUERY_NOT:
        !           864:       zBuf = sqlite3_mprintf("%zNOT ", zBuf);
        !           865:       break;
        !           866:     case FTSQUERY_AND:
        !           867:       zBuf = sqlite3_mprintf("%zAND ", zBuf);
        !           868:       break;
        !           869:     case FTSQUERY_OR:
        !           870:       zBuf = sqlite3_mprintf("%zOR ", zBuf);
        !           871:       break;
        !           872:   }
        !           873: 
        !           874:   if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
        !           875:   if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
        !           876:   if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
        !           877: 
        !           878:   if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
        !           879:   if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
        !           880: 
        !           881:   return zBuf;
        !           882: }
        !           883: 
        !           884: /*
        !           885: ** This is the implementation of a scalar SQL function used to test the 
        !           886: ** expression parser. It should be called as follows:
        !           887: **
        !           888: **   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
        !           889: **
        !           890: ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
        !           891: ** to parse the query expression (see README.tokenizers). The second argument
        !           892: ** is the query expression to parse. Each subsequent argument is the name
        !           893: ** of a column of the fts3 table that the query expression may refer to.
        !           894: ** For example:
        !           895: **
        !           896: **   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
        !           897: */
        !           898: static void fts3ExprTest(
        !           899:   sqlite3_context *context,
        !           900:   int argc,
        !           901:   sqlite3_value **argv
        !           902: ){
        !           903:   sqlite3_tokenizer_module const *pModule = 0;
        !           904:   sqlite3_tokenizer *pTokenizer = 0;
        !           905:   int rc;
        !           906:   char **azCol = 0;
        !           907:   const char *zExpr;
        !           908:   int nExpr;
        !           909:   int nCol;
        !           910:   int ii;
        !           911:   Fts3Expr *pExpr;
        !           912:   char *zBuf = 0;
        !           913:   sqlite3 *db = sqlite3_context_db_handle(context);
        !           914: 
        !           915:   if( argc<3 ){
        !           916:     sqlite3_result_error(context, 
        !           917:         "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
        !           918:     );
        !           919:     return;
        !           920:   }
        !           921: 
        !           922:   rc = queryTestTokenizer(db,
        !           923:                           (const char *)sqlite3_value_text(argv[0]), &pModule);
        !           924:   if( rc==SQLITE_NOMEM ){
        !           925:     sqlite3_result_error_nomem(context);
        !           926:     goto exprtest_out;
        !           927:   }else if( !pModule ){
        !           928:     sqlite3_result_error(context, "No such tokenizer module", -1);
        !           929:     goto exprtest_out;
        !           930:   }
        !           931: 
        !           932:   rc = pModule->xCreate(0, 0, &pTokenizer);
        !           933:   assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
        !           934:   if( rc==SQLITE_NOMEM ){
        !           935:     sqlite3_result_error_nomem(context);
        !           936:     goto exprtest_out;
        !           937:   }
        !           938:   pTokenizer->pModule = pModule;
        !           939: 
        !           940:   zExpr = (const char *)sqlite3_value_text(argv[1]);
        !           941:   nExpr = sqlite3_value_bytes(argv[1]);
        !           942:   nCol = argc-2;
        !           943:   azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
        !           944:   if( !azCol ){
        !           945:     sqlite3_result_error_nomem(context);
        !           946:     goto exprtest_out;
        !           947:   }
        !           948:   for(ii=0; ii<nCol; ii++){
        !           949:     azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
        !           950:   }
        !           951: 
        !           952:   rc = sqlite3Fts3ExprParse(
        !           953:       pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
        !           954:   );
        !           955:   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
        !           956:     sqlite3_result_error(context, "Error parsing expression", -1);
        !           957:   }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
        !           958:     sqlite3_result_error_nomem(context);
        !           959:   }else{
        !           960:     sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
        !           961:     sqlite3_free(zBuf);
        !           962:   }
        !           963: 
        !           964:   sqlite3Fts3ExprFree(pExpr);
        !           965: 
        !           966: exprtest_out:
        !           967:   if( pModule && pTokenizer ){
        !           968:     rc = pModule->xDestroy(pTokenizer);
        !           969:   }
        !           970:   sqlite3_free(azCol);
        !           971: }
        !           972: 
        !           973: /*
        !           974: ** Register the query expression parser test function fts3_exprtest() 
        !           975: ** with database connection db. 
        !           976: */
        !           977: int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
        !           978:   return sqlite3_create_function(
        !           979:       db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
        !           980:   );
        !           981: }
        !           982: 
        !           983: #endif
        !           984: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>