Annotation of embedaddon/sqlite3/ext/fts3/fts3_snippet.c, revision 1.1.1.1

1.1       misho       1: /*
                      2: ** 2009 Oct 23
                      3: **
                      4: ** The author disclaims copyright to this source code.  In place of
                      5: ** a legal notice, here is a blessing:
                      6: **
                      7: **    May you do good and not evil.
                      8: **    May you find forgiveness for yourself and forgive others.
                      9: **    May you share freely, never taking more than you give.
                     10: **
                     11: ******************************************************************************
                     12: */
                     13: 
                     14: #include "fts3Int.h"
                     15: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
                     16: 
                     17: #include <string.h>
                     18: #include <assert.h>
                     19: 
                     /*
                     ** Directive characters recognised in the format string that is passed
                     ** as the second argument to matchinfo(). The comment beside each
                     ** directive states how many values it contributes.
                     */
                     #define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
                     #define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
                     #define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
                     #define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
                     #define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
                     #define FTS3_MATCHINFO_LCS       's'        /* nCol values */
                     #define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */

                     /*
                     ** Default value of the second argument to matchinfo(): phrase count,
                     ** column count, then the per-phrase/per-column hit data.
                     */
                     #define FTS3_MATCHINFO_DEFAULT   "pcx"
                     35: 
                     36: 
                     37: /*
                     38: ** Used as an fts3ExprIterate() context when loading phrase doclists to
                     39: ** Fts3Expr.aDoclist[]/nDoclist.
                     40: */
                     41: typedef struct LoadDoclistCtx LoadDoclistCtx;
                     42: struct LoadDoclistCtx {
                     43:   Fts3Cursor *pCsr;               /* FTS3 Cursor */
                     44:   int nPhrase;                    /* Number of phrases seen so far */
                     45:   int nToken;                     /* Number of tokens seen so far */
                     46: };
                     47: 
                     48: /*
                     49: ** The following types are used as part of the implementation of the 
                     50: ** fts3BestSnippet() routine.
                     51: */
                     52: typedef struct SnippetIter SnippetIter;
                     53: typedef struct SnippetPhrase SnippetPhrase;
                     54: typedef struct SnippetFragment SnippetFragment;
                     55: 
                     56: struct SnippetIter {
                     57:   Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
                     58:   int iCol;                       /* Extract snippet from this column */
                     59:   int nSnippet;                   /* Requested snippet length (in tokens) */
                     60:   int nPhrase;                    /* Number of phrases in query */
                     61:   SnippetPhrase *aPhrase;         /* Array of size nPhrase */
                     62:   int iCurrent;                   /* First token of current snippet */
                     63: };
                     64: 
                     65: struct SnippetPhrase {
                     66:   int nToken;                     /* Number of tokens in phrase */
                     67:   char *pList;                    /* Pointer to start of phrase position list */
                     68:   int iHead;                      /* Next value in position list */
                     69:   char *pHead;                    /* Position list data following iHead */
                     70:   int iTail;                      /* Next value in trailing position list */
                     71:   char *pTail;                    /* Position list data following iTail */
                     72: };
                     73: 
                     74: struct SnippetFragment {
                     75:   int iCol;                       /* Column snippet is extracted from */
                     76:   int iPos;                       /* Index of first token in snippet */
                     77:   u64 covered;                    /* Mask of query phrases covered */
                     78:   u64 hlmask;                     /* Mask of snippet terms to highlight */
                     79: };
                     80: 
                     81: /*
                     82: ** This type is used as an fts3ExprIterate() context object while 
                     83: ** accumulating the data returned by the matchinfo() function.
                     84: */
                     85: typedef struct MatchInfo MatchInfo;
                     86: struct MatchInfo {
                     87:   Fts3Cursor *pCursor;            /* FTS3 Cursor */
                     88:   int nCol;                       /* Number of columns in table */
                     89:   int nPhrase;                    /* Number of matchable phrases in query */
                     90:   sqlite3_int64 nDoc;             /* Number of docs in database */
                     91:   u32 *aMatchinfo;                /* Pre-allocated buffer */
                     92: };
                     93: 
                     94: 
                     95: 
                     /*
                     ** Growable text accumulator. snippet() and offsets() both produce text
                     ** results; they build them up piece by piece in one of these objects
                     ** using fts3StringAppend().
                     */
                     typedef struct StrBuffer {
                       char *z;                        /* Buffer holding the string so far */
                       int n;                          /* Bytes used in z, excluding the nul */
                       int nAlloc;                     /* Bytes allocated at z */
                     } StrBuffer;
                    107: 
                    108: 
                    /*
                    ** Step a position-list cursor forward by one element.
                    **
                    ** A position list is a strictly increasing sequence of integers. Each
                    ** element is stored as an FTS3 varint holding (current - previous + 2).
                    ** The list
                    **
                    **     4 9 113
                    **
                    ** is therefore stored as the varints
                    **
                    **     6 7 106
                    **
                    ** On entry *pp points at the varint of the element to read and *piPos
                    ** holds the value of the element before it. On return *piPos holds the
                    ** value just decoded and *pp points at the varint that follows.
                    */
                    static void fts3GetDeltaPosition(char **pp, int *piPos){
                      char *pVarint = *pp;            /* Varint about to be decoded */
                      int iDelta;                     /* Decoded varint (difference + 2) */

                      pVarint += sqlite3Fts3GetVarint32(pVarint, &iDelta);
                      *pp = pVarint;
                      *piPos += (iDelta-2);
                    }
                    134: 
                    135: /*
                    136: ** Helper function for fts3ExprIterate() (see below).
                    137: */
                    138: static int fts3ExprIterate2(
                    139:   Fts3Expr *pExpr,                /* Expression to iterate phrases of */
                    140:   int *piPhrase,                  /* Pointer to phrase counter */
                    141:   int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
                    142:   void *pCtx                      /* Second argument to pass to callback */
                    143: ){
                    144:   int rc;                         /* Return code */
                    145:   int eType = pExpr->eType;       /* Type of expression node pExpr */
                    146: 
                    147:   if( eType!=FTSQUERY_PHRASE ){
                    148:     assert( pExpr->pLeft && pExpr->pRight );
                    149:     rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
                    150:     if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
                    151:       rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
                    152:     }
                    153:   }else{
                    154:     rc = x(pExpr, *piPhrase, pCtx);
                    155:     (*piPhrase)++;
                    156:   }
                    157:   return rc;
                    158: }
                    159: 
                    160: /*
                    161: ** Iterate through all phrase nodes in an FTS3 query, except those that
                    162: ** are part of a sub-tree that is the right-hand-side of a NOT operator.
                    163: ** For each phrase node found, the supplied callback function is invoked.
                    164: **
                    165: ** If the callback function returns anything other than SQLITE_OK, 
                    166: ** the iteration is abandoned and the error code returned immediately.
                    167: ** Otherwise, SQLITE_OK is returned after a callback has been made for
                    168: ** all eligible phrase nodes.
                    169: */
                    170: static int fts3ExprIterate(
                    171:   Fts3Expr *pExpr,                /* Expression to iterate phrases of */
                    172:   int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
                    173:   void *pCtx                      /* Second argument to pass to callback */
                    174: ){
                    175:   int iPhrase = 0;                /* Variable used as the phrase counter */
                    176:   return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
                    177: }
                    178: 
                    179: /*
                    180: ** This is an fts3ExprIterate() callback used while loading the doclists
                    181: ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
                    182: ** fts3ExprLoadDoclists().
                    183: */
                    184: static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
                    185:   int rc = SQLITE_OK;
                    186:   Fts3Phrase *pPhrase = pExpr->pPhrase;
                    187:   LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
                    188: 
                    189:   UNUSED_PARAMETER(iPhrase);
                    190: 
                    191:   p->nPhrase++;
                    192:   p->nToken += pPhrase->nToken;
                    193: 
                    194:   return rc;
                    195: }
                    196: 
                    197: /*
                    198: ** Load the doclists for each phrase in the query associated with FTS3 cursor
                    199: ** pCsr. 
                    200: **
                    201: ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable 
                    202: ** phrases in the expression (all phrases except those directly or 
                    203: ** indirectly descended from the right-hand-side of a NOT operator). If 
                    204: ** pnToken is not NULL, then it is set to the number of tokens in all
                    205: ** matchable phrases of the expression.
                    206: */
                    207: static int fts3ExprLoadDoclists(
                    208:   Fts3Cursor *pCsr,               /* Fts3 cursor for current query */
                    209:   int *pnPhrase,                  /* OUT: Number of phrases in query */
                    210:   int *pnToken                    /* OUT: Number of tokens in query */
                    211: ){
                    212:   int rc;                         /* Return Code */
                    213:   LoadDoclistCtx sCtx = {0,0,0};  /* Context for fts3ExprIterate() */
                    214:   sCtx.pCsr = pCsr;
                    215:   rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
                    216:   if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
                    217:   if( pnToken ) *pnToken = sCtx.nToken;
                    218:   return rc;
                    219: }
                    220: 
                    221: static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
                    222:   (*(int *)ctx)++;
                    223:   UNUSED_PARAMETER(pExpr);
                    224:   UNUSED_PARAMETER(iPhrase);
                    225:   return SQLITE_OK;
                    226: }
                    227: static int fts3ExprPhraseCount(Fts3Expr *pExpr){
                    228:   int nPhrase = 0;
                    229:   (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
                    230:   return nPhrase;
                    231: }
                    232: 
                    /*
                    ** Move the position-list cursor (*ppIter, *piIter) forward until its
                    ** value is greater than or equal to iNext.
                    **
                    ** If *ppIter is NULL on entry the cursor is already exhausted and
                    ** nothing happens. If the list ends before a large enough value is
                    ** found, *ppIter is set to NULL and *piIter to -1.
                    */
                    static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
                      char *pList = *ppIter;          /* Next unread byte of the list */
                      int iPos;                       /* Value the cursor currently holds */

                      if( pList==0 ){
                        return;
                      }

                      iPos = *piIter;
                      while( iPos<iNext ){
                        /* A byte of 0x00 or 0x01 here means there are no more entries. */
                        if( (*pList & 0xFE)==0 ){
                          *piIter = -1;
                          *ppIter = 0;
                          return;
                        }
                        fts3GetDeltaPosition(&pList, &iPos);
                      }

                      *piIter = iPos;
                      *ppIter = pList;
                    }
                    256: 
                    257: /*
                    258: ** Advance the snippet iterator to the next candidate snippet.
                    259: */
                    260: static int fts3SnippetNextCandidate(SnippetIter *pIter){
                    261:   int i;                          /* Loop counter */
                    262: 
                    263:   if( pIter->iCurrent<0 ){
                    264:     /* The SnippetIter object has just been initialized. The first snippet
                    265:     ** candidate always starts at offset 0 (even if this candidate has a
                    266:     ** score of 0.0).
                    267:     */
                    268:     pIter->iCurrent = 0;
                    269: 
                    270:     /* Advance the 'head' iterator of each phrase to the first offset that
                    271:     ** is greater than or equal to (iNext+nSnippet).
                    272:     */
                    273:     for(i=0; i<pIter->nPhrase; i++){
                    274:       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
                    275:       fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
                    276:     }
                    277:   }else{
                    278:     int iStart;
                    279:     int iEnd = 0x7FFFFFFF;
                    280: 
                    281:     for(i=0; i<pIter->nPhrase; i++){
                    282:       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
                    283:       if( pPhrase->pHead && pPhrase->iHead<iEnd ){
                    284:         iEnd = pPhrase->iHead;
                    285:       }
                    286:     }
                    287:     if( iEnd==0x7FFFFFFF ){
                    288:       return 1;
                    289:     }
                    290: 
                    291:     pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
                    292:     for(i=0; i<pIter->nPhrase; i++){
                    293:       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
                    294:       fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
                    295:       fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
                    296:     }
                    297:   }
                    298: 
                    299:   return 0;
                    300: }
                    301: 
                    302: /*
                    303: ** Retrieve information about the current candidate snippet of snippet 
                    304: ** iterator pIter.
                    305: */
                    306: static void fts3SnippetDetails(
                    307:   SnippetIter *pIter,             /* Snippet iterator */
                    308:   u64 mCovered,                   /* Bitmask of phrases already covered */
                    309:   int *piToken,                   /* OUT: First token of proposed snippet */
                    310:   int *piScore,                   /* OUT: "Score" for this snippet */
                    311:   u64 *pmCover,                   /* OUT: Bitmask of phrases covered */
                    312:   u64 *pmHighlight                /* OUT: Bitmask of terms to highlight */
                    313: ){
                    314:   int iStart = pIter->iCurrent;   /* First token of snippet */
                    315:   int iScore = 0;                 /* Score of this snippet */
                    316:   int i;                          /* Loop counter */
                    317:   u64 mCover = 0;                 /* Mask of phrases covered by this snippet */
                    318:   u64 mHighlight = 0;             /* Mask of tokens to highlight in snippet */
                    319: 
                    320:   for(i=0; i<pIter->nPhrase; i++){
                    321:     SnippetPhrase *pPhrase = &pIter->aPhrase[i];
                    322:     if( pPhrase->pTail ){
                    323:       char *pCsr = pPhrase->pTail;
                    324:       int iCsr = pPhrase->iTail;
                    325: 
                    326:       while( iCsr<(iStart+pIter->nSnippet) ){
                    327:         int j;
                    328:         u64 mPhrase = (u64)1 << i;
                    329:         u64 mPos = (u64)1 << (iCsr - iStart);
                    330:         assert( iCsr>=iStart );
                    331:         if( (mCover|mCovered)&mPhrase ){
                    332:           iScore++;
                    333:         }else{
                    334:           iScore += 1000;
                    335:         }
                    336:         mCover |= mPhrase;
                    337: 
                    338:         for(j=0; j<pPhrase->nToken; j++){
                    339:           mHighlight |= (mPos>>j);
                    340:         }
                    341: 
                    342:         if( 0==(*pCsr & 0x0FE) ) break;
                    343:         fts3GetDeltaPosition(&pCsr, &iCsr);
                    344:       }
                    345:     }
                    346:   }
                    347: 
                    348:   /* Set the output variables before returning. */
                    349:   *piToken = iStart;
                    350:   *piScore = iScore;
                    351:   *pmCover = mCover;
                    352:   *pmHighlight = mHighlight;
                    353: }
                    354: 
                    355: /*
                    356: ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
                    357: ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
                    358: */
                    359: static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
                    360:   SnippetIter *p = (SnippetIter *)ctx;
                    361:   SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
                    362:   char *pCsr;
                    363: 
                    364:   pPhrase->nToken = pExpr->pPhrase->nToken;
                    365: 
                    366:   pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
                    367:   if( pCsr ){
                    368:     int iFirst = 0;
                    369:     pPhrase->pList = pCsr;
                    370:     fts3GetDeltaPosition(&pCsr, &iFirst);
                    371:     assert( iFirst>=0 );
                    372:     pPhrase->pHead = pCsr;
                    373:     pPhrase->pTail = pCsr;
                    374:     pPhrase->iHead = iFirst;
                    375:     pPhrase->iTail = iFirst;
                    376:   }else{
                    377:     assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 );
                    378:   }
                    379: 
                    380:   return SQLITE_OK;
                    381: }
                    382: 
                    383: /*
                    384: ** Select the fragment of text consisting of nFragment contiguous tokens 
                    385: ** from column iCol that represent the "best" snippet. The best snippet
                    386: ** is the snippet with the highest score, where scores are calculated
                    387: ** by adding:
                    388: **
                    389: **   (a) +1 point for each occurence of a matchable phrase in the snippet.
                    390: **
                    391: **   (b) +1000 points for the first occurence of each matchable phrase in 
                    392: **       the snippet for which the corresponding mCovered bit is not set.
                    393: **
                    394: ** The selected snippet parameters are stored in structure *pFragment before
                    395: ** returning. The score of the selected snippet is stored in *piScore
                    396: ** before returning.
                    397: */
                    398: static int fts3BestSnippet(
                    399:   int nSnippet,                   /* Desired snippet length */
                    400:   Fts3Cursor *pCsr,               /* Cursor to create snippet for */
                    401:   int iCol,                       /* Index of column to create snippet from */
                    402:   u64 mCovered,                   /* Mask of phrases already covered */
                    403:   u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
                    404:   SnippetFragment *pFragment,     /* OUT: Best snippet found */
                    405:   int *piScore                    /* OUT: Score of snippet pFragment */
                    406: ){
                    407:   int rc;                         /* Return Code */
                    408:   int nList;                      /* Number of phrases in expression */
                    409:   SnippetIter sIter;              /* Iterates through snippet candidates */
                    410:   int nByte;                      /* Number of bytes of space to allocate */
                    411:   int iBestScore = -1;            /* Best snippet score found so far */
                    412:   int i;                          /* Loop counter */
                    413: 
                    414:   memset(&sIter, 0, sizeof(sIter));
                    415: 
                    416:   /* Iterate through the phrases in the expression to count them. The same
                    417:   ** callback makes sure the doclists are loaded for each phrase.
                    418:   */
                    419:   rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
                    420:   if( rc!=SQLITE_OK ){
                    421:     return rc;
                    422:   }
                    423: 
                    424:   /* Now that it is known how many phrases there are, allocate and zero
                    425:   ** the required space using malloc().
                    426:   */
                    427:   nByte = sizeof(SnippetPhrase) * nList;
                    428:   sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
                    429:   if( !sIter.aPhrase ){
                    430:     return SQLITE_NOMEM;
                    431:   }
                    432:   memset(sIter.aPhrase, 0, nByte);
                    433: 
                    434:   /* Initialize the contents of the SnippetIter object. Then iterate through
                    435:   ** the set of phrases in the expression to populate the aPhrase[] array.
                    436:   */
                    437:   sIter.pCsr = pCsr;
                    438:   sIter.iCol = iCol;
                    439:   sIter.nSnippet = nSnippet;
                    440:   sIter.nPhrase = nList;
                    441:   sIter.iCurrent = -1;
                    442:   (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
                    443: 
                    444:   /* Set the *pmSeen output variable. */
                    445:   for(i=0; i<nList; i++){
                    446:     if( sIter.aPhrase[i].pHead ){
                    447:       *pmSeen |= (u64)1 << i;
                    448:     }
                    449:   }
                    450: 
                    451:   /* Loop through all candidate snippets. Store the best snippet in 
                    452:   ** *pFragment. Store its associated 'score' in iBestScore.
                    453:   */
                    454:   pFragment->iCol = iCol;
                    455:   while( !fts3SnippetNextCandidate(&sIter) ){
                    456:     int iPos;
                    457:     int iScore;
                    458:     u64 mCover;
                    459:     u64 mHighlight;
                    460:     fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
                    461:     assert( iScore>=0 );
                    462:     if( iScore>iBestScore ){
                    463:       pFragment->iPos = iPos;
                    464:       pFragment->hlmask = mHighlight;
                    465:       pFragment->covered = mCover;
                    466:       iBestScore = iScore;
                    467:     }
                    468:   }
                    469: 
                    470:   sqlite3_free(sIter.aPhrase);
                    471:   *piScore = iBestScore;
                    472:   return SQLITE_OK;
                    473: }
                    474: 
                    475: 
                    476: /*
                    477: ** Append a string to the string-buffer passed as the first argument.
                    478: **
                    479: ** If nAppend is negative, then the length of the string zAppend is
                    480: ** determined using strlen().
                    481: */
                    482: static int fts3StringAppend(
                    483:   StrBuffer *pStr,                /* Buffer to append to */
                    484:   const char *zAppend,            /* Pointer to data to append to buffer */
                    485:   int nAppend                     /* Size of zAppend in bytes (or -1) */
                    486: ){
                    487:   if( nAppend<0 ){
                    488:     nAppend = (int)strlen(zAppend);
                    489:   }
                    490: 
                    491:   /* If there is insufficient space allocated at StrBuffer.z, use realloc()
                    492:   ** to grow the buffer until so that it is big enough to accomadate the
                    493:   ** appended data.
                    494:   */
                    495:   if( pStr->n+nAppend+1>=pStr->nAlloc ){
                    496:     int nAlloc = pStr->nAlloc+nAppend+100;
                    497:     char *zNew = sqlite3_realloc(pStr->z, nAlloc);
                    498:     if( !zNew ){
                    499:       return SQLITE_NOMEM;
                    500:     }
                    501:     pStr->z = zNew;
                    502:     pStr->nAlloc = nAlloc;
                    503:   }
                    504: 
                    505:   /* Append the data to the string buffer. */
                    506:   memcpy(&pStr->z[pStr->n], zAppend, nAppend);
                    507:   pStr->n += nAppend;
                    508:   pStr->z[pStr->n] = '\0';
                    509: 
                    510:   return SQLITE_OK;
                    511: }
                    512: 
                    513: /*
                    514: ** The fts3BestSnippet() function often selects snippets that end with a
                    515: ** query term. That is, the final term of the snippet is always a term
                    516: ** that requires highlighting. For example, if 'X' is a highlighted term
                    517: ** and '.' is a non-highlighted term, BestSnippet() may select:
                    518: **
                    519: **     ........X.....X
                    520: **
                    521: ** This function "shifts" the beginning of the snippet forward in the 
                    522: ** document so that there are approximately the same number of 
                    523: ** non-highlighted terms to the right of the final highlighted term as there
                    524: ** are to the left of the first highlighted term. For example, to this:
                    525: **
                    526: **     ....X.....X....
                    527: **
                    528: ** This is done as part of extracting the snippet text, not when selecting
                    529: ** the snippet. Snippet selection is done based on doclists only, so there
                    530: ** is no way for fts3BestSnippet() to know whether or not the document 
                    531: ** actually contains terms that follow the final highlighted term. 
                    532: */
                    533: static int fts3SnippetShift(
                    534:   Fts3Table *pTab,                /* FTS3 table snippet comes from */
                    535:   int nSnippet,                   /* Number of tokens desired for snippet */
                    536:   const char *zDoc,               /* Document text to extract snippet from */
                    537:   int nDoc,                       /* Size of buffer zDoc in bytes */
                    538:   int *piPos,                     /* IN/OUT: First token of snippet */
                    539:   u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
                    540: ){
                    541:   u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
                    542: 
                    543:   if( hlmask ){
                    544:     int nLeft;                    /* Tokens to the left of first highlight */
                    545:     int nRight;                   /* Tokens to the right of last highlight */
                    546:     int nDesired;                 /* Ideal number of tokens to shift forward */
                    547: 
                    548:     for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
                    549:     for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
                    550:     nDesired = (nLeft-nRight)/2;
                    551: 
                    552:     /* Ideally, the start of the snippet should be pushed forward in the
                    553:     ** document nDesired tokens. This block checks if there are actually
                    554:     ** nDesired tokens to the right of the snippet. If so, *piPos and
                    555:     ** *pHlMask are updated to shift the snippet nDesired tokens to the
                    556:     ** right. Otherwise, the snippet is shifted by the number of tokens
                    557:     ** available.
                    558:     */
                    559:     if( nDesired>0 ){
                    560:       int nShift;                 /* Number of tokens to shift snippet by */
                    561:       int iCurrent = 0;           /* Token counter */
                    562:       int rc;                     /* Return Code */
                    563:       sqlite3_tokenizer_module *pMod;
                    564:       sqlite3_tokenizer_cursor *pC;
                    565:       pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
                    566: 
                    567:       /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
                    568:       ** or more tokens in zDoc/nDoc.
                    569:       */
                    570:       rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
                    571:       if( rc!=SQLITE_OK ){
                    572:         return rc;
                    573:       }
                    574:       pC->pTokenizer = pTab->pTokenizer;
                    575:       while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
                    576:         const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
                    577:         rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
                    578:       }
                    579:       pMod->xClose(pC);
                    580:       if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
                    581: 
                    582:       nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
                    583:       assert( nShift<=nDesired );
                    584:       if( nShift>0 ){
                    585:         *piPos += nShift;
                    586:         *pHlmask = hlmask >> nShift;
                    587:       }
                    588:     }
                    589:   }
                    590:   return SQLITE_OK;
                    591: }
                    592: 
                    593: /*
                    594: ** Extract the snippet text for fragment pFragment from cursor pCsr and
                    595: ** append it to string buffer pOut.
                    596: */
                    597: static int fts3SnippetText(
                    598:   Fts3Cursor *pCsr,               /* FTS3 Cursor */
                    599:   SnippetFragment *pFragment,     /* Snippet to extract */
                    600:   int iFragment,                  /* Fragment number */
                    601:   int isLast,                     /* True for final fragment in snippet */
                    602:   int nSnippet,                   /* Number of tokens in extracted snippet */
                    603:   const char *zOpen,              /* String inserted before highlighted term */
                    604:   const char *zClose,             /* String inserted after highlighted term */
                    605:   const char *zEllipsis,          /* String inserted between snippets */
                    606:   StrBuffer *pOut                 /* Write output here */
                    607: ){
                    608:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
                    609:   int rc;                         /* Return code */
                    610:   const char *zDoc;               /* Document text to extract snippet from */
                    611:   int nDoc;                       /* Size of zDoc in bytes */
                    612:   int iCurrent = 0;               /* Current token number of document */
                    613:   int iEnd = 0;                   /* Byte offset of end of current token */
                    614:   int isShiftDone = 0;            /* True after snippet is shifted */
                    615:   int iPos = pFragment->iPos;     /* First token of snippet */
                    616:   u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
                    617:   int iCol = pFragment->iCol+1;   /* Query column to extract text from */
                    618:   sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
                    619:   sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
                    620:   const char *ZDUMMY;             /* Dummy argument used with tokenizer */
                    621:   int DUMMY1;                     /* Dummy argument used with tokenizer */
                    622:   
                    623:   zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
                    624:   if( zDoc==0 ){
                    625:     if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
                    626:       return SQLITE_NOMEM;
                    627:     }
                    628:     return SQLITE_OK;
                    629:   }
                    630:   nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
                    631: 
                    632:   /* Open a token cursor on the document. */
                    633:   pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
                    634:   rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
                    635:   if( rc!=SQLITE_OK ){
                    636:     return rc;
                    637:   }
                    638:   pC->pTokenizer = pTab->pTokenizer;
                    639: 
                    640:   while( rc==SQLITE_OK ){
                    641:     int iBegin;                   /* Offset in zDoc of start of token */
                    642:     int iFin;                     /* Offset in zDoc of end of token */
                    643:     int isHighlight;              /* True for highlighted terms */
                    644: 
                    645:     rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
                    646:     if( rc!=SQLITE_OK ){
                    647:       if( rc==SQLITE_DONE ){
                    648:         /* Special case - the last token of the snippet is also the last token
                    649:         ** of the column. Append any punctuation that occurred between the end
                    650:         ** of the previous token and the end of the document to the output. 
                    651:         ** Then break out of the loop. */
                    652:         rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
                    653:       }
                    654:       break;
                    655:     }
                    656:     if( iCurrent<iPos ){ continue; }
                    657: 
                    658:     if( !isShiftDone ){
                    659:       int n = nDoc - iBegin;
                    660:       rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
                    661:       isShiftDone = 1;
                    662: 
                    663:       /* Now that the shift has been done, check if the initial "..." are
                    664:       ** required. They are required if (a) this is not the first fragment,
                    665:       ** or (b) this fragment does not begin at position 0 of its column. 
                    666:       */
                    667:       if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
                    668:         rc = fts3StringAppend(pOut, zEllipsis, -1);
                    669:       }
                    670:       if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
                    671:     }
                    672: 
                    673:     if( iCurrent>=(iPos+nSnippet) ){
                    674:       if( isLast ){
                    675:         rc = fts3StringAppend(pOut, zEllipsis, -1);
                    676:       }
                    677:       break;
                    678:     }
                    679: 
                    680:     /* Set isHighlight to true if this term should be highlighted. */
                    681:     isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
                    682: 
                    683:     if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
                    684:     if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
                    685:     if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
                    686:     if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
                    687: 
                    688:     iEnd = iFin;
                    689:   }
                    690: 
                    691:   pMod->xClose(pC);
                    692:   return rc;
                    693: }
                    694: 
                    695: 
                    696: /*
                    697: ** This function is used to count the entries in a column-list (a 
                    698: ** delta-encoded list of term offsets within a single column of a single 
                    699: ** row). When this function is called, *ppCollist should point to the
                    700: ** beginning of the first varint in the column-list (the varint that
                    701: ** contains the position of the first matching term in the column data).
                    702: ** Before returning, *ppCollist is set to point to the first byte after
                    703: ** the last varint in the column-list (either the 0x00 signifying the end
                    704: ** of the position-list, or the 0x01 that precedes the column number of
                    705: ** the next column in the position-list).
                    706: **
                    707: ** The number of elements in the column-list is returned.
                    708: */
                    709: static int fts3ColumnlistCount(char **ppCollist){
                    710:   char *pEnd = *ppCollist;
                    711:   char c = 0;
                    712:   int nEntry = 0;
                    713: 
                    714:   /* A column-list is terminated by either a 0x01 or 0x00. */
                    715:   while( 0xFE & (*pEnd | c) ){
                    716:     c = *pEnd++ & 0x80;
                    717:     if( !c ) nEntry++;
                    718:   }
                    719: 
                    720:   *ppCollist = pEnd;
                    721:   return nEntry;
                    722: }
                    723: 
                    724: /*
                    725: ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
                    726: ** for a single query. 
                    727: **
                    728: ** fts3ExprIterate() callback to load the 'global' elements of a
                    729: ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements 
                    730: ** of the matchinfo array that are constant for all rows returned by the 
                    731: ** current query.
                    732: **
                    733: ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
                    734: ** function populates Matchinfo.aMatchinfo[] as follows:
                    735: **
                    736: **   for(iCol=0; iCol<nCol; iCol++){
                    737: **     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
                    738: **     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
                    739: **   }
                    740: **
                    741: ** where X is the number of matches for phrase iPhrase is column iCol of all
                    742: ** rows of the table. Y is the number of rows for which column iCol contains
                    743: ** at least one instance of phrase iPhrase.
                    744: **
                    745: ** If the phrase pExpr consists entirely of deferred tokens, then all X and
                    746: ** Y values are set to nDoc, where nDoc is the number of documents in the 
                    747: ** file system. This is done because the full-text index doclist is required
                    748: ** to calculate these values properly, and the full-text index doclist is
                    749: ** not available for deferred tokens.
                    750: */
                    751: static int fts3ExprGlobalHitsCb(
                    752:   Fts3Expr *pExpr,                /* Phrase expression node */
                    753:   int iPhrase,                    /* Phrase number (numbered from zero) */
                    754:   void *pCtx                      /* Pointer to MatchInfo structure */
                    755: ){
                    756:   MatchInfo *p = (MatchInfo *)pCtx;
                    757:   return sqlite3Fts3EvalPhraseStats(
                    758:       p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
                    759:   );
                    760: }
                    761: 
                    762: /*
                    763: ** fts3ExprIterate() callback used to collect the "local" part of the
                    764: ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the 
                    765: ** array that are different for each row returned by the query.
                    766: */
                    767: static int fts3ExprLocalHitsCb(
                    768:   Fts3Expr *pExpr,                /* Phrase expression node */
                    769:   int iPhrase,                    /* Phrase number */
                    770:   void *pCtx                      /* Pointer to MatchInfo structure */
                    771: ){
                    772:   MatchInfo *p = (MatchInfo *)pCtx;
                    773:   int iStart = iPhrase * p->nCol * 3;
                    774:   int i;
                    775: 
                    776:   for(i=0; i<p->nCol; i++){
                    777:     char *pCsr;
                    778:     pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i);
                    779:     if( pCsr ){
                    780:       p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
                    781:     }else{
                    782:       p->aMatchinfo[iStart+i*3] = 0;
                    783:     }
                    784:   }
                    785: 
                    786:   return SQLITE_OK;
                    787: }
                    788: 
                    789: static int fts3MatchinfoCheck(
                    790:   Fts3Table *pTab, 
                    791:   char cArg,
                    792:   char **pzErr
                    793: ){
                    794:   if( (cArg==FTS3_MATCHINFO_NPHRASE)
                    795:    || (cArg==FTS3_MATCHINFO_NCOL)
                    796:    || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat)
                    797:    || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat)
                    798:    || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
                    799:    || (cArg==FTS3_MATCHINFO_LCS)
                    800:    || (cArg==FTS3_MATCHINFO_HITS)
                    801:   ){
                    802:     return SQLITE_OK;
                    803:   }
                    804:   *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
                    805:   return SQLITE_ERROR;
                    806: }
                    807: 
                    808: static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
                    809:   int nVal;                       /* Number of integers output by cArg */
                    810: 
                    811:   switch( cArg ){
                    812:     case FTS3_MATCHINFO_NDOC:
                    813:     case FTS3_MATCHINFO_NPHRASE: 
                    814:     case FTS3_MATCHINFO_NCOL: 
                    815:       nVal = 1;
                    816:       break;
                    817: 
                    818:     case FTS3_MATCHINFO_AVGLENGTH:
                    819:     case FTS3_MATCHINFO_LENGTH:
                    820:     case FTS3_MATCHINFO_LCS:
                    821:       nVal = pInfo->nCol;
                    822:       break;
                    823: 
                    824:     default:
                    825:       assert( cArg==FTS3_MATCHINFO_HITS );
                    826:       nVal = pInfo->nCol * pInfo->nPhrase * 3;
                    827:       break;
                    828:   }
                    829: 
                    830:   return nVal;
                    831: }
                    832: 
                    833: static int fts3MatchinfoSelectDoctotal(
                    834:   Fts3Table *pTab,
                    835:   sqlite3_stmt **ppStmt,
                    836:   sqlite3_int64 *pnDoc,
                    837:   const char **paLen
                    838: ){
                    839:   sqlite3_stmt *pStmt;
                    840:   const char *a;
                    841:   sqlite3_int64 nDoc;
                    842: 
                    843:   if( !*ppStmt ){
                    844:     int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
                    845:     if( rc!=SQLITE_OK ) return rc;
                    846:   }
                    847:   pStmt = *ppStmt;
                    848:   assert( sqlite3_data_count(pStmt)==1 );
                    849: 
                    850:   a = sqlite3_column_blob(pStmt, 0);
                    851:   a += sqlite3Fts3GetVarint(a, &nDoc);
                    852:   if( nDoc==0 ) return FTS_CORRUPT_VTAB;
                    853:   *pnDoc = (u32)nDoc;
                    854: 
                    855:   if( paLen ) *paLen = a;
                    856:   return SQLITE_OK;
                    857: }
                    858: 
                    859: /*
                    860: ** An instance of the following structure is used to store state while 
                    861: ** iterating through a multi-column position-list corresponding to the
                    862: ** hits for a single phrase on a single row in order to calculate the
                    863: ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
                    864: */
                    865: typedef struct LcsIterator LcsIterator;
                    866: struct LcsIterator {
                    867:   Fts3Expr *pExpr;                /* Pointer to phrase expression */
                    868:   int iPosOffset;                 /* Tokens count up to end of this phrase */
                    869:   char *pRead;                    /* Cursor used to iterate through aDoclist */
                    870:   int iPos;                       /* Current position */
                    871: };
                    872: 
                    873: /* 
                    874: ** If LcsIterator.iCol is set to the following value, the iterator has
                    875: ** finished iterating through all offsets for all columns.
                    876: */
                    877: #define LCS_ITERATOR_FINISHED 0x7FFFFFFF;
                    878: 
                    879: static int fts3MatchinfoLcsCb(
                    880:   Fts3Expr *pExpr,                /* Phrase expression node */
                    881:   int iPhrase,                    /* Phrase number (numbered from zero) */
                    882:   void *pCtx                      /* Pointer to MatchInfo structure */
                    883: ){
                    884:   LcsIterator *aIter = (LcsIterator *)pCtx;
                    885:   aIter[iPhrase].pExpr = pExpr;
                    886:   return SQLITE_OK;
                    887: }
                    888: 
                    889: /*
                    890: ** Advance the iterator passed as an argument to the next position. Return
                    891: ** 1 if the iterator is at EOF or if it now points to the start of the
                    892: ** position list for the next column.
                    893: */
                    894: static int fts3LcsIteratorAdvance(LcsIterator *pIter){
                    895:   char *pRead = pIter->pRead;
                    896:   sqlite3_int64 iRead;
                    897:   int rc = 0;
                    898: 
                    899:   pRead += sqlite3Fts3GetVarint(pRead, &iRead);
                    900:   if( iRead==0 || iRead==1 ){
                    901:     pRead = 0;
                    902:     rc = 1;
                    903:   }else{
                    904:     pIter->iPos += (int)(iRead-2);
                    905:   }
                    906: 
                    907:   pIter->pRead = pRead;
                    908:   return rc;
                    909: }
                    910:   
                    911: /*
                    912: ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. 
                    913: **
                    914: ** If the call is successful, the longest-common-substring lengths for each
                    915: ** column are written into the first nCol elements of the pInfo->aMatchinfo[] 
                    916: ** array before returning. SQLITE_OK is returned in this case.
                    917: **
                    918: ** Otherwise, if an error occurs, an SQLite error code is returned and the
                    919: ** data written to the first nCol elements of pInfo->aMatchinfo[] is 
                    920: ** undefined.
                    921: */
                    922: static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
                    923:   LcsIterator *aIter;
                    924:   int i;
                    925:   int iCol;
                    926:   int nToken = 0;
                    927: 
                    928:   /* Allocate and populate the array of LcsIterator objects. The array
                    929:   ** contains one element for each matchable phrase in the query.
                    930:   **/
                    931:   aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
                    932:   if( !aIter ) return SQLITE_NOMEM;
                    933:   memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
                    934:   (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
                    935: 
                    936:   for(i=0; i<pInfo->nPhrase; i++){
                    937:     LcsIterator *pIter = &aIter[i];
                    938:     nToken -= pIter->pExpr->pPhrase->nToken;
                    939:     pIter->iPosOffset = nToken;
                    940:   }
                    941: 
                    942:   for(iCol=0; iCol<pInfo->nCol; iCol++){
                    943:     int nLcs = 0;                 /* LCS value for this column */
                    944:     int nLive = 0;                /* Number of iterators in aIter not at EOF */
                    945: 
                    946:     for(i=0; i<pInfo->nPhrase; i++){
                    947:       LcsIterator *pIt = &aIter[i];
                    948:       pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol);
                    949:       if( pIt->pRead ){
                    950:         pIt->iPos = pIt->iPosOffset;
                    951:         fts3LcsIteratorAdvance(&aIter[i]);
                    952:         nLive++;
                    953:       }
                    954:     }
                    955: 
                    956:     while( nLive>0 ){
                    957:       LcsIterator *pAdv = 0;      /* The iterator to advance by one position */
                    958:       int nThisLcs = 0;           /* LCS for the current iterator positions */
                    959: 
                    960:       for(i=0; i<pInfo->nPhrase; i++){
                    961:         LcsIterator *pIter = &aIter[i];
                    962:         if( pIter->pRead==0 ){
                    963:           /* This iterator is already at EOF for this column. */
                    964:           nThisLcs = 0;
                    965:         }else{
                    966:           if( pAdv==0 || pIter->iPos<pAdv->iPos ){
                    967:             pAdv = pIter;
                    968:           }
                    969:           if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
                    970:             nThisLcs++;
                    971:           }else{
                    972:             nThisLcs = 1;
                    973:           }
                    974:           if( nThisLcs>nLcs ) nLcs = nThisLcs;
                    975:         }
                    976:       }
                    977:       if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
                    978:     }
                    979: 
                    980:     pInfo->aMatchinfo[iCol] = nLcs;
                    981:   }
                    982: 
                    983:   sqlite3_free(aIter);
                    984:   return SQLITE_OK;
                    985: }
                    986: 
                    987: /*
                    988: ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
                    989: ** be returned by the matchinfo() function. Argument zArg contains the 
                    990: ** format string passed as the second argument to matchinfo (or the
                    991: ** default value "pcx" if no second argument was specified). The format
                    992: ** string has already been validated and the pInfo->aMatchinfo[] array
                    993: ** is guaranteed to be large enough for the output.
                    994: **
                    995: ** If bGlobal is true, then populate all fields of the matchinfo() output.
                    996: ** If it is false, then assume that those fields that do not change between
                    997: ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
                    998: ** have already been populated.
                    999: **
                   1000: ** Return SQLITE_OK if successful, or an SQLite error code if an error 
                   1001: ** occurs. If a value other than SQLITE_OK is returned, the state the
                   1002: ** pInfo->aMatchinfo[] buffer is left in is undefined.
                   1003: */
                   1004: static int fts3MatchinfoValues(
                   1005:   Fts3Cursor *pCsr,               /* FTS3 cursor object */
                   1006:   int bGlobal,                    /* True to grab the global stats */
                   1007:   MatchInfo *pInfo,               /* Matchinfo context object */
                   1008:   const char *zArg                /* Matchinfo format string */
                   1009: ){
                   1010:   int rc = SQLITE_OK;
                   1011:   int i;
                   1012:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
                   1013:   sqlite3_stmt *pSelect = 0;
                   1014: 
                   1015:   for(i=0; rc==SQLITE_OK && zArg[i]; i++){
                   1016: 
                   1017:     switch( zArg[i] ){
                   1018:       case FTS3_MATCHINFO_NPHRASE:
                   1019:         if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
                   1020:         break;
                   1021: 
                   1022:       case FTS3_MATCHINFO_NCOL:
                   1023:         if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
                   1024:         break;
                   1025:         
                   1026:       case FTS3_MATCHINFO_NDOC:
                   1027:         if( bGlobal ){
                   1028:           sqlite3_int64 nDoc = 0;
                   1029:           rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
                   1030:           pInfo->aMatchinfo[0] = (u32)nDoc;
                   1031:         }
                   1032:         break;
                   1033: 
                   1034:       case FTS3_MATCHINFO_AVGLENGTH: 
                   1035:         if( bGlobal ){
                   1036:           sqlite3_int64 nDoc;     /* Number of rows in table */
                   1037:           const char *a;          /* Aggregate column length array */
                   1038: 
                   1039:           rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
                   1040:           if( rc==SQLITE_OK ){
                   1041:             int iCol;
                   1042:             for(iCol=0; iCol<pInfo->nCol; iCol++){
                   1043:               u32 iVal;
                   1044:               sqlite3_int64 nToken;
                   1045:               a += sqlite3Fts3GetVarint(a, &nToken);
                   1046:               iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
                   1047:               pInfo->aMatchinfo[iCol] = iVal;
                   1048:             }
                   1049:           }
                   1050:         }
                   1051:         break;
                   1052: 
                   1053:       case FTS3_MATCHINFO_LENGTH: {
                   1054:         sqlite3_stmt *pSelectDocsize = 0;
                   1055:         rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
                   1056:         if( rc==SQLITE_OK ){
                   1057:           int iCol;
                   1058:           const char *a = sqlite3_column_blob(pSelectDocsize, 0);
                   1059:           for(iCol=0; iCol<pInfo->nCol; iCol++){
                   1060:             sqlite3_int64 nToken;
                   1061:             a += sqlite3Fts3GetVarint(a, &nToken);
                   1062:             pInfo->aMatchinfo[iCol] = (u32)nToken;
                   1063:           }
                   1064:         }
                   1065:         sqlite3_reset(pSelectDocsize);
                   1066:         break;
                   1067:       }
                   1068: 
                   1069:       case FTS3_MATCHINFO_LCS:
                   1070:         rc = fts3ExprLoadDoclists(pCsr, 0, 0);
                   1071:         if( rc==SQLITE_OK ){
                   1072:           rc = fts3MatchinfoLcs(pCsr, pInfo);
                   1073:         }
                   1074:         break;
                   1075: 
                   1076:       default: {
                   1077:         Fts3Expr *pExpr;
                   1078:         assert( zArg[i]==FTS3_MATCHINFO_HITS );
                   1079:         pExpr = pCsr->pExpr;
                   1080:         rc = fts3ExprLoadDoclists(pCsr, 0, 0);
                   1081:         if( rc!=SQLITE_OK ) break;
                   1082:         if( bGlobal ){
                   1083:           if( pCsr->pDeferred ){
                   1084:             rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
                   1085:             if( rc!=SQLITE_OK ) break;
                   1086:           }
                   1087:           rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
                   1088:           if( rc!=SQLITE_OK ) break;
                   1089:         }
                   1090:         (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
                   1091:         break;
                   1092:       }
                   1093:     }
                   1094: 
                   1095:     pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
                   1096:   }
                   1097: 
                   1098:   sqlite3_reset(pSelect);
                   1099:   return rc;
                   1100: }
                   1101: 
                   1102: 
                   1103: /*
                   1104: ** Populate pCsr->aMatchinfo[] with data for the current row. The 
                   1105: ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
                   1106: */
                   1107: static int fts3GetMatchinfo(
                   1108:   Fts3Cursor *pCsr,               /* FTS3 Cursor object */
                   1109:   const char *zArg                /* Second argument to matchinfo() function */
                   1110: ){
                   1111:   MatchInfo sInfo;
                   1112:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
                   1113:   int rc = SQLITE_OK;
                   1114:   int bGlobal = 0;                /* Collect 'global' stats as well as local */
                   1115: 
                   1116:   memset(&sInfo, 0, sizeof(MatchInfo));
                   1117:   sInfo.pCursor = pCsr;
                   1118:   sInfo.nCol = pTab->nColumn;
                   1119: 
                   1120:   /* If there is cached matchinfo() data, but the format string for the 
                   1121:   ** cache does not match the format string for this request, discard 
                   1122:   ** the cached data. */
                   1123:   if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
                   1124:     assert( pCsr->aMatchinfo );
                   1125:     sqlite3_free(pCsr->aMatchinfo);
                   1126:     pCsr->zMatchinfo = 0;
                   1127:     pCsr->aMatchinfo = 0;
                   1128:   }
                   1129: 
                   1130:   /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
                   1131:   ** matchinfo function has been called for this query. In this case 
                   1132:   ** allocate the array used to accumulate the matchinfo data and
                   1133:   ** initialize those elements that are constant for every row.
                   1134:   */
                   1135:   if( pCsr->aMatchinfo==0 ){
                   1136:     int nMatchinfo = 0;           /* Number of u32 elements in match-info */
                   1137:     int nArg;                     /* Bytes in zArg */
                   1138:     int i;                        /* Used to iterate through zArg */
                   1139: 
                   1140:     /* Determine the number of phrases in the query */
                   1141:     pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
                   1142:     sInfo.nPhrase = pCsr->nPhrase;
                   1143: 
                   1144:     /* Determine the number of integers in the buffer returned by this call. */
                   1145:     for(i=0; zArg[i]; i++){
                   1146:       nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
                   1147:     }
                   1148: 
                   1149:     /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
                   1150:     nArg = (int)strlen(zArg);
                   1151:     pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
                   1152:     if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
                   1153: 
                   1154:     pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
                   1155:     pCsr->nMatchinfo = nMatchinfo;
                   1156:     memcpy(pCsr->zMatchinfo, zArg, nArg+1);
                   1157:     memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
                   1158:     pCsr->isMatchinfoNeeded = 1;
                   1159:     bGlobal = 1;
                   1160:   }
                   1161: 
                   1162:   sInfo.aMatchinfo = pCsr->aMatchinfo;
                   1163:   sInfo.nPhrase = pCsr->nPhrase;
                   1164:   if( pCsr->isMatchinfoNeeded ){
                   1165:     rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
                   1166:     pCsr->isMatchinfoNeeded = 0;
                   1167:   }
                   1168: 
                   1169:   return rc;
                   1170: }
                   1171: 
                   1172: /*
                   1173: ** Implementation of snippet() function.
                   1174: */
                   1175: void sqlite3Fts3Snippet(
                   1176:   sqlite3_context *pCtx,          /* SQLite function call context */
                   1177:   Fts3Cursor *pCsr,               /* Cursor object */
                   1178:   const char *zStart,             /* Snippet start text - "<b>" */
                   1179:   const char *zEnd,               /* Snippet end text - "</b>" */
                   1180:   const char *zEllipsis,          /* Snippet ellipsis text - "<b>...</b>" */
                   1181:   int iCol,                       /* Extract snippet from this column */
                   1182:   int nToken                      /* Approximate number of tokens in snippet */
                   1183: ){
                   1184:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
                   1185:   int rc = SQLITE_OK;
                   1186:   int i;
                   1187:   StrBuffer res = {0, 0, 0};
                   1188: 
                   1189:   /* The returned text includes up to four fragments of text extracted from
                   1190:   ** the data in the current row. The first iteration of the for(...) loop
                   1191:   ** below attempts to locate a single fragment of text nToken tokens in 
                   1192:   ** size that contains at least one instance of all phrases in the query
                   1193:   ** expression that appear in the current row. If such a fragment of text
                   1194:   ** cannot be found, the second iteration of the loop attempts to locate
                   1195:   ** a pair of fragments, and so on.
                   1196:   */
                   1197:   int nSnippet = 0;               /* Number of fragments in this snippet */
                   1198:   SnippetFragment aSnippet[4];    /* Maximum of 4 fragments per snippet */
                   1199:   int nFToken = -1;               /* Number of tokens in each fragment */
                   1200: 
                   1201:   if( !pCsr->pExpr ){
                   1202:     sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
                   1203:     return;
                   1204:   }
                   1205: 
                   1206:   for(nSnippet=1; 1; nSnippet++){
                   1207: 
                   1208:     int iSnip;                    /* Loop counter 0..nSnippet-1 */
                   1209:     u64 mCovered = 0;             /* Bitmask of phrases covered by snippet */
                   1210:     u64 mSeen = 0;                /* Bitmask of phrases seen by BestSnippet() */
                   1211: 
                   1212:     if( nToken>=0 ){
                   1213:       nFToken = (nToken+nSnippet-1) / nSnippet;
                   1214:     }else{
                   1215:       nFToken = -1 * nToken;
                   1216:     }
                   1217: 
                   1218:     for(iSnip=0; iSnip<nSnippet; iSnip++){
                   1219:       int iBestScore = -1;        /* Best score of columns checked so far */
                   1220:       int iRead;                  /* Used to iterate through columns */
                   1221:       SnippetFragment *pFragment = &aSnippet[iSnip];
                   1222: 
                   1223:       memset(pFragment, 0, sizeof(*pFragment));
                   1224: 
                   1225:       /* Loop through all columns of the table being considered for snippets.
                   1226:       ** If the iCol argument to this function was negative, this means all
                   1227:       ** columns of the FTS3 table. Otherwise, only column iCol is considered.
                   1228:       */
                   1229:       for(iRead=0; iRead<pTab->nColumn; iRead++){
                   1230:         SnippetFragment sF = {0, 0, 0, 0};
                   1231:         int iS;
                   1232:         if( iCol>=0 && iRead!=iCol ) continue;
                   1233: 
                   1234:         /* Find the best snippet of nFToken tokens in column iRead. */
                   1235:         rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
                   1236:         if( rc!=SQLITE_OK ){
                   1237:           goto snippet_out;
                   1238:         }
                   1239:         if( iS>iBestScore ){
                   1240:           *pFragment = sF;
                   1241:           iBestScore = iS;
                   1242:         }
                   1243:       }
                   1244: 
                   1245:       mCovered |= pFragment->covered;
                   1246:     }
                   1247: 
                   1248:     /* If all query phrases seen by fts3BestSnippet() are present in at least
                   1249:     ** one of the nSnippet snippet fragments, break out of the loop.
                   1250:     */
                   1251:     assert( (mCovered&mSeen)==mCovered );
                   1252:     if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
                   1253:   }
                   1254: 
                   1255:   assert( nFToken>0 );
                   1256: 
                   1257:   for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
                   1258:     rc = fts3SnippetText(pCsr, &aSnippet[i], 
                   1259:         i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
                   1260:     );
                   1261:   }
                   1262: 
                   1263:  snippet_out:
                   1264:   sqlite3Fts3SegmentsClose(pTab);
                   1265:   if( rc!=SQLITE_OK ){
                   1266:     sqlite3_result_error_code(pCtx, rc);
                   1267:     sqlite3_free(res.z);
                   1268:   }else{
                   1269:     sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
                   1270:   }
                   1271: }
                   1272: 
                   1273: 
                   1274: typedef struct TermOffset TermOffset;
                   1275: typedef struct TermOffsetCtx TermOffsetCtx;
                   1276: 
                   1277: struct TermOffset {
                   1278:   char *pList;                    /* Position-list */
                   1279:   int iPos;                       /* Position just read from pList */
                   1280:   int iOff;                       /* Offset of this term from read positions */
                   1281: };
                   1282: 
                   1283: struct TermOffsetCtx {
                   1284:   Fts3Cursor *pCsr;
                   1285:   int iCol;                       /* Column of table to populate aTerm for */
                   1286:   int iTerm;
                   1287:   sqlite3_int64 iDocid;
                   1288:   TermOffset *aTerm;
                   1289: };
                   1290: 
                   1291: /*
                   1292: ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
                   1293: */
                   1294: static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
                   1295:   TermOffsetCtx *p = (TermOffsetCtx *)ctx;
                   1296:   int nTerm;                      /* Number of tokens in phrase */
                   1297:   int iTerm;                      /* For looping through nTerm phrase terms */
                   1298:   char *pList;                    /* Pointer to position list for phrase */
                   1299:   int iPos = 0;                   /* First position in position-list */
                   1300: 
                   1301:   UNUSED_PARAMETER(iPhrase);
                   1302:   pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
                   1303:   nTerm = pExpr->pPhrase->nToken;
                   1304:   if( pList ){
                   1305:     fts3GetDeltaPosition(&pList, &iPos);
                   1306:     assert( iPos>=0 );
                   1307:   }
                   1308: 
                   1309:   for(iTerm=0; iTerm<nTerm; iTerm++){
                   1310:     TermOffset *pT = &p->aTerm[p->iTerm++];
                   1311:     pT->iOff = nTerm-iTerm-1;
                   1312:     pT->pList = pList;
                   1313:     pT->iPos = iPos;
                   1314:   }
                   1315: 
                   1316:   return SQLITE_OK;
                   1317: }
                   1318: 
                   1319: /*
                   1320: ** Implementation of offsets() function.
                   1321: */
                   1322: void sqlite3Fts3Offsets(
                   1323:   sqlite3_context *pCtx,          /* SQLite function call context */
                   1324:   Fts3Cursor *pCsr                /* Cursor object */
                   1325: ){
                   1326:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
                   1327:   sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
                   1328:   const char *ZDUMMY;             /* Dummy argument used with xNext() */
                   1329:   int NDUMMY;                     /* Dummy argument used with xNext() */
                   1330:   int rc;                         /* Return Code */
                   1331:   int nToken;                     /* Number of tokens in query */
                   1332:   int iCol;                       /* Column currently being processed */
                   1333:   StrBuffer res = {0, 0, 0};      /* Result string */
                   1334:   TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */
                   1335: 
                   1336:   if( !pCsr->pExpr ){
                   1337:     sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
                   1338:     return;
                   1339:   }
                   1340: 
                   1341:   memset(&sCtx, 0, sizeof(sCtx));
                   1342:   assert( pCsr->isRequireSeek==0 );
                   1343: 
                   1344:   /* Count the number of terms in the query */
                   1345:   rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
                   1346:   if( rc!=SQLITE_OK ) goto offsets_out;
                   1347: 
                   1348:   /* Allocate the array of TermOffset iterators. */
                   1349:   sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
                   1350:   if( 0==sCtx.aTerm ){
                   1351:     rc = SQLITE_NOMEM;
                   1352:     goto offsets_out;
                   1353:   }
                   1354:   sCtx.iDocid = pCsr->iPrevId;
                   1355:   sCtx.pCsr = pCsr;
                   1356: 
                   1357:   /* Loop through the table columns, appending offset information to 
                   1358:   ** string-buffer res for each column.
                   1359:   */
                   1360:   for(iCol=0; iCol<pTab->nColumn; iCol++){
                   1361:     sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
                   1362:     int iStart;
                   1363:     int iEnd;
                   1364:     int iCurrent;
                   1365:     const char *zDoc;
                   1366:     int nDoc;
                   1367: 
                   1368:     /* Initialize the contents of sCtx.aTerm[] for column iCol. There is 
                   1369:     ** no way that this operation can fail, so the return code from
                   1370:     ** fts3ExprIterate() can be discarded.
                   1371:     */
                   1372:     sCtx.iCol = iCol;
                   1373:     sCtx.iTerm = 0;
                   1374:     (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
                   1375: 
                   1376:     /* Retreive the text stored in column iCol. If an SQL NULL is stored 
                   1377:     ** in column iCol, jump immediately to the next iteration of the loop.
                   1378:     ** If an OOM occurs while retrieving the data (this can happen if SQLite
                   1379:     ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM 
                   1380:     ** to the caller. 
                   1381:     */
                   1382:     zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
                   1383:     nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
                   1384:     if( zDoc==0 ){
                   1385:       if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
                   1386:         continue;
                   1387:       }
                   1388:       rc = SQLITE_NOMEM;
                   1389:       goto offsets_out;
                   1390:     }
                   1391: 
                   1392:     /* Initialize a tokenizer iterator to iterate through column iCol. */
                   1393:     rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
                   1394:     if( rc!=SQLITE_OK ) goto offsets_out;
                   1395:     pC->pTokenizer = pTab->pTokenizer;
                   1396: 
                   1397:     rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
                   1398:     while( rc==SQLITE_OK ){
                   1399:       int i;                      /* Used to loop through terms */
                   1400:       int iMinPos = 0x7FFFFFFF;   /* Position of next token */
                   1401:       TermOffset *pTerm = 0;      /* TermOffset associated with next token */
                   1402: 
                   1403:       for(i=0; i<nToken; i++){
                   1404:         TermOffset *pT = &sCtx.aTerm[i];
                   1405:         if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
                   1406:           iMinPos = pT->iPos-pT->iOff;
                   1407:           pTerm = pT;
                   1408:         }
                   1409:       }
                   1410: 
                   1411:       if( !pTerm ){
                   1412:         /* All offsets for this column have been gathered. */
                   1413:         rc = SQLITE_DONE;
                   1414:       }else{
                   1415:         assert( iCurrent<=iMinPos );
                   1416:         if( 0==(0xFE&*pTerm->pList) ){
                   1417:           pTerm->pList = 0;
                   1418:         }else{
                   1419:           fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
                   1420:         }
                   1421:         while( rc==SQLITE_OK && iCurrent<iMinPos ){
                   1422:           rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
                   1423:         }
                   1424:         if( rc==SQLITE_OK ){
                   1425:           char aBuffer[64];
                   1426:           sqlite3_snprintf(sizeof(aBuffer), aBuffer, 
                   1427:               "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
                   1428:           );
                   1429:           rc = fts3StringAppend(&res, aBuffer, -1);
                   1430:         }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
                   1431:           rc = FTS_CORRUPT_VTAB;
                   1432:         }
                   1433:       }
                   1434:     }
                   1435:     if( rc==SQLITE_DONE ){
                   1436:       rc = SQLITE_OK;
                   1437:     }
                   1438: 
                   1439:     pMod->xClose(pC);
                   1440:     if( rc!=SQLITE_OK ) goto offsets_out;
                   1441:   }
                   1442: 
                   1443:  offsets_out:
                   1444:   sqlite3_free(sCtx.aTerm);
                   1445:   assert( rc!=SQLITE_DONE );
                   1446:   sqlite3Fts3SegmentsClose(pTab);
                   1447:   if( rc!=SQLITE_OK ){
                   1448:     sqlite3_result_error_code(pCtx,  rc);
                   1449:     sqlite3_free(res.z);
                   1450:   }else{
                   1451:     sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
                   1452:   }
                   1453:   return;
                   1454: }
                   1455: 
                   1456: /*
                   1457: ** Implementation of matchinfo() function.
                   1458: */
                   1459: void sqlite3Fts3Matchinfo(
                   1460:   sqlite3_context *pContext,      /* Function call context */
                   1461:   Fts3Cursor *pCsr,               /* FTS3 table cursor */
                   1462:   const char *zArg                /* Second arg to matchinfo() function */
                   1463: ){
                   1464:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
                   1465:   int rc;
                   1466:   int i;
                   1467:   const char *zFormat;
                   1468: 
                   1469:   if( zArg ){
                   1470:     for(i=0; zArg[i]; i++){
                   1471:       char *zErr = 0;
                   1472:       if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
                   1473:         sqlite3_result_error(pContext, zErr, -1);
                   1474:         sqlite3_free(zErr);
                   1475:         return;
                   1476:       }
                   1477:     }
                   1478:     zFormat = zArg;
                   1479:   }else{
                   1480:     zFormat = FTS3_MATCHINFO_DEFAULT;
                   1481:   }
                   1482: 
                   1483:   if( !pCsr->pExpr ){
                   1484:     sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
                   1485:     return;
                   1486:   }
                   1487: 
                   1488:   /* Retrieve matchinfo() data. */
                   1489:   rc = fts3GetMatchinfo(pCsr, zFormat);
                   1490:   sqlite3Fts3SegmentsClose(pTab);
                   1491: 
                   1492:   if( rc!=SQLITE_OK ){
                   1493:     sqlite3_result_error_code(pContext, rc);
                   1494:   }else{
                   1495:     int n = pCsr->nMatchinfo * sizeof(u32);
                   1496:     sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
                   1497:   }
                   1498: }
                   1499: 
                   1500: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>