Annotation of embedaddon/sqlite3/ext/fts3/fts3_snippet.c, revision 1.1
1.1 ! misho 1: /*
! 2: ** 2009 Oct 23
! 3: **
! 4: ** The author disclaims copyright to this source code. In place of
! 5: ** a legal notice, here is a blessing:
! 6: **
! 7: ** May you do good and not evil.
! 8: ** May you find forgiveness for yourself and forgive others.
! 9: ** May you share freely, never taking more than you give.
! 10: **
! 11: ******************************************************************************
! 12: */
! 13:
! 14: #include "fts3Int.h"
! 15: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
! 16:
! 17: #include <string.h>
! 18: #include <assert.h>
! 19:
/*
** Request characters recognised in the second argument to matchinfo().
** The trailing note on each line records how many values that request
** contributes to the matchinfo output.
*/
#define FTS3_MATCHINFO_NPHRASE    'p'   /* 1 value */
#define FTS3_MATCHINFO_NCOL       'c'   /* 1 value */
#define FTS3_MATCHINFO_NDOC       'n'   /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH  'a'   /* nCol values */
#define FTS3_MATCHINFO_LENGTH     'l'   /* nCol values */
#define FTS3_MATCHINFO_LCS        's'   /* nCol values */
#define FTS3_MATCHINFO_HITS       'x'   /* 3*nCol*nPhrase values */

/*
** Default value of the second argument to matchinfo(): phrase count,
** column count, then the hits array.
*/
#define FTS3_MATCHINFO_DEFAULT    "pcx"
! 35:
! 36:
! 37: /*
! 38: ** Used as an fts3ExprIterate() context when loading phrase doclists to
! 39: ** Fts3Expr.aDoclist[]/nDoclist.
! 40: */
! 41: typedef struct LoadDoclistCtx LoadDoclistCtx;
! 42: struct LoadDoclistCtx {
! 43: Fts3Cursor *pCsr; /* FTS3 Cursor */
! 44: int nPhrase; /* Number of phrases seen so far */
! 45: int nToken; /* Number of tokens seen so far */
! 46: };
! 47:
! 48: /*
! 49: ** The following types are used as part of the implementation of the
! 50: ** fts3BestSnippet() routine.
! 51: */
! 52: typedef struct SnippetIter SnippetIter;
! 53: typedef struct SnippetPhrase SnippetPhrase;
! 54: typedef struct SnippetFragment SnippetFragment;
! 55:
! 56: struct SnippetIter {
! 57: Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
! 58: int iCol; /* Extract snippet from this column */
! 59: int nSnippet; /* Requested snippet length (in tokens) */
! 60: int nPhrase; /* Number of phrases in query */
! 61: SnippetPhrase *aPhrase; /* Array of size nPhrase */
! 62: int iCurrent; /* First token of current snippet */
! 63: };
! 64:
! 65: struct SnippetPhrase {
! 66: int nToken; /* Number of tokens in phrase */
! 67: char *pList; /* Pointer to start of phrase position list */
! 68: int iHead; /* Next value in position list */
! 69: char *pHead; /* Position list data following iHead */
! 70: int iTail; /* Next value in trailing position list */
! 71: char *pTail; /* Position list data following iTail */
! 72: };
! 73:
! 74: struct SnippetFragment {
! 75: int iCol; /* Column snippet is extracted from */
! 76: int iPos; /* Index of first token in snippet */
! 77: u64 covered; /* Mask of query phrases covered */
! 78: u64 hlmask; /* Mask of snippet terms to highlight */
! 79: };
! 80:
! 81: /*
! 82: ** This type is used as an fts3ExprIterate() context object while
! 83: ** accumulating the data returned by the matchinfo() function.
! 84: */
! 85: typedef struct MatchInfo MatchInfo;
! 86: struct MatchInfo {
! 87: Fts3Cursor *pCursor; /* FTS3 Cursor */
! 88: int nCol; /* Number of columns in table */
! 89: int nPhrase; /* Number of matchable phrases in query */
! 90: sqlite3_int64 nDoc; /* Number of docs in database */
! 91: u32 *aMatchinfo; /* Pre-allocated buffer */
! 92: };
! 93:
! 94:
! 95:
/*
** Growable text accumulator. The snippet() and offsets() functions both
** return text; they build it up in one of these objects while running.
** See fts3StringAppend() for how the buffer is extended.
*/
typedef struct StrBuffer {
  char *z;                        /* Buffer holding the string */
  int n;                          /* Bytes used in z (nul-terminator excluded) */
  int nAlloc;                     /* Bytes allocated for z */
} StrBuffer;
! 107:
! 108:
/*
** Step a position-list cursor forward by one element.
**
** A position list is a sorted list of unique integers. Each element is
** stored as an FTS3 varint holding (element - previous element + 2). The
** list
**
**     4 9 113
**
** is therefore stored as the three varints
**
**     6 7 106
**
** On entry *pp points at the start of an element and *piPos holds the value
** of the previous element. On return *piPos holds the value of the element
** just read and *pp points at the varint that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iEncoded;                   /* Raw varint value: delta plus two */
  int nByte;                      /* Size of the varint in bytes */

  nByte = sqlite3Fts3GetVarint32(*pp, &iEncoded);
  *pp = *pp + nByte;
  *piPos = *piPos + (iEncoded - 2);
}
! 134:
! 135: /*
! 136: ** Helper function for fts3ExprIterate() (see below).
! 137: */
! 138: static int fts3ExprIterate2(
! 139: Fts3Expr *pExpr, /* Expression to iterate phrases of */
! 140: int *piPhrase, /* Pointer to phrase counter */
! 141: int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
! 142: void *pCtx /* Second argument to pass to callback */
! 143: ){
! 144: int rc; /* Return code */
! 145: int eType = pExpr->eType; /* Type of expression node pExpr */
! 146:
! 147: if( eType!=FTSQUERY_PHRASE ){
! 148: assert( pExpr->pLeft && pExpr->pRight );
! 149: rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
! 150: if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
! 151: rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
! 152: }
! 153: }else{
! 154: rc = x(pExpr, *piPhrase, pCtx);
! 155: (*piPhrase)++;
! 156: }
! 157: return rc;
! 158: }
! 159:
! 160: /*
! 161: ** Iterate through all phrase nodes in an FTS3 query, except those that
! 162: ** are part of a sub-tree that is the right-hand-side of a NOT operator.
! 163: ** For each phrase node found, the supplied callback function is invoked.
! 164: **
! 165: ** If the callback function returns anything other than SQLITE_OK,
! 166: ** the iteration is abandoned and the error code returned immediately.
! 167: ** Otherwise, SQLITE_OK is returned after a callback has been made for
! 168: ** all eligible phrase nodes.
! 169: */
! 170: static int fts3ExprIterate(
! 171: Fts3Expr *pExpr, /* Expression to iterate phrases of */
! 172: int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
! 173: void *pCtx /* Second argument to pass to callback */
! 174: ){
! 175: int iPhrase = 0; /* Variable used as the phrase counter */
! 176: return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
! 177: }
! 178:
! 179: /*
! 180: ** This is an fts3ExprIterate() callback used while loading the doclists
! 181: ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
! 182: ** fts3ExprLoadDoclists().
! 183: */
! 184: static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
! 185: int rc = SQLITE_OK;
! 186: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 187: LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
! 188:
! 189: UNUSED_PARAMETER(iPhrase);
! 190:
! 191: p->nPhrase++;
! 192: p->nToken += pPhrase->nToken;
! 193:
! 194: return rc;
! 195: }
! 196:
! 197: /*
! 198: ** Load the doclists for each phrase in the query associated with FTS3 cursor
! 199: ** pCsr.
! 200: **
! 201: ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
! 202: ** phrases in the expression (all phrases except those directly or
! 203: ** indirectly descended from the right-hand-side of a NOT operator). If
! 204: ** pnToken is not NULL, then it is set to the number of tokens in all
! 205: ** matchable phrases of the expression.
! 206: */
! 207: static int fts3ExprLoadDoclists(
! 208: Fts3Cursor *pCsr, /* Fts3 cursor for current query */
! 209: int *pnPhrase, /* OUT: Number of phrases in query */
! 210: int *pnToken /* OUT: Number of tokens in query */
! 211: ){
! 212: int rc; /* Return Code */
! 213: LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
! 214: sCtx.pCsr = pCsr;
! 215: rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
! 216: if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
! 217: if( pnToken ) *pnToken = sCtx.nToken;
! 218: return rc;
! 219: }
! 220:
! 221: static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
! 222: (*(int *)ctx)++;
! 223: UNUSED_PARAMETER(pExpr);
! 224: UNUSED_PARAMETER(iPhrase);
! 225: return SQLITE_OK;
! 226: }
! 227: static int fts3ExprPhraseCount(Fts3Expr *pExpr){
! 228: int nPhrase = 0;
! 229: (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
! 230: return nPhrase;
! 231: }
! 232:
/*
** Advance the position-list iterator (*ppIter, *piIter) until it refers to
** the first element whose value is greater than or equal to iNext.
**
** If *ppIter is NULL the iterator is already exhausted and nothing happens.
** If the end of the list is reached first, *ppIter is set to NULL and
** *piIter to -1.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pCur = *ppIter;           /* Read cursor into the position list */
  int iCur;                       /* Position the cursor currently refers to */

  if( pCur==0 ){
    return;
  }

  iCur = *piIter;
  while( iCur<iNext ){
    if( (*pCur & 0xFE)==0 ){
      /* A 0x00 or 0x01 byte terminates the list for this column. */
      pCur = 0;
      iCur = -1;
      break;
    }
    fts3GetDeltaPosition(&pCur, &iCur);
  }

  *ppIter = pCur;
  *piIter = iCur;
}
! 256:
! 257: /*
! 258: ** Advance the snippet iterator to the next candidate snippet.
! 259: */
! 260: static int fts3SnippetNextCandidate(SnippetIter *pIter){
! 261: int i; /* Loop counter */
! 262:
! 263: if( pIter->iCurrent<0 ){
! 264: /* The SnippetIter object has just been initialized. The first snippet
! 265: ** candidate always starts at offset 0 (even if this candidate has a
! 266: ** score of 0.0).
! 267: */
! 268: pIter->iCurrent = 0;
! 269:
! 270: /* Advance the 'head' iterator of each phrase to the first offset that
! 271: ** is greater than or equal to (iNext+nSnippet).
! 272: */
! 273: for(i=0; i<pIter->nPhrase; i++){
! 274: SnippetPhrase *pPhrase = &pIter->aPhrase[i];
! 275: fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
! 276: }
! 277: }else{
! 278: int iStart;
! 279: int iEnd = 0x7FFFFFFF;
! 280:
! 281: for(i=0; i<pIter->nPhrase; i++){
! 282: SnippetPhrase *pPhrase = &pIter->aPhrase[i];
! 283: if( pPhrase->pHead && pPhrase->iHead<iEnd ){
! 284: iEnd = pPhrase->iHead;
! 285: }
! 286: }
! 287: if( iEnd==0x7FFFFFFF ){
! 288: return 1;
! 289: }
! 290:
! 291: pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
! 292: for(i=0; i<pIter->nPhrase; i++){
! 293: SnippetPhrase *pPhrase = &pIter->aPhrase[i];
! 294: fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
! 295: fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
! 296: }
! 297: }
! 298:
! 299: return 0;
! 300: }
! 301:
! 302: /*
! 303: ** Retrieve information about the current candidate snippet of snippet
! 304: ** iterator pIter.
! 305: */
! 306: static void fts3SnippetDetails(
! 307: SnippetIter *pIter, /* Snippet iterator */
! 308: u64 mCovered, /* Bitmask of phrases already covered */
! 309: int *piToken, /* OUT: First token of proposed snippet */
! 310: int *piScore, /* OUT: "Score" for this snippet */
! 311: u64 *pmCover, /* OUT: Bitmask of phrases covered */
! 312: u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
! 313: ){
! 314: int iStart = pIter->iCurrent; /* First token of snippet */
! 315: int iScore = 0; /* Score of this snippet */
! 316: int i; /* Loop counter */
! 317: u64 mCover = 0; /* Mask of phrases covered by this snippet */
! 318: u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
! 319:
! 320: for(i=0; i<pIter->nPhrase; i++){
! 321: SnippetPhrase *pPhrase = &pIter->aPhrase[i];
! 322: if( pPhrase->pTail ){
! 323: char *pCsr = pPhrase->pTail;
! 324: int iCsr = pPhrase->iTail;
! 325:
! 326: while( iCsr<(iStart+pIter->nSnippet) ){
! 327: int j;
! 328: u64 mPhrase = (u64)1 << i;
! 329: u64 mPos = (u64)1 << (iCsr - iStart);
! 330: assert( iCsr>=iStart );
! 331: if( (mCover|mCovered)&mPhrase ){
! 332: iScore++;
! 333: }else{
! 334: iScore += 1000;
! 335: }
! 336: mCover |= mPhrase;
! 337:
! 338: for(j=0; j<pPhrase->nToken; j++){
! 339: mHighlight |= (mPos>>j);
! 340: }
! 341:
! 342: if( 0==(*pCsr & 0x0FE) ) break;
! 343: fts3GetDeltaPosition(&pCsr, &iCsr);
! 344: }
! 345: }
! 346: }
! 347:
! 348: /* Set the output variables before returning. */
! 349: *piToken = iStart;
! 350: *piScore = iScore;
! 351: *pmCover = mCover;
! 352: *pmHighlight = mHighlight;
! 353: }
! 354:
! 355: /*
! 356: ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
! 357: ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
! 358: */
! 359: static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
! 360: SnippetIter *p = (SnippetIter *)ctx;
! 361: SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
! 362: char *pCsr;
! 363:
! 364: pPhrase->nToken = pExpr->pPhrase->nToken;
! 365:
! 366: pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
! 367: if( pCsr ){
! 368: int iFirst = 0;
! 369: pPhrase->pList = pCsr;
! 370: fts3GetDeltaPosition(&pCsr, &iFirst);
! 371: assert( iFirst>=0 );
! 372: pPhrase->pHead = pCsr;
! 373: pPhrase->pTail = pCsr;
! 374: pPhrase->iHead = iFirst;
! 375: pPhrase->iTail = iFirst;
! 376: }else{
! 377: assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 );
! 378: }
! 379:
! 380: return SQLITE_OK;
! 381: }
! 382:
! 383: /*
! 384: ** Select the fragment of text consisting of nFragment contiguous tokens
! 385: ** from column iCol that represent the "best" snippet. The best snippet
! 386: ** is the snippet with the highest score, where scores are calculated
! 387: ** by adding:
! 388: **
! 389: ** (a) +1 point for each occurence of a matchable phrase in the snippet.
! 390: **
! 391: ** (b) +1000 points for the first occurence of each matchable phrase in
! 392: ** the snippet for which the corresponding mCovered bit is not set.
! 393: **
! 394: ** The selected snippet parameters are stored in structure *pFragment before
! 395: ** returning. The score of the selected snippet is stored in *piScore
! 396: ** before returning.
! 397: */
! 398: static int fts3BestSnippet(
! 399: int nSnippet, /* Desired snippet length */
! 400: Fts3Cursor *pCsr, /* Cursor to create snippet for */
! 401: int iCol, /* Index of column to create snippet from */
! 402: u64 mCovered, /* Mask of phrases already covered */
! 403: u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
! 404: SnippetFragment *pFragment, /* OUT: Best snippet found */
! 405: int *piScore /* OUT: Score of snippet pFragment */
! 406: ){
! 407: int rc; /* Return Code */
! 408: int nList; /* Number of phrases in expression */
! 409: SnippetIter sIter; /* Iterates through snippet candidates */
! 410: int nByte; /* Number of bytes of space to allocate */
! 411: int iBestScore = -1; /* Best snippet score found so far */
! 412: int i; /* Loop counter */
! 413:
! 414: memset(&sIter, 0, sizeof(sIter));
! 415:
! 416: /* Iterate through the phrases in the expression to count them. The same
! 417: ** callback makes sure the doclists are loaded for each phrase.
! 418: */
! 419: rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
! 420: if( rc!=SQLITE_OK ){
! 421: return rc;
! 422: }
! 423:
! 424: /* Now that it is known how many phrases there are, allocate and zero
! 425: ** the required space using malloc().
! 426: */
! 427: nByte = sizeof(SnippetPhrase) * nList;
! 428: sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
! 429: if( !sIter.aPhrase ){
! 430: return SQLITE_NOMEM;
! 431: }
! 432: memset(sIter.aPhrase, 0, nByte);
! 433:
! 434: /* Initialize the contents of the SnippetIter object. Then iterate through
! 435: ** the set of phrases in the expression to populate the aPhrase[] array.
! 436: */
! 437: sIter.pCsr = pCsr;
! 438: sIter.iCol = iCol;
! 439: sIter.nSnippet = nSnippet;
! 440: sIter.nPhrase = nList;
! 441: sIter.iCurrent = -1;
! 442: (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
! 443:
! 444: /* Set the *pmSeen output variable. */
! 445: for(i=0; i<nList; i++){
! 446: if( sIter.aPhrase[i].pHead ){
! 447: *pmSeen |= (u64)1 << i;
! 448: }
! 449: }
! 450:
! 451: /* Loop through all candidate snippets. Store the best snippet in
! 452: ** *pFragment. Store its associated 'score' in iBestScore.
! 453: */
! 454: pFragment->iCol = iCol;
! 455: while( !fts3SnippetNextCandidate(&sIter) ){
! 456: int iPos;
! 457: int iScore;
! 458: u64 mCover;
! 459: u64 mHighlight;
! 460: fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
! 461: assert( iScore>=0 );
! 462: if( iScore>iBestScore ){
! 463: pFragment->iPos = iPos;
! 464: pFragment->hlmask = mHighlight;
! 465: pFragment->covered = mCover;
! 466: iBestScore = iScore;
! 467: }
! 468: }
! 469:
! 470: sqlite3_free(sIter.aPhrase);
! 471: *piScore = iBestScore;
! 472: return SQLITE_OK;
! 473: }
! 474:
! 475:
! 476: /*
! 477: ** Append a string to the string-buffer passed as the first argument.
! 478: **
! 479: ** If nAppend is negative, then the length of the string zAppend is
! 480: ** determined using strlen().
! 481: */
! 482: static int fts3StringAppend(
! 483: StrBuffer *pStr, /* Buffer to append to */
! 484: const char *zAppend, /* Pointer to data to append to buffer */
! 485: int nAppend /* Size of zAppend in bytes (or -1) */
! 486: ){
! 487: if( nAppend<0 ){
! 488: nAppend = (int)strlen(zAppend);
! 489: }
! 490:
! 491: /* If there is insufficient space allocated at StrBuffer.z, use realloc()
! 492: ** to grow the buffer until so that it is big enough to accomadate the
! 493: ** appended data.
! 494: */
! 495: if( pStr->n+nAppend+1>=pStr->nAlloc ){
! 496: int nAlloc = pStr->nAlloc+nAppend+100;
! 497: char *zNew = sqlite3_realloc(pStr->z, nAlloc);
! 498: if( !zNew ){
! 499: return SQLITE_NOMEM;
! 500: }
! 501: pStr->z = zNew;
! 502: pStr->nAlloc = nAlloc;
! 503: }
! 504:
! 505: /* Append the data to the string buffer. */
! 506: memcpy(&pStr->z[pStr->n], zAppend, nAppend);
! 507: pStr->n += nAppend;
! 508: pStr->z[pStr->n] = '\0';
! 509:
! 510: return SQLITE_OK;
! 511: }
! 512:
! 513: /*
! 514: ** The fts3BestSnippet() function often selects snippets that end with a
! 515: ** query term. That is, the final term of the snippet is always a term
! 516: ** that requires highlighting. For example, if 'X' is a highlighted term
! 517: ** and '.' is a non-highlighted term, BestSnippet() may select:
! 518: **
! 519: ** ........X.....X
! 520: **
! 521: ** This function "shifts" the beginning of the snippet forward in the
! 522: ** document so that there are approximately the same number of
! 523: ** non-highlighted terms to the right of the final highlighted term as there
! 524: ** are to the left of the first highlighted term. For example, to this:
! 525: **
! 526: ** ....X.....X....
! 527: **
! 528: ** This is done as part of extracting the snippet text, not when selecting
! 529: ** the snippet. Snippet selection is done based on doclists only, so there
! 530: ** is no way for fts3BestSnippet() to know whether or not the document
! 531: ** actually contains terms that follow the final highlighted term.
! 532: */
! 533: static int fts3SnippetShift(
! 534: Fts3Table *pTab, /* FTS3 table snippet comes from */
! 535: int nSnippet, /* Number of tokens desired for snippet */
! 536: const char *zDoc, /* Document text to extract snippet from */
! 537: int nDoc, /* Size of buffer zDoc in bytes */
! 538: int *piPos, /* IN/OUT: First token of snippet */
! 539: u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
! 540: ){
! 541: u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
! 542:
! 543: if( hlmask ){
! 544: int nLeft; /* Tokens to the left of first highlight */
! 545: int nRight; /* Tokens to the right of last highlight */
! 546: int nDesired; /* Ideal number of tokens to shift forward */
! 547:
! 548: for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
! 549: for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
! 550: nDesired = (nLeft-nRight)/2;
! 551:
! 552: /* Ideally, the start of the snippet should be pushed forward in the
! 553: ** document nDesired tokens. This block checks if there are actually
! 554: ** nDesired tokens to the right of the snippet. If so, *piPos and
! 555: ** *pHlMask are updated to shift the snippet nDesired tokens to the
! 556: ** right. Otherwise, the snippet is shifted by the number of tokens
! 557: ** available.
! 558: */
! 559: if( nDesired>0 ){
! 560: int nShift; /* Number of tokens to shift snippet by */
! 561: int iCurrent = 0; /* Token counter */
! 562: int rc; /* Return Code */
! 563: sqlite3_tokenizer_module *pMod;
! 564: sqlite3_tokenizer_cursor *pC;
! 565: pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
! 566:
! 567: /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
! 568: ** or more tokens in zDoc/nDoc.
! 569: */
! 570: rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
! 571: if( rc!=SQLITE_OK ){
! 572: return rc;
! 573: }
! 574: pC->pTokenizer = pTab->pTokenizer;
! 575: while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
! 576: const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
! 577: rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
! 578: }
! 579: pMod->xClose(pC);
! 580: if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
! 581:
! 582: nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
! 583: assert( nShift<=nDesired );
! 584: if( nShift>0 ){
! 585: *piPos += nShift;
! 586: *pHlmask = hlmask >> nShift;
! 587: }
! 588: }
! 589: }
! 590: return SQLITE_OK;
! 591: }
! 592:
! 593: /*
! 594: ** Extract the snippet text for fragment pFragment from cursor pCsr and
! 595: ** append it to string buffer pOut.
! 596: */
! 597: static int fts3SnippetText(
! 598: Fts3Cursor *pCsr, /* FTS3 Cursor */
! 599: SnippetFragment *pFragment, /* Snippet to extract */
! 600: int iFragment, /* Fragment number */
! 601: int isLast, /* True for final fragment in snippet */
! 602: int nSnippet, /* Number of tokens in extracted snippet */
! 603: const char *zOpen, /* String inserted before highlighted term */
! 604: const char *zClose, /* String inserted after highlighted term */
! 605: const char *zEllipsis, /* String inserted between snippets */
! 606: StrBuffer *pOut /* Write output here */
! 607: ){
! 608: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 609: int rc; /* Return code */
! 610: const char *zDoc; /* Document text to extract snippet from */
! 611: int nDoc; /* Size of zDoc in bytes */
! 612: int iCurrent = 0; /* Current token number of document */
! 613: int iEnd = 0; /* Byte offset of end of current token */
! 614: int isShiftDone = 0; /* True after snippet is shifted */
! 615: int iPos = pFragment->iPos; /* First token of snippet */
! 616: u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
! 617: int iCol = pFragment->iCol+1; /* Query column to extract text from */
! 618: sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
! 619: sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
! 620: const char *ZDUMMY; /* Dummy argument used with tokenizer */
! 621: int DUMMY1; /* Dummy argument used with tokenizer */
! 622:
! 623: zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
! 624: if( zDoc==0 ){
! 625: if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
! 626: return SQLITE_NOMEM;
! 627: }
! 628: return SQLITE_OK;
! 629: }
! 630: nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
! 631:
! 632: /* Open a token cursor on the document. */
! 633: pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
! 634: rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
! 635: if( rc!=SQLITE_OK ){
! 636: return rc;
! 637: }
! 638: pC->pTokenizer = pTab->pTokenizer;
! 639:
! 640: while( rc==SQLITE_OK ){
! 641: int iBegin; /* Offset in zDoc of start of token */
! 642: int iFin; /* Offset in zDoc of end of token */
! 643: int isHighlight; /* True for highlighted terms */
! 644:
! 645: rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
! 646: if( rc!=SQLITE_OK ){
! 647: if( rc==SQLITE_DONE ){
! 648: /* Special case - the last token of the snippet is also the last token
! 649: ** of the column. Append any punctuation that occurred between the end
! 650: ** of the previous token and the end of the document to the output.
! 651: ** Then break out of the loop. */
! 652: rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
! 653: }
! 654: break;
! 655: }
! 656: if( iCurrent<iPos ){ continue; }
! 657:
! 658: if( !isShiftDone ){
! 659: int n = nDoc - iBegin;
! 660: rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
! 661: isShiftDone = 1;
! 662:
! 663: /* Now that the shift has been done, check if the initial "..." are
! 664: ** required. They are required if (a) this is not the first fragment,
! 665: ** or (b) this fragment does not begin at position 0 of its column.
! 666: */
! 667: if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
! 668: rc = fts3StringAppend(pOut, zEllipsis, -1);
! 669: }
! 670: if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
! 671: }
! 672:
! 673: if( iCurrent>=(iPos+nSnippet) ){
! 674: if( isLast ){
! 675: rc = fts3StringAppend(pOut, zEllipsis, -1);
! 676: }
! 677: break;
! 678: }
! 679:
! 680: /* Set isHighlight to true if this term should be highlighted. */
! 681: isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
! 682:
! 683: if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
! 684: if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
! 685: if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
! 686: if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
! 687:
! 688: iEnd = iFin;
! 689: }
! 690:
! 691: pMod->xClose(pC);
! 692: return rc;
! 693: }
! 694:
! 695:
/*
** Count the entries in a column-list (a delta-encoded list of term offsets
** within a single column of a single row).
**
** On entry *ppCollist must point at the first varint of the column-list
** (the varint holding the position of the first matching term in the
** column). On return *ppCollist points at the first byte after the last
** varint of the list: either the 0x00 that ends the position-list or the
** 0x01 that precedes the column number of the next column.
**
** The number of elements in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pCur = *ppCollist;        /* Read cursor */
  int nVarint = 0;                /* Complete varints seen so far */
  int bInVarint = 0;              /* True if previous byte had bit 0x80 set */

  for(;;){
    unsigned char b = (unsigned char)*pCur;

    /* A 0x00 or 0x01 byte ends the list, unless it is the continuation
    ** of a multi-byte varint. */
    if( !bInVarint && (b & 0xFE)==0 ){
      break;
    }

    pCur++;
    bInVarint = (b & 0x80)!=0;
    if( !bInVarint ){
      nVarint++;                  /* Final byte of a varint */
    }
  }

  *ppCollist = pCur;
  return nVarint;
}
! 723:
! 724: /*
! 725: ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
! 726: ** for a single query.
! 727: **
! 728: ** fts3ExprIterate() callback to load the 'global' elements of a
! 729: ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
! 730: ** of the matchinfo array that are constant for all rows returned by the
! 731: ** current query.
! 732: **
! 733: ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
! 734: ** function populates Matchinfo.aMatchinfo[] as follows:
! 735: **
! 736: ** for(iCol=0; iCol<nCol; iCol++){
! 737: ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
! 738: ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
! 739: ** }
! 740: **
! 741: ** where X is the number of matches for phrase iPhrase is column iCol of all
! 742: ** rows of the table. Y is the number of rows for which column iCol contains
! 743: ** at least one instance of phrase iPhrase.
! 744: **
! 745: ** If the phrase pExpr consists entirely of deferred tokens, then all X and
! 746: ** Y values are set to nDoc, where nDoc is the number of documents in the
! 747: ** file system. This is done because the full-text index doclist is required
! 748: ** to calculate these values properly, and the full-text index doclist is
! 749: ** not available for deferred tokens.
! 750: */
! 751: static int fts3ExprGlobalHitsCb(
! 752: Fts3Expr *pExpr, /* Phrase expression node */
! 753: int iPhrase, /* Phrase number (numbered from zero) */
! 754: void *pCtx /* Pointer to MatchInfo structure */
! 755: ){
! 756: MatchInfo *p = (MatchInfo *)pCtx;
! 757: return sqlite3Fts3EvalPhraseStats(
! 758: p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
! 759: );
! 760: }
! 761:
! 762: /*
! 763: ** fts3ExprIterate() callback used to collect the "local" part of the
! 764: ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
! 765: ** array that are different for each row returned by the query.
! 766: */
! 767: static int fts3ExprLocalHitsCb(
! 768: Fts3Expr *pExpr, /* Phrase expression node */
! 769: int iPhrase, /* Phrase number */
! 770: void *pCtx /* Pointer to MatchInfo structure */
! 771: ){
! 772: MatchInfo *p = (MatchInfo *)pCtx;
! 773: int iStart = iPhrase * p->nCol * 3;
! 774: int i;
! 775:
! 776: for(i=0; i<p->nCol; i++){
! 777: char *pCsr;
! 778: pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i);
! 779: if( pCsr ){
! 780: p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
! 781: }else{
! 782: p->aMatchinfo[iStart+i*3] = 0;
! 783: }
! 784: }
! 785:
! 786: return SQLITE_OK;
! 787: }
! 788:
! 789: static int fts3MatchinfoCheck(
! 790: Fts3Table *pTab,
! 791: char cArg,
! 792: char **pzErr
! 793: ){
! 794: if( (cArg==FTS3_MATCHINFO_NPHRASE)
! 795: || (cArg==FTS3_MATCHINFO_NCOL)
! 796: || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat)
! 797: || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat)
! 798: || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
! 799: || (cArg==FTS3_MATCHINFO_LCS)
! 800: || (cArg==FTS3_MATCHINFO_HITS)
! 801: ){
! 802: return SQLITE_OK;
! 803: }
! 804: *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
! 805: return SQLITE_ERROR;
! 806: }
! 807:
! 808: static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
! 809: int nVal; /* Number of integers output by cArg */
! 810:
! 811: switch( cArg ){
! 812: case FTS3_MATCHINFO_NDOC:
! 813: case FTS3_MATCHINFO_NPHRASE:
! 814: case FTS3_MATCHINFO_NCOL:
! 815: nVal = 1;
! 816: break;
! 817:
! 818: case FTS3_MATCHINFO_AVGLENGTH:
! 819: case FTS3_MATCHINFO_LENGTH:
! 820: case FTS3_MATCHINFO_LCS:
! 821: nVal = pInfo->nCol;
! 822: break;
! 823:
! 824: default:
! 825: assert( cArg==FTS3_MATCHINFO_HITS );
! 826: nVal = pInfo->nCol * pInfo->nPhrase * 3;
! 827: break;
! 828: }
! 829:
! 830: return nVal;
! 831: }
! 832:
! 833: static int fts3MatchinfoSelectDoctotal(
! 834: Fts3Table *pTab,
! 835: sqlite3_stmt **ppStmt,
! 836: sqlite3_int64 *pnDoc,
! 837: const char **paLen
! 838: ){
! 839: sqlite3_stmt *pStmt;
! 840: const char *a;
! 841: sqlite3_int64 nDoc;
! 842:
! 843: if( !*ppStmt ){
! 844: int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
! 845: if( rc!=SQLITE_OK ) return rc;
! 846: }
! 847: pStmt = *ppStmt;
! 848: assert( sqlite3_data_count(pStmt)==1 );
! 849:
! 850: a = sqlite3_column_blob(pStmt, 0);
! 851: a += sqlite3Fts3GetVarint(a, &nDoc);
! 852: if( nDoc==0 ) return FTS_CORRUPT_VTAB;
! 853: *pnDoc = (u32)nDoc;
! 854:
! 855: if( paLen ) *paLen = a;
! 856: return SQLITE_OK;
! 857: }
! 858:
! 859: /*
! 860: ** An instance of the following structure is used to store state while
! 861: ** iterating through a multi-column position-list corresponding to the
! 862: ** hits for a single phrase on a single row in order to calculate the
! 863: ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
! 864: */
! 865: typedef struct LcsIterator LcsIterator;
! 866: struct LcsIterator {
! 867: Fts3Expr *pExpr; /* Pointer to phrase expression */
! 868: int iPosOffset; /* Tokens count up to end of this phrase */
! 869: char *pRead; /* Cursor used to iterate through aDoclist */
! 870: int iPos; /* Current position */
! 871: };
! 872:
/*
** Sentinel intended to mark an LcsIterator that has finished iterating
** through all offsets for all columns.
**
** NOTE(review): LcsIterator has no iCol member and the LCS code visible
** below signals end-of-list with LcsIterator.pRead==0 instead; this macro
** appears to be unused - confirm before relying on it.
**
** The definition deliberately has no trailing semicolon so that it can be
** used inside expressions.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
! 878:
! 879: static int fts3MatchinfoLcsCb(
! 880: Fts3Expr *pExpr, /* Phrase expression node */
! 881: int iPhrase, /* Phrase number (numbered from zero) */
! 882: void *pCtx /* Pointer to MatchInfo structure */
! 883: ){
! 884: LcsIterator *aIter = (LcsIterator *)pCtx;
! 885: aIter[iPhrase].pExpr = pExpr;
! 886: return SQLITE_OK;
! 887: }
! 888:
! 889: /*
! 890: ** Advance the iterator passed as an argument to the next position. Return
! 891: ** 1 if the iterator is at EOF or if it now points to the start of the
! 892: ** position list for the next column.
! 893: */
! 894: static int fts3LcsIteratorAdvance(LcsIterator *pIter){
! 895: char *pRead = pIter->pRead;
! 896: sqlite3_int64 iRead;
! 897: int rc = 0;
! 898:
! 899: pRead += sqlite3Fts3GetVarint(pRead, &iRead);
! 900: if( iRead==0 || iRead==1 ){
! 901: pRead = 0;
! 902: rc = 1;
! 903: }else{
! 904: pIter->iPos += (int)(iRead-2);
! 905: }
! 906:
! 907: pIter->pRead = pRead;
! 908: return rc;
! 909: }
! 910:
! 911: /*
! 912: ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
! 913: **
! 914: ** If the call is successful, the longest-common-substring lengths for each
! 915: ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
! 916: ** array before returning. SQLITE_OK is returned in this case.
! 917: **
! 918: ** Otherwise, if an error occurs, an SQLite error code is returned and the
! 919: ** data written to the first nCol elements of pInfo->aMatchinfo[] is
! 920: ** undefined.
! 921: */
! 922: static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
! 923: LcsIterator *aIter;
! 924: int i;
! 925: int iCol;
! 926: int nToken = 0;
! 927:
! 928: /* Allocate and populate the array of LcsIterator objects. The array
! 929: ** contains one element for each matchable phrase in the query.
! 930: **/
! 931: aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
! 932: if( !aIter ) return SQLITE_NOMEM;
! 933: memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
! 934: (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
! 935:
! 936: for(i=0; i<pInfo->nPhrase; i++){
! 937: LcsIterator *pIter = &aIter[i];
! 938: nToken -= pIter->pExpr->pPhrase->nToken;
! 939: pIter->iPosOffset = nToken;
! 940: }
! 941:
! 942: for(iCol=0; iCol<pInfo->nCol; iCol++){
! 943: int nLcs = 0; /* LCS value for this column */
! 944: int nLive = 0; /* Number of iterators in aIter not at EOF */
! 945:
! 946: for(i=0; i<pInfo->nPhrase; i++){
! 947: LcsIterator *pIt = &aIter[i];
! 948: pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol);
! 949: if( pIt->pRead ){
! 950: pIt->iPos = pIt->iPosOffset;
! 951: fts3LcsIteratorAdvance(&aIter[i]);
! 952: nLive++;
! 953: }
! 954: }
! 955:
! 956: while( nLive>0 ){
! 957: LcsIterator *pAdv = 0; /* The iterator to advance by one position */
! 958: int nThisLcs = 0; /* LCS for the current iterator positions */
! 959:
! 960: for(i=0; i<pInfo->nPhrase; i++){
! 961: LcsIterator *pIter = &aIter[i];
! 962: if( pIter->pRead==0 ){
! 963: /* This iterator is already at EOF for this column. */
! 964: nThisLcs = 0;
! 965: }else{
! 966: if( pAdv==0 || pIter->iPos<pAdv->iPos ){
! 967: pAdv = pIter;
! 968: }
! 969: if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
! 970: nThisLcs++;
! 971: }else{
! 972: nThisLcs = 1;
! 973: }
! 974: if( nThisLcs>nLcs ) nLcs = nThisLcs;
! 975: }
! 976: }
! 977: if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
! 978: }
! 979:
! 980: pInfo->aMatchinfo[iCol] = nLcs;
! 981: }
! 982:
! 983: sqlite3_free(aIter);
! 984: return SQLITE_OK;
! 985: }
! 986:
! 987: /*
! 988: ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
! 989: ** be returned by the matchinfo() function. Argument zArg contains the
! 990: ** format string passed as the second argument to matchinfo (or the
! 991: ** default value "pcx" if no second argument was specified). The format
! 992: ** string has already been validated and the pInfo->aMatchinfo[] array
! 993: ** is guaranteed to be large enough for the output.
! 994: **
! 995: ** If bGlobal is true, then populate all fields of the matchinfo() output.
! 996: ** If it is false, then assume that those fields that do not change between
! 997: ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
! 998: ** have already been populated.
! 999: **
! 1000: ** Return SQLITE_OK if successful, or an SQLite error code if an error
! 1001: ** occurs. If a value other than SQLITE_OK is returned, the state the
! 1002: ** pInfo->aMatchinfo[] buffer is left in is undefined.
! 1003: */
! 1004: static int fts3MatchinfoValues(
! 1005: Fts3Cursor *pCsr, /* FTS3 cursor object */
! 1006: int bGlobal, /* True to grab the global stats */
! 1007: MatchInfo *pInfo, /* Matchinfo context object */
! 1008: const char *zArg /* Matchinfo format string */
! 1009: ){
! 1010: int rc = SQLITE_OK;
! 1011: int i;
! 1012: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 1013: sqlite3_stmt *pSelect = 0;
! 1014:
! 1015: for(i=0; rc==SQLITE_OK && zArg[i]; i++){
! 1016:
! 1017: switch( zArg[i] ){
! 1018: case FTS3_MATCHINFO_NPHRASE:
! 1019: if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
! 1020: break;
! 1021:
! 1022: case FTS3_MATCHINFO_NCOL:
! 1023: if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
! 1024: break;
! 1025:
! 1026: case FTS3_MATCHINFO_NDOC:
! 1027: if( bGlobal ){
! 1028: sqlite3_int64 nDoc = 0;
! 1029: rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
! 1030: pInfo->aMatchinfo[0] = (u32)nDoc;
! 1031: }
! 1032: break;
! 1033:
! 1034: case FTS3_MATCHINFO_AVGLENGTH:
! 1035: if( bGlobal ){
! 1036: sqlite3_int64 nDoc; /* Number of rows in table */
! 1037: const char *a; /* Aggregate column length array */
! 1038:
! 1039: rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
! 1040: if( rc==SQLITE_OK ){
! 1041: int iCol;
! 1042: for(iCol=0; iCol<pInfo->nCol; iCol++){
! 1043: u32 iVal;
! 1044: sqlite3_int64 nToken;
! 1045: a += sqlite3Fts3GetVarint(a, &nToken);
! 1046: iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
! 1047: pInfo->aMatchinfo[iCol] = iVal;
! 1048: }
! 1049: }
! 1050: }
! 1051: break;
! 1052:
! 1053: case FTS3_MATCHINFO_LENGTH: {
! 1054: sqlite3_stmt *pSelectDocsize = 0;
! 1055: rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
! 1056: if( rc==SQLITE_OK ){
! 1057: int iCol;
! 1058: const char *a = sqlite3_column_blob(pSelectDocsize, 0);
! 1059: for(iCol=0; iCol<pInfo->nCol; iCol++){
! 1060: sqlite3_int64 nToken;
! 1061: a += sqlite3Fts3GetVarint(a, &nToken);
! 1062: pInfo->aMatchinfo[iCol] = (u32)nToken;
! 1063: }
! 1064: }
! 1065: sqlite3_reset(pSelectDocsize);
! 1066: break;
! 1067: }
! 1068:
! 1069: case FTS3_MATCHINFO_LCS:
! 1070: rc = fts3ExprLoadDoclists(pCsr, 0, 0);
! 1071: if( rc==SQLITE_OK ){
! 1072: rc = fts3MatchinfoLcs(pCsr, pInfo);
! 1073: }
! 1074: break;
! 1075:
! 1076: default: {
! 1077: Fts3Expr *pExpr;
! 1078: assert( zArg[i]==FTS3_MATCHINFO_HITS );
! 1079: pExpr = pCsr->pExpr;
! 1080: rc = fts3ExprLoadDoclists(pCsr, 0, 0);
! 1081: if( rc!=SQLITE_OK ) break;
! 1082: if( bGlobal ){
! 1083: if( pCsr->pDeferred ){
! 1084: rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
! 1085: if( rc!=SQLITE_OK ) break;
! 1086: }
! 1087: rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
! 1088: if( rc!=SQLITE_OK ) break;
! 1089: }
! 1090: (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
! 1091: break;
! 1092: }
! 1093: }
! 1094:
! 1095: pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
! 1096: }
! 1097:
! 1098: sqlite3_reset(pSelect);
! 1099: return rc;
! 1100: }
! 1101:
! 1102:
! 1103: /*
! 1104: ** Populate pCsr->aMatchinfo[] with data for the current row. The
! 1105: ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
! 1106: */
! 1107: static int fts3GetMatchinfo(
! 1108: Fts3Cursor *pCsr, /* FTS3 Cursor object */
! 1109: const char *zArg /* Second argument to matchinfo() function */
! 1110: ){
! 1111: MatchInfo sInfo;
! 1112: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 1113: int rc = SQLITE_OK;
! 1114: int bGlobal = 0; /* Collect 'global' stats as well as local */
! 1115:
! 1116: memset(&sInfo, 0, sizeof(MatchInfo));
! 1117: sInfo.pCursor = pCsr;
! 1118: sInfo.nCol = pTab->nColumn;
! 1119:
! 1120: /* If there is cached matchinfo() data, but the format string for the
! 1121: ** cache does not match the format string for this request, discard
! 1122: ** the cached data. */
! 1123: if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
! 1124: assert( pCsr->aMatchinfo );
! 1125: sqlite3_free(pCsr->aMatchinfo);
! 1126: pCsr->zMatchinfo = 0;
! 1127: pCsr->aMatchinfo = 0;
! 1128: }
! 1129:
! 1130: /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
! 1131: ** matchinfo function has been called for this query. In this case
! 1132: ** allocate the array used to accumulate the matchinfo data and
! 1133: ** initialize those elements that are constant for every row.
! 1134: */
! 1135: if( pCsr->aMatchinfo==0 ){
! 1136: int nMatchinfo = 0; /* Number of u32 elements in match-info */
! 1137: int nArg; /* Bytes in zArg */
! 1138: int i; /* Used to iterate through zArg */
! 1139:
! 1140: /* Determine the number of phrases in the query */
! 1141: pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
! 1142: sInfo.nPhrase = pCsr->nPhrase;
! 1143:
! 1144: /* Determine the number of integers in the buffer returned by this call. */
! 1145: for(i=0; zArg[i]; i++){
! 1146: nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
! 1147: }
! 1148:
! 1149: /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
! 1150: nArg = (int)strlen(zArg);
! 1151: pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
! 1152: if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
! 1153:
! 1154: pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
! 1155: pCsr->nMatchinfo = nMatchinfo;
! 1156: memcpy(pCsr->zMatchinfo, zArg, nArg+1);
! 1157: memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
! 1158: pCsr->isMatchinfoNeeded = 1;
! 1159: bGlobal = 1;
! 1160: }
! 1161:
! 1162: sInfo.aMatchinfo = pCsr->aMatchinfo;
! 1163: sInfo.nPhrase = pCsr->nPhrase;
! 1164: if( pCsr->isMatchinfoNeeded ){
! 1165: rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
! 1166: pCsr->isMatchinfoNeeded = 0;
! 1167: }
! 1168:
! 1169: return rc;
! 1170: }
! 1171:
! 1172: /*
! 1173: ** Implementation of snippet() function.
! 1174: */
! 1175: void sqlite3Fts3Snippet(
! 1176: sqlite3_context *pCtx, /* SQLite function call context */
! 1177: Fts3Cursor *pCsr, /* Cursor object */
! 1178: const char *zStart, /* Snippet start text - "<b>" */
! 1179: const char *zEnd, /* Snippet end text - "</b>" */
! 1180: const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */
! 1181: int iCol, /* Extract snippet from this column */
! 1182: int nToken /* Approximate number of tokens in snippet */
! 1183: ){
! 1184: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 1185: int rc = SQLITE_OK;
! 1186: int i;
! 1187: StrBuffer res = {0, 0, 0};
! 1188:
! 1189: /* The returned text includes up to four fragments of text extracted from
! 1190: ** the data in the current row. The first iteration of the for(...) loop
! 1191: ** below attempts to locate a single fragment of text nToken tokens in
! 1192: ** size that contains at least one instance of all phrases in the query
! 1193: ** expression that appear in the current row. If such a fragment of text
! 1194: ** cannot be found, the second iteration of the loop attempts to locate
! 1195: ** a pair of fragments, and so on.
! 1196: */
! 1197: int nSnippet = 0; /* Number of fragments in this snippet */
! 1198: SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
! 1199: int nFToken = -1; /* Number of tokens in each fragment */
! 1200:
! 1201: if( !pCsr->pExpr ){
! 1202: sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
! 1203: return;
! 1204: }
! 1205:
! 1206: for(nSnippet=1; 1; nSnippet++){
! 1207:
! 1208: int iSnip; /* Loop counter 0..nSnippet-1 */
! 1209: u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
! 1210: u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
! 1211:
! 1212: if( nToken>=0 ){
! 1213: nFToken = (nToken+nSnippet-1) / nSnippet;
! 1214: }else{
! 1215: nFToken = -1 * nToken;
! 1216: }
! 1217:
! 1218: for(iSnip=0; iSnip<nSnippet; iSnip++){
! 1219: int iBestScore = -1; /* Best score of columns checked so far */
! 1220: int iRead; /* Used to iterate through columns */
! 1221: SnippetFragment *pFragment = &aSnippet[iSnip];
! 1222:
! 1223: memset(pFragment, 0, sizeof(*pFragment));
! 1224:
! 1225: /* Loop through all columns of the table being considered for snippets.
! 1226: ** If the iCol argument to this function was negative, this means all
! 1227: ** columns of the FTS3 table. Otherwise, only column iCol is considered.
! 1228: */
! 1229: for(iRead=0; iRead<pTab->nColumn; iRead++){
! 1230: SnippetFragment sF = {0, 0, 0, 0};
! 1231: int iS;
! 1232: if( iCol>=0 && iRead!=iCol ) continue;
! 1233:
! 1234: /* Find the best snippet of nFToken tokens in column iRead. */
! 1235: rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
! 1236: if( rc!=SQLITE_OK ){
! 1237: goto snippet_out;
! 1238: }
! 1239: if( iS>iBestScore ){
! 1240: *pFragment = sF;
! 1241: iBestScore = iS;
! 1242: }
! 1243: }
! 1244:
! 1245: mCovered |= pFragment->covered;
! 1246: }
! 1247:
! 1248: /* If all query phrases seen by fts3BestSnippet() are present in at least
! 1249: ** one of the nSnippet snippet fragments, break out of the loop.
! 1250: */
! 1251: assert( (mCovered&mSeen)==mCovered );
! 1252: if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
! 1253: }
! 1254:
! 1255: assert( nFToken>0 );
! 1256:
! 1257: for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
! 1258: rc = fts3SnippetText(pCsr, &aSnippet[i],
! 1259: i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
! 1260: );
! 1261: }
! 1262:
! 1263: snippet_out:
! 1264: sqlite3Fts3SegmentsClose(pTab);
! 1265: if( rc!=SQLITE_OK ){
! 1266: sqlite3_result_error_code(pCtx, rc);
! 1267: sqlite3_free(res.z);
! 1268: }else{
! 1269: sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
! 1270: }
! 1271: }
! 1272:
! 1273:
! 1274: typedef struct TermOffset TermOffset;
! 1275: typedef struct TermOffsetCtx TermOffsetCtx;
! 1276:
! 1277: struct TermOffset {
! 1278: char *pList; /* Position-list */
! 1279: int iPos; /* Position just read from pList */
! 1280: int iOff; /* Offset of this term from read positions */
! 1281: };
! 1282:
! 1283: struct TermOffsetCtx {
! 1284: Fts3Cursor *pCsr;
! 1285: int iCol; /* Column of table to populate aTerm for */
! 1286: int iTerm;
! 1287: sqlite3_int64 iDocid;
! 1288: TermOffset *aTerm;
! 1289: };
! 1290:
! 1291: /*
! 1292: ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
! 1293: */
! 1294: static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
! 1295: TermOffsetCtx *p = (TermOffsetCtx *)ctx;
! 1296: int nTerm; /* Number of tokens in phrase */
! 1297: int iTerm; /* For looping through nTerm phrase terms */
! 1298: char *pList; /* Pointer to position list for phrase */
! 1299: int iPos = 0; /* First position in position-list */
! 1300:
! 1301: UNUSED_PARAMETER(iPhrase);
! 1302: pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
! 1303: nTerm = pExpr->pPhrase->nToken;
! 1304: if( pList ){
! 1305: fts3GetDeltaPosition(&pList, &iPos);
! 1306: assert( iPos>=0 );
! 1307: }
! 1308:
! 1309: for(iTerm=0; iTerm<nTerm; iTerm++){
! 1310: TermOffset *pT = &p->aTerm[p->iTerm++];
! 1311: pT->iOff = nTerm-iTerm-1;
! 1312: pT->pList = pList;
! 1313: pT->iPos = iPos;
! 1314: }
! 1315:
! 1316: return SQLITE_OK;
! 1317: }
! 1318:
! 1319: /*
! 1320: ** Implementation of offsets() function.
! 1321: */
! 1322: void sqlite3Fts3Offsets(
! 1323: sqlite3_context *pCtx, /* SQLite function call context */
! 1324: Fts3Cursor *pCsr /* Cursor object */
! 1325: ){
! 1326: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 1327: sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
! 1328: const char *ZDUMMY; /* Dummy argument used with xNext() */
! 1329: int NDUMMY; /* Dummy argument used with xNext() */
! 1330: int rc; /* Return Code */
! 1331: int nToken; /* Number of tokens in query */
! 1332: int iCol; /* Column currently being processed */
! 1333: StrBuffer res = {0, 0, 0}; /* Result string */
! 1334: TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
! 1335:
! 1336: if( !pCsr->pExpr ){
! 1337: sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
! 1338: return;
! 1339: }
! 1340:
! 1341: memset(&sCtx, 0, sizeof(sCtx));
! 1342: assert( pCsr->isRequireSeek==0 );
! 1343:
! 1344: /* Count the number of terms in the query */
! 1345: rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
! 1346: if( rc!=SQLITE_OK ) goto offsets_out;
! 1347:
! 1348: /* Allocate the array of TermOffset iterators. */
! 1349: sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
! 1350: if( 0==sCtx.aTerm ){
! 1351: rc = SQLITE_NOMEM;
! 1352: goto offsets_out;
! 1353: }
! 1354: sCtx.iDocid = pCsr->iPrevId;
! 1355: sCtx.pCsr = pCsr;
! 1356:
! 1357: /* Loop through the table columns, appending offset information to
! 1358: ** string-buffer res for each column.
! 1359: */
! 1360: for(iCol=0; iCol<pTab->nColumn; iCol++){
! 1361: sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
! 1362: int iStart;
! 1363: int iEnd;
! 1364: int iCurrent;
! 1365: const char *zDoc;
! 1366: int nDoc;
! 1367:
! 1368: /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
! 1369: ** no way that this operation can fail, so the return code from
! 1370: ** fts3ExprIterate() can be discarded.
! 1371: */
! 1372: sCtx.iCol = iCol;
! 1373: sCtx.iTerm = 0;
! 1374: (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
! 1375:
! 1376: /* Retreive the text stored in column iCol. If an SQL NULL is stored
! 1377: ** in column iCol, jump immediately to the next iteration of the loop.
! 1378: ** If an OOM occurs while retrieving the data (this can happen if SQLite
! 1379: ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
! 1380: ** to the caller.
! 1381: */
! 1382: zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
! 1383: nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
! 1384: if( zDoc==0 ){
! 1385: if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
! 1386: continue;
! 1387: }
! 1388: rc = SQLITE_NOMEM;
! 1389: goto offsets_out;
! 1390: }
! 1391:
! 1392: /* Initialize a tokenizer iterator to iterate through column iCol. */
! 1393: rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
! 1394: if( rc!=SQLITE_OK ) goto offsets_out;
! 1395: pC->pTokenizer = pTab->pTokenizer;
! 1396:
! 1397: rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
! 1398: while( rc==SQLITE_OK ){
! 1399: int i; /* Used to loop through terms */
! 1400: int iMinPos = 0x7FFFFFFF; /* Position of next token */
! 1401: TermOffset *pTerm = 0; /* TermOffset associated with next token */
! 1402:
! 1403: for(i=0; i<nToken; i++){
! 1404: TermOffset *pT = &sCtx.aTerm[i];
! 1405: if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
! 1406: iMinPos = pT->iPos-pT->iOff;
! 1407: pTerm = pT;
! 1408: }
! 1409: }
! 1410:
! 1411: if( !pTerm ){
! 1412: /* All offsets for this column have been gathered. */
! 1413: rc = SQLITE_DONE;
! 1414: }else{
! 1415: assert( iCurrent<=iMinPos );
! 1416: if( 0==(0xFE&*pTerm->pList) ){
! 1417: pTerm->pList = 0;
! 1418: }else{
! 1419: fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
! 1420: }
! 1421: while( rc==SQLITE_OK && iCurrent<iMinPos ){
! 1422: rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
! 1423: }
! 1424: if( rc==SQLITE_OK ){
! 1425: char aBuffer[64];
! 1426: sqlite3_snprintf(sizeof(aBuffer), aBuffer,
! 1427: "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
! 1428: );
! 1429: rc = fts3StringAppend(&res, aBuffer, -1);
! 1430: }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
! 1431: rc = FTS_CORRUPT_VTAB;
! 1432: }
! 1433: }
! 1434: }
! 1435: if( rc==SQLITE_DONE ){
! 1436: rc = SQLITE_OK;
! 1437: }
! 1438:
! 1439: pMod->xClose(pC);
! 1440: if( rc!=SQLITE_OK ) goto offsets_out;
! 1441: }
! 1442:
! 1443: offsets_out:
! 1444: sqlite3_free(sCtx.aTerm);
! 1445: assert( rc!=SQLITE_DONE );
! 1446: sqlite3Fts3SegmentsClose(pTab);
! 1447: if( rc!=SQLITE_OK ){
! 1448: sqlite3_result_error_code(pCtx, rc);
! 1449: sqlite3_free(res.z);
! 1450: }else{
! 1451: sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
! 1452: }
! 1453: return;
! 1454: }
! 1455:
! 1456: /*
! 1457: ** Implementation of matchinfo() function.
! 1458: */
! 1459: void sqlite3Fts3Matchinfo(
! 1460: sqlite3_context *pContext, /* Function call context */
! 1461: Fts3Cursor *pCsr, /* FTS3 table cursor */
! 1462: const char *zArg /* Second arg to matchinfo() function */
! 1463: ){
! 1464: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 1465: int rc;
! 1466: int i;
! 1467: const char *zFormat;
! 1468:
! 1469: if( zArg ){
! 1470: for(i=0; zArg[i]; i++){
! 1471: char *zErr = 0;
! 1472: if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
! 1473: sqlite3_result_error(pContext, zErr, -1);
! 1474: sqlite3_free(zErr);
! 1475: return;
! 1476: }
! 1477: }
! 1478: zFormat = zArg;
! 1479: }else{
! 1480: zFormat = FTS3_MATCHINFO_DEFAULT;
! 1481: }
! 1482:
! 1483: if( !pCsr->pExpr ){
! 1484: sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
! 1485: return;
! 1486: }
! 1487:
! 1488: /* Retrieve matchinfo() data. */
! 1489: rc = fts3GetMatchinfo(pCsr, zFormat);
! 1490: sqlite3Fts3SegmentsClose(pTab);
! 1491:
! 1492: if( rc!=SQLITE_OK ){
! 1493: sqlite3_result_error_code(pContext, rc);
! 1494: }else{
! 1495: int n = pCsr->nMatchinfo * sizeof(u32);
! 1496: sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
! 1497: }
! 1498: }
! 1499:
! 1500: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>