Return to fts3_snippet.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / ext / fts3 |
1.1 ! misho 1: /* ! 2: ** 2009 Oct 23 ! 3: ** ! 4: ** The author disclaims copyright to this source code. In place of ! 5: ** a legal notice, here is a blessing: ! 6: ** ! 7: ** May you do good and not evil. ! 8: ** May you find forgiveness for yourself and forgive others. ! 9: ** May you share freely, never taking more than you give. ! 10: ** ! 11: ****************************************************************************** ! 12: */ ! 13: ! 14: #include "fts3Int.h" ! 15: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) ! 16: ! 17: #include <string.h> ! 18: #include <assert.h> ! 19: ! 20: /* ! 21: ** Characters that may appear in the second argument to matchinfo(). ! 22: */ ! 23: #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ ! 24: #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ ! 25: #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ ! 26: #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ ! 27: #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ ! 28: #define FTS3_MATCHINFO_LCS 's' /* nCol values */ ! 29: #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ ! 30: ! 31: /* ! 32: ** The default value for the second argument to matchinfo(). ! 33: */ ! 34: #define FTS3_MATCHINFO_DEFAULT "pcx" ! 35: ! 36: ! 37: /* ! 38: ** Used as an fts3ExprIterate() context when loading phrase doclists to ! 39: ** Fts3Expr.aDoclist[]/nDoclist. ! 40: */ ! 41: typedef struct LoadDoclistCtx LoadDoclistCtx; ! 42: struct LoadDoclistCtx { ! 43: Fts3Cursor *pCsr; /* FTS3 Cursor */ ! 44: int nPhrase; /* Number of phrases seen so far */ ! 45: int nToken; /* Number of tokens seen so far */ ! 46: }; ! 47: ! 48: /* ! 49: ** The following types are used as part of the implementation of the ! 50: ** fts3BestSnippet() routine. ! 51: */ ! 52: typedef struct SnippetIter SnippetIter; ! 53: typedef struct SnippetPhrase SnippetPhrase; ! 54: typedef struct SnippetFragment SnippetFragment; ! 55: ! 56: struct SnippetIter { ! 57: Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ ! 58: int iCol; /* Extract snippet from this column */ ! 59: int nSnippet; /* Requested snippet length (in tokens) */ ! 60: int nPhrase; /* Number of phrases in query */ ! 61: SnippetPhrase *aPhrase; /* Array of size nPhrase */ ! 62: int iCurrent; /* First token of current snippet */ ! 63: }; ! 64: ! 65: struct SnippetPhrase { ! 66: int nToken; /* Number of tokens in phrase */ ! 67: char *pList; /* Pointer to start of phrase position list */ ! 68: int iHead; /* Next value in position list */ ! 69: char *pHead; /* Position list data following iHead */ ! 70: int iTail; /* Next value in trailing position list */ ! 71: char *pTail; /* Position list data following iTail */ ! 72: }; ! 73: ! 74: struct SnippetFragment { ! 75: int iCol; /* Column snippet is extracted from */ ! 76: int iPos; /* Index of first token in snippet */ ! 77: u64 covered; /* Mask of query phrases covered */ ! 78: u64 hlmask; /* Mask of snippet terms to highlight */ ! 79: }; ! 80: ! 81: /* ! 82: ** This type is used as an fts3ExprIterate() context object while ! 83: ** accumulating the data returned by the matchinfo() function. ! 84: */ ! 85: typedef struct MatchInfo MatchInfo; ! 86: struct MatchInfo { ! 87: Fts3Cursor *pCursor; /* FTS3 Cursor */ ! 88: int nCol; /* Number of columns in table */ ! 89: int nPhrase; /* Number of matchable phrases in query */ ! 90: sqlite3_int64 nDoc; /* Number of docs in database */ ! 91: u32 *aMatchinfo; /* Pre-allocated buffer */ ! 92: }; ! 93: ! 94: ! 95: ! 96: /* ! 97: ** The snippet() and offsets() functions both return text values. An instance ! 98: ** of the following structure is used to accumulate those values while the ! 99: ** functions are running. See fts3StringAppend() for details. ! 100: */ ! 101: typedef struct StrBuffer StrBuffer; ! 102: struct StrBuffer { ! 103: char *z; /* Pointer to buffer containing string */ ! 104: int n; /* Length of z in bytes (excl. nul-term) */ ! 105: int nAlloc; /* Allocated size of buffer z in bytes */ ! 106: }; ! 107: ! 108: ! 109: /* ! 110: ** This function is used to help iterate through a position-list. A position ! 111: ** list is a list of unique integers, sorted from smallest to largest. Each ! 112: ** element of the list is represented by an FTS3 varint that takes the value ! 113: ** of the difference between the current element and the previous one plus ! 114: ** two. For example, to store the position-list: ! 115: ** ! 116: ** 4 9 113 ! 117: ** ! 118: ** the three varints: ! 119: ** ! 120: ** 6 7 106 ! 121: ** ! 122: ** are encoded. ! 123: ** ! 124: ** When this function is called, *pp points to the start of an element of ! 125: ** the list. *piPos contains the value of the previous entry in the list. ! 126: ** After it returns, *piPos contains the value of the next element of the ! 127: ** list and *pp is advanced to the following varint. ! 128: */ ! 129: static void fts3GetDeltaPosition(char **pp, int *piPos){ ! 130: int iVal; ! 131: *pp += sqlite3Fts3GetVarint32(*pp, &iVal); ! 132: *piPos += (iVal-2); ! 133: } ! 134: ! 135: /* ! 136: ** Helper function for fts3ExprIterate() (see below). ! 137: */ ! 138: static int fts3ExprIterate2( ! 139: Fts3Expr *pExpr, /* Expression to iterate phrases of */ ! 140: int *piPhrase, /* Pointer to phrase counter */ ! 141: int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ ! 142: void *pCtx /* Second argument to pass to callback */ ! 143: ){ ! 144: int rc; /* Return code */ ! 145: int eType = pExpr->eType; /* Type of expression node pExpr */ ! 146: ! 147: if( eType!=FTSQUERY_PHRASE ){ ! 148: assert( pExpr->pLeft && pExpr->pRight ); ! 149: rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); ! 150: if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ ! 151: rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); ! 152: } ! 153: }else{ ! 154: rc = x(pExpr, *piPhrase, pCtx); ! 155: (*piPhrase)++; ! 156: } ! 157: return rc; ! 158: } ! 159: ! 160: /* ! 161: ** Iterate through all phrase nodes in an FTS3 query, except those that ! 162: ** are part of a sub-tree that is the right-hand-side of a NOT operator. ! 163: ** For each phrase node found, the supplied callback function is invoked. ! 164: ** ! 165: ** If the callback function returns anything other than SQLITE_OK, ! 166: ** the iteration is abandoned and the error code returned immediately. ! 167: ** Otherwise, SQLITE_OK is returned after a callback has been made for ! 168: ** all eligible phrase nodes. ! 169: */ ! 170: static int fts3ExprIterate( ! 171: Fts3Expr *pExpr, /* Expression to iterate phrases of */ ! 172: int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ ! 173: void *pCtx /* Second argument to pass to callback */ ! 174: ){ ! 175: int iPhrase = 0; /* Variable used as the phrase counter */ ! 176: return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); ! 177: } ! 178: ! 179: /* ! 180: ** This is an fts3ExprIterate() callback used while loading the doclists ! 181: ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also ! 182: ** fts3ExprLoadDoclists(). ! 183: */ ! 184: static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ ! 185: int rc = SQLITE_OK; ! 186: Fts3Phrase *pPhrase = pExpr->pPhrase; ! 187: LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; ! 188: ! 189: UNUSED_PARAMETER(iPhrase); ! 190: ! 191: p->nPhrase++; ! 192: p->nToken += pPhrase->nToken; ! 193: ! 194: return rc; ! 195: } ! 196: ! 197: /* ! 198: ** Load the doclists for each phrase in the query associated with FTS3 cursor ! 199: ** pCsr. ! 200: ** ! 201: ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable ! 202: ** phrases in the expression (all phrases except those directly or ! 203: ** indirectly descended from the right-hand-side of a NOT operator). If ! 204: ** pnToken is not NULL, then it is set to the number of tokens in all ! 205: ** matchable phrases of the expression. ! 206: */ ! 207: static int fts3ExprLoadDoclists( ! 208: Fts3Cursor *pCsr, /* Fts3 cursor for current query */ ! 209: int *pnPhrase, /* OUT: Number of phrases in query */ ! 210: int *pnToken /* OUT: Number of tokens in query */ ! 211: ){ ! 212: int rc; /* Return Code */ ! 213: LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ ! 214: sCtx.pCsr = pCsr; ! 215: rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); ! 216: if( pnPhrase ) *pnPhrase = sCtx.nPhrase; ! 217: if( pnToken ) *pnToken = sCtx.nToken; ! 218: return rc; ! 219: } ! 220: ! 221: static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ ! 222: (*(int *)ctx)++; ! 223: UNUSED_PARAMETER(pExpr); ! 224: UNUSED_PARAMETER(iPhrase); ! 225: return SQLITE_OK; ! 226: } ! 227: static int fts3ExprPhraseCount(Fts3Expr *pExpr){ ! 228: int nPhrase = 0; ! 229: (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); ! 230: return nPhrase; ! 231: } ! 232: ! 233: /* ! 234: ** Advance the position list iterator specified by the first two ! 235: ** arguments so that it points to the first element with a value greater ! 236: ** than or equal to parameter iNext. ! 237: */ ! 238: static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ ! 239: char *pIter = *ppIter; ! 240: if( pIter ){ ! 241: int iIter = *piIter; ! 242: ! 243: while( iIter<iNext ){ ! 244: if( 0==(*pIter & 0xFE) ){ ! 245: iIter = -1; ! 246: pIter = 0; ! 247: break; ! 248: } ! 249: fts3GetDeltaPosition(&pIter, &iIter); ! 250: } ! 251: ! 252: *piIter = iIter; ! 253: *ppIter = pIter; ! 254: } ! 255: } ! 256: ! 257: /* ! 258: ** Advance the snippet iterator to the next candidate snippet. ! 259: */ ! 260: static int fts3SnippetNextCandidate(SnippetIter *pIter){ ! 261: int i; /* Loop counter */ ! 262: ! 263: if( pIter->iCurrent<0 ){ ! 264: /* The SnippetIter object has just been initialized. The first snippet ! 265: ** candidate always starts at offset 0 (even if this candidate has a ! 266: ** score of 0.0). ! 267: */ ! 268: pIter->iCurrent = 0; ! 269: ! 270: /* Advance the 'head' iterator of each phrase to the first offset that ! 271: ** is greater than or equal to (iNext+nSnippet). ! 272: */ ! 273: for(i=0; i<pIter->nPhrase; i++){ ! 274: SnippetPhrase *pPhrase = &pIter->aPhrase[i]; ! 275: fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); ! 276: } ! 277: }else{ ! 278: int iStart; ! 279: int iEnd = 0x7FFFFFFF; ! 280: ! 281: for(i=0; i<pIter->nPhrase; i++){ ! 282: SnippetPhrase *pPhrase = &pIter->aPhrase[i]; ! 283: if( pPhrase->pHead && pPhrase->iHead<iEnd ){ ! 284: iEnd = pPhrase->iHead; ! 285: } ! 286: } ! 287: if( iEnd==0x7FFFFFFF ){ ! 288: return 1; ! 289: } ! 290: ! 291: pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; ! 292: for(i=0; i<pIter->nPhrase; i++){ ! 293: SnippetPhrase *pPhrase = &pIter->aPhrase[i]; ! 294: fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); ! 295: fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); ! 296: } ! 297: } ! 298: ! 299: return 0; ! 300: } ! 301: ! 302: /* ! 303: ** Retrieve information about the current candidate snippet of snippet ! 304: ** iterator pIter. ! 305: */ ! 306: static void fts3SnippetDetails( ! 307: SnippetIter *pIter, /* Snippet iterator */ ! 308: u64 mCovered, /* Bitmask of phrases already covered */ ! 309: int *piToken, /* OUT: First token of proposed snippet */ ! 310: int *piScore, /* OUT: "Score" for this snippet */ ! 311: u64 *pmCover, /* OUT: Bitmask of phrases covered */ ! 312: u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ ! 313: ){ ! 314: int iStart = pIter->iCurrent; /* First token of snippet */ ! 315: int iScore = 0; /* Score of this snippet */ ! 316: int i; /* Loop counter */ ! 317: u64 mCover = 0; /* Mask of phrases covered by this snippet */ ! 318: u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ ! 319: ! 320: for(i=0; i<pIter->nPhrase; i++){ ! 321: SnippetPhrase *pPhrase = &pIter->aPhrase[i]; ! 322: if( pPhrase->pTail ){ ! 323: char *pCsr = pPhrase->pTail; ! 324: int iCsr = pPhrase->iTail; ! 325: ! 326: while( iCsr<(iStart+pIter->nSnippet) ){ ! 327: int j; ! 328: u64 mPhrase = (u64)1 << i; ! 329: u64 mPos = (u64)1 << (iCsr - iStart); ! 330: assert( iCsr>=iStart ); ! 331: if( (mCover|mCovered)&mPhrase ){ ! 332: iScore++; ! 333: }else{ ! 334: iScore += 1000; ! 335: } ! 336: mCover |= mPhrase; ! 337: ! 338: for(j=0; j<pPhrase->nToken; j++){ ! 339: mHighlight |= (mPos>>j); ! 340: } ! 341: ! 342: if( 0==(*pCsr & 0x0FE) ) break; ! 343: fts3GetDeltaPosition(&pCsr, &iCsr); ! 344: } ! 345: } ! 346: } ! 347: ! 348: /* Set the output variables before returning. */ ! 349: *piToken = iStart; ! 350: *piScore = iScore; ! 351: *pmCover = mCover; ! 352: *pmHighlight = mHighlight; ! 353: } ! 354: ! 355: /* ! 356: ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). ! 357: ** Each invocation populates an element of the SnippetIter.aPhrase[] array. ! 358: */ ! 359: static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ ! 360: SnippetIter *p = (SnippetIter *)ctx; ! 361: SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; ! 362: char *pCsr; ! 363: ! 364: pPhrase->nToken = pExpr->pPhrase->nToken; ! 365: ! 366: pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol); ! 367: if( pCsr ){ ! 368: int iFirst = 0; ! 369: pPhrase->pList = pCsr; ! 370: fts3GetDeltaPosition(&pCsr, &iFirst); ! 371: assert( iFirst>=0 ); ! 372: pPhrase->pHead = pCsr; ! 373: pPhrase->pTail = pCsr; ! 374: pPhrase->iHead = iFirst; ! 375: pPhrase->iTail = iFirst; ! 376: }else{ ! 377: assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); ! 378: } ! 379: ! 380: return SQLITE_OK; ! 381: } ! 382: ! 383: /* ! 384: ** Select the fragment of text consisting of nFragment contiguous tokens ! 385: ** from column iCol that represent the "best" snippet. The best snippet ! 386: ** is the snippet with the highest score, where scores are calculated ! 387: ** by adding: ! 388: ** ! 389: ** (a) +1 point for each occurence of a matchable phrase in the snippet. ! 390: ** ! 391: ** (b) +1000 points for the first occurence of each matchable phrase in ! 392: ** the snippet for which the corresponding mCovered bit is not set. ! 393: ** ! 394: ** The selected snippet parameters are stored in structure *pFragment before ! 395: ** returning. The score of the selected snippet is stored in *piScore ! 396: ** before returning. ! 397: */ ! 398: static int fts3BestSnippet( ! 399: int nSnippet, /* Desired snippet length */ ! 400: Fts3Cursor *pCsr, /* Cursor to create snippet for */ ! 401: int iCol, /* Index of column to create snippet from */ ! 402: u64 mCovered, /* Mask of phrases already covered */ ! 403: u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ ! 404: SnippetFragment *pFragment, /* OUT: Best snippet found */ ! 405: int *piScore /* OUT: Score of snippet pFragment */ ! 406: ){ ! 407: int rc; /* Return Code */ ! 408: int nList; /* Number of phrases in expression */ ! 409: SnippetIter sIter; /* Iterates through snippet candidates */ ! 410: int nByte; /* Number of bytes of space to allocate */ ! 411: int iBestScore = -1; /* Best snippet score found so far */ ! 412: int i; /* Loop counter */ ! 413: ! 414: memset(&sIter, 0, sizeof(sIter)); ! 415: ! 416: /* Iterate through the phrases in the expression to count them. The same ! 417: ** callback makes sure the doclists are loaded for each phrase. ! 418: */ ! 419: rc = fts3ExprLoadDoclists(pCsr, &nList, 0); ! 420: if( rc!=SQLITE_OK ){ ! 421: return rc; ! 422: } ! 423: ! 424: /* Now that it is known how many phrases there are, allocate and zero ! 425: ** the required space using malloc(). ! 426: */ ! 427: nByte = sizeof(SnippetPhrase) * nList; ! 428: sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); ! 429: if( !sIter.aPhrase ){ ! 430: return SQLITE_NOMEM; ! 431: } ! 432: memset(sIter.aPhrase, 0, nByte); ! 433: ! 434: /* Initialize the contents of the SnippetIter object. Then iterate through ! 435: ** the set of phrases in the expression to populate the aPhrase[] array. ! 436: */ ! 437: sIter.pCsr = pCsr; ! 438: sIter.iCol = iCol; ! 439: sIter.nSnippet = nSnippet; ! 440: sIter.nPhrase = nList; ! 441: sIter.iCurrent = -1; ! 442: (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); ! 443: ! 444: /* Set the *pmSeen output variable. */ ! 445: for(i=0; i<nList; i++){ ! 446: if( sIter.aPhrase[i].pHead ){ ! 447: *pmSeen |= (u64)1 << i; ! 448: } ! 449: } ! 450: ! 451: /* Loop through all candidate snippets. Store the best snippet in ! 452: ** *pFragment. Store its associated 'score' in iBestScore. ! 453: */ ! 454: pFragment->iCol = iCol; ! 455: while( !fts3SnippetNextCandidate(&sIter) ){ ! 456: int iPos; ! 457: int iScore; ! 458: u64 mCover; ! 459: u64 mHighlight; ! 460: fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight); ! 461: assert( iScore>=0 ); ! 462: if( iScore>iBestScore ){ ! 463: pFragment->iPos = iPos; ! 464: pFragment->hlmask = mHighlight; ! 465: pFragment->covered = mCover; ! 466: iBestScore = iScore; ! 467: } ! 468: } ! 469: ! 470: sqlite3_free(sIter.aPhrase); ! 471: *piScore = iBestScore; ! 472: return SQLITE_OK; ! 473: } ! 474: ! 475: ! 476: /* ! 477: ** Append a string to the string-buffer passed as the first argument. ! 478: ** ! 479: ** If nAppend is negative, then the length of the string zAppend is ! 480: ** determined using strlen(). ! 481: */ ! 482: static int fts3StringAppend( ! 483: StrBuffer *pStr, /* Buffer to append to */ ! 484: const char *zAppend, /* Pointer to data to append to buffer */ ! 485: int nAppend /* Size of zAppend in bytes (or -1) */ ! 486: ){ ! 487: if( nAppend<0 ){ ! 488: nAppend = (int)strlen(zAppend); ! 489: } ! 490: ! 491: /* If there is insufficient space allocated at StrBuffer.z, use realloc() ! 492: ** to grow the buffer until so that it is big enough to accomadate the ! 493: ** appended data. ! 494: */ ! 495: if( pStr->n+nAppend+1>=pStr->nAlloc ){ ! 496: int nAlloc = pStr->nAlloc+nAppend+100; ! 497: char *zNew = sqlite3_realloc(pStr->z, nAlloc); ! 498: if( !zNew ){ ! 499: return SQLITE_NOMEM; ! 500: } ! 501: pStr->z = zNew; ! 502: pStr->nAlloc = nAlloc; ! 503: } ! 504: ! 505: /* Append the data to the string buffer. */ ! 506: memcpy(&pStr->z[pStr->n], zAppend, nAppend); ! 507: pStr->n += nAppend; ! 508: pStr->z[pStr->n] = '\0'; ! 509: ! 510: return SQLITE_OK; ! 511: } ! 512: ! 513: /* ! 514: ** The fts3BestSnippet() function often selects snippets that end with a ! 515: ** query term. That is, the final term of the snippet is always a term ! 516: ** that requires highlighting. For example, if 'X' is a highlighted term ! 517: ** and '.' is a non-highlighted term, BestSnippet() may select: ! 518: ** ! 519: ** ........X.....X ! 520: ** ! 521: ** This function "shifts" the beginning of the snippet forward in the ! 522: ** document so that there are approximately the same number of ! 523: ** non-highlighted terms to the right of the final highlighted term as there ! 524: ** are to the left of the first highlighted term. For example, to this: ! 525: ** ! 526: ** ....X.....X.... ! 527: ** ! 528: ** This is done as part of extracting the snippet text, not when selecting ! 529: ** the snippet. Snippet selection is done based on doclists only, so there ! 530: ** is no way for fts3BestSnippet() to know whether or not the document ! 531: ** actually contains terms that follow the final highlighted term. ! 532: */ ! 533: static int fts3SnippetShift( ! 534: Fts3Table *pTab, /* FTS3 table snippet comes from */ ! 535: int nSnippet, /* Number of tokens desired for snippet */ ! 536: const char *zDoc, /* Document text to extract snippet from */ ! 537: int nDoc, /* Size of buffer zDoc in bytes */ ! 538: int *piPos, /* IN/OUT: First token of snippet */ ! 539: u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ ! 540: ){ ! 541: u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ ! 542: ! 543: if( hlmask ){ ! 544: int nLeft; /* Tokens to the left of first highlight */ ! 545: int nRight; /* Tokens to the right of last highlight */ ! 546: int nDesired; /* Ideal number of tokens to shift forward */ ! 547: ! 548: for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); ! 549: for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); ! 550: nDesired = (nLeft-nRight)/2; ! 551: ! 552: /* Ideally, the start of the snippet should be pushed forward in the ! 553: ** document nDesired tokens. This block checks if there are actually ! 554: ** nDesired tokens to the right of the snippet. If so, *piPos and ! 555: ** *pHlMask are updated to shift the snippet nDesired tokens to the ! 556: ** right. Otherwise, the snippet is shifted by the number of tokens ! 557: ** available. ! 558: */ ! 559: if( nDesired>0 ){ ! 560: int nShift; /* Number of tokens to shift snippet by */ ! 561: int iCurrent = 0; /* Token counter */ ! 562: int rc; /* Return Code */ ! 563: sqlite3_tokenizer_module *pMod; ! 564: sqlite3_tokenizer_cursor *pC; ! 565: pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; ! 566: ! 567: /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) ! 568: ** or more tokens in zDoc/nDoc. ! 569: */ ! 570: rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); ! 571: if( rc!=SQLITE_OK ){ ! 572: return rc; ! 573: } ! 574: pC->pTokenizer = pTab->pTokenizer; ! 575: while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ ! 576: const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; ! 577: rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); ! 578: } ! 579: pMod->xClose(pC); ! 580: if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } ! 581: ! 582: nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; ! 583: assert( nShift<=nDesired ); ! 584: if( nShift>0 ){ ! 585: *piPos += nShift; ! 586: *pHlmask = hlmask >> nShift; ! 587: } ! 588: } ! 589: } ! 590: return SQLITE_OK; ! 591: } ! 592: ! 593: /* ! 594: ** Extract the snippet text for fragment pFragment from cursor pCsr and ! 595: ** append it to string buffer pOut. ! 596: */ ! 597: static int fts3SnippetText( ! 598: Fts3Cursor *pCsr, /* FTS3 Cursor */ ! 599: SnippetFragment *pFragment, /* Snippet to extract */ ! 600: int iFragment, /* Fragment number */ ! 601: int isLast, /* True for final fragment in snippet */ ! 602: int nSnippet, /* Number of tokens in extracted snippet */ ! 603: const char *zOpen, /* String inserted before highlighted term */ ! 604: const char *zClose, /* String inserted after highlighted term */ ! 605: const char *zEllipsis, /* String inserted between snippets */ ! 606: StrBuffer *pOut /* Write output here */ ! 607: ){ ! 608: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; ! 609: int rc; /* Return code */ ! 610: const char *zDoc; /* Document text to extract snippet from */ ! 611: int nDoc; /* Size of zDoc in bytes */ ! 612: int iCurrent = 0; /* Current token number of document */ ! 613: int iEnd = 0; /* Byte offset of end of current token */ ! 614: int isShiftDone = 0; /* True after snippet is shifted */ ! 615: int iPos = pFragment->iPos; /* First token of snippet */ ! 616: u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ ! 617: int iCol = pFragment->iCol+1; /* Query column to extract text from */ ! 618: sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ ! 619: sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ ! 620: const char *ZDUMMY; /* Dummy argument used with tokenizer */ ! 621: int DUMMY1; /* Dummy argument used with tokenizer */ ! 622: ! 623: zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); ! 624: if( zDoc==0 ){ ! 625: if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ ! 626: return SQLITE_NOMEM; ! 627: } ! 628: return SQLITE_OK; ! 629: } ! 630: nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); ! 631: ! 632: /* Open a token cursor on the document. */ ! 633: pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; ! 634: rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); ! 635: if( rc!=SQLITE_OK ){ ! 636: return rc; ! 637: } ! 638: pC->pTokenizer = pTab->pTokenizer; ! 639: ! 640: while( rc==SQLITE_OK ){ ! 641: int iBegin; /* Offset in zDoc of start of token */ ! 642: int iFin; /* Offset in zDoc of end of token */ ! 643: int isHighlight; /* True for highlighted terms */ ! 644: ! 645: rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); ! 646: if( rc!=SQLITE_OK ){ ! 647: if( rc==SQLITE_DONE ){ ! 648: /* Special case - the last token of the snippet is also the last token ! 649: ** of the column. Append any punctuation that occurred between the end ! 650: ** of the previous token and the end of the document to the output. ! 651: ** Then break out of the loop. */ ! 652: rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); ! 653: } ! 654: break; ! 655: } ! 656: if( iCurrent<iPos ){ continue; } ! 657: ! 658: if( !isShiftDone ){ ! 659: int n = nDoc - iBegin; ! 660: rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); ! 661: isShiftDone = 1; ! 662: ! 663: /* Now that the shift has been done, check if the initial "..." are ! 664: ** required. They are required if (a) this is not the first fragment, ! 665: ** or (b) this fragment does not begin at position 0 of its column. ! 666: */ ! 667: if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ ! 668: rc = fts3StringAppend(pOut, zEllipsis, -1); ! 669: } ! 670: if( rc!=SQLITE_OK || iCurrent<iPos ) continue; ! 671: } ! 672: ! 673: if( iCurrent>=(iPos+nSnippet) ){ ! 674: if( isLast ){ ! 675: rc = fts3StringAppend(pOut, zEllipsis, -1); ! 676: } ! 677: break; ! 678: } ! 679: ! 680: /* Set isHighlight to true if this term should be highlighted. */ ! 681: isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; ! 682: ! 683: if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); ! 684: if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); ! 685: if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); ! 686: if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); ! 687: ! 688: iEnd = iFin; ! 689: } ! 690: ! 691: pMod->xClose(pC); ! 692: return rc; ! 693: } ! 694: ! 695: ! 696: /* ! 697: ** This function is used to count the entries in a column-list (a ! 698: ** delta-encoded list of term offsets within a single column of a single ! 699: ** row). When this function is called, *ppCollist should point to the ! 700: ** beginning of the first varint in the column-list (the varint that ! 701: ** contains the position of the first matching term in the column data). ! 702: ** Before returning, *ppCollist is set to point to the first byte after ! 703: ** the last varint in the column-list (either the 0x00 signifying the end ! 704: ** of the position-list, or the 0x01 that precedes the column number of ! 705: ** the next column in the position-list). ! 706: ** ! 707: ** The number of elements in the column-list is returned. ! 708: */ ! 709: static int fts3ColumnlistCount(char **ppCollist){ ! 710: char *pEnd = *ppCollist; ! 711: char c = 0; ! 712: int nEntry = 0; ! 713: ! 714: /* A column-list is terminated by either a 0x01 or 0x00. */ ! 715: while( 0xFE & (*pEnd | c) ){ ! 716: c = *pEnd++ & 0x80; ! 717: if( !c ) nEntry++; ! 718: } ! 719: ! 720: *ppCollist = pEnd; ! 721: return nEntry; ! 722: } ! 723: ! 724: /* ! 725: ** fts3ExprIterate() callback used to collect the "global" matchinfo stats ! 726: ** for a single query. ! 727: ** ! 728: ** fts3ExprIterate() callback to load the 'global' elements of a ! 729: ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements ! 730: ** of the matchinfo array that are constant for all rows returned by the ! 731: ** current query. ! 732: ** ! 733: ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This ! 734: ** function populates Matchinfo.aMatchinfo[] as follows: ! 735: ** ! 736: ** for(iCol=0; iCol<nCol; iCol++){ ! 737: ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; ! 738: ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; ! 739: ** } ! 740: ** ! 741: ** where X is the number of matches for phrase iPhrase is column iCol of all ! 742: ** rows of the table. Y is the number of rows for which column iCol contains ! 743: ** at least one instance of phrase iPhrase. ! 744: ** ! 745: ** If the phrase pExpr consists entirely of deferred tokens, then all X and ! 746: ** Y values are set to nDoc, where nDoc is the number of documents in the ! 747: ** file system. This is done because the full-text index doclist is required ! 748: ** to calculate these values properly, and the full-text index doclist is ! 749: ** not available for deferred tokens. ! 750: */ ! 751: static int fts3ExprGlobalHitsCb( ! 752: Fts3Expr *pExpr, /* Phrase expression node */ ! 753: int iPhrase, /* Phrase number (numbered from zero) */ ! 754: void *pCtx /* Pointer to MatchInfo structure */ ! 755: ){ ! 756: MatchInfo *p = (MatchInfo *)pCtx; ! 757: return sqlite3Fts3EvalPhraseStats( ! 758: p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] ! 759: ); ! 760: } ! 761: ! 762: /* ! 763: ** fts3ExprIterate() callback used to collect the "local" part of the ! 764: ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the ! 765: ** array that are different for each row returned by the query. ! 766: */ ! 767: static int fts3ExprLocalHitsCb( ! 768: Fts3Expr *pExpr, /* Phrase expression node */ ! 769: int iPhrase, /* Phrase number */ ! 770: void *pCtx /* Pointer to MatchInfo structure */ ! 771: ){ ! 772: MatchInfo *p = (MatchInfo *)pCtx; ! 773: int iStart = iPhrase * p->nCol * 3; ! 774: int i; ! 775: ! 776: for(i=0; i<p->nCol; i++){ ! 777: char *pCsr; ! 778: pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i); ! 779: if( pCsr ){ ! 780: p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); ! 781: }else{ ! 782: p->aMatchinfo[iStart+i*3] = 0; ! 783: } ! 784: } ! 785: ! 786: return SQLITE_OK; ! 787: } ! 788: ! 789: static int fts3MatchinfoCheck( ! 790: Fts3Table *pTab, ! 791: char cArg, ! 792: char **pzErr ! 793: ){ ! 794: if( (cArg==FTS3_MATCHINFO_NPHRASE) ! 795: || (cArg==FTS3_MATCHINFO_NCOL) ! 796: || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) ! 797: || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) ! 798: || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) ! 799: || (cArg==FTS3_MATCHINFO_LCS) ! 800: || (cArg==FTS3_MATCHINFO_HITS) ! 801: ){ ! 802: return SQLITE_OK; ! 803: } ! 804: *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); ! 805: return SQLITE_ERROR; ! 806: } ! 807: ! 808: static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ ! 809: int nVal; /* Number of integers output by cArg */ ! 810: ! 811: switch( cArg ){ ! 812: case FTS3_MATCHINFO_NDOC: ! 813: case FTS3_MATCHINFO_NPHRASE: ! 814: case FTS3_MATCHINFO_NCOL: ! 815: nVal = 1; ! 816: break; ! 817: ! 818: case FTS3_MATCHINFO_AVGLENGTH: ! 819: case FTS3_MATCHINFO_LENGTH: ! 820: case FTS3_MATCHINFO_LCS: ! 821: nVal = pInfo->nCol; ! 822: break; ! 823: ! 824: default: ! 825: assert( cArg==FTS3_MATCHINFO_HITS ); ! 826: nVal = pInfo->nCol * pInfo->nPhrase * 3; ! 827: break; ! 828: } ! 829: ! 830: return nVal; ! 831: } ! 832: ! 833: static int fts3MatchinfoSelectDoctotal( ! 834: Fts3Table *pTab, ! 835: sqlite3_stmt **ppStmt, ! 836: sqlite3_int64 *pnDoc, ! 837: const char **paLen ! 838: ){ ! 839: sqlite3_stmt *pStmt; ! 840: const char *a; ! 841: sqlite3_int64 nDoc; ! 842: ! 843: if( !*ppStmt ){ ! 844: int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); ! 845: if( rc!=SQLITE_OK ) return rc; ! 846: } ! 847: pStmt = *ppStmt; ! 848: assert( sqlite3_data_count(pStmt)==1 ); ! 849: ! 850: a = sqlite3_column_blob(pStmt, 0); ! 851: a += sqlite3Fts3GetVarint(a, &nDoc); ! 852: if( nDoc==0 ) return FTS_CORRUPT_VTAB; ! 853: *pnDoc = (u32)nDoc; ! 854: ! 855: if( paLen ) *paLen = a; ! 856: return SQLITE_OK; ! 857: } ! 858: ! 859: /* ! 860: ** An instance of the following structure is used to store state while ! 861: ** iterating through a multi-column position-list corresponding to the ! 862: ** hits for a single phrase on a single row in order to calculate the ! 863: ** values for a matchinfo() FTS3_MATCHINFO_LCS request. ! 864: */ ! 865: typedef struct LcsIterator LcsIterator; ! 866: struct LcsIterator { ! 867: Fts3Expr *pExpr; /* Pointer to phrase expression */ ! 868: int iPosOffset; /* Tokens count up to end of this phrase */ ! 869: char *pRead; /* Cursor used to iterate through aDoclist */ ! 870: int iPos; /* Current position */ ! 871: }; ! 872: ! 873: /* ! 874: ** If LcsIterator.iCol is set to the following value, the iterator has ! 875: ** finished iterating through all offsets for all columns. ! 876: */ ! 877: #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; ! 878: ! 879: static int fts3MatchinfoLcsCb( ! 880: Fts3Expr *pExpr, /* Phrase expression node */ ! 881: int iPhrase, /* Phrase number (numbered from zero) */ ! 882: void *pCtx /* Pointer to MatchInfo structure */ ! 883: ){ ! 884: LcsIterator *aIter = (LcsIterator *)pCtx; ! 885: aIter[iPhrase].pExpr = pExpr; ! 886: return SQLITE_OK; ! 887: } ! 888: ! 889: /* ! 890: ** Advance the iterator passed as an argument to the next position. Return ! 891: ** 1 if the iterator is at EOF or if it now points to the start of the ! 892: ** position list for the next column. ! 893: */ ! 894: static int fts3LcsIteratorAdvance(LcsIterator *pIter){ ! 895: char *pRead = pIter->pRead; ! 896: sqlite3_int64 iRead; ! 897: int rc = 0; ! 898: ! 899: pRead += sqlite3Fts3GetVarint(pRead, &iRead); ! 900: if( iRead==0 || iRead==1 ){ ! 901: pRead = 0; ! 902: rc = 1; ! 903: }else{ ! 904: pIter->iPos += (int)(iRead-2); ! 905: } ! 906: ! 907: pIter->pRead = pRead; ! 908: return rc; ! 909: } ! 910: ! 911: /* ! 912: ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. ! 913: ** ! 914: ** If the call is successful, the longest-common-substring lengths for each ! 915: ** column are written into the first nCol elements of the pInfo->aMatchinfo[] ! 916: ** array before returning. SQLITE_OK is returned in this case. ! 917: ** ! 918: ** Otherwise, if an error occurs, an SQLite error code is returned and the ! 919: ** data written to the first nCol elements of pInfo->aMatchinfo[] is ! 920: ** undefined. ! 921: */ ! 922: static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ ! 923: LcsIterator *aIter; ! 924: int i; ! 925: int iCol; ! 926: int nToken = 0; ! 927: ! 928: /* Allocate and populate the array of LcsIterator objects. The array ! 929: ** contains one element for each matchable phrase in the query. ! 930: **/ ! 931: aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); ! 932: if( !aIter ) return SQLITE_NOMEM; ! 933: memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); ! 934: (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); ! 935: ! 936: for(i=0; i<pInfo->nPhrase; i++){ ! 937: LcsIterator *pIter = &aIter[i]; ! 938: nToken -= pIter->pExpr->pPhrase->nToken; ! 939: pIter->iPosOffset = nToken; ! 940: } ! 941: ! 942: for(iCol=0; iCol<pInfo->nCol; iCol++){ ! 943: int nLcs = 0; /* LCS value for this column */ ! 944: int nLive = 0; /* Number of iterators in aIter not at EOF */ ! 945: ! 946: for(i=0; i<pInfo->nPhrase; i++){ ! 947: LcsIterator *pIt = &aIter[i]; ! 948: pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol); ! 949: if( pIt->pRead ){ ! 950: pIt->iPos = pIt->iPosOffset; ! 951: fts3LcsIteratorAdvance(&aIter[i]); ! 952: nLive++; ! 953: } ! 954: } ! 955: ! 956: while( nLive>0 ){ ! 957: LcsIterator *pAdv = 0; /* The iterator to advance by one position */ ! 958: int nThisLcs = 0; /* LCS for the current iterator positions */ ! 959: ! 960: for(i=0; i<pInfo->nPhrase; i++){ ! 961: LcsIterator *pIter = &aIter[i]; ! 962: if( pIter->pRead==0 ){ ! 963: /* This iterator is already at EOF for this column. */ ! 964: nThisLcs = 0; ! 965: }else{ ! 966: if( pAdv==0 || pIter->iPos<pAdv->iPos ){ ! 967: pAdv = pIter; ! 968: } ! 969: if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ ! 970: nThisLcs++; ! 971: }else{ ! 972: nThisLcs = 1; ! 973: } ! 974: if( nThisLcs>nLcs ) nLcs = nThisLcs; ! 975: } ! 976: } ! 977: if( fts3LcsIteratorAdvance(pAdv) ) nLive--; ! 978: } ! 979: ! 980: pInfo->aMatchinfo[iCol] = nLcs; ! 981: } ! 982: ! 983: sqlite3_free(aIter); ! 984: return SQLITE_OK; ! 985: } ! 986: ! 987: /* ! 988: ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to ! 989: ** be returned by the matchinfo() function. Argument zArg contains the ! 990: ** format string passed as the second argument to matchinfo (or the ! 991: ** default value "pcx" if no second argument was specified). The format ! 992: ** string has already been validated and the pInfo->aMatchinfo[] array ! 993: ** is guaranteed to be large enough for the output. ! 994: ** ! 995: ** If bGlobal is true, then populate all fields of the matchinfo() output. ! 996: ** If it is false, then assume that those fields that do not change between ! 997: ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) ! 998: ** have already been populated. ! 999: ** ! 1000: ** Return SQLITE_OK if successful, or an SQLite error code if an error ! 1001: ** occurs. If a value other than SQLITE_OK is returned, the state the ! 1002: ** pInfo->aMatchinfo[] buffer is left in is undefined. ! 1003: */ ! 1004: static int fts3MatchinfoValues( ! 1005: Fts3Cursor *pCsr, /* FTS3 cursor object */ ! 1006: int bGlobal, /* True to grab the global stats */ ! 1007: MatchInfo *pInfo, /* Matchinfo context object */ ! 1008: const char *zArg /* Matchinfo format string */ ! 1009: ){ ! 1010: int rc = SQLITE_OK; ! 1011: int i; ! 1012: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; ! 1013: sqlite3_stmt *pSelect = 0; ! 1014: ! 1015: for(i=0; rc==SQLITE_OK && zArg[i]; i++){ ! 1016: ! 1017: switch( zArg[i] ){ ! 1018: case FTS3_MATCHINFO_NPHRASE: ! 1019: if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; ! 1020: break; ! 1021: ! 1022: case FTS3_MATCHINFO_NCOL: ! 1023: if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; ! 1024: break; ! 1025: ! 1026: case FTS3_MATCHINFO_NDOC: ! 1027: if( bGlobal ){ ! 1028: sqlite3_int64 nDoc = 0; ! 1029: rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); ! 1030: pInfo->aMatchinfo[0] = (u32)nDoc; ! 1031: } ! 1032: break; ! 1033: ! 1034: case FTS3_MATCHINFO_AVGLENGTH: ! 1035: if( bGlobal ){ ! 1036: sqlite3_int64 nDoc; /* Number of rows in table */ ! 1037: const char *a; /* Aggregate column length array */ ! 1038: ! 1039: rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); ! 1040: if( rc==SQLITE_OK ){ ! 1041: int iCol; ! 1042: for(iCol=0; iCol<pInfo->nCol; iCol++){ ! 1043: u32 iVal; ! 1044: sqlite3_int64 nToken; ! 1045: a += sqlite3Fts3GetVarint(a, &nToken); ! 1046: iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); ! 1047: pInfo->aMatchinfo[iCol] = iVal; ! 1048: } ! 1049: } ! 1050: } ! 1051: break; ! 1052: ! 1053: case FTS3_MATCHINFO_LENGTH: { ! 1054: sqlite3_stmt *pSelectDocsize = 0; ! 1055: rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); ! 1056: if( rc==SQLITE_OK ){ ! 1057: int iCol; ! 1058: const char *a = sqlite3_column_blob(pSelectDocsize, 0); ! 1059: for(iCol=0; iCol<pInfo->nCol; iCol++){ ! 1060: sqlite3_int64 nToken; ! 1061: a += sqlite3Fts3GetVarint(a, &nToken); ! 1062: pInfo->aMatchinfo[iCol] = (u32)nToken; ! 1063: } ! 1064: } ! 1065: sqlite3_reset(pSelectDocsize); ! 1066: break; ! 1067: } ! 1068: ! 1069: case FTS3_MATCHINFO_LCS: ! 1070: rc = fts3ExprLoadDoclists(pCsr, 0, 0); ! 1071: if( rc==SQLITE_OK ){ ! 1072: rc = fts3MatchinfoLcs(pCsr, pInfo); ! 1073: } ! 1074: break; ! 1075: ! 1076: default: { ! 1077: Fts3Expr *pExpr; ! 1078: assert( zArg[i]==FTS3_MATCHINFO_HITS ); ! 1079: pExpr = pCsr->pExpr; ! 1080: rc = fts3ExprLoadDoclists(pCsr, 0, 0); ! 1081: if( rc!=SQLITE_OK ) break; ! 1082: if( bGlobal ){ ! 1083: if( pCsr->pDeferred ){ ! 1084: rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); ! 1085: if( rc!=SQLITE_OK ) break; ! 1086: } ! 1087: rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); ! 1088: if( rc!=SQLITE_OK ) break; ! 1089: } ! 1090: (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); ! 1091: break; ! 1092: } ! 1093: } ! 1094: ! 1095: pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); ! 1096: } ! 1097: ! 1098: sqlite3_reset(pSelect); ! 1099: return rc; ! 1100: } ! 1101: ! 1102: ! 1103: /* ! 1104: ** Populate pCsr->aMatchinfo[] with data for the current row. The ! 1105: ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). ! 1106: */ ! 1107: static int fts3GetMatchinfo( ! 1108: Fts3Cursor *pCsr, /* FTS3 Cursor object */ ! 1109: const char *zArg /* Second argument to matchinfo() function */ ! 1110: ){ ! 1111: MatchInfo sInfo; ! 1112: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; ! 1113: int rc = SQLITE_OK; ! 1114: int bGlobal = 0; /* Collect 'global' stats as well as local */ ! 1115: ! 1116: memset(&sInfo, 0, sizeof(MatchInfo)); ! 1117: sInfo.pCursor = pCsr; ! 1118: sInfo.nCol = pTab->nColumn; ! 1119: ! 1120: /* If there is cached matchinfo() data, but the format string for the ! 1121: ** cache does not match the format string for this request, discard ! 1122: ** the cached data. */ ! 1123: if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ ! 1124: assert( pCsr->aMatchinfo ); ! 1125: sqlite3_free(pCsr->aMatchinfo); ! 1126: pCsr->zMatchinfo = 0; ! 1127: pCsr->aMatchinfo = 0; ! 1128: } ! 1129: ! 1130: /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the ! 1131: ** matchinfo function has been called for this query. In this case ! 1132: ** allocate the array used to accumulate the matchinfo data and ! 1133: ** initialize those elements that are constant for every row. ! 1134: */ ! 1135: if( pCsr->aMatchinfo==0 ){ ! 1136: int nMatchinfo = 0; /* Number of u32 elements in match-info */ ! 1137: int nArg; /* Bytes in zArg */ ! 1138: int i; /* Used to iterate through zArg */ ! 1139: ! 1140: /* Determine the number of phrases in the query */ ! 1141: pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); ! 1142: sInfo.nPhrase = pCsr->nPhrase; ! 1143: ! 1144: /* Determine the number of integers in the buffer returned by this call. */ ! 1145: for(i=0; zArg[i]; i++){ ! 1146: nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); ! 1147: } ! 1148: ! 1149: /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ ! 1150: nArg = (int)strlen(zArg); ! 1151: pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); ! 1152: if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; ! 1153: ! 1154: pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; ! 1155: pCsr->nMatchinfo = nMatchinfo; ! 1156: memcpy(pCsr->zMatchinfo, zArg, nArg+1); ! 1157: memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); ! 1158: pCsr->isMatchinfoNeeded = 1; ! 1159: bGlobal = 1; ! 1160: } ! 1161: ! 1162: sInfo.aMatchinfo = pCsr->aMatchinfo; ! 1163: sInfo.nPhrase = pCsr->nPhrase; ! 1164: if( pCsr->isMatchinfoNeeded ){ ! 1165: rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); ! 1166: pCsr->isMatchinfoNeeded = 0; ! 1167: } ! 1168: ! 1169: return rc; ! 1170: } ! 1171: ! 1172: /* ! 1173: ** Implementation of snippet() function. ! 1174: */ ! 1175: void sqlite3Fts3Snippet( ! 1176: sqlite3_context *pCtx, /* SQLite function call context */ ! 1177: Fts3Cursor *pCsr, /* Cursor object */ ! 1178: const char *zStart, /* Snippet start text - "<b>" */ ! 1179: const char *zEnd, /* Snippet end text - "</b>" */ ! 1180: const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ ! 1181: int iCol, /* Extract snippet from this column */ ! 1182: int nToken /* Approximate number of tokens in snippet */ ! 1183: ){ ! 1184: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; ! 1185: int rc = SQLITE_OK; ! 1186: int i; ! 1187: StrBuffer res = {0, 0, 0}; ! 1188: ! 1189: /* The returned text includes up to four fragments of text extracted from ! 1190: ** the data in the current row. The first iteration of the for(...) loop ! 1191: ** below attempts to locate a single fragment of text nToken tokens in ! 1192: ** size that contains at least one instance of all phrases in the query ! 1193: ** expression that appear in the current row. If such a fragment of text ! 1194: ** cannot be found, the second iteration of the loop attempts to locate ! 1195: ** a pair of fragments, and so on. ! 1196: */ ! 1197: int nSnippet = 0; /* Number of fragments in this snippet */ ! 1198: SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ ! 1199: int nFToken = -1; /* Number of tokens in each fragment */ ! 1200: ! 1201: if( !pCsr->pExpr ){ ! 1202: sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); ! 1203: return; ! 1204: } ! 1205: ! 1206: for(nSnippet=1; 1; nSnippet++){ ! 1207: ! 1208: int iSnip; /* Loop counter 0..nSnippet-1 */ ! 1209: u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ ! 1210: u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ ! 1211: ! 1212: if( nToken>=0 ){ ! 1213: nFToken = (nToken+nSnippet-1) / nSnippet; ! 1214: }else{ ! 1215: nFToken = -1 * nToken; ! 1216: } ! 1217: ! 1218: for(iSnip=0; iSnip<nSnippet; iSnip++){ ! 1219: int iBestScore = -1; /* Best score of columns checked so far */ ! 1220: int iRead; /* Used to iterate through columns */ ! 1221: SnippetFragment *pFragment = &aSnippet[iSnip]; ! 1222: ! 1223: memset(pFragment, 0, sizeof(*pFragment)); ! 1224: ! 1225: /* Loop through all columns of the table being considered for snippets. ! 1226: ** If the iCol argument to this function was negative, this means all ! 1227: ** columns of the FTS3 table. Otherwise, only column iCol is considered. ! 1228: */ ! 1229: for(iRead=0; iRead<pTab->nColumn; iRead++){ ! 1230: SnippetFragment sF = {0, 0, 0, 0}; ! 1231: int iS; ! 1232: if( iCol>=0 && iRead!=iCol ) continue; ! 1233: ! 1234: /* Find the best snippet of nFToken tokens in column iRead. */ ! 1235: rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); ! 1236: if( rc!=SQLITE_OK ){ ! 1237: goto snippet_out; ! 1238: } ! 1239: if( iS>iBestScore ){ ! 1240: *pFragment = sF; ! 1241: iBestScore = iS; ! 1242: } ! 1243: } ! 1244: ! 1245: mCovered |= pFragment->covered; ! 1246: } ! 1247: ! 1248: /* If all query phrases seen by fts3BestSnippet() are present in at least ! 1249: ** one of the nSnippet snippet fragments, break out of the loop. ! 1250: */ ! 1251: assert( (mCovered&mSeen)==mCovered ); ! 1252: if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; ! 1253: } ! 1254: ! 1255: assert( nFToken>0 ); ! 1256: ! 1257: for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ ! 1258: rc = fts3SnippetText(pCsr, &aSnippet[i], ! 1259: i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res ! 1260: ); ! 1261: } ! 1262: ! 1263: snippet_out: ! 1264: sqlite3Fts3SegmentsClose(pTab); ! 1265: if( rc!=SQLITE_OK ){ ! 1266: sqlite3_result_error_code(pCtx, rc); ! 1267: sqlite3_free(res.z); ! 1268: }else{ ! 1269: sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); ! 1270: } ! 1271: } ! 1272: ! 1273: ! 1274: typedef struct TermOffset TermOffset; ! 1275: typedef struct TermOffsetCtx TermOffsetCtx; ! 1276: ! 1277: struct TermOffset { ! 1278: char *pList; /* Position-list */ ! 1279: int iPos; /* Position just read from pList */ ! 1280: int iOff; /* Offset of this term from read positions */ ! 1281: }; ! 1282: ! 1283: struct TermOffsetCtx { ! 1284: Fts3Cursor *pCsr; ! 1285: int iCol; /* Column of table to populate aTerm for */ ! 1286: int iTerm; ! 1287: sqlite3_int64 iDocid; ! 1288: TermOffset *aTerm; ! 1289: }; ! 1290: ! 1291: /* ! 1292: ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). ! 1293: */ ! 1294: static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ ! 1295: TermOffsetCtx *p = (TermOffsetCtx *)ctx; ! 1296: int nTerm; /* Number of tokens in phrase */ ! 1297: int iTerm; /* For looping through nTerm phrase terms */ ! 1298: char *pList; /* Pointer to position list for phrase */ ! 1299: int iPos = 0; /* First position in position-list */ ! 1300: ! 1301: UNUSED_PARAMETER(iPhrase); ! 1302: pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol); ! 1303: nTerm = pExpr->pPhrase->nToken; ! 1304: if( pList ){ ! 1305: fts3GetDeltaPosition(&pList, &iPos); ! 1306: assert( iPos>=0 ); ! 1307: } ! 1308: ! 1309: for(iTerm=0; iTerm<nTerm; iTerm++){ ! 1310: TermOffset *pT = &p->aTerm[p->iTerm++]; ! 1311: pT->iOff = nTerm-iTerm-1; ! 1312: pT->pList = pList; ! 1313: pT->iPos = iPos; ! 1314: } ! 1315: ! 1316: return SQLITE_OK; ! 1317: } ! 1318: ! 1319: /* ! 1320: ** Implementation of offsets() function. ! 1321: */ ! 1322: void sqlite3Fts3Offsets( ! 1323: sqlite3_context *pCtx, /* SQLite function call context */ ! 1324: Fts3Cursor *pCsr /* Cursor object */ ! 1325: ){ ! 1326: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; ! 1327: sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; ! 1328: const char *ZDUMMY; /* Dummy argument used with xNext() */ ! 1329: int NDUMMY; /* Dummy argument used with xNext() */ ! 1330: int rc; /* Return Code */ ! 1331: int nToken; /* Number of tokens in query */ ! 1332: int iCol; /* Column currently being processed */ ! 1333: StrBuffer res = {0, 0, 0}; /* Result string */ ! 1334: TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ ! 1335: ! 1336: if( !pCsr->pExpr ){ ! 1337: sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); ! 1338: return; ! 1339: } ! 1340: ! 1341: memset(&sCtx, 0, sizeof(sCtx)); ! 1342: assert( pCsr->isRequireSeek==0 ); ! 1343: ! 1344: /* Count the number of terms in the query */ ! 1345: rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); ! 1346: if( rc!=SQLITE_OK ) goto offsets_out; ! 1347: ! 1348: /* Allocate the array of TermOffset iterators. */ ! 1349: sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); ! 1350: if( 0==sCtx.aTerm ){ ! 1351: rc = SQLITE_NOMEM; ! 1352: goto offsets_out; ! 1353: } ! 1354: sCtx.iDocid = pCsr->iPrevId; ! 1355: sCtx.pCsr = pCsr; ! 1356: ! 1357: /* Loop through the table columns, appending offset information to ! 1358: ** string-buffer res for each column. ! 1359: */ ! 1360: for(iCol=0; iCol<pTab->nColumn; iCol++){ ! 1361: sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ ! 1362: int iStart; ! 1363: int iEnd; ! 1364: int iCurrent; ! 1365: const char *zDoc; ! 1366: int nDoc; ! 1367: ! 1368: /* Initialize the contents of sCtx.aTerm[] for column iCol. There is ! 1369: ** no way that this operation can fail, so the return code from ! 1370: ** fts3ExprIterate() can be discarded. ! 1371: */ ! 1372: sCtx.iCol = iCol; ! 1373: sCtx.iTerm = 0; ! 1374: (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); ! 1375: ! 1376: /* Retreive the text stored in column iCol. If an SQL NULL is stored ! 1377: ** in column iCol, jump immediately to the next iteration of the loop. ! 1378: ** If an OOM occurs while retrieving the data (this can happen if SQLite ! 1379: ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM ! 1380: ** to the caller. ! 1381: */ ! 1382: zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); ! 1383: nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); ! 1384: if( zDoc==0 ){ ! 1385: if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ ! 1386: continue; ! 1387: } ! 1388: rc = SQLITE_NOMEM; ! 1389: goto offsets_out; ! 1390: } ! 1391: ! 1392: /* Initialize a tokenizer iterator to iterate through column iCol. */ ! 1393: rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); ! 1394: if( rc!=SQLITE_OK ) goto offsets_out; ! 1395: pC->pTokenizer = pTab->pTokenizer; ! 1396: ! 1397: rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); ! 1398: while( rc==SQLITE_OK ){ ! 1399: int i; /* Used to loop through terms */ ! 1400: int iMinPos = 0x7FFFFFFF; /* Position of next token */ ! 1401: TermOffset *pTerm = 0; /* TermOffset associated with next token */ ! 1402: ! 1403: for(i=0; i<nToken; i++){ ! 1404: TermOffset *pT = &sCtx.aTerm[i]; ! 1405: if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ ! 1406: iMinPos = pT->iPos-pT->iOff; ! 1407: pTerm = pT; ! 1408: } ! 1409: } ! 1410: ! 1411: if( !pTerm ){ ! 1412: /* All offsets for this column have been gathered. */ ! 1413: rc = SQLITE_DONE; ! 1414: }else{ ! 1415: assert( iCurrent<=iMinPos ); ! 1416: if( 0==(0xFE&*pTerm->pList) ){ ! 1417: pTerm->pList = 0; ! 1418: }else{ ! 1419: fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); ! 1420: } ! 1421: while( rc==SQLITE_OK && iCurrent<iMinPos ){ ! 1422: rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); ! 1423: } ! 1424: if( rc==SQLITE_OK ){ ! 1425: char aBuffer[64]; ! 1426: sqlite3_snprintf(sizeof(aBuffer), aBuffer, ! 1427: "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart ! 1428: ); ! 1429: rc = fts3StringAppend(&res, aBuffer, -1); ! 1430: }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ ! 1431: rc = FTS_CORRUPT_VTAB; ! 1432: } ! 1433: } ! 1434: } ! 1435: if( rc==SQLITE_DONE ){ ! 1436: rc = SQLITE_OK; ! 1437: } ! 1438: ! 1439: pMod->xClose(pC); ! 1440: if( rc!=SQLITE_OK ) goto offsets_out; ! 1441: } ! 1442: ! 1443: offsets_out: ! 1444: sqlite3_free(sCtx.aTerm); ! 1445: assert( rc!=SQLITE_DONE ); ! 1446: sqlite3Fts3SegmentsClose(pTab); ! 1447: if( rc!=SQLITE_OK ){ ! 1448: sqlite3_result_error_code(pCtx, rc); ! 1449: sqlite3_free(res.z); ! 1450: }else{ ! 1451: sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); ! 1452: } ! 1453: return; ! 1454: } ! 1455: ! 1456: /* ! 1457: ** Implementation of matchinfo() function. ! 1458: */ ! 1459: void sqlite3Fts3Matchinfo( ! 1460: sqlite3_context *pContext, /* Function call context */ ! 1461: Fts3Cursor *pCsr, /* FTS3 table cursor */ ! 1462: const char *zArg /* Second arg to matchinfo() function */ ! 1463: ){ ! 1464: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; ! 1465: int rc; ! 1466: int i; ! 1467: const char *zFormat; ! 1468: ! 1469: if( zArg ){ ! 1470: for(i=0; zArg[i]; i++){ ! 1471: char *zErr = 0; ! 1472: if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ ! 1473: sqlite3_result_error(pContext, zErr, -1); ! 1474: sqlite3_free(zErr); ! 1475: return; ! 1476: } ! 1477: } ! 1478: zFormat = zArg; ! 1479: }else{ ! 1480: zFormat = FTS3_MATCHINFO_DEFAULT; ! 1481: } ! 1482: ! 1483: if( !pCsr->pExpr ){ ! 1484: sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); ! 1485: return; ! 1486: } ! 1487: ! 1488: /* Retrieve matchinfo() data. */ ! 1489: rc = fts3GetMatchinfo(pCsr, zFormat); ! 1490: sqlite3Fts3SegmentsClose(pTab); ! 1491: ! 1492: if( rc!=SQLITE_OK ){ ! 1493: sqlite3_result_error_code(pContext, rc); ! 1494: }else{ ! 1495: int n = pCsr->nMatchinfo * sizeof(u32); ! 1496: sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); ! 1497: } ! 1498: } ! 1499: ! 1500: #endif