Annotation of embedaddon/sqlite3/ext/fts1/fts1.c, revision 1.1
1.1 ! misho 1: /* fts1 has a design flaw which can lead to database corruption (see
! 2: ** below). It is recommended not to use it any longer, instead use
! 3: ** fts3 (or higher). If you believe that your use of fts1 is safe,
! 4: ** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
! 5: */
! 6: #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
! 7: && !defined(SQLITE_ENABLE_BROKEN_FTS1)
! 8: #error fts1 has a design flaw and has been deprecated.
! 9: #endif
! 10: /* The flaw is that fts1 uses the content table's unaliased rowid as
! 11: ** the unique docid. fts1 embeds the rowid in the index it builds,
! 12: ** and expects the rowid to not change. The SQLite VACUUM operation
! 13: ** will renumber such rowids, thereby breaking fts1. If you are using
! 14: ** fts1 in a system which has disabled VACUUM, then you can continue
! 15: ** to use it safely. Note that PRAGMA auto_vacuum does NOT disable
! 16: ** VACUUM, though systems using auto_vacuum are unlikely to invoke
! 17: ** VACUUM.
! 18: **
! 19: ** fts1 should be safe even across VACUUM if you only insert documents
! 20: ** and never delete.
! 21: */
! 22:
! 23: /* The author disclaims copyright to this source code.
! 24: *
! 25: * This is an SQLite module implementing full-text search.
! 26: */
! 27:
! 28: /*
! 29: ** The code in this file is only compiled if:
! 30: **
! 31: ** * The FTS1 module is being built as an extension
! 32: ** (in which case SQLITE_CORE is not defined), or
! 33: **
! 34: ** * The FTS1 module is being built into the core of
! 35: ** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
! 36: */
! 37: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
! 38:
! 39: #if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
! 40: # define SQLITE_CORE 1
! 41: #endif
! 42:
! 43: #include <assert.h>
! 44: #include <stdlib.h>
! 45: #include <stdio.h>
! 46: #include <string.h>
! 47: #include <ctype.h>
! 48:
! 49: #include "fts1.h"
! 50: #include "fts1_hash.h"
! 51: #include "fts1_tokenizer.h"
! 52: #include "sqlite3.h"
! 53: #include "sqlite3ext.h"
! 54: SQLITE_EXTENSION_INIT1
! 55:
! 56:
/*
** Debug tracing.  TRACE((fmt, ...)) takes a parenthesized printf()
** argument list.  Change "#if 0" to "#if 1" to enable output; when
** disabled the macro expands to nothing.
**
** The enabled form is wrapped in do{...}while(0) so that the printf()
** and the fflush() stay together when TRACE() is used as the body of
** an unbraced if/else.
*/
#if 0
# define TRACE(A)  do{ printf A; fflush(stdout); }while(0)
#else
# define TRACE(A)
#endif
! 62:
! 63: /* utility functions */
! 64:
/* A growable character buffer used to build up strings piecewise. */
typedef struct StringBuffer {
  int len;        /* Bytes of text in s[], excluding the NUL terminator */
  int alloced;    /* Capacity recorded for s[] */
  char *s;        /* The text itself */
} StringBuffer;
! 70:
! 71: static void initStringBuffer(StringBuffer *sb){
! 72: sb->len = 0;
! 73: sb->alloced = 100;
! 74: sb->s = malloc(100);
! 75: sb->s[0] = '\0';
! 76: }
! 77:
! 78: static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
! 79: if( sb->len + nFrom >= sb->alloced ){
! 80: sb->alloced = sb->len + nFrom + 100;
! 81: sb->s = realloc(sb->s, sb->alloced+1);
! 82: if( sb->s==0 ){
! 83: initStringBuffer(sb);
! 84: return;
! 85: }
! 86: }
! 87: memcpy(sb->s + sb->len, zFrom, nFrom);
! 88: sb->len += nFrom;
! 89: sb->s[sb->len] = 0;
! 90: }
! 91: static void append(StringBuffer *sb, const char *zFrom){
! 92: nappend(sb, zFrom, strlen(zFrom));
! 93: }
! 94:
! 95: /* We encode variable-length integers in little-endian order using seven bits
! 96: * per byte as follows:
! 97: **
! 98: ** KEY:
! 99: ** A = 0xxxxxxx 7 bits of data and one flag bit
! 100: ** B = 1xxxxxxx 7 bits of data and one flag bit
! 101: **
! 102: ** 7 bits - A
! 103: ** 14 bits - BA
! 104: ** 21 bits - BBA
! 105: ** and so on.
! 106: */
! 107:
! 108: /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
! 109: #define VARINT_MAX 10
! 110:
! 111: /* Write a 64-bit variable-length integer to memory starting at p[0].
! 112: * The length of data written will be between 1 and VARINT_MAX bytes.
! 113: * The number of bytes written is returned. */
! 114: static int putVarint(char *p, sqlite_int64 v){
! 115: unsigned char *q = (unsigned char *) p;
! 116: sqlite_uint64 vu = v;
! 117: do{
! 118: *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
! 119: vu >>= 7;
! 120: }while( vu!=0 );
! 121: q[-1] &= 0x7f; /* turn off high bit in final byte */
! 122: assert( q - (unsigned char *)p <= VARINT_MAX );
! 123: return (int) (q - (unsigned char *)p);
! 124: }
! 125:
! 126: /* Read a 64-bit variable-length integer from memory starting at p[0].
! 127: * Return the number of bytes read, or 0 on error.
! 128: * The value is stored in *v. */
! 129: static int getVarint(const char *p, sqlite_int64 *v){
! 130: const unsigned char *q = (const unsigned char *) p;
! 131: sqlite_uint64 x = 0, y = 1;
! 132: while( (*q & 0x80) == 0x80 ){
! 133: x += y * (*q++ & 0x7f);
! 134: y <<= 7;
! 135: if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */
! 136: assert( 0 );
! 137: return 0;
! 138: }
! 139: }
! 140: x += y * (*q++);
! 141: *v = (sqlite_int64) x;
! 142: return (int) (q - (unsigned char *)p);
! 143: }
! 144:
! 145: static int getVarint32(const char *p, int *pi){
! 146: sqlite_int64 i;
! 147: int ret = getVarint(p, &i);
! 148: *pi = (int) i;
! 149: assert( *pi==i );
! 150: return ret;
! 151: }
! 152:
! 153: /*** Document lists ***
! 154: *
! 155: * A document list holds a sorted list of varint-encoded document IDs.
! 156: *
! 157: * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
! 158: *
! 159: * array {
! 160: * varint docid;
! 161: * array {
! 162: * varint position; (delta from previous position plus POS_BASE)
! 163: * varint startOffset; (delta from previous startOffset)
! 164: * varint endOffset; (delta from startOffset)
! 165: * }
! 166: * }
! 167: *
! 168: * Here, array { X } means zero or more occurrences of X, adjacent in memory.
! 169: *
! 170: * A position list may hold positions for text in multiple columns. A position
! 171: * POS_COLUMN is followed by a varint containing the index of the column for
! 172: * following positions in the list. Any positions appearing before any
! 173: * occurrences of POS_COLUMN are for column 0.
! 174: *
! 175: * A doclist with type DL_POSITIONS is like the above, but holds only docids
! 176: * and positions without offset information.
! 177: *
! 178: * A doclist with type DL_DOCIDS is like the above, but holds only docids
! 179: * without positions or offset information.
! 180: *
! 181: * On disk, every document list has positions and offsets, so we don't bother
! 182: * to serialize a doclist's type.
! 183: *
! 184: * We don't yet delta-encode document IDs; doing so will probably be a
! 185: * modest win.
! 186: *
! 187: * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
! 188: * After the first offset, estimate the next offset by using the
! 189: * current token position and the previous token position and offset,
! 190: * offset to handle some variance. So the estimate would be
! 191: * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
! 192: * as normal. Offsets more than 64 chars from the estimate are
! 193: * encoded as the delta to the previous start offset + 128. An
! 194: * additional tiny increment can be gained by using the end offset of
! 195: * the previous token to make the estimate a tiny bit more precise.
! 196: */
! 197:
! 198: /* It is not safe to call isspace(), tolower(), or isalnum() on
! 199: ** hi-bit-set characters. This is the same solution used in the
! 200: ** tokenizer.
! 201: */
! 202: /* TODO(shess) The snippet-generation code should be using the
! 203: ** tokenizer-generated tokens rather than doing its own local
! 204: ** tokenization.
! 205: */
! 206: /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
/* isspace() for 7-bit characters only; anything with the high bit set is
** reported as not-a-space without calling into <ctype.h>. */
static int safe_isspace(char c){
  if( (c & 0x80)!=0 ) return 0;
  return isspace(c);
}
/* tolower() for 7-bit characters only; a character with the high bit set
** is returned unchanged. */
static int safe_tolower(char c){
  if( (c & 0x80)!=0 ) return c;
  return tolower(c);
}
/* isalnum() for 7-bit characters only; anything with the high bit set is
** reported as not alphanumeric. */
static int safe_isalnum(char c){
  if( (c & 0x80)!=0 ) return 0;
  return isalnum(c);
}
! 216:
/* How much information each entry of a doclist carries.  The values are
** ordered, so code can test "iType>=DL_POSITIONS". */
typedef enum DocListType {
  DL_DOCIDS,              /* docids only */
  DL_POSITIONS,           /* docids and token positions */
  DL_POSITIONS_OFFSETS    /* docids, token positions and byte offsets */
} DocListType;
! 222:
! 223: /*
! 224: ** By default, only positions and not offsets are stored in the doclists.
! 225: ** To change this so that offsets are stored too, compile with
! 226: **
! 227: ** -DDL_DEFAULT=DL_POSITIONS_OFFSETS
! 228: **
! 229: */
! 230: #ifndef DL_DEFAULT
! 231: # define DL_DEFAULT DL_POSITIONS
! 232: #endif
! 233:
! 234: typedef struct DocList {
! 235: char *pData;
! 236: int nData;
! 237: DocListType iType;
! 238: int iLastColumn; /* the last column written */
! 239: int iLastPos; /* the last position written */
! 240: int iLastOffset; /* the last start offset written */
! 241: } DocList;
! 242:
/* Reserved values in a position list.  Real position deltas are stored
** with POS_BASE added so they never collide with the markers. */
enum {
  POS_END = 0,    /* terminates a position list */
  POS_COLUMN,     /* the next varint is a new column number */
  POS_BASE        /* first value available for position deltas */
};
! 248:
! 249: /* Initialize a new DocList to hold the given data. */
! 250: static void docListInit(DocList *d, DocListType iType,
! 251: const char *pData, int nData){
! 252: d->nData = nData;
! 253: if( nData>0 ){
! 254: d->pData = malloc(nData);
! 255: memcpy(d->pData, pData, nData);
! 256: } else {
! 257: d->pData = NULL;
! 258: }
! 259: d->iType = iType;
! 260: d->iLastColumn = 0;
! 261: d->iLastPos = d->iLastOffset = 0;
! 262: }
! 263:
! 264: /* Create a new dynamically-allocated DocList. */
! 265: static DocList *docListNew(DocListType iType){
! 266: DocList *d = (DocList *) malloc(sizeof(DocList));
! 267: docListInit(d, iType, 0, 0);
! 268: return d;
! 269: }
! 270:
! 271: static void docListDestroy(DocList *d){
! 272: free(d->pData);
! 273: #ifndef NDEBUG
! 274: memset(d, 0x55, sizeof(*d));
! 275: #endif
! 276: }
! 277:
! 278: static void docListDelete(DocList *d){
! 279: docListDestroy(d);
! 280: free(d);
! 281: }
! 282:
! 283: static char *docListEnd(DocList *d){
! 284: return d->pData + d->nData;
! 285: }
! 286:
! 287: /* Append a varint to a DocList's data. */
! 288: static void appendVarint(DocList *d, sqlite_int64 i){
! 289: char c[VARINT_MAX];
! 290: int n = putVarint(c, i);
! 291: d->pData = realloc(d->pData, d->nData + n);
! 292: memcpy(d->pData + d->nData, c, n);
! 293: d->nData += n;
! 294: }
! 295:
! 296: static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
! 297: appendVarint(d, iDocid);
! 298: if( d->iType>=DL_POSITIONS ){
! 299: appendVarint(d, POS_END); /* initially empty position list */
! 300: d->iLastColumn = 0;
! 301: d->iLastPos = d->iLastOffset = 0;
! 302: }
! 303: }
! 304:
! 305: /* helper function for docListAddPos and docListAddPosOffset */
! 306: static void addPos(DocList *d, int iColumn, int iPos){
! 307: assert( d->nData>0 );
! 308: --d->nData; /* remove previous terminator */
! 309: if( iColumn!=d->iLastColumn ){
! 310: assert( iColumn>d->iLastColumn );
! 311: appendVarint(d, POS_COLUMN);
! 312: appendVarint(d, iColumn);
! 313: d->iLastColumn = iColumn;
! 314: d->iLastPos = d->iLastOffset = 0;
! 315: }
! 316: assert( iPos>=d->iLastPos );
! 317: appendVarint(d, iPos-d->iLastPos+POS_BASE);
! 318: d->iLastPos = iPos;
! 319: }
! 320:
! 321: /* Add a position to the last position list in a doclist. */
! 322: static void docListAddPos(DocList *d, int iColumn, int iPos){
! 323: assert( d->iType==DL_POSITIONS );
! 324: addPos(d, iColumn, iPos);
! 325: appendVarint(d, POS_END); /* add new terminator */
! 326: }
! 327:
! 328: /*
! 329: ** Add a position and starting and ending offsets to a doclist.
! 330: **
! 331: ** If the doclist is setup to handle only positions, then insert
! 332: ** the position only and ignore the offsets.
! 333: */
! 334: static void docListAddPosOffset(
! 335: DocList *d, /* Doclist under construction */
! 336: int iColumn, /* Column the inserted term is part of */
! 337: int iPos, /* Position of the inserted term */
! 338: int iStartOffset, /* Starting offset of inserted term */
! 339: int iEndOffset /* Ending offset of inserted term */
! 340: ){
! 341: assert( d->iType>=DL_POSITIONS );
! 342: addPos(d, iColumn, iPos);
! 343: if( d->iType==DL_POSITIONS_OFFSETS ){
! 344: assert( iStartOffset>=d->iLastOffset );
! 345: appendVarint(d, iStartOffset-d->iLastOffset);
! 346: d->iLastOffset = iStartOffset;
! 347: assert( iEndOffset>=iStartOffset );
! 348: appendVarint(d, iEndOffset-iStartOffset);
! 349: }
! 350: appendVarint(d, POS_END); /* add new terminator */
! 351: }
! 352:
! 353: /*
! 354: ** A DocListReader object is a cursor into a doclist. Initialize
! 355: ** the cursor to the beginning of the doclist by calling readerInit().
! 356: ** Then use routines
! 357: **
! 358: ** peekDocid()
! 359: ** readDocid()
! 360: ** readPosition()
! 361: ** skipPositionList()
! 362: ** and so forth...
! 363: **
! 364: ** to read information out of the doclist. When we reach the end
! 365: ** of the doclist, atEnd() returns TRUE.
! 366: */
! 367: typedef struct DocListReader {
! 368: DocList *pDoclist; /* The document list we are stepping through */
! 369: char *p; /* Pointer to next unread byte in the doclist */
! 370: int iLastColumn;
! 371: int iLastPos; /* the last position read, or -1 when not in a position list */
! 372: } DocListReader;
! 373:
! 374: /*
! 375: ** Initialize the DocListReader r to point to the beginning of pDoclist.
! 376: */
! 377: static void readerInit(DocListReader *r, DocList *pDoclist){
! 378: r->pDoclist = pDoclist;
! 379: if( pDoclist!=NULL ){
! 380: r->p = pDoclist->pData;
! 381: }
! 382: r->iLastColumn = -1;
! 383: r->iLastPos = -1;
! 384: }
! 385:
! 386: /*
! 387: ** Return TRUE if we have reached then end of pReader and there is
! 388: ** nothing else left to read.
! 389: */
! 390: static int atEnd(DocListReader *pReader){
! 391: return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
! 392: }
! 393:
! 394: /* Peek at the next docid without advancing the read pointer.
! 395: */
! 396: static sqlite_int64 peekDocid(DocListReader *pReader){
! 397: sqlite_int64 ret;
! 398: assert( !atEnd(pReader) );
! 399: assert( pReader->iLastPos==-1 );
! 400: getVarint(pReader->p, &ret);
! 401: return ret;
! 402: }
! 403:
! 404: /* Read the next docid. See also nextDocid().
! 405: */
! 406: static sqlite_int64 readDocid(DocListReader *pReader){
! 407: sqlite_int64 ret;
! 408: assert( !atEnd(pReader) );
! 409: assert( pReader->iLastPos==-1 );
! 410: pReader->p += getVarint(pReader->p, &ret);
! 411: if( pReader->pDoclist->iType>=DL_POSITIONS ){
! 412: pReader->iLastColumn = 0;
! 413: pReader->iLastPos = 0;
! 414: }
! 415: return ret;
! 416: }
! 417:
! 418: /* Read the next position and column index from a position list.
! 419: * Returns the position, or -1 at the end of the list. */
! 420: static int readPosition(DocListReader *pReader, int *iColumn){
! 421: int i;
! 422: int iType = pReader->pDoclist->iType;
! 423:
! 424: if( pReader->iLastPos==-1 ){
! 425: return -1;
! 426: }
! 427: assert( !atEnd(pReader) );
! 428:
! 429: if( iType<DL_POSITIONS ){
! 430: return -1;
! 431: }
! 432: pReader->p += getVarint32(pReader->p, &i);
! 433: if( i==POS_END ){
! 434: pReader->iLastColumn = pReader->iLastPos = -1;
! 435: *iColumn = -1;
! 436: return -1;
! 437: }
! 438: if( i==POS_COLUMN ){
! 439: pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
! 440: pReader->iLastPos = 0;
! 441: pReader->p += getVarint32(pReader->p, &i);
! 442: assert( i>=POS_BASE );
! 443: }
! 444: pReader->iLastPos += ((int) i)-POS_BASE;
! 445: if( iType>=DL_POSITIONS_OFFSETS ){
! 446: /* Skip over offsets, ignoring them for now. */
! 447: int iStart, iEnd;
! 448: pReader->p += getVarint32(pReader->p, &iStart);
! 449: pReader->p += getVarint32(pReader->p, &iEnd);
! 450: }
! 451: *iColumn = pReader->iLastColumn;
! 452: return pReader->iLastPos;
! 453: }
! 454:
! 455: /* Skip past the end of a position list. */
! 456: static void skipPositionList(DocListReader *pReader){
! 457: DocList *p = pReader->pDoclist;
! 458: if( p && p->iType>=DL_POSITIONS ){
! 459: int iColumn;
! 460: while( readPosition(pReader, &iColumn)!=-1 ){}
! 461: }
! 462: }
! 463:
! 464: /* Skip over a docid, including its position list if the doclist has
! 465: * positions. */
! 466: static void skipDocument(DocListReader *pReader){
! 467: readDocid(pReader);
! 468: skipPositionList(pReader);
! 469: }
! 470:
! 471: /* Skip past all docids which are less than [iDocid]. Returns 1 if a docid
! 472: * matching [iDocid] was found. */
! 473: static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
! 474: sqlite_int64 d = 0;
! 475: while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
! 476: skipDocument(pReader);
! 477: }
! 478: return !atEnd(pReader) && d==iDocid;
! 479: }
! 480:
! 481: /* Return the first document in a document list.
! 482: */
! 483: static sqlite_int64 firstDocid(DocList *d){
! 484: DocListReader r;
! 485: readerInit(&r, d);
! 486: return readDocid(&r);
! 487: }
! 488:
#ifdef SQLITE_DEBUG
/*
** Debugging aid only: write the content of a doclist to standard output
** as a comma-separated list of docids, each followed by its positions in
** parentheses when the doclist stores them.  Entries with docid 0 are
** skipped.
*/
static void printDoclist(DocList *p){
  DocListReader r;
  int nShown = 0;

  readerInit(&r, p);
  while( !atEnd(&r) ){
    sqlite_int64 docid = readDocid(&r);
    if( docid==0 ){
      skipPositionList(&r);
      continue;
    }
    printf("%s%lld", nShown ? "," : "", docid);
    nShown++;
    if( p->iType>=DL_POSITIONS ){
      int iPos, iCol;
      int nPos = 0;
      printf("(");
      for(iPos=readPosition(&r, &iCol); iPos>=0;
          iPos=readPosition(&r, &iCol)){
        printf("%s%d:%d", nPos ? ":" : "", iCol, iPos);
        nPos++;
      }
      printf(")");
    }
  }
  printf("\n");
  fflush(stdout);
}
#endif /* SQLITE_DEBUG */
! 523:
! 524: /* Trim the given doclist to contain only positions in column
! 525: * [iRestrictColumn]. */
! 526: static void docListRestrictColumn(DocList *in, int iRestrictColumn){
! 527: DocListReader r;
! 528: DocList out;
! 529:
! 530: assert( in->iType>=DL_POSITIONS );
! 531: readerInit(&r, in);
! 532: docListInit(&out, DL_POSITIONS, NULL, 0);
! 533:
! 534: while( !atEnd(&r) ){
! 535: sqlite_int64 iDocid = readDocid(&r);
! 536: int iPos, iColumn;
! 537:
! 538: docListAddDocid(&out, iDocid);
! 539: while( (iPos = readPosition(&r, &iColumn)) != -1 ){
! 540: if( iColumn==iRestrictColumn ){
! 541: docListAddPos(&out, iColumn, iPos);
! 542: }
! 543: }
! 544: }
! 545:
! 546: docListDestroy(in);
! 547: *in = out;
! 548: }
! 549:
! 550: /* Trim the given doclist by discarding any docids without any remaining
! 551: * positions. */
! 552: static void docListDiscardEmpty(DocList *in) {
! 553: DocListReader r;
! 554: DocList out;
! 555:
! 556: /* TODO: It would be nice to implement this operation in place; that
! 557: * could save a significant amount of memory in queries with long doclists. */
! 558: assert( in->iType>=DL_POSITIONS );
! 559: readerInit(&r, in);
! 560: docListInit(&out, DL_POSITIONS, NULL, 0);
! 561:
! 562: while( !atEnd(&r) ){
! 563: sqlite_int64 iDocid = readDocid(&r);
! 564: int match = 0;
! 565: int iPos, iColumn;
! 566: while( (iPos = readPosition(&r, &iColumn)) != -1 ){
! 567: if( !match ){
! 568: docListAddDocid(&out, iDocid);
! 569: match = 1;
! 570: }
! 571: docListAddPos(&out, iColumn, iPos);
! 572: }
! 573: }
! 574:
! 575: docListDestroy(in);
! 576: *in = out;
! 577: }
! 578:
! 579: /* Helper function for docListUpdate() and docListAccumulate().
! 580: ** Splices a doclist element into the doclist represented by r,
! 581: ** leaving r pointing after the newly spliced element.
! 582: */
! 583: static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
! 584: const char *pSource, int nSource){
! 585: DocList *d = r->pDoclist;
! 586: char *pTarget;
! 587: int nTarget, found;
! 588:
! 589: found = skipToDocid(r, iDocid);
! 590:
! 591: /* Describe slice in d to place pSource/nSource. */
! 592: pTarget = r->p;
! 593: if( found ){
! 594: skipDocument(r);
! 595: nTarget = r->p-pTarget;
! 596: }else{
! 597: nTarget = 0;
! 598: }
! 599:
! 600: /* The sense of the following is that there are three possibilities.
! 601: ** If nTarget==nSource, we should not move any memory nor realloc.
! 602: ** If nTarget>nSource, trim target and realloc.
! 603: ** If nTarget<nSource, realloc then expand target.
! 604: */
! 605: if( nTarget>nSource ){
! 606: memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
! 607: }
! 608: if( nTarget!=nSource ){
! 609: int iDoclist = pTarget-d->pData;
! 610: d->pData = realloc(d->pData, d->nData+nSource-nTarget);
! 611: pTarget = d->pData+iDoclist;
! 612: }
! 613: if( nTarget<nSource ){
! 614: memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
! 615: }
! 616:
! 617: memcpy(pTarget, pSource, nSource);
! 618: d->nData += nSource-nTarget;
! 619: r->p = pTarget+nSource;
! 620: }
! 621:
! 622: /* Insert/update pUpdate into the doclist. */
! 623: static void docListUpdate(DocList *d, DocList *pUpdate){
! 624: DocListReader reader;
! 625:
! 626: assert( d!=NULL && pUpdate!=NULL );
! 627: assert( d->iType==pUpdate->iType);
! 628:
! 629: readerInit(&reader, d);
! 630: docListSpliceElement(&reader, firstDocid(pUpdate),
! 631: pUpdate->pData, pUpdate->nData);
! 632: }
! 633:
! 634: /* Propagate elements from pUpdate to pAcc, overwriting elements with
! 635: ** matching docids.
! 636: */
! 637: static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
! 638: DocListReader accReader, updateReader;
! 639:
! 640: /* Handle edge cases where one doclist is empty. */
! 641: assert( pAcc!=NULL );
! 642: if( pUpdate==NULL || pUpdate->nData==0 ) return;
! 643: if( pAcc->nData==0 ){
! 644: pAcc->pData = malloc(pUpdate->nData);
! 645: memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
! 646: pAcc->nData = pUpdate->nData;
! 647: return;
! 648: }
! 649:
! 650: readerInit(&accReader, pAcc);
! 651: readerInit(&updateReader, pUpdate);
! 652:
! 653: while( !atEnd(&updateReader) ){
! 654: char *pSource = updateReader.p;
! 655: sqlite_int64 iDocid = readDocid(&updateReader);
! 656: skipPositionList(&updateReader);
! 657: docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
! 658: }
! 659: }
! 660:
! 661: /*
! 662: ** Read the next docid off of pIn. Return 0 if we reach the end.
! 663: *
! 664: * TODO: This assumes that docids are never 0, but they may actually be 0 since
! 665: * users can choose docids when inserting into a full-text table. Fix this.
! 666: */
! 667: static sqlite_int64 nextDocid(DocListReader *pIn){
! 668: skipPositionList(pIn);
! 669: return atEnd(pIn) ? 0 : readDocid(pIn);
! 670: }
! 671:
! 672: /*
! 673: ** pLeft and pRight are two DocListReaders that are pointing to
! 674: ** positions lists of the same document: iDocid.
! 675: **
! 676: ** If there are no instances in pLeft or pRight where the position
! 677: ** of pLeft is one less than the position of pRight, then this
! 678: ** routine adds nothing to pOut.
! 679: **
! 680: ** If there are one or more instances where positions from pLeft
! 681: ** are exactly one less than positions from pRight, then add a new
! 682: ** document record to pOut. If pOut wants to hold positions, then
! 683: ** include the positions from pRight that are one more than a
! 684: ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.
! 685: **
! 686: ** pLeft and pRight are left pointing at the next document record.
! 687: */
! 688: static void mergePosList(
! 689: DocListReader *pLeft, /* Left position list */
! 690: DocListReader *pRight, /* Right position list */
! 691: sqlite_int64 iDocid, /* The docid from pLeft and pRight */
! 692: DocList *pOut /* Write the merged document record here */
! 693: ){
! 694: int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
! 695: int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
! 696: int match = 0;
! 697:
! 698: /* Loop until we've reached the end of both position lists. */
! 699: while( iLeftPos!=-1 && iRightPos!=-1 ){
! 700: if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
! 701: if( !match ){
! 702: docListAddDocid(pOut, iDocid);
! 703: match = 1;
! 704: }
! 705: if( pOut->iType>=DL_POSITIONS ){
! 706: docListAddPos(pOut, iRightCol, iRightPos);
! 707: }
! 708: iLeftPos = readPosition(pLeft, &iLeftCol);
! 709: iRightPos = readPosition(pRight, &iRightCol);
! 710: }else if( iRightCol<iLeftCol ||
! 711: (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
! 712: iRightPos = readPosition(pRight, &iRightCol);
! 713: }else{
! 714: iLeftPos = readPosition(pLeft, &iLeftCol);
! 715: }
! 716: }
! 717: if( iLeftPos>=0 ) skipPositionList(pLeft);
! 718: if( iRightPos>=0 ) skipPositionList(pRight);
! 719: }
! 720:
! 721: /* We have two doclists: pLeft and pRight.
! 722: ** Write the phrase intersection of these two doclists into pOut.
! 723: **
! 724: ** A phrase intersection means that two documents only match
! 725: ** if pLeft.iPos+1==pRight.iPos.
! 726: **
! 727: ** The output pOut may or may not contain positions. If pOut
! 728: ** does contain positions, they are the positions of pRight.
! 729: */
! 730: static void docListPhraseMerge(
! 731: DocList *pLeft, /* Doclist resulting from the words on the left */
! 732: DocList *pRight, /* Doclist for the next word to the right */
! 733: DocList *pOut /* Write the combined doclist here */
! 734: ){
! 735: DocListReader left, right;
! 736: sqlite_int64 docidLeft, docidRight;
! 737:
! 738: readerInit(&left, pLeft);
! 739: readerInit(&right, pRight);
! 740: docidLeft = nextDocid(&left);
! 741: docidRight = nextDocid(&right);
! 742:
! 743: while( docidLeft>0 && docidRight>0 ){
! 744: if( docidLeft<docidRight ){
! 745: docidLeft = nextDocid(&left);
! 746: }else if( docidRight<docidLeft ){
! 747: docidRight = nextDocid(&right);
! 748: }else{
! 749: mergePosList(&left, &right, docidLeft, pOut);
! 750: docidLeft = nextDocid(&left);
! 751: docidRight = nextDocid(&right);
! 752: }
! 753: }
! 754: }
! 755:
! 756: /* We have two doclists: pLeft and pRight.
! 757: ** Write the intersection of these two doclists into pOut.
! 758: ** Only docids are matched. Position information is ignored.
! 759: **
! 760: ** The output pOut never holds positions.
! 761: */
! 762: static void docListAndMerge(
! 763: DocList *pLeft, /* Doclist resulting from the words on the left */
! 764: DocList *pRight, /* Doclist for the next word to the right */
! 765: DocList *pOut /* Write the combined doclist here */
! 766: ){
! 767: DocListReader left, right;
! 768: sqlite_int64 docidLeft, docidRight;
! 769:
! 770: assert( pOut->iType<DL_POSITIONS );
! 771:
! 772: readerInit(&left, pLeft);
! 773: readerInit(&right, pRight);
! 774: docidLeft = nextDocid(&left);
! 775: docidRight = nextDocid(&right);
! 776:
! 777: while( docidLeft>0 && docidRight>0 ){
! 778: if( docidLeft<docidRight ){
! 779: docidLeft = nextDocid(&left);
! 780: }else if( docidRight<docidLeft ){
! 781: docidRight = nextDocid(&right);
! 782: }else{
! 783: docListAddDocid(pOut, docidLeft);
! 784: docidLeft = nextDocid(&left);
! 785: docidRight = nextDocid(&right);
! 786: }
! 787: }
! 788: }
! 789:
! 790: /* We have two doclists: pLeft and pRight.
! 791: ** Write the union of these two doclists into pOut.
! 792: ** Only docids are matched. Position information is ignored.
! 793: **
! 794: ** The output pOut never holds positions.
! 795: */
! 796: static void docListOrMerge(
! 797: DocList *pLeft, /* Doclist resulting from the words on the left */
! 798: DocList *pRight, /* Doclist for the next word to the right */
! 799: DocList *pOut /* Write the combined doclist here */
! 800: ){
! 801: DocListReader left, right;
! 802: sqlite_int64 docidLeft, docidRight, priorLeft;
! 803:
! 804: readerInit(&left, pLeft);
! 805: readerInit(&right, pRight);
! 806: docidLeft = nextDocid(&left);
! 807: docidRight = nextDocid(&right);
! 808:
! 809: while( docidLeft>0 && docidRight>0 ){
! 810: if( docidLeft<=docidRight ){
! 811: docListAddDocid(pOut, docidLeft);
! 812: }else{
! 813: docListAddDocid(pOut, docidRight);
! 814: }
! 815: priorLeft = docidLeft;
! 816: if( docidLeft<=docidRight ){
! 817: docidLeft = nextDocid(&left);
! 818: }
! 819: if( docidRight>0 && docidRight<=priorLeft ){
! 820: docidRight = nextDocid(&right);
! 821: }
! 822: }
! 823: while( docidLeft>0 ){
! 824: docListAddDocid(pOut, docidLeft);
! 825: docidLeft = nextDocid(&left);
! 826: }
! 827: while( docidRight>0 ){
! 828: docListAddDocid(pOut, docidRight);
! 829: docidRight = nextDocid(&right);
! 830: }
! 831: }
! 832:
! 833: /* We have two doclists: pLeft and pRight.
! 834: ** Write into pOut all documents that occur in pLeft but not
! 835: ** in pRight.
! 836: **
! 837: ** Only docids are matched. Position information is ignored.
! 838: **
! 839: ** The output pOut never holds positions.
! 840: */
! 841: static void docListExceptMerge(
! 842: DocList *pLeft, /* Doclist resulting from the words on the left */
! 843: DocList *pRight, /* Doclist for the next word to the right */
! 844: DocList *pOut /* Write the combined doclist here */
! 845: ){
! 846: DocListReader left, right;
! 847: sqlite_int64 docidLeft, docidRight, priorLeft;
! 848:
! 849: readerInit(&left, pLeft);
! 850: readerInit(&right, pRight);
! 851: docidLeft = nextDocid(&left);
! 852: docidRight = nextDocid(&right);
! 853:
! 854: while( docidLeft>0 && docidRight>0 ){
! 855: priorLeft = docidLeft;
! 856: if( docidLeft<docidRight ){
! 857: docListAddDocid(pOut, docidLeft);
! 858: }
! 859: if( docidLeft<=docidRight ){
! 860: docidLeft = nextDocid(&left);
! 861: }
! 862: if( docidRight>0 && docidRight<=priorLeft ){
! 863: docidRight = nextDocid(&right);
! 864: }
! 865: }
! 866: while( docidLeft>0 ){
! 867: docListAddDocid(pOut, docidLeft);
! 868: docidLeft = nextDocid(&left);
! 869: }
! 870: }
! 871:
/*
** Return a newly malloc()ed, NUL-terminated copy of the first n bytes of
** s.  The caller must free() the result.  Returns NULL if the allocation
** fails, instead of copying through a NULL pointer.
*/
static char *string_dup_n(const char *s, int n){
  char *str = malloc(n + 1);
  if( str==0 ) return 0;
  memcpy(str, s, n);
  str[n] = '\0';
  return str;
}
! 878:
/* Return a malloc()ed copy of the NUL-terminated string s; the caller
** must free() it.  (strdup() is avoided because it is not part of the
** standard C library and may not be available everywhere.) */
static char *string_dup(const char *s){
  int n = strlen(s);
  return string_dup_n(s, n);
}
! 885:
/*
** Return a copy of zFormat in which every '%' character is replaced by
** "zDb.zName".  This may be more convenient than sqlite_mprintf() when
** one name is used repeatedly in a format string.
**
** The result is obtained from malloc() and must be free()d by the caller.
** Returns NULL if the allocation fails, instead of writing through a NULL
** pointer.
*/
static char *string_format(const char *zFormat,
                           const char *zDb, const char *zName){
  const char *p;
  size_t len = 0;
  size_t nDb = strlen(zDb);
  size_t nName = strlen(zName);
  size_t nFullTableName = nDb+1+nName;   /* "db" + "." + "name" */
  char *result;
  char *r;

  /* First pass: compute the space needed, including the terminator. */
  for(p = zFormat; *p; ++p){
    len += (*p=='%' ? nFullTableName : 1);
  }
  len += 1;

  result = malloc(len);
  if( result==0 ) return 0;

  /* Second pass: copy, expanding each '%'. */
  r = result;
  for(p = zFormat; *p; ++p){
    if( *p=='%' ){
      memcpy(r, zDb, nDb);
      r += nDb;
      *r++ = '.';
      memcpy(r, zName, nName);
      r += nName;
    }else{
      *r++ = *p;
    }
  }
  *r++ = '\0';
  assert( r == result + len );
  return result;
}
! 922:
! 923: static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
! 924: const char *zFormat){
! 925: char *zCommand = string_format(zFormat, zDb, zName);
! 926: int rc;
! 927: TRACE(("FTS1 sql: %s\n", zCommand));
! 928: rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
! 929: free(zCommand);
! 930: return rc;
! 931: }
! 932:
! 933: static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
! 934: sqlite3_stmt **ppStmt, const char *zFormat){
! 935: char *zCommand = string_format(zFormat, zDb, zName);
! 936: int rc;
! 937: TRACE(("FTS1 prepare: %s\n", zCommand));
! 938: rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
! 939: free(zCommand);
! 940: return rc;
! 941: }
! 942:
! 943: /* end utility functions */
! 944:
! 945: /* Forward reference */
! 946: typedef struct fulltext_vtab fulltext_vtab;
! 947:
/* A single term in a query is represented by an instance of
** the following structure.  The Query structure holds an array of
** these in Query.pTerms[].
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;
! 960:
! 961:
/* A query string is parsed into a Query structure.
 *
 * We could, in theory, allow query strings to be complicated
 * nested expressions with precedence determined by parentheses.
 * But none of the major search engines do this.  (Perhaps the
 * feeling is that a parenthesized expression is too complex of
 * an idea for the average user to grasp.)  Taking our lead from
 * the major search engines, we will allow queries to be a list
 * of terms (with an implied AND operator) or phrases in double-quotes,
 * with a single optional "-" before each non-phrase term to designate
 * negation and an optional OR connector.
 *
 * OR binds more tightly than the implied AND, which is what the
 * major search engines seem to do.  So, for example:
 *
 *    [one two OR three]     ==>    one AND (two OR three)
 *    [one OR two three]     ==>    (one OR two) AND three
 *
 * A "-" before a term matches all entries that lack that term.
 * The "-" must occur immediately before the term with no intervening
 * space.  This is how the search engines do it.
 *
 * A NOT term cannot be the right-hand operand of an OR.  If this
 * occurs in the query string, the NOT is ignored:
 *
 *    [one OR -two]          ==>    one OR two
 *
 */
typedef struct Query {
  fulltext_vtab *pFts;  /* The full text index */
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;
! 998:
! 999:
/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.  Each fulltext_cursor
** embeds one Snippet as its cache for the current row.
*/
typedef struct Snippet {
  int nMatch;     /* Total number of matches */
  int nAlloc;     /* Space allocated for aMatch[] */
  struct snippetMatch { /* One entry for each matching term */
    char snStatus;       /* Status flag for use while constructing snippets */
    short int iCol;      /* The column that contains the match */
    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
    short int nByte;     /* Number of bytes in the term */
    int iStart;          /* The offset to the first character of the term */
  } *aMatch;      /* Points to space obtained from malloc */
  char *zOffset;  /* Text rendering of aMatch[] */
  int nOffset;    /* strlen(zOffset) */
  char *zSnippet; /* Snippet text */
  int nSnippet;   /* strlen(zSnippet) */
} Snippet;
! 1019:
! 1020:
/* The kind of scan a cursor performs.  fulltext_cursor.iCursorType holds
** one of these values (a copy of sqlite3_index_info.idxNum).  Values at
** or above QUERY_FULLTEXT encode the column being searched, so
** QUERY_FULLTEXT must remain the last enumerator.
*/
typedef enum QueryType {
  QUERY_GENERIC,   /* table scan */
  QUERY_ROWID,     /* lookup by rowid */
  QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
} QueryType;
! 1026:
/* Size threshold for segment 0 doclists.
**
** TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
** before we start aggregating into larger segments.  Lower CHUNK_MAX
** means that for a given input we have more individual segments per
** term, which means more rows in the table and a bigger index (due to
** both more rows and bigger rowids).  But it also reduces the average
** cost of adding new elements to the segment 0 doclist, and it seems
** to reduce the number of pages read and written during inserts.  256
** was chosen by measuring insertion times for a certain input (first
** 10k documents of Enron corpus), though including query performance
** in the decision may argue for a larger value.
*/
#define CHUNK_MAX 256
! 1039:
/* Identifiers for the prepared statements cached per virtual table in
** fulltext_vtab.pFulltextStatements[].  Each value is also the index of
** the matching SQL text in fulltext_zStatement[], so the two lists must
** be kept in the same order.
*/
typedef enum fulltext_statement {
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,
  CONTENT_UPDATE_STMT,
  CONTENT_DELETE_STMT,

  TERM_SELECT_STMT,
  TERM_SELECT_ALL_STMT,
  TERM_INSERT_STMT,
  TERM_UPDATE_STMT,
  TERM_DELETE_STMT,

  MAX_STMT                     /* Always at end! */
} fulltext_statement;
! 1054:
/* SQL text for each fulltext_statement, indexed by the enum value.
** These must exactly match the enum above.  A '%' in the text is
** replaced with "db.name" when the statement is prepared (see
** string_format()).  NULL entries are statements whose text depends on
** the table's column list and is therefore generated at run time.
*/
/* TODO(adam): Is there some risk that a statement (in particular,
** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
** query joins a virtual table to itself?  If so perhaps we should
** move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",

  /* TERM_SELECT */
  "select rowid, doclist from %_term where term = ? and segment = ?",
  /* TERM_SELECT_ALL */
  "select doclist from %_term where term = ? order by segment",
  /* TERM_INSERT */
  "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
  /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
  /* TERM_DELETE */ "delete from %_term where rowid = ?",
};
! 1076:
/*
** A connection to a fulltext index is an instance of the following
** structure.  The xCreate and xConnect methods create an instance
** of this structure and xDestroy and xDisconnect free that instance.
** All other methods receive a pointer to the structure as one of their
** arguments.
**
** The base member comes first so that a sqlite3_vtab pointer can be
** cast to a fulltext_vtab pointer (see cursor_vtab()).
*/
struct fulltext_vtab {
  sqlite3_vtab base;               /* Base class used by SQLite core */
  sqlite3 *db;                     /* The database connection */
  const char *zDb;                 /* logical database name */
  const char *zName;               /* virtual table name */
  int nColumn;                     /* number of columns in virtual table */
  char **azColumn;                 /* column names.  malloced */
  char **azContentColumn;          /* column names in content table; malloced */
  sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */

  /* Precompiled statements which we keep as long as the table is
  ** open.  Indexed by fulltext_statement; an entry stays NULL until
  ** sql_get_statement() first prepares it.
  */
  sqlite3_stmt *pFulltextStatements[MAX_STMT];
};
! 1099:
/*
** When the core wants to do a query, it creates a cursor using a
** call to xOpen.  This structure is an instance of a cursor.  It
** is destroyed by xClose.
*/
typedef struct fulltext_cursor {
  sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  int eof;                         /* True if at End Of Results */
  Query q;                         /* Parsed query string */
  Snippet snippet;                 /* Cached snippet for the current row */
  int iColumn;                     /* Column being searched */
  DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */
} fulltext_cursor;
! 1115:
! 1116: static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
! 1117: return (fulltext_vtab *) c->base.pVtab;
! 1118: }
! 1119:
! 1120: static const sqlite3_module fulltextModule; /* forward declaration */
! 1121:
! 1122: /* Append a list of strings separated by commas to a StringBuffer. */
! 1123: static void appendList(StringBuffer *sb, int nString, char **azString){
! 1124: int i;
! 1125: for(i=0; i<nString; ++i){
! 1126: if( i>0 ) append(sb, ", ");
! 1127: append(sb, azString[i]);
! 1128: }
! 1129: }
! 1130:
! 1131: /* Return a dynamically generated statement of the form
! 1132: * insert into %_content (rowid, ...) values (?, ...)
! 1133: */
! 1134: static const char *contentInsertStatement(fulltext_vtab *v){
! 1135: StringBuffer sb;
! 1136: int i;
! 1137:
! 1138: initStringBuffer(&sb);
! 1139: append(&sb, "insert into %_content (rowid, ");
! 1140: appendList(&sb, v->nColumn, v->azContentColumn);
! 1141: append(&sb, ") values (?");
! 1142: for(i=0; i<v->nColumn; ++i)
! 1143: append(&sb, ", ?");
! 1144: append(&sb, ")");
! 1145: return sb.s;
! 1146: }
! 1147:
! 1148: /* Return a dynamically generated statement of the form
! 1149: * update %_content set [col_0] = ?, [col_1] = ?, ...
! 1150: * where rowid = ?
! 1151: */
! 1152: static const char *contentUpdateStatement(fulltext_vtab *v){
! 1153: StringBuffer sb;
! 1154: int i;
! 1155:
! 1156: initStringBuffer(&sb);
! 1157: append(&sb, "update %_content set ");
! 1158: for(i=0; i<v->nColumn; ++i) {
! 1159: if( i>0 ){
! 1160: append(&sb, ", ");
! 1161: }
! 1162: append(&sb, v->azContentColumn[i]);
! 1163: append(&sb, " = ?");
! 1164: }
! 1165: append(&sb, " where rowid = ?");
! 1166: return sb.s;
! 1167: }
! 1168:
! 1169: /* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
! 1170: ** If the indicated statement has never been prepared, it is prepared
! 1171: ** and cached, otherwise the cached version is reset.
! 1172: */
! 1173: static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
! 1174: sqlite3_stmt **ppStmt){
! 1175: assert( iStmt<MAX_STMT );
! 1176: if( v->pFulltextStatements[iStmt]==NULL ){
! 1177: const char *zStmt;
! 1178: int rc;
! 1179: switch( iStmt ){
! 1180: case CONTENT_INSERT_STMT:
! 1181: zStmt = contentInsertStatement(v); break;
! 1182: case CONTENT_UPDATE_STMT:
! 1183: zStmt = contentUpdateStatement(v); break;
! 1184: default:
! 1185: zStmt = fulltext_zStatement[iStmt];
! 1186: }
! 1187: rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
! 1188: zStmt);
! 1189: if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
! 1190: if( rc!=SQLITE_OK ) return rc;
! 1191: } else {
! 1192: int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
! 1193: if( rc!=SQLITE_OK ) return rc;
! 1194: }
! 1195:
! 1196: *ppStmt = v->pFulltextStatements[iStmt];
! 1197: return SQLITE_OK;
! 1198: }
! 1199:
! 1200: /* Step the indicated statement, handling errors SQLITE_BUSY (by
! 1201: ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
! 1202: ** bindings to the new statement).
! 1203: ** TODO(adam): We should extend this function so that it can work with
! 1204: ** statements declared locally, not only globally cached statements.
! 1205: */
! 1206: static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
! 1207: sqlite3_stmt **ppStmt){
! 1208: int rc;
! 1209: sqlite3_stmt *s = *ppStmt;
! 1210: assert( iStmt<MAX_STMT );
! 1211: assert( s==v->pFulltextStatements[iStmt] );
! 1212:
! 1213: while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
! 1214: if( rc==SQLITE_BUSY ) continue;
! 1215: if( rc!=SQLITE_ERROR ) return rc;
! 1216:
! 1217: /* If an SQLITE_SCHEMA error has occurred, then finalizing this
! 1218: * statement is going to delete the fulltext_vtab structure. If
! 1219: * the statement just executed is in the pFulltextStatements[]
! 1220: * array, it will be finalized twice. So remove it before
! 1221: * calling sqlite3_finalize().
! 1222: */
! 1223: v->pFulltextStatements[iStmt] = NULL;
! 1224: rc = sqlite3_finalize(s);
! 1225: break;
! 1226: }
! 1227: return rc;
! 1228:
! 1229: err:
! 1230: sqlite3_finalize(s);
! 1231: return rc;
! 1232: }
! 1233:
! 1234: /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
! 1235: ** Useful for statements like UPDATE, where we expect no results.
! 1236: */
! 1237: static int sql_single_step_statement(fulltext_vtab *v,
! 1238: fulltext_statement iStmt,
! 1239: sqlite3_stmt **ppStmt){
! 1240: int rc = sql_step_statement(v, iStmt, ppStmt);
! 1241: return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
! 1242: }
! 1243:
! 1244: /* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
! 1245: static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
! 1246: sqlite3_value **pValues){
! 1247: sqlite3_stmt *s;
! 1248: int i;
! 1249: int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
! 1250: if( rc!=SQLITE_OK ) return rc;
! 1251:
! 1252: rc = sqlite3_bind_value(s, 1, rowid);
! 1253: if( rc!=SQLITE_OK ) return rc;
! 1254:
! 1255: for(i=0; i<v->nColumn; ++i){
! 1256: rc = sqlite3_bind_value(s, 2+i, pValues[i]);
! 1257: if( rc!=SQLITE_OK ) return rc;
! 1258: }
! 1259:
! 1260: return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
! 1261: }
! 1262:
! 1263: /* update %_content set col0 = pValues[0], col1 = pValues[1], ...
! 1264: * where rowid = [iRowid] */
! 1265: static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
! 1266: sqlite_int64 iRowid){
! 1267: sqlite3_stmt *s;
! 1268: int i;
! 1269: int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
! 1270: if( rc!=SQLITE_OK ) return rc;
! 1271:
! 1272: for(i=0; i<v->nColumn; ++i){
! 1273: rc = sqlite3_bind_value(s, 1+i, pValues[i]);
! 1274: if( rc!=SQLITE_OK ) return rc;
! 1275: }
! 1276:
! 1277: rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
! 1278: if( rc!=SQLITE_OK ) return rc;
! 1279:
! 1280: return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
! 1281: }
! 1282:
/* free() each of the nString strings in pString[] (NULL entries are
** allowed) and then the array itself.
*/
static void freeStringArray(int nString, const char **pString){
  int iLeft = nString;

  /* free(NULL) is a no-op, so NULL entries need no special casing. */
  while( iLeft-- > 0 ){
    free((void *) pString[iLeft]);
  }
  free((void *) pString);
}
! 1291:
! 1292: /* select * from %_content where rowid = [iRow]
! 1293: * The caller must delete the returned array and all strings in it.
! 1294: * null fields will be NULL in the returned array.
! 1295: *
! 1296: * TODO: Perhaps we should return pointer/length strings here for consistency
! 1297: * with other code which uses pointer/length. */
! 1298: static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
! 1299: const char ***pValues){
! 1300: sqlite3_stmt *s;
! 1301: const char **values;
! 1302: int i;
! 1303: int rc;
! 1304:
! 1305: *pValues = NULL;
! 1306:
! 1307: rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
! 1308: if( rc!=SQLITE_OK ) return rc;
! 1309:
! 1310: rc = sqlite3_bind_int64(s, 1, iRow);
! 1311: if( rc!=SQLITE_OK ) return rc;
! 1312:
! 1313: rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
! 1314: if( rc!=SQLITE_ROW ) return rc;
! 1315:
! 1316: values = (const char **) malloc(v->nColumn * sizeof(const char *));
! 1317: for(i=0; i<v->nColumn; ++i){
! 1318: if( sqlite3_column_type(s, i)==SQLITE_NULL ){
! 1319: values[i] = NULL;
! 1320: }else{
! 1321: values[i] = string_dup((char*)sqlite3_column_text(s, i));
! 1322: }
! 1323: }
! 1324:
! 1325: /* We expect only one row. We must execute another sqlite3_step()
! 1326: * to complete the iteration; otherwise the table will remain locked. */
! 1327: rc = sqlite3_step(s);
! 1328: if( rc==SQLITE_DONE ){
! 1329: *pValues = values;
! 1330: return SQLITE_OK;
! 1331: }
! 1332:
! 1333: freeStringArray(v->nColumn, values);
! 1334: return rc;
! 1335: }
! 1336:
! 1337: /* delete from %_content where rowid = [iRow ] */
! 1338: static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
! 1339: sqlite3_stmt *s;
! 1340: int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
! 1341: if( rc!=SQLITE_OK ) return rc;
! 1342:
! 1343: rc = sqlite3_bind_int64(s, 1, iRow);
! 1344: if( rc!=SQLITE_OK ) return rc;
! 1345:
! 1346: return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
! 1347: }
! 1348:
! 1349: /* select rowid, doclist from %_term
! 1350: * where term = [pTerm] and segment = [iSegment]
! 1351: * If found, returns SQLITE_ROW; the caller must free the
! 1352: * returned doclist. If no rows found, returns SQLITE_DONE. */
! 1353: static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
! 1354: int iSegment,
! 1355: sqlite_int64 *rowid, DocList *out){
! 1356: sqlite3_stmt *s;
! 1357: int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
! 1358: if( rc!=SQLITE_OK ) return rc;
! 1359:
! 1360: rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
! 1361: if( rc!=SQLITE_OK ) return rc;
! 1362:
! 1363: rc = sqlite3_bind_int(s, 2, iSegment);
! 1364: if( rc!=SQLITE_OK ) return rc;
! 1365:
! 1366: rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
! 1367: if( rc!=SQLITE_ROW ) return rc;
! 1368:
! 1369: *rowid = sqlite3_column_int64(s, 0);
! 1370: docListInit(out, DL_DEFAULT,
! 1371: sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
! 1372:
! 1373: /* We expect only one row. We must execute another sqlite3_step()
! 1374: * to complete the iteration; otherwise the table will remain locked. */
! 1375: rc = sqlite3_step(s);
! 1376: return rc==SQLITE_DONE ? SQLITE_ROW : rc;
! 1377: }
! 1378:
! 1379: /* Load the segment doclists for term pTerm and merge them in
! 1380: ** appropriate order into out. Returns SQLITE_OK if successful. If
! 1381: ** there are no segments for pTerm, successfully returns an empty
! 1382: ** doclist in out.
! 1383: **
! 1384: ** Each document consists of 1 or more "columns". The number of
! 1385: ** columns is v->nColumn. If iColumn==v->nColumn, then return
! 1386: ** position information about all columns. If iColumn<v->nColumn,
! 1387: ** then only return position information about the iColumn-th column
! 1388: ** (where the first column is 0).
! 1389: */
! 1390: static int term_select_all(
! 1391: fulltext_vtab *v, /* The fulltext index we are querying against */
! 1392: int iColumn, /* If <nColumn, only look at the iColumn-th column */
! 1393: const char *pTerm, /* The term whose posting lists we want */
! 1394: int nTerm, /* Number of bytes in pTerm */
! 1395: DocList *out /* Write the resulting doclist here */
! 1396: ){
! 1397: DocList doclist;
! 1398: sqlite3_stmt *s;
! 1399: int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
! 1400: if( rc!=SQLITE_OK ) return rc;
! 1401:
! 1402: rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
! 1403: if( rc!=SQLITE_OK ) return rc;
! 1404:
! 1405: docListInit(&doclist, DL_DEFAULT, 0, 0);
! 1406:
! 1407: /* TODO(shess) Handle schema and busy errors. */
! 1408: while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
! 1409: DocList old;
! 1410:
! 1411: /* TODO(shess) If we processed doclists from oldest to newest, we
! 1412: ** could skip the malloc() involved with the following call. For
! 1413: ** now, I'd rather keep this logic similar to index_insert_term().
! 1414: ** We could additionally drop elements when we see deletes, but
! 1415: ** that would require a distinct version of docListAccumulate().
! 1416: */
! 1417: docListInit(&old, DL_DEFAULT,
! 1418: sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
! 1419:
! 1420: if( iColumn<v->nColumn ){ /* querying a single column */
! 1421: docListRestrictColumn(&old, iColumn);
! 1422: }
! 1423:
! 1424: /* doclist contains the newer data, so write it over old. Then
! 1425: ** steal accumulated result for doclist.
! 1426: */
! 1427: docListAccumulate(&old, &doclist);
! 1428: docListDestroy(&doclist);
! 1429: doclist = old;
! 1430: }
! 1431: if( rc!=SQLITE_DONE ){
! 1432: docListDestroy(&doclist);
! 1433: return rc;
! 1434: }
! 1435:
! 1436: docListDiscardEmpty(&doclist);
! 1437: *out = doclist;
! 1438: return SQLITE_OK;
! 1439: }
! 1440:
! 1441: /* insert into %_term (rowid, term, segment, doclist)
! 1442: values ([piRowid], [pTerm], [iSegment], [doclist])
! 1443: ** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
! 1444: **
! 1445: ** NOTE(shess) piRowid is IN, with values of "space of int64" plus
! 1446: ** null, it is not used to pass data back to the caller.
! 1447: */
! 1448: static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
! 1449: const char *pTerm, int nTerm,
! 1450: int iSegment, DocList *doclist){
! 1451: sqlite3_stmt *s;
! 1452: int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
! 1453: if( rc!=SQLITE_OK ) return rc;
! 1454:
! 1455: if( piRowid==NULL ){
! 1456: rc = sqlite3_bind_null(s, 1);
! 1457: }else{
! 1458: rc = sqlite3_bind_int64(s, 1, *piRowid);
! 1459: }
! 1460: if( rc!=SQLITE_OK ) return rc;
! 1461:
! 1462: rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
! 1463: if( rc!=SQLITE_OK ) return rc;
! 1464:
! 1465: rc = sqlite3_bind_int(s, 3, iSegment);
! 1466: if( rc!=SQLITE_OK ) return rc;
! 1467:
! 1468: rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
! 1469: if( rc!=SQLITE_OK ) return rc;
! 1470:
! 1471: return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
! 1472: }
! 1473:
! 1474: /* update %_term set doclist = [doclist] where rowid = [rowid] */
! 1475: static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
! 1476: DocList *doclist){
! 1477: sqlite3_stmt *s;
! 1478: int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
! 1479: if( rc!=SQLITE_OK ) return rc;
! 1480:
! 1481: rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
! 1482: if( rc!=SQLITE_OK ) return rc;
! 1483:
! 1484: rc = sqlite3_bind_int64(s, 2, rowid);
! 1485: if( rc!=SQLITE_OK ) return rc;
! 1486:
! 1487: return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
! 1488: }
! 1489:
! 1490: static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
! 1491: sqlite3_stmt *s;
! 1492: int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
! 1493: if( rc!=SQLITE_OK ) return rc;
! 1494:
! 1495: rc = sqlite3_bind_int64(s, 1, rowid);
! 1496: if( rc!=SQLITE_OK ) return rc;
! 1497:
! 1498: return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
! 1499: }
! 1500:
! 1501: /*
! 1502: ** Free the memory used to contain a fulltext_vtab structure.
! 1503: */
! 1504: static void fulltext_vtab_destroy(fulltext_vtab *v){
! 1505: int iStmt, i;
! 1506:
! 1507: TRACE(("FTS1 Destroy %p\n", v));
! 1508: for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
! 1509: if( v->pFulltextStatements[iStmt]!=NULL ){
! 1510: sqlite3_finalize(v->pFulltextStatements[iStmt]);
! 1511: v->pFulltextStatements[iStmt] = NULL;
! 1512: }
! 1513: }
! 1514:
! 1515: if( v->pTokenizer!=NULL ){
! 1516: v->pTokenizer->pModule->xDestroy(v->pTokenizer);
! 1517: v->pTokenizer = NULL;
! 1518: }
! 1519:
! 1520: free(v->azColumn);
! 1521: for(i = 0; i < v->nColumn; ++i) {
! 1522: sqlite3_free(v->azContentColumn[i]);
! 1523: }
! 1524: free(v->azContentColumn);
! 1525: free(v);
! 1526: }
! 1527:
/*
** Token types for parsing the arguments to xConnect or xCreate.
** getToken() stores one of these values in *tokenType.
*/
#define TOKEN_EOF         0    /* End of file */
#define TOKEN_SPACE       1    /* Any kind of whitespace */
#define TOKEN_ID          2    /* An identifier */
#define TOKEN_STRING      3    /* A string literal */
#define TOKEN_PUNCT       4    /* A single punctuation character */
! 1536:
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters,
** isIdChar[X-0x20] must be 1.  The table starts at 0x20; characters
** below 0x20 are never identifier characters.
**
** Ticket #1066.  the SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** NOTE: IdChar() assigns its argument to a variable named "c", which
** must be declared in the scope where the macro is used.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
! 1560:
! 1561:
! 1562: /*
! 1563: ** Return the length of the token that begins at z[0].
! 1564: ** Store the token type in *tokenType before returning.
! 1565: */
! 1566: static int getToken(const char *z, int *tokenType){
! 1567: int i, c;
! 1568: switch( *z ){
! 1569: case 0: {
! 1570: *tokenType = TOKEN_EOF;
! 1571: return 0;
! 1572: }
! 1573: case ' ': case '\t': case '\n': case '\f': case '\r': {
! 1574: for(i=1; safe_isspace(z[i]); i++){}
! 1575: *tokenType = TOKEN_SPACE;
! 1576: return i;
! 1577: }
! 1578: case '`':
! 1579: case '\'':
! 1580: case '"': {
! 1581: int delim = z[0];
! 1582: for(i=1; (c=z[i])!=0; i++){
! 1583: if( c==delim ){
! 1584: if( z[i+1]==delim ){
! 1585: i++;
! 1586: }else{
! 1587: break;
! 1588: }
! 1589: }
! 1590: }
! 1591: *tokenType = TOKEN_STRING;
! 1592: return i + (c!=0);
! 1593: }
! 1594: case '[': {
! 1595: for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
! 1596: *tokenType = TOKEN_ID;
! 1597: return i;
! 1598: }
! 1599: default: {
! 1600: if( !IdChar(*z) ){
! 1601: break;
! 1602: }
! 1603: for(i=1; IdChar(z[i]); i++){}
! 1604: *tokenType = TOKEN_ID;
! 1605: return i;
! 1606: }
! 1607: }
! 1608: *tokenType = TOKEN_PUNCT;
! 1609: return 1;
! 1610: }
! 1611:
/*
** A token extracted from a string is an instance of the following
** structure.  The text is referenced in place, not copied.
*/
typedef struct Token {
  const char *z;       /* Pointer to token text.  Not '\000' terminated */
  short int n;         /* Length of the token text in bytes. */
                       /* (a short: lengths that do not fit are truncated) */
} Token;
! 1620:
! 1621: /*
! 1622: ** Given a input string (which is really one of the argv[] parameters
! 1623: ** passed into xConnect or xCreate) split the string up into tokens.
! 1624: ** Return an array of pointers to '\000' terminated strings, one string
! 1625: ** for each non-whitespace token.
! 1626: **
! 1627: ** The returned array is terminated by a single NULL pointer.
! 1628: **
! 1629: ** Space to hold the returned array is obtained from a single
! 1630: ** malloc and should be freed by passing the return value to free().
! 1631: ** The individual strings within the token list are all a part of
! 1632: ** the single memory allocation and will all be freed at once.
! 1633: */
! 1634: static char **tokenizeString(const char *z, int *pnToken){
! 1635: int nToken = 0;
! 1636: Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
! 1637: int n = 1;
! 1638: int e, i;
! 1639: int totalSize = 0;
! 1640: char **azToken;
! 1641: char *zCopy;
! 1642: while( n>0 ){
! 1643: n = getToken(z, &e);
! 1644: if( e!=TOKEN_SPACE ){
! 1645: aToken[nToken].z = z;
! 1646: aToken[nToken].n = n;
! 1647: nToken++;
! 1648: totalSize += n+1;
! 1649: }
! 1650: z += n;
! 1651: }
! 1652: azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
! 1653: zCopy = (char*)&azToken[nToken];
! 1654: nToken--;
! 1655: for(i=0; i<nToken; i++){
! 1656: azToken[i] = zCopy;
! 1657: n = aToken[i].n;
! 1658: memcpy(zCopy, aToken[i].z, n);
! 1659: zCopy[n] = 0;
! 1660: zCopy += n+1;
! 1661: }
! 1662: azToken[nToken] = 0;
! 1663: free(aToken);
! 1664: *pnToken = nToken;
! 1665: return azToken;
! 1666: }
! 1667:
/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters.  The conversion is done in-place.  If the
** input is NULL or does not begin with a quote character, then this
** routine is a no-op.
**
** A doubled closing-quote character inside the string stands for one
** literal quote character.  Conversion stops at the first undoubled
** closing quote; if there is none, everything after the opening quote
** is kept.  The result is always '\000' terminated.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
**     'it''s' becomes   it's
**     'abc    becomes   abc
*/
static void dequoteString(char *z){
  int quote;
  int i, j;
  if( z==0 ) return;
  quote = z[0];
  switch( quote ){
    case '\'':  break;
    case '"':   break;
    case '`':   break;                /* For MySQL compatibility */
    case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
    default:    return;
  }
  for(i=1, j=0; z[i]; i++){
    if( z[i]==quote ){
      if( z[i+1]!=quote ) break;      /* closing quote */
      i++;                            /* doubled quote: keep one copy */
    }
    z[j++] = z[i];
  }
  /* Terminate on every path, including a missing closing quote. */
  z[j] = 0;
}
! 1707:
/*
** The input azIn is a NULL-terminated list of tokens.  Remove the first
** token and all punctuation tokens.  Remove the quotes from
** around string literal tokens.  The list is edited in place and stays
** NULL-terminated.  A NULL azIn is ignored.
**
** A token is kept if it starts with an alphanumeric character or is
** longer than one character; the first kept token is the one dropped.
** If nothing qualifies, the result is an empty list.
**
** Example:
**
**     input:      tokenize chinese ( 'simplifed' , 'mixed' )
**     output:     chinese simplifed mixed
**
** Another example:
**
**     input:      delimiters ( '[' , ']' , '...' )
**     output:     [ ] ...
*/
static void tokenListToIdList(char **azIn){
  int i, j;
  if( azIn ){
    /* j is the next output slot; it starts at -1 so that the first
    ** qualifying token is dequoted but not stored. */
    for(i=0, j=-1; azIn[i]; i++){
      if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
        dequoteString(azIn[i]);
        if( j>=0 ){
          azIn[j] = azIn[i];
        }
        j++;
      }
    }
    /* No qualifying token at all: terminate at slot 0 rather than
    ** writing in front of the array. */
    if( j<0 ) j = 0;
    azIn[j] = 0;
  }
}
! 1738:
! 1739:
! 1740: /*
! 1741: ** Find the first alphanumeric token in the string zIn. Null-terminate
! 1742: ** this token. Remove any quotation marks. And return a pointer to
! 1743: ** the result.
! 1744: */
! 1745: static char *firstToken(char *zIn, char **pzTail){
! 1746: int n, ttype;
! 1747: while(1){
! 1748: n = getToken(zIn, &ttype);
! 1749: if( ttype==TOKEN_SPACE ){
! 1750: zIn += n;
! 1751: }else if( ttype==TOKEN_EOF ){
! 1752: *pzTail = zIn;
! 1753: return 0;
! 1754: }else{
! 1755: zIn[n] = 0;
! 1756: *pzTail = &zIn[1];
! 1757: dequoteString(zIn);
! 1758: return zIn;
! 1759: }
! 1760: }
! 1761: /*NOTREACHED*/
! 1762: }
! 1763:
/* Return true if...
**
**   *  s begins with the string t, ignoring case
**   *  The first character of s beyond t is neither '_' nor an
**      alphanumeric
**
** Ignore leading space in *s.
**
** To put it another way, return true if the first token of
** s[] is t[].
*/
static int startsWith(const char *s, const char *t){
  while( safe_isspace(*s) ) s++;

  /* Compare case-insensitively for the full length of t. */
  for( ; *t; s++, t++ ){
    if( safe_tolower(*s)!=safe_tolower(*t) ) return 0;
  }

  /* The match only counts if the word in s ends here. */
  return !( *s=='_' || safe_isalnum(*s) );
}
! 1782:
/*
** An instance of this structure defines the "spec" of a
** full text index.  This structure is populated by parseSpec
** and used by fulltextConnect and fulltextCreate.  The arrays are
** released with clearTableSpec().
*/
typedef struct TableSpec {
  const char *zDb;         /* Logical database name */
  const char *zName;       /* Name of the full-text index */
  int nColumn;             /* Number of columns to be indexed */
  char **azColumn;         /* Original names of columns to be indexed */
  char **azContentColumn;  /* Column names for %_content */
  char **azTokenizer;      /* Name of tokenizer and its arguments */
} TableSpec;
! 1796:
! 1797: /*
! 1798: ** Reclaim all of the memory used by a TableSpec
! 1799: */
! 1800: static void clearTableSpec(TableSpec *p) {
! 1801: free(p->azColumn);
! 1802: free(p->azContentColumn);
! 1803: free(p->azTokenizer);
! 1804: }
! 1805:
! 1806: /* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
! 1807: *
! 1808: * CREATE VIRTUAL TABLE email
! 1809: * USING fts1(subject, body, tokenize mytokenizer(myarg))
! 1810: *
! 1811: * We return parsed information in a TableSpec structure.
! 1812: *
! 1813: */
! 1814: static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
! 1815: char**pzErr){
! 1816: int i, n;
! 1817: char *z, *zDummy;
! 1818: char **azArg;
! 1819: const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */
! 1820:
! 1821: assert( argc>=3 );
! 1822: /* Current interface:
! 1823: ** argv[0] - module name
! 1824: ** argv[1] - database name
! 1825: ** argv[2] - table name
! 1826: ** argv[3..] - columns, optionally followed by tokenizer specification
! 1827: ** and snippet delimiters specification.
! 1828: */
! 1829:
! 1830: /* Make a copy of the complete argv[][] array in a single allocation.
! 1831: ** The argv[][] array is read-only and transient. We can write to the
! 1832: ** copy in order to modify things and the copy is persistent.
! 1833: */
! 1834: memset(pSpec, 0, sizeof(*pSpec));
! 1835: for(i=n=0; i<argc; i++){
! 1836: n += strlen(argv[i]) + 1;
! 1837: }
! 1838: azArg = malloc( sizeof(char*)*argc + n );
! 1839: if( azArg==0 ){
! 1840: return SQLITE_NOMEM;
! 1841: }
! 1842: z = (char*)&azArg[argc];
! 1843: for(i=0; i<argc; i++){
! 1844: azArg[i] = z;
! 1845: strcpy(z, argv[i]);
! 1846: z += strlen(z)+1;
! 1847: }
! 1848:
! 1849: /* Identify the column names and the tokenizer and delimiter arguments
! 1850: ** in the argv[][] array.
! 1851: */
! 1852: pSpec->zDb = azArg[1];
! 1853: pSpec->zName = azArg[2];
! 1854: pSpec->nColumn = 0;
! 1855: pSpec->azColumn = azArg;
! 1856: zTokenizer = "tokenize simple";
! 1857: for(i=3; i<argc; ++i){
! 1858: if( startsWith(azArg[i],"tokenize") ){
! 1859: zTokenizer = azArg[i];
! 1860: }else{
! 1861: z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
! 1862: pSpec->nColumn++;
! 1863: }
! 1864: }
! 1865: if( pSpec->nColumn==0 ){
! 1866: azArg[0] = "content";
! 1867: pSpec->nColumn = 1;
! 1868: }
! 1869:
! 1870: /*
! 1871: ** Construct the list of content column names.
! 1872: **
! 1873: ** Each content column name will be of the form cNNAAAA
! 1874: ** where NN is the column number and AAAA is the sanitized
! 1875: ** column name. "sanitized" means that special characters are
! 1876: ** converted to "_". The cNN prefix guarantees that all column
! 1877: ** names are unique.
! 1878: **
! 1879: ** The AAAA suffix is not strictly necessary. It is included
! 1880: ** for the convenience of people who might examine the generated
! 1881: ** %_content table and wonder what the columns are used for.
! 1882: */
! 1883: pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
! 1884: if( pSpec->azContentColumn==0 ){
! 1885: clearTableSpec(pSpec);
! 1886: return SQLITE_NOMEM;
! 1887: }
! 1888: for(i=0; i<pSpec->nColumn; i++){
! 1889: char *p;
! 1890: pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
! 1891: for (p = pSpec->azContentColumn[i]; *p ; ++p) {
! 1892: if( !safe_isalnum(*p) ) *p = '_';
! 1893: }
! 1894: }
! 1895:
! 1896: /*
! 1897: ** Parse the tokenizer specification string.
! 1898: */
! 1899: pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
! 1900: tokenListToIdList(pSpec->azTokenizer);
! 1901:
! 1902: return SQLITE_OK;
! 1903: }
! 1904:
/*
** Build the CREATE TABLE statement that describes the schema of the
** virtual table: one column per entry of azColumn[], followed by a
** final column named after the table itself.
**
** The returned string comes from sqlite3_mprintf(); the caller
** releases it with sqlite3_free().
*/
static char *fulltextSchema(
  int nColumn,                  /* Number of columns */
  const char *const* azColumn,  /* List of columns */
  const char *zTableName        /* Name of the table */
){
  char *zSql = sqlite3_mprintf("CREATE TABLE x");
  char *zMore;
  int iCol = 0;

  /* Grow the statement one quoted column name at a time.  The first
  ** name is introduced by "(" and every later one by ",". */
  while( iCol<nColumn ){
    zMore = sqlite3_mprintf("%s%s%Q", zSql, iCol==0 ? "(" : ",",
                            azColumn[iCol]);
    sqlite3_free(zSql);
    zSql = zMore;
    iCol++;
  }

  zMore = sqlite3_mprintf("%s,%Q)", zSql, zTableName);
  sqlite3_free(zSql);
  return zMore;
}
! 1931:
! 1932: /*
! 1933: ** Build a new sqlite3_vtab structure that will describe the
! 1934: ** fulltext index defined by spec.
! 1935: */
! 1936: static int constructVtab(
! 1937: sqlite3 *db, /* The SQLite database connection */
! 1938: TableSpec *spec, /* Parsed spec information from parseSpec() */
! 1939: sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
! 1940: char **pzErr /* Write any error message here */
! 1941: ){
! 1942: int rc;
! 1943: int n;
! 1944: fulltext_vtab *v = 0;
! 1945: const sqlite3_tokenizer_module *m = NULL;
! 1946: char *schema;
! 1947:
! 1948: v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
! 1949: if( v==0 ) return SQLITE_NOMEM;
! 1950: memset(v, 0, sizeof(*v));
! 1951: /* sqlite will initialize v->base */
! 1952: v->db = db;
! 1953: v->zDb = spec->zDb; /* Freed when azColumn is freed */
! 1954: v->zName = spec->zName; /* Freed when azColumn is freed */
! 1955: v->nColumn = spec->nColumn;
! 1956: v->azContentColumn = spec->azContentColumn;
! 1957: spec->azContentColumn = 0;
! 1958: v->azColumn = spec->azColumn;
! 1959: spec->azColumn = 0;
! 1960:
! 1961: if( spec->azTokenizer==0 ){
! 1962: return SQLITE_NOMEM;
! 1963: }
! 1964: /* TODO(shess) For now, add new tokenizers as else if clauses. */
! 1965: if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
! 1966: sqlite3Fts1SimpleTokenizerModule(&m);
! 1967: }else if( startsWith(spec->azTokenizer[0], "porter") ){
! 1968: sqlite3Fts1PorterTokenizerModule(&m);
! 1969: }else{
! 1970: *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
! 1971: rc = SQLITE_ERROR;
! 1972: goto err;
! 1973: }
! 1974: for(n=0; spec->azTokenizer[n]; n++){}
! 1975: if( n ){
! 1976: rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
! 1977: &v->pTokenizer);
! 1978: }else{
! 1979: rc = m->xCreate(0, 0, &v->pTokenizer);
! 1980: }
! 1981: if( rc!=SQLITE_OK ) goto err;
! 1982: v->pTokenizer->pModule = m;
! 1983:
! 1984: /* TODO: verify the existence of backing tables foo_content, foo_term */
! 1985:
! 1986: schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
! 1987: spec->zName);
! 1988: rc = sqlite3_declare_vtab(db, schema);
! 1989: sqlite3_free(schema);
! 1990: if( rc!=SQLITE_OK ) goto err;
! 1991:
! 1992: memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
! 1993:
! 1994: *ppVTab = &v->base;
! 1995: TRACE(("FTS1 Connect %p\n", v));
! 1996:
! 1997: return rc;
! 1998:
! 1999: err:
! 2000: fulltext_vtab_destroy(v);
! 2001: return rc;
! 2002: }
! 2003:
! 2004: static int fulltextConnect(
! 2005: sqlite3 *db,
! 2006: void *pAux,
! 2007: int argc, const char *const*argv,
! 2008: sqlite3_vtab **ppVTab,
! 2009: char **pzErr
! 2010: ){
! 2011: TableSpec spec;
! 2012: int rc = parseSpec(&spec, argc, argv, pzErr);
! 2013: if( rc!=SQLITE_OK ) return rc;
! 2014:
! 2015: rc = constructVtab(db, &spec, ppVTab, pzErr);
! 2016: clearTableSpec(&spec);
! 2017: return rc;
! 2018: }
! 2019:
! 2020: /* The %_content table holds the text of each document, with
! 2021: ** the rowid used as the docid.
! 2022: **
! 2023: ** The %_term table maps each term to a document list blob
! 2024: ** containing elements sorted by ascending docid, each element
! 2025: ** encoded as:
! 2026: **
! 2027: ** docid varint-encoded
! 2028: ** token elements:
! 2029: ** position+1 varint-encoded as delta from previous position
! 2030: ** start offset varint-encoded as delta from previous start offset
! 2031: ** end offset varint-encoded as delta from start offset
! 2032: **
! 2033: ** The sentinel position of 0 indicates the end of the token list.
! 2034: **
! 2035: ** Additionally, doclist blobs are chunked into multiple segments,
! 2036: ** using segment to order the segments. New elements are added to
! 2037: ** the segment at segment 0, until it exceeds CHUNK_MAX. Then
! 2038: ** segment 0 is deleted, and the doclist is inserted at segment 1.
! 2039: ** If there is already a doclist at segment 1, the segment 0 doclist
! 2040: ** is merged with it, the segment 1 doclist is deleted, and the
! 2041: ** merged doclist is inserted at segment 2, repeating those
! 2042: ** operations until an insert succeeds.
! 2043: **
! 2044: ** Since this structure doesn't allow us to update elements in place
! 2045: ** in case of deletion or update, these are simply written to
! 2046: ** segment 0 (with an empty token list in case of deletion), with
! 2047: ** docListAccumulate() taking care to retain lower-segment
! 2048: ** information in preference to higher-segment information.
! 2049: */
! 2050: /* TODO(shess) Provide a VACUUM type operation which both removes
! 2051: ** deleted elements which are no longer necessary, and duplicated
! 2052: ** elements. I suspect this will probably not be necessary in
! 2053: ** practice, though.
! 2054: */
! 2055: static int fulltextCreate(sqlite3 *db, void *pAux,
! 2056: int argc, const char * const *argv,
! 2057: sqlite3_vtab **ppVTab, char **pzErr){
! 2058: int rc;
! 2059: TableSpec spec;
! 2060: StringBuffer schema;
! 2061: TRACE(("FTS1 Create\n"));
! 2062:
! 2063: rc = parseSpec(&spec, argc, argv, pzErr);
! 2064: if( rc!=SQLITE_OK ) return rc;
! 2065:
! 2066: initStringBuffer(&schema);
! 2067: append(&schema, "CREATE TABLE %_content(");
! 2068: appendList(&schema, spec.nColumn, spec.azContentColumn);
! 2069: append(&schema, ")");
! 2070: rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
! 2071: free(schema.s);
! 2072: if( rc!=SQLITE_OK ) goto out;
! 2073:
! 2074: rc = sql_exec(db, spec.zDb, spec.zName,
! 2075: "create table %_term(term text, segment integer, doclist blob, "
! 2076: "primary key(term, segment));");
! 2077: if( rc!=SQLITE_OK ) goto out;
! 2078:
! 2079: rc = constructVtab(db, &spec, ppVTab, pzErr);
! 2080:
! 2081: out:
! 2082: clearTableSpec(&spec);
! 2083: return rc;
! 2084: }
! 2085:
! 2086: /* Decide how to handle an SQL query. */
! 2087: static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
! 2088: int i;
! 2089: TRACE(("FTS1 BestIndex\n"));
! 2090:
! 2091: for(i=0; i<pInfo->nConstraint; ++i){
! 2092: const struct sqlite3_index_constraint *pConstraint;
! 2093: pConstraint = &pInfo->aConstraint[i];
! 2094: if( pConstraint->usable ) {
! 2095: if( pConstraint->iColumn==-1 &&
! 2096: pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
! 2097: pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */
! 2098: TRACE(("FTS1 QUERY_ROWID\n"));
! 2099: } else if( pConstraint->iColumn>=0 &&
! 2100: pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
! 2101: /* full-text search */
! 2102: pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
! 2103: TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
! 2104: } else continue;
! 2105:
! 2106: pInfo->aConstraintUsage[i].argvIndex = 1;
! 2107: pInfo->aConstraintUsage[i].omit = 1;
! 2108:
! 2109: /* An arbitrary value for now.
! 2110: * TODO: Perhaps rowid matches should be considered cheaper than
! 2111: * full-text searches. */
! 2112: pInfo->estimatedCost = 1.0;
! 2113:
! 2114: return SQLITE_OK;
! 2115: }
! 2116: }
! 2117: pInfo->idxNum = QUERY_GENERIC;
! 2118: return SQLITE_OK;
! 2119: }
! 2120:
! 2121: static int fulltextDisconnect(sqlite3_vtab *pVTab){
! 2122: TRACE(("FTS1 Disconnect %p\n", pVTab));
! 2123: fulltext_vtab_destroy((fulltext_vtab *)pVTab);
! 2124: return SQLITE_OK;
! 2125: }
! 2126:
! 2127: static int fulltextDestroy(sqlite3_vtab *pVTab){
! 2128: fulltext_vtab *v = (fulltext_vtab *)pVTab;
! 2129: int rc;
! 2130:
! 2131: TRACE(("FTS1 Destroy %p\n", pVTab));
! 2132: rc = sql_exec(v->db, v->zDb, v->zName,
! 2133: "drop table if exists %_content;"
! 2134: "drop table if exists %_term;"
! 2135: );
! 2136: if( rc!=SQLITE_OK ) return rc;
! 2137:
! 2138: fulltext_vtab_destroy((fulltext_vtab *)pVTab);
! 2139: return SQLITE_OK;
! 2140: }
! 2141:
! 2142: static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
! 2143: fulltext_cursor *c;
! 2144:
! 2145: c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
! 2146: /* sqlite will initialize c->base */
! 2147: *ppCursor = &c->base;
! 2148: TRACE(("FTS1 Open %p: %p\n", pVTab, c));
! 2149:
! 2150: return SQLITE_OK;
! 2151: }
! 2152:
! 2153:
! 2154: /* Free all of the dynamically allocated memory held by *q
! 2155: */
! 2156: static void queryClear(Query *q){
! 2157: int i;
! 2158: for(i = 0; i < q->nTerms; ++i){
! 2159: free(q->pTerms[i].pTerm);
! 2160: }
! 2161: free(q->pTerms);
! 2162: memset(q, 0, sizeof(*q));
! 2163: }
! 2164:
! 2165: /* Free all of the dynamically allocated memory held by the
! 2166: ** Snippet
! 2167: */
! 2168: static void snippetClear(Snippet *p){
! 2169: free(p->aMatch);
! 2170: free(p->zOffset);
! 2171: free(p->zSnippet);
! 2172: memset(p, 0, sizeof(*p));
! 2173: }
! 2174: /*
! 2175: ** Append a single entry to the p->aMatch[] log.
! 2176: */
! 2177: static void snippetAppendMatch(
! 2178: Snippet *p, /* Append the entry to this snippet */
! 2179: int iCol, int iTerm, /* The column and query term */
! 2180: int iStart, int nByte /* Offset and size of the match */
! 2181: ){
! 2182: int i;
! 2183: struct snippetMatch *pMatch;
! 2184: if( p->nMatch+1>=p->nAlloc ){
! 2185: p->nAlloc = p->nAlloc*2 + 10;
! 2186: p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
! 2187: if( p->aMatch==0 ){
! 2188: p->nMatch = 0;
! 2189: p->nAlloc = 0;
! 2190: return;
! 2191: }
! 2192: }
! 2193: i = p->nMatch++;
! 2194: pMatch = &p->aMatch[i];
! 2195: pMatch->iCol = iCol;
! 2196: pMatch->iTerm = iTerm;
! 2197: pMatch->iStart = iStart;
! 2198: pMatch->nByte = nByte;
! 2199: }
! 2200:
/*
** Size of the circular buffer used in snippetOffsetsOfColumn(), and
** the mask that wraps an index into it.  The size has to stay a
** power of two for the mask to work.
*/
#define FTS1_ROTOR_SZ   (1<<5)
#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
! 2206:
! 2207: /*
! 2208: ** Add entries to pSnippet->aMatch[] for every match that occurs against
! 2209: ** document zDoc[0..nDoc-1] which is stored in column iColumn.
! 2210: */
! 2211: static void snippetOffsetsOfColumn(
! 2212: Query *pQuery,
! 2213: Snippet *pSnippet,
! 2214: int iColumn,
! 2215: const char *zDoc,
! 2216: int nDoc
! 2217: ){
! 2218: const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */
! 2219: sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */
! 2220: sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */
! 2221: fulltext_vtab *pVtab; /* The full text index */
! 2222: int nColumn; /* Number of columns in the index */
! 2223: const QueryTerm *aTerm; /* Query string terms */
! 2224: int nTerm; /* Number of query string terms */
! 2225: int i, j; /* Loop counters */
! 2226: int rc; /* Return code */
! 2227: unsigned int match, prevMatch; /* Phrase search bitmasks */
! 2228: const char *zToken; /* Next token from the tokenizer */
! 2229: int nToken; /* Size of zToken */
! 2230: int iBegin, iEnd, iPos; /* Offsets of beginning and end */
! 2231:
! 2232: /* The following variables keep a circular buffer of the last
! 2233: ** few tokens */
! 2234: unsigned int iRotor = 0; /* Index of current token */
! 2235: int iRotorBegin[FTS1_ROTOR_SZ]; /* Beginning offset of token */
! 2236: int iRotorLen[FTS1_ROTOR_SZ]; /* Length of token */
! 2237:
! 2238: pVtab = pQuery->pFts;
! 2239: nColumn = pVtab->nColumn;
! 2240: pTokenizer = pVtab->pTokenizer;
! 2241: pTModule = pTokenizer->pModule;
! 2242: rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
! 2243: if( rc ) return;
! 2244: pTCursor->pTokenizer = pTokenizer;
! 2245: aTerm = pQuery->pTerms;
! 2246: nTerm = pQuery->nTerms;
! 2247: if( nTerm>=FTS1_ROTOR_SZ ){
! 2248: nTerm = FTS1_ROTOR_SZ - 1;
! 2249: }
! 2250: prevMatch = 0;
! 2251: while(1){
! 2252: rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
! 2253: if( rc ) break;
! 2254: iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
! 2255: iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
! 2256: match = 0;
! 2257: for(i=0; i<nTerm; i++){
! 2258: int iCol;
! 2259: iCol = aTerm[i].iColumn;
! 2260: if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
! 2261: if( aTerm[i].nTerm!=nToken ) continue;
! 2262: if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
! 2263: if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
! 2264: match |= 1<<i;
! 2265: if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
! 2266: for(j=aTerm[i].iPhrase-1; j>=0; j--){
! 2267: int k = (iRotor-j) & FTS1_ROTOR_MASK;
! 2268: snippetAppendMatch(pSnippet, iColumn, i-j,
! 2269: iRotorBegin[k], iRotorLen[k]);
! 2270: }
! 2271: }
! 2272: }
! 2273: prevMatch = match<<1;
! 2274: iRotor++;
! 2275: }
! 2276: pTModule->xClose(pTCursor);
! 2277: }
! 2278:
! 2279:
! 2280: /*
! 2281: ** Compute all offsets for the current row of the query.
! 2282: ** If the offsets have already been computed, this routine is a no-op.
! 2283: */
! 2284: static void snippetAllOffsets(fulltext_cursor *p){
! 2285: int nColumn;
! 2286: int iColumn, i;
! 2287: int iFirst, iLast;
! 2288: fulltext_vtab *pFts;
! 2289:
! 2290: if( p->snippet.nMatch ) return;
! 2291: if( p->q.nTerms==0 ) return;
! 2292: pFts = p->q.pFts;
! 2293: nColumn = pFts->nColumn;
! 2294: iColumn = p->iCursorType - QUERY_FULLTEXT;
! 2295: if( iColumn<0 || iColumn>=nColumn ){
! 2296: iFirst = 0;
! 2297: iLast = nColumn-1;
! 2298: }else{
! 2299: iFirst = iColumn;
! 2300: iLast = iColumn;
! 2301: }
! 2302: for(i=iFirst; i<=iLast; i++){
! 2303: const char *zDoc;
! 2304: int nDoc;
! 2305: zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
! 2306: nDoc = sqlite3_column_bytes(p->pStmt, i+1);
! 2307: snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
! 2308: }
! 2309: }
! 2310:
! 2311: /*
! 2312: ** Convert the information in the aMatch[] array of the snippet
! 2313: ** into the string zOffset[0..nOffset-1].
! 2314: */
! 2315: static void snippetOffsetText(Snippet *p){
! 2316: int i;
! 2317: int cnt = 0;
! 2318: StringBuffer sb;
! 2319: char zBuf[200];
! 2320: if( p->zOffset ) return;
! 2321: initStringBuffer(&sb);
! 2322: for(i=0; i<p->nMatch; i++){
! 2323: struct snippetMatch *pMatch = &p->aMatch[i];
! 2324: zBuf[0] = ' ';
! 2325: sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
! 2326: pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
! 2327: append(&sb, zBuf);
! 2328: cnt++;
! 2329: }
! 2330: p->zOffset = sb.s;
! 2331: p->nOffset = sb.len;
! 2332: }
! 2333:
! 2334: /*
! 2335: ** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set
! 2336: ** of matching words some of which might be in zDoc. zDoc is column
! 2337: ** number iCol.
! 2338: **
! 2339: ** iBreak is suggested spot in zDoc where we could begin or end an
! 2340: ** excerpt. Return a value similar to iBreak but possibly adjusted
! 2341: ** to be a little left or right so that the break point is better.
! 2342: */
! 2343: static int wordBoundary(
! 2344: int iBreak, /* The suggested break point */
! 2345: const char *zDoc, /* Document text */
! 2346: int nDoc, /* Number of bytes in zDoc[] */
! 2347: struct snippetMatch *aMatch, /* Matching words */
! 2348: int nMatch, /* Number of entries in aMatch[] */
! 2349: int iCol /* The column number for zDoc[] */
! 2350: ){
! 2351: int i;
! 2352: if( iBreak<=10 ){
! 2353: return 0;
! 2354: }
! 2355: if( iBreak>=nDoc-10 ){
! 2356: return nDoc;
! 2357: }
! 2358: for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
! 2359: while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
! 2360: if( i<nMatch ){
! 2361: if( aMatch[i].iStart<iBreak+10 ){
! 2362: return aMatch[i].iStart;
! 2363: }
! 2364: if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
! 2365: return aMatch[i-1].iStart;
! 2366: }
! 2367: }
! 2368: for(i=1; i<=10; i++){
! 2369: if( safe_isspace(zDoc[iBreak-i]) ){
! 2370: return iBreak - i + 1;
! 2371: }
! 2372: if( safe_isspace(zDoc[iBreak+i]) ){
! 2373: return iBreak + i + 1;
! 2374: }
! 2375: }
! 2376: return iBreak;
! 2377: }
! 2378:
! 2379: /*
! 2380: ** If the StringBuffer does not end in white space, add a single
! 2381: ** space character to the end.
! 2382: */
! 2383: static void appendWhiteSpace(StringBuffer *p){
! 2384: if( p->len==0 ) return;
! 2385: if( safe_isspace(p->s[p->len-1]) ) return;
! 2386: append(p, " ");
! 2387: }
! 2388:
! 2389: /*
! 2390: ** Remove white space from teh end of the StringBuffer
! 2391: */
! 2392: static void trimWhiteSpace(StringBuffer *p){
! 2393: while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
! 2394: p->len--;
! 2395: }
! 2396: }
! 2397:
! 2398:
! 2399:
/*
** Values stored in Snippet.aMatch[].snStatus while a snippet is
** being assembled.
*/
#define SNIPPET_IGNORE  0   /* This match may be left out of the snippet */
#define SNIPPET_DESIRED 1   /* This match should appear in the snippet */
! 2405:
! 2406: /*
! 2407: ** Generate the text of a snippet.
! 2408: */
! 2409: static void snippetText(
! 2410: fulltext_cursor *pCursor, /* The cursor we need the snippet for */
! 2411: const char *zStartMark, /* Markup to appear before each match */
! 2412: const char *zEndMark, /* Markup to appear after each match */
! 2413: const char *zEllipsis /* Ellipsis mark */
! 2414: ){
! 2415: int i, j;
! 2416: struct snippetMatch *aMatch;
! 2417: int nMatch;
! 2418: int nDesired;
! 2419: StringBuffer sb;
! 2420: int tailCol;
! 2421: int tailOffset;
! 2422: int iCol;
! 2423: int nDoc;
! 2424: const char *zDoc;
! 2425: int iStart, iEnd;
! 2426: int tailEllipsis = 0;
! 2427: int iMatch;
! 2428:
! 2429:
! 2430: free(pCursor->snippet.zSnippet);
! 2431: pCursor->snippet.zSnippet = 0;
! 2432: aMatch = pCursor->snippet.aMatch;
! 2433: nMatch = pCursor->snippet.nMatch;
! 2434: initStringBuffer(&sb);
! 2435:
! 2436: for(i=0; i<nMatch; i++){
! 2437: aMatch[i].snStatus = SNIPPET_IGNORE;
! 2438: }
! 2439: nDesired = 0;
! 2440: for(i=0; i<pCursor->q.nTerms; i++){
! 2441: for(j=0; j<nMatch; j++){
! 2442: if( aMatch[j].iTerm==i ){
! 2443: aMatch[j].snStatus = SNIPPET_DESIRED;
! 2444: nDesired++;
! 2445: break;
! 2446: }
! 2447: }
! 2448: }
! 2449:
! 2450: iMatch = 0;
! 2451: tailCol = -1;
! 2452: tailOffset = 0;
! 2453: for(i=0; i<nMatch && nDesired>0; i++){
! 2454: if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
! 2455: nDesired--;
! 2456: iCol = aMatch[i].iCol;
! 2457: zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
! 2458: nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
! 2459: iStart = aMatch[i].iStart - 40;
! 2460: iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
! 2461: if( iStart<=10 ){
! 2462: iStart = 0;
! 2463: }
! 2464: if( iCol==tailCol && iStart<=tailOffset+20 ){
! 2465: iStart = tailOffset;
! 2466: }
! 2467: if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
! 2468: trimWhiteSpace(&sb);
! 2469: appendWhiteSpace(&sb);
! 2470: append(&sb, zEllipsis);
! 2471: appendWhiteSpace(&sb);
! 2472: }
! 2473: iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
! 2474: iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
! 2475: if( iEnd>=nDoc-10 ){
! 2476: iEnd = nDoc;
! 2477: tailEllipsis = 0;
! 2478: }else{
! 2479: tailEllipsis = 1;
! 2480: }
! 2481: while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
! 2482: while( iStart<iEnd ){
! 2483: while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
! 2484: && aMatch[iMatch].iCol<=iCol ){
! 2485: iMatch++;
! 2486: }
! 2487: if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
! 2488: && aMatch[iMatch].iCol==iCol ){
! 2489: nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
! 2490: iStart = aMatch[iMatch].iStart;
! 2491: append(&sb, zStartMark);
! 2492: nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
! 2493: append(&sb, zEndMark);
! 2494: iStart += aMatch[iMatch].nByte;
! 2495: for(j=iMatch+1; j<nMatch; j++){
! 2496: if( aMatch[j].iTerm==aMatch[iMatch].iTerm
! 2497: && aMatch[j].snStatus==SNIPPET_DESIRED ){
! 2498: nDesired--;
! 2499: aMatch[j].snStatus = SNIPPET_IGNORE;
! 2500: }
! 2501: }
! 2502: }else{
! 2503: nappend(&sb, &zDoc[iStart], iEnd - iStart);
! 2504: iStart = iEnd;
! 2505: }
! 2506: }
! 2507: tailCol = iCol;
! 2508: tailOffset = iEnd;
! 2509: }
! 2510: trimWhiteSpace(&sb);
! 2511: if( tailEllipsis ){
! 2512: appendWhiteSpace(&sb);
! 2513: append(&sb, zEllipsis);
! 2514: }
! 2515: pCursor->snippet.zSnippet = sb.s;
! 2516: pCursor->snippet.nSnippet = sb.len;
! 2517: }
! 2518:
! 2519:
! 2520: /*
! 2521: ** Close the cursor. For additional information see the documentation
! 2522: ** on the xClose method of the virtual table interface.
! 2523: */
! 2524: static int fulltextClose(sqlite3_vtab_cursor *pCursor){
! 2525: fulltext_cursor *c = (fulltext_cursor *) pCursor;
! 2526: TRACE(("FTS1 Close %p\n", c));
! 2527: sqlite3_finalize(c->pStmt);
! 2528: queryClear(&c->q);
! 2529: snippetClear(&c->snippet);
! 2530: if( c->result.pDoclist!=NULL ){
! 2531: docListDelete(c->result.pDoclist);
! 2532: }
! 2533: free(c);
! 2534: return SQLITE_OK;
! 2535: }
! 2536:
! 2537: static int fulltextNext(sqlite3_vtab_cursor *pCursor){
! 2538: fulltext_cursor *c = (fulltext_cursor *) pCursor;
! 2539: sqlite_int64 iDocid;
! 2540: int rc;
! 2541:
! 2542: TRACE(("FTS1 Next %p\n", pCursor));
! 2543: snippetClear(&c->snippet);
! 2544: if( c->iCursorType < QUERY_FULLTEXT ){
! 2545: /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
! 2546: rc = sqlite3_step(c->pStmt);
! 2547: switch( rc ){
! 2548: case SQLITE_ROW:
! 2549: c->eof = 0;
! 2550: return SQLITE_OK;
! 2551: case SQLITE_DONE:
! 2552: c->eof = 1;
! 2553: return SQLITE_OK;
! 2554: default:
! 2555: c->eof = 1;
! 2556: return rc;
! 2557: }
! 2558: } else { /* full-text query */
! 2559: rc = sqlite3_reset(c->pStmt);
! 2560: if( rc!=SQLITE_OK ) return rc;
! 2561:
! 2562: iDocid = nextDocid(&c->result);
! 2563: if( iDocid==0 ){
! 2564: c->eof = 1;
! 2565: return SQLITE_OK;
! 2566: }
! 2567: rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
! 2568: if( rc!=SQLITE_OK ) return rc;
! 2569: /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
! 2570: rc = sqlite3_step(c->pStmt);
! 2571: if( rc==SQLITE_ROW ){ /* the case we expect */
! 2572: c->eof = 0;
! 2573: return SQLITE_OK;
! 2574: }
! 2575: /* an error occurred; abort */
! 2576: return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
! 2577: }
! 2578: }
! 2579:
! 2580:
! 2581: /* Return a DocList corresponding to the query term *pTerm. If *pTerm
! 2582: ** is the first term of a phrase query, go ahead and evaluate the phrase
! 2583: ** query and return the doclist for the entire phrase query.
! 2584: **
! 2585: ** The result is stored in pTerm->doclist.
! 2586: */
! 2587: static int docListOfTerm(
! 2588: fulltext_vtab *v, /* The full text index */
! 2589: int iColumn, /* column to restrict to. No restrition if >=nColumn */
! 2590: QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */
! 2591: DocList **ppResult /* Write the result here */
! 2592: ){
! 2593: DocList *pLeft, *pRight, *pNew;
! 2594: int i, rc;
! 2595:
! 2596: pLeft = docListNew(DL_POSITIONS);
! 2597: rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
! 2598: if( rc ){
! 2599: docListDelete(pLeft);
! 2600: return rc;
! 2601: }
! 2602: for(i=1; i<=pQTerm->nPhrase; i++){
! 2603: pRight = docListNew(DL_POSITIONS);
! 2604: rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
! 2605: if( rc ){
! 2606: docListDelete(pLeft);
! 2607: return rc;
! 2608: }
! 2609: pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
! 2610: docListPhraseMerge(pLeft, pRight, pNew);
! 2611: docListDelete(pLeft);
! 2612: docListDelete(pRight);
! 2613: pLeft = pNew;
! 2614: }
! 2615: *ppResult = pLeft;
! 2616: return SQLITE_OK;
! 2617: }
! 2618:
! 2619: /* Add a new term pTerm[0..nTerm-1] to the query *q.
! 2620: */
! 2621: static void queryAdd(Query *q, const char *pTerm, int nTerm){
! 2622: QueryTerm *t;
! 2623: ++q->nTerms;
! 2624: q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
! 2625: if( q->pTerms==0 ){
! 2626: q->nTerms = 0;
! 2627: return;
! 2628: }
! 2629: t = &q->pTerms[q->nTerms - 1];
! 2630: memset(t, 0, sizeof(*t));
! 2631: t->pTerm = malloc(nTerm+1);
! 2632: memcpy(t->pTerm, pTerm, nTerm);
! 2633: t->pTerm[nTerm] = 0;
! 2634: t->nTerm = nTerm;
! 2635: t->isOr = q->nextIsOr;
! 2636: q->nextIsOr = 0;
! 2637: t->iColumn = q->nextColumn;
! 2638: q->nextColumn = q->dfltColumn;
! 2639: }
! 2640:
! 2641: /*
! 2642: ** Check to see if the string zToken[0...nToken-1] matches any
! 2643: ** column name in the virtual table. If it does,
! 2644: ** return the zero-indexed column number. If not, return -1.
! 2645: */
! 2646: static int checkColumnSpecifier(
! 2647: fulltext_vtab *pVtab, /* The virtual table */
! 2648: const char *zToken, /* Text of the token */
! 2649: int nToken /* Number of characters in the token */
! 2650: ){
! 2651: int i;
! 2652: for(i=0; i<pVtab->nColumn; i++){
! 2653: if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
! 2654: && pVtab->azColumn[i][nToken]==0 ){
! 2655: return i;
! 2656: }
! 2657: }
! 2658: return -1;
! 2659: }
! 2660:
! 2661: /*
! 2662: ** Parse the text at pSegment[0..nSegment-1]. Add additional terms
! 2663: ** to the query being assemblied in pQuery.
! 2664: **
! 2665: ** inPhrase is true if pSegment[0..nSegement-1] is contained within
! 2666: ** double-quotes. If inPhrase is true, then the first term
! 2667: ** is marked with the number of terms in the phrase less one and
! 2668: ** OR and "-" syntax is ignored. If inPhrase is false, then every
! 2669: ** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
! 2670: */
! 2671: static int tokenizeSegment(
! 2672: sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */
! 2673: const char *pSegment, int nSegment, /* Query expression being parsed */
! 2674: int inPhrase, /* True if within "..." */
! 2675: Query *pQuery /* Append results here */
! 2676: ){
! 2677: const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
! 2678: sqlite3_tokenizer_cursor *pCursor;
! 2679: int firstIndex = pQuery->nTerms;
! 2680: int iCol;
! 2681: int nTerm = 1;
! 2682:
! 2683: int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
! 2684: if( rc!=SQLITE_OK ) return rc;
! 2685: pCursor->pTokenizer = pTokenizer;
! 2686:
! 2687: while( 1 ){
! 2688: const char *pToken;
! 2689: int nToken, iBegin, iEnd, iPos;
! 2690:
! 2691: rc = pModule->xNext(pCursor,
! 2692: &pToken, &nToken,
! 2693: &iBegin, &iEnd, &iPos);
! 2694: if( rc!=SQLITE_OK ) break;
! 2695: if( !inPhrase &&
! 2696: pSegment[iEnd]==':' &&
! 2697: (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
! 2698: pQuery->nextColumn = iCol;
! 2699: continue;
! 2700: }
! 2701: if( !inPhrase && pQuery->nTerms>0 && nToken==2
! 2702: && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
! 2703: pQuery->nextIsOr = 1;
! 2704: continue;
! 2705: }
! 2706: queryAdd(pQuery, pToken, nToken);
! 2707: if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
! 2708: pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
! 2709: }
! 2710: pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
! 2711: if( inPhrase ){
! 2712: nTerm++;
! 2713: }
! 2714: }
! 2715:
! 2716: if( inPhrase && pQuery->nTerms>firstIndex ){
! 2717: pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
! 2718: }
! 2719:
! 2720: return pModule->xClose(pCursor);
! 2721: }
! 2722:
! 2723: /* Parse a query string, yielding a Query object pQuery.
! 2724: **
! 2725: ** The calling function will need to queryClear() to clean up
! 2726: ** the dynamically allocated memory held by pQuery.
! 2727: */
! 2728: static int parseQuery(
! 2729: fulltext_vtab *v, /* The fulltext index */
! 2730: const char *zInput, /* Input text of the query string */
! 2731: int nInput, /* Size of the input text */
! 2732: int dfltColumn, /* Default column of the index to match against */
! 2733: Query *pQuery /* Write the parse results here. */
! 2734: ){
! 2735: int iInput, inPhrase = 0;
! 2736:
! 2737: if( zInput==0 ) nInput = 0;
! 2738: if( nInput<0 ) nInput = strlen(zInput);
! 2739: pQuery->nTerms = 0;
! 2740: pQuery->pTerms = NULL;
! 2741: pQuery->nextIsOr = 0;
! 2742: pQuery->nextColumn = dfltColumn;
! 2743: pQuery->dfltColumn = dfltColumn;
! 2744: pQuery->pFts = v;
! 2745:
! 2746: for(iInput=0; iInput<nInput; ++iInput){
! 2747: int i;
! 2748: for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
! 2749: if( i>iInput ){
! 2750: tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
! 2751: pQuery);
! 2752: }
! 2753: iInput = i;
! 2754: if( i<nInput ){
! 2755: assert( zInput[i]=='"' );
! 2756: inPhrase = !inPhrase;
! 2757: }
! 2758: }
! 2759:
! 2760: if( inPhrase ){
! 2761: /* unmatched quote */
! 2762: queryClear(pQuery);
! 2763: return SQLITE_ERROR;
! 2764: }
! 2765: return SQLITE_OK;
! 2766: }
! 2767:
! 2768: /* Perform a full-text query using the search expression in
! 2769: ** zInput[0..nInput-1]. Return a list of matching documents
! 2770: ** in pResult.
! 2771: **
! 2772: ** Queries must match column iColumn. Or if iColumn>=nColumn
! 2773: ** they are allowed to match against any column.
! 2774: */
! 2775: static int fulltextQuery(
! 2776: fulltext_vtab *v, /* The full text index */
! 2777: int iColumn, /* Match against this column by default */
! 2778: const char *zInput, /* The query string */
! 2779: int nInput, /* Number of bytes in zInput[] */
! 2780: DocList **pResult, /* Write the result doclist here */
! 2781: Query *pQuery /* Put parsed query string here */
! 2782: ){
! 2783: int i, iNext, rc;
! 2784: DocList *pLeft = NULL;
! 2785: DocList *pRight, *pNew, *pOr;
! 2786: int nNot = 0;
! 2787: QueryTerm *aTerm;
! 2788:
! 2789: rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
! 2790: if( rc!=SQLITE_OK ) return rc;
! 2791:
! 2792: /* Merge AND terms. */
! 2793: aTerm = pQuery->pTerms;
! 2794: for(i = 0; i<pQuery->nTerms; i=iNext){
! 2795: if( aTerm[i].isNot ){
! 2796: /* Handle all NOT terms in a separate pass */
! 2797: nNot++;
! 2798: iNext = i + aTerm[i].nPhrase+1;
! 2799: continue;
! 2800: }
! 2801: iNext = i + aTerm[i].nPhrase + 1;
! 2802: rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
! 2803: if( rc ){
! 2804: queryClear(pQuery);
! 2805: return rc;
! 2806: }
! 2807: while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
! 2808: rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
! 2809: iNext += aTerm[iNext].nPhrase + 1;
! 2810: if( rc ){
! 2811: queryClear(pQuery);
! 2812: return rc;
! 2813: }
! 2814: pNew = docListNew(DL_DOCIDS);
! 2815: docListOrMerge(pRight, pOr, pNew);
! 2816: docListDelete(pRight);
! 2817: docListDelete(pOr);
! 2818: pRight = pNew;
! 2819: }
! 2820: if( pLeft==0 ){
! 2821: pLeft = pRight;
! 2822: }else{
! 2823: pNew = docListNew(DL_DOCIDS);
! 2824: docListAndMerge(pLeft, pRight, pNew);
! 2825: docListDelete(pRight);
! 2826: docListDelete(pLeft);
! 2827: pLeft = pNew;
! 2828: }
! 2829: }
! 2830:
! 2831: if( nNot && pLeft==0 ){
! 2832: /* We do not yet know how to handle a query of only NOT terms */
! 2833: return SQLITE_ERROR;
! 2834: }
! 2835:
! 2836: /* Do the EXCEPT terms */
! 2837: for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){
! 2838: if( !aTerm[i].isNot ) continue;
! 2839: rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
! 2840: if( rc ){
! 2841: queryClear(pQuery);
! 2842: docListDelete(pLeft);
! 2843: return rc;
! 2844: }
! 2845: pNew = docListNew(DL_DOCIDS);
! 2846: docListExceptMerge(pLeft, pRight, pNew);
! 2847: docListDelete(pRight);
! 2848: docListDelete(pLeft);
! 2849: pLeft = pNew;
! 2850: }
! 2851:
! 2852: *pResult = pLeft;
! 2853: return rc;
! 2854: }
! 2855:
! 2856: /*
! 2857: ** This is the xFilter interface for the virtual table. See
! 2858: ** the virtual table xFilter method documentation for additional
! 2859: ** information.
! 2860: **
! 2861: ** If idxNum==QUERY_GENERIC then do a full table scan against
! 2862: ** the %_content table.
! 2863: **
! 2864: ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
! 2865: ** in the %_content table.
! 2866: **
! 2867: ** If idxNum>=QUERY_FULLTEXT then use the full text index. The
! 2868: ** column on the left-hand side of the MATCH operator is column
! 2869: ** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand
! 2870: ** side of the MATCH operator.
! 2871: */
! 2872: /* TODO(shess) Upgrade the cursor initialization and destruction to
! 2873: ** account for fulltextFilter() being called multiple times on the
! 2874: ** same cursor. The current solution is very fragile. Apply fix to
! 2875: ** fts2 as appropriate.
! 2876: */
! 2877: static int fulltextFilter(
! 2878: sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
! 2879: int idxNum, const char *idxStr, /* Which indexing scheme to use */
! 2880: int argc, sqlite3_value **argv /* Arguments for the indexing scheme */
! 2881: ){
! 2882: fulltext_cursor *c = (fulltext_cursor *) pCursor;
! 2883: fulltext_vtab *v = cursor_vtab(c);
! 2884: int rc;
! 2885: char *zSql;
! 2886:
! 2887: TRACE(("FTS1 Filter %p\n",pCursor));
! 2888:
! 2889: zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
! 2890: idxNum==QUERY_GENERIC ? "" : "where rowid=?");
! 2891: sqlite3_finalize(c->pStmt);
! 2892: rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
! 2893: sqlite3_free(zSql);
! 2894: if( rc!=SQLITE_OK ) return rc;
! 2895:
! 2896: c->iCursorType = idxNum;
! 2897: switch( idxNum ){
! 2898: case QUERY_GENERIC:
! 2899: break;
! 2900:
! 2901: case QUERY_ROWID:
! 2902: rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
! 2903: if( rc!=SQLITE_OK ) return rc;
! 2904: break;
! 2905:
! 2906: default: /* full-text search */
! 2907: {
! 2908: const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
! 2909: DocList *pResult;
! 2910: assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
! 2911: assert( argc==1 );
! 2912: queryClear(&c->q);
! 2913: rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
! 2914: if( rc!=SQLITE_OK ) return rc;
! 2915: if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
! 2916: readerInit(&c->result, pResult);
! 2917: break;
! 2918: }
! 2919: }
! 2920:
! 2921: return fulltextNext(pCursor);
! 2922: }
! 2923:
! 2924: /* This is the xEof method of the virtual table. The SQLite core
! 2925: ** calls this routine to find out if it has reached the end of
! 2926: ** a query's results set.
! 2927: */
! 2928: static int fulltextEof(sqlite3_vtab_cursor *pCursor){
! 2929: fulltext_cursor *c = (fulltext_cursor *) pCursor;
! 2930: return c->eof;
! 2931: }
! 2932:
! 2933: /* This is the xColumn method of the virtual table. The SQLite
! 2934: ** core calls this method during a query when it needs the value
! 2935: ** of a column from the virtual table. This method needs to use
! 2936: ** one of the sqlite3_result_*() routines to store the requested
! 2937: ** value back in the pContext.
! 2938: */
! 2939: static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
! 2940: sqlite3_context *pContext, int idxCol){
! 2941: fulltext_cursor *c = (fulltext_cursor *) pCursor;
! 2942: fulltext_vtab *v = cursor_vtab(c);
! 2943:
! 2944: if( idxCol<v->nColumn ){
! 2945: sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
! 2946: sqlite3_result_value(pContext, pVal);
! 2947: }else if( idxCol==v->nColumn ){
! 2948: /* The extra column whose name is the same as the table.
! 2949: ** Return a blob which is a pointer to the cursor
! 2950: */
! 2951: sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
! 2952: }
! 2953: return SQLITE_OK;
! 2954: }
! 2955:
! 2956: /* This is the xRowid method. The SQLite core calls this routine to
! 2957: ** retrive the rowid for the current row of the result set. The
! 2958: ** rowid should be written to *pRowid.
! 2959: */
! 2960: static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
! 2961: fulltext_cursor *c = (fulltext_cursor *) pCursor;
! 2962:
! 2963: *pRowid = sqlite3_column_int64(c->pStmt, 0);
! 2964: return SQLITE_OK;
! 2965: }
! 2966:
! 2967: /* Add all terms in [zText] to the given hash table. If [iColumn] > 0,
! 2968: * we also store positions and offsets in the hash table using the given
! 2969: * column number. */
! 2970: static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
! 2971: const char *zText, int iColumn){
! 2972: sqlite3_tokenizer *pTokenizer = v->pTokenizer;
! 2973: sqlite3_tokenizer_cursor *pCursor;
! 2974: const char *pToken;
! 2975: int nTokenBytes;
! 2976: int iStartOffset, iEndOffset, iPosition;
! 2977: int rc;
! 2978:
! 2979: rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
! 2980: if( rc!=SQLITE_OK ) return rc;
! 2981:
! 2982: pCursor->pTokenizer = pTokenizer;
! 2983: while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
! 2984: &pToken, &nTokenBytes,
! 2985: &iStartOffset, &iEndOffset,
! 2986: &iPosition) ){
! 2987: DocList *p;
! 2988:
! 2989: /* Positions can't be negative; we use -1 as a terminator internally. */
! 2990: if( iPosition<0 ){
! 2991: pTokenizer->pModule->xClose(pCursor);
! 2992: return SQLITE_ERROR;
! 2993: }
! 2994:
! 2995: p = fts1HashFind(terms, pToken, nTokenBytes);
! 2996: if( p==NULL ){
! 2997: p = docListNew(DL_DEFAULT);
! 2998: docListAddDocid(p, iDocid);
! 2999: fts1HashInsert(terms, pToken, nTokenBytes, p);
! 3000: }
! 3001: if( iColumn>=0 ){
! 3002: docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
! 3003: }
! 3004: }
! 3005:
! 3006: /* TODO(shess) Check return? Should this be able to cause errors at
! 3007: ** this point? Actually, same question about sqlite3_finalize(),
! 3008: ** though one could argue that failure there means that the data is
! 3009: ** not durable. *ponder*
! 3010: */
! 3011: pTokenizer->pModule->xClose(pCursor);
! 3012: return rc;
! 3013: }
! 3014:
! 3015: /* Update the %_terms table to map the term [pTerm] to the given rowid. */
! 3016: static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
! 3017: DocList *d){
! 3018: sqlite_int64 iIndexRow;
! 3019: DocList doclist;
! 3020: int iSegment = 0, rc;
! 3021:
! 3022: rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
! 3023: if( rc==SQLITE_DONE ){
! 3024: docListInit(&doclist, DL_DEFAULT, 0, 0);
! 3025: docListUpdate(&doclist, d);
! 3026: /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
! 3027: rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
! 3028: goto err;
! 3029: }
! 3030: if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
! 3031:
! 3032: docListUpdate(&doclist, d);
! 3033: if( doclist.nData<=CHUNK_MAX ){
! 3034: rc = term_update(v, iIndexRow, &doclist);
! 3035: goto err;
! 3036: }
! 3037:
! 3038: /* Doclist doesn't fit, delete what's there, and accumulate
! 3039: ** forward.
! 3040: */
! 3041: rc = term_delete(v, iIndexRow);
! 3042: if( rc!=SQLITE_OK ) goto err;
! 3043:
! 3044: /* Try to insert the doclist into a higher segment bucket. On
! 3045: ** failure, accumulate existing doclist with the doclist from that
! 3046: ** bucket, and put results in the next bucket.
! 3047: */
! 3048: iSegment++;
! 3049: while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
! 3050: &doclist))!=SQLITE_OK ){
! 3051: sqlite_int64 iSegmentRow;
! 3052: DocList old;
! 3053: int rc2;
! 3054:
! 3055: /* Retain old error in case the term_insert() error was really an
! 3056: ** error rather than a bounced insert.
! 3057: */
! 3058: rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
! 3059: if( rc2!=SQLITE_ROW ) goto err;
! 3060:
! 3061: rc = term_delete(v, iSegmentRow);
! 3062: if( rc!=SQLITE_OK ) goto err;
! 3063:
! 3064: /* Reusing lowest-number deleted row keeps the index smaller. */
! 3065: if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
! 3066:
! 3067: /* doclist contains the newer data, so accumulate it over old.
! 3068: ** Then steal accumulated data for doclist.
! 3069: */
! 3070: docListAccumulate(&old, &doclist);
! 3071: docListDestroy(&doclist);
! 3072: doclist = old;
! 3073:
! 3074: iSegment++;
! 3075: }
! 3076:
! 3077: err:
! 3078: docListDestroy(&doclist);
! 3079: return rc;
! 3080: }
! 3081:
! 3082: /* Add doclists for all terms in [pValues] to the hash table [terms]. */
! 3083: static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
! 3084: sqlite3_value **pValues){
! 3085: int i;
! 3086: for(i = 0; i < v->nColumn ; ++i){
! 3087: char *zText = (char*)sqlite3_value_text(pValues[i]);
! 3088: int rc = buildTerms(v, terms, iRowid, zText, i);
! 3089: if( rc!=SQLITE_OK ) return rc;
! 3090: }
! 3091: return SQLITE_OK;
! 3092: }
! 3093:
! 3094: /* Add empty doclists for all terms in the given row's content to the hash
! 3095: * table [pTerms]. */
! 3096: static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
! 3097: const char **pValues;
! 3098: int i;
! 3099:
! 3100: int rc = content_select(v, iRowid, &pValues);
! 3101: if( rc!=SQLITE_OK ) return rc;
! 3102:
! 3103: for(i = 0 ; i < v->nColumn; ++i) {
! 3104: rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
! 3105: if( rc!=SQLITE_OK ) break;
! 3106: }
! 3107:
! 3108: freeStringArray(v->nColumn, pValues);
! 3109: return SQLITE_OK;
! 3110: }
! 3111:
! 3112: /* Insert a row into the %_content table; set *piRowid to be the ID of the
! 3113: * new row. Fill [pTerms] with new doclists for the %_term table. */
! 3114: static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
! 3115: sqlite3_value **pValues,
! 3116: sqlite_int64 *piRowid, fts1Hash *pTerms){
! 3117: int rc;
! 3118:
! 3119: rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */
! 3120: if( rc!=SQLITE_OK ) return rc;
! 3121: *piRowid = sqlite3_last_insert_rowid(v->db);
! 3122: return insertTerms(v, pTerms, *piRowid, pValues);
! 3123: }
! 3124:
! 3125: /* Delete a row from the %_content table; fill [pTerms] with empty doclists
! 3126: * to be written to the %_term table. */
! 3127: static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
! 3128: int rc = deleteTerms(v, pTerms, iRow);
! 3129: if( rc!=SQLITE_OK ) return rc;
! 3130: return content_delete(v, iRow); /* execute an SQL DELETE */
! 3131: }
! 3132:
! 3133: /* Update a row in the %_content table; fill [pTerms] with new doclists for the
! 3134: * %_term table. */
! 3135: static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
! 3136: sqlite3_value **pValues, fts1Hash *pTerms){
! 3137: /* Generate an empty doclist for each term that previously appeared in this
! 3138: * row. */
! 3139: int rc = deleteTerms(v, pTerms, iRow);
! 3140: if( rc!=SQLITE_OK ) return rc;
! 3141:
! 3142: rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */
! 3143: if( rc!=SQLITE_OK ) return rc;
! 3144:
! 3145: /* Now add positions for terms which appear in the updated row. */
! 3146: return insertTerms(v, pTerms, iRow, pValues);
! 3147: }
! 3148:
! 3149: /* This function implements the xUpdate callback; it is the top-level entry
! 3150: * point for inserting, deleting or updating a row in a full-text table. */
! 3151: static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
! 3152: sqlite_int64 *pRowid){
! 3153: fulltext_vtab *v = (fulltext_vtab *) pVtab;
! 3154: fts1Hash terms; /* maps term string -> PosList */
! 3155: int rc;
! 3156: fts1HashElem *e;
! 3157:
! 3158: TRACE(("FTS1 Update %p\n", pVtab));
! 3159:
! 3160: fts1HashInit(&terms, FTS1_HASH_STRING, 1);
! 3161:
! 3162: if( nArg<2 ){
! 3163: rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
! 3164: } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
! 3165: /* An update:
! 3166: * ppArg[0] = old rowid
! 3167: * ppArg[1] = new rowid
! 3168: * ppArg[2..2+v->nColumn-1] = values
! 3169: * ppArg[2+v->nColumn] = value for magic column (we ignore this)
! 3170: */
! 3171: sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
! 3172: if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
! 3173: sqlite3_value_int64(ppArg[1]) != rowid ){
! 3174: rc = SQLITE_ERROR; /* we don't allow changing the rowid */
! 3175: } else {
! 3176: assert( nArg==2+v->nColumn+1);
! 3177: rc = index_update(v, rowid, &ppArg[2], &terms);
! 3178: }
! 3179: } else {
! 3180: /* An insert:
! 3181: * ppArg[1] = requested rowid
! 3182: * ppArg[2..2+v->nColumn-1] = values
! 3183: * ppArg[2+v->nColumn] = value for magic column (we ignore this)
! 3184: */
! 3185: assert( nArg==2+v->nColumn+1);
! 3186: rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
! 3187: }
! 3188:
! 3189: if( rc==SQLITE_OK ){
! 3190: /* Write updated doclists to disk. */
! 3191: for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
! 3192: DocList *p = fts1HashData(e);
! 3193: rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
! 3194: if( rc!=SQLITE_OK ) break;
! 3195: }
! 3196: }
! 3197:
! 3198: /* clean up */
! 3199: for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
! 3200: DocList *p = fts1HashData(e);
! 3201: docListDelete(p);
! 3202: }
! 3203: fts1HashClear(&terms);
! 3204:
! 3205: return rc;
! 3206: }
! 3207:
! 3208: /*
! 3209: ** Implementation of the snippet() function for FTS1
! 3210: */
! 3211: static void snippetFunc(
! 3212: sqlite3_context *pContext,
! 3213: int argc,
! 3214: sqlite3_value **argv
! 3215: ){
! 3216: fulltext_cursor *pCursor;
! 3217: if( argc<1 ) return;
! 3218: if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
! 3219: sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
! 3220: sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
! 3221: }else{
! 3222: const char *zStart = "<b>";
! 3223: const char *zEnd = "</b>";
! 3224: const char *zEllipsis = "<b>...</b>";
! 3225: memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
! 3226: if( argc>=2 ){
! 3227: zStart = (const char*)sqlite3_value_text(argv[1]);
! 3228: if( argc>=3 ){
! 3229: zEnd = (const char*)sqlite3_value_text(argv[2]);
! 3230: if( argc>=4 ){
! 3231: zEllipsis = (const char*)sqlite3_value_text(argv[3]);
! 3232: }
! 3233: }
! 3234: }
! 3235: snippetAllOffsets(pCursor);
! 3236: snippetText(pCursor, zStart, zEnd, zEllipsis);
! 3237: sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
! 3238: pCursor->snippet.nSnippet, SQLITE_STATIC);
! 3239: }
! 3240: }
! 3241:
! 3242: /*
! 3243: ** Implementation of the offsets() function for FTS1
! 3244: */
! 3245: static void snippetOffsetsFunc(
! 3246: sqlite3_context *pContext,
! 3247: int argc,
! 3248: sqlite3_value **argv
! 3249: ){
! 3250: fulltext_cursor *pCursor;
! 3251: if( argc<1 ) return;
! 3252: if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
! 3253: sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
! 3254: sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
! 3255: }else{
! 3256: memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
! 3257: snippetAllOffsets(pCursor);
! 3258: snippetOffsetText(&pCursor->snippet);
! 3259: sqlite3_result_text(pContext,
! 3260: pCursor->snippet.zOffset, pCursor->snippet.nOffset,
! 3261: SQLITE_STATIC);
! 3262: }
! 3263: }
! 3264:
! 3265: /*
! 3266: ** This routine implements the xFindFunction method for the FTS1
! 3267: ** virtual table.
! 3268: */
! 3269: static int fulltextFindFunction(
! 3270: sqlite3_vtab *pVtab,
! 3271: int nArg,
! 3272: const char *zName,
! 3273: void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
! 3274: void **ppArg
! 3275: ){
! 3276: if( strcmp(zName,"snippet")==0 ){
! 3277: *pxFunc = snippetFunc;
! 3278: return 1;
! 3279: }else if( strcmp(zName,"offsets")==0 ){
! 3280: *pxFunc = snippetOffsetsFunc;
! 3281: return 1;
! 3282: }
! 3283: return 0;
! 3284: }
! 3285:
! 3286: /*
! 3287: ** Rename an fts1 table.
! 3288: */
! 3289: static int fulltextRename(
! 3290: sqlite3_vtab *pVtab,
! 3291: const char *zName
! 3292: ){
! 3293: fulltext_vtab *p = (fulltext_vtab *)pVtab;
! 3294: int rc = SQLITE_NOMEM;
! 3295: char *zSql = sqlite3_mprintf(
! 3296: "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';"
! 3297: "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
! 3298: , p->zDb, p->zName, zName
! 3299: , p->zDb, p->zName, zName
! 3300: );
! 3301: if( zSql ){
! 3302: rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
! 3303: sqlite3_free(zSql);
! 3304: }
! 3305: return rc;
! 3306: }
! 3307:
/* Method table for the fts1 virtual table module; it is registered
** under the name "fts1" by sqlite3Fts1Init().  The initializer is
** positional, so the order of the entries must not be changed.  The
** xBegin, xSync, xCommit and xRollback slots are 0 because this module
** does not implement them.
*/
static const sqlite3_module fulltextModule = {
  /* iVersion      */ 0,
  /* xCreate       */ fulltextCreate,
  /* xConnect      */ fulltextConnect,
  /* xBestIndex    */ fulltextBestIndex,
  /* xDisconnect   */ fulltextDisconnect,
  /* xDestroy      */ fulltextDestroy,
  /* xOpen         */ fulltextOpen,
  /* xClose        */ fulltextClose,
  /* xFilter       */ fulltextFilter,
  /* xNext         */ fulltextNext,
  /* xEof          */ fulltextEof,
  /* xColumn       */ fulltextColumn,
  /* xRowid        */ fulltextRowid,
  /* xUpdate       */ fulltextUpdate,
  /* xBegin        */ 0,
  /* xSync         */ 0,
  /* xCommit       */ 0,
  /* xRollback     */ 0,
  /* xFindFunction */ fulltextFindFunction,
  /* xRename       */ fulltextRename,
};
! 3330:
! 3331: int sqlite3Fts1Init(sqlite3 *db){
! 3332: sqlite3_overload_function(db, "snippet", -1);
! 3333: sqlite3_overload_function(db, "offsets", -1);
! 3334: return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
! 3335: }
! 3336:
! 3337: #if !SQLITE_CORE
! 3338: int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
! 3339: const sqlite3_api_routines *pApi){
! 3340: SQLITE_EXTENSION_INIT2(pApi)
! 3341: return sqlite3Fts1Init(db);
! 3342: }
! 3343: #endif
! 3344:
! 3345: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>