embedaddon/sqlite3/ext/fts1/fts1.c - annotate

Return to fts1.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / ext / fts1
Annotation of embedaddon/sqlite3/ext/fts1/fts1.c, revision 1.1.1.1

1.1       misho       1: /* fts1 has a design flaw which can lead to database corruption (see
                      2: ** below).  It is recommended not to use it any longer, instead use
                      3: ** fts3 (or higher).  If you believe that your use of fts1 is safe,
                      4: ** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
                      5: */
                      6: #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
                      7:         && !defined(SQLITE_ENABLE_BROKEN_FTS1)
                      8: #error fts1 has a design flaw and has been deprecated.
                      9: #endif
                     10: /* The flaw is that fts1 uses the content table's unaliased rowid as
                     11: ** the unique docid.  fts1 embeds the rowid in the index it builds,
                     12: ** and expects the rowid to not change.  The SQLite VACUUM operation
                     13: ** will renumber such rowids, thereby breaking fts1.  If you are using
                     14: ** fts1 in a system which has disabled VACUUM, then you can continue
                     15: ** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
                     16: ** VACUUM, though systems using auto_vacuum are unlikely to invoke
                     17: ** VACUUM.
                     18: **
                     19: ** fts1 should be safe even across VACUUM if you only insert documents
                     20: ** and never delete.
                     21: */
                     22: 
                     23: /* The author disclaims copyright to this source code.
                     24:  *
                     25:  * This is an SQLite module implementing full-text search.
                     26:  */
                     27: 
                     28: /*
                     29: ** The code in this file is only compiled if:
                     30: **
                     31: **     * The FTS1 module is being built as an extension
                     32: **       (in which case SQLITE_CORE is not defined), or
                     33: **
                     34: **     * The FTS1 module is being built into the core of
                     35: **       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
                     36: */
                     37: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
                     38: 
                     39: #if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
                     40: # define SQLITE_CORE 1
                     41: #endif
                     42: 
                     43: #include <assert.h>
                     44: #include <stdlib.h>
                     45: #include <stdio.h>
                     46: #include <string.h>
                     47: #include <ctype.h>
                     48: 
                     49: #include "fts1.h"
                     50: #include "fts1_hash.h"
                     51: #include "fts1_tokenizer.h"
                     52: #include "sqlite3.h"
                     53: #include "sqlite3ext.h"
                     54: SQLITE_EXTENSION_INIT1
                     55: 
                     56: 
                     57: #if 0
                     58: # define TRACE(A)  printf A; fflush(stdout)
                     59: #else
                     60: # define TRACE(A)
                     61: #endif
                     62: 
                     63: /* utility functions */
                     64: 
                     /*
                     ** A growable character buffer.  While s is non-NULL it is kept
                     ** NUL-terminated and len counts the bytes before the terminator.
                     */
                     typedef struct StringBuffer {
                       int len;      /* length, not including null terminator */
                       int alloced;  /* Space allocated for s[] */
                       char *s;      /* Content of the string */
                     } StringBuffer;

                     /*
                     ** Make sb an empty string backed by a fresh 100-byte allocation.
                     ** Whatever sb->s pointed to beforehand is NOT freed here.
                     **
                     ** If the allocation fails, sb->s is left NULL and sb->alloced is 0
                     ** rather than writing the terminator through a NULL pointer.  A later
                     ** nappend() will then try to allocate again.
                     */
                     static void initStringBuffer(StringBuffer *sb){
                       sb->len = 0;
                       sb->s = malloc(100);
                       if( sb->s==0 ){
                         sb->alloced = 0;
                         return;
                       }
                       sb->alloced = 100;
                       sb->s[0] = '\0';
                     }

                     /*
                     ** Append the first nFrom bytes of zFrom to sb, growing the buffer
                     ** when required and re-terminating it.  A negative nFrom is ignored.
                     **
                     ** If the buffer cannot be grown, the accumulated text is discarded
                     ** and sb is reset to an empty string.  The old buffer is released
                     ** first: realloc() leaves it allocated when it fails, so assigning
                     ** the result straight to sb->s would leak it.
                     */
                     static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
                       if( nFrom<0 ) return;
                       if( sb->len + nFrom >= sb->alloced ){
                         int nNew = sb->len + nFrom + 100;
                         char *zNew = realloc(sb->s, nNew+1);  /* +1 for the terminator */
                         if( zNew==0 ){
                           free(sb->s);
                           initStringBuffer(sb);
                           return;
                         }
                         sb->s = zNew;
                         sb->alloced = nNew;
                       }
                       memcpy(sb->s + sb->len, zFrom, nFrom);
                       sb->len += nFrom;
                       sb->s[sb->len] = 0;
                     }

                     /* Append the NUL-terminated string zFrom to sb. */
                     static void append(StringBuffer *sb, const char *zFrom){
                       nappend(sb, zFrom, (int)strlen(zFrom));
                     }
                     94: 
                     95: /* We encode variable-length integers in little-endian order using seven bits
                     96:  * per byte as follows:
                     97: **
                     98: ** KEY:
                     99: **         A = 0xxxxxxx    7 bits of data and one flag bit
                    100: **         B = 1xxxxxxx    7 bits of data and one flag bit
                    101: **
                    102: **  7 bits - A
                    103: ** 14 bits - BA
                    104: ** 21 bits - BBA
                    105: ** and so on.
                    106: */
                    107: 
                    108: /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
                    109: #define VARINT_MAX 10
                    110: 
                    111: /* Write a 64-bit variable-length integer to memory starting at p[0].
                    112:  * The length of data written will be between 1 and VARINT_MAX bytes.
                    113:  * The number of bytes written is returned. */
                    114: static int putVarint(char *p, sqlite_int64 v){
                    115:   unsigned char *q = (unsigned char *) p;
                    116:   sqlite_uint64 vu = v;
                    117:   do{
                    118:     *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
                    119:     vu >>= 7;
                    120:   }while( vu!=0 );
                    121:   q[-1] &= 0x7f;  /* turn off high bit in final byte */
                    122:   assert( q - (unsigned char *)p <= VARINT_MAX );
                    123:   return (int) (q - (unsigned char *)p);
                    124: }
                    125: 
                    126: /* Read a 64-bit variable-length integer from memory starting at p[0].
                    127:  * Return the number of bytes read, or 0 on error.
                    128:  * The value is stored in *v. */
                    129: static int getVarint(const char *p, sqlite_int64 *v){
                    130:   const unsigned char *q = (const unsigned char *) p;
                    131:   sqlite_uint64 x = 0, y = 1;
                    132:   while( (*q & 0x80) == 0x80 ){
                    133:     x += y * (*q++ & 0x7f);
                    134:     y <<= 7;
                    135:     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
                    136:       assert( 0 );
                    137:       return 0;
                    138:     }
                    139:   }
                    140:   x += y * (*q++);
                    141:   *v = (sqlite_int64) x;
                    142:   return (int) (q - (unsigned char *)p);
                    143: }
                    144: 
                    145: static int getVarint32(const char *p, int *pi){
                    146:  sqlite_int64 i;
                    147:  int ret = getVarint(p, &i);
                    148:  *pi = (int) i;
                    149:  assert( *pi==i );
                    150:  return ret;
                    151: }
                    152: 
                    153: /*** Document lists ***
                    154:  *
                    155:  * A document list holds a sorted list of varint-encoded document IDs.
                    156:  *
                    157:  * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
                    158:  *
                    159:  * array {
                    160:  *   varint docid;
                    161:  *   array {
                    162:  *     varint position;     (delta from previous position plus POS_BASE)
                    163:  *     varint startOffset;  (delta from previous startOffset)
                    164:  *     varint endOffset;    (delta from startOffset)
                    165:  *   }
                    166:  * }
                    167:  *
                    168:  * Here, array { X } means zero or more occurrences of X, adjacent in memory.
                    169:  *
                    170:  * A position list may hold positions for text in multiple columns.  A position
                    171:  * POS_COLUMN is followed by a varint containing the index of the column for
                    172:  * following positions in the list.  Any positions appearing before any
                    173:  * occurrences of POS_COLUMN are for column 0.
                    174:  *
                    175:  * A doclist with type DL_POSITIONS is like the above, but holds only docids
                    176:  * and positions without offset information.
                    177:  *
                    178:  * A doclist with type DL_DOCIDS is like the above, but holds only docids
                    179:  * without positions or offset information.
                    180:  *
                    181:  * On disk, every document list has positions and offsets, so we don't bother
                    182:  * to serialize a doclist's type.
                    183:  * 
                    184:  * We don't yet delta-encode document IDs; doing so will probably be a
                    185:  * modest win.
                    186:  *
                    187:  * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
                    188:  * After the first offset, estimate the next offset by using the
                    189:  * current token position and the previous token position and offset,
                    190:  * offset to handle some variance.  So the estimate would be
                    191:  * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
                    192:  * as normal.  Offsets more than 64 chars from the estimate are
                    193:  * encoded as the delta to the previous start offset + 128.  An
                    194:  * additional tiny increment can be gained by using the end offset of
                    195:  * the previous token to make the estimate a tiny bit more precise.
                    196: */
                    197: 
                    198: /* It is not safe to call isspace(), tolower(), or isalnum() on
                    199: ** hi-bit-set characters.  This is the same solution used in the
                    200: ** tokenizer.
                    201: */
                    202: /* TODO(shess) The snippet-generation code should be using the
                    203: ** tokenizer-generated tokens rather than doing its own local
                    204: ** tokenization.
                    205: */
                    206: /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
                    /* isspace() for 7-bit characters; always 0 when bit 0x80 of c is set. */
                    static int safe_isspace(char c){
                      if( c & 0x80 ){
                        return 0;
                      }
                      return isspace(c);
                    }
                    /* tolower() for 7-bit characters; c is returned as-is when bit 0x80 is set. */
                    static int safe_tolower(char c){
                      if( c & 0x80 ){
                        return c;
                      }
                      return tolower(c);
                    }
                    /* isalnum() for 7-bit characters; always 0 when bit 0x80 of c is set. */
                    static int safe_isalnum(char c){
                      if( c & 0x80 ){
                        return 0;
                      }
                      return isalnum(c);
                    }
                    216: 
                    /*
                    ** How much detail a doclist carries per document.  The values are
                    ** ordered so that code can test "iType>=DL_POSITIONS".
                    */
                    enum DocListType {
                      DL_DOCIDS = 0,             /* docids only */
                      DL_POSITIONS = 1,          /* docids + positions */
                      DL_POSITIONS_OFFSETS = 2   /* docids + positions + offsets */
                    };
                    typedef enum DocListType DocListType;
                    222: 
                    223: /*
                    224: ** By default, only positions and not offsets are stored in the doclists.
                    225: ** To change this so that offsets are stored too, compile with
                    226: **
                    227: **          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
                    228: **
                    229: */
                    230: #ifndef DL_DEFAULT
                    231: # define DL_DEFAULT DL_POSITIONS
                    232: #endif
                    233: 
                    234: typedef struct DocList {
                    235:   char *pData;
                    236:   int nData;
                    237:   DocListType iType;
                    238:   int iLastColumn;    /* the last column written */
                    239:   int iLastPos;       /* the last position written */
                    240:   int iLastOffset;    /* the last start offset written */
                    241: } DocList;
                    242: 
                    /*
                    ** Reserved codes inside a position list.  Real positions are stored
                    ** as a delta plus POS_BASE so they never collide with the codes.
                    */
                    enum {
                      POS_END = 0,        /* end of this position list */
                      POS_COLUMN = 1,     /* followed by new column number */
                      POS_BASE = 2        /* added to every stored position delta */
                    };
                    248: 
                    249: /* Initialize a new DocList to hold the given data. */
                    250: static void docListInit(DocList *d, DocListType iType,
                    251:                         const char *pData, int nData){
                    252:   d->nData = nData;
                    253:   if( nData>0 ){
                    254:     d->pData = malloc(nData);
                    255:     memcpy(d->pData, pData, nData);
                    256:   } else {
                    257:     d->pData = NULL;
                    258:   }
                    259:   d->iType = iType;
                    260:   d->iLastColumn = 0;
                    261:   d->iLastPos = d->iLastOffset = 0;
                    262: }
                    263: 
                    264: /* Create a new dynamically-allocated DocList. */
                    265: static DocList *docListNew(DocListType iType){
                    266:   DocList *d = (DocList *) malloc(sizeof(DocList));
                    267:   docListInit(d, iType, 0, 0);
                    268:   return d;
                    269: }
                    270: 
                    271: static void docListDestroy(DocList *d){
                    272:   free(d->pData);
                    273: #ifndef NDEBUG
                    274:   memset(d, 0x55, sizeof(*d));
                    275: #endif
                    276: }
                    277: 
                    278: static void docListDelete(DocList *d){
                    279:   docListDestroy(d);
                    280:   free(d);
                    281: }
                    282: 
                    283: static char *docListEnd(DocList *d){
                    284:   return d->pData + d->nData;
                    285: }
                    286: 
                    287: /* Append a varint to a DocList's data. */
                    288: static void appendVarint(DocList *d, sqlite_int64 i){
                    289:   char c[VARINT_MAX];
                    290:   int n = putVarint(c, i);
                    291:   d->pData = realloc(d->pData, d->nData + n);
                    292:   memcpy(d->pData + d->nData, c, n);
                    293:   d->nData += n;
                    294: }
                    295: 
                    296: static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
                    297:   appendVarint(d, iDocid);
                    298:   if( d->iType>=DL_POSITIONS ){
                    299:     appendVarint(d, POS_END);  /* initially empty position list */
                    300:     d->iLastColumn = 0;
                    301:     d->iLastPos = d->iLastOffset = 0;
                    302:   }
                    303: }
                    304: 
                    305: /* helper function for docListAddPos and docListAddPosOffset */
                    306: static void addPos(DocList *d, int iColumn, int iPos){
                    307:   assert( d->nData>0 );
                    308:   --d->nData;  /* remove previous terminator */
                    309:   if( iColumn!=d->iLastColumn ){
                    310:     assert( iColumn>d->iLastColumn );
                    311:     appendVarint(d, POS_COLUMN);
                    312:     appendVarint(d, iColumn);
                    313:     d->iLastColumn = iColumn;
                    314:     d->iLastPos = d->iLastOffset = 0;
                    315:   }
                    316:   assert( iPos>=d->iLastPos );
                    317:   appendVarint(d, iPos-d->iLastPos+POS_BASE);
                    318:   d->iLastPos = iPos;
                    319: }
                    320: 
                    321: /* Add a position to the last position list in a doclist. */
                    322: static void docListAddPos(DocList *d, int iColumn, int iPos){
                    323:   assert( d->iType==DL_POSITIONS );
                    324:   addPos(d, iColumn, iPos);
                    325:   appendVarint(d, POS_END);  /* add new terminator */
                    326: }
                    327: 
                    328: /*
                    329: ** Add a position and starting and ending offsets to a doclist.
                    330: **
                    331: ** If the doclist is setup to handle only positions, then insert
                    332: ** the position only and ignore the offsets.
                    333: */
                    334: static void docListAddPosOffset(
                    335:   DocList *d,             /* Doclist under construction */
                    336:   int iColumn,            /* Column the inserted term is part of */
                    337:   int iPos,               /* Position of the inserted term */
                    338:   int iStartOffset,       /* Starting offset of inserted term */
                    339:   int iEndOffset          /* Ending offset of inserted term */
                    340: ){
                    341:   assert( d->iType>=DL_POSITIONS );
                    342:   addPos(d, iColumn, iPos);
                    343:   if( d->iType==DL_POSITIONS_OFFSETS ){
                    344:     assert( iStartOffset>=d->iLastOffset );
                    345:     appendVarint(d, iStartOffset-d->iLastOffset);
                    346:     d->iLastOffset = iStartOffset;
                    347:     assert( iEndOffset>=iStartOffset );
                    348:     appendVarint(d, iEndOffset-iStartOffset);
                    349:   }
                    350:   appendVarint(d, POS_END);  /* add new terminator */
                    351: }
                    352: 
                    353: /*
                    354: ** A DocListReader object is a cursor into a doclist.  Initialize
                    355: ** the cursor to the beginning of the doclist by calling readerInit().
                    356: ** Then use routines
                    357: **
                    358: **      peekDocid()
                    359: **      readDocid()
                    360: **      readPosition()
                    361: **      skipPositionList()
                    362: **      and so forth...
                    363: **
                    364: ** to read information out of the doclist.  When we reach the end
                    365: ** of the doclist, atEnd() returns TRUE.
                    366: */
                    367: typedef struct DocListReader {
                    368:   DocList *pDoclist;  /* The document list we are stepping through */
                    369:   char *p;            /* Pointer to next unread byte in the doclist */
                    370:   int iLastColumn;
                    371:   int iLastPos;  /* the last position read, or -1 when not in a position list */
                    372: } DocListReader;
                    373: 
                    374: /*
                    375: ** Initialize the DocListReader r to point to the beginning of pDoclist.
                    376: */
                    377: static void readerInit(DocListReader *r, DocList *pDoclist){
                    378:   r->pDoclist = pDoclist;
                    379:   if( pDoclist!=NULL ){
                    380:     r->p = pDoclist->pData;
                    381:   }
                    382:   r->iLastColumn = -1;
                    383:   r->iLastPos = -1;
                    384: }
                    385: 
                    386: /*
                    387: ** Return TRUE if we have reached then end of pReader and there is
                    388: ** nothing else left to read.
                    389: */
                    390: static int atEnd(DocListReader *pReader){
                    391:   return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
                    392: }
                    393: 
                    394: /* Peek at the next docid without advancing the read pointer. 
                    395: */
                    396: static sqlite_int64 peekDocid(DocListReader *pReader){
                    397:   sqlite_int64 ret;
                    398:   assert( !atEnd(pReader) );
                    399:   assert( pReader->iLastPos==-1 );
                    400:   getVarint(pReader->p, &ret);
                    401:   return ret;
                    402: }
                    403: 
                    404: /* Read the next docid.   See also nextDocid().
                    405: */
                    406: static sqlite_int64 readDocid(DocListReader *pReader){
                    407:   sqlite_int64 ret;
                    408:   assert( !atEnd(pReader) );
                    409:   assert( pReader->iLastPos==-1 );
                    410:   pReader->p += getVarint(pReader->p, &ret);
                    411:   if( pReader->pDoclist->iType>=DL_POSITIONS ){
                    412:     pReader->iLastColumn = 0;
                    413:     pReader->iLastPos = 0;
                    414:   }
                    415:   return ret;
                    416: }
                    417: 
                    418: /* Read the next position and column index from a position list.
                    419:  * Returns the position, or -1 at the end of the list. */
                    420: static int readPosition(DocListReader *pReader, int *iColumn){
                    421:   int i;
                    422:   int iType = pReader->pDoclist->iType;
                    423: 
                    424:   if( pReader->iLastPos==-1 ){
                    425:     return -1;
                    426:   }
                    427:   assert( !atEnd(pReader) );
                    428: 
                    429:   if( iType<DL_POSITIONS ){
                    430:     return -1;
                    431:   }
                    432:   pReader->p += getVarint32(pReader->p, &i);
                    433:   if( i==POS_END ){
                    434:     pReader->iLastColumn = pReader->iLastPos = -1;
                    435:     *iColumn = -1;
                    436:     return -1;
                    437:   }
                    438:   if( i==POS_COLUMN ){
                    439:     pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
                    440:     pReader->iLastPos = 0;
                    441:     pReader->p += getVarint32(pReader->p, &i);
                    442:     assert( i>=POS_BASE );
                    443:   }
                    444:   pReader->iLastPos += ((int) i)-POS_BASE;
                    445:   if( iType>=DL_POSITIONS_OFFSETS ){
                    446:     /* Skip over offsets, ignoring them for now. */
                    447:     int iStart, iEnd;
                    448:     pReader->p += getVarint32(pReader->p, &iStart);
                    449:     pReader->p += getVarint32(pReader->p, &iEnd);
                    450:   }
                    451:   *iColumn = pReader->iLastColumn;
                    452:   return pReader->iLastPos;
                    453: }
                    454: 
                    455: /* Skip past the end of a position list. */
                    456: static void skipPositionList(DocListReader *pReader){
                    457:   DocList *p = pReader->pDoclist;
                    458:   if( p && p->iType>=DL_POSITIONS ){
                    459:     int iColumn;
                    460:     while( readPosition(pReader, &iColumn)!=-1 ){}
                    461:   }
                    462: }
                    463: 
                    464: /* Skip over a docid, including its position list if the doclist has
                    465:  * positions. */
                    466: static void skipDocument(DocListReader *pReader){
                    467:   readDocid(pReader);
                    468:   skipPositionList(pReader);
                    469: }
                    470: 
                    471: /* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
                    472:  * matching [iDocid] was found.  */
                    473: static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
                    474:   sqlite_int64 d = 0;
                    475:   while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
                    476:     skipDocument(pReader);
                    477:   }
                    478:   return !atEnd(pReader) && d==iDocid;
                    479: }
                    480: 
                    481: /* Return the first document in a document list.
                    482: */
                    483: static sqlite_int64 firstDocid(DocList *d){
                    484:   DocListReader r;
                    485:   readerInit(&r, d);
                    486:   return readDocid(&r);
                    487: }
                    488: 
                    #ifdef SQLITE_DEBUG
                    /*
                    ** Debugging aid, compiled only when SQLITE_DEBUG is defined.
                    **
                    ** Writes the doclist to standard output on one line as a
                    ** comma-separated list of docids.  When the doclist stores
                    ** positions, each docid is followed by "(col:pos:col:pos...)".
                    ** Entries whose docid is 0 are skipped.
                    */
                    static void printDoclist(DocList *p){
                      DocListReader reader;
                      const char *zDocSep = "";

                      for(readerInit(&reader, p); !atEnd(&reader); ){
                        sqlite_int64 iDocid = readDocid(&reader);
                        if( iDocid==0 ){
                          skipPositionList(&reader);
                          continue;
                        }
                        printf("%s%lld", zDocSep, iDocid);
                        zDocSep =  ",";
                        if( p->iType<DL_POSITIONS ){
                          continue;
                        }
                        {
                          const char *zPosSep = "";
                          int iCol;
                          int iPos;
                          printf("(");
                          for(;;){
                            iPos = readPosition(&reader, &iCol);
                            if( iPos<0 ) break;
                            printf("%s%d:%d", zPosSep, iCol, iPos);
                            zPosSep = ":";
                          }
                          printf(")");
                        }
                      }
                      printf("\n");
                      fflush(stdout);
                    }
                    #endif /* SQLITE_DEBUG */
                    523: 
                    524: /* Trim the given doclist to contain only positions in column
                    525:  * [iRestrictColumn]. */
                    526: static void docListRestrictColumn(DocList *in, int iRestrictColumn){
                    527:   DocListReader r;
                    528:   DocList out;
                    529: 
                    530:   assert( in->iType>=DL_POSITIONS );
                    531:   readerInit(&r, in);
                    532:   docListInit(&out, DL_POSITIONS, NULL, 0);
                    533: 
                    534:   while( !atEnd(&r) ){
                    535:     sqlite_int64 iDocid = readDocid(&r);
                    536:     int iPos, iColumn;
                    537: 
                    538:     docListAddDocid(&out, iDocid);
                    539:     while( (iPos = readPosition(&r, &iColumn)) != -1 ){
                    540:       if( iColumn==iRestrictColumn ){
                    541:         docListAddPos(&out, iColumn, iPos);
                    542:       }
                    543:     }
                    544:   }
                    545: 
                    546:   docListDestroy(in);
                    547:   *in = out;
                    548: }
                    549: 
                    550: /* Trim the given doclist by discarding any docids without any remaining
                    551:  * positions. */
                    552: static void docListDiscardEmpty(DocList *in) {
                    553:   DocListReader r;
                    554:   DocList out;
                    555: 
                    556:   /* TODO: It would be nice to implement this operation in place; that
                    557:    * could save a significant amount of memory in queries with long doclists. */
                    558:   assert( in->iType>=DL_POSITIONS );
                    559:   readerInit(&r, in);
                    560:   docListInit(&out, DL_POSITIONS, NULL, 0);
                    561: 
                    562:   while( !atEnd(&r) ){
                    563:     sqlite_int64 iDocid = readDocid(&r);
                    564:     int match = 0;
                    565:     int iPos, iColumn;
                    566:     while( (iPos = readPosition(&r, &iColumn)) != -1 ){
                    567:       if( !match ){
                    568:         docListAddDocid(&out, iDocid);
                    569:         match = 1;
                    570:       }
                    571:       docListAddPos(&out, iColumn, iPos);
                    572:     }
                    573:   }
                    574: 
                    575:   docListDestroy(in);
                    576:   *in = out;
                    577: }
                    578: 
                    579: /* Helper function for docListUpdate() and docListAccumulate().
                    580: ** Splices a doclist element into the doclist represented by r,
                    581: ** leaving r pointing after the newly spliced element.
                    582: */
                    583: static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
                    584:                                  const char *pSource, int nSource){
                    585:   DocList *d = r->pDoclist;
                    586:   char *pTarget;
                    587:   int nTarget, found;
                    588: 
                    589:   found = skipToDocid(r, iDocid);
                    590: 
                    591:   /* Describe slice in d to place pSource/nSource. */
                    592:   pTarget = r->p;
                    593:   if( found ){
                    594:     skipDocument(r);
                    595:     nTarget = r->p-pTarget;
                    596:   }else{
                    597:     nTarget = 0;
                    598:   }
                    599: 
                    600:   /* The sense of the following is that there are three possibilities.
                    601:   ** If nTarget==nSource, we should not move any memory nor realloc.
                    602:   ** If nTarget>nSource, trim target and realloc.
                    603:   ** If nTarget<nSource, realloc then expand target.
                    604:   */
                    605:   if( nTarget>nSource ){
                    606:     memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
                    607:   }
                    608:   if( nTarget!=nSource ){
                    609:     int iDoclist = pTarget-d->pData;
                    610:     d->pData = realloc(d->pData, d->nData+nSource-nTarget);
                    611:     pTarget = d->pData+iDoclist;
                    612:   }
                    613:   if( nTarget<nSource ){
                    614:     memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
                    615:   }
                    616: 
                    617:   memcpy(pTarget, pSource, nSource);
                    618:   d->nData += nSource-nTarget;
                    619:   r->p = pTarget+nSource;
                    620: }
                    621: 
                    622: /* Insert/update pUpdate into the doclist. */
                    623: static void docListUpdate(DocList *d, DocList *pUpdate){
                    624:   DocListReader reader;
                    625: 
                    626:   assert( d!=NULL && pUpdate!=NULL );
                    627:   assert( d->iType==pUpdate->iType);
                    628: 
                    629:   readerInit(&reader, d);
                    630:   docListSpliceElement(&reader, firstDocid(pUpdate),
                    631:                        pUpdate->pData, pUpdate->nData);
                    632: }
                    633: 
                    634: /* Propagate elements from pUpdate to pAcc, overwriting elements with
                    635: ** matching docids.
                    636: */
                    637: static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
                    638:   DocListReader accReader, updateReader;
                    639: 
                    640:   /* Handle edge cases where one doclist is empty. */
                    641:   assert( pAcc!=NULL );
                    642:   if( pUpdate==NULL || pUpdate->nData==0 ) return;
                    643:   if( pAcc->nData==0 ){
                    644:     pAcc->pData = malloc(pUpdate->nData);
                    645:     memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
                    646:     pAcc->nData = pUpdate->nData;
                    647:     return;
                    648:   }
                    649: 
                    650:   readerInit(&accReader, pAcc);
                    651:   readerInit(&updateReader, pUpdate);
                    652: 
                    653:   while( !atEnd(&updateReader) ){
                    654:     char *pSource = updateReader.p;
                    655:     sqlite_int64 iDocid = readDocid(&updateReader);
                    656:     skipPositionList(&updateReader);
                    657:     docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
                    658:   }
                    659: }
                    660: 
                    661: /*
                    662: ** Read the next docid off of pIn.  Return 0 if we reach the end.
                    663: *
                    664: * TODO: This assumes that docids are never 0, but they may actually be 0 since
                    665: * users can choose docids when inserting into a full-text table.  Fix this.
                    666: */
                    667: static sqlite_int64 nextDocid(DocListReader *pIn){
                    668:   skipPositionList(pIn);
                    669:   return atEnd(pIn) ? 0 : readDocid(pIn);
                    670: }
                    671: 
                    672: /*
                    673: ** pLeft and pRight are two DocListReaders that are pointing to
                    674: ** positions lists of the same document: iDocid. 
                    675: **
                    676: ** If there are no instances in pLeft or pRight where the position
                    677: ** of pLeft is one less than the position of pRight, then this
                    678: ** routine adds nothing to pOut.
                    679: **
                    680: ** If there are one or more instances where positions from pLeft
                    681: ** are exactly one less than positions from pRight, then add a new
                    682: ** document record to pOut.  If pOut wants to hold positions, then
                    683: ** include the positions from pRight that are one more than a
                    684: ** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
                    685: **
                    686: ** pLeft and pRight are left pointing at the next document record.
                    687: */
                    688: static void mergePosList(
                    689:   DocListReader *pLeft,    /* Left position list */
                    690:   DocListReader *pRight,   /* Right position list */
                    691:   sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
                    692:   DocList *pOut            /* Write the merged document record here */
                    693: ){
                    694:   int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
                    695:   int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
                    696:   int match = 0;
                    697: 
                    698:   /* Loop until we've reached the end of both position lists. */
                    699:   while( iLeftPos!=-1 && iRightPos!=-1 ){
                    700:     if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
                    701:       if( !match ){
                    702:         docListAddDocid(pOut, iDocid);
                    703:         match = 1;
                    704:       }
                    705:       if( pOut->iType>=DL_POSITIONS ){
                    706:         docListAddPos(pOut, iRightCol, iRightPos);
                    707:       }
                    708:       iLeftPos = readPosition(pLeft, &iLeftCol);
                    709:       iRightPos = readPosition(pRight, &iRightCol);
                    710:     }else if( iRightCol<iLeftCol ||
                    711:               (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
                    712:       iRightPos = readPosition(pRight, &iRightCol);
                    713:     }else{
                    714:       iLeftPos = readPosition(pLeft, &iLeftCol);
                    715:     }
                    716:   }
                    717:   if( iLeftPos>=0 ) skipPositionList(pLeft);
                    718:   if( iRightPos>=0 ) skipPositionList(pRight);
                    719: }
                    720: 
                    721: /* We have two doclists:  pLeft and pRight.
                    722: ** Write the phrase intersection of these two doclists into pOut.
                    723: **
                    724: ** A phrase intersection means that two documents only match
                    725: ** if pLeft.iPos+1==pRight.iPos.
                    726: **
                    727: ** The output pOut may or may not contain positions.  If pOut
                    728: ** does contain positions, they are the positions of pRight.
                    729: */
                    730: static void docListPhraseMerge(
                    731:   DocList *pLeft,    /* Doclist resulting from the words on the left */
                    732:   DocList *pRight,   /* Doclist for the next word to the right */
                    733:   DocList *pOut      /* Write the combined doclist here */
                    734: ){
                    735:   DocListReader left, right;
                    736:   sqlite_int64 docidLeft, docidRight;
                    737: 
                    738:   readerInit(&left, pLeft);
                    739:   readerInit(&right, pRight);
                    740:   docidLeft = nextDocid(&left);
                    741:   docidRight = nextDocid(&right);
                    742: 
                    743:   while( docidLeft>0 && docidRight>0 ){
                    744:     if( docidLeft<docidRight ){
                    745:       docidLeft = nextDocid(&left);
                    746:     }else if( docidRight<docidLeft ){
                    747:       docidRight = nextDocid(&right);
                    748:     }else{
                    749:       mergePosList(&left, &right, docidLeft, pOut);
                    750:       docidLeft = nextDocid(&left);
                    751:       docidRight = nextDocid(&right);
                    752:     }
                    753:   }
                    754: }
                    755: 
                    756: /* We have two doclists:  pLeft and pRight.
                    757: ** Write the intersection of these two doclists into pOut.
                    758: ** Only docids are matched.  Position information is ignored.
                    759: **
                    760: ** The output pOut never holds positions.
                    761: */
                    762: static void docListAndMerge(
                    763:   DocList *pLeft,    /* Doclist resulting from the words on the left */
                    764:   DocList *pRight,   /* Doclist for the next word to the right */
                    765:   DocList *pOut      /* Write the combined doclist here */
                    766: ){
                    767:   DocListReader left, right;
                    768:   sqlite_int64 docidLeft, docidRight;
                    769: 
                    770:   assert( pOut->iType<DL_POSITIONS );
                    771: 
                    772:   readerInit(&left, pLeft);
                    773:   readerInit(&right, pRight);
                    774:   docidLeft = nextDocid(&left);
                    775:   docidRight = nextDocid(&right);
                    776: 
                    777:   while( docidLeft>0 && docidRight>0 ){
                    778:     if( docidLeft<docidRight ){
                    779:       docidLeft = nextDocid(&left);
                    780:     }else if( docidRight<docidLeft ){
                    781:       docidRight = nextDocid(&right);
                    782:     }else{
                    783:       docListAddDocid(pOut, docidLeft);
                    784:       docidLeft = nextDocid(&left);
                    785:       docidRight = nextDocid(&right);
                    786:     }
                    787:   }
                    788: }
                    789: 
                    790: /* We have two doclists:  pLeft and pRight.
                    791: ** Write the union of these two doclists into pOut.
                    792: ** Only docids are matched.  Position information is ignored.
                    793: **
                    794: ** The output pOut never holds positions.
                    795: */
                    796: static void docListOrMerge(
                    797:   DocList *pLeft,    /* Doclist resulting from the words on the left */
                    798:   DocList *pRight,   /* Doclist for the next word to the right */
                    799:   DocList *pOut      /* Write the combined doclist here */
                    800: ){
                    801:   DocListReader left, right;
                    802:   sqlite_int64 docidLeft, docidRight, priorLeft;
                    803: 
                    804:   readerInit(&left, pLeft);
                    805:   readerInit(&right, pRight);
                    806:   docidLeft = nextDocid(&left);
                    807:   docidRight = nextDocid(&right);
                    808: 
                    809:   while( docidLeft>0 && docidRight>0 ){
                    810:     if( docidLeft<=docidRight ){
                    811:       docListAddDocid(pOut, docidLeft);
                    812:     }else{
                    813:       docListAddDocid(pOut, docidRight);
                    814:     }
                    815:     priorLeft = docidLeft;
                    816:     if( docidLeft<=docidRight ){
                    817:       docidLeft = nextDocid(&left);
                    818:     }
                    819:     if( docidRight>0 && docidRight<=priorLeft ){
                    820:       docidRight = nextDocid(&right);
                    821:     }
                    822:   }
                    823:   while( docidLeft>0 ){
                    824:     docListAddDocid(pOut, docidLeft);
                    825:     docidLeft = nextDocid(&left);
                    826:   }
                    827:   while( docidRight>0 ){
                    828:     docListAddDocid(pOut, docidRight);
                    829:     docidRight = nextDocid(&right);
                    830:   }
                    831: }
                    832: 
                    833: /* We have two doclists:  pLeft and pRight.
                    834: ** Write into pOut all documents that occur in pLeft but not
                    835: ** in pRight.
                    836: **
                    837: ** Only docids are matched.  Position information is ignored.
                    838: **
                    839: ** The output pOut never holds positions.
                    840: */
                    841: static void docListExceptMerge(
                    842:   DocList *pLeft,    /* Doclist resulting from the words on the left */
                    843:   DocList *pRight,   /* Doclist for the next word to the right */
                    844:   DocList *pOut      /* Write the combined doclist here */
                    845: ){
                    846:   DocListReader left, right;
                    847:   sqlite_int64 docidLeft, docidRight, priorLeft;
                    848: 
                    849:   readerInit(&left, pLeft);
                    850:   readerInit(&right, pRight);
                    851:   docidLeft = nextDocid(&left);
                    852:   docidRight = nextDocid(&right);
                    853: 
                    854:   while( docidLeft>0 && docidRight>0 ){
                    855:     priorLeft = docidLeft;
                    856:     if( docidLeft<docidRight ){
                    857:       docListAddDocid(pOut, docidLeft);
                    858:     }
                    859:     if( docidLeft<=docidRight ){
                    860:       docidLeft = nextDocid(&left);
                    861:     }
                    862:     if( docidRight>0 && docidRight<=priorLeft ){
                    863:       docidRight = nextDocid(&right);
                    864:     }
                    865:   }
                    866:   while( docidLeft>0 ){
                    867:     docListAddDocid(pOut, docidLeft);
                    868:     docidLeft = nextDocid(&left);
                    869:   }
                    870: }
                    871: 
                    /* Return a malloc()ed, NUL-terminated copy of the first n bytes of s.
                     * The caller must free() the returned string. */
                    static char *string_dup_n(const char *s, int n){
                      char *zCopy = malloc(n + 1);
                      char *zEnd = zCopy + n;
                      memcpy(zCopy, s, n);
                      *zEnd = '\0';
                      return zCopy;
                    }
                    878: 
                    /* Return a malloc()ed copy of the NUL-terminated string s; the caller
                     * must free() it.  (strdup() is avoided because it is not part of the
                     * standard C library and may not be available everywhere.) */
                    static char *string_dup(const char *s){
                      int nByte = strlen(s);
                      return string_dup_n(s, nByte);
                    }
                    885: 
                    /* Build a copy of zFormat in which every '%' character is replaced by
                     * the text "zDb.zName".  This can be handier than sqlite_mprintf()
                     * when the same table name is needed several times in one statement.
                     * The caller must free() the returned string. */
                    static char *string_format(const char *zFormat,
                                               const char *zDb, const char *zName){
                      size_t nDb = strlen(zDb);
                      size_t nName = strlen(zName);
                      size_t nSubst = nDb + 1 + nName;   /* length of "zDb.zName" */
                      size_t nTotal = 1;                 /* starts at 1 for the terminator */
                      const char *zIn;
                      char *zOut;
                      char *zWrite;

                      /* Pass 1: work out how many bytes the expanded string needs. */
                      for(zIn = zFormat; *zIn; zIn++){
                        nTotal += (*zIn=='%') ? nSubst : 1;
                      }

                      /* Pass 2: copy, expanding each '%' on the way. */
                      zOut = malloc(nTotal);
                      zWrite = zOut;
                      zIn = zFormat;
                      while( *zIn ){
                        char c = *zIn++;
                        if( c!='%' ){
                          *zWrite++ = c;
                          continue;
                        }
                        memcpy(zWrite, zDb, nDb);
                        zWrite[nDb] = '.';
                        memcpy(zWrite + nDb + 1, zName, nName);
                        zWrite += nSubst;
                      }
                      *zWrite++ = '\0';
                      assert( zWrite == zOut + nTotal );
                      return zOut;
                    }
                    922: 
                    923: static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
                    924:                     const char *zFormat){
                    925:   char *zCommand = string_format(zFormat, zDb, zName);
                    926:   int rc;
                    927:   TRACE(("FTS1 sql: %s\n", zCommand));
                    928:   rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
                    929:   free(zCommand);
                    930:   return rc;
                    931: }
                    932: 
                    933: static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
                    934:                        sqlite3_stmt **ppStmt, const char *zFormat){
                    935:   char *zCommand = string_format(zFormat, zDb, zName);
                    936:   int rc;
                    937:   TRACE(("FTS1 prepare: %s\n", zCommand));
                    938:   rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
                    939:   free(zCommand);
                    940:   return rc;
                    941: }
                    942: 
                    943: /* end utility functions */
                    944: 
                    945: /* Forward reference: struct fulltext_vtab is defined further down,
                         ** but struct Query already needs to hold a pointer to one. */
                    946: typedef struct fulltext_vtab fulltext_vtab;
                    947: 
                    948: /* A single term in a query is represented by an instance of
                    949: ** the following structure.  Terms are held in Query.pTerms[].
                    950: */
                    951: typedef struct QueryTerm {
                    952:   short int nPhrase; /* How many following terms are part of the same phrase */
                    953:   short int iPhrase; /* This is the i-th term of a phrase. */
                    954:   short int iColumn; /* Column of the index that must match this term */
                    955:   signed char isOr;  /* this term is preceded by "OR" */
                    956:   signed char isNot; /* this term is preceded by "-" */
                    957:   char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
                    958:   int nTerm;         /* Number of bytes in pTerm[] */
                    959: } QueryTerm;
                    960: 
                    961: 
                    962: /* A query string is parsed into a Query structure.
                    963:  *
                    964:  * We could, in theory, allow query strings to be complicated
                    965:  * nested expressions with precedence determined by parentheses.
                    966:  * But none of the major search engines do this.  (Perhaps the
                    967:  * feeling is that a parenthesized expression is too complex
                    968:  * an idea for the average user to grasp.)  Taking our lead from
                    969:  * the major search engines, we will allow queries to be a list
                    970:  * of terms (with an implied AND operator) or phrases in double-quotes,
                    971:  * with a single optional "-" before each non-phrase term to designate
                    972:  * negation and an optional OR connector.
                    973:  *
                    974:  * OR binds more tightly than the implied AND, which is what the
                    975:  * major search engines seem to do.  So, for example:
                    976:  * 
                    977:  *    [one two OR three]     ==>    one AND (two OR three)
                    978:  *    [one OR two three]     ==>    (one OR two) AND three
                    979:  *
                    980:  * A "-" before a term matches all entries that lack that term.
                    981:  * The "-" must occur immediately before the term with no intervening
                    982:  * space.  This is how the search engines do it.
                    983:  *
                    984:  * A NOT term cannot be the right-hand operand of an OR.  If this
                    985:  * occurs in the query string, the NOT is ignored:
                    986:  *
                    987:  *    [one OR -two]          ==>    one OR two
                    988:  *
                    989:  */
                    990: typedef struct Query {
                    991:   fulltext_vtab *pFts;  /* The full text index */
                    992:   int nTerms;           /* Number of terms in the query */
                    993:   QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
                    994:   int nextIsOr;         /* Set the isOr flag on the next inserted term */
                    995:   int nextColumn;       /* Next word parsed must be in this column */
                    996:   int dfltColumn;       /* The default column */
                    997: } Query;
                    998: 
                    999: 
                   1000: /*
                   1001: ** An instance of the following structure keeps track of generated
                   1002: ** matching-word offset information and snippets.  aMatch[] has room
                         ** for nAlloc entries, of which nMatch are in use.
                   1003: */
                   1004: typedef struct Snippet {
                   1005:   int nMatch;     /* Total number of matches */
                   1006:   int nAlloc;     /* Space allocated for aMatch[] */
                   1007:   struct snippetMatch { /* One entry for each matching term */
                   1008:     char snStatus;       /* Status flag for use while constructing snippets */
                   1009:     short int iCol;      /* The column that contains the match */
                   1010:     short int iTerm;     /* The index in Query.pTerms[] of the matching term */
                   1011:     short int nByte;     /* Number of bytes in the term */
                   1012:     int iStart;          /* The offset to the first character of the term */
                   1013:   } *aMatch;      /* Points to space obtained from malloc */
                   1014:   char *zOffset;  /* Text rendering of aMatch[] */
                   1015:   int nOffset;    /* strlen(zOffset) */
                   1016:   char *zSnippet; /* Snippet text */
                   1017:   int nSnippet;   /* strlen(zSnippet) */
                   1018: } Snippet;
                   1019: 
                   1020: 
                         /* The kind of lookup a cursor performs.  fulltext_cursor.iCursorType
                         ** holds one of these values (a copy of sqlite3_index_info.idxNum).
                         ** QUERY_FULLTEXT is the base of a range: QUERY_FULLTEXT+i selects a
                         ** full-text search of column i, so it must stay the last enumerator.
                         */
                   1021: typedef enum QueryType {
                   1022:   QUERY_GENERIC,   /* table scan */
                   1023:   QUERY_ROWID,     /* lookup by rowid */
                   1024:   QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
                   1025: } QueryType;
                   1026: 
                   1027: /* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
                   1028: ** before we start aggregating into larger segments.  Lower CHUNK_MAX
                   1029: ** means that for a given input we have more individual segments per
                   1030: ** term, which means more rows in the table and a bigger index (due to
                   1031: ** both more rows and bigger rowids).  But it also reduces the average
                   1032: ** cost of adding new elements to the segment 0 doclist, and it seems
                   1033: ** to reduce the number of pages read and written during inserts.  256
                   1034: ** was chosen by measuring insertion times for a certain input (first
                   1035: ** 10k documents of Enron corpus), though including query performance
                   1036: ** in the decision may argue for a larger value.
                         **
                         ** NOTE(review): the unit is presumably bytes of doclist data; confirm
                         ** against the code that compares a doclist size with CHUNK_MAX.
                   1037: */
                   1038: #define CHUNK_MAX 256
                   1039: 
                         /* Identifiers for the SQL statements used by this module.  Each value
                         ** indexes both fulltext_zStatement[] (the SQL text) and
                         ** fulltext_vtab.pFulltextStatements[] (the prepared statement), so the
                         ** order here must stay in step with fulltext_zStatement[].
                         */
                   1040: typedef enum fulltext_statement {
                   1041:   CONTENT_INSERT_STMT,
                   1042:   CONTENT_SELECT_STMT,
                   1043:   CONTENT_UPDATE_STMT,
                   1044:   CONTENT_DELETE_STMT,
                   1045: 
                   1046:   TERM_SELECT_STMT,
                   1047:   TERM_SELECT_ALL_STMT,
                   1048:   TERM_INSERT_STMT,
                   1049:   TERM_UPDATE_STMT,
                   1050:   TERM_DELETE_STMT,
                   1051: 
                   1052:   MAX_STMT                     /* Always at end! */
                   1053: } fulltext_statement;
                   1054: 
                   1055: /* SQL text for each fulltext_statement value.  These must exactly match the enum above. */
                         /* A NULL entry means the text depends on the table's columns and is
                         ** built at run time by the function named beside it.  The '%' in each
                         ** string is presumably expanded to zDb.zName by string_format() when
                         ** the statement is prepared -- confirm in sql_get_statement().
                         */
                   1056: /* TODO(adam): Is there some risk that a statement (in particular,
                   1057: ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
                   1058: ** query joins a virtual table to itself?  If so perhaps we should
                   1059: ** move some of these to the cursor object.
                   1060: */
                   1061: static const char *const fulltext_zStatement[MAX_STMT] = {
                   1062:   /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
                   1063:   /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
                   1064:   /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
                   1065:   /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
                   1066: 
                   1067:   /* TERM_SELECT */
                   1068:   "select rowid, doclist from %_term where term = ? and segment = ?",
                   1069:   /* TERM_SELECT_ALL */
                   1070:   "select doclist from %_term where term = ? order by segment",
                   1071:   /* TERM_INSERT */
                   1072:   "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
                   1073:   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
                   1074:   /* TERM_DELETE */ "delete from %_term where rowid = ?",
                   1075: };
                   1076: 
                   1077: /*
                   1078: ** A connection to a fulltext index is an instance of the following
                   1079: ** structure.  The xCreate and xConnect methods create an instance
                   1080: ** of this structure and xDestroy and xDisconnect free that instance.
                   1081: ** All other methods receive a pointer to the structure as one of their
                   1082: ** arguments.
                   1083: */
                   1084: struct fulltext_vtab {
                   1085:   sqlite3_vtab base;               /* Base class used by SQLite core */
                   1086:   sqlite3 *db;                     /* The database connection */
                   1087:   const char *zDb;                 /* logical database name */
                   1088:   const char *zName;               /* virtual table name */
                   1089:   int nColumn;                     /* number of columns in virtual table */
                   1090:   char **azColumn;                 /* column names.  malloced */
                   1091:   char **azContentColumn;          /* column names in content table; malloced */
                   1092:   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
                   1093: 
                   1094:   /* Precompiled statements which we keep as long as the table is
                   1095:   ** open.  One slot per fulltext_statement value.
                   1096:   */
                   1097:   sqlite3_stmt *pFulltextStatements[MAX_STMT];
                   1098: };
                   1099: 
                   1100: /*
                   1101: ** When the core wants to do a query, it creates a cursor using a
                   1102: ** call to xOpen.  This structure is an instance of a cursor.  It
                   1103: ** is destroyed by xClose.
                   1104: */
                   1105: typedef struct fulltext_cursor {
                   1106:   sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
                   1107:   QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
                   1108:   sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
                   1109:   int eof;                         /* True if at End Of Results */
                   1110:   Query q;                         /* Parsed query string */
                   1111:   Snippet snippet;                 /* Cached snippet for the current row */
                   1112:   int iColumn;                     /* Column being searched */
                   1113:   DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
                   1114: } fulltext_cursor;
                   1115: 
                   1116: static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
                   1117:   return (fulltext_vtab *) c->base.pVtab;
                   1118: }
                   1119: 
                   1120: static const sqlite3_module fulltextModule;   /* forward declaration */
                   1121: 
                   1122: /* Append a list of strings separated by commas to a StringBuffer. */
                   1123: static void appendList(StringBuffer *sb, int nString, char **azString){
                   1124:   int i;
                   1125:   for(i=0; i<nString; ++i){
                   1126:     if( i>0 ) append(sb, ", ");
                   1127:     append(sb, azString[i]);
                   1128:   }
                   1129: }
                   1130: 
                   1131: /* Return a dynamically generated statement of the form
                   1132:  *   insert into %_content (rowid, ...) values (?, ...)
                   1133:  */
                   1134: static const char *contentInsertStatement(fulltext_vtab *v){
                   1135:   StringBuffer sb;
                   1136:   int i;
                   1137: 
                   1138:   initStringBuffer(&sb);
                   1139:   append(&sb, "insert into %_content (rowid, ");
                   1140:   appendList(&sb, v->nColumn, v->azContentColumn);
                   1141:   append(&sb, ") values (?");
                   1142:   for(i=0; i<v->nColumn; ++i)
                   1143:     append(&sb, ", ?");
                   1144:   append(&sb, ")");
                   1145:   return sb.s;
                   1146: }
                   1147: 
                   1148: /* Return a dynamically generated statement of the form
                   1149:  *   update %_content set [col_0] = ?, [col_1] = ?, ...
                   1150:  *                    where rowid = ?
                   1151:  */
                   1152: static const char *contentUpdateStatement(fulltext_vtab *v){
                   1153:   StringBuffer sb;
                   1154:   int i;
                   1155: 
                   1156:   initStringBuffer(&sb);
                   1157:   append(&sb, "update %_content set ");
                   1158:   for(i=0; i<v->nColumn; ++i) {
                   1159:     if( i>0 ){
                   1160:       append(&sb, ", ");
                   1161:     }
                   1162:     append(&sb, v->azContentColumn[i]);
                   1163:     append(&sb, " = ?");
                   1164:   }
                   1165:   append(&sb, " where rowid = ?");
                   1166:   return sb.s;
                   1167: }
                   1168: 
                   1169: /* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
                   1170: ** If the indicated statement has never been prepared, it is prepared
                   1171: ** and cached, otherwise the cached version is reset.
                   1172: */
                   1173: static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
                   1174:                              sqlite3_stmt **ppStmt){
                   1175:   assert( iStmt<MAX_STMT );
                   1176:   if( v->pFulltextStatements[iStmt]==NULL ){
                   1177:     const char *zStmt;
                   1178:     int rc;
                   1179:     switch( iStmt ){
                   1180:       case CONTENT_INSERT_STMT:
                   1181:         zStmt = contentInsertStatement(v); break;
                   1182:       case CONTENT_UPDATE_STMT:
                   1183:         zStmt = contentUpdateStatement(v); break;
                   1184:       default:
                   1185:         zStmt = fulltext_zStatement[iStmt];
                   1186:     }
                   1187:     rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
                   1188:                          zStmt);
                   1189:     if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
                   1190:     if( rc!=SQLITE_OK ) return rc;
                   1191:   } else {
                   1192:     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
                   1193:     if( rc!=SQLITE_OK ) return rc;
                   1194:   }
                   1195: 
                   1196:   *ppStmt = v->pFulltextStatements[iStmt];
                   1197:   return SQLITE_OK;
                   1198: }
                   1199: 
                   1200: /* Step the indicated statement, handling errors SQLITE_BUSY (by
                   1201: ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
                   1202: ** bindings to the new statement).
                   1203: ** TODO(adam): We should extend this function so that it can work with
                   1204: ** statements declared locally, not only globally cached statements.
                   1205: */
                   1206: static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
                   1207:                               sqlite3_stmt **ppStmt){
                   1208:   int rc;
                   1209:   sqlite3_stmt *s = *ppStmt;
                   1210:   assert( iStmt<MAX_STMT );
                   1211:   assert( s==v->pFulltextStatements[iStmt] );
                   1212: 
                   1213:   while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
                   1214:     if( rc==SQLITE_BUSY ) continue;
                   1215:     if( rc!=SQLITE_ERROR ) return rc;
                   1216: 
                   1217:     /* If an SQLITE_SCHEMA error has occurred, then finalizing this
                   1218:      * statement is going to delete the fulltext_vtab structure. If
                   1219:      * the statement just executed is in the pFulltextStatements[]
                   1220:      * array, it will be finalized twice. So remove it before
                   1221:      * calling sqlite3_finalize().
                   1222:      */
                   1223:     v->pFulltextStatements[iStmt] = NULL;
                   1224:     rc = sqlite3_finalize(s);
                   1225:     break;
                   1226:   }
                   1227:   return rc;
                   1228: 
                   1229:  err:
                   1230:   sqlite3_finalize(s);
                   1231:   return rc;
                   1232: }
                   1233: 
                   1234: /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
                   1235: ** Useful for statements like UPDATE, where we expect no results.
                   1236: */
                   1237: static int sql_single_step_statement(fulltext_vtab *v,
                   1238:                                      fulltext_statement iStmt,
                   1239:                                      sqlite3_stmt **ppStmt){
                   1240:   int rc = sql_step_statement(v, iStmt, ppStmt);
                   1241:   return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
                   1242: }
                   1243: 
                   1244: /* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
                   1245: static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
                   1246:                           sqlite3_value **pValues){
                   1247:   sqlite3_stmt *s;
                   1248:   int i;
                   1249:   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
                   1250:   if( rc!=SQLITE_OK ) return rc;
                   1251: 
                   1252:   rc = sqlite3_bind_value(s, 1, rowid);
                   1253:   if( rc!=SQLITE_OK ) return rc;
                   1254: 
                   1255:   for(i=0; i<v->nColumn; ++i){
                   1256:     rc = sqlite3_bind_value(s, 2+i, pValues[i]);
                   1257:     if( rc!=SQLITE_OK ) return rc;
                   1258:   }
                   1259: 
                   1260:   return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
                   1261: }
                   1262: 
                   1263: /* update %_content set col0 = pValues[0], col1 = pValues[1], ...
                   1264:  *                  where rowid = [iRowid] */
                   1265: static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
                   1266:                           sqlite_int64 iRowid){
                   1267:   sqlite3_stmt *s;
                   1268:   int i;
                   1269:   int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
                   1270:   if( rc!=SQLITE_OK ) return rc;
                   1271: 
                   1272:   for(i=0; i<v->nColumn; ++i){
                   1273:     rc = sqlite3_bind_value(s, 1+i, pValues[i]);
                   1274:     if( rc!=SQLITE_OK ) return rc;
                   1275:   }
                   1276: 
                   1277:   rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
                   1278:   if( rc!=SQLITE_OK ) return rc;
                   1279: 
                   1280:   return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
                   1281: }
                   1282: 
                   1283: static void freeStringArray(int nString, const char **pString){
                   1284:   int i;
                   1285: 
                   1286:   for (i=0 ; i < nString ; ++i) {
                   1287:     if( pString[i]!=NULL ) free((void *) pString[i]);
                   1288:   }
                   1289:   free((void *) pString);
                   1290: }
                   1291: 
                   1292: /* select * from %_content where rowid = [iRow]
                   1293:  * The caller must delete the returned array and all strings in it.
                   1294:  * null fields will be NULL in the returned array.
                   1295:  *
                   1296:  * TODO: Perhaps we should return pointer/length strings here for consistency
                   1297:  * with other code which uses pointer/length. */
                   1298: static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
                   1299:                           const char ***pValues){
                   1300:   sqlite3_stmt *s;
                   1301:   const char **values;
                   1302:   int i;
                   1303:   int rc;
                   1304: 
                   1305:   *pValues = NULL;
                   1306: 
                   1307:   rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
                   1308:   if( rc!=SQLITE_OK ) return rc;
                   1309: 
                   1310:   rc = sqlite3_bind_int64(s, 1, iRow);
                   1311:   if( rc!=SQLITE_OK ) return rc;
                   1312: 
                   1313:   rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
                   1314:   if( rc!=SQLITE_ROW ) return rc;
                   1315: 
                   1316:   values = (const char **) malloc(v->nColumn * sizeof(const char *));
                   1317:   for(i=0; i<v->nColumn; ++i){
                   1318:     if( sqlite3_column_type(s, i)==SQLITE_NULL ){
                   1319:       values[i] = NULL;
                   1320:     }else{
                   1321:       values[i] = string_dup((char*)sqlite3_column_text(s, i));
                   1322:     }
                   1323:   }
                   1324: 
                   1325:   /* We expect only one row.  We must execute another sqlite3_step()
                   1326:    * to complete the iteration; otherwise the table will remain locked. */
                   1327:   rc = sqlite3_step(s);
                   1328:   if( rc==SQLITE_DONE ){
                   1329:     *pValues = values;
                   1330:     return SQLITE_OK;
                   1331:   }
                   1332: 
                   1333:   freeStringArray(v->nColumn, values);
                   1334:   return rc;
                   1335: }
                   1336: 
                   1337: /* delete from %_content where rowid = [iRow ] */
                   1338: static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
                   1339:   sqlite3_stmt *s;
                   1340:   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
                   1341:   if( rc!=SQLITE_OK ) return rc;
                   1342: 
                   1343:   rc = sqlite3_bind_int64(s, 1, iRow);
                   1344:   if( rc!=SQLITE_OK ) return rc;
                   1345: 
                   1346:   return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
                   1347: }
                   1348: 
                   1349: /* select rowid, doclist from %_term
                   1350:  *  where term = [pTerm] and segment = [iSegment]
                   1351:  * If found, returns SQLITE_ROW; the caller must free the
                   1352:  * returned doclist.  If no rows found, returns SQLITE_DONE. */
                   1353: static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
                   1354:                        int iSegment,
                   1355:                        sqlite_int64 *rowid, DocList *out){
                   1356:   sqlite3_stmt *s;
                   1357:   int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
                   1358:   if( rc!=SQLITE_OK ) return rc;
                   1359: 
                   1360:   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
                   1361:   if( rc!=SQLITE_OK ) return rc;
                   1362: 
                   1363:   rc = sqlite3_bind_int(s, 2, iSegment);
                   1364:   if( rc!=SQLITE_OK ) return rc;
                   1365: 
                   1366:   rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
                   1367:   if( rc!=SQLITE_ROW ) return rc;
                   1368: 
                   1369:   *rowid = sqlite3_column_int64(s, 0);
                   1370:   docListInit(out, DL_DEFAULT,
                   1371:               sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
                   1372: 
                   1373:   /* We expect only one row.  We must execute another sqlite3_step()
                   1374:    * to complete the iteration; otherwise the table will remain locked. */
                   1375:   rc = sqlite3_step(s);
                   1376:   return rc==SQLITE_DONE ? SQLITE_ROW : rc;
                   1377: }
                   1378: 
                   1379: /* Load the segment doclists for term pTerm and merge them in
                   1380: ** appropriate order into out.  Returns SQLITE_OK if successful.  If
                   1381: ** there are no segments for pTerm, successfully returns an empty
                   1382: ** doclist in out.
                   1383: **
                   1384: ** Each document consists of 1 or more "columns".  The number of
                   1385: ** columns is v->nColumn.  If iColumn==v->nColumn, then return
                   1386: ** position information about all columns.  If iColumn<v->nColumn,
                   1387: ** then only return position information about the iColumn-th column
                   1388: ** (where the first column is 0).
                   1389: */
                   1390: static int term_select_all(
                   1391:   fulltext_vtab *v,     /* The fulltext index we are querying against */
                   1392:   int iColumn,          /* If <nColumn, only look at the iColumn-th column */
                   1393:   const char *pTerm,    /* The term whose posting lists we want */
                   1394:   int nTerm,            /* Number of bytes in pTerm */
                   1395:   DocList *out          /* Write the resulting doclist here */
                   1396: ){
                   1397:   DocList doclist;
                   1398:   sqlite3_stmt *s;
                   1399:   int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
                   1400:   if( rc!=SQLITE_OK ) return rc;
                   1401: 
                   1402:   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
                   1403:   if( rc!=SQLITE_OK ) return rc;
                   1404: 
                   1405:   docListInit(&doclist, DL_DEFAULT, 0, 0);
                   1406: 
                   1407:   /* TODO(shess) Handle schema and busy errors. */
                   1408:   while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
                   1409:     DocList old;
                   1410: 
                   1411:     /* TODO(shess) If we processed doclists from oldest to newest, we
                   1412:     ** could skip the malloc() involved with the following call.  For
                   1413:     ** now, I'd rather keep this logic similar to index_insert_term().
                   1414:     ** We could additionally drop elements when we see deletes, but
                   1415:     ** that would require a distinct version of docListAccumulate().
                   1416:     */
                   1417:     docListInit(&old, DL_DEFAULT,
                   1418:                 sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
                   1419: 
                   1420:     if( iColumn<v->nColumn ){   /* querying a single column */
                   1421:       docListRestrictColumn(&old, iColumn);
                   1422:     }
                   1423: 
                   1424:     /* doclist contains the newer data, so write it over old.  Then
                   1425:     ** steal accumulated result for doclist.
                   1426:     */
                   1427:     docListAccumulate(&old, &doclist);
                   1428:     docListDestroy(&doclist);
                   1429:     doclist = old;
                   1430:   }
                   1431:   if( rc!=SQLITE_DONE ){
                   1432:     docListDestroy(&doclist);
                   1433:     return rc;
                   1434:   }
                   1435: 
                   1436:   docListDiscardEmpty(&doclist);
                   1437:   *out = doclist;
                   1438:   return SQLITE_OK;
                   1439: }
                   1440: 
                   1441: /* insert into %_term (rowid, term, segment, doclist)
                   1442:                values ([piRowid], [pTerm], [iSegment], [doclist])
                   1443: ** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
                   1444: **
                   1445: ** NOTE(shess) piRowid is IN, with values of "space of int64" plus
                   1446: ** null, it is not used to pass data back to the caller.
                   1447: */
                   1448: static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
                   1449:                        const char *pTerm, int nTerm,
                   1450:                        int iSegment, DocList *doclist){
                   1451:   sqlite3_stmt *s;
                   1452:   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
                   1453:   if( rc!=SQLITE_OK ) return rc;
                   1454: 
                   1455:   if( piRowid==NULL ){
                   1456:     rc = sqlite3_bind_null(s, 1);
                   1457:   }else{
                   1458:     rc = sqlite3_bind_int64(s, 1, *piRowid);
                   1459:   }
                   1460:   if( rc!=SQLITE_OK ) return rc;
                   1461: 
                   1462:   rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
                   1463:   if( rc!=SQLITE_OK ) return rc;
                   1464: 
                   1465:   rc = sqlite3_bind_int(s, 3, iSegment);
                   1466:   if( rc!=SQLITE_OK ) return rc;
                   1467: 
                   1468:   rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
                   1469:   if( rc!=SQLITE_OK ) return rc;
                   1470: 
                   1471:   return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
                   1472: }
                   1473: 
                   1474: /* update %_term set doclist = [doclist] where rowid = [rowid] */
                   1475: static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
                   1476:                        DocList *doclist){
                   1477:   sqlite3_stmt *s;
                   1478:   int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
                   1479:   if( rc!=SQLITE_OK ) return rc;
                   1480: 
                   1481:   rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
                   1482:   if( rc!=SQLITE_OK ) return rc;
                   1483: 
                   1484:   rc = sqlite3_bind_int64(s, 2, rowid);
                   1485:   if( rc!=SQLITE_OK ) return rc;
                   1486: 
                   1487:   return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
                   1488: }
                   1489: 
                   1490: static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
                   1491:   sqlite3_stmt *s;
                   1492:   int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
                   1493:   if( rc!=SQLITE_OK ) return rc;
                   1494: 
                   1495:   rc = sqlite3_bind_int64(s, 1, rowid);
                   1496:   if( rc!=SQLITE_OK ) return rc;
                   1497: 
                   1498:   return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
                   1499: }
                   1500: 
                   1501: /*
                   1502: ** Free the memory used to contain a fulltext_vtab structure.
                   1503: */
                   1504: static void fulltext_vtab_destroy(fulltext_vtab *v){
                   1505:   int iStmt, i;
                   1506: 
                   1507:   TRACE(("FTS1 Destroy %p\n", v));
                   1508:   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
                   1509:     if( v->pFulltextStatements[iStmt]!=NULL ){
                   1510:       sqlite3_finalize(v->pFulltextStatements[iStmt]);
                   1511:       v->pFulltextStatements[iStmt] = NULL;
                   1512:     }
                   1513:   }
                   1514: 
                   1515:   if( v->pTokenizer!=NULL ){
                   1516:     v->pTokenizer->pModule->xDestroy(v->pTokenizer);
                   1517:     v->pTokenizer = NULL;
                   1518:   }
                   1519:   
                   1520:   free(v->azColumn);
                   1521:   for(i = 0; i < v->nColumn; ++i) {
                   1522:     sqlite3_free(v->azContentColumn[i]);
                   1523:   }
                   1524:   free(v->azContentColumn);
                   1525:   free(v);
                   1526: }
                   1527: 
                   1528: /*
                   1529: ** Token types for parsing the arguments to xConnect or xCreate.
                   1530: */
                   1531: #define TOKEN_EOF         0    /* End of file */
                   1532: #define TOKEN_SPACE       1    /* Any kind of whitespace */
                   1533: #define TOKEN_ID          2    /* An identifier */
                   1534: #define TOKEN_STRING      3    /* A string literal */
                   1535: #define TOKEN_PUNCT       4    /* A single punctuation character */
                   1536: 
                   1537: /*
                   1538: ** If X is a character that can be used in an identifier then
                   1539: ** IdChar(X) will be true.  Otherwise it is false.
                   1540: **
                   1541: ** For ASCII, any character with the high-order bit set is
                   1542: ** allowed in an identifier.  For 7-bit characters, 
                   1543: ** sqlite3IsIdChar[X] must be 1.
                   1544: **
                   1545: ** Ticket #1066.  the SQL standard does not allow '$' in the
                   1546: ** middle of identfiers.  But many SQL implementations do. 
                   1547: ** SQLite will allow '$' in identifiers for compatibility.
                   1548: ** But the feature is undocumented.
                   1549: */
                   1550: static const char isIdChar[] = {
                   1551: /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
                   1552:     0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
                   1553:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
                   1554:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
                   1555:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
                   1556:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
                   1557:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
                   1558: };
                   1559: #define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
                   1560: 
                   1561: 
                   1562: /*
                   1563: ** Return the length of the token that begins at z[0]. 
                   1564: ** Store the token type in *tokenType before returning.
                   1565: */
                   1566: static int getToken(const char *z, int *tokenType){
                   1567:   int i, c;
                   1568:   switch( *z ){
                   1569:     case 0: {
                   1570:       *tokenType = TOKEN_EOF;
                   1571:       return 0;
                   1572:     }
                   1573:     case ' ': case '\t': case '\n': case '\f': case '\r': {
                   1574:       for(i=1; safe_isspace(z[i]); i++){}
                   1575:       *tokenType = TOKEN_SPACE;
                   1576:       return i;
                   1577:     }
                   1578:     case '`':
                   1579:     case '\'':
                   1580:     case '"': {
                   1581:       int delim = z[0];
                   1582:       for(i=1; (c=z[i])!=0; i++){
                   1583:         if( c==delim ){
                   1584:           if( z[i+1]==delim ){
                   1585:             i++;
                   1586:           }else{
                   1587:             break;
                   1588:           }
                   1589:         }
                   1590:       }
                   1591:       *tokenType = TOKEN_STRING;
                   1592:       return i + (c!=0);
                   1593:     }
                   1594:     case '[': {
                   1595:       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
                   1596:       *tokenType = TOKEN_ID;
                   1597:       return i;
                   1598:     }
                   1599:     default: {
                   1600:       if( !IdChar(*z) ){
                   1601:         break;
                   1602:       }
                   1603:       for(i=1; IdChar(z[i]); i++){}
                   1604:       *tokenType = TOKEN_ID;
                   1605:       return i;
                   1606:     }
                   1607:   }
                   1608:   *tokenType = TOKEN_PUNCT;
                   1609:   return 1;
                   1610: }
                   1611: 
                   1612: /*
                   1613: ** A token extracted from a string is an instance of the following
                   1614: ** structure.
                   1615: */
                   1616: typedef struct Token {
                   1617:   const char *z;       /* Pointer to token text.  Not '\000' terminated */
                   1618:   short int n;         /* Length of the token text in bytes. */
                   1619: } Token;
                   1620: 
                   1621: /*
                   1622: ** Given a input string (which is really one of the argv[] parameters
                   1623: ** passed into xConnect or xCreate) split the string up into tokens.
                   1624: ** Return an array of pointers to '\000' terminated strings, one string
                   1625: ** for each non-whitespace token.
                   1626: **
                   1627: ** The returned array is terminated by a single NULL pointer.
                   1628: **
                   1629: ** Space to hold the returned array is obtained from a single
                   1630: ** malloc and should be freed by passing the return value to free().
                   1631: ** The individual strings within the token list are all a part of
                   1632: ** the single memory allocation and will all be freed at once.
                   1633: */
                   1634: static char **tokenizeString(const char *z, int *pnToken){
                   1635:   int nToken = 0;
                   1636:   Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
                   1637:   int n = 1;
                   1638:   int e, i;
                   1639:   int totalSize = 0;
                   1640:   char **azToken;
                   1641:   char *zCopy;
                   1642:   while( n>0 ){
                   1643:     n = getToken(z, &e);
                   1644:     if( e!=TOKEN_SPACE ){
                   1645:       aToken[nToken].z = z;
                   1646:       aToken[nToken].n = n;
                   1647:       nToken++;
                   1648:       totalSize += n+1;
                   1649:     }
                   1650:     z += n;
                   1651:   }
                   1652:   azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
                   1653:   zCopy = (char*)&azToken[nToken];
                   1654:   nToken--;
                   1655:   for(i=0; i<nToken; i++){
                   1656:     azToken[i] = zCopy;
                   1657:     n = aToken[i].n;
                   1658:     memcpy(zCopy, aToken[i].z, n);
                   1659:     zCopy[n] = 0;
                   1660:     zCopy += n+1;
                   1661:   }
                   1662:   azToken[nToken] = 0;
                   1663:   free(aToken);
                   1664:   *pnToken = nToken;
                   1665:   return azToken;
                   1666: }
                   1667: 
                   1668: /*
                   1669: ** Convert an SQL-style quoted string into a normal string by removing
                   1670: ** the quote characters.  The conversion is done in-place.  If the
                   1671: ** input does not begin with a quote character, then this routine
                   1672: ** is a no-op.
                   1673: **
                   1674: ** Examples:
                   1675: **
                   1676: **     "abc"   becomes   abc
                   1677: **     'xyz'   becomes   xyz
                   1678: **     [pqr]   becomes   pqr
                   1679: **     `mno`   becomes   mno
                   1680: */
                   1681: static void dequoteString(char *z){
                   1682:   int quote;
                   1683:   int i, j;
                   1684:   if( z==0 ) return;
                   1685:   quote = z[0];
                   1686:   switch( quote ){
                   1687:     case '\'':  break;
                   1688:     case '"':   break;
                   1689:     case '`':   break;                /* For MySQL compatibility */
                   1690:     case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
                   1691:     default:    return;
                   1692:   }
                   1693:   for(i=1, j=0; z[i]; i++){
                   1694:     if( z[i]==quote ){
                   1695:       if( z[i+1]==quote ){
                   1696:         z[j++] = quote;
                   1697:         i++;
                   1698:       }else{
                   1699:         z[j++] = 0;
                   1700:         break;
                   1701:       }
                   1702:     }else{
                   1703:       z[j++] = z[i];
                   1704:     }
                   1705:   }
                   1706: }
                   1707: 
                   1708: /*
                   1709: ** The input azIn is a NULL-terminated list of tokens.  Remove the first
                   1710: ** token and all punctuation tokens.  Remove the quotes from
                   1711: ** around string literal tokens.
                   1712: **
                   1713: ** Example:
                   1714: **
                   1715: **     input:      tokenize chinese ( 'simplifed' , 'mixed' )
                   1716: **     output:     chinese simplifed mixed
                   1717: **
                   1718: ** Another example:
                   1719: **
                   1720: **     input:      delimiters ( '[' , ']' , '...' )
                   1721: **     output:     [ ] ...
                   1722: */
                   1723: static void tokenListToIdList(char **azIn){
                   1724:   int i, j;
                   1725:   if( azIn ){
                   1726:     for(i=0, j=-1; azIn[i]; i++){
                   1727:       if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
                   1728:         dequoteString(azIn[i]);
                   1729:         if( j>=0 ){
                   1730:           azIn[j] = azIn[i];
                   1731:         }
                   1732:         j++;
                   1733:       }
                   1734:     }
                   1735:     azIn[j] = 0;
                   1736:   }
                   1737: }
                   1738: 
                   1739: 
                   1740: /*
                   1741: ** Find the first alphanumeric token in the string zIn.  Null-terminate
                   1742: ** this token.  Remove any quotation marks.  And return a pointer to
                   1743: ** the result.
                   1744: */
                   1745: static char *firstToken(char *zIn, char **pzTail){
                   1746:   int n, ttype;
                   1747:   while(1){
                   1748:     n = getToken(zIn, &ttype);
                   1749:     if( ttype==TOKEN_SPACE ){
                   1750:       zIn += n;
                   1751:     }else if( ttype==TOKEN_EOF ){
                   1752:       *pzTail = zIn;
                   1753:       return 0;
                   1754:     }else{
                   1755:       zIn[n] = 0;
                   1756:       *pzTail = &zIn[1];
                   1757:       dequoteString(zIn);
                   1758:       return zIn;
                   1759:     }
                   1760:   }
                   1761:   /*NOTREACHED*/
                   1762: }
                   1763: 
                   1764: /* Return true if...
                   1765: **
                   1766: **   *  s begins with the string t, ignoring case
                   1767: **   *  s is longer than t
                   1768: **   *  The first character of s beyond t is not a alphanumeric
                   1769: ** 
                   1770: ** Ignore leading space in *s.
                   1771: **
                   1772: ** To put it another way, return true if the first token of
                   1773: ** s[] is t[].
                   1774: */
                   1775: static int startsWith(const char *s, const char *t){
                   1776:   while( safe_isspace(*s) ){ s++; }
                   1777:   while( *t ){
                   1778:     if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
                   1779:   }
                   1780:   return *s!='_' && !safe_isalnum(*s);
                   1781: }
                   1782: 
                   1783: /*
                   1784: ** An instance of this structure defines the "spec" of a
                   1785: ** full text index.  This structure is populated by parseSpec
                   1786: ** and use by fulltextConnect and fulltextCreate.
                   1787: */
                   1788: typedef struct TableSpec {
                   1789:   const char *zDb;         /* Logical database name */
                   1790:   const char *zName;       /* Name of the full-text index */
                   1791:   int nColumn;             /* Number of columns to be indexed */
                   1792:   char **azColumn;         /* Original names of columns to be indexed */
                   1793:   char **azContentColumn;  /* Column names for %_content */
                   1794:   char **azTokenizer;      /* Name of tokenizer and its arguments */
                   1795: } TableSpec;
                   1796: 
                   1797: /*
                   1798: ** Reclaim all of the memory used by a TableSpec
                   1799: */
                   1800: static void clearTableSpec(TableSpec *p) {
                   1801:   free(p->azColumn);
                   1802:   free(p->azContentColumn);
                   1803:   free(p->azTokenizer);
                   1804: }
                   1805: 
                   1806: /* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
                   1807:  *
                   1808:  * CREATE VIRTUAL TABLE email
                   1809:  *        USING fts1(subject, body, tokenize mytokenizer(myarg))
                   1810:  *
                   1811:  * We return parsed information in a TableSpec structure.
                   1812:  * 
                   1813:  */
                   1814: static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
                   1815:                      char**pzErr){
                   1816:   int i, n;
                   1817:   char *z, *zDummy;
                   1818:   char **azArg;
                   1819:   const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
                   1820: 
                   1821:   assert( argc>=3 );
                   1822:   /* Current interface:
                   1823:   ** argv[0] - module name
                   1824:   ** argv[1] - database name
                   1825:   ** argv[2] - table name
                   1826:   ** argv[3..] - columns, optionally followed by tokenizer specification
                   1827:   **             and snippet delimiters specification.
                   1828:   */
                   1829: 
                   1830:   /* Make a copy of the complete argv[][] array in a single allocation.
                   1831:   ** The argv[][] array is read-only and transient.  We can write to the
                   1832:   ** copy in order to modify things and the copy is persistent.
                   1833:   */
                   1834:   memset(pSpec, 0, sizeof(*pSpec));
                   1835:   for(i=n=0; i<argc; i++){
                   1836:     n += strlen(argv[i]) + 1;
                   1837:   }
                   1838:   azArg = malloc( sizeof(char*)*argc + n );
                   1839:   if( azArg==0 ){
                   1840:     return SQLITE_NOMEM;
                   1841:   }
                   1842:   z = (char*)&azArg[argc];
                   1843:   for(i=0; i<argc; i++){
                   1844:     azArg[i] = z;
                   1845:     strcpy(z, argv[i]);
                   1846:     z += strlen(z)+1;
                   1847:   }
                   1848: 
                   1849:   /* Identify the column names and the tokenizer and delimiter arguments
                   1850:   ** in the argv[][] array.
                   1851:   */
                   1852:   pSpec->zDb = azArg[1];
                   1853:   pSpec->zName = azArg[2];
                   1854:   pSpec->nColumn = 0;
                   1855:   pSpec->azColumn = azArg;
                   1856:   zTokenizer = "tokenize simple";
                   1857:   for(i=3; i<argc; ++i){
                   1858:     if( startsWith(azArg[i],"tokenize") ){
                   1859:       zTokenizer = azArg[i];
                   1860:     }else{
                   1861:       z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
                   1862:       pSpec->nColumn++;
                   1863:     }
                   1864:   }
                   1865:   if( pSpec->nColumn==0 ){
                   1866:     azArg[0] = "content";
                   1867:     pSpec->nColumn = 1;
                   1868:   }
                   1869: 
                   1870:   /*
                   1871:   ** Construct the list of content column names.
                   1872:   **
                   1873:   ** Each content column name will be of the form cNNAAAA
                   1874:   ** where NN is the column number and AAAA is the sanitized
                   1875:   ** column name.  "sanitized" means that special characters are
                   1876:   ** converted to "_".  The cNN prefix guarantees that all column
                   1877:   ** names are unique.
                   1878:   **
                   1879:   ** The AAAA suffix is not strictly necessary.  It is included
                   1880:   ** for the convenience of people who might examine the generated
                   1881:   ** %_content table and wonder what the columns are used for.
                   1882:   */
                   1883:   pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
                   1884:   if( pSpec->azContentColumn==0 ){
                   1885:     clearTableSpec(pSpec);
                   1886:     return SQLITE_NOMEM;
                   1887:   }
                   1888:   for(i=0; i<pSpec->nColumn; i++){
                   1889:     char *p;
                   1890:     pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
                   1891:     for (p = pSpec->azContentColumn[i]; *p ; ++p) {
                   1892:       if( !safe_isalnum(*p) ) *p = '_';
                   1893:     }
                   1894:   }
                   1895: 
                   1896:   /*
                   1897:   ** Parse the tokenizer specification string.
                   1898:   */
                   1899:   pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
                   1900:   tokenListToIdList(pSpec->azTokenizer);
                   1901: 
                   1902:   return SQLITE_OK;
                   1903: }
                   1904: 
                   1905: /*
                   1906: ** Generate a CREATE TABLE statement that describes the schema of
                   1907: ** the virtual table.  Return a pointer to this schema string.
                   1908: **
                   1909: ** Space is obtained from sqlite3_mprintf() and should be freed
                   1910: ** using sqlite3_free().
                   1911: */
                   1912: static char *fulltextSchema(
                   1913:   int nColumn,                  /* Number of columns */
                   1914:   const char *const* azColumn,  /* List of columns */
                   1915:   const char *zTableName        /* Name of the table */
                   1916: ){
                   1917:   int i;
                   1918:   char *zSchema, *zNext;
                   1919:   const char *zSep = "(";
                   1920:   zSchema = sqlite3_mprintf("CREATE TABLE x");
                   1921:   for(i=0; i<nColumn; i++){
                   1922:     zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
                   1923:     sqlite3_free(zSchema);
                   1924:     zSchema = zNext;
                   1925:     zSep = ",";
                   1926:   }
                   1927:   zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
                   1928:   sqlite3_free(zSchema);
                   1929:   return zNext;
                   1930: }
                   1931: 
                   1932: /*
                   1933: ** Build a new sqlite3_vtab structure that will describe the
                   1934: ** fulltext index defined by spec.
                   1935: */
                   1936: static int constructVtab(
                   1937:   sqlite3 *db,              /* The SQLite database connection */
                   1938:   TableSpec *spec,          /* Parsed spec information from parseSpec() */
                   1939:   sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
                   1940:   char **pzErr              /* Write any error message here */
                   1941: ){
                   1942:   int rc;
                   1943:   int n;
                   1944:   fulltext_vtab *v = 0;
                   1945:   const sqlite3_tokenizer_module *m = NULL;
                   1946:   char *schema;
                   1947: 
                   1948:   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
                   1949:   if( v==0 ) return SQLITE_NOMEM;
                   1950:   memset(v, 0, sizeof(*v));
                   1951:   /* sqlite will initialize v->base */
                   1952:   v->db = db;
                   1953:   v->zDb = spec->zDb;       /* Freed when azColumn is freed */
                   1954:   v->zName = spec->zName;   /* Freed when azColumn is freed */
                   1955:   v->nColumn = spec->nColumn;
                   1956:   v->azContentColumn = spec->azContentColumn;
                   1957:   spec->azContentColumn = 0;
                   1958:   v->azColumn = spec->azColumn;
                   1959:   spec->azColumn = 0;
                   1960: 
                   1961:   if( spec->azTokenizer==0 ){
                   1962:     return SQLITE_NOMEM;
                   1963:   }
                   1964:   /* TODO(shess) For now, add new tokenizers as else if clauses. */
                   1965:   if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
                   1966:     sqlite3Fts1SimpleTokenizerModule(&m);
                   1967:   }else if( startsWith(spec->azTokenizer[0], "porter") ){
                   1968:     sqlite3Fts1PorterTokenizerModule(&m);
                   1969:   }else{
                   1970:     *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
                   1971:     rc = SQLITE_ERROR;
                   1972:     goto err;
                   1973:   }
                   1974:   for(n=0; spec->azTokenizer[n]; n++){}
                   1975:   if( n ){
                   1976:     rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
                   1977:                     &v->pTokenizer);
                   1978:   }else{
                   1979:     rc = m->xCreate(0, 0, &v->pTokenizer);
                   1980:   }
                   1981:   if( rc!=SQLITE_OK ) goto err;
                   1982:   v->pTokenizer->pModule = m;
                   1983: 
                   1984:   /* TODO: verify the existence of backing tables foo_content, foo_term */
                   1985: 
                   1986:   schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
                   1987:                           spec->zName);
                   1988:   rc = sqlite3_declare_vtab(db, schema);
                   1989:   sqlite3_free(schema);
                   1990:   if( rc!=SQLITE_OK ) goto err;
                   1991: 
                   1992:   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
                   1993: 
                   1994:   *ppVTab = &v->base;
                   1995:   TRACE(("FTS1 Connect %p\n", v));
                   1996: 
                   1997:   return rc;
                   1998: 
                   1999: err:
                   2000:   fulltext_vtab_destroy(v);
                   2001:   return rc;
                   2002: }
                   2003: 
                   2004: static int fulltextConnect(
                   2005:   sqlite3 *db,
                   2006:   void *pAux,
                   2007:   int argc, const char *const*argv,
                   2008:   sqlite3_vtab **ppVTab,
                   2009:   char **pzErr
                   2010: ){
                   2011:   TableSpec spec;
                   2012:   int rc = parseSpec(&spec, argc, argv, pzErr);
                   2013:   if( rc!=SQLITE_OK ) return rc;
                   2014: 
                   2015:   rc = constructVtab(db, &spec, ppVTab, pzErr);
                   2016:   clearTableSpec(&spec);
                   2017:   return rc;
                   2018: }
                   2019: 
                   2020:   /* The %_content table holds the text of each document, with
                   2021:   ** the rowid used as the docid.
                   2022:   **
                   2023:   ** The %_term table maps each term to a document list blob
                   2024:   ** containing elements sorted by ascending docid, each element
                   2025:   ** encoded as:
                   2026:   **
                   2027:   **   docid varint-encoded
                   2028:   **   token elements:
                   2029:   **     position+1 varint-encoded as delta from previous position
                   2030:   **     start offset varint-encoded as delta from previous start offset
                   2031:   **     end offset varint-encoded as delta from start offset
                   2032:   **
                   2033:   ** The sentinel position of 0 indicates the end of the token list.
                   2034:   **
                   2035:   ** Additionally, doclist blobs are chunked into multiple segments,
                   2036:   ** using segment to order the segments.  New elements are added to
                   2037:   ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then
                   2038:   ** segment 0 is deleted, and the doclist is inserted at segment 1.
                   2039:   ** If there is already a doclist at segment 1, the segment 0 doclist
                   2040:   ** is merged with it, the segment 1 doclist is deleted, and the
                   2041:   ** merged doclist is inserted at segment 2, repeating those
                   2042:   ** operations until an insert succeeds.
                   2043:   **
                   2044:   ** Since this structure doesn't allow us to update elements in place
                   2045:   ** in case of deletion or update, these are simply written to
                   2046:   ** segment 0 (with an empty token list in case of deletion), with
                   2047:   ** docListAccumulate() taking care to retain lower-segment
                   2048:   ** information in preference to higher-segment information.
                   2049:   */
                   2050:   /* TODO(shess) Provide a VACUUM type operation which both removes
                   2051:   ** deleted elements which are no longer necessary, and duplicated
                   2052:   ** elements.  I suspect this will probably not be necessary in
                   2053:   ** practice, though.
                   2054:   */
                   2055: static int fulltextCreate(sqlite3 *db, void *pAux,
                   2056:                           int argc, const char * const *argv,
                   2057:                           sqlite3_vtab **ppVTab, char **pzErr){
                   2058:   int rc;
                   2059:   TableSpec spec;
                   2060:   StringBuffer schema;
                   2061:   TRACE(("FTS1 Create\n"));
                   2062: 
                   2063:   rc = parseSpec(&spec, argc, argv, pzErr);
                   2064:   if( rc!=SQLITE_OK ) return rc;
                   2065: 
                   2066:   initStringBuffer(&schema);
                   2067:   append(&schema, "CREATE TABLE %_content(");
                   2068:   appendList(&schema, spec.nColumn, spec.azContentColumn);
                   2069:   append(&schema, ")");
                   2070:   rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
                   2071:   free(schema.s);
                   2072:   if( rc!=SQLITE_OK ) goto out;
                   2073: 
                   2074:   rc = sql_exec(db, spec.zDb, spec.zName,
                   2075:     "create table %_term(term text, segment integer, doclist blob, "
                   2076:                         "primary key(term, segment));");
                   2077:   if( rc!=SQLITE_OK ) goto out;
                   2078: 
                   2079:   rc = constructVtab(db, &spec, ppVTab, pzErr);
                   2080: 
                   2081: out:
                   2082:   clearTableSpec(&spec);
                   2083:   return rc;
                   2084: }
                   2085: 
                   2086: /* Decide how to handle an SQL query. */
                   2087: static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
                   2088:   int i;
                   2089:   TRACE(("FTS1 BestIndex\n"));
                   2090: 
                   2091:   for(i=0; i<pInfo->nConstraint; ++i){
                   2092:     const struct sqlite3_index_constraint *pConstraint;
                   2093:     pConstraint = &pInfo->aConstraint[i];
                   2094:     if( pConstraint->usable ) {
                   2095:       if( pConstraint->iColumn==-1 &&
                   2096:           pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
                   2097:         pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
                   2098:         TRACE(("FTS1 QUERY_ROWID\n"));
                   2099:       } else if( pConstraint->iColumn>=0 &&
                   2100:                  pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
                   2101:         /* full-text search */
                   2102:         pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
                   2103:         TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
                   2104:       } else continue;
                   2105: 
                   2106:       pInfo->aConstraintUsage[i].argvIndex = 1;
                   2107:       pInfo->aConstraintUsage[i].omit = 1;
                   2108: 
                   2109:       /* An arbitrary value for now.
                   2110:        * TODO: Perhaps rowid matches should be considered cheaper than
                   2111:        * full-text searches. */
                   2112:       pInfo->estimatedCost = 1.0;   
                   2113: 
                   2114:       return SQLITE_OK;
                   2115:     }
                   2116:   }
                   2117:   pInfo->idxNum = QUERY_GENERIC;
                   2118:   return SQLITE_OK;
                   2119: }
                   2120: 
                   2121: static int fulltextDisconnect(sqlite3_vtab *pVTab){
                   2122:   TRACE(("FTS1 Disconnect %p\n", pVTab));
                   2123:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
                   2124:   return SQLITE_OK;
                   2125: }
                   2126: 
                   2127: static int fulltextDestroy(sqlite3_vtab *pVTab){
                   2128:   fulltext_vtab *v = (fulltext_vtab *)pVTab;
                   2129:   int rc;
                   2130: 
                   2131:   TRACE(("FTS1 Destroy %p\n", pVTab));
                   2132:   rc = sql_exec(v->db, v->zDb, v->zName,
                   2133:                 "drop table if exists %_content;"
                   2134:                 "drop table if exists %_term;"
                   2135:                 );
                   2136:   if( rc!=SQLITE_OK ) return rc;
                   2137: 
                   2138:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
                   2139:   return SQLITE_OK;
                   2140: }
                   2141: 
                   2142: static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
                   2143:   fulltext_cursor *c;
                   2144: 
                   2145:   c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
                   2146:   /* sqlite will initialize c->base */
                   2147:   *ppCursor = &c->base;
                   2148:   TRACE(("FTS1 Open %p: %p\n", pVTab, c));
                   2149: 
                   2150:   return SQLITE_OK;
                   2151: }
                   2152: 
                   2153: 
                   2154: /* Free all of the dynamically allocated memory held by *q
                   2155: */
                   2156: static void queryClear(Query *q){
                   2157:   int i;
                   2158:   for(i = 0; i < q->nTerms; ++i){
                   2159:     free(q->pTerms[i].pTerm);
                   2160:   }
                   2161:   free(q->pTerms);
                   2162:   memset(q, 0, sizeof(*q));
                   2163: }
                   2164: 
                   2165: /* Free all of the dynamically allocated memory held by the
                   2166: ** Snippet
                   2167: */
                   2168: static void snippetClear(Snippet *p){
                   2169:   free(p->aMatch);
                   2170:   free(p->zOffset);
                   2171:   free(p->zSnippet);
                   2172:   memset(p, 0, sizeof(*p));
                   2173: }
                   2174: /*
                   2175: ** Append a single entry to the p->aMatch[] log.
                   2176: */
                   2177: static void snippetAppendMatch(
                   2178:   Snippet *p,               /* Append the entry to this snippet */
                   2179:   int iCol, int iTerm,      /* The column and query term */
                   2180:   int iStart, int nByte     /* Offset and size of the match */
                   2181: ){
                   2182:   int i;
                   2183:   struct snippetMatch *pMatch;
                   2184:   if( p->nMatch+1>=p->nAlloc ){
                   2185:     p->nAlloc = p->nAlloc*2 + 10;
                   2186:     p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
                   2187:     if( p->aMatch==0 ){
                   2188:       p->nMatch = 0;
                   2189:       p->nAlloc = 0;
                   2190:       return;
                   2191:     }
                   2192:   }
                   2193:   i = p->nMatch++;
                   2194:   pMatch = &p->aMatch[i];
                   2195:   pMatch->iCol = iCol;
                   2196:   pMatch->iTerm = iTerm;
                   2197:   pMatch->iStart = iStart;
                   2198:   pMatch->nByte = nByte;
                   2199: }
                   2200: 
                   2201: /*
                   2202: ** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
                   2203: */
                   2204: #define FTS1_ROTOR_SZ   (32)
                   2205: #define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
                   2206: 
                   2207: /*
                   2208: ** Add entries to pSnippet->aMatch[] for every match that occurs against
                   2209: ** document zDoc[0..nDoc-1] which is stored in column iColumn.
                   2210: */
                   2211: static void snippetOffsetsOfColumn(
                   2212:   Query *pQuery,
                   2213:   Snippet *pSnippet,
                   2214:   int iColumn,
                   2215:   const char *zDoc,
                   2216:   int nDoc
                   2217: ){
                   2218:   const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
                   2219:   sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
                   2220:   sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
                   2221:   fulltext_vtab *pVtab;                /* The full text index */
                   2222:   int nColumn;                         /* Number of columns in the index */
                   2223:   const QueryTerm *aTerm;              /* Query string terms */
                   2224:   int nTerm;                           /* Number of query string terms */  
                   2225:   int i, j;                            /* Loop counters */
                   2226:   int rc;                              /* Return code */
                   2227:   unsigned int match, prevMatch;       /* Phrase search bitmasks */
                   2228:   const char *zToken;                  /* Next token from the tokenizer */
                   2229:   int nToken;                          /* Size of zToken */
                   2230:   int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
                   2231: 
                   2232:   /* The following variables keep a circular buffer of the last
                   2233:   ** few tokens */
                   2234:   unsigned int iRotor = 0;             /* Index of current token */
                   2235:   int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
                   2236:   int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */
                   2237: 
                   2238:   pVtab = pQuery->pFts;
                   2239:   nColumn = pVtab->nColumn;
                   2240:   pTokenizer = pVtab->pTokenizer;
                   2241:   pTModule = pTokenizer->pModule;
                   2242:   rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
                   2243:   if( rc ) return;
                   2244:   pTCursor->pTokenizer = pTokenizer;
                   2245:   aTerm = pQuery->pTerms;
                   2246:   nTerm = pQuery->nTerms;
                   2247:   if( nTerm>=FTS1_ROTOR_SZ ){
                   2248:     nTerm = FTS1_ROTOR_SZ - 1;
                   2249:   }
                   2250:   prevMatch = 0;
                   2251:   while(1){
                   2252:     rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
                   2253:     if( rc ) break;
                   2254:     iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
                   2255:     iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
                   2256:     match = 0;
                   2257:     for(i=0; i<nTerm; i++){
                   2258:       int iCol;
                   2259:       iCol = aTerm[i].iColumn;
                   2260:       if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
                   2261:       if( aTerm[i].nTerm!=nToken ) continue;
                   2262:       if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
                   2263:       if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
                   2264:       match |= 1<<i;
                   2265:       if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
                   2266:         for(j=aTerm[i].iPhrase-1; j>=0; j--){
                   2267:           int k = (iRotor-j) & FTS1_ROTOR_MASK;
                   2268:           snippetAppendMatch(pSnippet, iColumn, i-j,
                   2269:                 iRotorBegin[k], iRotorLen[k]);
                   2270:         }
                   2271:       }
                   2272:     }
                   2273:     prevMatch = match<<1;
                   2274:     iRotor++;
                   2275:   }
                   2276:   pTModule->xClose(pTCursor);  
                   2277: }
                   2278: 
                   2279: 
                   2280: /*
                   2281: ** Compute all offsets for the current row of the query.  
                   2282: ** If the offsets have already been computed, this routine is a no-op.
                   2283: */
                   2284: static void snippetAllOffsets(fulltext_cursor *p){
                   2285:   int nColumn;
                   2286:   int iColumn, i;
                   2287:   int iFirst, iLast;
                   2288:   fulltext_vtab *pFts;
                   2289: 
                   2290:   if( p->snippet.nMatch ) return;
                   2291:   if( p->q.nTerms==0 ) return;
                   2292:   pFts = p->q.pFts;
                   2293:   nColumn = pFts->nColumn;
                   2294:   iColumn = p->iCursorType - QUERY_FULLTEXT;
                   2295:   if( iColumn<0 || iColumn>=nColumn ){
                   2296:     iFirst = 0;
                   2297:     iLast = nColumn-1;
                   2298:   }else{
                   2299:     iFirst = iColumn;
                   2300:     iLast = iColumn;
                   2301:   }
                   2302:   for(i=iFirst; i<=iLast; i++){
                   2303:     const char *zDoc;
                   2304:     int nDoc;
                   2305:     zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
                   2306:     nDoc = sqlite3_column_bytes(p->pStmt, i+1);
                   2307:     snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
                   2308:   }
                   2309: }
                   2310: 
                   2311: /*
                   2312: ** Convert the information in the aMatch[] array of the snippet
                   2313: ** into the string zOffset[0..nOffset-1].
                   2314: */
                   2315: static void snippetOffsetText(Snippet *p){
                   2316:   int i;
                   2317:   int cnt = 0;
                   2318:   StringBuffer sb;
                   2319:   char zBuf[200];
                   2320:   if( p->zOffset ) return;
                   2321:   initStringBuffer(&sb);
                   2322:   for(i=0; i<p->nMatch; i++){
                   2323:     struct snippetMatch *pMatch = &p->aMatch[i];
                   2324:     zBuf[0] = ' ';
                   2325:     sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
                   2326:         pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
                   2327:     append(&sb, zBuf);
                   2328:     cnt++;
                   2329:   }
                   2330:   p->zOffset = sb.s;
                   2331:   p->nOffset = sb.len;
                   2332: }
                   2333: 
                   2334: /*
                   2335: ** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
                   2336: ** of matching words some of which might be in zDoc.  zDoc is column
                   2337: ** number iCol.
                   2338: **
                   2339: ** iBreak is suggested spot in zDoc where we could begin or end an
                   2340: ** excerpt.  Return a value similar to iBreak but possibly adjusted
                   2341: ** to be a little left or right so that the break point is better.
                   2342: */
                   2343: static int wordBoundary(
                   2344:   int iBreak,                   /* The suggested break point */
                   2345:   const char *zDoc,             /* Document text */
                   2346:   int nDoc,                     /* Number of bytes in zDoc[] */
                   2347:   struct snippetMatch *aMatch,  /* Matching words */
                   2348:   int nMatch,                   /* Number of entries in aMatch[] */
                   2349:   int iCol                      /* The column number for zDoc[] */
                   2350: ){
                   2351:   int i;
                   2352:   if( iBreak<=10 ){
                   2353:     return 0;
                   2354:   }
                   2355:   if( iBreak>=nDoc-10 ){
                   2356:     return nDoc;
                   2357:   }
                   2358:   for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
                   2359:   while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
                   2360:   if( i<nMatch ){
                   2361:     if( aMatch[i].iStart<iBreak+10 ){
                   2362:       return aMatch[i].iStart;
                   2363:     }
                   2364:     if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
                   2365:       return aMatch[i-1].iStart;
                   2366:     }
                   2367:   }
                   2368:   for(i=1; i<=10; i++){
                   2369:     if( safe_isspace(zDoc[iBreak-i]) ){
                   2370:       return iBreak - i + 1;
                   2371:     }
                   2372:     if( safe_isspace(zDoc[iBreak+i]) ){
                   2373:       return iBreak + i + 1;
                   2374:     }
                   2375:   }
                   2376:   return iBreak;
                   2377: }
                   2378: 
                   2379: /*
                   2380: ** If the StringBuffer does not end in white space, add a single
                   2381: ** space character to the end.
                   2382: */
                   2383: static void appendWhiteSpace(StringBuffer *p){
                   2384:   if( p->len==0 ) return;
                   2385:   if( safe_isspace(p->s[p->len-1]) ) return;
                   2386:   append(p, " ");
                   2387: }
                   2388: 
                   2389: /*
                   2390: ** Remove white space from teh end of the StringBuffer
                   2391: */
                   2392: static void trimWhiteSpace(StringBuffer *p){
                   2393:   while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
                   2394:     p->len--;
                   2395:   }
                   2396: }
                   2397: 
                   2398: 
                   2399: 
                   2400: /*
                   2401: ** Allowed values for Snippet.aMatch[].snStatus
                   2402: */
                   2403: #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
                   2404: #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
                   2405: 
                   2406: /*
                   2407: ** Generate the text of a snippet.
                   2408: */
                   2409: static void snippetText(
                   2410:   fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
                   2411:   const char *zStartMark,     /* Markup to appear before each match */
                   2412:   const char *zEndMark,       /* Markup to appear after each match */
                   2413:   const char *zEllipsis       /* Ellipsis mark */
                   2414: ){
                   2415:   int i, j;
                   2416:   struct snippetMatch *aMatch;
                   2417:   int nMatch;
                   2418:   int nDesired;
                   2419:   StringBuffer sb;
                   2420:   int tailCol;
                   2421:   int tailOffset;
                   2422:   int iCol;
                   2423:   int nDoc;
                   2424:   const char *zDoc;
                   2425:   int iStart, iEnd;
                   2426:   int tailEllipsis = 0;
                   2427:   int iMatch;
                   2428:   
                   2429: 
                   2430:   free(pCursor->snippet.zSnippet);
                   2431:   pCursor->snippet.zSnippet = 0;
                   2432:   aMatch = pCursor->snippet.aMatch;
                   2433:   nMatch = pCursor->snippet.nMatch;
                   2434:   initStringBuffer(&sb);
                   2435: 
                   2436:   for(i=0; i<nMatch; i++){
                   2437:     aMatch[i].snStatus = SNIPPET_IGNORE;
                   2438:   }
                   2439:   nDesired = 0;
                   2440:   for(i=0; i<pCursor->q.nTerms; i++){
                   2441:     for(j=0; j<nMatch; j++){
                   2442:       if( aMatch[j].iTerm==i ){
                   2443:         aMatch[j].snStatus = SNIPPET_DESIRED;
                   2444:         nDesired++;
                   2445:         break;
                   2446:       }
                   2447:     }
                   2448:   }
                   2449: 
                   2450:   iMatch = 0;
                   2451:   tailCol = -1;
                   2452:   tailOffset = 0;
                   2453:   for(i=0; i<nMatch && nDesired>0; i++){
                   2454:     if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
                   2455:     nDesired--;
                   2456:     iCol = aMatch[i].iCol;
                   2457:     zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
                   2458:     nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
                   2459:     iStart = aMatch[i].iStart - 40;
                   2460:     iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
                   2461:     if( iStart<=10 ){
                   2462:       iStart = 0;
                   2463:     }
                   2464:     if( iCol==tailCol && iStart<=tailOffset+20 ){
                   2465:       iStart = tailOffset;
                   2466:     }
                   2467:     if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
                   2468:       trimWhiteSpace(&sb);
                   2469:       appendWhiteSpace(&sb);
                   2470:       append(&sb, zEllipsis);
                   2471:       appendWhiteSpace(&sb);
                   2472:     }
                   2473:     iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
                   2474:     iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
                   2475:     if( iEnd>=nDoc-10 ){
                   2476:       iEnd = nDoc;
                   2477:       tailEllipsis = 0;
                   2478:     }else{
                   2479:       tailEllipsis = 1;
                   2480:     }
                   2481:     while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
                   2482:     while( iStart<iEnd ){
                   2483:       while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
                   2484:              && aMatch[iMatch].iCol<=iCol ){
                   2485:         iMatch++;
                   2486:       }
                   2487:       if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
                   2488:              && aMatch[iMatch].iCol==iCol ){
                   2489:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
                   2490:         iStart = aMatch[iMatch].iStart;
                   2491:         append(&sb, zStartMark);
                   2492:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
                   2493:         append(&sb, zEndMark);
                   2494:         iStart += aMatch[iMatch].nByte;
                   2495:         for(j=iMatch+1; j<nMatch; j++){
                   2496:           if( aMatch[j].iTerm==aMatch[iMatch].iTerm
                   2497:               && aMatch[j].snStatus==SNIPPET_DESIRED ){
                   2498:             nDesired--;
                   2499:             aMatch[j].snStatus = SNIPPET_IGNORE;
                   2500:           }
                   2501:         }
                   2502:       }else{
                   2503:         nappend(&sb, &zDoc[iStart], iEnd - iStart);
                   2504:         iStart = iEnd;
                   2505:       }
                   2506:     }
                   2507:     tailCol = iCol;
                   2508:     tailOffset = iEnd;
                   2509:   }
                   2510:   trimWhiteSpace(&sb);
                   2511:   if( tailEllipsis ){
                   2512:     appendWhiteSpace(&sb);
                   2513:     append(&sb, zEllipsis);
                   2514:   }
                   2515:   pCursor->snippet.zSnippet = sb.s;
                   2516:   pCursor->snippet.nSnippet = sb.len;  
                   2517: }
                   2518: 
                   2519: 
                   2520: /*
                   2521: ** Close the cursor.  For additional information see the documentation
                   2522: ** on the xClose method of the virtual table interface.
                   2523: */
                   2524: static int fulltextClose(sqlite3_vtab_cursor *pCursor){
                   2525:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
                   2526:   TRACE(("FTS1 Close %p\n", c));
                   2527:   sqlite3_finalize(c->pStmt);
                   2528:   queryClear(&c->q);
                   2529:   snippetClear(&c->snippet);
                   2530:   if( c->result.pDoclist!=NULL ){
                   2531:     docListDelete(c->result.pDoclist);
                   2532:   }
                   2533:   free(c);
                   2534:   return SQLITE_OK;
                   2535: }
                   2536: 
                   2537: static int fulltextNext(sqlite3_vtab_cursor *pCursor){
                   2538:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
                   2539:   sqlite_int64 iDocid;
                   2540:   int rc;
                   2541: 
                   2542:   TRACE(("FTS1 Next %p\n", pCursor));
                   2543:   snippetClear(&c->snippet);
                   2544:   if( c->iCursorType < QUERY_FULLTEXT ){
                   2545:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
                   2546:     rc = sqlite3_step(c->pStmt);
                   2547:     switch( rc ){
                   2548:       case SQLITE_ROW:
                   2549:         c->eof = 0;
                   2550:         return SQLITE_OK;
                   2551:       case SQLITE_DONE:
                   2552:         c->eof = 1;
                   2553:         return SQLITE_OK;
                   2554:       default:
                   2555:         c->eof = 1;
                   2556:         return rc;
                   2557:     }
                   2558:   } else {  /* full-text query */
                   2559:     rc = sqlite3_reset(c->pStmt);
                   2560:     if( rc!=SQLITE_OK ) return rc;
                   2561: 
                   2562:     iDocid = nextDocid(&c->result);
                   2563:     if( iDocid==0 ){
                   2564:       c->eof = 1;
                   2565:       return SQLITE_OK;
                   2566:     }
                   2567:     rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
                   2568:     if( rc!=SQLITE_OK ) return rc;
                   2569:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
                   2570:     rc = sqlite3_step(c->pStmt);
                   2571:     if( rc==SQLITE_ROW ){   /* the case we expect */
                   2572:       c->eof = 0;
                   2573:       return SQLITE_OK;
                   2574:     }
                   2575:     /* an error occurred; abort */
                   2576:     return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
                   2577:   }
                   2578: }
                   2579: 
                   2580: 
                   2581: /* Return a DocList corresponding to the query term *pTerm.  If *pTerm
                   2582: ** is the first term of a phrase query, go ahead and evaluate the phrase
                   2583: ** query and return the doclist for the entire phrase query.
                   2584: **
                   2585: ** The result is stored in pTerm->doclist.
                   2586: */
                   2587: static int docListOfTerm(
                   2588:   fulltext_vtab *v,     /* The full text index */
                   2589:   int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
                   2590:   QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
                   2591:   DocList **ppResult    /* Write the result here */
                   2592: ){
                   2593:   DocList *pLeft, *pRight, *pNew;
                   2594:   int i, rc;
                   2595: 
                   2596:   pLeft = docListNew(DL_POSITIONS);
                   2597:   rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
                   2598:   if( rc ){
                   2599:     docListDelete(pLeft);
                   2600:     return rc;
                   2601:   }
                   2602:   for(i=1; i<=pQTerm->nPhrase; i++){
                   2603:     pRight = docListNew(DL_POSITIONS);
                   2604:     rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
                   2605:     if( rc ){
                   2606:       docListDelete(pLeft);
                   2607:       return rc;
                   2608:     }
                   2609:     pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
                   2610:     docListPhraseMerge(pLeft, pRight, pNew);
                   2611:     docListDelete(pLeft);
                   2612:     docListDelete(pRight);
                   2613:     pLeft = pNew;
                   2614:   }
                   2615:   *ppResult = pLeft;
                   2616:   return SQLITE_OK;
                   2617: }
                   2618: 
                   2619: /* Add a new term pTerm[0..nTerm-1] to the query *q.
                   2620: */
                   2621: static void queryAdd(Query *q, const char *pTerm, int nTerm){
                   2622:   QueryTerm *t;
                   2623:   ++q->nTerms;
                   2624:   q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
                   2625:   if( q->pTerms==0 ){
                   2626:     q->nTerms = 0;
                   2627:     return;
                   2628:   }
                   2629:   t = &q->pTerms[q->nTerms - 1];
                   2630:   memset(t, 0, sizeof(*t));
                   2631:   t->pTerm = malloc(nTerm+1);
                   2632:   memcpy(t->pTerm, pTerm, nTerm);
                   2633:   t->pTerm[nTerm] = 0;
                   2634:   t->nTerm = nTerm;
                   2635:   t->isOr = q->nextIsOr;
                   2636:   q->nextIsOr = 0;
                   2637:   t->iColumn = q->nextColumn;
                   2638:   q->nextColumn = q->dfltColumn;
                   2639: }
                   2640: 
                   2641: /*
                   2642: ** Check to see if the string zToken[0...nToken-1] matches any
                   2643: ** column name in the virtual table.   If it does,
                   2644: ** return the zero-indexed column number.  If not, return -1.
                   2645: */
                   2646: static int checkColumnSpecifier(
                   2647:   fulltext_vtab *pVtab,    /* The virtual table */
                   2648:   const char *zToken,      /* Text of the token */
                   2649:   int nToken               /* Number of characters in the token */
                   2650: ){
                   2651:   int i;
                   2652:   for(i=0; i<pVtab->nColumn; i++){
                   2653:     if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
                   2654:         && pVtab->azColumn[i][nToken]==0 ){
                   2655:       return i;
                   2656:     }
                   2657:   }
                   2658:   return -1;
                   2659: }
                   2660: 
                   2661: /*
                   2662: ** Parse the text at pSegment[0..nSegment-1].  Add additional terms
                   2663: ** to the query being assemblied in pQuery.
                   2664: **
                   2665: ** inPhrase is true if pSegment[0..nSegement-1] is contained within
                   2666: ** double-quotes.  If inPhrase is true, then the first term
                   2667: ** is marked with the number of terms in the phrase less one and
                   2668: ** OR and "-" syntax is ignored.  If inPhrase is false, then every
                   2669: ** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
                   2670: */
                   2671: static int tokenizeSegment(
                   2672:   sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
                   2673:   const char *pSegment, int nSegment,     /* Query expression being parsed */
                   2674:   int inPhrase,                           /* True if within "..." */
                   2675:   Query *pQuery                           /* Append results here */
                   2676: ){
                   2677:   const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
                   2678:   sqlite3_tokenizer_cursor *pCursor;
                   2679:   int firstIndex = pQuery->nTerms;
                   2680:   int iCol;
                   2681:   int nTerm = 1;
                   2682:   
                   2683:   int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
                   2684:   if( rc!=SQLITE_OK ) return rc;
                   2685:   pCursor->pTokenizer = pTokenizer;
                   2686: 
                   2687:   while( 1 ){
                   2688:     const char *pToken;
                   2689:     int nToken, iBegin, iEnd, iPos;
                   2690: 
                   2691:     rc = pModule->xNext(pCursor,
                   2692:                         &pToken, &nToken,
                   2693:                         &iBegin, &iEnd, &iPos);
                   2694:     if( rc!=SQLITE_OK ) break;
                   2695:     if( !inPhrase &&
                   2696:         pSegment[iEnd]==':' &&
                   2697:          (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
                   2698:       pQuery->nextColumn = iCol;
                   2699:       continue;
                   2700:     }
                   2701:     if( !inPhrase && pQuery->nTerms>0 && nToken==2
                   2702:          && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
                   2703:       pQuery->nextIsOr = 1;
                   2704:       continue;
                   2705:     }
                   2706:     queryAdd(pQuery, pToken, nToken);
                   2707:     if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
                   2708:       pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
                   2709:     }
                   2710:     pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
                   2711:     if( inPhrase ){
                   2712:       nTerm++;
                   2713:     }
                   2714:   }
                   2715: 
                   2716:   if( inPhrase && pQuery->nTerms>firstIndex ){
                   2717:     pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
                   2718:   }
                   2719: 
                   2720:   return pModule->xClose(pCursor);
                   2721: }
                   2722: 
                   2723: /* Parse a query string, yielding a Query object pQuery.
                   2724: **
                   2725: ** The calling function will need to queryClear() to clean up
                   2726: ** the dynamically allocated memory held by pQuery.
                   2727: */
                   2728: static int parseQuery(
                   2729:   fulltext_vtab *v,        /* The fulltext index */
                   2730:   const char *zInput,      /* Input text of the query string */
                   2731:   int nInput,              /* Size of the input text */
                   2732:   int dfltColumn,          /* Default column of the index to match against */
                   2733:   Query *pQuery            /* Write the parse results here. */
                   2734: ){
                   2735:   int iInput, inPhrase = 0;
                   2736: 
                   2737:   if( zInput==0 ) nInput = 0;
                   2738:   if( nInput<0 ) nInput = strlen(zInput);
                   2739:   pQuery->nTerms = 0;
                   2740:   pQuery->pTerms = NULL;
                   2741:   pQuery->nextIsOr = 0;
                   2742:   pQuery->nextColumn = dfltColumn;
                   2743:   pQuery->dfltColumn = dfltColumn;
                   2744:   pQuery->pFts = v;
                   2745: 
                   2746:   for(iInput=0; iInput<nInput; ++iInput){
                   2747:     int i;
                   2748:     for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
                   2749:     if( i>iInput ){
                   2750:       tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
                   2751:                        pQuery);
                   2752:     }
                   2753:     iInput = i;
                   2754:     if( i<nInput ){
                   2755:       assert( zInput[i]=='"' );
                   2756:       inPhrase = !inPhrase;
                   2757:     }
                   2758:   }
                   2759: 
                   2760:   if( inPhrase ){
                   2761:     /* unmatched quote */
                   2762:     queryClear(pQuery);
                   2763:     return SQLITE_ERROR;
                   2764:   }
                   2765:   return SQLITE_OK;
                   2766: }
                   2767: 
                   2768: /* Perform a full-text query using the search expression in
                   2769: ** zInput[0..nInput-1].  Return a list of matching documents
                   2770: ** in pResult.
                   2771: **
                   2772: ** Queries must match column iColumn.  Or if iColumn>=nColumn
                   2773: ** they are allowed to match against any column.
                   2774: */
                   2775: static int fulltextQuery(
                   2776:   fulltext_vtab *v,      /* The full text index */
                   2777:   int iColumn,           /* Match against this column by default */
                   2778:   const char *zInput,    /* The query string */
                   2779:   int nInput,            /* Number of bytes in zInput[] */
                   2780:   DocList **pResult,     /* Write the result doclist here */
                   2781:   Query *pQuery          /* Put parsed query string here */
                   2782: ){
                   2783:   int i, iNext, rc;
                   2784:   DocList *pLeft = NULL;
                   2785:   DocList *pRight, *pNew, *pOr;
                   2786:   int nNot = 0;
                   2787:   QueryTerm *aTerm;
                   2788: 
                   2789:   rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
                   2790:   if( rc!=SQLITE_OK ) return rc;
                   2791: 
                   2792:   /* Merge AND terms. */
                   2793:   aTerm = pQuery->pTerms;
                   2794:   for(i = 0; i<pQuery->nTerms; i=iNext){
                   2795:     if( aTerm[i].isNot ){
                   2796:       /* Handle all NOT terms in a separate pass */
                   2797:       nNot++;
                   2798:       iNext = i + aTerm[i].nPhrase+1;
                   2799:       continue;
                   2800:     }
                   2801:     iNext = i + aTerm[i].nPhrase + 1;
                   2802:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
                   2803:     if( rc ){
                   2804:       queryClear(pQuery);
                   2805:       return rc;
                   2806:     }
                   2807:     while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
                   2808:       rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
                   2809:       iNext += aTerm[iNext].nPhrase + 1;
                   2810:       if( rc ){
                   2811:         queryClear(pQuery);
                   2812:         return rc;
                   2813:       }
                   2814:       pNew = docListNew(DL_DOCIDS);
                   2815:       docListOrMerge(pRight, pOr, pNew);
                   2816:       docListDelete(pRight);
                   2817:       docListDelete(pOr);
                   2818:       pRight = pNew;
                   2819:     }
                   2820:     if( pLeft==0 ){
                   2821:       pLeft = pRight;
                   2822:     }else{
                   2823:       pNew = docListNew(DL_DOCIDS);
                   2824:       docListAndMerge(pLeft, pRight, pNew);
                   2825:       docListDelete(pRight);
                   2826:       docListDelete(pLeft);
                   2827:       pLeft = pNew;
                   2828:     }
                   2829:   }
                   2830: 
                   2831:   if( nNot && pLeft==0 ){
                   2832:     /* We do not yet know how to handle a query of only NOT terms */
                   2833:     return SQLITE_ERROR;
                   2834:   }
                   2835: 
                   2836:   /* Do the EXCEPT terms */
                   2837:   for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
                   2838:     if( !aTerm[i].isNot ) continue;
                   2839:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
                   2840:     if( rc ){
                   2841:       queryClear(pQuery);
                   2842:       docListDelete(pLeft);
                   2843:       return rc;
                   2844:     }
                   2845:     pNew = docListNew(DL_DOCIDS);
                   2846:     docListExceptMerge(pLeft, pRight, pNew);
                   2847:     docListDelete(pRight);
                   2848:     docListDelete(pLeft);
                   2849:     pLeft = pNew;
                   2850:   }
                   2851: 
                   2852:   *pResult = pLeft;
                   2853:   return rc;
                   2854: }
                   2855: 
                   2856: /*
                   2857: ** This is the xFilter interface for the virtual table.  See
                   2858: ** the virtual table xFilter method documentation for additional
                   2859: ** information.
                   2860: **
                   2861: ** If idxNum==QUERY_GENERIC then do a full table scan against
                   2862: ** the %_content table.
                   2863: **
                   2864: ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
                   2865: ** in the %_content table.
                   2866: **
                   2867: ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
                   2868: ** column on the left-hand side of the MATCH operator is column
                   2869: ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
                   2870: ** side of the MATCH operator.
                   2871: */
                   2872: /* TODO(shess) Upgrade the cursor initialization and destruction to
                   2873: ** account for fulltextFilter() being called multiple times on the
                   2874: ** same cursor.  The current solution is very fragile.  Apply fix to
                   2875: ** fts2 as appropriate.
                   2876: */
                   2877: static int fulltextFilter(
                   2878:   sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
                   2879:   int idxNum, const char *idxStr,   /* Which indexing scheme to use */
                   2880:   int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
                   2881: ){
                   2882:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
                   2883:   fulltext_vtab *v = cursor_vtab(c);
                   2884:   int rc;
                   2885:   char *zSql;
                   2886: 
                   2887:   TRACE(("FTS1 Filter %p\n",pCursor));
                   2888: 
                   2889:   zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
                   2890:                           idxNum==QUERY_GENERIC ? "" : "where rowid=?");
                   2891:   sqlite3_finalize(c->pStmt);
                   2892:   rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
                   2893:   sqlite3_free(zSql);
                   2894:   if( rc!=SQLITE_OK ) return rc;
                   2895: 
                   2896:   c->iCursorType = idxNum;
                   2897:   switch( idxNum ){
                   2898:     case QUERY_GENERIC:
                   2899:       break;
                   2900: 
                   2901:     case QUERY_ROWID:
                   2902:       rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
                   2903:       if( rc!=SQLITE_OK ) return rc;
                   2904:       break;
                   2905: 
                   2906:     default:   /* full-text search */
                   2907:     {
                   2908:       const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
                   2909:       DocList *pResult;
                   2910:       assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
                   2911:       assert( argc==1 );
                   2912:       queryClear(&c->q);
                   2913:       rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
                   2914:       if( rc!=SQLITE_OK ) return rc;
                   2915:       if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
                   2916:       readerInit(&c->result, pResult);
                   2917:       break;
                   2918:     }
                   2919:   }
                   2920: 
                   2921:   return fulltextNext(pCursor);
                   2922: }
                   2923: 
                   2924: /* This is the xEof method of the virtual table.  The SQLite core
                   2925: ** calls this routine to find out if it has reached the end of
                   2926: ** a query's results set.
                   2927: */
                   2928: static int fulltextEof(sqlite3_vtab_cursor *pCursor){
                   2929:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
                   2930:   return c->eof;
                   2931: }
                   2932: 
                   2933: /* This is the xColumn method of the virtual table.  The SQLite
                   2934: ** core calls this method during a query when it needs the value
                   2935: ** of a column from the virtual table.  This method needs to use
                   2936: ** one of the sqlite3_result_*() routines to store the requested
                   2937: ** value back in the pContext.
                   2938: */
                   2939: static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
                   2940:                           sqlite3_context *pContext, int idxCol){
                   2941:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
                   2942:   fulltext_vtab *v = cursor_vtab(c);
                   2943: 
                   2944:   if( idxCol<v->nColumn ){
                   2945:     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
                   2946:     sqlite3_result_value(pContext, pVal);
                   2947:   }else if( idxCol==v->nColumn ){
                   2948:     /* The extra column whose name is the same as the table.
                   2949:     ** Return a blob which is a pointer to the cursor
                   2950:     */
                   2951:     sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
                   2952:   }
                   2953:   return SQLITE_OK;
                   2954: }
                   2955: 
                   2956: /* This is the xRowid method.  The SQLite core calls this routine to
                   2957: ** retrive the rowid for the current row of the result set.  The
                   2958: ** rowid should be written to *pRowid.
                   2959: */
                   2960: static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
                   2961:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
                   2962: 
                   2963:   *pRowid = sqlite3_column_int64(c->pStmt, 0);
                   2964:   return SQLITE_OK;
                   2965: }
                   2966: 
                   2967: /* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
                   2968:  * we also store positions and offsets in the hash table using the given
                   2969:  * column number. */
                   2970: static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
                   2971:                       const char *zText, int iColumn){
                   2972:   sqlite3_tokenizer *pTokenizer = v->pTokenizer;
                   2973:   sqlite3_tokenizer_cursor *pCursor;
                   2974:   const char *pToken;
                   2975:   int nTokenBytes;
                   2976:   int iStartOffset, iEndOffset, iPosition;
                   2977:   int rc;
                   2978: 
                   2979:   rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
                   2980:   if( rc!=SQLITE_OK ) return rc;
                   2981: 
                   2982:   pCursor->pTokenizer = pTokenizer;
                   2983:   while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                   2984:                                                &pToken, &nTokenBytes,
                   2985:                                                &iStartOffset, &iEndOffset,
                   2986:                                                &iPosition) ){
                   2987:     DocList *p;
                   2988: 
                   2989:     /* Positions can't be negative; we use -1 as a terminator internally. */
                   2990:     if( iPosition<0 ){
                   2991:       pTokenizer->pModule->xClose(pCursor);
                   2992:       return SQLITE_ERROR;
                   2993:     }
                   2994: 
                   2995:     p = fts1HashFind(terms, pToken, nTokenBytes);
                   2996:     if( p==NULL ){
                   2997:       p = docListNew(DL_DEFAULT);
                   2998:       docListAddDocid(p, iDocid);
                   2999:       fts1HashInsert(terms, pToken, nTokenBytes, p);
                   3000:     }
                   3001:     if( iColumn>=0 ){
                   3002:       docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
                   3003:     }
                   3004:   }
                   3005: 
                   3006:   /* TODO(shess) Check return?  Should this be able to cause errors at
                   3007:   ** this point?  Actually, same question about sqlite3_finalize(),
                   3008:   ** though one could argue that failure there means that the data is
                   3009:   ** not durable.  *ponder*
                   3010:   */
                   3011:   pTokenizer->pModule->xClose(pCursor);
                   3012:   return rc;
                   3013: }
                   3014: 
                   3015: /* Update the %_terms table to map the term [pTerm] to the given rowid. */
                   3016: static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
                   3017:                              DocList *d){
                   3018:   sqlite_int64 iIndexRow;
                   3019:   DocList doclist;
                   3020:   int iSegment = 0, rc;
                   3021: 
                   3022:   rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
                   3023:   if( rc==SQLITE_DONE ){
                   3024:     docListInit(&doclist, DL_DEFAULT, 0, 0);
                   3025:     docListUpdate(&doclist, d);
                   3026:     /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
                   3027:     rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
                   3028:     goto err;
                   3029:   }
                   3030:   if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
                   3031: 
                   3032:   docListUpdate(&doclist, d);
                   3033:   if( doclist.nData<=CHUNK_MAX ){
                   3034:     rc = term_update(v, iIndexRow, &doclist);
                   3035:     goto err;
                   3036:   }
                   3037: 
                   3038:   /* Doclist doesn't fit, delete what's there, and accumulate
                   3039:   ** forward.
                   3040:   */
                   3041:   rc = term_delete(v, iIndexRow);
                   3042:   if( rc!=SQLITE_OK ) goto err;
                   3043: 
                   3044:   /* Try to insert the doclist into a higher segment bucket.  On
                   3045:   ** failure, accumulate existing doclist with the doclist from that
                   3046:   ** bucket, and put results in the next bucket.
                   3047:   */
                   3048:   iSegment++;
                   3049:   while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
                   3050:                          &doclist))!=SQLITE_OK ){
                   3051:     sqlite_int64 iSegmentRow;
                   3052:     DocList old;
                   3053:     int rc2;
                   3054: 
                   3055:     /* Retain old error in case the term_insert() error was really an
                   3056:     ** error rather than a bounced insert.
                   3057:     */
                   3058:     rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
                   3059:     if( rc2!=SQLITE_ROW ) goto err;
                   3060: 
                   3061:     rc = term_delete(v, iSegmentRow);
                   3062:     if( rc!=SQLITE_OK ) goto err;
                   3063: 
                   3064:     /* Reusing lowest-number deleted row keeps the index smaller. */
                   3065:     if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
                   3066: 
                   3067:     /* doclist contains the newer data, so accumulate it over old.
                   3068:     ** Then steal accumulated data for doclist.
                   3069:     */
                   3070:     docListAccumulate(&old, &doclist);
                   3071:     docListDestroy(&doclist);
                   3072:     doclist = old;
                   3073: 
                   3074:     iSegment++;
                   3075:   }
                   3076: 
                   3077:  err:
                   3078:   docListDestroy(&doclist);
                   3079:   return rc;
                   3080: }
                   3081: 
                   3082: /* Add doclists for all terms in [pValues] to the hash table [terms]. */
                   3083: static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
                   3084:                 sqlite3_value **pValues){
                   3085:   int i;
                   3086:   for(i = 0; i < v->nColumn ; ++i){
                   3087:     char *zText = (char*)sqlite3_value_text(pValues[i]);
                   3088:     int rc = buildTerms(v, terms, iRowid, zText, i);
                   3089:     if( rc!=SQLITE_OK ) return rc;
                   3090:   }
                   3091:   return SQLITE_OK;
                   3092: }
                   3093: 
                   3094: /* Add empty doclists for all terms in the given row's content to the hash
                   3095:  * table [pTerms]. */
                   3096: static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
                   3097:   const char **pValues;
                   3098:   int i;
                   3099: 
                   3100:   int rc = content_select(v, iRowid, &pValues);
                   3101:   if( rc!=SQLITE_OK ) return rc;
                   3102: 
                   3103:   for(i = 0 ; i < v->nColumn; ++i) {
                   3104:     rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
                   3105:     if( rc!=SQLITE_OK ) break;
                   3106:   }
                   3107: 
                   3108:   freeStringArray(v->nColumn, pValues);
                   3109:   return SQLITE_OK;
                   3110: }
                   3111: 
                   3112: /* Insert a row into the %_content table; set *piRowid to be the ID of the
                   3113:  * new row.  Fill [pTerms] with new doclists for the %_term table. */
                   3114: static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
                   3115:                         sqlite3_value **pValues,
                   3116:                         sqlite_int64 *piRowid, fts1Hash *pTerms){
                   3117:   int rc;
                   3118: 
                   3119:   rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
                   3120:   if( rc!=SQLITE_OK ) return rc;
                   3121:   *piRowid = sqlite3_last_insert_rowid(v->db);
                   3122:   return insertTerms(v, pTerms, *piRowid, pValues);
                   3123: }
                   3124: 
                   3125: /* Delete a row from the %_content table; fill [pTerms] with empty doclists
                   3126:  * to be written to the %_term table. */
                   3127: static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
                   3128:   int rc = deleteTerms(v, pTerms, iRow);
                   3129:   if( rc!=SQLITE_OK ) return rc;
                   3130:   return content_delete(v, iRow);  /* execute an SQL DELETE */
                   3131: }
                   3132: 
                   3133: /* Update a row in the %_content table; fill [pTerms] with new doclists for the
                   3134:  * %_term table. */
                   3135: static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
                   3136:                         sqlite3_value **pValues, fts1Hash *pTerms){
                   3137:   /* Generate an empty doclist for each term that previously appeared in this
                   3138:    * row. */
                   3139:   int rc = deleteTerms(v, pTerms, iRow);
                   3140:   if( rc!=SQLITE_OK ) return rc;
                   3141: 
                   3142:   rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
                   3143:   if( rc!=SQLITE_OK ) return rc;
                   3144: 
                   3145:   /* Now add positions for terms which appear in the updated row. */
                   3146:   return insertTerms(v, pTerms, iRow, pValues);
                   3147: }
                   3148: 
                   3149: /* This function implements the xUpdate callback; it is the top-level entry
                   3150:  * point for inserting, deleting or updating a row in a full-text table. */
                   3151: static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
                   3152:                    sqlite_int64 *pRowid){
                   3153:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
                   3154:   fts1Hash terms;   /* maps term string -> PosList */
                   3155:   int rc;
                   3156:   fts1HashElem *e;
                   3157: 
                   3158:   TRACE(("FTS1 Update %p\n", pVtab));
                   3159:   
                   3160:   fts1HashInit(&terms, FTS1_HASH_STRING, 1);
                   3161: 
                   3162:   if( nArg<2 ){
                   3163:     rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
                   3164:   } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
                   3165:     /* An update:
                   3166:      * ppArg[0] = old rowid
                   3167:      * ppArg[1] = new rowid
                   3168:      * ppArg[2..2+v->nColumn-1] = values
                   3169:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
                   3170:      */
                   3171:     sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
                   3172:     if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
                   3173:       sqlite3_value_int64(ppArg[1]) != rowid ){
                   3174:       rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
                   3175:     } else {
                   3176:       assert( nArg==2+v->nColumn+1);
                   3177:       rc = index_update(v, rowid, &ppArg[2], &terms);
                   3178:     }
                   3179:   } else {
                   3180:     /* An insert:
                   3181:      * ppArg[1] = requested rowid
                   3182:      * ppArg[2..2+v->nColumn-1] = values
                   3183:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
                   3184:      */
                   3185:     assert( nArg==2+v->nColumn+1);
                   3186:     rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
                   3187:   }
                   3188: 
                   3189:   if( rc==SQLITE_OK ){
                   3190:     /* Write updated doclists to disk. */
                   3191:     for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
                   3192:       DocList *p = fts1HashData(e);
                   3193:       rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
                   3194:       if( rc!=SQLITE_OK ) break;
                   3195:     }
                   3196:   }
                   3197: 
                   3198:   /* clean up */
                   3199:   for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
                   3200:     DocList *p = fts1HashData(e);
                   3201:     docListDelete(p);
                   3202:   }
                   3203:   fts1HashClear(&terms);
                   3204: 
                   3205:   return rc;
                   3206: }
                   3207: 
                   3208: /*
                   3209: ** Implementation of the snippet() function for FTS1
                   3210: */
                   3211: static void snippetFunc(
                   3212:   sqlite3_context *pContext,
                   3213:   int argc,
                   3214:   sqlite3_value **argv
                   3215: ){
                   3216:   fulltext_cursor *pCursor;
                   3217:   if( argc<1 ) return;
                   3218:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
                   3219:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
                   3220:     sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
                   3221:   }else{
                   3222:     const char *zStart = "<b>";
                   3223:     const char *zEnd = "</b>";
                   3224:     const char *zEllipsis = "<b>...</b>";
                   3225:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
                   3226:     if( argc>=2 ){
                   3227:       zStart = (const char*)sqlite3_value_text(argv[1]);
                   3228:       if( argc>=3 ){
                   3229:         zEnd = (const char*)sqlite3_value_text(argv[2]);
                   3230:         if( argc>=4 ){
                   3231:           zEllipsis = (const char*)sqlite3_value_text(argv[3]);
                   3232:         }
                   3233:       }
                   3234:     }
                   3235:     snippetAllOffsets(pCursor);
                   3236:     snippetText(pCursor, zStart, zEnd, zEllipsis);
                   3237:     sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
                   3238:                         pCursor->snippet.nSnippet, SQLITE_STATIC);
                   3239:   }
                   3240: }
                   3241: 
                   3242: /*
                   3243: ** Implementation of the offsets() function for FTS1
                   3244: */
                   3245: static void snippetOffsetsFunc(
                   3246:   sqlite3_context *pContext,
                   3247:   int argc,
                   3248:   sqlite3_value **argv
                   3249: ){
                   3250:   fulltext_cursor *pCursor;
                   3251:   if( argc<1 ) return;
                   3252:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
                   3253:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
                   3254:     sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
                   3255:   }else{
                   3256:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
                   3257:     snippetAllOffsets(pCursor);
                   3258:     snippetOffsetText(&pCursor->snippet);
                   3259:     sqlite3_result_text(pContext,
                   3260:                         pCursor->snippet.zOffset, pCursor->snippet.nOffset,
                   3261:                         SQLITE_STATIC);
                   3262:   }
                   3263: }
                   3264: 
                   3265: /*
                   3266: ** This routine implements the xFindFunction method for the FTS1
                   3267: ** virtual table.
                   3268: */
                   3269: static int fulltextFindFunction(
                   3270:   sqlite3_vtab *pVtab,
                   3271:   int nArg,
                   3272:   const char *zName,
                   3273:   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
                   3274:   void **ppArg
                   3275: ){
                   3276:   if( strcmp(zName,"snippet")==0 ){
                   3277:     *pxFunc = snippetFunc;
                   3278:     return 1;
                   3279:   }else if( strcmp(zName,"offsets")==0 ){
                   3280:     *pxFunc = snippetOffsetsFunc;
                   3281:     return 1;
                   3282:   }
                   3283:   return 0;
                   3284: }
                   3285: 
                   3286: /*
                   3287: ** Rename an fts1 table.
                   3288: */
                   3289: static int fulltextRename(
                   3290:   sqlite3_vtab *pVtab,
                   3291:   const char *zName
                   3292: ){
                   3293:   fulltext_vtab *p = (fulltext_vtab *)pVtab;
                   3294:   int rc = SQLITE_NOMEM;
                   3295:   char *zSql = sqlite3_mprintf(
                   3296:     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
                   3297:     "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
                   3298:     , p->zDb, p->zName, zName
                   3299:     , p->zDb, p->zName, zName
                   3300:   );
                   3301:   if( zSql ){
                   3302:     rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
                   3303:     sqlite3_free(zSql);
                   3304:   }
                   3305:   return rc;
                   3306: }
                   3307: 
                   3308: static const sqlite3_module fulltextModule = {
                   3309:   /* iVersion      */ 0,
                   3310:   /* xCreate       */ fulltextCreate,
                   3311:   /* xConnect      */ fulltextConnect,
                   3312:   /* xBestIndex    */ fulltextBestIndex,
                   3313:   /* xDisconnect   */ fulltextDisconnect,
                   3314:   /* xDestroy      */ fulltextDestroy,
                   3315:   /* xOpen         */ fulltextOpen,
                   3316:   /* xClose        */ fulltextClose,
                   3317:   /* xFilter       */ fulltextFilter,
                   3318:   /* xNext         */ fulltextNext,
                   3319:   /* xEof          */ fulltextEof,
                   3320:   /* xColumn       */ fulltextColumn,
                   3321:   /* xRowid        */ fulltextRowid,
                   3322:   /* xUpdate       */ fulltextUpdate,
                   3323:   /* xBegin        */ 0, 
                   3324:   /* xSync         */ 0,
                   3325:   /* xCommit       */ 0,
                   3326:   /* xRollback     */ 0,
                   3327:   /* xFindFunction */ fulltextFindFunction,
                   3328:   /* xRename       */ fulltextRename,
                   3329: };
                   3330: 
                   3331: int sqlite3Fts1Init(sqlite3 *db){
                   3332:   sqlite3_overload_function(db, "snippet", -1);
                   3333:   sqlite3_overload_function(db, "offsets", -1);
                   3334:   return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
                   3335: }
                   3336: 
                   3337: #if !SQLITE_CORE
                   3338: int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
                   3339:                            const sqlite3_api_routines *pApi){
                   3340:   SQLITE_EXTENSION_INIT2(pApi)
                   3341:   return sqlite3Fts1Init(db);
                   3342: }
                   3343: #endif
                   3344: 
                   3345: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>