Annotation of embedaddon/sqlite3/ext/fts1/fts1.c, revision 1.1

1.1     ! misho       1: /* fts1 has a design flaw which can lead to database corruption (see
        !             2: ** below).  It is recommended not to use it any longer, instead use
        !             3: ** fts3 (or higher).  If you believe that your use of fts1 is safe,
        !             4: ** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
        !             5: */
        !             6: #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
        !             7:         && !defined(SQLITE_ENABLE_BROKEN_FTS1)
        !             8: #error fts1 has a design flaw and has been deprecated.
        !             9: #endif
        !            10: /* The flaw is that fts1 uses the content table's unaliased rowid as
        !            11: ** the unique docid.  fts1 embeds the rowid in the index it builds,
        !            12: ** and expects the rowid to not change.  The SQLite VACUUM operation
        !            13: ** will renumber such rowids, thereby breaking fts1.  If you are using
        !            14: ** fts1 in a system which has disabled VACUUM, then you can continue
        !            15: ** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
        !            16: ** VACUUM, though systems using auto_vacuum are unlikely to invoke
        !            17: ** VACUUM.
        !            18: **
        !            19: ** fts1 should be safe even across VACUUM if you only insert documents
        !            20: ** and never delete.
        !            21: */
        !            22: 
        !            23: /* The author disclaims copyright to this source code.
        !            24:  *
        !            25:  * This is an SQLite module implementing full-text search.
        !            26:  */
        !            27: 
        !            28: /*
        !            29: ** The code in this file is only compiled if:
        !            30: **
        !            31: **     * The FTS1 module is being built as an extension
        !            32: **       (in which case SQLITE_CORE is not defined), or
        !            33: **
        !            34: **     * The FTS1 module is being built into the core of
        !            35: **       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
        !            36: */
        !            37: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
        !            38: 
        !            39: #if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
        !            40: # define SQLITE_CORE 1
        !            41: #endif
        !            42: 
        !            43: #include <assert.h>
        !            44: #include <stdlib.h>
        !            45: #include <stdio.h>
        !            46: #include <string.h>
        !            47: #include <ctype.h>
        !            48: 
        !            49: #include "fts1.h"
        !            50: #include "fts1_hash.h"
        !            51: #include "fts1_tokenizer.h"
        !            52: #include "sqlite3.h"
        !            53: #include "sqlite3ext.h"
        !            54: SQLITE_EXTENSION_INIT1
        !            55: 
        !            56: 
        /*
        ** TRACE((fmt, ...)) prints a debugging message to stdout and flushes it.
        ** Tracing is compiled out by default; change "#if 0" to "#if 1" to turn
        ** it on.  The macro argument must be a complete, parenthesized printf()
        ** argument list, e.g. TRACE(("docid %d\n", x)).
        **
        ** The enabled form is wrapped in do{...}while(0) so that it expands to a
        ** single statement and stays correct inside an unbraced if/else.
        */
        #if 0
        # define TRACE(A)  do{ printf A; fflush(stdout); }while(0)
        #else
        # define TRACE(A)
        #endif
        !            62: 
        !            63: /* utility functions */
        !            64: 
        /*
        ** A growable character buffer.  While s is non-NULL it is kept
        ** NUL-terminated by the routines below.
        */
        typedef struct StringBuffer {
          int len;      /* length, not including null terminator */
          int alloced;  /* Space allocated for s[] */
          char *s;      /* Content of the string */
        } StringBuffer;

        /*
        ** Reset sb to the empty string backed by a fresh 100-byte allocation.
        ** Whatever sb->s pointed to before the call is NOT freed here.
        **
        ** If the allocation fails, sb is left with s==NULL, len==0 and
        ** alloced==0 rather than writing through a NULL pointer.  A later
        ** nappend() will then try to allocate again.
        */
        static void initStringBuffer(StringBuffer *sb){
          sb->len = 0;
          sb->s = malloc(100);
          if( sb->s==0 ){
            sb->alloced = 0;
            return;
          }
          sb->alloced = 100;
          sb->s[0] = '\0';
        }

        /*
        ** Append the first nFrom bytes of zFrom to sb, growing the buffer when
        ** needed, and keep the result NUL-terminated.
        **
        ** If the buffer cannot be grown, the accumulated text is discarded and
        ** sb is reset with initStringBuffer().  The old allocation is released
        ** first so that it is not leaked.
        */
        static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
          if( sb->len + nFrom >= sb->alloced ){
            int nNew = sb->len + nFrom + 100;
            /* Use a temporary: on failure realloc() leaves the old block valid. */
            char *zNew = realloc(sb->s, nNew+1);
            if( zNew==0 ){
              free(sb->s);
              initStringBuffer(sb);
              return;
            }
            sb->s = zNew;
            sb->alloced = nNew;
          }
          memcpy(sb->s + sb->len, zFrom, nFrom);
          sb->len += nFrom;
          sb->s[sb->len] = 0;
        }

        /* Append the NUL-terminated string zFrom to sb. */
        static void append(StringBuffer *sb, const char *zFrom){
          nappend(sb, zFrom, (int)strlen(zFrom));
        }
        !            94: 
        !            95: /* We encode variable-length integers in little-endian order using seven bits
        !            96:  * per byte as follows:
        !            97: **
        !            98: ** KEY:
        !            99: **         A = 0xxxxxxx    7 bits of data and one flag bit
        !           100: **         B = 1xxxxxxx    7 bits of data and one flag bit
        !           101: **
        !           102: **  7 bits - A
        !           103: ** 14 bits - BA
        !           104: ** 21 bits - BBA
        !           105: ** and so on.
        !           106: */
        !           107: 
        /* Upper bound on the size of one encoded varint: a 64-bit value at
        ** seven payload bits per byte needs ceil(64/7) = 10 bytes. */
        #define VARINT_MAX ((64 + 6) / 7)
        !           110: 
        !           111: /* Write a 64-bit variable-length integer to memory starting at p[0].
        !           112:  * The length of data written will be between 1 and VARINT_MAX bytes.
        !           113:  * The number of bytes written is returned. */
        !           114: static int putVarint(char *p, sqlite_int64 v){
        !           115:   unsigned char *q = (unsigned char *) p;
        !           116:   sqlite_uint64 vu = v;
        !           117:   do{
        !           118:     *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
        !           119:     vu >>= 7;
        !           120:   }while( vu!=0 );
        !           121:   q[-1] &= 0x7f;  /* turn off high bit in final byte */
        !           122:   assert( q - (unsigned char *)p <= VARINT_MAX );
        !           123:   return (int) (q - (unsigned char *)p);
        !           124: }
        !           125: 
        !           126: /* Read a 64-bit variable-length integer from memory starting at p[0].
        !           127:  * Return the number of bytes read, or 0 on error.
        !           128:  * The value is stored in *v. */
        !           129: static int getVarint(const char *p, sqlite_int64 *v){
        !           130:   const unsigned char *q = (const unsigned char *) p;
        !           131:   sqlite_uint64 x = 0, y = 1;
        !           132:   while( (*q & 0x80) == 0x80 ){
        !           133:     x += y * (*q++ & 0x7f);
        !           134:     y <<= 7;
        !           135:     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
        !           136:       assert( 0 );
        !           137:       return 0;
        !           138:     }
        !           139:   }
        !           140:   x += y * (*q++);
        !           141:   *v = (sqlite_int64) x;
        !           142:   return (int) (q - (unsigned char *)p);
        !           143: }
        !           144: 
        !           145: static int getVarint32(const char *p, int *pi){
        !           146:  sqlite_int64 i;
        !           147:  int ret = getVarint(p, &i);
        !           148:  *pi = (int) i;
        !           149:  assert( *pi==i );
        !           150:  return ret;
        !           151: }
        !           152: 
        !           153: /*** Document lists ***
        !           154:  *
        !           155:  * A document list holds a sorted list of varint-encoded document IDs.
        !           156:  *
        !           157:  * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
        !           158:  *
        !           159:  * array {
        !           160:  *   varint docid;
        !           161:  *   array {
        !           162:  *     varint position;     (delta from previous position plus POS_BASE)
        !           163:  *     varint startOffset;  (delta from previous startOffset)
        !           164:  *     varint endOffset;    (delta from startOffset)
        !           165:  *   }
        !           166:  * }
        !           167:  *
        !           168:  * Here, array { X } means zero or more occurrences of X, adjacent in memory.
        !           169:  *
        !           170:  * A position list may hold positions for text in multiple columns.  A position
        !           171:  * POS_COLUMN is followed by a varint containing the index of the column for
        !           172:  * following positions in the list.  Any positions appearing before any
        !           173:  * occurrences of POS_COLUMN are for column 0.
        !           174:  *
        !           175:  * A doclist with type DL_POSITIONS is like the above, but holds only docids
        !           176:  * and positions without offset information.
        !           177:  *
        !           178:  * A doclist with type DL_DOCIDS is like the above, but holds only docids
        !           179:  * without positions or offset information.
        !           180:  *
        !           181:  * On disk, every document list has positions and offsets, so we don't bother
        !           182:  * to serialize a doclist's type.
        !           183:  * 
        !           184:  * We don't yet delta-encode document IDs; doing so will probably be a
        !           185:  * modest win.
        !           186:  *
        !           187:  * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
        !           188:  * After the first offset, estimate the next offset by using the
        !           189:  * current token position and the previous token position and offset,
        !           190:  * offset to handle some variance.  So the estimate would be
        !           191:  * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
        !           192:  * as normal.  Offsets more than 64 chars from the estimate are
        !           193:  * encoded as the delta to the previous start offset + 128.  An
        !           194:  * additional tiny increment can be gained by using the end offset of
        !           195:  * the previous token to make the estimate a tiny bit more precise.
        !           196: */
        !           197: 
        !           198: /* It is not safe to call isspace(), tolower(), or isalnum() on
        !           199: ** hi-bit-set characters.  This is the same solution used in the
        !           200: ** tokenizer.
        !           201: */
        !           202: /* TODO(shess) The snippet-generation code should be using the
        !           203: ** tokenizer-generated tokens rather than doing its own local
        !           204: ** tokenization.
        !           205: */
        !           206: /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
        /* isspace() restricted to 7-bit characters: any character with the
        ** high bit set is reported as not-a-space without calling isspace(). */
        static int safe_isspace(char c){
          if( c & 0x80 ){
            return 0;
          }
          return isspace(c);
        }
        /* tolower() restricted to 7-bit characters: a character with the high
        ** bit set is returned unchanged without calling tolower(). */
        static int safe_tolower(char c){
          if( c & 0x80 ){
            return c;
          }
          return tolower(c);
        }
        /* isalnum() restricted to 7-bit characters: any character with the
        ** high bit set is reported as non-alphanumeric. */
        static int safe_isalnum(char c){
          if( c & 0x80 ){
            return 0;
          }
          return isalnum(c);
        }
        !           216: 
        /*
        ** How much detail a doclist carries for each document.  The values are
        ** ordered by increasing detail, and code in this file relies on that
        ** ordering when it tests e.g. iType>=DL_POSITIONS.
        */
        typedef enum DocListType {
          DL_DOCIDS = 0,            /* docids only */
          DL_POSITIONS = 1,         /* docids + positions */
          DL_POSITIONS_OFFSETS = 2  /* docids + positions + offsets */
        } DocListType;
        !           222: 
        !           223: /*
        !           224: ** By default, only positions and not offsets are stored in the doclists.
        !           225: ** To change this so that offsets are stored too, compile with
        !           226: **
        !           227: **          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
        !           228: **
        !           229: */
        !           230: #ifndef DL_DEFAULT
        !           231: # define DL_DEFAULT DL_POSITIONS
        !           232: #endif
        !           233: 
        !           234: typedef struct DocList {
        !           235:   char *pData;
        !           236:   int nData;
        !           237:   DocListType iType;
        !           238:   int iLastColumn;    /* the last column written */
        !           239:   int iLastPos;       /* the last position written */
        !           240:   int iLastOffset;    /* the last start offset written */
        !           241: } DocList;
        !           242: 
        /*
        ** Reserved values in a position list.  Real position deltas are stored
        ** with POS_BASE added so that they never collide with the two markers.
        */
        enum {
          POS_END = 0,        /* terminates a position list */
          POS_COLUMN = 1,     /* the next varint is a new column number */
          POS_BASE = 2        /* bias added to every stored position delta */
        };
        !           248: 
        !           249: /* Initialize a new DocList to hold the given data. */
        !           250: static void docListInit(DocList *d, DocListType iType,
        !           251:                         const char *pData, int nData){
        !           252:   d->nData = nData;
        !           253:   if( nData>0 ){
        !           254:     d->pData = malloc(nData);
        !           255:     memcpy(d->pData, pData, nData);
        !           256:   } else {
        !           257:     d->pData = NULL;
        !           258:   }
        !           259:   d->iType = iType;
        !           260:   d->iLastColumn = 0;
        !           261:   d->iLastPos = d->iLastOffset = 0;
        !           262: }
        !           263: 
        !           264: /* Create a new dynamically-allocated DocList. */
        !           265: static DocList *docListNew(DocListType iType){
        !           266:   DocList *d = (DocList *) malloc(sizeof(DocList));
        !           267:   docListInit(d, iType, 0, 0);
        !           268:   return d;
        !           269: }
        !           270: 
        !           271: static void docListDestroy(DocList *d){
        !           272:   free(d->pData);
        !           273: #ifndef NDEBUG
        !           274:   memset(d, 0x55, sizeof(*d));
        !           275: #endif
        !           276: }
        !           277: 
        !           278: static void docListDelete(DocList *d){
        !           279:   docListDestroy(d);
        !           280:   free(d);
        !           281: }
        !           282: 
        !           283: static char *docListEnd(DocList *d){
        !           284:   return d->pData + d->nData;
        !           285: }
        !           286: 
        !           287: /* Append a varint to a DocList's data. */
        !           288: static void appendVarint(DocList *d, sqlite_int64 i){
        !           289:   char c[VARINT_MAX];
        !           290:   int n = putVarint(c, i);
        !           291:   d->pData = realloc(d->pData, d->nData + n);
        !           292:   memcpy(d->pData + d->nData, c, n);
        !           293:   d->nData += n;
        !           294: }
        !           295: 
        !           296: static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
        !           297:   appendVarint(d, iDocid);
        !           298:   if( d->iType>=DL_POSITIONS ){
        !           299:     appendVarint(d, POS_END);  /* initially empty position list */
        !           300:     d->iLastColumn = 0;
        !           301:     d->iLastPos = d->iLastOffset = 0;
        !           302:   }
        !           303: }
        !           304: 
        !           305: /* helper function for docListAddPos and docListAddPosOffset */
        !           306: static void addPos(DocList *d, int iColumn, int iPos){
        !           307:   assert( d->nData>0 );
        !           308:   --d->nData;  /* remove previous terminator */
        !           309:   if( iColumn!=d->iLastColumn ){
        !           310:     assert( iColumn>d->iLastColumn );
        !           311:     appendVarint(d, POS_COLUMN);
        !           312:     appendVarint(d, iColumn);
        !           313:     d->iLastColumn = iColumn;
        !           314:     d->iLastPos = d->iLastOffset = 0;
        !           315:   }
        !           316:   assert( iPos>=d->iLastPos );
        !           317:   appendVarint(d, iPos-d->iLastPos+POS_BASE);
        !           318:   d->iLastPos = iPos;
        !           319: }
        !           320: 
        !           321: /* Add a position to the last position list in a doclist. */
        !           322: static void docListAddPos(DocList *d, int iColumn, int iPos){
        !           323:   assert( d->iType==DL_POSITIONS );
        !           324:   addPos(d, iColumn, iPos);
        !           325:   appendVarint(d, POS_END);  /* add new terminator */
        !           326: }
        !           327: 
        !           328: /*
        !           329: ** Add a position and starting and ending offsets to a doclist.
        !           330: **
        !           331: ** If the doclist is setup to handle only positions, then insert
        !           332: ** the position only and ignore the offsets.
        !           333: */
        !           334: static void docListAddPosOffset(
        !           335:   DocList *d,             /* Doclist under construction */
        !           336:   int iColumn,            /* Column the inserted term is part of */
        !           337:   int iPos,               /* Position of the inserted term */
        !           338:   int iStartOffset,       /* Starting offset of inserted term */
        !           339:   int iEndOffset          /* Ending offset of inserted term */
        !           340: ){
        !           341:   assert( d->iType>=DL_POSITIONS );
        !           342:   addPos(d, iColumn, iPos);
        !           343:   if( d->iType==DL_POSITIONS_OFFSETS ){
        !           344:     assert( iStartOffset>=d->iLastOffset );
        !           345:     appendVarint(d, iStartOffset-d->iLastOffset);
        !           346:     d->iLastOffset = iStartOffset;
        !           347:     assert( iEndOffset>=iStartOffset );
        !           348:     appendVarint(d, iEndOffset-iStartOffset);
        !           349:   }
        !           350:   appendVarint(d, POS_END);  /* add new terminator */
        !           351: }
        !           352: 
        !           353: /*
        !           354: ** A DocListReader object is a cursor into a doclist.  Initialize
        !           355: ** the cursor to the beginning of the doclist by calling readerInit().
        !           356: ** Then use routines
        !           357: **
        !           358: **      peekDocid()
        !           359: **      readDocid()
        !           360: **      readPosition()
        !           361: **      skipPositionList()
        !           362: **      and so forth...
        !           363: **
        !           364: ** to read information out of the doclist.  When we reach the end
        !           365: ** of the doclist, atEnd() returns TRUE.
        !           366: */
        !           367: typedef struct DocListReader {
        !           368:   DocList *pDoclist;  /* The document list we are stepping through */
        !           369:   char *p;            /* Pointer to next unread byte in the doclist */
        !           370:   int iLastColumn;
        !           371:   int iLastPos;  /* the last position read, or -1 when not in a position list */
        !           372: } DocListReader;
        !           373: 
        !           374: /*
        !           375: ** Initialize the DocListReader r to point to the beginning of pDoclist.
        !           376: */
        !           377: static void readerInit(DocListReader *r, DocList *pDoclist){
        !           378:   r->pDoclist = pDoclist;
        !           379:   if( pDoclist!=NULL ){
        !           380:     r->p = pDoclist->pData;
        !           381:   }
        !           382:   r->iLastColumn = -1;
        !           383:   r->iLastPos = -1;
        !           384: }
        !           385: 
        !           386: /*
        !           387: ** Return TRUE if we have reached then end of pReader and there is
        !           388: ** nothing else left to read.
        !           389: */
        !           390: static int atEnd(DocListReader *pReader){
        !           391:   return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
        !           392: }
        !           393: 
        !           394: /* Peek at the next docid without advancing the read pointer. 
        !           395: */
        !           396: static sqlite_int64 peekDocid(DocListReader *pReader){
        !           397:   sqlite_int64 ret;
        !           398:   assert( !atEnd(pReader) );
        !           399:   assert( pReader->iLastPos==-1 );
        !           400:   getVarint(pReader->p, &ret);
        !           401:   return ret;
        !           402: }
        !           403: 
        !           404: /* Read the next docid.   See also nextDocid().
        !           405: */
        !           406: static sqlite_int64 readDocid(DocListReader *pReader){
        !           407:   sqlite_int64 ret;
        !           408:   assert( !atEnd(pReader) );
        !           409:   assert( pReader->iLastPos==-1 );
        !           410:   pReader->p += getVarint(pReader->p, &ret);
        !           411:   if( pReader->pDoclist->iType>=DL_POSITIONS ){
        !           412:     pReader->iLastColumn = 0;
        !           413:     pReader->iLastPos = 0;
        !           414:   }
        !           415:   return ret;
        !           416: }
        !           417: 
        !           418: /* Read the next position and column index from a position list.
        !           419:  * Returns the position, or -1 at the end of the list. */
        !           420: static int readPosition(DocListReader *pReader, int *iColumn){
        !           421:   int i;
        !           422:   int iType = pReader->pDoclist->iType;
        !           423: 
        !           424:   if( pReader->iLastPos==-1 ){
        !           425:     return -1;
        !           426:   }
        !           427:   assert( !atEnd(pReader) );
        !           428: 
        !           429:   if( iType<DL_POSITIONS ){
        !           430:     return -1;
        !           431:   }
        !           432:   pReader->p += getVarint32(pReader->p, &i);
        !           433:   if( i==POS_END ){
        !           434:     pReader->iLastColumn = pReader->iLastPos = -1;
        !           435:     *iColumn = -1;
        !           436:     return -1;
        !           437:   }
        !           438:   if( i==POS_COLUMN ){
        !           439:     pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
        !           440:     pReader->iLastPos = 0;
        !           441:     pReader->p += getVarint32(pReader->p, &i);
        !           442:     assert( i>=POS_BASE );
        !           443:   }
        !           444:   pReader->iLastPos += ((int) i)-POS_BASE;
        !           445:   if( iType>=DL_POSITIONS_OFFSETS ){
        !           446:     /* Skip over offsets, ignoring them for now. */
        !           447:     int iStart, iEnd;
        !           448:     pReader->p += getVarint32(pReader->p, &iStart);
        !           449:     pReader->p += getVarint32(pReader->p, &iEnd);
        !           450:   }
        !           451:   *iColumn = pReader->iLastColumn;
        !           452:   return pReader->iLastPos;
        !           453: }
        !           454: 
        !           455: /* Skip past the end of a position list. */
        !           456: static void skipPositionList(DocListReader *pReader){
        !           457:   DocList *p = pReader->pDoclist;
        !           458:   if( p && p->iType>=DL_POSITIONS ){
        !           459:     int iColumn;
        !           460:     while( readPosition(pReader, &iColumn)!=-1 ){}
        !           461:   }
        !           462: }
        !           463: 
        !           464: /* Skip over a docid, including its position list if the doclist has
        !           465:  * positions. */
        !           466: static void skipDocument(DocListReader *pReader){
        !           467:   readDocid(pReader);
        !           468:   skipPositionList(pReader);
        !           469: }
        !           470: 
        !           471: /* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
        !           472:  * matching [iDocid] was found.  */
        !           473: static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
        !           474:   sqlite_int64 d = 0;
        !           475:   while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
        !           476:     skipDocument(pReader);
        !           477:   }
        !           478:   return !atEnd(pReader) && d==iDocid;
        !           479: }
        !           480: 
        !           481: /* Return the first document in a document list.
        !           482: */
        !           483: static sqlite_int64 firstDocid(DocList *d){
        !           484:   DocListReader r;
        !           485:   readerInit(&r, d);
        !           486:   return readDocid(&r);
        !           487: }
        !           488: 
        #ifdef SQLITE_DEBUG
        /*
        ** Debugging aid, compiled only when SQLITE_DEBUG is defined.
        **
        ** Writes the doclist to standard output as a comma-separated list of
        ** docids.  For doclists with positions, each docid is followed by its
        ** column:position pairs in parentheses.  Documents whose docid is 0
        ** are skipped.
        */
        static void printDoclist(DocList *p){
          DocListReader r;
          const char *zSep = "";

          for( readerInit(&r, p); !atEnd(&r); ){
            const char *zDiv;
            int iPos, iCol;
            sqlite_int64 docid = readDocid(&r);

            if( docid==0 ){
              skipPositionList(&r);
              continue;
            }
            printf("%s%lld", zSep, docid);
            zSep =  ",";
            if( p->iType<DL_POSITIONS ){
              continue;
            }
            printf("(");
            for( zDiv = ""; (iPos = readPosition(&r, &iCol))>=0; zDiv = ":" ){
              printf("%s%d:%d", zDiv, iCol, iPos);
            }
            printf(")");
          }
          printf("\n");
          fflush(stdout);
        }
        #endif /* SQLITE_DEBUG */
        !           523: 
        !           524: /* Trim the given doclist to contain only positions in column
        !           525:  * [iRestrictColumn]. */
        !           526: static void docListRestrictColumn(DocList *in, int iRestrictColumn){
        !           527:   DocListReader r;
        !           528:   DocList out;
        !           529: 
        !           530:   assert( in->iType>=DL_POSITIONS );
        !           531:   readerInit(&r, in);
        !           532:   docListInit(&out, DL_POSITIONS, NULL, 0);
        !           533: 
        !           534:   while( !atEnd(&r) ){
        !           535:     sqlite_int64 iDocid = readDocid(&r);
        !           536:     int iPos, iColumn;
        !           537: 
        !           538:     docListAddDocid(&out, iDocid);
        !           539:     while( (iPos = readPosition(&r, &iColumn)) != -1 ){
        !           540:       if( iColumn==iRestrictColumn ){
        !           541:         docListAddPos(&out, iColumn, iPos);
        !           542:       }
        !           543:     }
        !           544:   }
        !           545: 
        !           546:   docListDestroy(in);
        !           547:   *in = out;
        !           548: }
        !           549: 
        !           550: /* Trim the given doclist by discarding any docids without any remaining
        !           551:  * positions. */
        !           552: static void docListDiscardEmpty(DocList *in) {
        !           553:   DocListReader r;
        !           554:   DocList out;
        !           555: 
        !           556:   /* TODO: It would be nice to implement this operation in place; that
        !           557:    * could save a significant amount of memory in queries with long doclists. */
        !           558:   assert( in->iType>=DL_POSITIONS );
        !           559:   readerInit(&r, in);
        !           560:   docListInit(&out, DL_POSITIONS, NULL, 0);
        !           561: 
        !           562:   while( !atEnd(&r) ){
        !           563:     sqlite_int64 iDocid = readDocid(&r);
        !           564:     int match = 0;
        !           565:     int iPos, iColumn;
        !           566:     while( (iPos = readPosition(&r, &iColumn)) != -1 ){
        !           567:       if( !match ){
        !           568:         docListAddDocid(&out, iDocid);
        !           569:         match = 1;
        !           570:       }
        !           571:       docListAddPos(&out, iColumn, iPos);
        !           572:     }
        !           573:   }
        !           574: 
        !           575:   docListDestroy(in);
        !           576:   *in = out;
        !           577: }
        !           578: 
        !           579: /* Helper function for docListUpdate() and docListAccumulate().
        !           580: ** Splices a doclist element into the doclist represented by r,
        !           581: ** leaving r pointing after the newly spliced element.
        !           582: */
        !           583: static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
        !           584:                                  const char *pSource, int nSource){
        !           585:   DocList *d = r->pDoclist;
        !           586:   char *pTarget;
        !           587:   int nTarget, found;
        !           588: 
        !           589:   found = skipToDocid(r, iDocid);
        !           590: 
        !           591:   /* Describe slice in d to place pSource/nSource. */
        !           592:   pTarget = r->p;
        !           593:   if( found ){
        !           594:     skipDocument(r);
        !           595:     nTarget = r->p-pTarget;
        !           596:   }else{
        !           597:     nTarget = 0;
        !           598:   }
        !           599: 
        !           600:   /* The sense of the following is that there are three possibilities.
        !           601:   ** If nTarget==nSource, we should not move any memory nor realloc.
        !           602:   ** If nTarget>nSource, trim target and realloc.
        !           603:   ** If nTarget<nSource, realloc then expand target.
        !           604:   */
        !           605:   if( nTarget>nSource ){
        !           606:     memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
        !           607:   }
        !           608:   if( nTarget!=nSource ){
        !           609:     int iDoclist = pTarget-d->pData;
        !           610:     d->pData = realloc(d->pData, d->nData+nSource-nTarget);
        !           611:     pTarget = d->pData+iDoclist;
        !           612:   }
        !           613:   if( nTarget<nSource ){
        !           614:     memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
        !           615:   }
        !           616: 
        !           617:   memcpy(pTarget, pSource, nSource);
        !           618:   d->nData += nSource-nTarget;
        !           619:   r->p = pTarget+nSource;
        !           620: }
        !           621: 
        !           622: /* Insert/update pUpdate into the doclist. */
        !           623: static void docListUpdate(DocList *d, DocList *pUpdate){
        !           624:   DocListReader reader;
        !           625: 
        !           626:   assert( d!=NULL && pUpdate!=NULL );
        !           627:   assert( d->iType==pUpdate->iType);
        !           628: 
        !           629:   readerInit(&reader, d);
        !           630:   docListSpliceElement(&reader, firstDocid(pUpdate),
        !           631:                        pUpdate->pData, pUpdate->nData);
        !           632: }
        !           633: 
        !           634: /* Propagate elements from pUpdate to pAcc, overwriting elements with
        !           635: ** matching docids.
        !           636: */
        !           637: static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
        !           638:   DocListReader accReader, updateReader;
        !           639: 
        !           640:   /* Handle edge cases where one doclist is empty. */
        !           641:   assert( pAcc!=NULL );
        !           642:   if( pUpdate==NULL || pUpdate->nData==0 ) return;
        !           643:   if( pAcc->nData==0 ){
        !           644:     pAcc->pData = malloc(pUpdate->nData);
        !           645:     memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
        !           646:     pAcc->nData = pUpdate->nData;
        !           647:     return;
        !           648:   }
        !           649: 
        !           650:   readerInit(&accReader, pAcc);
        !           651:   readerInit(&updateReader, pUpdate);
        !           652: 
        !           653:   while( !atEnd(&updateReader) ){
        !           654:     char *pSource = updateReader.p;
        !           655:     sqlite_int64 iDocid = readDocid(&updateReader);
        !           656:     skipPositionList(&updateReader);
        !           657:     docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
        !           658:   }
        !           659: }
        !           660: 
        !           661: /*
        !           662: ** Read the next docid off of pIn.  Return 0 if we reach the end.
        !           663: *
        !           664: * TODO: This assumes that docids are never 0, but they may actually be 0 since
        !           665: * users can choose docids when inserting into a full-text table.  Fix this.
        !           666: */
        !           667: static sqlite_int64 nextDocid(DocListReader *pIn){
        !           668:   skipPositionList(pIn);
        !           669:   return atEnd(pIn) ? 0 : readDocid(pIn);
        !           670: }
        !           671: 
        !           672: /*
        !           673: ** pLeft and pRight are two DocListReaders that are pointing to
        !           674: ** positions lists of the same document: iDocid. 
        !           675: **
        !           676: ** If there are no instances in pLeft or pRight where the position
        !           677: ** of pLeft is one less than the position of pRight, then this
        !           678: ** routine adds nothing to pOut.
        !           679: **
        !           680: ** If there are one or more instances where positions from pLeft
        !           681: ** are exactly one less than positions from pRight, then add a new
        !           682: ** document record to pOut.  If pOut wants to hold positions, then
        !           683: ** include the positions from pRight that are one more than a
        !           684: ** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
        !           685: **
        !           686: ** pLeft and pRight are left pointing at the next document record.
        !           687: */
        !           688: static void mergePosList(
        !           689:   DocListReader *pLeft,    /* Left position list */
        !           690:   DocListReader *pRight,   /* Right position list */
        !           691:   sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
        !           692:   DocList *pOut            /* Write the merged document record here */
        !           693: ){
        !           694:   int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
        !           695:   int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
        !           696:   int match = 0;
        !           697: 
        !           698:   /* Loop until we've reached the end of both position lists. */
        !           699:   while( iLeftPos!=-1 && iRightPos!=-1 ){
        !           700:     if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
        !           701:       if( !match ){
        !           702:         docListAddDocid(pOut, iDocid);
        !           703:         match = 1;
        !           704:       }
        !           705:       if( pOut->iType>=DL_POSITIONS ){
        !           706:         docListAddPos(pOut, iRightCol, iRightPos);
        !           707:       }
        !           708:       iLeftPos = readPosition(pLeft, &iLeftCol);
        !           709:       iRightPos = readPosition(pRight, &iRightCol);
        !           710:     }else if( iRightCol<iLeftCol ||
        !           711:               (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
        !           712:       iRightPos = readPosition(pRight, &iRightCol);
        !           713:     }else{
        !           714:       iLeftPos = readPosition(pLeft, &iLeftCol);
        !           715:     }
        !           716:   }
        !           717:   if( iLeftPos>=0 ) skipPositionList(pLeft);
        !           718:   if( iRightPos>=0 ) skipPositionList(pRight);
        !           719: }
        !           720: 
        !           721: /* We have two doclists:  pLeft and pRight.
        !           722: ** Write the phrase intersection of these two doclists into pOut.
        !           723: **
        !           724: ** A phrase intersection means that two documents only match
        !           725: ** if pLeft.iPos+1==pRight.iPos.
        !           726: **
        !           727: ** The output pOut may or may not contain positions.  If pOut
        !           728: ** does contain positions, they are the positions of pRight.
        !           729: */
        !           730: static void docListPhraseMerge(
        !           731:   DocList *pLeft,    /* Doclist resulting from the words on the left */
        !           732:   DocList *pRight,   /* Doclist for the next word to the right */
        !           733:   DocList *pOut      /* Write the combined doclist here */
        !           734: ){
        !           735:   DocListReader left, right;
        !           736:   sqlite_int64 docidLeft, docidRight;
        !           737: 
        !           738:   readerInit(&left, pLeft);
        !           739:   readerInit(&right, pRight);
        !           740:   docidLeft = nextDocid(&left);
        !           741:   docidRight = nextDocid(&right);
        !           742: 
        !           743:   while( docidLeft>0 && docidRight>0 ){
        !           744:     if( docidLeft<docidRight ){
        !           745:       docidLeft = nextDocid(&left);
        !           746:     }else if( docidRight<docidLeft ){
        !           747:       docidRight = nextDocid(&right);
        !           748:     }else{
        !           749:       mergePosList(&left, &right, docidLeft, pOut);
        !           750:       docidLeft = nextDocid(&left);
        !           751:       docidRight = nextDocid(&right);
        !           752:     }
        !           753:   }
        !           754: }
        !           755: 
        !           756: /* We have two doclists:  pLeft and pRight.
        !           757: ** Write the intersection of these two doclists into pOut.
        !           758: ** Only docids are matched.  Position information is ignored.
        !           759: **
        !           760: ** The output pOut never holds positions.
        !           761: */
        !           762: static void docListAndMerge(
        !           763:   DocList *pLeft,    /* Doclist resulting from the words on the left */
        !           764:   DocList *pRight,   /* Doclist for the next word to the right */
        !           765:   DocList *pOut      /* Write the combined doclist here */
        !           766: ){
        !           767:   DocListReader left, right;
        !           768:   sqlite_int64 docidLeft, docidRight;
        !           769: 
        !           770:   assert( pOut->iType<DL_POSITIONS );
        !           771: 
        !           772:   readerInit(&left, pLeft);
        !           773:   readerInit(&right, pRight);
        !           774:   docidLeft = nextDocid(&left);
        !           775:   docidRight = nextDocid(&right);
        !           776: 
        !           777:   while( docidLeft>0 && docidRight>0 ){
        !           778:     if( docidLeft<docidRight ){
        !           779:       docidLeft = nextDocid(&left);
        !           780:     }else if( docidRight<docidLeft ){
        !           781:       docidRight = nextDocid(&right);
        !           782:     }else{
        !           783:       docListAddDocid(pOut, docidLeft);
        !           784:       docidLeft = nextDocid(&left);
        !           785:       docidRight = nextDocid(&right);
        !           786:     }
        !           787:   }
        !           788: }
        !           789: 
        !           790: /* We have two doclists:  pLeft and pRight.
        !           791: ** Write the union of these two doclists into pOut.
        !           792: ** Only docids are matched.  Position information is ignored.
        !           793: **
        !           794: ** The output pOut never holds positions.
        !           795: */
        !           796: static void docListOrMerge(
        !           797:   DocList *pLeft,    /* Doclist resulting from the words on the left */
        !           798:   DocList *pRight,   /* Doclist for the next word to the right */
        !           799:   DocList *pOut      /* Write the combined doclist here */
        !           800: ){
        !           801:   DocListReader left, right;
        !           802:   sqlite_int64 docidLeft, docidRight, priorLeft;
        !           803: 
        !           804:   readerInit(&left, pLeft);
        !           805:   readerInit(&right, pRight);
        !           806:   docidLeft = nextDocid(&left);
        !           807:   docidRight = nextDocid(&right);
        !           808: 
        !           809:   while( docidLeft>0 && docidRight>0 ){
        !           810:     if( docidLeft<=docidRight ){
        !           811:       docListAddDocid(pOut, docidLeft);
        !           812:     }else{
        !           813:       docListAddDocid(pOut, docidRight);
        !           814:     }
        !           815:     priorLeft = docidLeft;
        !           816:     if( docidLeft<=docidRight ){
        !           817:       docidLeft = nextDocid(&left);
        !           818:     }
        !           819:     if( docidRight>0 && docidRight<=priorLeft ){
        !           820:       docidRight = nextDocid(&right);
        !           821:     }
        !           822:   }
        !           823:   while( docidLeft>0 ){
        !           824:     docListAddDocid(pOut, docidLeft);
        !           825:     docidLeft = nextDocid(&left);
        !           826:   }
        !           827:   while( docidRight>0 ){
        !           828:     docListAddDocid(pOut, docidRight);
        !           829:     docidRight = nextDocid(&right);
        !           830:   }
        !           831: }
        !           832: 
        !           833: /* We have two doclists:  pLeft and pRight.
        !           834: ** Write into pOut all documents that occur in pLeft but not
        !           835: ** in pRight.
        !           836: **
        !           837: ** Only docids are matched.  Position information is ignored.
        !           838: **
        !           839: ** The output pOut never holds positions.
        !           840: */
        !           841: static void docListExceptMerge(
        !           842:   DocList *pLeft,    /* Doclist resulting from the words on the left */
        !           843:   DocList *pRight,   /* Doclist for the next word to the right */
        !           844:   DocList *pOut      /* Write the combined doclist here */
        !           845: ){
        !           846:   DocListReader left, right;
        !           847:   sqlite_int64 docidLeft, docidRight, priorLeft;
        !           848: 
        !           849:   readerInit(&left, pLeft);
        !           850:   readerInit(&right, pRight);
        !           851:   docidLeft = nextDocid(&left);
        !           852:   docidRight = nextDocid(&right);
        !           853: 
        !           854:   while( docidLeft>0 && docidRight>0 ){
        !           855:     priorLeft = docidLeft;
        !           856:     if( docidLeft<docidRight ){
        !           857:       docListAddDocid(pOut, docidLeft);
        !           858:     }
        !           859:     if( docidLeft<=docidRight ){
        !           860:       docidLeft = nextDocid(&left);
        !           861:     }
        !           862:     if( docidRight>0 && docidRight<=priorLeft ){
        !           863:       docidRight = nextDocid(&right);
        !           864:     }
        !           865:   }
        !           866:   while( docidLeft>0 ){
        !           867:     docListAddDocid(pOut, docidLeft);
        !           868:     docidLeft = nextDocid(&left);
        !           869:   }
        !           870: }
        !           871: 
        /* Return a malloc()ed, NUL-terminated copy of the first n bytes of s.
        ** The caller must free() the returned string.
        **
        ** Returns NULL if n is negative or if the allocation fails, rather
        ** than writing through a NULL pointer.
        */
        static char *string_dup_n(const char *s, int n){
          char *str;

          if( n<0 ) return NULL;
          /* Widen before adding 1 so that n==INT_MAX cannot overflow. */
          str = malloc((size_t)n + 1);
          if( str==NULL ) return NULL;
          memcpy(str, s, (size_t)n);
          str[n] = '\0';
          return str;
        }
        !           878: 
        /* Return a malloc()ed copy of the NUL-terminated string s; the caller
         * must free() it.  strdup() is avoided because it is not part of the
         * standard C library and may not be available everywhere. */
        static char *string_dup(const char *s){
          size_t nByte = strlen(s);
          return string_dup_n(s, nByte);
        }
        !           885: 
        !           886: /* Format a string, replacing each occurrence of the % character with
        !           887:  * zDb.zName.  This may be more convenient than sqlite_mprintf()
        !           888:  * when one string is used repeatedly in a format string.
        !           889:  * The caller must free() the returned string. */
        !           890: static char *string_format(const char *zFormat,
        !           891:                            const char *zDb, const char *zName){
        !           892:   const char *p;
        !           893:   size_t len = 0;
        !           894:   size_t nDb = strlen(zDb);
        !           895:   size_t nName = strlen(zName);
        !           896:   size_t nFullTableName = nDb+1+nName;
        !           897:   char *result;
        !           898:   char *r;
        !           899: 
        !           900:   /* first compute length needed */
        !           901:   for(p = zFormat ; *p ; ++p){
        !           902:     len += (*p=='%' ? nFullTableName : 1);
        !           903:   }
        !           904:   len += 1;  /* for null terminator */
        !           905: 
        !           906:   r = result = malloc(len);
        !           907:   for(p = zFormat; *p; ++p){
        !           908:     if( *p=='%' ){
        !           909:       memcpy(r, zDb, nDb);
        !           910:       r += nDb;
        !           911:       *r++ = '.';
        !           912:       memcpy(r, zName, nName);
        !           913:       r += nName;
        !           914:     } else {
        !           915:       *r++ = *p;
        !           916:     }
        !           917:   }
        !           918:   *r++ = '\0';
        !           919:   assert( r == result + len );
        !           920:   return result;
        !           921: }
        !           922: 
        !           923: static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
        !           924:                     const char *zFormat){
        !           925:   char *zCommand = string_format(zFormat, zDb, zName);
        !           926:   int rc;
        !           927:   TRACE(("FTS1 sql: %s\n", zCommand));
        !           928:   rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
        !           929:   free(zCommand);
        !           930:   return rc;
        !           931: }
        !           932: 
        !           933: static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
        !           934:                        sqlite3_stmt **ppStmt, const char *zFormat){
        !           935:   char *zCommand = string_format(zFormat, zDb, zName);
        !           936:   int rc;
        !           937:   TRACE(("FTS1 prepare: %s\n", zCommand));
        !           938:   rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
        !           939:   free(zCommand);
        !           940:   return rc;
        !           941: }
        !           942: 
        !           943: /* end utility functions */
        !           944: 
        !           945: /* Forward reference */
        !           946: typedef struct fulltext_vtab fulltext_vtab;
        !           947: 
        !           948: /* A single term in a query is represented by an instance of
        !           949: ** the following structure.
        !           950: */
        !           951: typedef struct QueryTerm {
        !           952:   short int nPhrase; /* How many following terms are part of the same phrase */
        !           953:   short int iPhrase; /* This is the i-th term of a phrase. */
        !           954:   short int iColumn; /* Column of the index that must match this term */
        !           955:   signed char isOr;  /* this term is preceded by "OR" */
        !           956:   signed char isNot; /* this term is preceded by "-" */
        !           957:   char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
        !           958:   int nTerm;         /* Number of bytes in pTerm[] */
        !           959: } QueryTerm;
        !           960: 
        !           961: 
        !           962: /* A query string is parsed into a Query structure.
        !           963:  *
        !           964:  * We could, in theory, allow query strings to be complicated
        !           965:  * nested expressions with precedence determined by parentheses.
        !           966:  * But none of the major search engines do this.  (Perhaps the
        !           967:  * feeling is that a parenthesized expression is too complex
        !           968:  * an idea for the average user to grasp.)  Taking our lead from
        !           969:  * the major search engines, we will allow queries to be a list
        !           970:  * of terms (with an implied AND operator) or phrases in double-quotes,
        !           971:  * with a single optional "-" before each non-phrase term to designate
        !           972:  * negation and an optional OR connector.
        !           973:  *
        !           974:  * OR binds more tightly than the implied AND, which is what the
        !           975:  * major search engines seem to do.  So, for example:
        !           976:  * 
        !           977:  *    [one two OR three]     ==>    one AND (two OR three)
        !           978:  *    [one OR two three]     ==>    (one OR two) AND three
        !           979:  *
        !           980:  * A "-" before a term matches all entries that lack that term.
        !           981:  * The "-" must occur immediately before the term with no intervening
        !           982:  * space.  This is how the search engines do it.
        !           983:  *
        !           984:  * A NOT term cannot be the right-hand operand of an OR.  If this
        !           985:  * occurs in the query string, the NOT is ignored:
        !           986:  *
        !           987:  *    [one OR -two]          ==>    one OR two
        !           988:  *
        !           989:  */
        !           990: typedef struct Query {
        !           991:   fulltext_vtab *pFts;  /* The full text index */
        !           992:   int nTerms;           /* Number of terms in the query */
        !           993:   QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
        !           994:   int nextIsOr;         /* Set the isOr flag on the next inserted term */
        !           995:   int nextColumn;       /* Next word parsed must be in this column */
        !           996:   int dfltColumn;       /* The default column */
        !           997: } Query;
        !           998: 
        !           999: 
        !          1000: /*
        !          1001: ** An instance of the following structure keeps track of generated
        !          1002: ** matching-word offset information and snippets.  Each fulltext_cursor embeds one.
        !          1003: */
        !          1004: typedef struct Snippet {
        !          1005:   int nMatch;     /* Total number of matches */
        !          1006:   int nAlloc;     /* Space allocated for aMatch[] */
        !          1007:   struct snippetMatch { /* One entry for each matching term */
        !          1008:     char snStatus;       /* Status flag for use while constructing snippets */
        !          1009:     short int iCol;      /* The column that contains the match */
        !          1010:     short int iTerm;     /* The index in Query.pTerms[] of the matching term */
        !          1011:     short int nByte;     /* Number of bytes in the term */
        !          1012:     int iStart;          /* The offset to the first character of the term */
        !          1013:   } *aMatch;      /* Points to space obtained from malloc */
        !          1014:   char *zOffset;  /* Text rendering of aMatch[] */
        !          1015:   int nOffset;    /* strlen(zOffset) */
        !          1016:   char *zSnippet; /* Snippet text */
        !          1017:   int nSnippet;   /* strlen(zSnippet) */
        !          1018: } Snippet;
        !          1019: 
        !          1020: 
        !          1021: typedef enum QueryType {  /* Kind of scan; stored in fulltext_cursor.iCursorType */
        !          1022:   QUERY_GENERIC,   /* table scan */
        !          1023:   QUERY_ROWID,     /* lookup by rowid */
        !          1024:   QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i; must stay last */
        !          1025: } QueryType;
        !          1026: 
        !          1027: /* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
        !          1028: ** before we start aggregating into larger segments.  Lower CHUNK_MAX
        !          1029: ** means that for a given input we have more individual segments per
        !          1030: ** term, which means more rows in the table and a bigger index (due to
        !          1031: ** both more rows and bigger rowids).  But it also reduces the average
        !          1032: ** cost of adding new elements to the segment 0 doclist, and it seems
        !          1033: ** to reduce the number of pages read and written during inserts.  256
        !          1034: ** was chosen by measuring insertion times for a certain input (first
        !          1035: ** 10k documents of Enron corpus), though including query performance
        !          1036: ** in the decision may argue for a larger value.
        !          1037: */
        !          1038: #define CHUNK_MAX 256
        !          1039: 
        !          1040: typedef enum fulltext_statement {  /* Identifies one SQL statement; sizes fulltext_zStatement[] and pFulltextStatements[] */
        !          1041:   CONTENT_INSERT_STMT,
        !          1042:   CONTENT_SELECT_STMT,
        !          1043:   CONTENT_UPDATE_STMT,
        !          1044:   CONTENT_DELETE_STMT,
        !          1045: 
        !          1046:   TERM_SELECT_STMT,
        !          1047:   TERM_SELECT_ALL_STMT,
        !          1048:   TERM_INSERT_STMT,
        !          1049:   TERM_UPDATE_STMT,
        !          1050:   TERM_DELETE_STMT,
        !          1051: 
        !          1052:   MAX_STMT                     /* Number of statements.  Always at end! */
        !          1053: } fulltext_statement;
        !          1054: 
        !          1055: /* SQL text for each fulltext_statement.  These must exactly match the enum above; NULL entries are generated at run time. */
        !          1056: /* TODO(adam): Is there some risk that a statement (in particular,
        !          1057: ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
        !          1058: ** query joins a virtual table to itself?  If so perhaps we should
        !          1059: ** move some of these to the cursor object.
        !          1060: */
        !          1061: static const char *const fulltext_zStatement[MAX_STMT] = {
        !          1062:   /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
        !          1063:   /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
        !          1064:   /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
        !          1065:   /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
        !          1066: 
        !          1067:   /* TERM_SELECT */
        !          1068:   "select rowid, doclist from %_term where term = ? and segment = ?",
        !          1069:   /* TERM_SELECT_ALL */
        !          1070:   "select doclist from %_term where term = ? order by segment",
        !          1071:   /* TERM_INSERT */
        !          1072:   "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
        !          1073:   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
        !          1074:   /* TERM_DELETE */ "delete from %_term where rowid = ?",
        !          1075: };
        !          1076: 
        !          1077: /*
        !          1078: ** A connection to a fulltext index is an instance of the following
        !          1079: ** structure.  The xCreate and xConnect methods create an instance
        !          1080: ** of this structure and xDestroy and xDisconnect free that instance.
        !          1081: ** All other methods receive a pointer to the structure as one of their
        !          1082: ** arguments.
        !          1083: */
        !          1084: struct fulltext_vtab {
        !          1085:   sqlite3_vtab base;               /* Base class used by SQLite core */
        !          1086:   sqlite3 *db;                     /* The database connection */
        !          1087:   const char *zDb;                 /* logical database name */
        !          1088:   const char *zName;               /* virtual table name */
        !          1089:   int nColumn;                     /* number of columns in virtual table */
        !          1090:   char **azColumn;                 /* column names.  malloced */
        !          1091:   char **azContentColumn;          /* column names in content table; malloced */
        !          1092:   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
        !          1093: 
        !          1094:   /* Precompiled statements which we keep as long as the table is
        !          1095:   ** open.  One slot per fulltext_statement value (MAX_STMT entries).
        !          1096:   */
        !          1097:   sqlite3_stmt *pFulltextStatements[MAX_STMT];
        !          1098: };
        !          1099: 
        !          1100: /*
        !          1101: ** When the core wants to do a query, it creates a cursor using a
        !          1102: ** call to xOpen.  This structure is an instance of a cursor.  It
        !          1103: ** is destroyed by xClose.
        !          1104: */
        !          1105: typedef struct fulltext_cursor {
        !          1106:   sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
        !          1107:   QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
        !          1108:   sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
        !          1109:   int eof;                         /* True if at End Of Results */
        !          1110:   Query q;                         /* Parsed query string */
        !          1111:   Snippet snippet;                 /* Cached snippet for the current row */
        !          1112:   int iColumn;                     /* Column being searched */
        !          1113:   DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
        !          1114: } fulltext_cursor;
        !          1115: 
        !          1116: static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
        !          1117:   return (fulltext_vtab *) c->base.pVtab;
        !          1118: }
        !          1119: 
        !          1120: static const sqlite3_module fulltextModule;   /* forward declaration */
        !          1121: 
        !          1122: /* Append a list of strings separated by commas to a StringBuffer. */
        !          1123: static void appendList(StringBuffer *sb, int nString, char **azString){
        !          1124:   int i;
        !          1125:   for(i=0; i<nString; ++i){
        !          1126:     if( i>0 ) append(sb, ", ");
        !          1127:     append(sb, azString[i]);
        !          1128:   }
        !          1129: }
        !          1130: 
        !          1131: /* Return a dynamically generated statement of the form
        !          1132:  *   insert into %_content (rowid, ...) values (?, ...)
        !          1133:  */
        !          1134: static const char *contentInsertStatement(fulltext_vtab *v){
        !          1135:   StringBuffer sb;
        !          1136:   int i;
        !          1137: 
        !          1138:   initStringBuffer(&sb);
        !          1139:   append(&sb, "insert into %_content (rowid, ");
        !          1140:   appendList(&sb, v->nColumn, v->azContentColumn);
        !          1141:   append(&sb, ") values (?");
        !          1142:   for(i=0; i<v->nColumn; ++i)
        !          1143:     append(&sb, ", ?");
        !          1144:   append(&sb, ")");
        !          1145:   return sb.s;
        !          1146: }
        !          1147: 
        !          1148: /* Return a dynamically generated statement of the form
        !          1149:  *   update %_content set [col_0] = ?, [col_1] = ?, ...
        !          1150:  *                    where rowid = ?
        !          1151:  */
        !          1152: static const char *contentUpdateStatement(fulltext_vtab *v){
        !          1153:   StringBuffer sb;
        !          1154:   int i;
        !          1155: 
        !          1156:   initStringBuffer(&sb);
        !          1157:   append(&sb, "update %_content set ");
        !          1158:   for(i=0; i<v->nColumn; ++i) {
        !          1159:     if( i>0 ){
        !          1160:       append(&sb, ", ");
        !          1161:     }
        !          1162:     append(&sb, v->azContentColumn[i]);
        !          1163:     append(&sb, " = ?");
        !          1164:   }
        !          1165:   append(&sb, " where rowid = ?");
        !          1166:   return sb.s;
        !          1167: }
        !          1168: 
        !          1169: /* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
        !          1170: ** If the indicated statement has never been prepared, it is prepared
        !          1171: ** and cached, otherwise the cached version is reset.
        !          1172: */
        !          1173: static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
        !          1174:                              sqlite3_stmt **ppStmt){
        !          1175:   assert( iStmt<MAX_STMT );
        !          1176:   if( v->pFulltextStatements[iStmt]==NULL ){
        !          1177:     const char *zStmt;
        !          1178:     int rc;
        !          1179:     switch( iStmt ){
        !          1180:       case CONTENT_INSERT_STMT:
        !          1181:         zStmt = contentInsertStatement(v); break;
        !          1182:       case CONTENT_UPDATE_STMT:
        !          1183:         zStmt = contentUpdateStatement(v); break;
        !          1184:       default:
        !          1185:         zStmt = fulltext_zStatement[iStmt];
        !          1186:     }
        !          1187:     rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
        !          1188:                          zStmt);
        !          1189:     if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
        !          1190:     if( rc!=SQLITE_OK ) return rc;
        !          1191:   } else {
        !          1192:     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
        !          1193:     if( rc!=SQLITE_OK ) return rc;
        !          1194:   }
        !          1195: 
        !          1196:   *ppStmt = v->pFulltextStatements[iStmt];
        !          1197:   return SQLITE_OK;
        !          1198: }
        !          1199: 
        !          1200: /* Step the indicated statement, handling errors SQLITE_BUSY (by
        !          1201: ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
        !          1202: ** bindings to the new statement).
        !          1203: ** TODO(adam): We should extend this function so that it can work with
        !          1204: ** statements declared locally, not only globally cached statements.
        !          1205: */
        !          1206: static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
        !          1207:                               sqlite3_stmt **ppStmt){
        !          1208:   int rc;
        !          1209:   sqlite3_stmt *s = *ppStmt;
        !          1210:   assert( iStmt<MAX_STMT );
        !          1211:   assert( s==v->pFulltextStatements[iStmt] );
        !          1212: 
        !          1213:   while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
        !          1214:     if( rc==SQLITE_BUSY ) continue;
        !          1215:     if( rc!=SQLITE_ERROR ) return rc;
        !          1216: 
        !          1217:     /* If an SQLITE_SCHEMA error has occurred, then finalizing this
        !          1218:      * statement is going to delete the fulltext_vtab structure. If
        !          1219:      * the statement just executed is in the pFulltextStatements[]
        !          1220:      * array, it will be finalized twice. So remove it before
        !          1221:      * calling sqlite3_finalize().
        !          1222:      */
        !          1223:     v->pFulltextStatements[iStmt] = NULL;
        !          1224:     rc = sqlite3_finalize(s);
        !          1225:     break;
        !          1226:   }
        !          1227:   return rc;
        !          1228: 
        !          1229:  err:
        !          1230:   sqlite3_finalize(s);
        !          1231:   return rc;
        !          1232: }
        !          1233: 
        !          1234: /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
        !          1235: ** Useful for statements like UPDATE, where we expect no results.
        !          1236: */
        !          1237: static int sql_single_step_statement(fulltext_vtab *v,
        !          1238:                                      fulltext_statement iStmt,
        !          1239:                                      sqlite3_stmt **ppStmt){
        !          1240:   int rc = sql_step_statement(v, iStmt, ppStmt);
        !          1241:   return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
        !          1242: }
        !          1243: 
        !          1244: /* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
        !          1245: static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
        !          1246:                           sqlite3_value **pValues){
        !          1247:   sqlite3_stmt *s;
        !          1248:   int i;
        !          1249:   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
        !          1250:   if( rc!=SQLITE_OK ) return rc;
        !          1251: 
        !          1252:   rc = sqlite3_bind_value(s, 1, rowid);
        !          1253:   if( rc!=SQLITE_OK ) return rc;
        !          1254: 
        !          1255:   for(i=0; i<v->nColumn; ++i){
        !          1256:     rc = sqlite3_bind_value(s, 2+i, pValues[i]);
        !          1257:     if( rc!=SQLITE_OK ) return rc;
        !          1258:   }
        !          1259: 
        !          1260:   return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
        !          1261: }
        !          1262: 
        !          1263: /* update %_content set col0 = pValues[0], col1 = pValues[1], ...
        !          1264:  *                  where rowid = [iRowid] */
        !          1265: static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
        !          1266:                           sqlite_int64 iRowid){
        !          1267:   sqlite3_stmt *s;
        !          1268:   int i;
        !          1269:   int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
        !          1270:   if( rc!=SQLITE_OK ) return rc;
        !          1271: 
        !          1272:   for(i=0; i<v->nColumn; ++i){
        !          1273:     rc = sqlite3_bind_value(s, 1+i, pValues[i]);
        !          1274:     if( rc!=SQLITE_OK ) return rc;
        !          1275:   }
        !          1276: 
        !          1277:   rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
        !          1278:   if( rc!=SQLITE_OK ) return rc;
        !          1279: 
        !          1280:   return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
        !          1281: }
        !          1282: 
        /* Free an array of nString malloc()ed strings together with the array
        ** itself.  Individual entries may be NULL.  A NULL pString is
        ** accepted and ignored, whatever the value of nString.
        */
        static void freeStringArray(int nString, const char **pString){
          int i;

          if( pString==NULL ) return;
          for(i=0; i<nString; ++i){
            free((void *) pString[i]);   /* free(NULL) is a no-op */
          }
          free((void *) pString);
        }
        !          1291: 
        !          1292: /* select * from %_content where rowid = [iRow]
        !          1293:  * The caller must delete the returned array and all strings in it.
        !          1294:  * null fields will be NULL in the returned array.
        !          1295:  *
        !          1296:  * TODO: Perhaps we should return pointer/length strings here for consistency
        !          1297:  * with other code which uses pointer/length. */
        !          1298: static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
        !          1299:                           const char ***pValues){
        !          1300:   sqlite3_stmt *s;
        !          1301:   const char **values;
        !          1302:   int i;
        !          1303:   int rc;
        !          1304: 
        !          1305:   *pValues = NULL;
        !          1306: 
        !          1307:   rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
        !          1308:   if( rc!=SQLITE_OK ) return rc;
        !          1309: 
        !          1310:   rc = sqlite3_bind_int64(s, 1, iRow);
        !          1311:   if( rc!=SQLITE_OK ) return rc;
        !          1312: 
        !          1313:   rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
        !          1314:   if( rc!=SQLITE_ROW ) return rc;
        !          1315: 
        !          1316:   values = (const char **) malloc(v->nColumn * sizeof(const char *));
        !          1317:   for(i=0; i<v->nColumn; ++i){
        !          1318:     if( sqlite3_column_type(s, i)==SQLITE_NULL ){
        !          1319:       values[i] = NULL;
        !          1320:     }else{
        !          1321:       values[i] = string_dup((char*)sqlite3_column_text(s, i));
        !          1322:     }
        !          1323:   }
        !          1324: 
        !          1325:   /* We expect only one row.  We must execute another sqlite3_step()
        !          1326:    * to complete the iteration; otherwise the table will remain locked. */
        !          1327:   rc = sqlite3_step(s);
        !          1328:   if( rc==SQLITE_DONE ){
        !          1329:     *pValues = values;
        !          1330:     return SQLITE_OK;
        !          1331:   }
        !          1332: 
        !          1333:   freeStringArray(v->nColumn, values);
        !          1334:   return rc;
        !          1335: }
        !          1336: 
        !          1337: /* delete from %_content where rowid = [iRow ] */
        !          1338: static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
        !          1339:   sqlite3_stmt *s;
        !          1340:   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
        !          1341:   if( rc!=SQLITE_OK ) return rc;
        !          1342: 
        !          1343:   rc = sqlite3_bind_int64(s, 1, iRow);
        !          1344:   if( rc!=SQLITE_OK ) return rc;
        !          1345: 
        !          1346:   return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
        !          1347: }
        !          1348: 
        !          1349: /* select rowid, doclist from %_term
        !          1350:  *  where term = [pTerm] and segment = [iSegment]
        !          1351:  * If found, returns SQLITE_ROW; the caller must free the
        !          1352:  * returned doclist.  If no rows found, returns SQLITE_DONE. */
        !          1353: static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
        !          1354:                        int iSegment,
        !          1355:                        sqlite_int64 *rowid, DocList *out){
        !          1356:   sqlite3_stmt *s;
        !          1357:   int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
        !          1358:   if( rc!=SQLITE_OK ) return rc;
        !          1359: 
        !          1360:   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
        !          1361:   if( rc!=SQLITE_OK ) return rc;
        !          1362: 
        !          1363:   rc = sqlite3_bind_int(s, 2, iSegment);
        !          1364:   if( rc!=SQLITE_OK ) return rc;
        !          1365: 
        !          1366:   rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
        !          1367:   if( rc!=SQLITE_ROW ) return rc;
        !          1368: 
        !          1369:   *rowid = sqlite3_column_int64(s, 0);
        !          1370:   docListInit(out, DL_DEFAULT,
        !          1371:               sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
        !          1372: 
        !          1373:   /* We expect only one row.  We must execute another sqlite3_step()
        !          1374:    * to complete the iteration; otherwise the table will remain locked. */
        !          1375:   rc = sqlite3_step(s);
        !          1376:   return rc==SQLITE_DONE ? SQLITE_ROW : rc;
        !          1377: }
        !          1378: 
        !          1379: /* Load the segment doclists for term pTerm and merge them in
        !          1380: ** appropriate order into out.  Returns SQLITE_OK if successful.  If
        !          1381: ** there are no segments for pTerm, successfully returns an empty
        !          1382: ** doclist in out.
        !          1383: **
        !          1384: ** Each document consists of 1 or more "columns".  The number of
        !          1385: ** columns is v->nColumn.  If iColumn==v->nColumn, then return
        !          1386: ** position information about all columns.  If iColumn<v->nColumn,
        !          1387: ** then only return position information about the iColumn-th column
        !          1388: ** (where the first column is 0).
        !          1389: */
        !          1390: static int term_select_all(
        !          1391:   fulltext_vtab *v,     /* The fulltext index we are querying against */
        !          1392:   int iColumn,          /* If <nColumn, only look at the iColumn-th column */
        !          1393:   const char *pTerm,    /* The term whose posting lists we want */
        !          1394:   int nTerm,            /* Number of bytes in pTerm */
        !          1395:   DocList *out          /* Write the resulting doclist here */
        !          1396: ){
        !          1397:   DocList doclist;
        !          1398:   sqlite3_stmt *s;
        !          1399:   int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
        !          1400:   if( rc!=SQLITE_OK ) return rc;
        !          1401: 
        !          1402:   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
        !          1403:   if( rc!=SQLITE_OK ) return rc;
        !          1404: 
        !          1405:   docListInit(&doclist, DL_DEFAULT, 0, 0);
        !          1406: 
        !          1407:   /* TODO(shess) Handle schema and busy errors. */
        !          1408:   while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
        !          1409:     DocList old;
        !          1410: 
        !          1411:     /* TODO(shess) If we processed doclists from oldest to newest, we
        !          1412:     ** could skip the malloc() involved with the following call.  For
        !          1413:     ** now, I'd rather keep this logic similar to index_insert_term().
        !          1414:     ** We could additionally drop elements when we see deletes, but
        !          1415:     ** that would require a distinct version of docListAccumulate().
        !          1416:     */
        !          1417:     docListInit(&old, DL_DEFAULT,
        !          1418:                 sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
        !          1419: 
        !          1420:     if( iColumn<v->nColumn ){   /* querying a single column */
        !          1421:       docListRestrictColumn(&old, iColumn);
        !          1422:     }
        !          1423: 
        !          1424:     /* doclist contains the newer data, so write it over old.  Then
        !          1425:     ** steal accumulated result for doclist.
        !          1426:     */
        !          1427:     docListAccumulate(&old, &doclist);
        !          1428:     docListDestroy(&doclist);
        !          1429:     doclist = old;
        !          1430:   }
        !          1431:   if( rc!=SQLITE_DONE ){
        !          1432:     docListDestroy(&doclist);
        !          1433:     return rc;
        !          1434:   }
        !          1435: 
        !          1436:   docListDiscardEmpty(&doclist);
        !          1437:   *out = doclist;
        !          1438:   return SQLITE_OK;
        !          1439: }
        !          1440: 
        !          1441: /* insert into %_term (rowid, term, segment, doclist)
        !          1442:                values ([piRowid], [pTerm], [iSegment], [doclist])
        !          1443: ** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
        !          1444: **
        !          1445: ** NOTE(shess) piRowid is IN, with values of "space of int64" plus
        !          1446: ** null, it is not used to pass data back to the caller.
        !          1447: */
        !          1448: static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
        !          1449:                        const char *pTerm, int nTerm,
        !          1450:                        int iSegment, DocList *doclist){
        !          1451:   sqlite3_stmt *s;
        !          1452:   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
        !          1453:   if( rc!=SQLITE_OK ) return rc;
        !          1454: 
        !          1455:   if( piRowid==NULL ){
        !          1456:     rc = sqlite3_bind_null(s, 1);
        !          1457:   }else{
        !          1458:     rc = sqlite3_bind_int64(s, 1, *piRowid);
        !          1459:   }
        !          1460:   if( rc!=SQLITE_OK ) return rc;
        !          1461: 
        !          1462:   rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
        !          1463:   if( rc!=SQLITE_OK ) return rc;
        !          1464: 
        !          1465:   rc = sqlite3_bind_int(s, 3, iSegment);
        !          1466:   if( rc!=SQLITE_OK ) return rc;
        !          1467: 
        !          1468:   rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
        !          1469:   if( rc!=SQLITE_OK ) return rc;
        !          1470: 
        !          1471:   return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
        !          1472: }
        !          1473: 
        !          1474: /* update %_term set doclist = [doclist] where rowid = [rowid] */
        !          1475: static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
        !          1476:                        DocList *doclist){
        !          1477:   sqlite3_stmt *s;
        !          1478:   int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
        !          1479:   if( rc!=SQLITE_OK ) return rc;
        !          1480: 
        !          1481:   rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
        !          1482:   if( rc!=SQLITE_OK ) return rc;
        !          1483: 
        !          1484:   rc = sqlite3_bind_int64(s, 2, rowid);
        !          1485:   if( rc!=SQLITE_OK ) return rc;
        !          1486: 
        !          1487:   return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
        !          1488: }
        !          1489: 
        !          1490: static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
        !          1491:   sqlite3_stmt *s;
        !          1492:   int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
        !          1493:   if( rc!=SQLITE_OK ) return rc;
        !          1494: 
        !          1495:   rc = sqlite3_bind_int64(s, 1, rowid);
        !          1496:   if( rc!=SQLITE_OK ) return rc;
        !          1497: 
        !          1498:   return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
        !          1499: }
        !          1500: 
        !          1501: /*
        !          1502: ** Free the memory used to contain a fulltext_vtab structure.
        !          1503: */
        !          1504: static void fulltext_vtab_destroy(fulltext_vtab *v){
        !          1505:   int iStmt, i;
        !          1506: 
        !          1507:   TRACE(("FTS1 Destroy %p\n", v));
        !          1508:   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
        !          1509:     if( v->pFulltextStatements[iStmt]!=NULL ){
        !          1510:       sqlite3_finalize(v->pFulltextStatements[iStmt]);
        !          1511:       v->pFulltextStatements[iStmt] = NULL;
        !          1512:     }
        !          1513:   }
        !          1514: 
        !          1515:   if( v->pTokenizer!=NULL ){
        !          1516:     v->pTokenizer->pModule->xDestroy(v->pTokenizer);
        !          1517:     v->pTokenizer = NULL;
        !          1518:   }
        !          1519:   
        !          1520:   free(v->azColumn);
        !          1521:   for(i = 0; i < v->nColumn; ++i) {
        !          1522:     sqlite3_free(v->azContentColumn[i]);
        !          1523:   }
        !          1524:   free(v->azContentColumn);
        !          1525:   free(v);
        !          1526: }
        !          1527: 
        /*
        ** Token types for parsing the arguments to xConnect or xCreate.
        ** getToken() stores one of these values in *tokenType.
        */
        #define TOKEN_EOF         0    /* End of input; getToken() returns length 0 */
        #define TOKEN_SPACE       1    /* A run of whitespace characters */
        #define TOKEN_ID          2    /* An identifier, bare or in [brackets] */
        #define TOKEN_STRING      3    /* A literal quoted with ', " or ` */
        #define TOKEN_PUNCT       4    /* Any other single character */
        !          1536: 
        /*
        ** IdChar(X) is true if X is a character that may appear in an
        ** identifier and false otherwise.
        **
        ** Any character with the high-order bit set is accepted.  For 7-bit
        ** characters in the range 0x20..0x7f the answer is looked up in
        ** isIdChar[], indexed by (X - 0x20); characters below 0x20 are
        ** rejected.
        **
        ** Ticket #1066.  The SQL standard does not allow '$' in the middle
        ** of identifiers, but many SQL implementations do.  SQLite allows
        ** '$' in identifiers for compatibility, though the feature is
        ** undocumented.
        **
        ** NOTE: the macro assigns its argument to a variable named "c" that
        ** must be declared (as an int) in the scope where it is used.
        */
        static const char isIdChar[] = {
        /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
            0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
            0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
            0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
        };
        #define IdChar(C) \
          ( ((c = (C)) & 0x80) != 0 || (c > 0x1f && isIdChar[c - 0x20]) )
        !          1560: 
        !          1561: 
        !          1562: /*
        !          1563: ** Return the length of the token that begins at z[0]. 
        !          1564: ** Store the token type in *tokenType before returning.
        !          1565: */
        !          1566: static int getToken(const char *z, int *tokenType){
        !          1567:   int i, c;
        !          1568:   switch( *z ){
        !          1569:     case 0: {
        !          1570:       *tokenType = TOKEN_EOF;
        !          1571:       return 0;
        !          1572:     }
        !          1573:     case ' ': case '\t': case '\n': case '\f': case '\r': {
        !          1574:       for(i=1; safe_isspace(z[i]); i++){}
        !          1575:       *tokenType = TOKEN_SPACE;
        !          1576:       return i;
        !          1577:     }
        !          1578:     case '`':
        !          1579:     case '\'':
        !          1580:     case '"': {
        !          1581:       int delim = z[0];
        !          1582:       for(i=1; (c=z[i])!=0; i++){
        !          1583:         if( c==delim ){
        !          1584:           if( z[i+1]==delim ){
        !          1585:             i++;
        !          1586:           }else{
        !          1587:             break;
        !          1588:           }
        !          1589:         }
        !          1590:       }
        !          1591:       *tokenType = TOKEN_STRING;
        !          1592:       return i + (c!=0);
        !          1593:     }
        !          1594:     case '[': {
        !          1595:       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
        !          1596:       *tokenType = TOKEN_ID;
        !          1597:       return i;
        !          1598:     }
        !          1599:     default: {
        !          1600:       if( !IdChar(*z) ){
        !          1601:         break;
        !          1602:       }
        !          1603:       for(i=1; IdChar(z[i]); i++){}
        !          1604:       *tokenType = TOKEN_ID;
        !          1605:       return i;
        !          1606:     }
        !          1607:   }
        !          1608:   *tokenType = TOKEN_PUNCT;
        !          1609:   return 1;
        !          1610: }
        !          1611: 
        /*
        ** A token extracted from a string is an instance of the following
        ** structure.  The length is a full int so that it can hold any value
        ** returned by getToken() without truncation.
        */
        typedef struct Token {
          const char *z;       /* Pointer to token text.  Not '\000' terminated */
          int n;               /* Length of the token text in bytes. */
        } Token;
        !          1620: 
        !          1621: /*
        !          1622: ** Given a input string (which is really one of the argv[] parameters
        !          1623: ** passed into xConnect or xCreate) split the string up into tokens.
        !          1624: ** Return an array of pointers to '\000' terminated strings, one string
        !          1625: ** for each non-whitespace token.
        !          1626: **
        !          1627: ** The returned array is terminated by a single NULL pointer.
        !          1628: **
        !          1629: ** Space to hold the returned array is obtained from a single
        !          1630: ** malloc and should be freed by passing the return value to free().
        !          1631: ** The individual strings within the token list are all a part of
        !          1632: ** the single memory allocation and will all be freed at once.
        !          1633: */
        !          1634: static char **tokenizeString(const char *z, int *pnToken){
        !          1635:   int nToken = 0;
        !          1636:   Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
        !          1637:   int n = 1;
        !          1638:   int e, i;
        !          1639:   int totalSize = 0;
        !          1640:   char **azToken;
        !          1641:   char *zCopy;
        !          1642:   while( n>0 ){
        !          1643:     n = getToken(z, &e);
        !          1644:     if( e!=TOKEN_SPACE ){
        !          1645:       aToken[nToken].z = z;
        !          1646:       aToken[nToken].n = n;
        !          1647:       nToken++;
        !          1648:       totalSize += n+1;
        !          1649:     }
        !          1650:     z += n;
        !          1651:   }
        !          1652:   azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
        !          1653:   zCopy = (char*)&azToken[nToken];
        !          1654:   nToken--;
        !          1655:   for(i=0; i<nToken; i++){
        !          1656:     azToken[i] = zCopy;
        !          1657:     n = aToken[i].n;
        !          1658:     memcpy(zCopy, aToken[i].z, n);
        !          1659:     zCopy[n] = 0;
        !          1660:     zCopy += n+1;
        !          1661:   }
        !          1662:   azToken[nToken] = 0;
        !          1663:   free(aToken);
        !          1664:   *pnToken = nToken;
        !          1665:   return azToken;
        !          1666: }
        !          1667: 
        /*
        ** Convert an SQL-style quoted string into a normal string by removing
        ** the quote characters.  The conversion is done in-place.  If the
        ** input is NULL or does not begin with a quote character, then this
        ** routine is a no-op.
        **
        ** A doubled quote character inside the string becomes a single one.
        ** Conversion stops at the closing quote; if the closing quote is
        ** missing, everything after the opening quote is kept.  The result is
        ** always '\000' terminated.
        **
        ** Examples:
        **
        **     "abc"   becomes   abc
        **     'xyz'   becomes   xyz
        **     [pqr]   becomes   pqr
        **     `mno`   becomes   mno
        **     'it''s' becomes   it's
        **     'abc    becomes   abc
        */
        static void dequoteString(char *z){
          int quote;
          int i;
          int j = 0;
          if( z==0 ) return;
          quote = z[0];
          switch( quote ){
            case '\'':  break;
            case '"':   break;
            case '`':   break;                /* For MySQL compatibility */
            case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
            default:    return;
          }
          for(i=1; z[i]!=0; i++){
            if( z[i]==quote ){
              if( z[i+1]!=quote ) break;   /* closing quote */
              i++;                         /* doubled quote: keep one copy */
            }
            z[j++] = z[i];
          }
          /* j<i always holds, so this write stays inside the original string. */
          z[j] = 0;
        }
        !          1707: 
        /*
        ** azIn is a NULL-terminated array of token strings.  Compact it in place:
        **
        **   *  A token is kept if it starts with an alphanumeric character or is
        **      more than one byte long; single-character punctuation is dropped.
        **   *  Every kept token is passed through dequoteString().
        **   *  The first kept token is then discarded and the remaining ones are
        **      shifted to the front of the array.
        **
        ** Example:
        **
        **     input:      tokenize chinese ( 'simplified' , 'mixed' )
        **     output:     chinese simplified mixed
        **
        ** Another example:
        **
        **     input:      delimiters ( '[' , ']' , '...' )
        **     output:     [ ] ...
        **
        ** A NULL azIn is ignored.
        */
        static void tokenListToIdList(char **azIn){
          int i, j;
          if( azIn==0 ) return;
          /* j is the output slot for the NEXT kept token.  It starts at -1 so that
          ** the first kept token is counted but never stored.
          */
          for(i=0, j=-1; azIn[i]; i++){
            /* Test azIn[i][0] before azIn[i][1] so that an empty token never causes
            ** a read past its terminator.
            */
            if( safe_isalnum(azIn[i][0]) || (azIn[i][0] && azIn[i][1]) ){
              dequoteString(azIn[i]);
              if( j>=0 ){
                azIn[j] = azIn[i];
              }
              j++;
            }
          }
          /* If no token was kept j is still -1; terminate at slot 0 instead of
          ** writing in front of the array.
          */
          azIn[j<0 ? 0 : j] = 0;
        }
        !          1738: 
        !          1739: 
        !          1740: /*
        !          1741: ** Find the first alphanumeric token in the string zIn.  Null-terminate
        !          1742: ** this token.  Remove any quotation marks.  And return a pointer to
        !          1743: ** the result.
        !          1744: */
        !          1745: static char *firstToken(char *zIn, char **pzTail){
        !          1746:   int n, ttype;
        !          1747:   while(1){
        !          1748:     n = getToken(zIn, &ttype);
        !          1749:     if( ttype==TOKEN_SPACE ){
        !          1750:       zIn += n;
        !          1751:     }else if( ttype==TOKEN_EOF ){
        !          1752:       *pzTail = zIn;
        !          1753:       return 0;
        !          1754:     }else{
        !          1755:       zIn[n] = 0;
        !          1756:       *pzTail = &zIn[1];
        !          1757:       dequoteString(zIn);
        !          1758:       return zIn;
        !          1759:     }
        !          1760:   }
        !          1761:   /*NOTREACHED*/
        !          1762: }
        !          1763: 
        /* Return true if the first token of s[] is t[].
        **
        ** Leading whitespace in s (per safe_isspace) is skipped.  Then:
        **
        **   *  s must match every character of t, ignoring case
        **      (per safe_tolower)
        **   *  the character of s that follows the match must be neither '_'
        **      nor alphanumeric; the end of the string qualifies
        **
        ** Otherwise return false.
        */
        static int startsWith(const char *s, const char *t){
          const char *zIn = s;        /* Cursor into s */
          const char *zWant;          /* Cursor into t */

          while( safe_isspace(*zIn) ) zIn++;
          for(zWant=t; *zWant; zWant++, zIn++){
            if( safe_tolower(*zIn)!=safe_tolower(*zWant) ) return 0;
          }
          if( *zIn=='_' ) return 0;
          return !safe_isalnum(*zIn);
        }
        !          1782: 
        /*
        ** The "spec" of a full text index, as extracted from the arguments of a
        ** CREATE VIRTUAL TABLE statement.  parseSpec() populates this structure;
        ** fulltextConnect() and fulltextCreate() consume it.
        */
        typedef struct TableSpec TableSpec;
        struct TableSpec {
          /* Logical database name */
          const char *zDb;
          /* Name of the full-text index */
          const char *zName;
          /* Number of columns to be indexed */
          int nColumn;
          /* Original names of columns to be indexed */
          char **azColumn;
          /* Column names for %_content */
          char **azContentColumn;
          /* Name of tokenizer and its arguments */
          char **azTokenizer;
        };
        !          1796: 
        !          1797: /*
        !          1798: ** Reclaim all of the memory used by a TableSpec
        !          1799: */
        !          1800: static void clearTableSpec(TableSpec *p) {
        !          1801:   free(p->azColumn);
        !          1802:   free(p->azContentColumn);
        !          1803:   free(p->azTokenizer);
        !          1804: }
        !          1805: 
        !          1806: /* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
        !          1807:  *
        !          1808:  * CREATE VIRTUAL TABLE email
        !          1809:  *        USING fts1(subject, body, tokenize mytokenizer(myarg))
        !          1810:  *
        !          1811:  * We return parsed information in a TableSpec structure.
        !          1812:  * 
        !          1813:  */
        !          1814: static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
        !          1815:                      char**pzErr){
        !          1816:   int i, n;
        !          1817:   char *z, *zDummy;
        !          1818:   char **azArg;
        !          1819:   const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
        !          1820: 
        !          1821:   assert( argc>=3 );
        !          1822:   /* Current interface:
        !          1823:   ** argv[0] - module name
        !          1824:   ** argv[1] - database name
        !          1825:   ** argv[2] - table name
        !          1826:   ** argv[3..] - columns, optionally followed by tokenizer specification
        !          1827:   **             and snippet delimiters specification.
        !          1828:   */
        !          1829: 
        !          1830:   /* Make a copy of the complete argv[][] array in a single allocation.
        !          1831:   ** The argv[][] array is read-only and transient.  We can write to the
        !          1832:   ** copy in order to modify things and the copy is persistent.
        !          1833:   */
        !          1834:   memset(pSpec, 0, sizeof(*pSpec));
        !          1835:   for(i=n=0; i<argc; i++){
        !          1836:     n += strlen(argv[i]) + 1;
        !          1837:   }
        !          1838:   azArg = malloc( sizeof(char*)*argc + n );
        !          1839:   if( azArg==0 ){
        !          1840:     return SQLITE_NOMEM;
        !          1841:   }
        !          1842:   z = (char*)&azArg[argc];
        !          1843:   for(i=0; i<argc; i++){
        !          1844:     azArg[i] = z;
        !          1845:     strcpy(z, argv[i]);
        !          1846:     z += strlen(z)+1;
        !          1847:   }
        !          1848: 
        !          1849:   /* Identify the column names and the tokenizer and delimiter arguments
        !          1850:   ** in the argv[][] array.
        !          1851:   */
        !          1852:   pSpec->zDb = azArg[1];
        !          1853:   pSpec->zName = azArg[2];
        !          1854:   pSpec->nColumn = 0;
        !          1855:   pSpec->azColumn = azArg;
        !          1856:   zTokenizer = "tokenize simple";
        !          1857:   for(i=3; i<argc; ++i){
        !          1858:     if( startsWith(azArg[i],"tokenize") ){
        !          1859:       zTokenizer = azArg[i];
        !          1860:     }else{
        !          1861:       z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
        !          1862:       pSpec->nColumn++;
        !          1863:     }
        !          1864:   }
        !          1865:   if( pSpec->nColumn==0 ){
        !          1866:     azArg[0] = "content";
        !          1867:     pSpec->nColumn = 1;
        !          1868:   }
        !          1869: 
        !          1870:   /*
        !          1871:   ** Construct the list of content column names.
        !          1872:   **
        !          1873:   ** Each content column name will be of the form cNNAAAA
        !          1874:   ** where NN is the column number and AAAA is the sanitized
        !          1875:   ** column name.  "sanitized" means that special characters are
        !          1876:   ** converted to "_".  The cNN prefix guarantees that all column
        !          1877:   ** names are unique.
        !          1878:   **
        !          1879:   ** The AAAA suffix is not strictly necessary.  It is included
        !          1880:   ** for the convenience of people who might examine the generated
        !          1881:   ** %_content table and wonder what the columns are used for.
        !          1882:   */
        !          1883:   pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
        !          1884:   if( pSpec->azContentColumn==0 ){
        !          1885:     clearTableSpec(pSpec);
        !          1886:     return SQLITE_NOMEM;
        !          1887:   }
        !          1888:   for(i=0; i<pSpec->nColumn; i++){
        !          1889:     char *p;
        !          1890:     pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
        !          1891:     for (p = pSpec->azContentColumn[i]; *p ; ++p) {
        !          1892:       if( !safe_isalnum(*p) ) *p = '_';
        !          1893:     }
        !          1894:   }
        !          1895: 
        !          1896:   /*
        !          1897:   ** Parse the tokenizer specification string.
        !          1898:   */
        !          1899:   pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
        !          1900:   tokenListToIdList(pSpec->azTokenizer);
        !          1901: 
        !          1902:   return SQLITE_OK;
        !          1903: }
        !          1904: 
        /*
        ** Build the CREATE TABLE statement that declares the schema of the
        ** virtual table: one column per entry of azColumn[] followed by a final
        ** column named after the table itself.  Names are emitted with %Q.
        **
        ** The returned string comes from sqlite3_mprintf(); the caller releases
        ** it with sqlite3_free().
        */
        static char *fulltextSchema(
          int nColumn,                  /* Number of columns */
          const char *const* azColumn,  /* List of columns */
          const char *zTableName        /* Name of the table */
        ){
          char *zSql;                   /* Statement text accumulated so far */
          char *zLonger;                /* zSql with one more piece appended */
          int iCol = 0;

          zSql = sqlite3_mprintf("CREATE TABLE x");
          while( iCol<nColumn ){
            /* The first column opens the parenthesis, later ones add a comma. */
            zLonger = sqlite3_mprintf("%s%s%Q", zSql, iCol==0 ? "(" : ",",
                                      azColumn[iCol]);
            sqlite3_free(zSql);
            zSql = zLonger;
            iCol++;
          }
          zLonger = sqlite3_mprintf("%s,%Q)", zSql, zTableName);
          sqlite3_free(zSql);
          return zLonger;
        }
        !          1931: 
        !          1932: /*
        !          1933: ** Build a new sqlite3_vtab structure that will describe the
        !          1934: ** fulltext index defined by spec.
        !          1935: */
        !          1936: static int constructVtab(
        !          1937:   sqlite3 *db,              /* The SQLite database connection */
        !          1938:   TableSpec *spec,          /* Parsed spec information from parseSpec() */
        !          1939:   sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
        !          1940:   char **pzErr              /* Write any error message here */
        !          1941: ){
        !          1942:   int rc;
        !          1943:   int n;
        !          1944:   fulltext_vtab *v = 0;
        !          1945:   const sqlite3_tokenizer_module *m = NULL;
        !          1946:   char *schema;
        !          1947: 
        !          1948:   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
        !          1949:   if( v==0 ) return SQLITE_NOMEM;
        !          1950:   memset(v, 0, sizeof(*v));
        !          1951:   /* sqlite will initialize v->base */
        !          1952:   v->db = db;
        !          1953:   v->zDb = spec->zDb;       /* Freed when azColumn is freed */
        !          1954:   v->zName = spec->zName;   /* Freed when azColumn is freed */
        !          1955:   v->nColumn = spec->nColumn;
        !          1956:   v->azContentColumn = spec->azContentColumn;
        !          1957:   spec->azContentColumn = 0;
        !          1958:   v->azColumn = spec->azColumn;
        !          1959:   spec->azColumn = 0;
        !          1960: 
        !          1961:   if( spec->azTokenizer==0 ){
        !          1962:     return SQLITE_NOMEM;
        !          1963:   }
        !          1964:   /* TODO(shess) For now, add new tokenizers as else if clauses. */
        !          1965:   if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
        !          1966:     sqlite3Fts1SimpleTokenizerModule(&m);
        !          1967:   }else if( startsWith(spec->azTokenizer[0], "porter") ){
        !          1968:     sqlite3Fts1PorterTokenizerModule(&m);
        !          1969:   }else{
        !          1970:     *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
        !          1971:     rc = SQLITE_ERROR;
        !          1972:     goto err;
        !          1973:   }
        !          1974:   for(n=0; spec->azTokenizer[n]; n++){}
        !          1975:   if( n ){
        !          1976:     rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
        !          1977:                     &v->pTokenizer);
        !          1978:   }else{
        !          1979:     rc = m->xCreate(0, 0, &v->pTokenizer);
        !          1980:   }
        !          1981:   if( rc!=SQLITE_OK ) goto err;
        !          1982:   v->pTokenizer->pModule = m;
        !          1983: 
        !          1984:   /* TODO: verify the existence of backing tables foo_content, foo_term */
        !          1985: 
        !          1986:   schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
        !          1987:                           spec->zName);
        !          1988:   rc = sqlite3_declare_vtab(db, schema);
        !          1989:   sqlite3_free(schema);
        !          1990:   if( rc!=SQLITE_OK ) goto err;
        !          1991: 
        !          1992:   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
        !          1993: 
        !          1994:   *ppVTab = &v->base;
        !          1995:   TRACE(("FTS1 Connect %p\n", v));
        !          1996: 
        !          1997:   return rc;
        !          1998: 
        !          1999: err:
        !          2000:   fulltext_vtab_destroy(v);
        !          2001:   return rc;
        !          2002: }
        !          2003: 
        !          2004: static int fulltextConnect(
        !          2005:   sqlite3 *db,
        !          2006:   void *pAux,
        !          2007:   int argc, const char *const*argv,
        !          2008:   sqlite3_vtab **ppVTab,
        !          2009:   char **pzErr
        !          2010: ){
        !          2011:   TableSpec spec;
        !          2012:   int rc = parseSpec(&spec, argc, argv, pzErr);
        !          2013:   if( rc!=SQLITE_OK ) return rc;
        !          2014: 
        !          2015:   rc = constructVtab(db, &spec, ppVTab, pzErr);
        !          2016:   clearTableSpec(&spec);
        !          2017:   return rc;
        !          2018: }
        !          2019: 
        !          2020:   /* The %_content table holds the text of each document, with
        !          2021:   ** the rowid used as the docid.
        !          2022:   **
        !          2023:   ** The %_term table maps each term to a document list blob
        !          2024:   ** containing elements sorted by ascending docid, each element
        !          2025:   ** encoded as:
        !          2026:   **
        !          2027:   **   docid varint-encoded
        !          2028:   **   token elements:
        !          2029:   **     position+1 varint-encoded as delta from previous position
        !          2030:   **     start offset varint-encoded as delta from previous start offset
        !          2031:   **     end offset varint-encoded as delta from start offset
        !          2032:   **
        !          2033:   ** The sentinel position of 0 indicates the end of the token list.
        !          2034:   **
        !          2035:   ** Additionally, doclist blobs are chunked into multiple segments,
        !          2036:   ** using segment to order the segments.  New elements are added to
        !          2037:   ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then
        !          2038:   ** segment 0 is deleted, and the doclist is inserted at segment 1.
        !          2039:   ** If there is already a doclist at segment 1, the segment 0 doclist
        !          2040:   ** is merged with it, the segment 1 doclist is deleted, and the
        !          2041:   ** merged doclist is inserted at segment 2, repeating those
        !          2042:   ** operations until an insert succeeds.
        !          2043:   **
        !          2044:   ** Since this structure doesn't allow us to update elements in place
        !          2045:   ** in case of deletion or update, these are simply written to
        !          2046:   ** segment 0 (with an empty token list in case of deletion), with
        !          2047:   ** docListAccumulate() taking care to retain lower-segment
        !          2048:   ** information in preference to higher-segment information.
        !          2049:   */
        !          2050:   /* TODO(shess) Provide a VACUUM type operation which both removes
        !          2051:   ** deleted elements which are no longer necessary, and duplicated
        !          2052:   ** elements.  I suspect this will probably not be necessary in
        !          2053:   ** practice, though.
        !          2054:   */
        !          2055: static int fulltextCreate(sqlite3 *db, void *pAux,
        !          2056:                           int argc, const char * const *argv,
        !          2057:                           sqlite3_vtab **ppVTab, char **pzErr){
        !          2058:   int rc;
        !          2059:   TableSpec spec;
        !          2060:   StringBuffer schema;
        !          2061:   TRACE(("FTS1 Create\n"));
        !          2062: 
        !          2063:   rc = parseSpec(&spec, argc, argv, pzErr);
        !          2064:   if( rc!=SQLITE_OK ) return rc;
        !          2065: 
        !          2066:   initStringBuffer(&schema);
        !          2067:   append(&schema, "CREATE TABLE %_content(");
        !          2068:   appendList(&schema, spec.nColumn, spec.azContentColumn);
        !          2069:   append(&schema, ")");
        !          2070:   rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
        !          2071:   free(schema.s);
        !          2072:   if( rc!=SQLITE_OK ) goto out;
        !          2073: 
        !          2074:   rc = sql_exec(db, spec.zDb, spec.zName,
        !          2075:     "create table %_term(term text, segment integer, doclist blob, "
        !          2076:                         "primary key(term, segment));");
        !          2077:   if( rc!=SQLITE_OK ) goto out;
        !          2078: 
        !          2079:   rc = constructVtab(db, &spec, ppVTab, pzErr);
        !          2080: 
        !          2081: out:
        !          2082:   clearTableSpec(&spec);
        !          2083:   return rc;
        !          2084: }
        !          2085: 
        !          2086: /* Decide how to handle an SQL query. */
        !          2087: static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
        !          2088:   int i;
        !          2089:   TRACE(("FTS1 BestIndex\n"));
        !          2090: 
        !          2091:   for(i=0; i<pInfo->nConstraint; ++i){
        !          2092:     const struct sqlite3_index_constraint *pConstraint;
        !          2093:     pConstraint = &pInfo->aConstraint[i];
        !          2094:     if( pConstraint->usable ) {
        !          2095:       if( pConstraint->iColumn==-1 &&
        !          2096:           pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
        !          2097:         pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
        !          2098:         TRACE(("FTS1 QUERY_ROWID\n"));
        !          2099:       } else if( pConstraint->iColumn>=0 &&
        !          2100:                  pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
        !          2101:         /* full-text search */
        !          2102:         pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
        !          2103:         TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
        !          2104:       } else continue;
        !          2105: 
        !          2106:       pInfo->aConstraintUsage[i].argvIndex = 1;
        !          2107:       pInfo->aConstraintUsage[i].omit = 1;
        !          2108: 
        !          2109:       /* An arbitrary value for now.
        !          2110:        * TODO: Perhaps rowid matches should be considered cheaper than
        !          2111:        * full-text searches. */
        !          2112:       pInfo->estimatedCost = 1.0;   
        !          2113: 
        !          2114:       return SQLITE_OK;
        !          2115:     }
        !          2116:   }
        !          2117:   pInfo->idxNum = QUERY_GENERIC;
        !          2118:   return SQLITE_OK;
        !          2119: }
        !          2120: 
        !          2121: static int fulltextDisconnect(sqlite3_vtab *pVTab){
        !          2122:   TRACE(("FTS1 Disconnect %p\n", pVTab));
        !          2123:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
        !          2124:   return SQLITE_OK;
        !          2125: }
        !          2126: 
        !          2127: static int fulltextDestroy(sqlite3_vtab *pVTab){
        !          2128:   fulltext_vtab *v = (fulltext_vtab *)pVTab;
        !          2129:   int rc;
        !          2130: 
        !          2131:   TRACE(("FTS1 Destroy %p\n", pVTab));
        !          2132:   rc = sql_exec(v->db, v->zDb, v->zName,
        !          2133:                 "drop table if exists %_content;"
        !          2134:                 "drop table if exists %_term;"
        !          2135:                 );
        !          2136:   if( rc!=SQLITE_OK ) return rc;
        !          2137: 
        !          2138:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
        !          2139:   return SQLITE_OK;
        !          2140: }
        !          2141: 
        !          2142: static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
        !          2143:   fulltext_cursor *c;
        !          2144: 
        !          2145:   c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
        !          2146:   /* sqlite will initialize c->base */
        !          2147:   *ppCursor = &c->base;
        !          2148:   TRACE(("FTS1 Open %p: %p\n", pVTab, c));
        !          2149: 
        !          2150:   return SQLITE_OK;
        !          2151: }
        !          2152: 
        !          2153: 
        !          2154: /* Free all of the dynamically allocated memory held by *q
        !          2155: */
        !          2156: static void queryClear(Query *q){
        !          2157:   int i;
        !          2158:   for(i = 0; i < q->nTerms; ++i){
        !          2159:     free(q->pTerms[i].pTerm);
        !          2160:   }
        !          2161:   free(q->pTerms);
        !          2162:   memset(q, 0, sizeof(*q));
        !          2163: }
        !          2164: 
        !          2165: /* Free all of the dynamically allocated memory held by the
        !          2166: ** Snippet
        !          2167: */
        !          2168: static void snippetClear(Snippet *p){
        !          2169:   free(p->aMatch);
        !          2170:   free(p->zOffset);
        !          2171:   free(p->zSnippet);
        !          2172:   memset(p, 0, sizeof(*p));
        !          2173: }
        !          2174: /*
        !          2175: ** Append a single entry to the p->aMatch[] log.
        !          2176: */
        !          2177: static void snippetAppendMatch(
        !          2178:   Snippet *p,               /* Append the entry to this snippet */
        !          2179:   int iCol, int iTerm,      /* The column and query term */
        !          2180:   int iStart, int nByte     /* Offset and size of the match */
        !          2181: ){
        !          2182:   int i;
        !          2183:   struct snippetMatch *pMatch;
        !          2184:   if( p->nMatch+1>=p->nAlloc ){
        !          2185:     p->nAlloc = p->nAlloc*2 + 10;
        !          2186:     p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
        !          2187:     if( p->aMatch==0 ){
        !          2188:       p->nMatch = 0;
        !          2189:       p->nAlloc = 0;
        !          2190:       return;
        !          2191:     }
        !          2192:   }
        !          2193:   i = p->nMatch++;
        !          2194:   pMatch = &p->aMatch[i];
        !          2195:   pMatch->iCol = iCol;
        !          2196:   pMatch->iTerm = iTerm;
        !          2197:   pMatch->iStart = iStart;
        !          2198:   pMatch->nByte = nByte;
        !          2199: }
        !          2200: 
        !          2201: /*
        !          2202: ** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
        !          2203: */
        !          2204: #define FTS1_ROTOR_SZ   (32)
        !          2205: #define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
        !          2206: 
        !          2207: /*
        !          2208: ** Add entries to pSnippet->aMatch[] for every match that occurs against
        !          2209: ** document zDoc[0..nDoc-1] which is stored in column iColumn.
        !          2210: */
        !          2211: static void snippetOffsetsOfColumn(
        !          2212:   Query *pQuery,
        !          2213:   Snippet *pSnippet,
        !          2214:   int iColumn,
        !          2215:   const char *zDoc,
        !          2216:   int nDoc
        !          2217: ){
        !          2218:   const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
        !          2219:   sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
        !          2220:   sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
        !          2221:   fulltext_vtab *pVtab;                /* The full text index */
        !          2222:   int nColumn;                         /* Number of columns in the index */
        !          2223:   const QueryTerm *aTerm;              /* Query string terms */
        !          2224:   int nTerm;                           /* Number of query string terms */  
        !          2225:   int i, j;                            /* Loop counters */
        !          2226:   int rc;                              /* Return code */
        !          2227:   unsigned int match, prevMatch;       /* Phrase search bitmasks */
        !          2228:   const char *zToken;                  /* Next token from the tokenizer */
        !          2229:   int nToken;                          /* Size of zToken */
        !          2230:   int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
        !          2231: 
        !          2232:   /* The following variables keep a circular buffer of the last
        !          2233:   ** few tokens */
        !          2234:   unsigned int iRotor = 0;             /* Index of current token */
        !          2235:   int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
        !          2236:   int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */
        !          2237: 
        !          2238:   pVtab = pQuery->pFts;
        !          2239:   nColumn = pVtab->nColumn;
        !          2240:   pTokenizer = pVtab->pTokenizer;
        !          2241:   pTModule = pTokenizer->pModule;
        !          2242:   rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
        !          2243:   if( rc ) return;
        !          2244:   pTCursor->pTokenizer = pTokenizer;
        !          2245:   aTerm = pQuery->pTerms;
        !          2246:   nTerm = pQuery->nTerms;
        !          2247:   if( nTerm>=FTS1_ROTOR_SZ ){
        !          2248:     nTerm = FTS1_ROTOR_SZ - 1;
        !          2249:   }
        !          2250:   prevMatch = 0;
        !          2251:   while(1){
        !          2252:     rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
        !          2253:     if( rc ) break;
        !          2254:     iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
        !          2255:     iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
        !          2256:     match = 0;
        !          2257:     for(i=0; i<nTerm; i++){
        !          2258:       int iCol;
        !          2259:       iCol = aTerm[i].iColumn;
        !          2260:       if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
        !          2261:       if( aTerm[i].nTerm!=nToken ) continue;
        !          2262:       if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
        !          2263:       if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
        !          2264:       match |= 1<<i;
        !          2265:       if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
        !          2266:         for(j=aTerm[i].iPhrase-1; j>=0; j--){
        !          2267:           int k = (iRotor-j) & FTS1_ROTOR_MASK;
        !          2268:           snippetAppendMatch(pSnippet, iColumn, i-j,
        !          2269:                 iRotorBegin[k], iRotorLen[k]);
        !          2270:         }
        !          2271:       }
        !          2272:     }
        !          2273:     prevMatch = match<<1;
        !          2274:     iRotor++;
        !          2275:   }
        !          2276:   pTModule->xClose(pTCursor);  
        !          2277: }
        !          2278: 
        !          2279: 
        !          2280: /*
        !          2281: ** Compute all offsets for the current row of the query.  
        !          2282: ** If the offsets have already been computed, this routine is a no-op.
        !          2283: */
        !          2284: static void snippetAllOffsets(fulltext_cursor *p){
        !          2285:   int nColumn;
        !          2286:   int iColumn, i;
        !          2287:   int iFirst, iLast;
        !          2288:   fulltext_vtab *pFts;
        !          2289: 
        !          2290:   if( p->snippet.nMatch ) return;
        !          2291:   if( p->q.nTerms==0 ) return;
        !          2292:   pFts = p->q.pFts;
        !          2293:   nColumn = pFts->nColumn;
        !          2294:   iColumn = p->iCursorType - QUERY_FULLTEXT;
        !          2295:   if( iColumn<0 || iColumn>=nColumn ){
        !          2296:     iFirst = 0;
        !          2297:     iLast = nColumn-1;
        !          2298:   }else{
        !          2299:     iFirst = iColumn;
        !          2300:     iLast = iColumn;
        !          2301:   }
        !          2302:   for(i=iFirst; i<=iLast; i++){
        !          2303:     const char *zDoc;
        !          2304:     int nDoc;
        !          2305:     zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
        !          2306:     nDoc = sqlite3_column_bytes(p->pStmt, i+1);
        !          2307:     snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
        !          2308:   }
        !          2309: }
        !          2310: 
        !          2311: /*
        !          2312: ** Convert the information in the aMatch[] array of the snippet
        !          2313: ** into the string zOffset[0..nOffset-1].
        !          2314: */
        !          2315: static void snippetOffsetText(Snippet *p){
        !          2316:   int i;
        !          2317:   int cnt = 0;
        !          2318:   StringBuffer sb;
        !          2319:   char zBuf[200];
        !          2320:   if( p->zOffset ) return;
        !          2321:   initStringBuffer(&sb);
        !          2322:   for(i=0; i<p->nMatch; i++){
        !          2323:     struct snippetMatch *pMatch = &p->aMatch[i];
        !          2324:     zBuf[0] = ' ';
        !          2325:     sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
        !          2326:         pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
        !          2327:     append(&sb, zBuf);
        !          2328:     cnt++;
        !          2329:   }
        !          2330:   p->zOffset = sb.s;
        !          2331:   p->nOffset = sb.len;
        !          2332: }
        !          2333: 
        !          2334: /*
        !          2335: ** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
        !          2336: ** of matching words some of which might be in zDoc.  zDoc is column
        !          2337: ** number iCol.
        !          2338: **
        !          2339: ** iBreak is suggested spot in zDoc where we could begin or end an
        !          2340: ** excerpt.  Return a value similar to iBreak but possibly adjusted
        !          2341: ** to be a little left or right so that the break point is better.
        !          2342: */
        !          2343: static int wordBoundary(
        !          2344:   int iBreak,                   /* The suggested break point */
        !          2345:   const char *zDoc,             /* Document text */
        !          2346:   int nDoc,                     /* Number of bytes in zDoc[] */
        !          2347:   struct snippetMatch *aMatch,  /* Matching words */
        !          2348:   int nMatch,                   /* Number of entries in aMatch[] */
        !          2349:   int iCol                      /* The column number for zDoc[] */
        !          2350: ){
        !          2351:   int i;
        !          2352:   if( iBreak<=10 ){
        !          2353:     return 0;
        !          2354:   }
        !          2355:   if( iBreak>=nDoc-10 ){
        !          2356:     return nDoc;
        !          2357:   }
        !          2358:   for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
        !          2359:   while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
        !          2360:   if( i<nMatch ){
        !          2361:     if( aMatch[i].iStart<iBreak+10 ){
        !          2362:       return aMatch[i].iStart;
        !          2363:     }
        !          2364:     if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
        !          2365:       return aMatch[i-1].iStart;
        !          2366:     }
        !          2367:   }
        !          2368:   for(i=1; i<=10; i++){
        !          2369:     if( safe_isspace(zDoc[iBreak-i]) ){
        !          2370:       return iBreak - i + 1;
        !          2371:     }
        !          2372:     if( safe_isspace(zDoc[iBreak+i]) ){
        !          2373:       return iBreak + i + 1;
        !          2374:     }
        !          2375:   }
        !          2376:   return iBreak;
        !          2377: }
        !          2378: 
        !          2379: /*
        !          2380: ** If the StringBuffer does not end in white space, add a single
        !          2381: ** space character to the end.
        !          2382: */
        !          2383: static void appendWhiteSpace(StringBuffer *p){
        !          2384:   if( p->len==0 ) return;
        !          2385:   if( safe_isspace(p->s[p->len-1]) ) return;
        !          2386:   append(p, " ");
        !          2387: }
        !          2388: 
        !          2389: /*
        !          2390: ** Remove white space from teh end of the StringBuffer
        !          2391: */
        !          2392: static void trimWhiteSpace(StringBuffer *p){
        !          2393:   while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
        !          2394:     p->len--;
        !          2395:   }
        !          2396: }
        !          2397: 
        !          2398: 
        !          2399: 
        !          2400: /*
        !          2401: ** Allowed values for Snippet.aMatch[].snStatus
        !          2402: */
        !          2403: #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
        !          2404: #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
        !          2405: 
        !          2406: /*
        !          2407: ** Generate the text of a snippet.
        !          2408: */
        !          2409: static void snippetText(
        !          2410:   fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
        !          2411:   const char *zStartMark,     /* Markup to appear before each match */
        !          2412:   const char *zEndMark,       /* Markup to appear after each match */
        !          2413:   const char *zEllipsis       /* Ellipsis mark */
        !          2414: ){
        !          2415:   int i, j;
        !          2416:   struct snippetMatch *aMatch;
        !          2417:   int nMatch;
        !          2418:   int nDesired;
        !          2419:   StringBuffer sb;
        !          2420:   int tailCol;
        !          2421:   int tailOffset;
        !          2422:   int iCol;
        !          2423:   int nDoc;
        !          2424:   const char *zDoc;
        !          2425:   int iStart, iEnd;
        !          2426:   int tailEllipsis = 0;
        !          2427:   int iMatch;
        !          2428:   
        !          2429: 
        !          2430:   free(pCursor->snippet.zSnippet);
        !          2431:   pCursor->snippet.zSnippet = 0;
        !          2432:   aMatch = pCursor->snippet.aMatch;
        !          2433:   nMatch = pCursor->snippet.nMatch;
        !          2434:   initStringBuffer(&sb);
        !          2435: 
        !          2436:   for(i=0; i<nMatch; i++){
        !          2437:     aMatch[i].snStatus = SNIPPET_IGNORE;
        !          2438:   }
        !          2439:   nDesired = 0;
        !          2440:   for(i=0; i<pCursor->q.nTerms; i++){
        !          2441:     for(j=0; j<nMatch; j++){
        !          2442:       if( aMatch[j].iTerm==i ){
        !          2443:         aMatch[j].snStatus = SNIPPET_DESIRED;
        !          2444:         nDesired++;
        !          2445:         break;
        !          2446:       }
        !          2447:     }
        !          2448:   }
        !          2449: 
        !          2450:   iMatch = 0;
        !          2451:   tailCol = -1;
        !          2452:   tailOffset = 0;
        !          2453:   for(i=0; i<nMatch && nDesired>0; i++){
        !          2454:     if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
        !          2455:     nDesired--;
        !          2456:     iCol = aMatch[i].iCol;
        !          2457:     zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
        !          2458:     nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
        !          2459:     iStart = aMatch[i].iStart - 40;
        !          2460:     iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
        !          2461:     if( iStart<=10 ){
        !          2462:       iStart = 0;
        !          2463:     }
        !          2464:     if( iCol==tailCol && iStart<=tailOffset+20 ){
        !          2465:       iStart = tailOffset;
        !          2466:     }
        !          2467:     if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
        !          2468:       trimWhiteSpace(&sb);
        !          2469:       appendWhiteSpace(&sb);
        !          2470:       append(&sb, zEllipsis);
        !          2471:       appendWhiteSpace(&sb);
        !          2472:     }
        !          2473:     iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
        !          2474:     iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
        !          2475:     if( iEnd>=nDoc-10 ){
        !          2476:       iEnd = nDoc;
        !          2477:       tailEllipsis = 0;
        !          2478:     }else{
        !          2479:       tailEllipsis = 1;
        !          2480:     }
        !          2481:     while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
        !          2482:     while( iStart<iEnd ){
        !          2483:       while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
        !          2484:              && aMatch[iMatch].iCol<=iCol ){
        !          2485:         iMatch++;
        !          2486:       }
        !          2487:       if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
        !          2488:              && aMatch[iMatch].iCol==iCol ){
        !          2489:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
        !          2490:         iStart = aMatch[iMatch].iStart;
        !          2491:         append(&sb, zStartMark);
        !          2492:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
        !          2493:         append(&sb, zEndMark);
        !          2494:         iStart += aMatch[iMatch].nByte;
        !          2495:         for(j=iMatch+1; j<nMatch; j++){
        !          2496:           if( aMatch[j].iTerm==aMatch[iMatch].iTerm
        !          2497:               && aMatch[j].snStatus==SNIPPET_DESIRED ){
        !          2498:             nDesired--;
        !          2499:             aMatch[j].snStatus = SNIPPET_IGNORE;
        !          2500:           }
        !          2501:         }
        !          2502:       }else{
        !          2503:         nappend(&sb, &zDoc[iStart], iEnd - iStart);
        !          2504:         iStart = iEnd;
        !          2505:       }
        !          2506:     }
        !          2507:     tailCol = iCol;
        !          2508:     tailOffset = iEnd;
        !          2509:   }
        !          2510:   trimWhiteSpace(&sb);
        !          2511:   if( tailEllipsis ){
        !          2512:     appendWhiteSpace(&sb);
        !          2513:     append(&sb, zEllipsis);
        !          2514:   }
        !          2515:   pCursor->snippet.zSnippet = sb.s;
        !          2516:   pCursor->snippet.nSnippet = sb.len;  
        !          2517: }
        !          2518: 
        !          2519: 
        !          2520: /*
        !          2521: ** Close the cursor.  For additional information see the documentation
        !          2522: ** on the xClose method of the virtual table interface.
        !          2523: */
        !          2524: static int fulltextClose(sqlite3_vtab_cursor *pCursor){
        !          2525:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          2526:   TRACE(("FTS1 Close %p\n", c));
        !          2527:   sqlite3_finalize(c->pStmt);
        !          2528:   queryClear(&c->q);
        !          2529:   snippetClear(&c->snippet);
        !          2530:   if( c->result.pDoclist!=NULL ){
        !          2531:     docListDelete(c->result.pDoclist);
        !          2532:   }
        !          2533:   free(c);
        !          2534:   return SQLITE_OK;
        !          2535: }
        !          2536: 
        !          2537: static int fulltextNext(sqlite3_vtab_cursor *pCursor){
        !          2538:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          2539:   sqlite_int64 iDocid;
        !          2540:   int rc;
        !          2541: 
        !          2542:   TRACE(("FTS1 Next %p\n", pCursor));
        !          2543:   snippetClear(&c->snippet);
        !          2544:   if( c->iCursorType < QUERY_FULLTEXT ){
        !          2545:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
        !          2546:     rc = sqlite3_step(c->pStmt);
        !          2547:     switch( rc ){
        !          2548:       case SQLITE_ROW:
        !          2549:         c->eof = 0;
        !          2550:         return SQLITE_OK;
        !          2551:       case SQLITE_DONE:
        !          2552:         c->eof = 1;
        !          2553:         return SQLITE_OK;
        !          2554:       default:
        !          2555:         c->eof = 1;
        !          2556:         return rc;
        !          2557:     }
        !          2558:   } else {  /* full-text query */
        !          2559:     rc = sqlite3_reset(c->pStmt);
        !          2560:     if( rc!=SQLITE_OK ) return rc;
        !          2561: 
        !          2562:     iDocid = nextDocid(&c->result);
        !          2563:     if( iDocid==0 ){
        !          2564:       c->eof = 1;
        !          2565:       return SQLITE_OK;
        !          2566:     }
        !          2567:     rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
        !          2568:     if( rc!=SQLITE_OK ) return rc;
        !          2569:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
        !          2570:     rc = sqlite3_step(c->pStmt);
        !          2571:     if( rc==SQLITE_ROW ){   /* the case we expect */
        !          2572:       c->eof = 0;
        !          2573:       return SQLITE_OK;
        !          2574:     }
        !          2575:     /* an error occurred; abort */
        !          2576:     return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
        !          2577:   }
        !          2578: }
        !          2579: 
        !          2580: 
        !          2581: /* Return a DocList corresponding to the query term *pTerm.  If *pTerm
        !          2582: ** is the first term of a phrase query, go ahead and evaluate the phrase
        !          2583: ** query and return the doclist for the entire phrase query.
        !          2584: **
        !          2585: ** The result is stored in pTerm->doclist.
        !          2586: */
        !          2587: static int docListOfTerm(
        !          2588:   fulltext_vtab *v,     /* The full text index */
        !          2589:   int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
        !          2590:   QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
        !          2591:   DocList **ppResult    /* Write the result here */
        !          2592: ){
        !          2593:   DocList *pLeft, *pRight, *pNew;
        !          2594:   int i, rc;
        !          2595: 
        !          2596:   pLeft = docListNew(DL_POSITIONS);
        !          2597:   rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
        !          2598:   if( rc ){
        !          2599:     docListDelete(pLeft);
        !          2600:     return rc;
        !          2601:   }
        !          2602:   for(i=1; i<=pQTerm->nPhrase; i++){
        !          2603:     pRight = docListNew(DL_POSITIONS);
        !          2604:     rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
        !          2605:     if( rc ){
        !          2606:       docListDelete(pLeft);
        !          2607:       return rc;
        !          2608:     }
        !          2609:     pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
        !          2610:     docListPhraseMerge(pLeft, pRight, pNew);
        !          2611:     docListDelete(pLeft);
        !          2612:     docListDelete(pRight);
        !          2613:     pLeft = pNew;
        !          2614:   }
        !          2615:   *ppResult = pLeft;
        !          2616:   return SQLITE_OK;
        !          2617: }
        !          2618: 
        !          2619: /* Add a new term pTerm[0..nTerm-1] to the query *q.
        !          2620: */
        !          2621: static void queryAdd(Query *q, const char *pTerm, int nTerm){
        !          2622:   QueryTerm *t;
        !          2623:   ++q->nTerms;
        !          2624:   q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
        !          2625:   if( q->pTerms==0 ){
        !          2626:     q->nTerms = 0;
        !          2627:     return;
        !          2628:   }
        !          2629:   t = &q->pTerms[q->nTerms - 1];
        !          2630:   memset(t, 0, sizeof(*t));
        !          2631:   t->pTerm = malloc(nTerm+1);
        !          2632:   memcpy(t->pTerm, pTerm, nTerm);
        !          2633:   t->pTerm[nTerm] = 0;
        !          2634:   t->nTerm = nTerm;
        !          2635:   t->isOr = q->nextIsOr;
        !          2636:   q->nextIsOr = 0;
        !          2637:   t->iColumn = q->nextColumn;
        !          2638:   q->nextColumn = q->dfltColumn;
        !          2639: }
        !          2640: 
        !          2641: /*
        !          2642: ** Check to see if the string zToken[0...nToken-1] matches any
        !          2643: ** column name in the virtual table.   If it does,
        !          2644: ** return the zero-indexed column number.  If not, return -1.
        !          2645: */
        !          2646: static int checkColumnSpecifier(
        !          2647:   fulltext_vtab *pVtab,    /* The virtual table */
        !          2648:   const char *zToken,      /* Text of the token */
        !          2649:   int nToken               /* Number of characters in the token */
        !          2650: ){
        !          2651:   int i;
        !          2652:   for(i=0; i<pVtab->nColumn; i++){
        !          2653:     if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
        !          2654:         && pVtab->azColumn[i][nToken]==0 ){
        !          2655:       return i;
        !          2656:     }
        !          2657:   }
        !          2658:   return -1;
        !          2659: }
        !          2660: 
        !          2661: /*
        !          2662: ** Parse the text at pSegment[0..nSegment-1].  Add additional terms
        !          2663: ** to the query being assemblied in pQuery.
        !          2664: **
        !          2665: ** inPhrase is true if pSegment[0..nSegement-1] is contained within
        !          2666: ** double-quotes.  If inPhrase is true, then the first term
        !          2667: ** is marked with the number of terms in the phrase less one and
        !          2668: ** OR and "-" syntax is ignored.  If inPhrase is false, then every
        !          2669: ** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
        !          2670: */
        !          2671: static int tokenizeSegment(
        !          2672:   sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
        !          2673:   const char *pSegment, int nSegment,     /* Query expression being parsed */
        !          2674:   int inPhrase,                           /* True if within "..." */
        !          2675:   Query *pQuery                           /* Append results here */
        !          2676: ){
        !          2677:   const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
        !          2678:   sqlite3_tokenizer_cursor *pCursor;
        !          2679:   int firstIndex = pQuery->nTerms;
        !          2680:   int iCol;
        !          2681:   int nTerm = 1;
        !          2682:   
        !          2683:   int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
        !          2684:   if( rc!=SQLITE_OK ) return rc;
        !          2685:   pCursor->pTokenizer = pTokenizer;
        !          2686: 
        !          2687:   while( 1 ){
        !          2688:     const char *pToken;
        !          2689:     int nToken, iBegin, iEnd, iPos;
        !          2690: 
        !          2691:     rc = pModule->xNext(pCursor,
        !          2692:                         &pToken, &nToken,
        !          2693:                         &iBegin, &iEnd, &iPos);
        !          2694:     if( rc!=SQLITE_OK ) break;
        !          2695:     if( !inPhrase &&
        !          2696:         pSegment[iEnd]==':' &&
        !          2697:          (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
        !          2698:       pQuery->nextColumn = iCol;
        !          2699:       continue;
        !          2700:     }
        !          2701:     if( !inPhrase && pQuery->nTerms>0 && nToken==2
        !          2702:          && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
        !          2703:       pQuery->nextIsOr = 1;
        !          2704:       continue;
        !          2705:     }
        !          2706:     queryAdd(pQuery, pToken, nToken);
        !          2707:     if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
        !          2708:       pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
        !          2709:     }
        !          2710:     pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
        !          2711:     if( inPhrase ){
        !          2712:       nTerm++;
        !          2713:     }
        !          2714:   }
        !          2715: 
        !          2716:   if( inPhrase && pQuery->nTerms>firstIndex ){
        !          2717:     pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
        !          2718:   }
        !          2719: 
        !          2720:   return pModule->xClose(pCursor);
        !          2721: }
        !          2722: 
        !          2723: /* Parse a query string, yielding a Query object pQuery.
        !          2724: **
        !          2725: ** The calling function will need to queryClear() to clean up
        !          2726: ** the dynamically allocated memory held by pQuery.
        !          2727: */
        !          2728: static int parseQuery(
        !          2729:   fulltext_vtab *v,        /* The fulltext index */
        !          2730:   const char *zInput,      /* Input text of the query string */
        !          2731:   int nInput,              /* Size of the input text */
        !          2732:   int dfltColumn,          /* Default column of the index to match against */
        !          2733:   Query *pQuery            /* Write the parse results here. */
        !          2734: ){
        !          2735:   int iInput, inPhrase = 0;
        !          2736: 
        !          2737:   if( zInput==0 ) nInput = 0;
        !          2738:   if( nInput<0 ) nInput = strlen(zInput);
        !          2739:   pQuery->nTerms = 0;
        !          2740:   pQuery->pTerms = NULL;
        !          2741:   pQuery->nextIsOr = 0;
        !          2742:   pQuery->nextColumn = dfltColumn;
        !          2743:   pQuery->dfltColumn = dfltColumn;
        !          2744:   pQuery->pFts = v;
        !          2745: 
        !          2746:   for(iInput=0; iInput<nInput; ++iInput){
        !          2747:     int i;
        !          2748:     for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
        !          2749:     if( i>iInput ){
        !          2750:       tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
        !          2751:                        pQuery);
        !          2752:     }
        !          2753:     iInput = i;
        !          2754:     if( i<nInput ){
        !          2755:       assert( zInput[i]=='"' );
        !          2756:       inPhrase = !inPhrase;
        !          2757:     }
        !          2758:   }
        !          2759: 
        !          2760:   if( inPhrase ){
        !          2761:     /* unmatched quote */
        !          2762:     queryClear(pQuery);
        !          2763:     return SQLITE_ERROR;
        !          2764:   }
        !          2765:   return SQLITE_OK;
        !          2766: }
        !          2767: 
        !          2768: /* Perform a full-text query using the search expression in
        !          2769: ** zInput[0..nInput-1].  Return a list of matching documents
        !          2770: ** in pResult.
        !          2771: **
        !          2772: ** Queries must match column iColumn.  Or if iColumn>=nColumn
        !          2773: ** they are allowed to match against any column.
        !          2774: */
        !          2775: static int fulltextQuery(
        !          2776:   fulltext_vtab *v,      /* The full text index */
        !          2777:   int iColumn,           /* Match against this column by default */
        !          2778:   const char *zInput,    /* The query string */
        !          2779:   int nInput,            /* Number of bytes in zInput[] */
        !          2780:   DocList **pResult,     /* Write the result doclist here */
        !          2781:   Query *pQuery          /* Put parsed query string here */
        !          2782: ){
        !          2783:   int i, iNext, rc;
        !          2784:   DocList *pLeft = NULL;
        !          2785:   DocList *pRight, *pNew, *pOr;
        !          2786:   int nNot = 0;
        !          2787:   QueryTerm *aTerm;
        !          2788: 
        !          2789:   rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
        !          2790:   if( rc!=SQLITE_OK ) return rc;
        !          2791: 
        !          2792:   /* Merge AND terms. */
        !          2793:   aTerm = pQuery->pTerms;
        !          2794:   for(i = 0; i<pQuery->nTerms; i=iNext){
        !          2795:     if( aTerm[i].isNot ){
        !          2796:       /* Handle all NOT terms in a separate pass */
        !          2797:       nNot++;
        !          2798:       iNext = i + aTerm[i].nPhrase+1;
        !          2799:       continue;
        !          2800:     }
        !          2801:     iNext = i + aTerm[i].nPhrase + 1;
        !          2802:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
        !          2803:     if( rc ){
        !          2804:       queryClear(pQuery);
        !          2805:       return rc;
        !          2806:     }
        !          2807:     while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
        !          2808:       rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
        !          2809:       iNext += aTerm[iNext].nPhrase + 1;
        !          2810:       if( rc ){
        !          2811:         queryClear(pQuery);
        !          2812:         return rc;
        !          2813:       }
        !          2814:       pNew = docListNew(DL_DOCIDS);
        !          2815:       docListOrMerge(pRight, pOr, pNew);
        !          2816:       docListDelete(pRight);
        !          2817:       docListDelete(pOr);
        !          2818:       pRight = pNew;
        !          2819:     }
        !          2820:     if( pLeft==0 ){
        !          2821:       pLeft = pRight;
        !          2822:     }else{
        !          2823:       pNew = docListNew(DL_DOCIDS);
        !          2824:       docListAndMerge(pLeft, pRight, pNew);
        !          2825:       docListDelete(pRight);
        !          2826:       docListDelete(pLeft);
        !          2827:       pLeft = pNew;
        !          2828:     }
        !          2829:   }
        !          2830: 
        !          2831:   if( nNot && pLeft==0 ){
        !          2832:     /* We do not yet know how to handle a query of only NOT terms */
        !          2833:     return SQLITE_ERROR;
        !          2834:   }
        !          2835: 
        !          2836:   /* Do the EXCEPT terms */
        !          2837:   for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
        !          2838:     if( !aTerm[i].isNot ) continue;
        !          2839:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
        !          2840:     if( rc ){
        !          2841:       queryClear(pQuery);
        !          2842:       docListDelete(pLeft);
        !          2843:       return rc;
        !          2844:     }
        !          2845:     pNew = docListNew(DL_DOCIDS);
        !          2846:     docListExceptMerge(pLeft, pRight, pNew);
        !          2847:     docListDelete(pRight);
        !          2848:     docListDelete(pLeft);
        !          2849:     pLeft = pNew;
        !          2850:   }
        !          2851: 
        !          2852:   *pResult = pLeft;
        !          2853:   return rc;
        !          2854: }
        !          2855: 
        !          2856: /*
        !          2857: ** This is the xFilter interface for the virtual table.  See
        !          2858: ** the virtual table xFilter method documentation for additional
        !          2859: ** information.
        !          2860: **
        !          2861: ** If idxNum==QUERY_GENERIC then do a full table scan against
        !          2862: ** the %_content table.
        !          2863: **
        !          2864: ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
        !          2865: ** in the %_content table.
        !          2866: **
        !          2867: ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
        !          2868: ** column on the left-hand side of the MATCH operator is column
        !          2869: ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
        !          2870: ** side of the MATCH operator.
        !          2871: */
        !          2872: /* TODO(shess) Upgrade the cursor initialization and destruction to
        !          2873: ** account for fulltextFilter() being called multiple times on the
        !          2874: ** same cursor.  The current solution is very fragile.  Apply fix to
        !          2875: ** fts2 as appropriate.
        !          2876: */
        !          2877: static int fulltextFilter(
        !          2878:   sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
        !          2879:   int idxNum, const char *idxStr,   /* Which indexing scheme to use */
        !          2880:   int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
        !          2881: ){
        !          2882:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          2883:   fulltext_vtab *v = cursor_vtab(c);
        !          2884:   int rc;
        !          2885:   char *zSql;
        !          2886: 
        !          2887:   TRACE(("FTS1 Filter %p\n",pCursor));
        !          2888: 
        !          2889:   zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
        !          2890:                           idxNum==QUERY_GENERIC ? "" : "where rowid=?");
        !          2891:   sqlite3_finalize(c->pStmt);
        !          2892:   rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
        !          2893:   sqlite3_free(zSql);
        !          2894:   if( rc!=SQLITE_OK ) return rc;
        !          2895: 
        !          2896:   c->iCursorType = idxNum;
        !          2897:   switch( idxNum ){
        !          2898:     case QUERY_GENERIC:
        !          2899:       break;
        !          2900: 
        !          2901:     case QUERY_ROWID:
        !          2902:       rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
        !          2903:       if( rc!=SQLITE_OK ) return rc;
        !          2904:       break;
        !          2905: 
        !          2906:     default:   /* full-text search */
        !          2907:     {
        !          2908:       const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
        !          2909:       DocList *pResult;
        !          2910:       assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
        !          2911:       assert( argc==1 );
        !          2912:       queryClear(&c->q);
        !          2913:       rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
        !          2914:       if( rc!=SQLITE_OK ) return rc;
        !          2915:       if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
        !          2916:       readerInit(&c->result, pResult);
        !          2917:       break;
        !          2918:     }
        !          2919:   }
        !          2920: 
        !          2921:   return fulltextNext(pCursor);
        !          2922: }
        !          2923: 
        !          2924: /* This is the xEof method of the virtual table.  The SQLite core
        !          2925: ** calls this routine to find out if it has reached the end of
        !          2926: ** a query's results set.
        !          2927: */
        !          2928: static int fulltextEof(sqlite3_vtab_cursor *pCursor){
        !          2929:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          2930:   return c->eof;
        !          2931: }
        !          2932: 
        !          2933: /* This is the xColumn method of the virtual table.  The SQLite
        !          2934: ** core calls this method during a query when it needs the value
        !          2935: ** of a column from the virtual table.  This method needs to use
        !          2936: ** one of the sqlite3_result_*() routines to store the requested
        !          2937: ** value back in the pContext.
        !          2938: */
        !          2939: static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
        !          2940:                           sqlite3_context *pContext, int idxCol){
        !          2941:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          2942:   fulltext_vtab *v = cursor_vtab(c);
        !          2943: 
        !          2944:   if( idxCol<v->nColumn ){
        !          2945:     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
        !          2946:     sqlite3_result_value(pContext, pVal);
        !          2947:   }else if( idxCol==v->nColumn ){
        !          2948:     /* The extra column whose name is the same as the table.
        !          2949:     ** Return a blob which is a pointer to the cursor
        !          2950:     */
        !          2951:     sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
        !          2952:   }
        !          2953:   return SQLITE_OK;
        !          2954: }
        !          2955: 
        !          2956: /* This is the xRowid method.  The SQLite core calls this routine to
        !          2957: ** retrive the rowid for the current row of the result set.  The
        !          2958: ** rowid should be written to *pRowid.
        !          2959: */
        !          2960: static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
        !          2961:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          2962: 
        !          2963:   *pRowid = sqlite3_column_int64(c->pStmt, 0);
        !          2964:   return SQLITE_OK;
        !          2965: }
        !          2966: 
        !          2967: /* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
        !          2968:  * we also store positions and offsets in the hash table using the given
        !          2969:  * column number. */
        !          2970: static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
        !          2971:                       const char *zText, int iColumn){
        !          2972:   sqlite3_tokenizer *pTokenizer = v->pTokenizer;
        !          2973:   sqlite3_tokenizer_cursor *pCursor;
        !          2974:   const char *pToken;
        !          2975:   int nTokenBytes;
        !          2976:   int iStartOffset, iEndOffset, iPosition;
        !          2977:   int rc;
        !          2978: 
        !          2979:   rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
        !          2980:   if( rc!=SQLITE_OK ) return rc;
        !          2981: 
        !          2982:   pCursor->pTokenizer = pTokenizer;
        !          2983:   while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
        !          2984:                                                &pToken, &nTokenBytes,
        !          2985:                                                &iStartOffset, &iEndOffset,
        !          2986:                                                &iPosition) ){
        !          2987:     DocList *p;
        !          2988: 
        !          2989:     /* Positions can't be negative; we use -1 as a terminator internally. */
        !          2990:     if( iPosition<0 ){
        !          2991:       pTokenizer->pModule->xClose(pCursor);
        !          2992:       return SQLITE_ERROR;
        !          2993:     }
        !          2994: 
        !          2995:     p = fts1HashFind(terms, pToken, nTokenBytes);
        !          2996:     if( p==NULL ){
        !          2997:       p = docListNew(DL_DEFAULT);
        !          2998:       docListAddDocid(p, iDocid);
        !          2999:       fts1HashInsert(terms, pToken, nTokenBytes, p);
        !          3000:     }
        !          3001:     if( iColumn>=0 ){
        !          3002:       docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
        !          3003:     }
        !          3004:   }
        !          3005: 
        !          3006:   /* TODO(shess) Check return?  Should this be able to cause errors at
        !          3007:   ** this point?  Actually, same question about sqlite3_finalize(),
        !          3008:   ** though one could argue that failure there means that the data is
        !          3009:   ** not durable.  *ponder*
        !          3010:   */
        !          3011:   pTokenizer->pModule->xClose(pCursor);
        !          3012:   return rc;
        !          3013: }
        !          3014: 
        !          3015: /* Update the %_terms table to map the term [pTerm] to the given rowid. */
        !          3016: static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
        !          3017:                              DocList *d){
        !          3018:   sqlite_int64 iIndexRow;
        !          3019:   DocList doclist;
        !          3020:   int iSegment = 0, rc;
        !          3021: 
        !          3022:   rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
        !          3023:   if( rc==SQLITE_DONE ){
        !          3024:     docListInit(&doclist, DL_DEFAULT, 0, 0);
        !          3025:     docListUpdate(&doclist, d);
        !          3026:     /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
        !          3027:     rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
        !          3028:     goto err;
        !          3029:   }
        !          3030:   if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
        !          3031: 
        !          3032:   docListUpdate(&doclist, d);
        !          3033:   if( doclist.nData<=CHUNK_MAX ){
        !          3034:     rc = term_update(v, iIndexRow, &doclist);
        !          3035:     goto err;
        !          3036:   }
        !          3037: 
        !          3038:   /* Doclist doesn't fit, delete what's there, and accumulate
        !          3039:   ** forward.
        !          3040:   */
        !          3041:   rc = term_delete(v, iIndexRow);
        !          3042:   if( rc!=SQLITE_OK ) goto err;
        !          3043: 
        !          3044:   /* Try to insert the doclist into a higher segment bucket.  On
        !          3045:   ** failure, accumulate existing doclist with the doclist from that
        !          3046:   ** bucket, and put results in the next bucket.
        !          3047:   */
        !          3048:   iSegment++;
        !          3049:   while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
        !          3050:                          &doclist))!=SQLITE_OK ){
        !          3051:     sqlite_int64 iSegmentRow;
        !          3052:     DocList old;
        !          3053:     int rc2;
        !          3054: 
        !          3055:     /* Retain old error in case the term_insert() error was really an
        !          3056:     ** error rather than a bounced insert.
        !          3057:     */
        !          3058:     rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
        !          3059:     if( rc2!=SQLITE_ROW ) goto err;
        !          3060: 
        !          3061:     rc = term_delete(v, iSegmentRow);
        !          3062:     if( rc!=SQLITE_OK ) goto err;
        !          3063: 
        !          3064:     /* Reusing lowest-number deleted row keeps the index smaller. */
        !          3065:     if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
        !          3066: 
        !          3067:     /* doclist contains the newer data, so accumulate it over old.
        !          3068:     ** Then steal accumulated data for doclist.
        !          3069:     */
        !          3070:     docListAccumulate(&old, &doclist);
        !          3071:     docListDestroy(&doclist);
        !          3072:     doclist = old;
        !          3073: 
        !          3074:     iSegment++;
        !          3075:   }
        !          3076: 
        !          3077:  err:
        !          3078:   docListDestroy(&doclist);
        !          3079:   return rc;
        !          3080: }
        !          3081: 
        !          3082: /* Add doclists for all terms in [pValues] to the hash table [terms]. */
        !          3083: static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
        !          3084:                 sqlite3_value **pValues){
        !          3085:   int i;
        !          3086:   for(i = 0; i < v->nColumn ; ++i){
        !          3087:     char *zText = (char*)sqlite3_value_text(pValues[i]);
        !          3088:     int rc = buildTerms(v, terms, iRowid, zText, i);
        !          3089:     if( rc!=SQLITE_OK ) return rc;
        !          3090:   }
        !          3091:   return SQLITE_OK;
        !          3092: }
        !          3093: 
        !          3094: /* Add empty doclists for all terms in the given row's content to the hash
        !          3095:  * table [pTerms]. */
        !          3096: static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
        !          3097:   const char **pValues;
        !          3098:   int i;
        !          3099: 
        !          3100:   int rc = content_select(v, iRowid, &pValues);
        !          3101:   if( rc!=SQLITE_OK ) return rc;
        !          3102: 
        !          3103:   for(i = 0 ; i < v->nColumn; ++i) {
        !          3104:     rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
        !          3105:     if( rc!=SQLITE_OK ) break;
        !          3106:   }
        !          3107: 
        !          3108:   freeStringArray(v->nColumn, pValues);
        !          3109:   return SQLITE_OK;
        !          3110: }
        !          3111: 
        !          3112: /* Insert a row into the %_content table; set *piRowid to be the ID of the
        !          3113:  * new row.  Fill [pTerms] with new doclists for the %_term table. */
        !          3114: static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
        !          3115:                         sqlite3_value **pValues,
        !          3116:                         sqlite_int64 *piRowid, fts1Hash *pTerms){
        !          3117:   int rc;
        !          3118: 
        !          3119:   rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
        !          3120:   if( rc!=SQLITE_OK ) return rc;
        !          3121:   *piRowid = sqlite3_last_insert_rowid(v->db);
        !          3122:   return insertTerms(v, pTerms, *piRowid, pValues);
        !          3123: }
        !          3124: 
        !          3125: /* Delete a row from the %_content table; fill [pTerms] with empty doclists
        !          3126:  * to be written to the %_term table. */
        !          3127: static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
        !          3128:   int rc = deleteTerms(v, pTerms, iRow);
        !          3129:   if( rc!=SQLITE_OK ) return rc;
        !          3130:   return content_delete(v, iRow);  /* execute an SQL DELETE */
        !          3131: }
        !          3132: 
        !          3133: /* Update a row in the %_content table; fill [pTerms] with new doclists for the
        !          3134:  * %_term table. */
        !          3135: static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
        !          3136:                         sqlite3_value **pValues, fts1Hash *pTerms){
        !          3137:   /* Generate an empty doclist for each term that previously appeared in this
        !          3138:    * row. */
        !          3139:   int rc = deleteTerms(v, pTerms, iRow);
        !          3140:   if( rc!=SQLITE_OK ) return rc;
        !          3141: 
        !          3142:   rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
        !          3143:   if( rc!=SQLITE_OK ) return rc;
        !          3144: 
        !          3145:   /* Now add positions for terms which appear in the updated row. */
        !          3146:   return insertTerms(v, pTerms, iRow, pValues);
        !          3147: }
        !          3148: 
        !          3149: /* This function implements the xUpdate callback; it is the top-level entry
        !          3150:  * point for inserting, deleting or updating a row in a full-text table. */
        !          3151: static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
        !          3152:                    sqlite_int64 *pRowid){
        !          3153:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
        !          3154:   fts1Hash terms;   /* maps term string -> PosList */
        !          3155:   int rc;
        !          3156:   fts1HashElem *e;
        !          3157: 
        !          3158:   TRACE(("FTS1 Update %p\n", pVtab));
        !          3159:   
        !          3160:   fts1HashInit(&terms, FTS1_HASH_STRING, 1);
        !          3161: 
        !          3162:   if( nArg<2 ){
        !          3163:     rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
        !          3164:   } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
        !          3165:     /* An update:
        !          3166:      * ppArg[0] = old rowid
        !          3167:      * ppArg[1] = new rowid
        !          3168:      * ppArg[2..2+v->nColumn-1] = values
        !          3169:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
        !          3170:      */
        !          3171:     sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
        !          3172:     if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
        !          3173:       sqlite3_value_int64(ppArg[1]) != rowid ){
        !          3174:       rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
        !          3175:     } else {
        !          3176:       assert( nArg==2+v->nColumn+1);
        !          3177:       rc = index_update(v, rowid, &ppArg[2], &terms);
        !          3178:     }
        !          3179:   } else {
        !          3180:     /* An insert:
        !          3181:      * ppArg[1] = requested rowid
        !          3182:      * ppArg[2..2+v->nColumn-1] = values
        !          3183:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
        !          3184:      */
        !          3185:     assert( nArg==2+v->nColumn+1);
        !          3186:     rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
        !          3187:   }
        !          3188: 
        !          3189:   if( rc==SQLITE_OK ){
        !          3190:     /* Write updated doclists to disk. */
        !          3191:     for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
        !          3192:       DocList *p = fts1HashData(e);
        !          3193:       rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
        !          3194:       if( rc!=SQLITE_OK ) break;
        !          3195:     }
        !          3196:   }
        !          3197: 
        !          3198:   /* clean up */
        !          3199:   for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
        !          3200:     DocList *p = fts1HashData(e);
        !          3201:     docListDelete(p);
        !          3202:   }
        !          3203:   fts1HashClear(&terms);
        !          3204: 
        !          3205:   return rc;
        !          3206: }
        !          3207: 
        !          3208: /*
        !          3209: ** Implementation of the snippet() function for FTS1
        !          3210: */
        !          3211: static void snippetFunc(
        !          3212:   sqlite3_context *pContext,
        !          3213:   int argc,
        !          3214:   sqlite3_value **argv
        !          3215: ){
        !          3216:   fulltext_cursor *pCursor;
        !          3217:   if( argc<1 ) return;
        !          3218:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
        !          3219:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
        !          3220:     sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
        !          3221:   }else{
        !          3222:     const char *zStart = "<b>";
        !          3223:     const char *zEnd = "</b>";
        !          3224:     const char *zEllipsis = "<b>...</b>";
        !          3225:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
        !          3226:     if( argc>=2 ){
        !          3227:       zStart = (const char*)sqlite3_value_text(argv[1]);
        !          3228:       if( argc>=3 ){
        !          3229:         zEnd = (const char*)sqlite3_value_text(argv[2]);
        !          3230:         if( argc>=4 ){
        !          3231:           zEllipsis = (const char*)sqlite3_value_text(argv[3]);
        !          3232:         }
        !          3233:       }
        !          3234:     }
        !          3235:     snippetAllOffsets(pCursor);
        !          3236:     snippetText(pCursor, zStart, zEnd, zEllipsis);
        !          3237:     sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
        !          3238:                         pCursor->snippet.nSnippet, SQLITE_STATIC);
        !          3239:   }
        !          3240: }
        !          3241: 
        !          3242: /*
        !          3243: ** Implementation of the offsets() function for FTS1
        !          3244: */
        !          3245: static void snippetOffsetsFunc(
        !          3246:   sqlite3_context *pContext,
        !          3247:   int argc,
        !          3248:   sqlite3_value **argv
        !          3249: ){
        !          3250:   fulltext_cursor *pCursor;
        !          3251:   if( argc<1 ) return;
        !          3252:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
        !          3253:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
        !          3254:     sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
        !          3255:   }else{
        !          3256:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
        !          3257:     snippetAllOffsets(pCursor);
        !          3258:     snippetOffsetText(&pCursor->snippet);
        !          3259:     sqlite3_result_text(pContext,
        !          3260:                         pCursor->snippet.zOffset, pCursor->snippet.nOffset,
        !          3261:                         SQLITE_STATIC);
        !          3262:   }
        !          3263: }
        !          3264: 
        !          3265: /*
        !          3266: ** This routine implements the xFindFunction method for the FTS1
        !          3267: ** virtual table.
        !          3268: */
        !          3269: static int fulltextFindFunction(
        !          3270:   sqlite3_vtab *pVtab,
        !          3271:   int nArg,
        !          3272:   const char *zName,
        !          3273:   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
        !          3274:   void **ppArg
        !          3275: ){
        !          3276:   if( strcmp(zName,"snippet")==0 ){
        !          3277:     *pxFunc = snippetFunc;
        !          3278:     return 1;
        !          3279:   }else if( strcmp(zName,"offsets")==0 ){
        !          3280:     *pxFunc = snippetOffsetsFunc;
        !          3281:     return 1;
        !          3282:   }
        !          3283:   return 0;
        !          3284: }
        !          3285: 
        !          3286: /*
        !          3287: ** Rename an fts1 table.
        !          3288: */
        !          3289: static int fulltextRename(
        !          3290:   sqlite3_vtab *pVtab,
        !          3291:   const char *zName
        !          3292: ){
        !          3293:   fulltext_vtab *p = (fulltext_vtab *)pVtab;
        !          3294:   int rc = SQLITE_NOMEM;
        !          3295:   char *zSql = sqlite3_mprintf(
        !          3296:     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
        !          3297:     "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
        !          3298:     , p->zDb, p->zName, zName
        !          3299:     , p->zDb, p->zName, zName
        !          3300:   );
        !          3301:   if( zSql ){
        !          3302:     rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
        !          3303:     sqlite3_free(zSql);
        !          3304:   }
        !          3305:   return rc;
        !          3306: }
        !          3307: 
        !          3308: static const sqlite3_module fulltextModule = {
        !          3309:   /* iVersion      */ 0,
        !          3310:   /* xCreate       */ fulltextCreate,
        !          3311:   /* xConnect      */ fulltextConnect,
        !          3312:   /* xBestIndex    */ fulltextBestIndex,
        !          3313:   /* xDisconnect   */ fulltextDisconnect,
        !          3314:   /* xDestroy      */ fulltextDestroy,
        !          3315:   /* xOpen         */ fulltextOpen,
        !          3316:   /* xClose        */ fulltextClose,
        !          3317:   /* xFilter       */ fulltextFilter,
        !          3318:   /* xNext         */ fulltextNext,
        !          3319:   /* xEof          */ fulltextEof,
        !          3320:   /* xColumn       */ fulltextColumn,
        !          3321:   /* xRowid        */ fulltextRowid,
        !          3322:   /* xUpdate       */ fulltextUpdate,
        !          3323:   /* xBegin        */ 0, 
        !          3324:   /* xSync         */ 0,
        !          3325:   /* xCommit       */ 0,
        !          3326:   /* xRollback     */ 0,
        !          3327:   /* xFindFunction */ fulltextFindFunction,
        !          3328:   /* xRename       */ fulltextRename,
        !          3329: };
        !          3330: 
        !          3331: int sqlite3Fts1Init(sqlite3 *db){
        !          3332:   sqlite3_overload_function(db, "snippet", -1);
        !          3333:   sqlite3_overload_function(db, "offsets", -1);
        !          3334:   return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
        !          3335: }
        !          3336: 
        !          3337: #if !SQLITE_CORE
        !          3338: int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
        !          3339:                            const sqlite3_api_routines *pApi){
        !          3340:   SQLITE_EXTENSION_INIT2(pApi)
        !          3341:   return sqlite3Fts1Init(db);
        !          3342: }
        !          3343: #endif
        !          3344: 
        !          3345: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>