Annotation of embedaddon/sqlite3/ext/fts1/fulltext.c, revision 1.1

1.1     ! misho       1: /* The author disclaims copyright to this source code.
        !             2:  *
        !             3:  * This is an SQLite module implementing full-text search.
        !             4:  */
        !             5: 
        !             6: #include <assert.h>
        !             7: #if !defined(__APPLE__)
        !             8: #include <malloc.h>
        !             9: #else
        !            10: #include <stdlib.h>
        !            11: #endif
        !            12: #include <stdio.h>
        !            13: #include <string.h>
        !            14: #include <ctype.h>
        !            15: 
        !            16: #include "fulltext.h"
        !            17: #include "ft_hash.h"
        !            18: #include "tokenizer.h"
        !            19: #include "sqlite3.h"
        !            20: #include "sqlite3ext.h"
        !            21: SQLITE_EXTENSION_INIT1
        !            22: 
        !            23: /* utility functions */
        !            24: 
        !            25: /* We encode variable-length integers in little-endian order using seven bits
        !            26:  * per byte as follows:
        !            27: **
        !            28: ** KEY:
        !            29: **         A = 0xxxxxxx    7 bits of data and one flag bit
        !            30: **         B = 1xxxxxxx    7 bits of data and one flag bit
        !            31: **
        !            32: **  7 bits - A
        !            33: ** 14 bits - BA
        !            34: ** 21 bits - BBA
        !            35: ** and so on.
        !            36: */
        !            37: 
        !            38: /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
        !            39: #define VARINT_MAX 10
        !            40: 
        !            41: /* Write a 64-bit variable-length integer to memory starting at p[0].
        !            42:  * The length of data written will be between 1 and VARINT_MAX bytes.
        !            43:  * The number of bytes written is returned. */
        !            44: static int putVarint(char *p, sqlite_int64 v){
        !            45:   unsigned char *q = (unsigned char *) p;
        !            46:   sqlite_uint64 vu = v;
        !            47:   do{
        !            48:     *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
        !            49:     vu >>= 7;
        !            50:   }while( vu!=0 );
        !            51:   q[-1] &= 0x7f;  /* turn off high bit in final byte */
        !            52:   assert( q - (unsigned char *)p <= VARINT_MAX );
        !            53:   return (int) (q - (unsigned char *)p);
        !            54: }
        !            55: 
        !            56: /* Read a 64-bit variable-length integer from memory starting at p[0].
        !            57:  * Return the number of bytes read, or 0 on error.
        !            58:  * The value is stored in *v. */
        !            59: static int getVarint(const char *p, sqlite_int64 *v){
        !            60:   const unsigned char *q = (const unsigned char *) p;
        !            61:   sqlite_uint64 x = 0, y = 1;
        !            62:   while( (*q & 0x80) == 0x80 ){
        !            63:     x += y * (*q++ & 0x7f);
        !            64:     y <<= 7;
        !            65:     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
        !            66:       assert( 0 );
        !            67:       return 0;
        !            68:     }
        !            69:   }
        !            70:   x += y * (*q++);
        !            71:   *v = (sqlite_int64) x;
        !            72:   return (int) (q - (unsigned char *)p);
        !            73: }
        !            74: 
        !            75: static int getVarint32(const char *p, int *pi){
        !            76:  sqlite_int64 i;
        !            77:  int ret = getVarint(p, &i);
        !            78:  *pi = (int) i;
        !            79:  assert( *pi==i );
        !            80:  return ret;
        !            81: }
        !            82: 
        !            83: /*** Document lists ***
        !            84:  *
        !            85:  * A document list holds a sorted list of varint-encoded document IDs.
        !            86:  *
        !            87:  * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
        !            88:  *
        !            89:  * array {
        !            90:  *   varint docid;
        !            91:  *   array {
        !            92:  *     varint position;     (delta from previous position plus 1, or 0 for end)
        !            93:  *     varint startOffset;  (delta from previous startOffset)
        !            94:  *     varint endOffset;    (delta from startOffset)
        !            95:  *   }
        !            96:  * }
        !            97:  *
        !            98:  * Here, array { X } means zero or more occurrences of X, adjacent in memory.
        !            99:  *
        !           100:  * A doclist with type DL_POSITIONS is like the above, but holds only docids
        !           101:  * and positions without offset information.
        !           102:  *
        !           103:  * A doclist with type DL_DOCIDS is like the above, but holds only docids
        !           104:  * without positions or offset information.
        !           105:  *
        !           106:  * On disk, every document list has positions and offsets, so we don't bother
        !           107:  * to serialize a doclist's type.
        !           108:  * 
        !           109:  * We don't yet delta-encode document IDs; doing so will probably be a
        !           110:  * modest win.
        !           111:  *
        !           112:  * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
        !           113:  * After the first offset, estimate the next offset by using the
        !           114:  * current token position and the previous token position and offset,
        !           115:  * offset to handle some variance.  So the estimate would be
        !           116:  * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
        !           117:  * as normal.  Offsets more than 64 chars from the estimate are
        !           118:  * encoded as the delta to the previous start offset + 128.  An
        !           119:  * additional tiny increment can be gained by using the end offset of
        !           120:  * the previous token to make the estimate a tiny bit more precise.
        !           121: */
        !           122: 
        !           123: typedef enum DocListType {
        !           124:   DL_DOCIDS,              /* docids only */
        !           125:   DL_POSITIONS,           /* docids + positions */
        !           126:   DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
        !           127: } DocListType;
        !           128: 
        !           129: typedef struct DocList {
        !           130:   char *pData;
        !           131:   int nData;
        !           132:   DocListType iType;
        !           133:   int iLastPos;       /* the last position written */
        !           134:   int iLastOffset;    /* the last start offset written */
        !           135: } DocList;
        !           136: 
        !           137: /* Initialize a new DocList to hold the given data. */
        !           138: static void docListInit(DocList *d, DocListType iType,
        !           139:                         const char *pData, int nData){
        !           140:   d->nData = nData;
        !           141:   if( nData>0 ){
        !           142:     d->pData = malloc(nData);
        !           143:     memcpy(d->pData, pData, nData);
        !           144:   } else {
        !           145:     d->pData = NULL;
        !           146:   }
        !           147:   d->iType = iType;
        !           148:   d->iLastPos = 0;
        !           149:   d->iLastOffset = 0;
        !           150: }
        !           151: 
        !           152: /* Create a new dynamically-allocated DocList. */
        !           153: static DocList *docListNew(DocListType iType){
        !           154:   DocList *d = (DocList *) malloc(sizeof(DocList));
        !           155:   docListInit(d, iType, 0, 0);
        !           156:   return d;
        !           157: }
        !           158: 
        !           159: static void docListDestroy(DocList *d){
        !           160:   free(d->pData);
        !           161: #ifndef NDEBUG
        !           162:   memset(d, 0x55, sizeof(*d));
        !           163: #endif
        !           164: }
        !           165: 
        !           166: static void docListDelete(DocList *d){
        !           167:   docListDestroy(d);
        !           168:   free(d);
        !           169: }
        !           170: 
        !           171: static char *docListEnd(DocList *d){
        !           172:   return d->pData + d->nData;
        !           173: }
        !           174: 
        !           175: /* Append a varint to a DocList's data. */
        !           176: static void appendVarint(DocList *d, sqlite_int64 i){
        !           177:   char c[VARINT_MAX];
        !           178:   int n = putVarint(c, i);
        !           179:   d->pData = realloc(d->pData, d->nData + n);
        !           180:   memcpy(d->pData + d->nData, c, n);
        !           181:   d->nData += n;
        !           182: }
        !           183: 
        !           184: static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
        !           185:   appendVarint(d, iDocid);
        !           186:   d->iLastPos = 0;
        !           187: }
        !           188: 
        !           189: /* Add a position to the last position list in a doclist. */
        !           190: static void docListAddPos(DocList *d, int iPos){
        !           191:   assert( d->iType>=DL_POSITIONS );
        !           192:   appendVarint(d, iPos-d->iLastPos+1);
        !           193:   d->iLastPos = iPos;
        !           194: }
        !           195: 
        !           196: static void docListAddPosOffset(DocList *d, int iPos,
        !           197:                                 int iStartOffset, int iEndOffset){
        !           198:   assert( d->iType==DL_POSITIONS_OFFSETS );
        !           199:   docListAddPos(d, iPos);
        !           200:   appendVarint(d, iStartOffset-d->iLastOffset);
        !           201:   d->iLastOffset = iStartOffset;
        !           202:   appendVarint(d, iEndOffset-iStartOffset);
        !           203: }
        !           204: 
        !           205: /* Terminate the last position list in the given doclist. */
        !           206: static void docListAddEndPos(DocList *d){
        !           207:   appendVarint(d, 0);
        !           208: }
        !           209: 
        !           210: typedef struct DocListReader {
        !           211:   DocList *pDoclist;
        !           212:   char *p;
        !           213:   int iLastPos;    /* the last position read */
        !           214: } DocListReader;
        !           215: 
        !           216: static void readerInit(DocListReader *r, DocList *pDoclist){
        !           217:   r->pDoclist = pDoclist;
        !           218:   if( pDoclist!=NULL ){
        !           219:     r->p = pDoclist->pData;
        !           220:   }
        !           221:   r->iLastPos = 0;
        !           222: }
        !           223: 
        !           224: static int readerAtEnd(DocListReader *pReader){
        !           225:   return pReader->p >= docListEnd(pReader->pDoclist);
        !           226: }
        !           227: 
        !           228: /* Peek at the next docid without advancing the read pointer. */
        !           229: static sqlite_int64 peekDocid(DocListReader *pReader){
        !           230:   sqlite_int64 ret;
        !           231:   assert( !readerAtEnd(pReader) );
        !           232:   getVarint(pReader->p, &ret);
        !           233:   return ret;
        !           234: }
        !           235: 
        !           236: /* Read the next docid. */
        !           237: static sqlite_int64 readDocid(DocListReader *pReader){
        !           238:   sqlite_int64 ret;
        !           239:   assert( !readerAtEnd(pReader) );
        !           240:   pReader->p += getVarint(pReader->p, &ret);
        !           241:   pReader->iLastPos = 0;
        !           242:   return ret;
        !           243: }
        !           244: 
        !           245: /* Read the next position from a position list.
        !           246:  * Returns the position, or -1 at the end of the list. */
        !           247: static int readPosition(DocListReader *pReader){
        !           248:   int i;
        !           249:   int iType = pReader->pDoclist->iType;
        !           250:   assert( iType>=DL_POSITIONS );
        !           251:   assert( !readerAtEnd(pReader) );
        !           252: 
        !           253:   pReader->p += getVarint32(pReader->p, &i);
        !           254:   if( i==0 ){
        !           255:     pReader->iLastPos = -1;
        !           256:     return -1;
        !           257:   }
        !           258:   pReader->iLastPos += ((int) i)-1;
        !           259:   if( iType>=DL_POSITIONS_OFFSETS ){
        !           260:     /* Skip over offsets, ignoring them for now. */
        !           261:     int iStart, iEnd;
        !           262:     pReader->p += getVarint32(pReader->p, &iStart);
        !           263:     pReader->p += getVarint32(pReader->p, &iEnd);
        !           264:   }
        !           265:   return pReader->iLastPos;
        !           266: }
        !           267: 
        !           268: /* Skip past the end of a position list. */
        !           269: static void skipPositionList(DocListReader *pReader){
        !           270:   while( readPosition(pReader)!=-1 )
        !           271:     ;
        !           272: }
        !           273: 
        !           274: /* Skip over a docid, including its position list if the doclist has
        !           275:  * positions. */
        !           276: static void skipDocument(DocListReader *pReader){
        !           277:   readDocid(pReader);
        !           278:   if( pReader->pDoclist->iType >= DL_POSITIONS ){
        !           279:     skipPositionList(pReader);
        !           280:   }
        !           281: }
        !           282: 
        !           283: static sqlite_int64 firstDocid(DocList *d){
        !           284:   DocListReader r;
        !           285:   readerInit(&r, d);
        !           286:   return readDocid(&r);
        !           287: }
        !           288: 
        !           289: /* Doclist multi-tool.  Pass pUpdate==NULL to delete the indicated docid;
        !           290:  * otherwise pUpdate, which must contain only the single docid [iDocid], is
        !           291:  * inserted (if not present) or updated (if already present). */
        !           292: static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){
        !           293:   int modified = 0;
        !           294:   DocListReader reader;
        !           295:   char *p;
        !           296: 
        !           297:   if( pUpdate!=NULL ){
        !           298:     assert( d->iType==pUpdate->iType);
        !           299:     assert( iDocid==firstDocid(pUpdate) );
        !           300:   }
        !           301: 
        !           302:   readerInit(&reader, d);
        !           303:   while( !readerAtEnd(&reader) && peekDocid(&reader)<iDocid ){
        !           304:     skipDocument(&reader);
        !           305:   }
        !           306: 
        !           307:   p = reader.p;
        !           308:   /* Delete if there is a matching element. */
        !           309:   if( !readerAtEnd(&reader) && iDocid==peekDocid(&reader) ){
        !           310:     skipDocument(&reader);
        !           311:     memmove(p, reader.p, docListEnd(d) - reader.p);
        !           312:     d->nData -= (reader.p - p);
        !           313:     modified = 1;
        !           314:   }
        !           315: 
        !           316:   /* Insert if indicated. */
        !           317:   if( pUpdate!=NULL ){
        !           318:     int iDoclist = p-d->pData;
        !           319:     docListAddEndPos(pUpdate);
        !           320: 
        !           321:     d->pData = realloc(d->pData, d->nData+pUpdate->nData);
        !           322:     p = d->pData + iDoclist;
        !           323: 
        !           324:     memmove(p+pUpdate->nData, p, docListEnd(d) - p);
        !           325:     memcpy(p, pUpdate->pData, pUpdate->nData);
        !           326:     d->nData += pUpdate->nData;
        !           327:     modified = 1;
        !           328:   }
        !           329: 
        !           330:   return modified;
        !           331: }
        !           332: 
        !           333: /* Split the second half of doclist d into a separate doclist d2.  Returns 1
        !           334:  * if successful, or 0 if d contains a single document and hence can't be
        !           335:  * split. */
        !           336: static int docListSplit(DocList *d, DocList *d2){
        !           337:   const char *pSplitPoint = d->pData + d->nData / 2;
        !           338:   DocListReader reader;
        !           339: 
        !           340:   readerInit(&reader, d);
        !           341:   while( reader.p<pSplitPoint ){
        !           342:     skipDocument(&reader);
        !           343:   }
        !           344:   if( readerAtEnd(&reader) ) return 0;
        !           345:   docListInit(d2, d->iType, reader.p, docListEnd(d) - reader.p);
        !           346:   d->nData = reader.p - d->pData;
        !           347:   d->pData = realloc(d->pData, d->nData);
        !           348:   return 1;
        !           349: }
        !           350: 
        !           351: /* A DocListMerge computes the AND of an in-memory DocList [in] and a chunked
        !           352:  * on-disk doclist, resulting in another in-memory DocList [out].  [in]
        !           353:  * and [out] may or may not store position information according to the
        !           354:  * caller's wishes.  The on-disk doclist always comes with positions.
        !           355:  *
        !           356:  * The caller must read each chunk of the on-disk doclist in succession and
        !           357:  * pass it to mergeBlock().
        !           358:  *
        !           359:  * If [in] has positions, then the merge output contains only documents with
        !           360:  * matching positions in the two input doclists.  If [in] does not have
        !           361:  * positions, then the merge output contains all documents common to the two
        !           362:  * input doclists.
        !           363:  *
        !           364:  * If [in] is NULL, then the on-disk doclist is copied to [out] directly.
        !           365:  *
        !           366:  * A merge is performed using an integer [iOffset] provided by the caller.
        !           367:  * [iOffset] is subtracted from each position in the on-disk doclist for the
        !           368:  * purpose of position comparison; this is helpful in implementing phrase
        !           369:  * searches.
        !           370:  *
        !           371:  * A DocListMerge is not yet able to propagate offsets through query
        !           372:  * processing; we should add that capability soon.
        !           373: */
        !           374: typedef struct DocListMerge {
        !           375:   DocListReader in;
        !           376:   DocList *pOut;
        !           377:   int iOffset;
        !           378: } DocListMerge;
        !           379: 
        !           380: static void mergeInit(DocListMerge *m,
        !           381:                       DocList *pIn, int iOffset, DocList *pOut){
        !           382:   readerInit(&m->in, pIn);
        !           383:   m->pOut = pOut;
        !           384:   m->iOffset = iOffset;
        !           385: 
        !           386:   /* can't handle offsets yet */
        !           387:   assert( pIn==NULL || pIn->iType <= DL_POSITIONS );
        !           388:   assert( pOut->iType <= DL_POSITIONS );
        !           389: }
        !           390: 
        !           391: /* A helper function for mergeBlock(), below.  Merge the position lists
        !           392:  * pointed to by m->in and pBlockReader.
        !           393:  * If the merge matches, write [iDocid] to m->pOut; if m->pOut
        !           394:  * has positions then write all matching positions as well. */
        !           395: static void mergePosList(DocListMerge *m, sqlite_int64 iDocid,
        !           396:                   DocListReader *pBlockReader){
        !           397:   int block_pos = readPosition(pBlockReader);
        !           398:   int in_pos = readPosition(&m->in);
        !           399:   int match = 0;
        !           400:   while( block_pos!=-1 || in_pos!=-1 ){
        !           401:     if( block_pos-m->iOffset==in_pos ){
        !           402:       if( !match ){
        !           403:         docListAddDocid(m->pOut, iDocid);
        !           404:         match = 1;
        !           405:       }
        !           406:       if( m->pOut->iType >= DL_POSITIONS ){
        !           407:         docListAddPos(m->pOut, in_pos);
        !           408:       }
        !           409:       block_pos = readPosition(pBlockReader);
        !           410:       in_pos = readPosition(&m->in);
        !           411:     } else if( in_pos==-1 || (block_pos!=-1 && block_pos-m->iOffset<in_pos) ){
        !           412:       block_pos = readPosition(pBlockReader);
        !           413:     } else {
        !           414:       in_pos = readPosition(&m->in);
        !           415:     }
        !           416:   }
        !           417:   if( m->pOut->iType >= DL_POSITIONS && match ){
        !           418:     docListAddEndPos(m->pOut);
        !           419:   }
        !           420: }
        !           421: 
        !           422: /* Merge one block of an on-disk doclist into a DocListMerge. */
        !           423: static void mergeBlock(DocListMerge *m, DocList *pBlock){
        !           424:   DocListReader blockReader;
        !           425:   assert( pBlock->iType >= DL_POSITIONS );
        !           426:   readerInit(&blockReader, pBlock);
        !           427:   while( !readerAtEnd(&blockReader) ){
        !           428:     sqlite_int64 iDocid = readDocid(&blockReader);
        !           429:     if( m->in.pDoclist!=NULL ){
        !           430:       while( 1 ){
        !           431:         if( readerAtEnd(&m->in) ) return;  /* nothing more to merge */
        !           432:         if( peekDocid(&m->in)>=iDocid ) break;
        !           433:         skipDocument(&m->in);
        !           434:       }
        !           435:       if( peekDocid(&m->in)>iDocid ){  /* [pIn] has no match with iDocid */
        !           436:         skipPositionList(&blockReader);  /* skip this docid in the block */
        !           437:         continue;
        !           438:       }
        !           439:       readDocid(&m->in);
        !           440:     }
        !           441:     /* We have a document match. */
        !           442:     if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){
        !           443:       /* We don't need to do a poslist merge. */
        !           444:       docListAddDocid(m->pOut, iDocid);
        !           445:       if( m->pOut->iType >= DL_POSITIONS ){
        !           446:         /* Copy all positions to the output doclist. */
        !           447:         while( 1 ){
        !           448:           int pos = readPosition(&blockReader);
        !           449:           if( pos==-1 ) break;
        !           450:           docListAddPos(m->pOut, pos);
        !           451:         }
        !           452:         docListAddEndPos(m->pOut);
        !           453:       } else skipPositionList(&blockReader);
        !           454:       continue;
        !           455:     }
        !           456:     mergePosList(m, iDocid, &blockReader);
        !           457:   }
        !           458: }
        !           459: 
        !           460: static char *string_dup_n(const char *s, int n){
        !           461:   char *str = malloc(n + 1);
        !           462:   memcpy(str, s, n);
        !           463:   str[n] = '\0';
        !           464:   return str;
        !           465: }
        !           466: 
        !           467: /* Duplicate a string; the caller must free() the returned string.
        !           468:  * (We don't use strdup() since it's not part of the standard C library and
        !           469:  * may not be available everywhere.) */
        !           470: static char *string_dup(const char *s){
        !           471:   return string_dup_n(s, strlen(s));
        !           472: }
        !           473: 
        !           474: /* Format a string, replacing each occurrence of the % character with
        !           475:  * zName.  This may be more convenient than sqlite_mprintf()
        !           476:  * when one string is used repeatedly in a format string.
        !           477:  * The caller must free() the returned string. */
        !           478: static char *string_format(const char *zFormat, const char *zName){
        !           479:   const char *p;
        !           480:   size_t len = 0;
        !           481:   size_t nName = strlen(zName);
        !           482:   char *result;
        !           483:   char *r;
        !           484: 
        !           485:   /* first compute length needed */
        !           486:   for(p = zFormat ; *p ; ++p){
        !           487:     len += (*p=='%' ? nName : 1);
        !           488:   }
        !           489:   len += 1;  /* for null terminator */
        !           490: 
        !           491:   r = result = malloc(len);
        !           492:   for(p = zFormat; *p; ++p){
        !           493:     if( *p=='%' ){
        !           494:       memcpy(r, zName, nName);
        !           495:       r += nName;
        !           496:     } else {
        !           497:       *r++ = *p;
        !           498:     }
        !           499:   }
        !           500:   *r++ = '\0';
        !           501:   assert( r == result + len );
        !           502:   return result;
        !           503: }
        !           504: 
        !           505: static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){
        !           506:   char *zCommand = string_format(zFormat, zName);
        !           507:   int rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
        !           508:   free(zCommand);
        !           509:   return rc;
        !           510: }
        !           511: 
        !           512: static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt,
        !           513:                 const char *zFormat){
        !           514:   char *zCommand = string_format(zFormat, zName);
        !           515:   int rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
        !           516:   free(zCommand);
        !           517:   return rc;
        !           518: }
        !           519: 
        !           520: /* end utility functions */
        !           521: 
        !           522: #define QUERY_GENERIC 0
        !           523: #define QUERY_FULLTEXT 1
        !           524: 
        !           525: #define CHUNK_MAX 1024
        !           526: 
        !           527: typedef enum fulltext_statement {
        !           528:   CONTENT_INSERT_STMT,
        !           529:   CONTENT_SELECT_STMT,
        !           530:   CONTENT_DELETE_STMT,
        !           531: 
        !           532:   TERM_SELECT_STMT,
        !           533:   TERM_CHUNK_SELECT_STMT,
        !           534:   TERM_INSERT_STMT,
        !           535:   TERM_UPDATE_STMT,
        !           536:   TERM_DELETE_STMT,
        !           537: 
        !           538:   MAX_STMT                     /* Always at end! */
        !           539: } fulltext_statement;
        !           540: 
        !           541: /* These must exactly match the enum above. */
        !           542: /* TODO(adam): Is there some risk that a statement (in particular,
        !           543: ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
        !           544: ** query joins a virtual table to itself?  If so perhaps we should
        !           545: ** move some of these to the cursor object.
        !           546: */
        !           547: static const char *fulltext_zStatement[MAX_STMT] = {
        !           548:   /* CONTENT_INSERT */ "insert into %_content (rowid, content) values (?, ?)",
        !           549:   /* CONTENT_SELECT */ "select content from %_content where rowid = ?",
        !           550:   /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
        !           551: 
        !           552:   /* TERM_SELECT */
        !           553:   "select rowid, doclist from %_term where term = ? and first = ?",
        !           554:   /* TERM_CHUNK_SELECT */
        !           555:   "select max(first) from %_term where term = ? and first <= ?",
        !           556:   /* TERM_INSERT */
        !           557:   "insert into %_term (term, first, doclist) values (?, ?, ?)",
        !           558:   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
        !           559:   /* TERM_DELETE */ "delete from %_term where rowid = ?",
        !           560: };
        !           561: 
        !           562: typedef struct fulltext_vtab {
        !           563:   sqlite3_vtab base;
        !           564:   sqlite3 *db;
        !           565:   const char *zName;               /* virtual table name */
        !           566:   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
        !           567: 
        !           568:   /* Precompiled statements which we keep as long as the table is
        !           569:   ** open.
        !           570:   */
        !           571:   sqlite3_stmt *pFulltextStatements[MAX_STMT];
        !           572: } fulltext_vtab;
        !           573: 
        !           574: typedef struct fulltext_cursor {
        !           575:   sqlite3_vtab_cursor base;
        !           576:   int iCursorType;  /* QUERY_GENERIC or QUERY_FULLTEXT */
        !           577: 
        !           578:   sqlite3_stmt *pStmt;
        !           579: 
        !           580:   int eof;
        !           581: 
        !           582:   /* The following is used only when iCursorType == QUERY_FULLTEXT. */
        !           583:   DocListReader result;
        !           584: } fulltext_cursor;
        !           585: 
        !           586: static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
        !           587:   return (fulltext_vtab *) c->base.pVtab;
        !           588: }
        !           589: 
        !           590: static sqlite3_module fulltextModule;   /* forward declaration */
        !           591: 
        !           592: /* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
        !           593: ** If the indicated statement has never been prepared, it is prepared
        !           594: ** and cached, otherwise the cached version is reset.
        !           595: */
        !           596: static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
        !           597:                              sqlite3_stmt **ppStmt){
        !           598:   assert( iStmt<MAX_STMT );
        !           599:   if( v->pFulltextStatements[iStmt]==NULL ){
        !           600:     int rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt],
        !           601:                          fulltext_zStatement[iStmt]);
        !           602:     if( rc!=SQLITE_OK ) return rc;
        !           603:   } else {
        !           604:     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
        !           605:     if( rc!=SQLITE_OK ) return rc;
        !           606:   }
        !           607: 
        !           608:   *ppStmt = v->pFulltextStatements[iStmt];
        !           609:   return SQLITE_OK;
        !           610: }
        !           611: 
        !           612: /* Step the indicated statement, handling errors SQLITE_BUSY (by
        !           613: ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
        !           614: ** bindings to the new statement).
        !           615: ** TODO(adam): We should extend this function so that it can work with
        !           616: ** statements declared locally, not only globally cached statements.
        !           617: */
        !           618: static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
        !           619:                               sqlite3_stmt **ppStmt){
        !           620:   int rc;
        !           621:   sqlite3_stmt *s = *ppStmt;
        !           622:   assert( iStmt<MAX_STMT );
        !           623:   assert( s==v->pFulltextStatements[iStmt] );
        !           624: 
        !           625:   while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
        !           626:     sqlite3_stmt *pNewStmt;
        !           627: 
        !           628:     if( rc==SQLITE_BUSY ) continue;
        !           629:     if( rc!=SQLITE_ERROR ) return rc;
        !           630: 
        !           631:     rc = sqlite3_reset(s);
        !           632:     if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR;
        !           633: 
        !           634:     v->pFulltextStatements[iStmt] = NULL;   /* Still in s */
        !           635:     rc = sql_get_statement(v, iStmt, &pNewStmt);
        !           636:     if( rc!=SQLITE_OK ) goto err;
        !           637:     *ppStmt = pNewStmt;
        !           638: 
        !           639:     rc = sqlite3_transfer_bindings(s, pNewStmt);
        !           640:     if( rc!=SQLITE_OK ) goto err;
        !           641: 
        !           642:     rc = sqlite3_finalize(s);
        !           643:     if( rc!=SQLITE_OK ) return rc;
        !           644:     s = pNewStmt;
        !           645:   }
        !           646:   return rc;
        !           647: 
        !           648:  err:
        !           649:   sqlite3_finalize(s);
        !           650:   return rc;
        !           651: }
        !           652: 
        !           653: /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
        !           654: ** Useful for statements like UPDATE, where we expect no results.
        !           655: */
        !           656: static int sql_single_step_statement(fulltext_vtab *v,
        !           657:                                      fulltext_statement iStmt,
        !           658:                                      sqlite3_stmt **ppStmt){
        !           659:   int rc = sql_step_statement(v, iStmt, ppStmt);
        !           660:   return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
        !           661: }
        !           662: 
        !           663: /* insert into %_content (rowid, content) values ([rowid], [zContent]) */
        !           664: static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
        !           665:                           const char *zContent, int nContent){
        !           666:   sqlite3_stmt *s;
        !           667:   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
        !           668:   if( rc!=SQLITE_OK ) return rc;
        !           669: 
        !           670:   rc = sqlite3_bind_value(s, 1, rowid);
        !           671:   if( rc!=SQLITE_OK ) return rc;
        !           672: 
        !           673:   rc = sqlite3_bind_text(s, 2, zContent, nContent, SQLITE_STATIC);
        !           674:   if( rc!=SQLITE_OK ) return rc;
        !           675: 
        !           676:   return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
        !           677: }
        !           678: 
        !           679: /* select content from %_content where rowid = [iRow]
        !           680:  * The caller must delete the returned string. */
        !           681: static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
        !           682:                           char **pzContent){
        !           683:   sqlite3_stmt *s;
        !           684:   int rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
        !           685:   if( rc!=SQLITE_OK ) return rc;
        !           686: 
        !           687:   rc = sqlite3_bind_int64(s, 1, iRow);
        !           688:   if( rc!=SQLITE_OK ) return rc;
        !           689: 
        !           690:   rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
        !           691:   if( rc!=SQLITE_ROW ) return rc;
        !           692: 
        !           693:   *pzContent = string_dup((const char *)sqlite3_column_text(s, 0));
        !           694: 
        !           695:   /* We expect only one row.  We must execute another sqlite3_step()
        !           696:    * to complete the iteration; otherwise the table will remain locked. */
        !           697:   rc = sqlite3_step(s);
        !           698:   if( rc==SQLITE_DONE ) return SQLITE_OK;
        !           699: 
        !           700:   free(*pzContent);
        !           701:   return rc;
        !           702: }
        !           703: 
        !           704: /* delete from %_content where rowid = [iRow ] */
        !           705: static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
        !           706:   sqlite3_stmt *s;
        !           707:   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
        !           708:   if( rc!=SQLITE_OK ) return rc;
        !           709: 
        !           710:   rc = sqlite3_bind_int64(s, 1, iRow);
        !           711:   if( rc!=SQLITE_OK ) return rc;
        !           712: 
        !           713:   return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
        !           714: }
        !           715: 
        !           716: /* select rowid, doclist from %_term where term = [zTerm] and first = [iFirst]
        !           717:  * If found, returns SQLITE_OK; the caller must free the returned doclist.
        !           718:  * If no rows found, returns SQLITE_ERROR. */
        !           719: static int term_select(fulltext_vtab *v, const char *zTerm, int nTerm,
        !           720:                        sqlite_int64 iFirst,
        !           721:                        sqlite_int64 *rowid,
        !           722:                        DocList *out){
        !           723:   sqlite3_stmt *s;
        !           724:   int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
        !           725:   if( rc!=SQLITE_OK ) return rc;
        !           726: 
        !           727:   rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_TRANSIENT);
        !           728:   if( rc!=SQLITE_OK ) return rc;
        !           729: 
        !           730:   rc = sqlite3_bind_int64(s, 2, iFirst);
        !           731:   if( rc!=SQLITE_OK ) return rc;
        !           732: 
        !           733:   rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
        !           734:   if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
        !           735: 
        !           736:   *rowid = sqlite3_column_int64(s, 0);
        !           737:   docListInit(out, DL_POSITIONS_OFFSETS,
        !           738:               sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
        !           739: 
        !           740:   /* We expect only one row.  We must execute another sqlite3_step()
        !           741:    * to complete the iteration; otherwise the table will remain locked. */
        !           742:   rc = sqlite3_step(s);
        !           743:   return rc==SQLITE_DONE ? SQLITE_OK : rc;
        !           744: }
        !           745: 
        !           746: /* select max(first) from %_term where term = [zTerm] and first <= [iFirst]
        !           747:  * If found, returns SQLITE_ROW and result in *piResult; if the query returns
        !           748:  * NULL (meaning no row found) returns SQLITE_DONE.
        !           749:  */
        !           750: static int term_chunk_select(fulltext_vtab *v, const char *zTerm, int nTerm,
        !           751:                            sqlite_int64 iFirst, sqlite_int64 *piResult){
        !           752:   sqlite3_stmt *s;
        !           753:   int rc = sql_get_statement(v, TERM_CHUNK_SELECT_STMT, &s);
        !           754:   if( rc!=SQLITE_OK ) return rc;
        !           755: 
        !           756:   rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC);
        !           757:   if( rc!=SQLITE_OK ) return rc;
        !           758: 
        !           759:   rc = sqlite3_bind_int64(s, 2, iFirst);
        !           760:   if( rc!=SQLITE_OK ) return rc;
        !           761: 
        !           762:   rc = sql_step_statement(v, TERM_CHUNK_SELECT_STMT, &s);
        !           763:   if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
        !           764: 
        !           765:   switch( sqlite3_column_type(s, 0) ){
        !           766:     case SQLITE_NULL:
        !           767:       rc = SQLITE_DONE;
        !           768:       break;
        !           769:     case SQLITE_INTEGER:
        !           770:      *piResult = sqlite3_column_int64(s, 0);
        !           771:      break;
        !           772:     default:
        !           773:       return SQLITE_ERROR;
        !           774:   }
        !           775:   /* We expect only one row.  We must execute another sqlite3_step()
        !           776:    * to complete the iteration; otherwise the table will remain locked. */
        !           777:   if( sqlite3_step(s) != SQLITE_DONE ) return SQLITE_ERROR;
        !           778:   return rc;
        !           779: }
        !           780: 
        !           781: /* insert into %_term (term, first, doclist)
        !           782:                values ([zTerm], [iFirst], [doclist]) */
        !           783: static int term_insert(fulltext_vtab *v, const char *zTerm, int nTerm,
        !           784:                        sqlite_int64 iFirst, DocList *doclist){
        !           785:   sqlite3_stmt *s;
        !           786:   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
        !           787:   if( rc!=SQLITE_OK ) return rc;
        !           788: 
        !           789:   rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC);
        !           790:   if( rc!=SQLITE_OK ) return rc;
        !           791: 
        !           792:   rc = sqlite3_bind_int64(s, 2, iFirst);
        !           793:   if( rc!=SQLITE_OK ) return rc;
        !           794: 
        !           795:   rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC);
        !           796:   if( rc!=SQLITE_OK ) return rc;
        !           797: 
        !           798:   return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
        !           799: }
        !           800: 
        !           801: /* update %_term set doclist = [doclist] where rowid = [rowid] */
        !           802: static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
        !           803:                        DocList *doclist){
        !           804:   sqlite3_stmt *s;
        !           805:   int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
        !           806:   if( rc!=SQLITE_OK ) return rc;
        !           807: 
        !           808:   rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData,
        !           809:                          SQLITE_STATIC);
        !           810:   if( rc!=SQLITE_OK ) return rc;
        !           811: 
        !           812:   rc = sqlite3_bind_int64(s, 2, rowid);
        !           813:   if( rc!=SQLITE_OK ) return rc;
        !           814: 
        !           815:   return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
        !           816: }
        !           817: 
        !           818: static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
        !           819:   sqlite3_stmt *s;
        !           820:   int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
        !           821:   if( rc!=SQLITE_OK ) return rc;
        !           822: 
        !           823:   rc = sqlite3_bind_int64(s, 1, rowid);
        !           824:   if( rc!=SQLITE_OK ) return rc;
        !           825: 
        !           826:   return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
        !           827: }
        !           828: 
        !           829: static void fulltext_vtab_destroy(fulltext_vtab *v){
        !           830:   int iStmt;
        !           831: 
        !           832:   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
        !           833:     if( v->pFulltextStatements[iStmt]!=NULL ){
        !           834:       sqlite3_finalize(v->pFulltextStatements[iStmt]);
        !           835:       v->pFulltextStatements[iStmt] = NULL;
        !           836:     }
        !           837:   }
        !           838: 
        !           839:   if( v->pTokenizer!=NULL ){
        !           840:     v->pTokenizer->pModule->xDestroy(v->pTokenizer);
        !           841:     v->pTokenizer = NULL;
        !           842:   }
        !           843: 
        !           844:   free((void *) v->zName);
        !           845:   free(v);
        !           846: }
        !           847: 
        !           848: /* Current interface:
        !           849: ** argv[0] - module name
        !           850: ** argv[1] - database name
        !           851: ** argv[2] - table name
        !           852: ** argv[3] - tokenizer name (optional, a sensible default is provided)
        !           853: ** argv[4..] - passed to tokenizer (optional based on tokenizer)
        !           854: **/
        !           855: static int fulltextConnect(sqlite3 *db, void *pAux, int argc, char **argv,
        !           856:                            sqlite3_vtab **ppVTab){
        !           857:   int rc;
        !           858:   fulltext_vtab *v;
        !           859:   sqlite3_tokenizer_module *m = NULL;
        !           860: 
        !           861:   assert( argc>=3 );
        !           862:   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
        !           863:   /* sqlite will initialize v->base */
        !           864:   v->db = db;
        !           865:   v->zName = string_dup(argv[2]);
        !           866:   v->pTokenizer = NULL;
        !           867: 
        !           868:   if( argc==3 ){
        !           869:     get_simple_tokenizer_module(&m);
        !           870:   } else {
        !           871:     /* TODO(shess) For now, add new tokenizers as else if clauses. */
        !           872:     if( !strcmp(argv[3], "simple") ){
        !           873:       get_simple_tokenizer_module(&m);
        !           874:     } else {
        !           875:       assert( "unrecognized tokenizer"==NULL );
        !           876:     }
        !           877:   }
        !           878: 
        !           879:   /* TODO(shess) Since tokenization impacts the index, the parameters
        !           880:   ** to the tokenizer need to be identical when a persistent virtual
        !           881:   ** table is re-created.  One solution would be a meta-table to track
        !           882:   ** such information in the database.  Then we could verify that the
        !           883:   ** information is identical on subsequent creates.
        !           884:   */
        !           885:   /* TODO(shess) Why isn't argv already (const char **)? */
        !           886:   rc = m->xCreate(argc-3, (const char **) (argv+3), &v->pTokenizer);
        !           887:   if( rc!=SQLITE_OK ) return rc;
        !           888:   v->pTokenizer->pModule = m;
        !           889: 
        !           890:   /* TODO: verify the existence of backing tables foo_content, foo_term */
        !           891: 
        !           892:   rc = sqlite3_declare_vtab(db, "create table x(content text)");
        !           893:   if( rc!=SQLITE_OK ) return rc;
        !           894: 
        !           895:   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
        !           896: 
        !           897:   *ppVTab = &v->base;
        !           898:   return SQLITE_OK;
        !           899: }
        !           900: 
        !           901: static int fulltextCreate(sqlite3 *db, void *pAux, int argc, char **argv,
        !           902:                           sqlite3_vtab **ppVTab){
        !           903:   int rc;
        !           904:   assert( argc>=3 );
        !           905: 
        !           906:   /* The %_content table holds the text of each full-text item, with
        !           907:   ** the rowid used as the docid.
        !           908:   **
        !           909:   ** The %_term table maps each term to a document list blob
        !           910:   ** containing elements sorted by ascending docid, each element
        !           911:   ** encoded as:
        !           912:   **
        !           913:   **   docid varint-encoded
        !           914:   **   token count varint-encoded
        !           915:   **   "count" token elements (poslist):
        !           916:   **     position varint-encoded as delta from previous position
        !           917:   **     start offset varint-encoded as delta from previous start offset
        !           918:   **     end offset varint-encoded as delta from start offset
        !           919:   **
        !           920:   ** Additionally, doclist blobs can be chunked into multiple rows,
        !           921:   ** using "first" to order the blobs.  "first" is simply the first
        !           922:   ** docid in the blob.
        !           923:   */
        !           924:   /*
        !           925:   ** NOTE(shess) That last sentence is incorrect in the face of
        !           926:   ** deletion, which can leave a doclist that doesn't contain the
        !           927:   ** first from that row.  I _believe_ this does not matter to the
        !           928:   ** operation of the system, but it might be reasonable to update
        !           929:   ** appropriately in case this assumption becomes more important.
        !           930:   */
        !           931:   rc = sql_exec(db, argv[2],
        !           932:     "create table %_content(content text);"
        !           933:     "create table %_term(term text, first integer, doclist blob);"
        !           934:     "create index %_index on %_term(term, first)");
        !           935:   if( rc!=SQLITE_OK ) return rc;
        !           936: 
        !           937:   return fulltextConnect(db, pAux, argc, argv, ppVTab);
        !           938: }
        !           939: 
        !           940: /* Decide how to handle an SQL query.
        !           941:  * At the moment, MATCH queries can include implicit boolean ANDs; we
        !           942:  * haven't implemented phrase searches or OR yet. */
        !           943: static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
        !           944:   int i;
        !           945: 
        !           946:   for(i=0; i<pInfo->nConstraint; ++i){
        !           947:     const struct sqlite3_index_constraint *pConstraint;
        !           948:     pConstraint = &pInfo->aConstraint[i];
        !           949:     if( pConstraint->iColumn==0 &&
        !           950:         pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH &&
        !           951:         pConstraint->usable ){   /* a full-text search */
        !           952:       pInfo->aConstraintUsage[i].argvIndex = 1;
        !           953:       pInfo->aConstraintUsage[i].omit = 1;
        !           954:       pInfo->idxNum = QUERY_FULLTEXT;
        !           955:       pInfo->estimatedCost = 1.0;   /* an arbitrary value for now */
        !           956:       return SQLITE_OK;
        !           957:     }
        !           958:   }
        !           959:   pInfo->idxNum = QUERY_GENERIC;
        !           960:   return SQLITE_OK;
        !           961: }
        !           962: 
        !           963: static int fulltextDisconnect(sqlite3_vtab *pVTab){
        !           964:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
        !           965:   return SQLITE_OK;
        !           966: }
        !           967: 
        !           968: static int fulltextDestroy(sqlite3_vtab *pVTab){
        !           969:   fulltext_vtab *v = (fulltext_vtab *)pVTab;
        !           970: 
        !           971:   int rc = sql_exec(v->db, v->zName,
        !           972:                     "drop table %_content; drop table %_term");
        !           973:   if( rc!=SQLITE_OK ) return rc;
        !           974: 
        !           975:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
        !           976:   return SQLITE_OK;
        !           977: }
        !           978: 
        !           979: static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
        !           980:   fulltext_cursor *c;
        !           981: 
        !           982:   c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
        !           983:   /* sqlite will initialize c->base */
        !           984:   *ppCursor = &c->base;
        !           985: 
        !           986:   return SQLITE_OK;
        !           987: }
        !           988: 
        !           989: static int fulltextClose(sqlite3_vtab_cursor *pCursor){
        !           990:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !           991:   sqlite3_finalize(c->pStmt);
        !           992:   if( c->result.pDoclist!=NULL ){
        !           993:     docListDelete(c->result.pDoclist);
        !           994:   }
        !           995:   free(c);
        !           996:   return SQLITE_OK;
        !           997: }
        !           998: 
        !           999: static int fulltextNext(sqlite3_vtab_cursor *pCursor){
        !          1000:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          1001:   sqlite_int64 iDocid;
        !          1002:   int rc;
        !          1003: 
        !          1004:   switch( c->iCursorType ){
        !          1005:     case QUERY_GENERIC:
        !          1006:       /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
        !          1007:       rc = sqlite3_step(c->pStmt);
        !          1008:       switch( rc ){
        !          1009:         case SQLITE_ROW:
        !          1010:           c->eof = 0;
        !          1011:           return SQLITE_OK;
        !          1012:         case SQLITE_DONE:
        !          1013:           c->eof = 1;
        !          1014:           return SQLITE_OK;
        !          1015:         default:
        !          1016:           c->eof = 1;
        !          1017:           return rc;
        !          1018:       }
        !          1019:     case QUERY_FULLTEXT:
        !          1020:       rc = sqlite3_reset(c->pStmt);
        !          1021:       if( rc!=SQLITE_OK ) return rc;
        !          1022: 
        !          1023:       if( readerAtEnd(&c->result)){
        !          1024:         c->eof = 1;
        !          1025:         return SQLITE_OK;
        !          1026:       }
        !          1027:       iDocid = readDocid(&c->result);
        !          1028:       rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
        !          1029:       if( rc!=SQLITE_OK ) return rc;
        !          1030:       /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
        !          1031:       rc = sqlite3_step(c->pStmt);
        !          1032:       if( rc==SQLITE_ROW ){   /* the case we expect */
        !          1033:         c->eof = 0;
        !          1034:         return SQLITE_OK;
        !          1035:       }
        !          1036:       /* an error occurred; abort */
        !          1037:       return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
        !          1038:     default:
        !          1039:       assert( 0 );
        !          1040:       return SQLITE_ERROR;  /* not reached */
        !          1041:   }
        !          1042: }
        !          1043: 
        !          1044: static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm,
        !          1045:                                sqlite3_stmt **ppStmt){
        !          1046:   int rc;
        !          1047:   if( *ppStmt ){
        !          1048:     rc = sqlite3_reset(*ppStmt);
        !          1049:   } else {
        !          1050:     rc = sql_prepare(v->db, v->zName, ppStmt,
        !          1051:       "select doclist from %_term where term = ? order by first");
        !          1052:   }
        !          1053:   if( rc!=SQLITE_OK ) return rc;
        !          1054: 
        !          1055:   rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT);
        !          1056:   if( rc!=SQLITE_OK ) return rc;
        !          1057: 
        !          1058:   return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */
        !          1059: }
        !          1060: 
        !          1061: /* Read the posting list for [zTerm]; AND it with the doclist [in] to
        !          1062:  * produce the doclist [out], using the given offset [iOffset] for phrase
        !          1063:  * matching.
        !          1064:  * (*pSelect) is used to hold an SQLite statement used inside this function;
        !          1065:  * the caller should initialize *pSelect to NULL before the first call.
        !          1066:  */
        !          1067: static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect,
        !          1068:                        const char *zTerm,
        !          1069:                        DocList *pIn, int iOffset, DocList *out){
        !          1070:   int rc;
        !          1071:   DocListMerge merge;
        !          1072: 
        !          1073:   if( pIn!=NULL && !pIn->nData ){
        !          1074:     /* If [pIn] is already empty, there's no point in reading the
        !          1075:      * posting list to AND it in; return immediately. */
        !          1076:       return SQLITE_OK;
        !          1077:   }
        !          1078: 
        !          1079:   rc = term_select_doclist(v, zTerm, -1, pSelect);
        !          1080:   if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;
        !          1081: 
        !          1082:   mergeInit(&merge, pIn, iOffset, out);
        !          1083:   while( rc==SQLITE_ROW ){
        !          1084:     DocList block;
        !          1085:     docListInit(&block, DL_POSITIONS_OFFSETS,
        !          1086:                 sqlite3_column_blob(*pSelect, 0),
        !          1087:                 sqlite3_column_bytes(*pSelect, 0));
        !          1088:     mergeBlock(&merge, &block);
        !          1089:     docListDestroy(&block);
        !          1090: 
        !          1091:     rc = sqlite3_step(*pSelect);
        !          1092:     if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
        !          1093:       return rc;
        !          1094:     }
        !          1095:   }
        !          1096:   
        !          1097:   return SQLITE_OK;
        !          1098: }
        !          1099: 
        !          1100: typedef struct QueryTerm {
        !          1101:   int is_phrase;    /* true if this term begins a new phrase */
        !          1102:   const char *zTerm;
        !          1103: } QueryTerm;
        !          1104: 
        !          1105: /* A parsed query.
        !          1106:  *
        !          1107:  * As an example, parsing the query ["four score" years "new nation"] will
        !          1108:  * yield a Query with 5 terms:
        !          1109:  *   "four",   is_phrase = 1
        !          1110:  *   "score",  is_phrase = 0
        !          1111:  *   "years",  is_phrase = 1
        !          1112:  *   "new",    is_phrase = 1
        !          1113:  *   "nation", is_phrase = 0
        !          1114:  */
        !          1115: typedef struct Query {
        !          1116:   int nTerms;
        !          1117:   QueryTerm *pTerm;
        !          1118: } Query;
        !          1119: 
        !          1120: static void query_add(Query *q, int is_phrase, const char *zTerm){
        !          1121:   QueryTerm *t;
        !          1122:   ++q->nTerms;
        !          1123:   q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0]));
        !          1124:   t = &q->pTerm[q->nTerms - 1];
        !          1125:   t->is_phrase = is_phrase;
        !          1126:   t->zTerm = zTerm;
        !          1127: }
        !          1128:     
        !          1129: static void query_free(Query *q){
        !          1130:   int i;
        !          1131:   for(i = 0; i < q->nTerms; ++i){
        !          1132:     free((void *) q->pTerm[i].zTerm);
        !          1133:   }
        !          1134:   free(q->pTerm);
        !          1135: }
        !          1136: 
        !          1137: static int tokenize_segment(sqlite3_tokenizer *pTokenizer,
        !          1138:                             const char *zQuery, int in_phrase,
        !          1139:                             Query *pQuery){
        !          1140:   sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
        !          1141:   sqlite3_tokenizer_cursor *pCursor;
        !          1142:   int is_first = 1;
        !          1143:   
        !          1144:   int rc = pModule->xOpen(pTokenizer, zQuery, -1, &pCursor);
        !          1145:   if( rc!=SQLITE_OK ) return rc;
        !          1146:   pCursor->pTokenizer = pTokenizer;
        !          1147: 
        !          1148:   while( 1 ){
        !          1149:     const char *zToken;
        !          1150:     int nToken, iStartOffset, iEndOffset, dummy_pos;
        !          1151: 
        !          1152:     rc = pModule->xNext(pCursor,
        !          1153:                         &zToken, &nToken,
        !          1154:                         &iStartOffset, &iEndOffset,
        !          1155:                         &dummy_pos);
        !          1156:     if( rc!=SQLITE_OK ) break;
        !          1157:     query_add(pQuery, !in_phrase || is_first, string_dup_n(zToken, nToken));
        !          1158:     is_first = 0;
        !          1159:   }
        !          1160: 
        !          1161:   return pModule->xClose(pCursor);
        !          1162: }
        !          1163: 
        !          1164: /* Parse a query string, yielding a Query object. */
        !          1165: static int parse_query(fulltext_vtab *v, const char *zQuery, Query *pQuery){
        !          1166:   char *zQuery1 = string_dup(zQuery);
        !          1167:   int in_phrase = 0;
        !          1168:   char *s = zQuery1;
        !          1169:   pQuery->nTerms = 0;
        !          1170:   pQuery->pTerm = NULL;
        !          1171: 
        !          1172:   while( *s ){
        !          1173:     char *t = s;
        !          1174:     while( *t ){
        !          1175:       if( *t=='"' ){
        !          1176:         *t++ = '\0';
        !          1177:         break;
        !          1178:       }
        !          1179:       ++t;
        !          1180:     }
        !          1181:     if( *s ){
        !          1182:       tokenize_segment(v->pTokenizer, s, in_phrase, pQuery);
        !          1183:     }
        !          1184:     s = t;
        !          1185:     in_phrase = !in_phrase;
        !          1186:   }
        !          1187:   
        !          1188:   free(zQuery1);
        !          1189:   return SQLITE_OK;
        !          1190: }
        !          1191: 
        !          1192: /* Perform a full-text query; return a list of documents in [pResult]. */
        !          1193: static int fulltext_query(fulltext_vtab *v, const char *zQuery,
        !          1194:                           DocList **pResult){
        !          1195:   Query q;
        !          1196:   int phrase_start = -1;
        !          1197:   int i;
        !          1198:   sqlite3_stmt *pSelect = NULL;
        !          1199:   DocList *d = NULL;
        !          1200: 
        !          1201:   int rc = parse_query(v, zQuery, &q);
        !          1202:   if( rc!=SQLITE_OK ) return rc;
        !          1203: 
        !          1204:   /* Merge terms. */
        !          1205:   for(i = 0 ; i < q.nTerms ; ++i){
        !          1206:     /* In each merge step, we need to generate positions whenever we're
        !          1207:      * processing a phrase which hasn't ended yet. */
        !          1208:     int need_positions = i<q.nTerms-1 && !q.pTerm[i+1].is_phrase;
        !          1209:     DocList *next = docListNew(need_positions ? DL_POSITIONS : DL_DOCIDS);
        !          1210:     if( q.pTerm[i].is_phrase ){
        !          1211:       phrase_start = i;
        !          1212:     }
        !          1213:     rc = query_merge(v, &pSelect, q.pTerm[i].zTerm, d, i - phrase_start, next);
        !          1214:     if( rc!=SQLITE_OK ) break;
        !          1215:     if( d!=NULL ){
        !          1216:       docListDelete(d);
        !          1217:     }
        !          1218:     d = next;
        !          1219:   }
        !          1220: 
        !          1221:   sqlite3_finalize(pSelect);
        !          1222:   query_free(&q);
        !          1223:   *pResult = d;
        !          1224:   return rc;
        !          1225: }
        !          1226: 
        !          1227: static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
        !          1228:                           int idxNum, const char *idxStr,
        !          1229:                           int argc, sqlite3_value **argv){
        !          1230:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          1231:   fulltext_vtab *v = cursor_vtab(c);
        !          1232:   int rc;
        !          1233:   const char *zStatement;
        !          1234: 
        !          1235:   c->iCursorType = idxNum;
        !          1236:   switch( idxNum ){
        !          1237:     case QUERY_GENERIC:
        !          1238:       zStatement = "select rowid, content from %_content";
        !          1239:       break;
        !          1240: 
        !          1241:     case QUERY_FULLTEXT:   /* full-text search */
        !          1242:     {
        !          1243:       const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
        !          1244:       DocList *pResult;
        !          1245:       assert( argc==1 );
        !          1246:       rc = fulltext_query(v, zQuery, &pResult);
        !          1247:       if( rc!=SQLITE_OK ) return rc;
        !          1248:       readerInit(&c->result, pResult);
        !          1249:       zStatement = "select rowid, content from %_content where rowid = ?";
        !          1250:       break;
        !          1251:     }
        !          1252: 
        !          1253:     default:
        !          1254:       assert( 0 );
        !          1255:   }
        !          1256: 
        !          1257:   rc = sql_prepare(v->db, v->zName, &c->pStmt, zStatement);
        !          1258:   if( rc!=SQLITE_OK ) return rc;
        !          1259: 
        !          1260:   return fulltextNext(pCursor);
        !          1261: }
        !          1262: 
        !          1263: static int fulltextEof(sqlite3_vtab_cursor *pCursor){
        !          1264:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          1265:   return c->eof;
        !          1266: }
        !          1267: 
        !          1268: static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
        !          1269:                           sqlite3_context *pContext, int idxCol){
        !          1270:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          1271:   const char *s;
        !          1272: 
        !          1273:   assert( idxCol==0 );
        !          1274:   s = (const char *) sqlite3_column_text(c->pStmt, 1);
        !          1275:   sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT);
        !          1276: 
        !          1277:   return SQLITE_OK;
        !          1278: }
        !          1279: 
        !          1280: static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
        !          1281:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          1282: 
        !          1283:   *pRowid = sqlite3_column_int64(c->pStmt, 0);
        !          1284:   return SQLITE_OK;
        !          1285: }
        !          1286: 
        !          1287: /* Build a hash table containing all terms in zText. */
        !          1288: static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer,
        !          1289:                        const char *zText, sqlite_int64 iDocid){
        !          1290:   sqlite3_tokenizer_cursor *pCursor;
        !          1291:   const char *pToken;
        !          1292:   int nTokenBytes;
        !          1293:   int iStartOffset, iEndOffset, iPosition;
        !          1294: 
        !          1295:   int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
        !          1296:   if( rc!=SQLITE_OK ) return rc;
        !          1297: 
        !          1298:   pCursor->pTokenizer = pTokenizer;
        !          1299:   HashInit(terms, HASH_STRING, 1);
        !          1300:   while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
        !          1301:                                                &pToken, &nTokenBytes,
        !          1302:                                                &iStartOffset, &iEndOffset,
        !          1303:                                                &iPosition) ){
        !          1304:     DocList *p;
        !          1305: 
        !          1306:     /* Positions can't be negative; we use -1 as a terminator internally. */
        !          1307:     if( iPosition<0 ) {
        !          1308:       rc = SQLITE_ERROR;  
        !          1309:       goto err;
        !          1310:     }
        !          1311: 
        !          1312:     p = HashFind(terms, pToken, nTokenBytes);
        !          1313:     if( p==NULL ){
        !          1314:       p = docListNew(DL_POSITIONS_OFFSETS);
        !          1315:       docListAddDocid(p, iDocid);
        !          1316:       HashInsert(terms, pToken, nTokenBytes, p);
        !          1317:     }
        !          1318:     docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset);
        !          1319:   }
        !          1320: 
        !          1321: err:
        !          1322:   /* TODO(shess) Check return?  Should this be able to cause errors at
        !          1323:   ** this point?  Actually, same question about sqlite3_finalize(),
        !          1324:   ** though one could argue that failure there means that the data is
        !          1325:   ** not durable.  *ponder*
        !          1326:   */
        !          1327:   pTokenizer->pModule->xClose(pCursor);
        !          1328:   return rc;
        !          1329: }
        !          1330: /* Update the %_terms table to map the term [zTerm] to the given rowid. */
        !          1331: static int index_insert_term(fulltext_vtab *v, const char *zTerm, int nTerm,
        !          1332:                              sqlite_int64 iDocid, DocList *p){
        !          1333:   sqlite_int64 iFirst;
        !          1334:   sqlite_int64 iIndexRow;
        !          1335:   DocList doclist;
        !          1336: 
        !          1337:   int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
        !          1338:   if( rc==SQLITE_DONE ){
        !          1339:     docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
        !          1340:     if( docListUpdate(&doclist, iDocid, p) ){
        !          1341:       rc = term_insert(v, zTerm, nTerm, iDocid, &doclist);
        !          1342:       docListDestroy(&doclist);
        !          1343:       return rc;
        !          1344:     }
        !          1345:     return SQLITE_OK;
        !          1346:   }
        !          1347:   if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
        !          1348: 
        !          1349:   /* This word is in the index; add this document ID to its blob. */
        !          1350: 
        !          1351:   rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
        !          1352:   if( rc!=SQLITE_OK ) return rc;
        !          1353: 
        !          1354:   if( docListUpdate(&doclist, iDocid, p) ){
        !          1355:     /* If the blob is too big, split it in half. */
        !          1356:     if( doclist.nData>CHUNK_MAX ){
        !          1357:       DocList half;
        !          1358:       if( docListSplit(&doclist, &half) ){
        !          1359:         rc = term_insert(v, zTerm, nTerm, firstDocid(&half), &half);
        !          1360:         docListDestroy(&half);
        !          1361:         if( rc!=SQLITE_OK ) goto err;
        !          1362:       }
        !          1363:     }
        !          1364:     rc = term_update(v, iIndexRow, &doclist);
        !          1365:   }
        !          1366: 
        !          1367: err:
        !          1368:   docListDestroy(&doclist);
        !          1369:   return rc;
        !          1370: }
        !          1371: 
        !          1372: /* Insert a row into the full-text index; set *piRowid to be the ID of the
        !          1373:  * new row. */
        !          1374: static int index_insert(fulltext_vtab *v,
        !          1375:                         sqlite3_value *pRequestRowid, const char *zText,
        !          1376:                         sqlite_int64 *piRowid){
        !          1377:   Hash terms;  /* maps term string -> PosList */
        !          1378:   HashElem *e;
        !          1379: 
        !          1380:   int rc = content_insert(v, pRequestRowid, zText, -1);
        !          1381:   if( rc!=SQLITE_OK ) return rc;
        !          1382:   *piRowid = sqlite3_last_insert_rowid(v->db);
        !          1383: 
        !          1384:   if( !zText ) return SQLITE_OK;   /* nothing to index */
        !          1385: 
        !          1386:   rc = build_terms(&terms, v->pTokenizer, zText, *piRowid);
        !          1387:   if( rc!=SQLITE_OK ) return rc;
        !          1388: 
        !          1389:   for(e=HashFirst(&terms); e; e=HashNext(e)){
        !          1390:     DocList *p = HashData(e);
        !          1391:     rc = index_insert_term(v, HashKey(e), HashKeysize(e), *piRowid, p);
        !          1392:     if( rc!=SQLITE_OK ) break;
        !          1393:   }
        !          1394: 
        !          1395:   for(e=HashFirst(&terms); e; e=HashNext(e)){
        !          1396:     DocList *p = HashData(e);
        !          1397:     docListDelete(p);
        !          1398:   }
        !          1399:   HashClear(&terms);
        !          1400:   return rc;
        !          1401: }
        !          1402: 
        !          1403: static int index_delete_term(fulltext_vtab *v, const char *zTerm, int nTerm,
        !          1404:                              sqlite_int64 iDocid){
        !          1405:   sqlite_int64 iFirst;
        !          1406:   sqlite_int64 iIndexRow;
        !          1407:   DocList doclist;
        !          1408: 
        !          1409:   int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst);
        !          1410:   if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
        !          1411: 
        !          1412:   rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist);
        !          1413:   if( rc!=SQLITE_OK ) return rc;
        !          1414: 
        !          1415:   if( docListUpdate(&doclist, iDocid, NULL) ){
        !          1416:     if( doclist.nData>0 ){
        !          1417:       rc = term_update(v, iIndexRow, &doclist);
        !          1418:     } else {  /* empty posting list */
        !          1419:       rc = term_delete(v, iIndexRow);
        !          1420:     }
        !          1421:   }
        !          1422:   docListDestroy(&doclist);
        !          1423:   return rc;
        !          1424: }
        !          1425: 
        !          1426: /* Delete a row from the full-text index. */
        !          1427: static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
        !          1428:   char *zText;
        !          1429:   Hash terms;
        !          1430:   HashElem *e;
        !          1431: 
        !          1432:   int rc = content_select(v, iRow, &zText);
        !          1433:   if( rc!=SQLITE_OK ) return rc;
        !          1434: 
        !          1435:   rc = build_terms(&terms, v->pTokenizer, zText, iRow);
        !          1436:   free(zText);
        !          1437:   if( rc!=SQLITE_OK ) return rc;
        !          1438: 
        !          1439:   for(e=HashFirst(&terms); e; e=HashNext(e)){
        !          1440:     rc = index_delete_term(v, HashKey(e), HashKeysize(e), iRow);
        !          1441:     if( rc!=SQLITE_OK ) break;
        !          1442:   }
        !          1443:   for(e=HashFirst(&terms); e; e=HashNext(e)){
        !          1444:     DocList *p = HashData(e);
        !          1445:     docListDelete(p);
        !          1446:   }
        !          1447:   HashClear(&terms);
        !          1448: 
        !          1449:   return content_delete(v, iRow);
        !          1450: }
        !          1451: 
        !          1452: static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
        !          1453:                    sqlite_int64 *pRowid){
        !          1454:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
        !          1455: 
        !          1456:   if( nArg<2 ){
        !          1457:     return index_delete(v, sqlite3_value_int64(ppArg[0]));
        !          1458:   }
        !          1459: 
        !          1460:   if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
        !          1461:     return SQLITE_ERROR;   /* an update; not yet supported */
        !          1462:   }
        !          1463: 
        !          1464:   assert( nArg==3 );    /* ppArg[1] = rowid, ppArg[2] = content */
        !          1465:   return index_insert(v, ppArg[1],
        !          1466:                       (const char *)sqlite3_value_text(ppArg[2]), pRowid);
        !          1467: }
        !          1468: 
        !          1469: static sqlite3_module fulltextModule = {
        !          1470:   0,
        !          1471:   fulltextCreate,
        !          1472:   fulltextConnect,
        !          1473:   fulltextBestIndex,
        !          1474:   fulltextDisconnect,
        !          1475:   fulltextDestroy,
        !          1476:   fulltextOpen,
        !          1477:   fulltextClose,
        !          1478:   fulltextFilter,
        !          1479:   fulltextNext,
        !          1480:   fulltextEof,
        !          1481:   fulltextColumn,
        !          1482:   fulltextRowid,
        !          1483:   fulltextUpdate
        !          1484: };
        !          1485: 
        !          1486: int fulltext_init(sqlite3 *db){
        !          1487:  return sqlite3_create_module(db, "fulltext", &fulltextModule, 0);
        !          1488: }
        !          1489: 
        !          1490: #if !SQLITE_CORE
        !          1491: int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
        !          1492:                            const sqlite3_api_routines *pApi){
        !          1493:  SQLITE_EXTENSION_INIT2(pApi)
        !          1494:  return fulltext_init(db);
        !          1495: }
        !          1496: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>