File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / ext / fts1 / fts1.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:17 2012 UTC (13 years, 1 month ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: /* fts1 has a design flaw which can lead to database corruption (see
    2: ** below).  It is recommended not to use it any longer, instead use
    3: ** fts3 (or higher).  If you believe that your use of fts1 is safe,
    4: ** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
    5: */
    6: #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
    7:         && !defined(SQLITE_ENABLE_BROKEN_FTS1)
    8: #error fts1 has a design flaw and has been deprecated.
    9: #endif
   10: /* The flaw is that fts1 uses the content table's unaliased rowid as
   11: ** the unique docid.  fts1 embeds the rowid in the index it builds,
   12: ** and expects the rowid to not change.  The SQLite VACUUM operation
   13: ** will renumber such rowids, thereby breaking fts1.  If you are using
   14: ** fts1 in a system which has disabled VACUUM, then you can continue
   15: ** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
   16: ** VACUUM, though systems using auto_vacuum are unlikely to invoke
   17: ** VACUUM.
   18: **
   19: ** fts1 should be safe even across VACUUM if you only insert documents
   20: ** and never delete.
   21: */
   22: 
   23: /* The author disclaims copyright to this source code.
   24:  *
   25:  * This is an SQLite module implementing full-text search.
   26:  */
   27: 
   28: /*
   29: ** The code in this file is only compiled if:
   30: **
   31: **     * The FTS1 module is being built as an extension
   32: **       (in which case SQLITE_CORE is not defined), or
   33: **
   34: **     * The FTS1 module is being built into the core of
   35: **       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
   36: */
   37: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
   38: 
   39: #if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
   40: # define SQLITE_CORE 1
   41: #endif
   42: 
   43: #include <assert.h>
   44: #include <stdlib.h>
   45: #include <stdio.h>
   46: #include <string.h>
   47: #include <ctype.h>
   48: 
   49: #include "fts1.h"
   50: #include "fts1_hash.h"
   51: #include "fts1_tokenizer.h"
   52: #include "sqlite3.h"
   53: #include "sqlite3ext.h"
   54: SQLITE_EXTENSION_INIT1
   55: 
   56: 
   57: #if 0
   58: # define TRACE(A)  printf A; fflush(stdout)
   59: #else
   60: # define TRACE(A)
   61: #endif
   62: 
   63: /* utility functions */
   64: 
/* A growable character buffer.  initStringBuffer() sets it up and
** nappend()/append() add text to it, keeping s[] null-terminated. */
typedef struct StringBuffer {
  int len;      /* length, not including null terminator */
  int alloced;  /* Space allocated for s[] */ 
  char *s;      /* Content of the string */
} StringBuffer;
   70: 
   71: static void initStringBuffer(StringBuffer *sb){
   72:   sb->len = 0;
   73:   sb->alloced = 100;
   74:   sb->s = malloc(100);
   75:   sb->s[0] = '\0';
   76: }
   77: 
   78: static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
   79:   if( sb->len + nFrom >= sb->alloced ){
   80:     sb->alloced = sb->len + nFrom + 100;
   81:     sb->s = realloc(sb->s, sb->alloced+1);
   82:     if( sb->s==0 ){
   83:       initStringBuffer(sb);
   84:       return;
   85:     }
   86:   }
   87:   memcpy(sb->s + sb->len, zFrom, nFrom);
   88:   sb->len += nFrom;
   89:   sb->s[sb->len] = 0;
   90: }
   91: static void append(StringBuffer *sb, const char *zFrom){
   92:   nappend(sb, zFrom, strlen(zFrom));
   93: }
   94: 
   95: /* We encode variable-length integers in little-endian order using seven bits
   96:  * per byte as follows:
   97: **
   98: ** KEY:
   99: **         A = 0xxxxxxx    7 bits of data and one flag bit
  100: **         B = 1xxxxxxx    7 bits of data and one flag bit
  101: **
  102: **  7 bits - A
  103: ** 14 bits - BA
  104: ** 21 bits - BBA
  105: ** and so on.
  106: */
  107: 
/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer:
** each byte carries 7 bits of data, and 64 bits need ten 7-bit groups. */
#define VARINT_MAX 10
  110: 
  111: /* Write a 64-bit variable-length integer to memory starting at p[0].
  112:  * The length of data written will be between 1 and VARINT_MAX bytes.
  113:  * The number of bytes written is returned. */
  114: static int putVarint(char *p, sqlite_int64 v){
  115:   unsigned char *q = (unsigned char *) p;
  116:   sqlite_uint64 vu = v;
  117:   do{
  118:     *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
  119:     vu >>= 7;
  120:   }while( vu!=0 );
  121:   q[-1] &= 0x7f;  /* turn off high bit in final byte */
  122:   assert( q - (unsigned char *)p <= VARINT_MAX );
  123:   return (int) (q - (unsigned char *)p);
  124: }
  125: 
  126: /* Read a 64-bit variable-length integer from memory starting at p[0].
  127:  * Return the number of bytes read, or 0 on error.
  128:  * The value is stored in *v. */
  129: static int getVarint(const char *p, sqlite_int64 *v){
  130:   const unsigned char *q = (const unsigned char *) p;
  131:   sqlite_uint64 x = 0, y = 1;
  132:   while( (*q & 0x80) == 0x80 ){
  133:     x += y * (*q++ & 0x7f);
  134:     y <<= 7;
  135:     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
  136:       assert( 0 );
  137:       return 0;
  138:     }
  139:   }
  140:   x += y * (*q++);
  141:   *v = (sqlite_int64) x;
  142:   return (int) (q - (unsigned char *)p);
  143: }
  144: 
  145: static int getVarint32(const char *p, int *pi){
  146:  sqlite_int64 i;
  147:  int ret = getVarint(p, &i);
  148:  *pi = (int) i;
  149:  assert( *pi==i );
  150:  return ret;
  151: }
  152: 
  153: /*** Document lists ***
  154:  *
  155:  * A document list holds a sorted list of varint-encoded document IDs.
  156:  *
  157:  * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
  158:  *
  159:  * array {
  160:  *   varint docid;
  161:  *   array {
  162:  *     varint position;     (delta from previous position plus POS_BASE)
  163:  *     varint startOffset;  (delta from previous startOffset)
  164:  *     varint endOffset;    (delta from startOffset)
  165:  *   }
  166:  * }
  167:  *
  168:  * Here, array { X } means zero or more occurrences of X, adjacent in memory.
  169:  *
  170:  * A position list may hold positions for text in multiple columns.  A position
  171:  * POS_COLUMN is followed by a varint containing the index of the column for
  172:  * following positions in the list.  Any positions appearing before any
  173:  * occurrences of POS_COLUMN are for column 0.
  174:  *
  175:  * A doclist with type DL_POSITIONS is like the above, but holds only docids
  176:  * and positions without offset information.
  177:  *
  178:  * A doclist with type DL_DOCIDS is like the above, but holds only docids
  179:  * without positions or offset information.
  180:  *
  181:  * On disk, every document list has positions and offsets, so we don't bother
  182:  * to serialize a doclist's type.
  183:  * 
  184:  * We don't yet delta-encode document IDs; doing so will probably be a
  185:  * modest win.
  186:  *
  187:  * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
  188:  * After the first offset, estimate the next offset by using the
  189:  * current token position and the previous token position and offset,
  190:  * offset to handle some variance.  So the estimate would be
  191:  * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
  192:  * as normal.  Offsets more than 64 chars from the estimate are
  193:  * encoded as the delta to the previous start offset + 128.  An
  194:  * additional tiny increment can be gained by using the end offset of
  195:  * the previous token to make the estimate a tiny bit more precise.
  196: */
  197: 
  198: /* It is not safe to call isspace(), tolower(), or isalnum() on
  199: ** hi-bit-set characters.  This is the same solution used in the
  200: ** tokenizer.
  201: */
  202: /* TODO(shess) The snippet-generation code should be using the
  203: ** tokenizer-generated tokens rather than doing its own local
  204: ** tokenization.
  205: */
  206: /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
  207: static int safe_isspace(char c){
  208:   return (c&0x80)==0 ? isspace(c) : 0;
  209: }
  210: static int safe_tolower(char c){
  211:   return (c&0x80)==0 ? tolower(c) : c;
  212: }
  213: static int safe_isalnum(char c){
  214:   return (c&0x80)==0 ? isalnum(c) : 0;
  215: }
  216: 
/* The kinds of doclist, ordered so that each value carries everything
** the previous one does plus more.  Code in this file relies on that
** ordering when it tests, for example, iType>=DL_POSITIONS. */
typedef enum DocListType {
  DL_DOCIDS,              /* docids only */
  DL_POSITIONS,           /* docids + positions */
  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
} DocListType;
  222: 
/*
** By default, only positions and not offsets are stored in the doclists.
** To change this so that offsets are stored too, compile with
**
**          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
**
** The #ifndef guard below is what lets the command-line definition win.
*/
#ifndef DL_DEFAULT
# define DL_DEFAULT DL_POSITIONS
#endif
  233: 
/* An in-memory document list: a byte buffer of varints plus the running
** state needed to delta-encode the next position appended to it. */
typedef struct DocList {
  char *pData;        /* malloc()ed encoded doclist bytes, or NULL if empty */
  int nData;          /* number of bytes in use at pData */
  DocListType iType;  /* what each entry holds (docids/positions/offsets) */
  int iLastColumn;    /* the last column written */
  int iLastPos;       /* the last position written */
  int iLastOffset;    /* the last start offset written */
} DocList;
  242: 
/* Reserved values in a position list.  Real position deltas are stored
** with POS_BASE added (see addPos()), so they never collide with the
** two marker values. */
enum {
  POS_END = 0,        /* end of this position list */
  POS_COLUMN,         /* followed by new column number */
  POS_BASE            /* bias added to every encoded position delta */
};
  248: 
  249: /* Initialize a new DocList to hold the given data. */
  250: static void docListInit(DocList *d, DocListType iType,
  251:                         const char *pData, int nData){
  252:   d->nData = nData;
  253:   if( nData>0 ){
  254:     d->pData = malloc(nData);
  255:     memcpy(d->pData, pData, nData);
  256:   } else {
  257:     d->pData = NULL;
  258:   }
  259:   d->iType = iType;
  260:   d->iLastColumn = 0;
  261:   d->iLastPos = d->iLastOffset = 0;
  262: }
  263: 
  264: /* Create a new dynamically-allocated DocList. */
  265: static DocList *docListNew(DocListType iType){
  266:   DocList *d = (DocList *) malloc(sizeof(DocList));
  267:   docListInit(d, iType, 0, 0);
  268:   return d;
  269: }
  270: 
  271: static void docListDestroy(DocList *d){
  272:   free(d->pData);
  273: #ifndef NDEBUG
  274:   memset(d, 0x55, sizeof(*d));
  275: #endif
  276: }
  277: 
  278: static void docListDelete(DocList *d){
  279:   docListDestroy(d);
  280:   free(d);
  281: }
  282: 
  283: static char *docListEnd(DocList *d){
  284:   return d->pData + d->nData;
  285: }
  286: 
  287: /* Append a varint to a DocList's data. */
  288: static void appendVarint(DocList *d, sqlite_int64 i){
  289:   char c[VARINT_MAX];
  290:   int n = putVarint(c, i);
  291:   d->pData = realloc(d->pData, d->nData + n);
  292:   memcpy(d->pData + d->nData, c, n);
  293:   d->nData += n;
  294: }
  295: 
  296: static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
  297:   appendVarint(d, iDocid);
  298:   if( d->iType>=DL_POSITIONS ){
  299:     appendVarint(d, POS_END);  /* initially empty position list */
  300:     d->iLastColumn = 0;
  301:     d->iLastPos = d->iLastOffset = 0;
  302:   }
  303: }
  304: 
  305: /* helper function for docListAddPos and docListAddPosOffset */
  306: static void addPos(DocList *d, int iColumn, int iPos){
  307:   assert( d->nData>0 );
  308:   --d->nData;  /* remove previous terminator */
  309:   if( iColumn!=d->iLastColumn ){
  310:     assert( iColumn>d->iLastColumn );
  311:     appendVarint(d, POS_COLUMN);
  312:     appendVarint(d, iColumn);
  313:     d->iLastColumn = iColumn;
  314:     d->iLastPos = d->iLastOffset = 0;
  315:   }
  316:   assert( iPos>=d->iLastPos );
  317:   appendVarint(d, iPos-d->iLastPos+POS_BASE);
  318:   d->iLastPos = iPos;
  319: }
  320: 
  321: /* Add a position to the last position list in a doclist. */
  322: static void docListAddPos(DocList *d, int iColumn, int iPos){
  323:   assert( d->iType==DL_POSITIONS );
  324:   addPos(d, iColumn, iPos);
  325:   appendVarint(d, POS_END);  /* add new terminator */
  326: }
  327: 
  328: /*
  329: ** Add a position and starting and ending offsets to a doclist.
  330: **
  331: ** If the doclist is setup to handle only positions, then insert
  332: ** the position only and ignore the offsets.
  333: */
  334: static void docListAddPosOffset(
  335:   DocList *d,             /* Doclist under construction */
  336:   int iColumn,            /* Column the inserted term is part of */
  337:   int iPos,               /* Position of the inserted term */
  338:   int iStartOffset,       /* Starting offset of inserted term */
  339:   int iEndOffset          /* Ending offset of inserted term */
  340: ){
  341:   assert( d->iType>=DL_POSITIONS );
  342:   addPos(d, iColumn, iPos);
  343:   if( d->iType==DL_POSITIONS_OFFSETS ){
  344:     assert( iStartOffset>=d->iLastOffset );
  345:     appendVarint(d, iStartOffset-d->iLastOffset);
  346:     d->iLastOffset = iStartOffset;
  347:     assert( iEndOffset>=iStartOffset );
  348:     appendVarint(d, iEndOffset-iStartOffset);
  349:   }
  350:   appendVarint(d, POS_END);  /* add new terminator */
  351: }
  352: 
/*
** A DocListReader object is a cursor into a doclist.  Initialize
** the cursor to the beginning of the doclist by calling readerInit().
** Then use routines
**
**      peekDocid()
**      readDocid()
**      readPosition()
**      skipPositionList()
**      and so forth...
**
** to read information out of the doclist.  When we reach the end
** of the doclist, atEnd() returns TRUE.
**
** The reader does not own the doclist; it only points into it.
*/
typedef struct DocListReader {
  DocList *pDoclist;  /* The document list we are stepping through */
  char *p;            /* Pointer to next unread byte in the doclist */
  int iLastColumn;    /* column of the last position read, or -1 when not in a position list */
  int iLastPos;  /* the last position read, or -1 when not in a position list */
} DocListReader;
  373: 
  374: /*
  375: ** Initialize the DocListReader r to point to the beginning of pDoclist.
  376: */
  377: static void readerInit(DocListReader *r, DocList *pDoclist){
  378:   r->pDoclist = pDoclist;
  379:   if( pDoclist!=NULL ){
  380:     r->p = pDoclist->pData;
  381:   }
  382:   r->iLastColumn = -1;
  383:   r->iLastPos = -1;
  384: }
  385: 
  386: /*
  387: ** Return TRUE if we have reached then end of pReader and there is
  388: ** nothing else left to read.
  389: */
  390: static int atEnd(DocListReader *pReader){
  391:   return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
  392: }
  393: 
  394: /* Peek at the next docid without advancing the read pointer. 
  395: */
  396: static sqlite_int64 peekDocid(DocListReader *pReader){
  397:   sqlite_int64 ret;
  398:   assert( !atEnd(pReader) );
  399:   assert( pReader->iLastPos==-1 );
  400:   getVarint(pReader->p, &ret);
  401:   return ret;
  402: }
  403: 
  404: /* Read the next docid.   See also nextDocid().
  405: */
  406: static sqlite_int64 readDocid(DocListReader *pReader){
  407:   sqlite_int64 ret;
  408:   assert( !atEnd(pReader) );
  409:   assert( pReader->iLastPos==-1 );
  410:   pReader->p += getVarint(pReader->p, &ret);
  411:   if( pReader->pDoclist->iType>=DL_POSITIONS ){
  412:     pReader->iLastColumn = 0;
  413:     pReader->iLastPos = 0;
  414:   }
  415:   return ret;
  416: }
  417: 
  418: /* Read the next position and column index from a position list.
  419:  * Returns the position, or -1 at the end of the list. */
  420: static int readPosition(DocListReader *pReader, int *iColumn){
  421:   int i;
  422:   int iType = pReader->pDoclist->iType;
  423: 
  424:   if( pReader->iLastPos==-1 ){
  425:     return -1;
  426:   }
  427:   assert( !atEnd(pReader) );
  428: 
  429:   if( iType<DL_POSITIONS ){
  430:     return -1;
  431:   }
  432:   pReader->p += getVarint32(pReader->p, &i);
  433:   if( i==POS_END ){
  434:     pReader->iLastColumn = pReader->iLastPos = -1;
  435:     *iColumn = -1;
  436:     return -1;
  437:   }
  438:   if( i==POS_COLUMN ){
  439:     pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
  440:     pReader->iLastPos = 0;
  441:     pReader->p += getVarint32(pReader->p, &i);
  442:     assert( i>=POS_BASE );
  443:   }
  444:   pReader->iLastPos += ((int) i)-POS_BASE;
  445:   if( iType>=DL_POSITIONS_OFFSETS ){
  446:     /* Skip over offsets, ignoring them for now. */
  447:     int iStart, iEnd;
  448:     pReader->p += getVarint32(pReader->p, &iStart);
  449:     pReader->p += getVarint32(pReader->p, &iEnd);
  450:   }
  451:   *iColumn = pReader->iLastColumn;
  452:   return pReader->iLastPos;
  453: }
  454: 
  455: /* Skip past the end of a position list. */
  456: static void skipPositionList(DocListReader *pReader){
  457:   DocList *p = pReader->pDoclist;
  458:   if( p && p->iType>=DL_POSITIONS ){
  459:     int iColumn;
  460:     while( readPosition(pReader, &iColumn)!=-1 ){}
  461:   }
  462: }
  463: 
  464: /* Skip over a docid, including its position list if the doclist has
  465:  * positions. */
  466: static void skipDocument(DocListReader *pReader){
  467:   readDocid(pReader);
  468:   skipPositionList(pReader);
  469: }
  470: 
  471: /* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
  472:  * matching [iDocid] was found.  */
  473: static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
  474:   sqlite_int64 d = 0;
  475:   while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
  476:     skipDocument(pReader);
  477:   }
  478:   return !atEnd(pReader) && d==iDocid;
  479: }
  480: 
  481: /* Return the first document in a document list.
  482: */
  483: static sqlite_int64 firstDocid(DocList *d){
  484:   DocListReader r;
  485:   readerInit(&r, d);
  486:   return readDocid(&r);
  487: }
  488: 
  489: #ifdef SQLITE_DEBUG
  490: /*
  491: ** This routine is used for debugging purpose only.
  492: **
  493: ** Write the content of a doclist to standard output.
  494: */
  495: static void printDoclist(DocList *p){
  496:   DocListReader r;
  497:   const char *zSep = "";
  498: 
  499:   readerInit(&r, p);
  500:   while( !atEnd(&r) ){
  501:     sqlite_int64 docid = readDocid(&r);
  502:     if( docid==0 ){
  503:       skipPositionList(&r);
  504:       continue;
  505:     }
  506:     printf("%s%lld", zSep, docid);
  507:     zSep =  ",";
  508:     if( p->iType>=DL_POSITIONS ){
  509:       int iPos, iCol;
  510:       const char *zDiv = "";
  511:       printf("(");
  512:       while( (iPos = readPosition(&r, &iCol))>=0 ){
  513:         printf("%s%d:%d", zDiv, iCol, iPos);
  514:         zDiv = ":";
  515:       }
  516:       printf(")");
  517:     }
  518:   }
  519:   printf("\n");
  520:   fflush(stdout);
  521: }
  522: #endif /* SQLITE_DEBUG */
  523: 
  524: /* Trim the given doclist to contain only positions in column
  525:  * [iRestrictColumn]. */
  526: static void docListRestrictColumn(DocList *in, int iRestrictColumn){
  527:   DocListReader r;
  528:   DocList out;
  529: 
  530:   assert( in->iType>=DL_POSITIONS );
  531:   readerInit(&r, in);
  532:   docListInit(&out, DL_POSITIONS, NULL, 0);
  533: 
  534:   while( !atEnd(&r) ){
  535:     sqlite_int64 iDocid = readDocid(&r);
  536:     int iPos, iColumn;
  537: 
  538:     docListAddDocid(&out, iDocid);
  539:     while( (iPos = readPosition(&r, &iColumn)) != -1 ){
  540:       if( iColumn==iRestrictColumn ){
  541:         docListAddPos(&out, iColumn, iPos);
  542:       }
  543:     }
  544:   }
  545: 
  546:   docListDestroy(in);
  547:   *in = out;
  548: }
  549: 
  550: /* Trim the given doclist by discarding any docids without any remaining
  551:  * positions. */
  552: static void docListDiscardEmpty(DocList *in) {
  553:   DocListReader r;
  554:   DocList out;
  555: 
  556:   /* TODO: It would be nice to implement this operation in place; that
  557:    * could save a significant amount of memory in queries with long doclists. */
  558:   assert( in->iType>=DL_POSITIONS );
  559:   readerInit(&r, in);
  560:   docListInit(&out, DL_POSITIONS, NULL, 0);
  561: 
  562:   while( !atEnd(&r) ){
  563:     sqlite_int64 iDocid = readDocid(&r);
  564:     int match = 0;
  565:     int iPos, iColumn;
  566:     while( (iPos = readPosition(&r, &iColumn)) != -1 ){
  567:       if( !match ){
  568:         docListAddDocid(&out, iDocid);
  569:         match = 1;
  570:       }
  571:       docListAddPos(&out, iColumn, iPos);
  572:     }
  573:   }
  574: 
  575:   docListDestroy(in);
  576:   *in = out;
  577: }
  578: 
/* Helper function for docListUpdate() and docListAccumulate().
** Splices a doclist element into the doclist represented by r,
** leaving r pointing after the newly spliced element.
**
** The reader is first advanced to the first entry whose docid is not
** less than iDocid.  If that entry's docid equals iDocid, it is
** replaced by the nSource bytes at pSource; otherwise the bytes are
** inserted at that point (which is the end of the list if no such
** entry exists).
**
** NOTE(review): the result of realloc() is not checked, and pSource is
** presumably expected not to point into this doclist's own buffer,
** since that buffer may be moved before pSource is copied -- confirm
** against callers.
*/
static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
                                 const char *pSource, int nSource){
  DocList *d = r->pDoclist;
  char *pTarget;
  int nTarget, found;

  found = skipToDocid(r, iDocid);

  /* Describe slice in d to place pSource/nSource. */
  pTarget = r->p;
  if( found ){
    /* Existing entry: the slice to overwrite is the whole entry. */
    skipDocument(r);
    nTarget = r->p-pTarget;
  }else{
    /* No existing entry: pure insertion at pTarget. */
    nTarget = 0;
  }

  /* The sense of the following is that there are three possibilities.
  ** If nTarget==nSource, we should not move any memory nor realloc.
  ** If nTarget>nSource, trim target and realloc.
  ** If nTarget<nSource, realloc then expand target.
  */
  if( nTarget>nSource ){
    /* Shrinking: slide the tail down before the buffer gets smaller. */
    memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
  }
  if( nTarget!=nSource ){
    /* realloc() may move the buffer, so remember pTarget as an offset
    ** and recompute the pointer afterwards. */
    int iDoclist = pTarget-d->pData;
    d->pData = realloc(d->pData, d->nData+nSource-nTarget);
    pTarget = d->pData+iDoclist;
  }
  if( nTarget<nSource ){
    /* Growing: slide the tail up now that the buffer is large enough.
    ** d->nData still holds the old size here, so docListEnd() marks the
    ** end of the old content. */
    memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
  }

  memcpy(pTarget, pSource, nSource);
  d->nData += nSource-nTarget;
  r->p = pTarget+nSource;
}
  621: 
  622: /* Insert/update pUpdate into the doclist. */
  623: static void docListUpdate(DocList *d, DocList *pUpdate){
  624:   DocListReader reader;
  625: 
  626:   assert( d!=NULL && pUpdate!=NULL );
  627:   assert( d->iType==pUpdate->iType);
  628: 
  629:   readerInit(&reader, d);
  630:   docListSpliceElement(&reader, firstDocid(pUpdate),
  631:                        pUpdate->pData, pUpdate->nData);
  632: }
  633: 
  634: /* Propagate elements from pUpdate to pAcc, overwriting elements with
  635: ** matching docids.
  636: */
  637: static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
  638:   DocListReader accReader, updateReader;
  639: 
  640:   /* Handle edge cases where one doclist is empty. */
  641:   assert( pAcc!=NULL );
  642:   if( pUpdate==NULL || pUpdate->nData==0 ) return;
  643:   if( pAcc->nData==0 ){
  644:     pAcc->pData = malloc(pUpdate->nData);
  645:     memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
  646:     pAcc->nData = pUpdate->nData;
  647:     return;
  648:   }
  649: 
  650:   readerInit(&accReader, pAcc);
  651:   readerInit(&updateReader, pUpdate);
  652: 
  653:   while( !atEnd(&updateReader) ){
  654:     char *pSource = updateReader.p;
  655:     sqlite_int64 iDocid = readDocid(&updateReader);
  656:     skipPositionList(&updateReader);
  657:     docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
  658:   }
  659: }
  660: 
  661: /*
  662: ** Read the next docid off of pIn.  Return 0 if we reach the end.
  663: *
  664: * TODO: This assumes that docids are never 0, but they may actually be 0 since
  665: * users can choose docids when inserting into a full-text table.  Fix this.
  666: */
  667: static sqlite_int64 nextDocid(DocListReader *pIn){
  668:   skipPositionList(pIn);
  669:   return atEnd(pIn) ? 0 : readDocid(pIn);
  670: }
  671: 
  672: /*
  673: ** pLeft and pRight are two DocListReaders that are pointing to
  674: ** positions lists of the same document: iDocid. 
  675: **
  676: ** If there are no instances in pLeft or pRight where the position
  677: ** of pLeft is one less than the position of pRight, then this
  678: ** routine adds nothing to pOut.
  679: **
  680: ** If there are one or more instances where positions from pLeft
  681: ** are exactly one less than positions from pRight, then add a new
  682: ** document record to pOut.  If pOut wants to hold positions, then
  683: ** include the positions from pRight that are one more than a
  684: ** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
  685: **
  686: ** pLeft and pRight are left pointing at the next document record.
  687: */
  688: static void mergePosList(
  689:   DocListReader *pLeft,    /* Left position list */
  690:   DocListReader *pRight,   /* Right position list */
  691:   sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
  692:   DocList *pOut            /* Write the merged document record here */
  693: ){
  694:   int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
  695:   int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
  696:   int match = 0;
  697: 
  698:   /* Loop until we've reached the end of both position lists. */
  699:   while( iLeftPos!=-1 && iRightPos!=-1 ){
  700:     if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
  701:       if( !match ){
  702:         docListAddDocid(pOut, iDocid);
  703:         match = 1;
  704:       }
  705:       if( pOut->iType>=DL_POSITIONS ){
  706:         docListAddPos(pOut, iRightCol, iRightPos);
  707:       }
  708:       iLeftPos = readPosition(pLeft, &iLeftCol);
  709:       iRightPos = readPosition(pRight, &iRightCol);
  710:     }else if( iRightCol<iLeftCol ||
  711:               (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
  712:       iRightPos = readPosition(pRight, &iRightCol);
  713:     }else{
  714:       iLeftPos = readPosition(pLeft, &iLeftCol);
  715:     }
  716:   }
  717:   if( iLeftPos>=0 ) skipPositionList(pLeft);
  718:   if( iRightPos>=0 ) skipPositionList(pRight);
  719: }
  720: 
  721: /* We have two doclists:  pLeft and pRight.
  722: ** Write the phrase intersection of these two doclists into pOut.
  723: **
  724: ** A phrase intersection means that two documents only match
  725: ** if pLeft.iPos+1==pRight.iPos.
  726: **
  727: ** The output pOut may or may not contain positions.  If pOut
  728: ** does contain positions, they are the positions of pRight.
  729: */
  730: static void docListPhraseMerge(
  731:   DocList *pLeft,    /* Doclist resulting from the words on the left */
  732:   DocList *pRight,   /* Doclist for the next word to the right */
  733:   DocList *pOut      /* Write the combined doclist here */
  734: ){
  735:   DocListReader left, right;
  736:   sqlite_int64 docidLeft, docidRight;
  737: 
  738:   readerInit(&left, pLeft);
  739:   readerInit(&right, pRight);
  740:   docidLeft = nextDocid(&left);
  741:   docidRight = nextDocid(&right);
  742: 
  743:   while( docidLeft>0 && docidRight>0 ){
  744:     if( docidLeft<docidRight ){
  745:       docidLeft = nextDocid(&left);
  746:     }else if( docidRight<docidLeft ){
  747:       docidRight = nextDocid(&right);
  748:     }else{
  749:       mergePosList(&left, &right, docidLeft, pOut);
  750:       docidLeft = nextDocid(&left);
  751:       docidRight = nextDocid(&right);
  752:     }
  753:   }
  754: }
  755: 
  756: /* We have two doclists:  pLeft and pRight.
  757: ** Write the intersection of these two doclists into pOut.
  758: ** Only docids are matched.  Position information is ignored.
  759: **
  760: ** The output pOut never holds positions.
  761: */
  762: static void docListAndMerge(
  763:   DocList *pLeft,    /* Doclist resulting from the words on the left */
  764:   DocList *pRight,   /* Doclist for the next word to the right */
  765:   DocList *pOut      /* Write the combined doclist here */
  766: ){
  767:   DocListReader left, right;
  768:   sqlite_int64 docidLeft, docidRight;
  769: 
  770:   assert( pOut->iType<DL_POSITIONS );
  771: 
  772:   readerInit(&left, pLeft);
  773:   readerInit(&right, pRight);
  774:   docidLeft = nextDocid(&left);
  775:   docidRight = nextDocid(&right);
  776: 
  777:   while( docidLeft>0 && docidRight>0 ){
  778:     if( docidLeft<docidRight ){
  779:       docidLeft = nextDocid(&left);
  780:     }else if( docidRight<docidLeft ){
  781:       docidRight = nextDocid(&right);
  782:     }else{
  783:       docListAddDocid(pOut, docidLeft);
  784:       docidLeft = nextDocid(&left);
  785:       docidRight = nextDocid(&right);
  786:     }
  787:   }
  788: }
  789: 
  790: /* We have two doclists:  pLeft and pRight.
  791: ** Write the union of these two doclists into pOut.
  792: ** Only docids are matched.  Position information is ignored.
  793: **
  794: ** The output pOut never holds positions.
  795: */
  796: static void docListOrMerge(
  797:   DocList *pLeft,    /* Doclist resulting from the words on the left */
  798:   DocList *pRight,   /* Doclist for the next word to the right */
  799:   DocList *pOut      /* Write the combined doclist here */
  800: ){
  801:   DocListReader left, right;
  802:   sqlite_int64 docidLeft, docidRight, priorLeft;
  803: 
  804:   readerInit(&left, pLeft);
  805:   readerInit(&right, pRight);
  806:   docidLeft = nextDocid(&left);
  807:   docidRight = nextDocid(&right);
  808: 
  809:   while( docidLeft>0 && docidRight>0 ){
  810:     if( docidLeft<=docidRight ){
  811:       docListAddDocid(pOut, docidLeft);
  812:     }else{
  813:       docListAddDocid(pOut, docidRight);
  814:     }
  815:     priorLeft = docidLeft;
  816:     if( docidLeft<=docidRight ){
  817:       docidLeft = nextDocid(&left);
  818:     }
  819:     if( docidRight>0 && docidRight<=priorLeft ){
  820:       docidRight = nextDocid(&right);
  821:     }
  822:   }
  823:   while( docidLeft>0 ){
  824:     docListAddDocid(pOut, docidLeft);
  825:     docidLeft = nextDocid(&left);
  826:   }
  827:   while( docidRight>0 ){
  828:     docListAddDocid(pOut, docidRight);
  829:     docidRight = nextDocid(&right);
  830:   }
  831: }
  832: 
  833: /* We have two doclists:  pLeft and pRight.
  834: ** Write into pOut all documents that occur in pLeft but not
  835: ** in pRight.
  836: **
  837: ** Only docids are matched.  Position information is ignored.
  838: **
  839: ** The output pOut never holds positions.
  840: */
  841: static void docListExceptMerge(
  842:   DocList *pLeft,    /* Doclist resulting from the words on the left */
  843:   DocList *pRight,   /* Doclist for the next word to the right */
  844:   DocList *pOut      /* Write the combined doclist here */
  845: ){
  846:   DocListReader left, right;
  847:   sqlite_int64 docidLeft, docidRight, priorLeft;
  848: 
  849:   readerInit(&left, pLeft);
  850:   readerInit(&right, pRight);
  851:   docidLeft = nextDocid(&left);
  852:   docidRight = nextDocid(&right);
  853: 
  854:   while( docidLeft>0 && docidRight>0 ){
  855:     priorLeft = docidLeft;
  856:     if( docidLeft<docidRight ){
  857:       docListAddDocid(pOut, docidLeft);
  858:     }
  859:     if( docidLeft<=docidRight ){
  860:       docidLeft = nextDocid(&left);
  861:     }
  862:     if( docidRight>0 && docidRight<=priorLeft ){
  863:       docidRight = nextDocid(&right);
  864:     }
  865:   }
  866:   while( docidLeft>0 ){
  867:     docListAddDocid(pOut, docidLeft);
  868:     docidLeft = nextDocid(&left);
  869:   }
  870: }
  871: 
  872: static char *string_dup_n(const char *s, int n){
  873:   char *str = malloc(n + 1);
  874:   memcpy(str, s, n);
  875:   str[n] = '\0';
  876:   return str;
  877: }
  878: 
  879: /* Duplicate a string; the caller must free() the returned string.
  880:  * (We don't use strdup() since it is not part of the standard C library and
  881:  * may not be available everywhere.) */
  882: static char *string_dup(const char *s){
  883:   return string_dup_n(s, strlen(s));
  884: }
  885: 
  886: /* Format a string, replacing each occurrence of the % character with
  887:  * zDb.zName.  This may be more convenient than sqlite_mprintf()
  888:  * when one string is used repeatedly in a format string.
  889:  * The caller must free() the returned string. */
  890: static char *string_format(const char *zFormat,
  891:                            const char *zDb, const char *zName){
  892:   const char *p;
  893:   size_t len = 0;
  894:   size_t nDb = strlen(zDb);
  895:   size_t nName = strlen(zName);
  896:   size_t nFullTableName = nDb+1+nName;
  897:   char *result;
  898:   char *r;
  899: 
  900:   /* first compute length needed */
  901:   for(p = zFormat ; *p ; ++p){
  902:     len += (*p=='%' ? nFullTableName : 1);
  903:   }
  904:   len += 1;  /* for null terminator */
  905: 
  906:   r = result = malloc(len);
  907:   for(p = zFormat; *p; ++p){
  908:     if( *p=='%' ){
  909:       memcpy(r, zDb, nDb);
  910:       r += nDb;
  911:       *r++ = '.';
  912:       memcpy(r, zName, nName);
  913:       r += nName;
  914:     } else {
  915:       *r++ = *p;
  916:     }
  917:   }
  918:   *r++ = '\0';
  919:   assert( r == result + len );
  920:   return result;
  921: }
  922: 
  923: static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
  924:                     const char *zFormat){
  925:   char *zCommand = string_format(zFormat, zDb, zName);
  926:   int rc;
  927:   TRACE(("FTS1 sql: %s\n", zCommand));
  928:   rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
  929:   free(zCommand);
  930:   return rc;
  931: }
  932: 
  933: static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
  934:                        sqlite3_stmt **ppStmt, const char *zFormat){
  935:   char *zCommand = string_format(zFormat, zDb, zName);
  936:   int rc;
  937:   TRACE(("FTS1 prepare: %s\n", zCommand));
  938:   rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
  939:   free(zCommand);
  940:   return rc;
  941: }
  942: 
  943: /* end utility functions */
  944: 
  945: /* Forward reference */
  946: typedef struct fulltext_vtab fulltext_vtab;
  947: 
  948: /* A single term in a query is represented by an instances of
  949: ** the following structure.
  950: */
  951: typedef struct QueryTerm {
  952:   short int nPhrase; /* How many following terms are part of the same phrase */
  953:   short int iPhrase; /* This is the i-th term of a phrase. */
  954:   short int iColumn; /* Column of the index that must match this term */
  955:   signed char isOr;  /* this term is preceded by "OR" */
  956:   signed char isNot; /* this term is preceded by "-" */
  957:   char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  958:   int nTerm;         /* Number of bytes in pTerm[] */
  959: } QueryTerm;
  960: 
  961: 
  962: /* A query string is parsed into a Query structure.
  963:  *
  964:  * We could, in theory, allow query strings to be complicated
  965:  * nested expressions with precedence determined by parentheses.
  966:  * But none of the major search engines do this.  (Perhaps the
  967:  * feeling is that an parenthesized expression is two complex of
  968:  * an idea for the average user to grasp.)  Taking our lead from
  969:  * the major search engines, we will allow queries to be a list
  970:  * of terms (with an implied AND operator) or phrases in double-quotes,
  971:  * with a single optional "-" before each non-phrase term to designate
  972:  * negation and an optional OR connector.
  973:  *
  974:  * OR binds more tightly than the implied AND, which is what the
  975:  * major search engines seem to do.  So, for example:
  976:  * 
  977:  *    [one two OR three]     ==>    one AND (two OR three)
  978:  *    [one OR two three]     ==>    (one OR two) AND three
  979:  *
  980:  * A "-" before a term matches all entries that lack that term.
  981:  * The "-" must occur immediately before the term with in intervening
  982:  * space.  This is how the search engines do it.
  983:  *
  984:  * A NOT term cannot be the right-hand operand of an OR.  If this
  985:  * occurs in the query string, the NOT is ignored:
  986:  *
  987:  *    [one OR -two]          ==>    one OR two
  988:  *
  989:  */
  990: typedef struct Query {
  991:   fulltext_vtab *pFts;  /* The full text index */
  992:   int nTerms;           /* Number of terms in the query */
  993:   QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  994:   int nextIsOr;         /* Set the isOr flag on the next inserted term */
  995:   int nextColumn;       /* Next word parsed must be in this column */
  996:   int dfltColumn;       /* The default column */
  997: } Query;
  998: 
  999: 
 1000: /*
 1001: ** An instance of the following structure keeps track of generated
 1002: ** matching-word offset information and snippets.
 1003: */
 1004: typedef struct Snippet {
 1005:   int nMatch;     /* Total number of matches */
 1006:   int nAlloc;     /* Space allocated for aMatch[] */
 1007:   struct snippetMatch { /* One entry for each matching term */
 1008:     char snStatus;       /* Status flag for use while constructing snippets */
 1009:     short int iCol;      /* The column that contains the match */
 1010:     short int iTerm;     /* The index in Query.pTerms[] of the matching term */
 1011:     short int nByte;     /* Number of bytes in the term */
 1012:     int iStart;          /* The offset to the first character of the term */
 1013:   } *aMatch;      /* Points to space obtained from malloc */
 1014:   char *zOffset;  /* Text rendering of aMatch[] */
 1015:   int nOffset;    /* strlen(zOffset) */
 1016:   char *zSnippet; /* Snippet text */
 1017:   int nSnippet;   /* strlen(zSnippet) */
 1018: } Snippet;
 1019: 
 1020: 
 1021: typedef enum QueryType {
 1022:   QUERY_GENERIC,   /* table scan */
 1023:   QUERY_ROWID,     /* lookup by rowid */
 1024:   QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
 1025: } QueryType;
 1026: 
 1027: /* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
 1028: ** before we start aggregating into larger segments.  Lower CHUNK_MAX
 1029: ** means that for a given input we have more individual segments per
 1030: ** term, which means more rows in the table and a bigger index (due to
 1031: ** both more rows and bigger rowids).  But it also reduces the average
 1032: ** cost of adding new elements to the segment 0 doclist, and it seems
 1033: ** to reduce the number of pages read and written during inserts.  256
 1034: ** was chosen by measuring insertion times for a certain input (first
 1035: ** 10k documents of Enron corpus), though including query performance
 1036: ** in the decision may argue for a larger value.
 1037: */
 1038: #define CHUNK_MAX 256
 1039: 
 1040: typedef enum fulltext_statement {
 1041:   CONTENT_INSERT_STMT,
 1042:   CONTENT_SELECT_STMT,
 1043:   CONTENT_UPDATE_STMT,
 1044:   CONTENT_DELETE_STMT,
 1045: 
 1046:   TERM_SELECT_STMT,
 1047:   TERM_SELECT_ALL_STMT,
 1048:   TERM_INSERT_STMT,
 1049:   TERM_UPDATE_STMT,
 1050:   TERM_DELETE_STMT,
 1051: 
 1052:   MAX_STMT                     /* Always at end! */
 1053: } fulltext_statement;
 1054: 
 1055: /* These must exactly match the enum above. */
 1056: /* TODO(adam): Is there some risk that a statement (in particular,
 1057: ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
 1058: ** query joins a virtual table to itself?  If so perhaps we should
 1059: ** move some of these to the cursor object.
 1060: */
 1061: static const char *const fulltext_zStatement[MAX_STMT] = {
 1062:   /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
 1063:   /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
 1064:   /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
 1065:   /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
 1066: 
 1067:   /* TERM_SELECT */
 1068:   "select rowid, doclist from %_term where term = ? and segment = ?",
 1069:   /* TERM_SELECT_ALL */
 1070:   "select doclist from %_term where term = ? order by segment",
 1071:   /* TERM_INSERT */
 1072:   "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
 1073:   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
 1074:   /* TERM_DELETE */ "delete from %_term where rowid = ?",
 1075: };
 1076: 
 1077: /*
 1078: ** A connection to a fulltext index is an instance of the following
 1079: ** structure.  The xCreate and xConnect methods create an instance
 1080: ** of this structure and xDestroy and xDisconnect free that instance.
 1081: ** All other methods receive a pointer to the structure as one of their
 1082: ** arguments.
 1083: */
 1084: struct fulltext_vtab {
 1085:   sqlite3_vtab base;               /* Base class used by SQLite core */
 1086:   sqlite3 *db;                     /* The database connection */
 1087:   const char *zDb;                 /* logical database name */
 1088:   const char *zName;               /* virtual table name */
 1089:   int nColumn;                     /* number of columns in virtual table */
 1090:   char **azColumn;                 /* column names.  malloced */
 1091:   char **azContentColumn;          /* column names in content table; malloced */
 1092:   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
 1093: 
 1094:   /* Precompiled statements which we keep as long as the table is
 1095:   ** open.
 1096:   */
 1097:   sqlite3_stmt *pFulltextStatements[MAX_STMT];
 1098: };
 1099: 
 1100: /*
 1101: ** When the core wants to do a query, it create a cursor using a
 1102: ** call to xOpen.  This structure is an instance of a cursor.  It
 1103: ** is destroyed by xClose.
 1104: */
 1105: typedef struct fulltext_cursor {
 1106:   sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
 1107:   QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
 1108:   sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
 1109:   int eof;                         /* True if at End Of Results */
 1110:   Query q;                         /* Parsed query string */
 1111:   Snippet snippet;                 /* Cached snippet for the current row */
 1112:   int iColumn;                     /* Column being searched */
 1113:   DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
 1114: } fulltext_cursor;
 1115: 
 1116: static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
 1117:   return (fulltext_vtab *) c->base.pVtab;
 1118: }
 1119: 
 1120: static const sqlite3_module fulltextModule;   /* forward declaration */
 1121: 
 1122: /* Append a list of strings separated by commas to a StringBuffer. */
 1123: static void appendList(StringBuffer *sb, int nString, char **azString){
 1124:   int i;
 1125:   for(i=0; i<nString; ++i){
 1126:     if( i>0 ) append(sb, ", ");
 1127:     append(sb, azString[i]);
 1128:   }
 1129: }
 1130: 
 1131: /* Return a dynamically generated statement of the form
 1132:  *   insert into %_content (rowid, ...) values (?, ...)
 1133:  */
 1134: static const char *contentInsertStatement(fulltext_vtab *v){
 1135:   StringBuffer sb;
 1136:   int i;
 1137: 
 1138:   initStringBuffer(&sb);
 1139:   append(&sb, "insert into %_content (rowid, ");
 1140:   appendList(&sb, v->nColumn, v->azContentColumn);
 1141:   append(&sb, ") values (?");
 1142:   for(i=0; i<v->nColumn; ++i)
 1143:     append(&sb, ", ?");
 1144:   append(&sb, ")");
 1145:   return sb.s;
 1146: }
 1147: 
 1148: /* Return a dynamically generated statement of the form
 1149:  *   update %_content set [col_0] = ?, [col_1] = ?, ...
 1150:  *                    where rowid = ?
 1151:  */
 1152: static const char *contentUpdateStatement(fulltext_vtab *v){
 1153:   StringBuffer sb;
 1154:   int i;
 1155: 
 1156:   initStringBuffer(&sb);
 1157:   append(&sb, "update %_content set ");
 1158:   for(i=0; i<v->nColumn; ++i) {
 1159:     if( i>0 ){
 1160:       append(&sb, ", ");
 1161:     }
 1162:     append(&sb, v->azContentColumn[i]);
 1163:     append(&sb, " = ?");
 1164:   }
 1165:   append(&sb, " where rowid = ?");
 1166:   return sb.s;
 1167: }
 1168: 
 1169: /* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
 1170: ** If the indicated statement has never been prepared, it is prepared
 1171: ** and cached, otherwise the cached version is reset.
 1172: */
 1173: static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
 1174:                              sqlite3_stmt **ppStmt){
 1175:   assert( iStmt<MAX_STMT );
 1176:   if( v->pFulltextStatements[iStmt]==NULL ){
 1177:     const char *zStmt;
 1178:     int rc;
 1179:     switch( iStmt ){
 1180:       case CONTENT_INSERT_STMT:
 1181:         zStmt = contentInsertStatement(v); break;
 1182:       case CONTENT_UPDATE_STMT:
 1183:         zStmt = contentUpdateStatement(v); break;
 1184:       default:
 1185:         zStmt = fulltext_zStatement[iStmt];
 1186:     }
 1187:     rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
 1188:                          zStmt);
 1189:     if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
 1190:     if( rc!=SQLITE_OK ) return rc;
 1191:   } else {
 1192:     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
 1193:     if( rc!=SQLITE_OK ) return rc;
 1194:   }
 1195: 
 1196:   *ppStmt = v->pFulltextStatements[iStmt];
 1197:   return SQLITE_OK;
 1198: }
 1199: 
 1200: /* Step the indicated statement, handling errors SQLITE_BUSY (by
 1201: ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
 1202: ** bindings to the new statement).
 1203: ** TODO(adam): We should extend this function so that it can work with
 1204: ** statements declared locally, not only globally cached statements.
 1205: */
 1206: static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
 1207:                               sqlite3_stmt **ppStmt){
 1208:   int rc;
 1209:   sqlite3_stmt *s = *ppStmt;
 1210:   assert( iStmt<MAX_STMT );
 1211:   assert( s==v->pFulltextStatements[iStmt] );
 1212: 
 1213:   while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
 1214:     if( rc==SQLITE_BUSY ) continue;
 1215:     if( rc!=SQLITE_ERROR ) return rc;
 1216: 
 1217:     /* If an SQLITE_SCHEMA error has occurred, then finalizing this
 1218:      * statement is going to delete the fulltext_vtab structure. If
 1219:      * the statement just executed is in the pFulltextStatements[]
 1220:      * array, it will be finalized twice. So remove it before
 1221:      * calling sqlite3_finalize().
 1222:      */
 1223:     v->pFulltextStatements[iStmt] = NULL;
 1224:     rc = sqlite3_finalize(s);
 1225:     break;
 1226:   }
 1227:   return rc;
 1228: 
 1229:  err:
 1230:   sqlite3_finalize(s);
 1231:   return rc;
 1232: }
 1233: 
 1234: /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
 1235: ** Useful for statements like UPDATE, where we expect no results.
 1236: */
 1237: static int sql_single_step_statement(fulltext_vtab *v,
 1238:                                      fulltext_statement iStmt,
 1239:                                      sqlite3_stmt **ppStmt){
 1240:   int rc = sql_step_statement(v, iStmt, ppStmt);
 1241:   return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
 1242: }
 1243: 
 1244: /* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
 1245: static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
 1246:                           sqlite3_value **pValues){
 1247:   sqlite3_stmt *s;
 1248:   int i;
 1249:   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
 1250:   if( rc!=SQLITE_OK ) return rc;
 1251: 
 1252:   rc = sqlite3_bind_value(s, 1, rowid);
 1253:   if( rc!=SQLITE_OK ) return rc;
 1254: 
 1255:   for(i=0; i<v->nColumn; ++i){
 1256:     rc = sqlite3_bind_value(s, 2+i, pValues[i]);
 1257:     if( rc!=SQLITE_OK ) return rc;
 1258:   }
 1259: 
 1260:   return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
 1261: }
 1262: 
 1263: /* update %_content set col0 = pValues[0], col1 = pValues[1], ...
 1264:  *                  where rowid = [iRowid] */
 1265: static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
 1266:                           sqlite_int64 iRowid){
 1267:   sqlite3_stmt *s;
 1268:   int i;
 1269:   int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
 1270:   if( rc!=SQLITE_OK ) return rc;
 1271: 
 1272:   for(i=0; i<v->nColumn; ++i){
 1273:     rc = sqlite3_bind_value(s, 1+i, pValues[i]);
 1274:     if( rc!=SQLITE_OK ) return rc;
 1275:   }
 1276: 
 1277:   rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
 1278:   if( rc!=SQLITE_OK ) return rc;
 1279: 
 1280:   return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
 1281: }
 1282: 
 1283: static void freeStringArray(int nString, const char **pString){
 1284:   int i;
 1285: 
 1286:   for (i=0 ; i < nString ; ++i) {
 1287:     if( pString[i]!=NULL ) free((void *) pString[i]);
 1288:   }
 1289:   free((void *) pString);
 1290: }
 1291: 
 1292: /* select * from %_content where rowid = [iRow]
 1293:  * The caller must delete the returned array and all strings in it.
 1294:  * null fields will be NULL in the returned array.
 1295:  *
 1296:  * TODO: Perhaps we should return pointer/length strings here for consistency
 1297:  * with other code which uses pointer/length. */
 1298: static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
 1299:                           const char ***pValues){
 1300:   sqlite3_stmt *s;
 1301:   const char **values;
 1302:   int i;
 1303:   int rc;
 1304: 
 1305:   *pValues = NULL;
 1306: 
 1307:   rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
 1308:   if( rc!=SQLITE_OK ) return rc;
 1309: 
 1310:   rc = sqlite3_bind_int64(s, 1, iRow);
 1311:   if( rc!=SQLITE_OK ) return rc;
 1312: 
 1313:   rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
 1314:   if( rc!=SQLITE_ROW ) return rc;
 1315: 
 1316:   values = (const char **) malloc(v->nColumn * sizeof(const char *));
 1317:   for(i=0; i<v->nColumn; ++i){
 1318:     if( sqlite3_column_type(s, i)==SQLITE_NULL ){
 1319:       values[i] = NULL;
 1320:     }else{
 1321:       values[i] = string_dup((char*)sqlite3_column_text(s, i));
 1322:     }
 1323:   }
 1324: 
 1325:   /* We expect only one row.  We must execute another sqlite3_step()
 1326:    * to complete the iteration; otherwise the table will remain locked. */
 1327:   rc = sqlite3_step(s);
 1328:   if( rc==SQLITE_DONE ){
 1329:     *pValues = values;
 1330:     return SQLITE_OK;
 1331:   }
 1332: 
 1333:   freeStringArray(v->nColumn, values);
 1334:   return rc;
 1335: }
 1336: 
 1337: /* delete from %_content where rowid = [iRow ] */
 1338: static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
 1339:   sqlite3_stmt *s;
 1340:   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
 1341:   if( rc!=SQLITE_OK ) return rc;
 1342: 
 1343:   rc = sqlite3_bind_int64(s, 1, iRow);
 1344:   if( rc!=SQLITE_OK ) return rc;
 1345: 
 1346:   return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
 1347: }
 1348: 
 1349: /* select rowid, doclist from %_term
 1350:  *  where term = [pTerm] and segment = [iSegment]
 1351:  * If found, returns SQLITE_ROW; the caller must free the
 1352:  * returned doclist.  If no rows found, returns SQLITE_DONE. */
 1353: static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
 1354:                        int iSegment,
 1355:                        sqlite_int64 *rowid, DocList *out){
 1356:   sqlite3_stmt *s;
 1357:   int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
 1358:   if( rc!=SQLITE_OK ) return rc;
 1359: 
 1360:   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
 1361:   if( rc!=SQLITE_OK ) return rc;
 1362: 
 1363:   rc = sqlite3_bind_int(s, 2, iSegment);
 1364:   if( rc!=SQLITE_OK ) return rc;
 1365: 
 1366:   rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
 1367:   if( rc!=SQLITE_ROW ) return rc;
 1368: 
 1369:   *rowid = sqlite3_column_int64(s, 0);
 1370:   docListInit(out, DL_DEFAULT,
 1371:               sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
 1372: 
 1373:   /* We expect only one row.  We must execute another sqlite3_step()
 1374:    * to complete the iteration; otherwise the table will remain locked. */
 1375:   rc = sqlite3_step(s);
 1376:   return rc==SQLITE_DONE ? SQLITE_ROW : rc;
 1377: }
 1378: 
 1379: /* Load the segment doclists for term pTerm and merge them in
 1380: ** appropriate order into out.  Returns SQLITE_OK if successful.  If
 1381: ** there are no segments for pTerm, successfully returns an empty
 1382: ** doclist in out.
 1383: **
 1384: ** Each document consists of 1 or more "columns".  The number of
 1385: ** columns is v->nColumn.  If iColumn==v->nColumn, then return
 1386: ** position information about all columns.  If iColumn<v->nColumn,
 1387: ** then only return position information about the iColumn-th column
 1388: ** (where the first column is 0).
 1389: */
 1390: static int term_select_all(
 1391:   fulltext_vtab *v,     /* The fulltext index we are querying against */
 1392:   int iColumn,          /* If <nColumn, only look at the iColumn-th column */
 1393:   const char *pTerm,    /* The term whose posting lists we want */
 1394:   int nTerm,            /* Number of bytes in pTerm */
 1395:   DocList *out          /* Write the resulting doclist here */
 1396: ){
 1397:   DocList doclist;
 1398:   sqlite3_stmt *s;
 1399:   int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
 1400:   if( rc!=SQLITE_OK ) return rc;
 1401: 
 1402:   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
 1403:   if( rc!=SQLITE_OK ) return rc;
 1404: 
 1405:   docListInit(&doclist, DL_DEFAULT, 0, 0);
 1406: 
 1407:   /* TODO(shess) Handle schema and busy errors. */
 1408:   while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
 1409:     DocList old;
 1410: 
 1411:     /* TODO(shess) If we processed doclists from oldest to newest, we
 1412:     ** could skip the malloc() involved with the following call.  For
 1413:     ** now, I'd rather keep this logic similar to index_insert_term().
 1414:     ** We could additionally drop elements when we see deletes, but
 1415:     ** that would require a distinct version of docListAccumulate().
 1416:     */
 1417:     docListInit(&old, DL_DEFAULT,
 1418:                 sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
 1419: 
 1420:     if( iColumn<v->nColumn ){   /* querying a single column */
 1421:       docListRestrictColumn(&old, iColumn);
 1422:     }
 1423: 
 1424:     /* doclist contains the newer data, so write it over old.  Then
 1425:     ** steal accumulated result for doclist.
 1426:     */
 1427:     docListAccumulate(&old, &doclist);
 1428:     docListDestroy(&doclist);
 1429:     doclist = old;
 1430:   }
 1431:   if( rc!=SQLITE_DONE ){
 1432:     docListDestroy(&doclist);
 1433:     return rc;
 1434:   }
 1435: 
 1436:   docListDiscardEmpty(&doclist);
 1437:   *out = doclist;
 1438:   return SQLITE_OK;
 1439: }
 1440: 
 1441: /* insert into %_term (rowid, term, segment, doclist)
 1442:                values ([piRowid], [pTerm], [iSegment], [doclist])
 1443: ** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
 1444: **
 1445: ** NOTE(shess) piRowid is IN, with values of "space of int64" plus
 1446: ** null, it is not used to pass data back to the caller.
 1447: */
 1448: static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
 1449:                        const char *pTerm, int nTerm,
 1450:                        int iSegment, DocList *doclist){
 1451:   sqlite3_stmt *s;
 1452:   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
 1453:   if( rc!=SQLITE_OK ) return rc;
 1454: 
 1455:   if( piRowid==NULL ){
 1456:     rc = sqlite3_bind_null(s, 1);
 1457:   }else{
 1458:     rc = sqlite3_bind_int64(s, 1, *piRowid);
 1459:   }
 1460:   if( rc!=SQLITE_OK ) return rc;
 1461: 
 1462:   rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
 1463:   if( rc!=SQLITE_OK ) return rc;
 1464: 
 1465:   rc = sqlite3_bind_int(s, 3, iSegment);
 1466:   if( rc!=SQLITE_OK ) return rc;
 1467: 
 1468:   rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
 1469:   if( rc!=SQLITE_OK ) return rc;
 1470: 
 1471:   return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
 1472: }
 1473: 
 1474: /* update %_term set doclist = [doclist] where rowid = [rowid] */
 1475: static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
 1476:                        DocList *doclist){
 1477:   sqlite3_stmt *s;
 1478:   int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
 1479:   if( rc!=SQLITE_OK ) return rc;
 1480: 
 1481:   rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
 1482:   if( rc!=SQLITE_OK ) return rc;
 1483: 
 1484:   rc = sqlite3_bind_int64(s, 2, rowid);
 1485:   if( rc!=SQLITE_OK ) return rc;
 1486: 
 1487:   return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
 1488: }
 1489: 
 1490: static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
 1491:   sqlite3_stmt *s;
 1492:   int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
 1493:   if( rc!=SQLITE_OK ) return rc;
 1494: 
 1495:   rc = sqlite3_bind_int64(s, 1, rowid);
 1496:   if( rc!=SQLITE_OK ) return rc;
 1497: 
 1498:   return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
 1499: }
 1500: 
 1501: /*
 1502: ** Free the memory used to contain a fulltext_vtab structure.
 1503: */
 1504: static void fulltext_vtab_destroy(fulltext_vtab *v){
 1505:   int iStmt, i;
 1506: 
 1507:   TRACE(("FTS1 Destroy %p\n", v));
 1508:   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
 1509:     if( v->pFulltextStatements[iStmt]!=NULL ){
 1510:       sqlite3_finalize(v->pFulltextStatements[iStmt]);
 1511:       v->pFulltextStatements[iStmt] = NULL;
 1512:     }
 1513:   }
 1514: 
 1515:   if( v->pTokenizer!=NULL ){
 1516:     v->pTokenizer->pModule->xDestroy(v->pTokenizer);
 1517:     v->pTokenizer = NULL;
 1518:   }
 1519:   
 1520:   free(v->azColumn);
 1521:   for(i = 0; i < v->nColumn; ++i) {
 1522:     sqlite3_free(v->azContentColumn[i]);
 1523:   }
 1524:   free(v->azContentColumn);
 1525:   free(v);
 1526: }
 1527: 
 1528: /*
 1529: ** Token types for parsing the arguments to xConnect or xCreate.
 1530: */
 1531: #define TOKEN_EOF         0    /* End of file */
 1532: #define TOKEN_SPACE       1    /* Any kind of whitespace */
 1533: #define TOKEN_ID          2    /* An identifier */
 1534: #define TOKEN_STRING      3    /* A string literal */
 1535: #define TOKEN_PUNCT       4    /* A single punctuation character */
 1536: 
 1537: /*
 1538: ** If X is a character that can be used in an identifier then
 1539: ** IdChar(X) will be true.  Otherwise it is false.
 1540: **
 1541: ** For ASCII, any character with the high-order bit set is
 1542: ** allowed in an identifier.  For 7-bit characters, 
 1543: ** sqlite3IsIdChar[X] must be 1.
 1544: **
 1545: ** Ticket #1066.  the SQL standard does not allow '$' in the
 1546: ** middle of identfiers.  But many SQL implementations do. 
 1547: ** SQLite will allow '$' in identifiers for compatibility.
 1548: ** But the feature is undocumented.
 1549: */
 1550: static const char isIdChar[] = {
 1551: /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
 1552:     0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
 1553:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
 1554:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
 1555:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
 1556:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
 1557:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
 1558: };
 1559: #define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
 1560: 
 1561: 
 1562: /*
 1563: ** Return the length of the token that begins at z[0]. 
 1564: ** Store the token type in *tokenType before returning.
 1565: */
 1566: static int getToken(const char *z, int *tokenType){
 1567:   int i, c;
 1568:   switch( *z ){
 1569:     case 0: {
 1570:       *tokenType = TOKEN_EOF;
 1571:       return 0;
 1572:     }
 1573:     case ' ': case '\t': case '\n': case '\f': case '\r': {
 1574:       for(i=1; safe_isspace(z[i]); i++){}
 1575:       *tokenType = TOKEN_SPACE;
 1576:       return i;
 1577:     }
 1578:     case '`':
 1579:     case '\'':
 1580:     case '"': {
 1581:       int delim = z[0];
 1582:       for(i=1; (c=z[i])!=0; i++){
 1583:         if( c==delim ){
 1584:           if( z[i+1]==delim ){
 1585:             i++;
 1586:           }else{
 1587:             break;
 1588:           }
 1589:         }
 1590:       }
 1591:       *tokenType = TOKEN_STRING;
 1592:       return i + (c!=0);
 1593:     }
 1594:     case '[': {
 1595:       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
 1596:       *tokenType = TOKEN_ID;
 1597:       return i;
 1598:     }
 1599:     default: {
 1600:       if( !IdChar(*z) ){
 1601:         break;
 1602:       }
 1603:       for(i=1; IdChar(z[i]); i++){}
 1604:       *tokenType = TOKEN_ID;
 1605:       return i;
 1606:     }
 1607:   }
 1608:   *tokenType = TOKEN_PUNCT;
 1609:   return 1;
 1610: }
 1611: 
 1612: /*
 1613: ** A token extracted from a string is an instance of the following
 1614: ** structure.
 1615: */
 1616: typedef struct Token {
 1617:   const char *z;       /* Pointer to token text.  Not '\000' terminated */
 1618:   short int n;         /* Length of the token text in bytes. */
 1619: } Token;
 1620: 
 1621: /*
 1622: ** Given a input string (which is really one of the argv[] parameters
 1623: ** passed into xConnect or xCreate) split the string up into tokens.
 1624: ** Return an array of pointers to '\000' terminated strings, one string
 1625: ** for each non-whitespace token.
 1626: **
 1627: ** The returned array is terminated by a single NULL pointer.
 1628: **
 1629: ** Space to hold the returned array is obtained from a single
 1630: ** malloc and should be freed by passing the return value to free().
 1631: ** The individual strings within the token list are all a part of
 1632: ** the single memory allocation and will all be freed at once.
 1633: */
 1634: static char **tokenizeString(const char *z, int *pnToken){
 1635:   int nToken = 0;
 1636:   Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
 1637:   int n = 1;
 1638:   int e, i;
 1639:   int totalSize = 0;
 1640:   char **azToken;
 1641:   char *zCopy;
 1642:   while( n>0 ){
 1643:     n = getToken(z, &e);
 1644:     if( e!=TOKEN_SPACE ){
 1645:       aToken[nToken].z = z;
 1646:       aToken[nToken].n = n;
 1647:       nToken++;
 1648:       totalSize += n+1;
 1649:     }
 1650:     z += n;
 1651:   }
 1652:   azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
 1653:   zCopy = (char*)&azToken[nToken];
 1654:   nToken--;
 1655:   for(i=0; i<nToken; i++){
 1656:     azToken[i] = zCopy;
 1657:     n = aToken[i].n;
 1658:     memcpy(zCopy, aToken[i].z, n);
 1659:     zCopy[n] = 0;
 1660:     zCopy += n+1;
 1661:   }
 1662:   azToken[nToken] = 0;
 1663:   free(aToken);
 1664:   *pnToken = nToken;
 1665:   return azToken;
 1666: }
 1667: 
 1668: /*
 1669: ** Convert an SQL-style quoted string into a normal string by removing
 1670: ** the quote characters.  The conversion is done in-place.  If the
 1671: ** input does not begin with a quote character, then this routine
 1672: ** is a no-op.
 1673: **
 1674: ** Examples:
 1675: **
 1676: **     "abc"   becomes   abc
 1677: **     'xyz'   becomes   xyz
 1678: **     [pqr]   becomes   pqr
 1679: **     `mno`   becomes   mno
 1680: */
 1681: static void dequoteString(char *z){
 1682:   int quote;
 1683:   int i, j;
 1684:   if( z==0 ) return;
 1685:   quote = z[0];
 1686:   switch( quote ){
 1687:     case '\'':  break;
 1688:     case '"':   break;
 1689:     case '`':   break;                /* For MySQL compatibility */
 1690:     case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
 1691:     default:    return;
 1692:   }
 1693:   for(i=1, j=0; z[i]; i++){
 1694:     if( z[i]==quote ){
 1695:       if( z[i+1]==quote ){
 1696:         z[j++] = quote;
 1697:         i++;
 1698:       }else{
 1699:         z[j++] = 0;
 1700:         break;
 1701:       }
 1702:     }else{
 1703:       z[j++] = z[i];
 1704:     }
 1705:   }
 1706: }
 1707: 
 1708: /*
 1709: ** The input azIn is a NULL-terminated list of tokens.  Remove the first
 1710: ** token and all punctuation tokens.  Remove the quotes from
 1711: ** around string literal tokens.
 1712: **
 1713: ** Example:
 1714: **
 1715: **     input:      tokenize chinese ( 'simplifed' , 'mixed' )
 1716: **     output:     chinese simplifed mixed
 1717: **
 1718: ** Another example:
 1719: **
 1720: **     input:      delimiters ( '[' , ']' , '...' )
 1721: **     output:     [ ] ...
 1722: */
 1723: static void tokenListToIdList(char **azIn){
 1724:   int i, j;
 1725:   if( azIn ){
 1726:     for(i=0, j=-1; azIn[i]; i++){
 1727:       if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
 1728:         dequoteString(azIn[i]);
 1729:         if( j>=0 ){
 1730:           azIn[j] = azIn[i];
 1731:         }
 1732:         j++;
 1733:       }
 1734:     }
 1735:     azIn[j] = 0;
 1736:   }
 1737: }
 1738: 
 1739: 
 1740: /*
 1741: ** Find the first alphanumeric token in the string zIn.  Null-terminate
 1742: ** this token.  Remove any quotation marks.  And return a pointer to
 1743: ** the result.
 1744: */
 1745: static char *firstToken(char *zIn, char **pzTail){
 1746:   int n, ttype;
 1747:   while(1){
 1748:     n = getToken(zIn, &ttype);
 1749:     if( ttype==TOKEN_SPACE ){
 1750:       zIn += n;
 1751:     }else if( ttype==TOKEN_EOF ){
 1752:       *pzTail = zIn;
 1753:       return 0;
 1754:     }else{
 1755:       zIn[n] = 0;
 1756:       *pzTail = &zIn[1];
 1757:       dequoteString(zIn);
 1758:       return zIn;
 1759:     }
 1760:   }
 1761:   /*NOTREACHED*/
 1762: }
 1763: 
 1764: /* Return true if...
 1765: **
 1766: **   *  s begins with the string t, ignoring case
 1767: **   *  s is longer than t
 1768: **   *  The first character of s beyond t is not a alphanumeric
 1769: ** 
 1770: ** Ignore leading space in *s.
 1771: **
 1772: ** To put it another way, return true if the first token of
 1773: ** s[] is t[].
 1774: */
 1775: static int startsWith(const char *s, const char *t){
 1776:   while( safe_isspace(*s) ){ s++; }
 1777:   while( *t ){
 1778:     if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
 1779:   }
 1780:   return *s!='_' && !safe_isalnum(*s);
 1781: }
 1782: 
 1783: /*
 1784: ** An instance of this structure defines the "spec" of a
 1785: ** full text index.  This structure is populated by parseSpec
 1786: ** and use by fulltextConnect and fulltextCreate.
 1787: */
 1788: typedef struct TableSpec {
 1789:   const char *zDb;         /* Logical database name */
 1790:   const char *zName;       /* Name of the full-text index */
 1791:   int nColumn;             /* Number of columns to be indexed */
 1792:   char **azColumn;         /* Original names of columns to be indexed */
 1793:   char **azContentColumn;  /* Column names for %_content */
 1794:   char **azTokenizer;      /* Name of tokenizer and its arguments */
 1795: } TableSpec;
 1796: 
 1797: /*
 1798: ** Reclaim all of the memory used by a TableSpec
 1799: */
 1800: static void clearTableSpec(TableSpec *p) {
 1801:   free(p->azColumn);
 1802:   free(p->azContentColumn);
 1803:   free(p->azTokenizer);
 1804: }
 1805: 
 1806: /* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
 1807:  *
 1808:  * CREATE VIRTUAL TABLE email
 1809:  *        USING fts1(subject, body, tokenize mytokenizer(myarg))
 1810:  *
 1811:  * We return parsed information in a TableSpec structure.
 1812:  * 
 1813:  */
 1814: static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
 1815:                      char**pzErr){
 1816:   int i, n;
 1817:   char *z, *zDummy;
 1818:   char **azArg;
 1819:   const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
 1820: 
 1821:   assert( argc>=3 );
 1822:   /* Current interface:
 1823:   ** argv[0] - module name
 1824:   ** argv[1] - database name
 1825:   ** argv[2] - table name
 1826:   ** argv[3..] - columns, optionally followed by tokenizer specification
 1827:   **             and snippet delimiters specification.
 1828:   */
 1829: 
 1830:   /* Make a copy of the complete argv[][] array in a single allocation.
 1831:   ** The argv[][] array is read-only and transient.  We can write to the
 1832:   ** copy in order to modify things and the copy is persistent.
 1833:   */
 1834:   memset(pSpec, 0, sizeof(*pSpec));
 1835:   for(i=n=0; i<argc; i++){
 1836:     n += strlen(argv[i]) + 1;
 1837:   }
 1838:   azArg = malloc( sizeof(char*)*argc + n );
 1839:   if( azArg==0 ){
 1840:     return SQLITE_NOMEM;
 1841:   }
 1842:   z = (char*)&azArg[argc];
 1843:   for(i=0; i<argc; i++){
 1844:     azArg[i] = z;
 1845:     strcpy(z, argv[i]);
 1846:     z += strlen(z)+1;
 1847:   }
 1848: 
 1849:   /* Identify the column names and the tokenizer and delimiter arguments
 1850:   ** in the argv[][] array.
 1851:   */
 1852:   pSpec->zDb = azArg[1];
 1853:   pSpec->zName = azArg[2];
 1854:   pSpec->nColumn = 0;
 1855:   pSpec->azColumn = azArg;
 1856:   zTokenizer = "tokenize simple";
 1857:   for(i=3; i<argc; ++i){
 1858:     if( startsWith(azArg[i],"tokenize") ){
 1859:       zTokenizer = azArg[i];
 1860:     }else{
 1861:       z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
 1862:       pSpec->nColumn++;
 1863:     }
 1864:   }
 1865:   if( pSpec->nColumn==0 ){
 1866:     azArg[0] = "content";
 1867:     pSpec->nColumn = 1;
 1868:   }
 1869: 
 1870:   /*
 1871:   ** Construct the list of content column names.
 1872:   **
 1873:   ** Each content column name will be of the form cNNAAAA
 1874:   ** where NN is the column number and AAAA is the sanitized
 1875:   ** column name.  "sanitized" means that special characters are
 1876:   ** converted to "_".  The cNN prefix guarantees that all column
 1877:   ** names are unique.
 1878:   **
 1879:   ** The AAAA suffix is not strictly necessary.  It is included
 1880:   ** for the convenience of people who might examine the generated
 1881:   ** %_content table and wonder what the columns are used for.
 1882:   */
 1883:   pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
 1884:   if( pSpec->azContentColumn==0 ){
 1885:     clearTableSpec(pSpec);
 1886:     return SQLITE_NOMEM;
 1887:   }
 1888:   for(i=0; i<pSpec->nColumn; i++){
 1889:     char *p;
 1890:     pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
 1891:     for (p = pSpec->azContentColumn[i]; *p ; ++p) {
 1892:       if( !safe_isalnum(*p) ) *p = '_';
 1893:     }
 1894:   }
 1895: 
 1896:   /*
 1897:   ** Parse the tokenizer specification string.
 1898:   */
 1899:   pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
 1900:   tokenListToIdList(pSpec->azTokenizer);
 1901: 
 1902:   return SQLITE_OK;
 1903: }
 1904: 
 1905: /*
 1906: ** Generate a CREATE TABLE statement that describes the schema of
 1907: ** the virtual table.  Return a pointer to this schema string.
 1908: **
 1909: ** Space is obtained from sqlite3_mprintf() and should be freed
 1910: ** using sqlite3_free().
 1911: */
 1912: static char *fulltextSchema(
 1913:   int nColumn,                  /* Number of columns */
 1914:   const char *const* azColumn,  /* List of columns */
 1915:   const char *zTableName        /* Name of the table */
 1916: ){
 1917:   int i;
 1918:   char *zSchema, *zNext;
 1919:   const char *zSep = "(";
 1920:   zSchema = sqlite3_mprintf("CREATE TABLE x");
 1921:   for(i=0; i<nColumn; i++){
 1922:     zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
 1923:     sqlite3_free(zSchema);
 1924:     zSchema = zNext;
 1925:     zSep = ",";
 1926:   }
 1927:   zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
 1928:   sqlite3_free(zSchema);
 1929:   return zNext;
 1930: }
 1931: 
 1932: /*
 1933: ** Build a new sqlite3_vtab structure that will describe the
 1934: ** fulltext index defined by spec.
 1935: */
 1936: static int constructVtab(
 1937:   sqlite3 *db,              /* The SQLite database connection */
 1938:   TableSpec *spec,          /* Parsed spec information from parseSpec() */
 1939:   sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
 1940:   char **pzErr              /* Write any error message here */
 1941: ){
 1942:   int rc;
 1943:   int n;
 1944:   fulltext_vtab *v = 0;
 1945:   const sqlite3_tokenizer_module *m = NULL;
 1946:   char *schema;
 1947: 
 1948:   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
 1949:   if( v==0 ) return SQLITE_NOMEM;
 1950:   memset(v, 0, sizeof(*v));
 1951:   /* sqlite will initialize v->base */
 1952:   v->db = db;
 1953:   v->zDb = spec->zDb;       /* Freed when azColumn is freed */
 1954:   v->zName = spec->zName;   /* Freed when azColumn is freed */
 1955:   v->nColumn = spec->nColumn;
 1956:   v->azContentColumn = spec->azContentColumn;
 1957:   spec->azContentColumn = 0;
 1958:   v->azColumn = spec->azColumn;
 1959:   spec->azColumn = 0;
 1960: 
 1961:   if( spec->azTokenizer==0 ){
 1962:     return SQLITE_NOMEM;
 1963:   }
 1964:   /* TODO(shess) For now, add new tokenizers as else if clauses. */
 1965:   if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
 1966:     sqlite3Fts1SimpleTokenizerModule(&m);
 1967:   }else if( startsWith(spec->azTokenizer[0], "porter") ){
 1968:     sqlite3Fts1PorterTokenizerModule(&m);
 1969:   }else{
 1970:     *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
 1971:     rc = SQLITE_ERROR;
 1972:     goto err;
 1973:   }
 1974:   for(n=0; spec->azTokenizer[n]; n++){}
 1975:   if( n ){
 1976:     rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
 1977:                     &v->pTokenizer);
 1978:   }else{
 1979:     rc = m->xCreate(0, 0, &v->pTokenizer);
 1980:   }
 1981:   if( rc!=SQLITE_OK ) goto err;
 1982:   v->pTokenizer->pModule = m;
 1983: 
 1984:   /* TODO: verify the existence of backing tables foo_content, foo_term */
 1985: 
 1986:   schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
 1987:                           spec->zName);
 1988:   rc = sqlite3_declare_vtab(db, schema);
 1989:   sqlite3_free(schema);
 1990:   if( rc!=SQLITE_OK ) goto err;
 1991: 
 1992:   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
 1993: 
 1994:   *ppVTab = &v->base;
 1995:   TRACE(("FTS1 Connect %p\n", v));
 1996: 
 1997:   return rc;
 1998: 
 1999: err:
 2000:   fulltext_vtab_destroy(v);
 2001:   return rc;
 2002: }
 2003: 
 2004: static int fulltextConnect(
 2005:   sqlite3 *db,
 2006:   void *pAux,
 2007:   int argc, const char *const*argv,
 2008:   sqlite3_vtab **ppVTab,
 2009:   char **pzErr
 2010: ){
 2011:   TableSpec spec;
 2012:   int rc = parseSpec(&spec, argc, argv, pzErr);
 2013:   if( rc!=SQLITE_OK ) return rc;
 2014: 
 2015:   rc = constructVtab(db, &spec, ppVTab, pzErr);
 2016:   clearTableSpec(&spec);
 2017:   return rc;
 2018: }
 2019: 
 2020:   /* The %_content table holds the text of each document, with
 2021:   ** the rowid used as the docid.
 2022:   **
 2023:   ** The %_term table maps each term to a document list blob
 2024:   ** containing elements sorted by ascending docid, each element
 2025:   ** encoded as:
 2026:   **
 2027:   **   docid varint-encoded
 2028:   **   token elements:
 2029:   **     position+1 varint-encoded as delta from previous position
 2030:   **     start offset varint-encoded as delta from previous start offset
 2031:   **     end offset varint-encoded as delta from start offset
 2032:   **
 2033:   ** The sentinel position of 0 indicates the end of the token list.
 2034:   **
 2035:   ** Additionally, doclist blobs are chunked into multiple segments,
 2036:   ** using segment to order the segments.  New elements are added to
 2037:   ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then
 2038:   ** segment 0 is deleted, and the doclist is inserted at segment 1.
 2039:   ** If there is already a doclist at segment 1, the segment 0 doclist
 2040:   ** is merged with it, the segment 1 doclist is deleted, and the
 2041:   ** merged doclist is inserted at segment 2, repeating those
 2042:   ** operations until an insert succeeds.
 2043:   **
 2044:   ** Since this structure doesn't allow us to update elements in place
 2045:   ** in case of deletion or update, these are simply written to
 2046:   ** segment 0 (with an empty token list in case of deletion), with
 2047:   ** docListAccumulate() taking care to retain lower-segment
 2048:   ** information in preference to higher-segment information.
 2049:   */
 2050:   /* TODO(shess) Provide a VACUUM type operation which both removes
 2051:   ** deleted elements which are no longer necessary, and duplicated
 2052:   ** elements.  I suspect this will probably not be necessary in
 2053:   ** practice, though.
 2054:   */
 2055: static int fulltextCreate(sqlite3 *db, void *pAux,
 2056:                           int argc, const char * const *argv,
 2057:                           sqlite3_vtab **ppVTab, char **pzErr){
 2058:   int rc;
 2059:   TableSpec spec;
 2060:   StringBuffer schema;
 2061:   TRACE(("FTS1 Create\n"));
 2062: 
 2063:   rc = parseSpec(&spec, argc, argv, pzErr);
 2064:   if( rc!=SQLITE_OK ) return rc;
 2065: 
 2066:   initStringBuffer(&schema);
 2067:   append(&schema, "CREATE TABLE %_content(");
 2068:   appendList(&schema, spec.nColumn, spec.azContentColumn);
 2069:   append(&schema, ")");
 2070:   rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
 2071:   free(schema.s);
 2072:   if( rc!=SQLITE_OK ) goto out;
 2073: 
 2074:   rc = sql_exec(db, spec.zDb, spec.zName,
 2075:     "create table %_term(term text, segment integer, doclist blob, "
 2076:                         "primary key(term, segment));");
 2077:   if( rc!=SQLITE_OK ) goto out;
 2078: 
 2079:   rc = constructVtab(db, &spec, ppVTab, pzErr);
 2080: 
 2081: out:
 2082:   clearTableSpec(&spec);
 2083:   return rc;
 2084: }
 2085: 
 2086: /* Decide how to handle an SQL query. */
 2087: static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
 2088:   int i;
 2089:   TRACE(("FTS1 BestIndex\n"));
 2090: 
 2091:   for(i=0; i<pInfo->nConstraint; ++i){
 2092:     const struct sqlite3_index_constraint *pConstraint;
 2093:     pConstraint = &pInfo->aConstraint[i];
 2094:     if( pConstraint->usable ) {
 2095:       if( pConstraint->iColumn==-1 &&
 2096:           pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
 2097:         pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
 2098:         TRACE(("FTS1 QUERY_ROWID\n"));
 2099:       } else if( pConstraint->iColumn>=0 &&
 2100:                  pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
 2101:         /* full-text search */
 2102:         pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
 2103:         TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
 2104:       } else continue;
 2105: 
 2106:       pInfo->aConstraintUsage[i].argvIndex = 1;
 2107:       pInfo->aConstraintUsage[i].omit = 1;
 2108: 
 2109:       /* An arbitrary value for now.
 2110:        * TODO: Perhaps rowid matches should be considered cheaper than
 2111:        * full-text searches. */
 2112:       pInfo->estimatedCost = 1.0;   
 2113: 
 2114:       return SQLITE_OK;
 2115:     }
 2116:   }
 2117:   pInfo->idxNum = QUERY_GENERIC;
 2118:   return SQLITE_OK;
 2119: }
 2120: 
 2121: static int fulltextDisconnect(sqlite3_vtab *pVTab){
 2122:   TRACE(("FTS1 Disconnect %p\n", pVTab));
 2123:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
 2124:   return SQLITE_OK;
 2125: }
 2126: 
 2127: static int fulltextDestroy(sqlite3_vtab *pVTab){
 2128:   fulltext_vtab *v = (fulltext_vtab *)pVTab;
 2129:   int rc;
 2130: 
 2131:   TRACE(("FTS1 Destroy %p\n", pVTab));
 2132:   rc = sql_exec(v->db, v->zDb, v->zName,
 2133:                 "drop table if exists %_content;"
 2134:                 "drop table if exists %_term;"
 2135:                 );
 2136:   if( rc!=SQLITE_OK ) return rc;
 2137: 
 2138:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
 2139:   return SQLITE_OK;
 2140: }
 2141: 
 2142: static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
 2143:   fulltext_cursor *c;
 2144: 
 2145:   c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
 2146:   /* sqlite will initialize c->base */
 2147:   *ppCursor = &c->base;
 2148:   TRACE(("FTS1 Open %p: %p\n", pVTab, c));
 2149: 
 2150:   return SQLITE_OK;
 2151: }
 2152: 
 2153: 
 2154: /* Free all of the dynamically allocated memory held by *q
 2155: */
 2156: static void queryClear(Query *q){
 2157:   int i;
 2158:   for(i = 0; i < q->nTerms; ++i){
 2159:     free(q->pTerms[i].pTerm);
 2160:   }
 2161:   free(q->pTerms);
 2162:   memset(q, 0, sizeof(*q));
 2163: }
 2164: 
 2165: /* Free all of the dynamically allocated memory held by the
 2166: ** Snippet
 2167: */
 2168: static void snippetClear(Snippet *p){
 2169:   free(p->aMatch);
 2170:   free(p->zOffset);
 2171:   free(p->zSnippet);
 2172:   memset(p, 0, sizeof(*p));
 2173: }
 2174: /*
 2175: ** Append a single entry to the p->aMatch[] log.
 2176: */
 2177: static void snippetAppendMatch(
 2178:   Snippet *p,               /* Append the entry to this snippet */
 2179:   int iCol, int iTerm,      /* The column and query term */
 2180:   int iStart, int nByte     /* Offset and size of the match */
 2181: ){
 2182:   int i;
 2183:   struct snippetMatch *pMatch;
 2184:   if( p->nMatch+1>=p->nAlloc ){
 2185:     p->nAlloc = p->nAlloc*2 + 10;
 2186:     p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
 2187:     if( p->aMatch==0 ){
 2188:       p->nMatch = 0;
 2189:       p->nAlloc = 0;
 2190:       return;
 2191:     }
 2192:   }
 2193:   i = p->nMatch++;
 2194:   pMatch = &p->aMatch[i];
 2195:   pMatch->iCol = iCol;
 2196:   pMatch->iTerm = iTerm;
 2197:   pMatch->iStart = iStart;
 2198:   pMatch->nByte = nByte;
 2199: }
 2200: 
 2201: /*
 2202: ** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
 2203: */
 2204: #define FTS1_ROTOR_SZ   (32)
 2205: #define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
 2206: 
 2207: /*
 2208: ** Add entries to pSnippet->aMatch[] for every match that occurs against
 2209: ** document zDoc[0..nDoc-1] which is stored in column iColumn.
 2210: */
 2211: static void snippetOffsetsOfColumn(
 2212:   Query *pQuery,
 2213:   Snippet *pSnippet,
 2214:   int iColumn,
 2215:   const char *zDoc,
 2216:   int nDoc
 2217: ){
 2218:   const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
 2219:   sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
 2220:   sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
 2221:   fulltext_vtab *pVtab;                /* The full text index */
 2222:   int nColumn;                         /* Number of columns in the index */
 2223:   const QueryTerm *aTerm;              /* Query string terms */
 2224:   int nTerm;                           /* Number of query string terms */  
 2225:   int i, j;                            /* Loop counters */
 2226:   int rc;                              /* Return code */
 2227:   unsigned int match, prevMatch;       /* Phrase search bitmasks */
 2228:   const char *zToken;                  /* Next token from the tokenizer */
 2229:   int nToken;                          /* Size of zToken */
 2230:   int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
 2231: 
 2232:   /* The following variables keep a circular buffer of the last
 2233:   ** few tokens */
 2234:   unsigned int iRotor = 0;             /* Index of current token */
 2235:   int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
 2236:   int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */
 2237: 
 2238:   pVtab = pQuery->pFts;
 2239:   nColumn = pVtab->nColumn;
 2240:   pTokenizer = pVtab->pTokenizer;
 2241:   pTModule = pTokenizer->pModule;
 2242:   rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
 2243:   if( rc ) return;
 2244:   pTCursor->pTokenizer = pTokenizer;
 2245:   aTerm = pQuery->pTerms;
 2246:   nTerm = pQuery->nTerms;
 2247:   if( nTerm>=FTS1_ROTOR_SZ ){
 2248:     nTerm = FTS1_ROTOR_SZ - 1;
 2249:   }
 2250:   prevMatch = 0;
 2251:   while(1){
 2252:     rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
 2253:     if( rc ) break;
 2254:     iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
 2255:     iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
 2256:     match = 0;
 2257:     for(i=0; i<nTerm; i++){
 2258:       int iCol;
 2259:       iCol = aTerm[i].iColumn;
 2260:       if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
 2261:       if( aTerm[i].nTerm!=nToken ) continue;
 2262:       if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
 2263:       if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
 2264:       match |= 1<<i;
 2265:       if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
 2266:         for(j=aTerm[i].iPhrase-1; j>=0; j--){
 2267:           int k = (iRotor-j) & FTS1_ROTOR_MASK;
 2268:           snippetAppendMatch(pSnippet, iColumn, i-j,
 2269:                 iRotorBegin[k], iRotorLen[k]);
 2270:         }
 2271:       }
 2272:     }
 2273:     prevMatch = match<<1;
 2274:     iRotor++;
 2275:   }
 2276:   pTModule->xClose(pTCursor);  
 2277: }
 2278: 
 2279: 
 2280: /*
 2281: ** Compute all offsets for the current row of the query.  
 2282: ** If the offsets have already been computed, this routine is a no-op.
 2283: */
 2284: static void snippetAllOffsets(fulltext_cursor *p){
 2285:   int nColumn;
 2286:   int iColumn, i;
 2287:   int iFirst, iLast;
 2288:   fulltext_vtab *pFts;
 2289: 
 2290:   if( p->snippet.nMatch ) return;
 2291:   if( p->q.nTerms==0 ) return;
 2292:   pFts = p->q.pFts;
 2293:   nColumn = pFts->nColumn;
 2294:   iColumn = p->iCursorType - QUERY_FULLTEXT;
 2295:   if( iColumn<0 || iColumn>=nColumn ){
 2296:     iFirst = 0;
 2297:     iLast = nColumn-1;
 2298:   }else{
 2299:     iFirst = iColumn;
 2300:     iLast = iColumn;
 2301:   }
 2302:   for(i=iFirst; i<=iLast; i++){
 2303:     const char *zDoc;
 2304:     int nDoc;
 2305:     zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
 2306:     nDoc = sqlite3_column_bytes(p->pStmt, i+1);
 2307:     snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
 2308:   }
 2309: }
 2310: 
 2311: /*
 2312: ** Convert the information in the aMatch[] array of the snippet
 2313: ** into the string zOffset[0..nOffset-1].
 2314: */
 2315: static void snippetOffsetText(Snippet *p){
 2316:   int i;
 2317:   int cnt = 0;
 2318:   StringBuffer sb;
 2319:   char zBuf[200];
 2320:   if( p->zOffset ) return;
 2321:   initStringBuffer(&sb);
 2322:   for(i=0; i<p->nMatch; i++){
 2323:     struct snippetMatch *pMatch = &p->aMatch[i];
 2324:     zBuf[0] = ' ';
 2325:     sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
 2326:         pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
 2327:     append(&sb, zBuf);
 2328:     cnt++;
 2329:   }
 2330:   p->zOffset = sb.s;
 2331:   p->nOffset = sb.len;
 2332: }
 2333: 
 2334: /*
 2335: ** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
 2336: ** of matching words some of which might be in zDoc.  zDoc is column
 2337: ** number iCol.
 2338: **
 2339: ** iBreak is suggested spot in zDoc where we could begin or end an
 2340: ** excerpt.  Return a value similar to iBreak but possibly adjusted
 2341: ** to be a little left or right so that the break point is better.
 2342: */
 2343: static int wordBoundary(
 2344:   int iBreak,                   /* The suggested break point */
 2345:   const char *zDoc,             /* Document text */
 2346:   int nDoc,                     /* Number of bytes in zDoc[] */
 2347:   struct snippetMatch *aMatch,  /* Matching words */
 2348:   int nMatch,                   /* Number of entries in aMatch[] */
 2349:   int iCol                      /* The column number for zDoc[] */
 2350: ){
 2351:   int i;
 2352:   if( iBreak<=10 ){
 2353:     return 0;
 2354:   }
 2355:   if( iBreak>=nDoc-10 ){
 2356:     return nDoc;
 2357:   }
 2358:   for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
 2359:   while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
 2360:   if( i<nMatch ){
 2361:     if( aMatch[i].iStart<iBreak+10 ){
 2362:       return aMatch[i].iStart;
 2363:     }
 2364:     if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
 2365:       return aMatch[i-1].iStart;
 2366:     }
 2367:   }
 2368:   for(i=1; i<=10; i++){
 2369:     if( safe_isspace(zDoc[iBreak-i]) ){
 2370:       return iBreak - i + 1;
 2371:     }
 2372:     if( safe_isspace(zDoc[iBreak+i]) ){
 2373:       return iBreak + i + 1;
 2374:     }
 2375:   }
 2376:   return iBreak;
 2377: }
 2378: 
 2379: /*
 2380: ** If the StringBuffer does not end in white space, add a single
 2381: ** space character to the end.
 2382: */
 2383: static void appendWhiteSpace(StringBuffer *p){
 2384:   if( p->len==0 ) return;
 2385:   if( safe_isspace(p->s[p->len-1]) ) return;
 2386:   append(p, " ");
 2387: }
 2388: 
 2389: /*
 2390: ** Remove white space from teh end of the StringBuffer
 2391: */
 2392: static void trimWhiteSpace(StringBuffer *p){
 2393:   while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
 2394:     p->len--;
 2395:   }
 2396: }
 2397: 
 2398: 
 2399: 
 2400: /*
 2401: ** Allowed values for Snippet.aMatch[].snStatus
 2402: */
 2403: #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
 2404: #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
 2405: 
 2406: /*
 2407: ** Generate the text of a snippet.
 2408: */
 2409: static void snippetText(
 2410:   fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
 2411:   const char *zStartMark,     /* Markup to appear before each match */
 2412:   const char *zEndMark,       /* Markup to appear after each match */
 2413:   const char *zEllipsis       /* Ellipsis mark */
 2414: ){
 2415:   int i, j;
 2416:   struct snippetMatch *aMatch;
 2417:   int nMatch;
 2418:   int nDesired;
 2419:   StringBuffer sb;
 2420:   int tailCol;
 2421:   int tailOffset;
 2422:   int iCol;
 2423:   int nDoc;
 2424:   const char *zDoc;
 2425:   int iStart, iEnd;
 2426:   int tailEllipsis = 0;
 2427:   int iMatch;
 2428:   
 2429: 
 2430:   free(pCursor->snippet.zSnippet);
 2431:   pCursor->snippet.zSnippet = 0;
 2432:   aMatch = pCursor->snippet.aMatch;
 2433:   nMatch = pCursor->snippet.nMatch;
 2434:   initStringBuffer(&sb);
 2435: 
 2436:   for(i=0; i<nMatch; i++){
 2437:     aMatch[i].snStatus = SNIPPET_IGNORE;
 2438:   }
 2439:   nDesired = 0;
 2440:   for(i=0; i<pCursor->q.nTerms; i++){
 2441:     for(j=0; j<nMatch; j++){
 2442:       if( aMatch[j].iTerm==i ){
 2443:         aMatch[j].snStatus = SNIPPET_DESIRED;
 2444:         nDesired++;
 2445:         break;
 2446:       }
 2447:     }
 2448:   }
 2449: 
 2450:   iMatch = 0;
 2451:   tailCol = -1;
 2452:   tailOffset = 0;
 2453:   for(i=0; i<nMatch && nDesired>0; i++){
 2454:     if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
 2455:     nDesired--;
 2456:     iCol = aMatch[i].iCol;
 2457:     zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
 2458:     nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
 2459:     iStart = aMatch[i].iStart - 40;
 2460:     iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
 2461:     if( iStart<=10 ){
 2462:       iStart = 0;
 2463:     }
 2464:     if( iCol==tailCol && iStart<=tailOffset+20 ){
 2465:       iStart = tailOffset;
 2466:     }
 2467:     if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
 2468:       trimWhiteSpace(&sb);
 2469:       appendWhiteSpace(&sb);
 2470:       append(&sb, zEllipsis);
 2471:       appendWhiteSpace(&sb);
 2472:     }
 2473:     iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
 2474:     iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
 2475:     if( iEnd>=nDoc-10 ){
 2476:       iEnd = nDoc;
 2477:       tailEllipsis = 0;
 2478:     }else{
 2479:       tailEllipsis = 1;
 2480:     }
 2481:     while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
 2482:     while( iStart<iEnd ){
 2483:       while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
 2484:              && aMatch[iMatch].iCol<=iCol ){
 2485:         iMatch++;
 2486:       }
 2487:       if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
 2488:              && aMatch[iMatch].iCol==iCol ){
 2489:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
 2490:         iStart = aMatch[iMatch].iStart;
 2491:         append(&sb, zStartMark);
 2492:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
 2493:         append(&sb, zEndMark);
 2494:         iStart += aMatch[iMatch].nByte;
 2495:         for(j=iMatch+1; j<nMatch; j++){
 2496:           if( aMatch[j].iTerm==aMatch[iMatch].iTerm
 2497:               && aMatch[j].snStatus==SNIPPET_DESIRED ){
 2498:             nDesired--;
 2499:             aMatch[j].snStatus = SNIPPET_IGNORE;
 2500:           }
 2501:         }
 2502:       }else{
 2503:         nappend(&sb, &zDoc[iStart], iEnd - iStart);
 2504:         iStart = iEnd;
 2505:       }
 2506:     }
 2507:     tailCol = iCol;
 2508:     tailOffset = iEnd;
 2509:   }
 2510:   trimWhiteSpace(&sb);
 2511:   if( tailEllipsis ){
 2512:     appendWhiteSpace(&sb);
 2513:     append(&sb, zEllipsis);
 2514:   }
 2515:   pCursor->snippet.zSnippet = sb.s;
 2516:   pCursor->snippet.nSnippet = sb.len;  
 2517: }
 2518: 
 2519: 
 2520: /*
 2521: ** Close the cursor.  For additional information see the documentation
 2522: ** on the xClose method of the virtual table interface.
 2523: */
 2524: static int fulltextClose(sqlite3_vtab_cursor *pCursor){
 2525:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
 2526:   TRACE(("FTS1 Close %p\n", c));
 2527:   sqlite3_finalize(c->pStmt);
 2528:   queryClear(&c->q);
 2529:   snippetClear(&c->snippet);
 2530:   if( c->result.pDoclist!=NULL ){
 2531:     docListDelete(c->result.pDoclist);
 2532:   }
 2533:   free(c);
 2534:   return SQLITE_OK;
 2535: }
 2536: 
 2537: static int fulltextNext(sqlite3_vtab_cursor *pCursor){
 2538:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
 2539:   sqlite_int64 iDocid;
 2540:   int rc;
 2541: 
 2542:   TRACE(("FTS1 Next %p\n", pCursor));
 2543:   snippetClear(&c->snippet);
 2544:   if( c->iCursorType < QUERY_FULLTEXT ){
 2545:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
 2546:     rc = sqlite3_step(c->pStmt);
 2547:     switch( rc ){
 2548:       case SQLITE_ROW:
 2549:         c->eof = 0;
 2550:         return SQLITE_OK;
 2551:       case SQLITE_DONE:
 2552:         c->eof = 1;
 2553:         return SQLITE_OK;
 2554:       default:
 2555:         c->eof = 1;
 2556:         return rc;
 2557:     }
 2558:   } else {  /* full-text query */
 2559:     rc = sqlite3_reset(c->pStmt);
 2560:     if( rc!=SQLITE_OK ) return rc;
 2561: 
 2562:     iDocid = nextDocid(&c->result);
 2563:     if( iDocid==0 ){
 2564:       c->eof = 1;
 2565:       return SQLITE_OK;
 2566:     }
 2567:     rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
 2568:     if( rc!=SQLITE_OK ) return rc;
 2569:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
 2570:     rc = sqlite3_step(c->pStmt);
 2571:     if( rc==SQLITE_ROW ){   /* the case we expect */
 2572:       c->eof = 0;
 2573:       return SQLITE_OK;
 2574:     }
 2575:     /* an error occurred; abort */
 2576:     return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
 2577:   }
 2578: }
 2579: 
 2580: 
 2581: /* Return a DocList corresponding to the query term *pTerm.  If *pTerm
 2582: ** is the first term of a phrase query, go ahead and evaluate the phrase
 2583: ** query and return the doclist for the entire phrase query.
 2584: **
 2585: ** The result is stored in pTerm->doclist.
 2586: */
 2587: static int docListOfTerm(
 2588:   fulltext_vtab *v,     /* The full text index */
 2589:   int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
 2590:   QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
 2591:   DocList **ppResult    /* Write the result here */
 2592: ){
 2593:   DocList *pLeft, *pRight, *pNew;
 2594:   int i, rc;
 2595: 
 2596:   pLeft = docListNew(DL_POSITIONS);
 2597:   rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
 2598:   if( rc ){
 2599:     docListDelete(pLeft);
 2600:     return rc;
 2601:   }
 2602:   for(i=1; i<=pQTerm->nPhrase; i++){
 2603:     pRight = docListNew(DL_POSITIONS);
 2604:     rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
 2605:     if( rc ){
 2606:       docListDelete(pLeft);
 2607:       return rc;
 2608:     }
 2609:     pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
 2610:     docListPhraseMerge(pLeft, pRight, pNew);
 2611:     docListDelete(pLeft);
 2612:     docListDelete(pRight);
 2613:     pLeft = pNew;
 2614:   }
 2615:   *ppResult = pLeft;
 2616:   return SQLITE_OK;
 2617: }
 2618: 
 2619: /* Add a new term pTerm[0..nTerm-1] to the query *q.
 2620: */
 2621: static void queryAdd(Query *q, const char *pTerm, int nTerm){
 2622:   QueryTerm *t;
 2623:   ++q->nTerms;
 2624:   q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
 2625:   if( q->pTerms==0 ){
 2626:     q->nTerms = 0;
 2627:     return;
 2628:   }
 2629:   t = &q->pTerms[q->nTerms - 1];
 2630:   memset(t, 0, sizeof(*t));
 2631:   t->pTerm = malloc(nTerm+1);
 2632:   memcpy(t->pTerm, pTerm, nTerm);
 2633:   t->pTerm[nTerm] = 0;
 2634:   t->nTerm = nTerm;
 2635:   t->isOr = q->nextIsOr;
 2636:   q->nextIsOr = 0;
 2637:   t->iColumn = q->nextColumn;
 2638:   q->nextColumn = q->dfltColumn;
 2639: }
 2640: 
 2641: /*
 2642: ** Check to see if the string zToken[0...nToken-1] matches any
 2643: ** column name in the virtual table.   If it does,
 2644: ** return the zero-indexed column number.  If not, return -1.
 2645: */
 2646: static int checkColumnSpecifier(
 2647:   fulltext_vtab *pVtab,    /* The virtual table */
 2648:   const char *zToken,      /* Text of the token */
 2649:   int nToken               /* Number of characters in the token */
 2650: ){
 2651:   int i;
 2652:   for(i=0; i<pVtab->nColumn; i++){
 2653:     if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
 2654:         && pVtab->azColumn[i][nToken]==0 ){
 2655:       return i;
 2656:     }
 2657:   }
 2658:   return -1;
 2659: }
 2660: 
 2661: /*
 2662: ** Parse the text at pSegment[0..nSegment-1].  Add additional terms
 2663: ** to the query being assemblied in pQuery.
 2664: **
 2665: ** inPhrase is true if pSegment[0..nSegement-1] is contained within
 2666: ** double-quotes.  If inPhrase is true, then the first term
 2667: ** is marked with the number of terms in the phrase less one and
 2668: ** OR and "-" syntax is ignored.  If inPhrase is false, then every
 2669: ** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
 2670: */
 2671: static int tokenizeSegment(
 2672:   sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
 2673:   const char *pSegment, int nSegment,     /* Query expression being parsed */
 2674:   int inPhrase,                           /* True if within "..." */
 2675:   Query *pQuery                           /* Append results here */
 2676: ){
 2677:   const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
 2678:   sqlite3_tokenizer_cursor *pCursor;
 2679:   int firstIndex = pQuery->nTerms;
 2680:   int iCol;
 2681:   int nTerm = 1;
 2682:   
 2683:   int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
 2684:   if( rc!=SQLITE_OK ) return rc;
 2685:   pCursor->pTokenizer = pTokenizer;
 2686: 
 2687:   while( 1 ){
 2688:     const char *pToken;
 2689:     int nToken, iBegin, iEnd, iPos;
 2690: 
 2691:     rc = pModule->xNext(pCursor,
 2692:                         &pToken, &nToken,
 2693:                         &iBegin, &iEnd, &iPos);
 2694:     if( rc!=SQLITE_OK ) break;
 2695:     if( !inPhrase &&
 2696:         pSegment[iEnd]==':' &&
 2697:          (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
 2698:       pQuery->nextColumn = iCol;
 2699:       continue;
 2700:     }
 2701:     if( !inPhrase && pQuery->nTerms>0 && nToken==2
 2702:          && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
 2703:       pQuery->nextIsOr = 1;
 2704:       continue;
 2705:     }
 2706:     queryAdd(pQuery, pToken, nToken);
 2707:     if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
 2708:       pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
 2709:     }
 2710:     pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
 2711:     if( inPhrase ){
 2712:       nTerm++;
 2713:     }
 2714:   }
 2715: 
 2716:   if( inPhrase && pQuery->nTerms>firstIndex ){
 2717:     pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
 2718:   }
 2719: 
 2720:   return pModule->xClose(pCursor);
 2721: }
 2722: 
 2723: /* Parse a query string, yielding a Query object pQuery.
 2724: **
 2725: ** The calling function will need to queryClear() to clean up
 2726: ** the dynamically allocated memory held by pQuery.
 2727: */
 2728: static int parseQuery(
 2729:   fulltext_vtab *v,        /* The fulltext index */
 2730:   const char *zInput,      /* Input text of the query string */
 2731:   int nInput,              /* Size of the input text */
 2732:   int dfltColumn,          /* Default column of the index to match against */
 2733:   Query *pQuery            /* Write the parse results here. */
 2734: ){
 2735:   int iInput, inPhrase = 0;
 2736: 
 2737:   if( zInput==0 ) nInput = 0;
 2738:   if( nInput<0 ) nInput = strlen(zInput);
 2739:   pQuery->nTerms = 0;
 2740:   pQuery->pTerms = NULL;
 2741:   pQuery->nextIsOr = 0;
 2742:   pQuery->nextColumn = dfltColumn;
 2743:   pQuery->dfltColumn = dfltColumn;
 2744:   pQuery->pFts = v;
 2745: 
 2746:   for(iInput=0; iInput<nInput; ++iInput){
 2747:     int i;
 2748:     for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
 2749:     if( i>iInput ){
 2750:       tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
 2751:                        pQuery);
 2752:     }
 2753:     iInput = i;
 2754:     if( i<nInput ){
 2755:       assert( zInput[i]=='"' );
 2756:       inPhrase = !inPhrase;
 2757:     }
 2758:   }
 2759: 
 2760:   if( inPhrase ){
 2761:     /* unmatched quote */
 2762:     queryClear(pQuery);
 2763:     return SQLITE_ERROR;
 2764:   }
 2765:   return SQLITE_OK;
 2766: }
 2767: 
 2768: /* Perform a full-text query using the search expression in
 2769: ** zInput[0..nInput-1].  Return a list of matching documents
 2770: ** in pResult.
 2771: **
 2772: ** Queries must match column iColumn.  Or if iColumn>=nColumn
 2773: ** they are allowed to match against any column.
 2774: */
 2775: static int fulltextQuery(
 2776:   fulltext_vtab *v,      /* The full text index */
 2777:   int iColumn,           /* Match against this column by default */
 2778:   const char *zInput,    /* The query string */
 2779:   int nInput,            /* Number of bytes in zInput[] */
 2780:   DocList **pResult,     /* Write the result doclist here */
 2781:   Query *pQuery          /* Put parsed query string here */
 2782: ){
 2783:   int i, iNext, rc;
 2784:   DocList *pLeft = NULL;
 2785:   DocList *pRight, *pNew, *pOr;
 2786:   int nNot = 0;
 2787:   QueryTerm *aTerm;
 2788: 
 2789:   rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
 2790:   if( rc!=SQLITE_OK ) return rc;
 2791: 
 2792:   /* Merge AND terms. */
 2793:   aTerm = pQuery->pTerms;
 2794:   for(i = 0; i<pQuery->nTerms; i=iNext){
 2795:     if( aTerm[i].isNot ){
 2796:       /* Handle all NOT terms in a separate pass */
 2797:       nNot++;
 2798:       iNext = i + aTerm[i].nPhrase+1;
 2799:       continue;
 2800:     }
 2801:     iNext = i + aTerm[i].nPhrase + 1;
 2802:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
 2803:     if( rc ){
 2804:       queryClear(pQuery);
 2805:       return rc;
 2806:     }
 2807:     while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
 2808:       rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
 2809:       iNext += aTerm[iNext].nPhrase + 1;
 2810:       if( rc ){
 2811:         queryClear(pQuery);
 2812:         return rc;
 2813:       }
 2814:       pNew = docListNew(DL_DOCIDS);
 2815:       docListOrMerge(pRight, pOr, pNew);
 2816:       docListDelete(pRight);
 2817:       docListDelete(pOr);
 2818:       pRight = pNew;
 2819:     }
 2820:     if( pLeft==0 ){
 2821:       pLeft = pRight;
 2822:     }else{
 2823:       pNew = docListNew(DL_DOCIDS);
 2824:       docListAndMerge(pLeft, pRight, pNew);
 2825:       docListDelete(pRight);
 2826:       docListDelete(pLeft);
 2827:       pLeft = pNew;
 2828:     }
 2829:   }
 2830: 
 2831:   if( nNot && pLeft==0 ){
 2832:     /* We do not yet know how to handle a query of only NOT terms */
 2833:     return SQLITE_ERROR;
 2834:   }
 2835: 
 2836:   /* Do the EXCEPT terms */
 2837:   for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
 2838:     if( !aTerm[i].isNot ) continue;
 2839:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
 2840:     if( rc ){
 2841:       queryClear(pQuery);
 2842:       docListDelete(pLeft);
 2843:       return rc;
 2844:     }
 2845:     pNew = docListNew(DL_DOCIDS);
 2846:     docListExceptMerge(pLeft, pRight, pNew);
 2847:     docListDelete(pRight);
 2848:     docListDelete(pLeft);
 2849:     pLeft = pNew;
 2850:   }
 2851: 
 2852:   *pResult = pLeft;
 2853:   return rc;
 2854: }
 2855: 
 2856: /*
 2857: ** This is the xFilter interface for the virtual table.  See
 2858: ** the virtual table xFilter method documentation for additional
 2859: ** information.
 2860: **
 2861: ** If idxNum==QUERY_GENERIC then do a full table scan against
 2862: ** the %_content table.
 2863: **
 2864: ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
 2865: ** in the %_content table.
 2866: **
 2867: ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
 2868: ** column on the left-hand side of the MATCH operator is column
 2869: ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
 2870: ** side of the MATCH operator.
 2871: */
 2872: /* TODO(shess) Upgrade the cursor initialization and destruction to
 2873: ** account for fulltextFilter() being called multiple times on the
 2874: ** same cursor.  The current solution is very fragile.  Apply fix to
 2875: ** fts2 as appropriate.
 2876: */
 2877: static int fulltextFilter(
 2878:   sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
 2879:   int idxNum, const char *idxStr,   /* Which indexing scheme to use */
 2880:   int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
 2881: ){
 2882:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
 2883:   fulltext_vtab *v = cursor_vtab(c);
 2884:   int rc;
 2885:   char *zSql;
 2886: 
 2887:   TRACE(("FTS1 Filter %p\n",pCursor));
 2888: 
 2889:   zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
 2890:                           idxNum==QUERY_GENERIC ? "" : "where rowid=?");
 2891:   sqlite3_finalize(c->pStmt);
 2892:   rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
 2893:   sqlite3_free(zSql);
 2894:   if( rc!=SQLITE_OK ) return rc;
 2895: 
 2896:   c->iCursorType = idxNum;
 2897:   switch( idxNum ){
 2898:     case QUERY_GENERIC:
 2899:       break;
 2900: 
 2901:     case QUERY_ROWID:
 2902:       rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
 2903:       if( rc!=SQLITE_OK ) return rc;
 2904:       break;
 2905: 
 2906:     default:   /* full-text search */
 2907:     {
 2908:       const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
 2909:       DocList *pResult;
 2910:       assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
 2911:       assert( argc==1 );
 2912:       queryClear(&c->q);
 2913:       rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
 2914:       if( rc!=SQLITE_OK ) return rc;
 2915:       if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
 2916:       readerInit(&c->result, pResult);
 2917:       break;
 2918:     }
 2919:   }
 2920: 
 2921:   return fulltextNext(pCursor);
 2922: }
 2923: 
 2924: /* This is the xEof method of the virtual table.  The SQLite core
 2925: ** calls this routine to find out if it has reached the end of
 2926: ** a query's results set.
 2927: */
 2928: static int fulltextEof(sqlite3_vtab_cursor *pCursor){
 2929:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
 2930:   return c->eof;
 2931: }
 2932: 
 2933: /* This is the xColumn method of the virtual table.  The SQLite
 2934: ** core calls this method during a query when it needs the value
 2935: ** of a column from the virtual table.  This method needs to use
 2936: ** one of the sqlite3_result_*() routines to store the requested
 2937: ** value back in the pContext.
 2938: */
 2939: static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
 2940:                           sqlite3_context *pContext, int idxCol){
 2941:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
 2942:   fulltext_vtab *v = cursor_vtab(c);
 2943: 
 2944:   if( idxCol<v->nColumn ){
 2945:     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
 2946:     sqlite3_result_value(pContext, pVal);
 2947:   }else if( idxCol==v->nColumn ){
 2948:     /* The extra column whose name is the same as the table.
 2949:     ** Return a blob which is a pointer to the cursor
 2950:     */
 2951:     sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
 2952:   }
 2953:   return SQLITE_OK;
 2954: }
 2955: 
 2956: /* This is the xRowid method.  The SQLite core calls this routine to
 2957: ** retrive the rowid for the current row of the result set.  The
 2958: ** rowid should be written to *pRowid.
 2959: */
 2960: static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
 2961:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
 2962: 
 2963:   *pRowid = sqlite3_column_int64(c->pStmt, 0);
 2964:   return SQLITE_OK;
 2965: }
 2966: 
 2967: /* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
 2968:  * we also store positions and offsets in the hash table using the given
 2969:  * column number. */
 2970: static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
 2971:                       const char *zText, int iColumn){
 2972:   sqlite3_tokenizer *pTokenizer = v->pTokenizer;
 2973:   sqlite3_tokenizer_cursor *pCursor;
 2974:   const char *pToken;
 2975:   int nTokenBytes;
 2976:   int iStartOffset, iEndOffset, iPosition;
 2977:   int rc;
 2978: 
 2979:   rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
 2980:   if( rc!=SQLITE_OK ) return rc;
 2981: 
 2982:   pCursor->pTokenizer = pTokenizer;
 2983:   while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
 2984:                                                &pToken, &nTokenBytes,
 2985:                                                &iStartOffset, &iEndOffset,
 2986:                                                &iPosition) ){
 2987:     DocList *p;
 2988: 
 2989:     /* Positions can't be negative; we use -1 as a terminator internally. */
 2990:     if( iPosition<0 ){
 2991:       pTokenizer->pModule->xClose(pCursor);
 2992:       return SQLITE_ERROR;
 2993:     }
 2994: 
 2995:     p = fts1HashFind(terms, pToken, nTokenBytes);
 2996:     if( p==NULL ){
 2997:       p = docListNew(DL_DEFAULT);
 2998:       docListAddDocid(p, iDocid);
 2999:       fts1HashInsert(terms, pToken, nTokenBytes, p);
 3000:     }
 3001:     if( iColumn>=0 ){
 3002:       docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
 3003:     }
 3004:   }
 3005: 
 3006:   /* TODO(shess) Check return?  Should this be able to cause errors at
 3007:   ** this point?  Actually, same question about sqlite3_finalize(),
 3008:   ** though one could argue that failure there means that the data is
 3009:   ** not durable.  *ponder*
 3010:   */
 3011:   pTokenizer->pModule->xClose(pCursor);
 3012:   return rc;
 3013: }
 3014: 
/* Update the %_terms table to map the term [pTerm] to the given rowid.
**
** Merges the doclist [d] into the stored index data for the term
** pTerm[0..nTerm-1], starting with the entry in segment 0:
**
**   * If term_select() reports SQLITE_DONE for segment 0 (presumably
**     meaning that no such row exists), a fresh doclist is initialized,
**     updated with [d] and written with term_insert().
**
**   * If it reports SQLITE_ROW, the loaded doclist is updated with [d].
**     When the result is no larger than CHUNK_MAX bytes the row is
**     rewritten in place with term_update().  Otherwise the row is deleted
**     and the data is pushed into successively higher segments, merging
**     with whatever is already stored there, until an insert succeeds.
**
**   * Any other result from term_select() yields SQLITE_ERROR.
**
** Returns the result code of the last storage operation performed.
*/
static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
                             DocList *d){
  sqlite_int64 iIndexRow;
  DocList doclist;
  int iSegment = 0, rc;

  rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
  if( rc==SQLITE_DONE ){
    docListInit(&doclist, DL_DEFAULT, 0, 0);
    docListUpdate(&doclist, d);
    /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
    rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
    goto err;
  }
  /* NOTE(review): this path returns without docListDestroy(), presumably
  ** because term_select() leaves doclist unset on failure -- confirm. */
  if( rc!=SQLITE_ROW ) return SQLITE_ERROR;

  docListUpdate(&doclist, d);
  if( doclist.nData<=CHUNK_MAX ){
    rc = term_update(v, iIndexRow, &doclist);
    goto err;
  }

  /* Doclist doesn't fit, delete what's there, and accumulate
  ** forward.
  */
  rc = term_delete(v, iIndexRow);
  if( rc!=SQLITE_OK ) goto err;

  /* Try to insert the doclist into a higher segment bucket.  On
  ** failure, accumulate existing doclist with the doclist from that
  ** bucket, and put results in the next bucket.
  */
  iSegment++;
  while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
                         &doclist))!=SQLITE_OK ){
    sqlite_int64 iSegmentRow;
    DocList old;
    int rc2;

    /* Retain old error in case the term_insert() error was really an
    ** error rather than a bounced insert.
    */
    rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
    if( rc2!=SQLITE_ROW ) goto err;

    rc = term_delete(v, iSegmentRow);
    if( rc!=SQLITE_OK ) goto err;

    /* Reusing lowest-number deleted row keeps the index smaller. */
    if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;

    /* doclist contains the newer data, so accumulate it over old.
    ** Then steal accumulated data for doclist.
    */
    docListAccumulate(&old, &doclist);
    docListDestroy(&doclist);
    /* Struct copy: doclist now holds the buffers that belonged to old */
    doclist = old;

    iSegment++;
  }

  /* Common exit: release the working doclist and report the last result */
 err:
  docListDestroy(&doclist);
  return rc;
}
 3081: 
 3082: /* Add doclists for all terms in [pValues] to the hash table [terms]. */
 3083: static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
 3084:                 sqlite3_value **pValues){
 3085:   int i;
 3086:   for(i = 0; i < v->nColumn ; ++i){
 3087:     char *zText = (char*)sqlite3_value_text(pValues[i]);
 3088:     int rc = buildTerms(v, terms, iRowid, zText, i);
 3089:     if( rc!=SQLITE_OK ) return rc;
 3090:   }
 3091:   return SQLITE_OK;
 3092: }
 3093: 
 3094: /* Add empty doclists for all terms in the given row's content to the hash
 3095:  * table [pTerms]. */
 3096: static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
 3097:   const char **pValues;
 3098:   int i;
 3099: 
 3100:   int rc = content_select(v, iRowid, &pValues);
 3101:   if( rc!=SQLITE_OK ) return rc;
 3102: 
 3103:   for(i = 0 ; i < v->nColumn; ++i) {
 3104:     rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
 3105:     if( rc!=SQLITE_OK ) break;
 3106:   }
 3107: 
 3108:   freeStringArray(v->nColumn, pValues);
 3109:   return SQLITE_OK;
 3110: }
 3111: 
 3112: /* Insert a row into the %_content table; set *piRowid to be the ID of the
 3113:  * new row.  Fill [pTerms] with new doclists for the %_term table. */
 3114: static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
 3115:                         sqlite3_value **pValues,
 3116:                         sqlite_int64 *piRowid, fts1Hash *pTerms){
 3117:   int rc;
 3118: 
 3119:   rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
 3120:   if( rc!=SQLITE_OK ) return rc;
 3121:   *piRowid = sqlite3_last_insert_rowid(v->db);
 3122:   return insertTerms(v, pTerms, *piRowid, pValues);
 3123: }
 3124: 
 3125: /* Delete a row from the %_content table; fill [pTerms] with empty doclists
 3126:  * to be written to the %_term table. */
 3127: static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
 3128:   int rc = deleteTerms(v, pTerms, iRow);
 3129:   if( rc!=SQLITE_OK ) return rc;
 3130:   return content_delete(v, iRow);  /* execute an SQL DELETE */
 3131: }
 3132: 
 3133: /* Update a row in the %_content table; fill [pTerms] with new doclists for the
 3134:  * %_term table. */
 3135: static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
 3136:                         sqlite3_value **pValues, fts1Hash *pTerms){
 3137:   /* Generate an empty doclist for each term that previously appeared in this
 3138:    * row. */
 3139:   int rc = deleteTerms(v, pTerms, iRow);
 3140:   if( rc!=SQLITE_OK ) return rc;
 3141: 
 3142:   rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
 3143:   if( rc!=SQLITE_OK ) return rc;
 3144: 
 3145:   /* Now add positions for terms which appear in the updated row. */
 3146:   return insertTerms(v, pTerms, iRow, pValues);
 3147: }
 3148: 
 3149: /* This function implements the xUpdate callback; it is the top-level entry
 3150:  * point for inserting, deleting or updating a row in a full-text table. */
 3151: static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
 3152:                    sqlite_int64 *pRowid){
 3153:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
 3154:   fts1Hash terms;   /* maps term string -> PosList */
 3155:   int rc;
 3156:   fts1HashElem *e;
 3157: 
 3158:   TRACE(("FTS1 Update %p\n", pVtab));
 3159:   
 3160:   fts1HashInit(&terms, FTS1_HASH_STRING, 1);
 3161: 
 3162:   if( nArg<2 ){
 3163:     rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
 3164:   } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
 3165:     /* An update:
 3166:      * ppArg[0] = old rowid
 3167:      * ppArg[1] = new rowid
 3168:      * ppArg[2..2+v->nColumn-1] = values
 3169:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
 3170:      */
 3171:     sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
 3172:     if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
 3173:       sqlite3_value_int64(ppArg[1]) != rowid ){
 3174:       rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
 3175:     } else {
 3176:       assert( nArg==2+v->nColumn+1);
 3177:       rc = index_update(v, rowid, &ppArg[2], &terms);
 3178:     }
 3179:   } else {
 3180:     /* An insert:
 3181:      * ppArg[1] = requested rowid
 3182:      * ppArg[2..2+v->nColumn-1] = values
 3183:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
 3184:      */
 3185:     assert( nArg==2+v->nColumn+1);
 3186:     rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
 3187:   }
 3188: 
 3189:   if( rc==SQLITE_OK ){
 3190:     /* Write updated doclists to disk. */
 3191:     for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
 3192:       DocList *p = fts1HashData(e);
 3193:       rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
 3194:       if( rc!=SQLITE_OK ) break;
 3195:     }
 3196:   }
 3197: 
 3198:   /* clean up */
 3199:   for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
 3200:     DocList *p = fts1HashData(e);
 3201:     docListDelete(p);
 3202:   }
 3203:   fts1HashClear(&terms);
 3204: 
 3205:   return rc;
 3206: }
 3207: 
 3208: /*
 3209: ** Implementation of the snippet() function for FTS1
 3210: */
 3211: static void snippetFunc(
 3212:   sqlite3_context *pContext,
 3213:   int argc,
 3214:   sqlite3_value **argv
 3215: ){
 3216:   fulltext_cursor *pCursor;
 3217:   if( argc<1 ) return;
 3218:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
 3219:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
 3220:     sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
 3221:   }else{
 3222:     const char *zStart = "<b>";
 3223:     const char *zEnd = "</b>";
 3224:     const char *zEllipsis = "<b>...</b>";
 3225:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
 3226:     if( argc>=2 ){
 3227:       zStart = (const char*)sqlite3_value_text(argv[1]);
 3228:       if( argc>=3 ){
 3229:         zEnd = (const char*)sqlite3_value_text(argv[2]);
 3230:         if( argc>=4 ){
 3231:           zEllipsis = (const char*)sqlite3_value_text(argv[3]);
 3232:         }
 3233:       }
 3234:     }
 3235:     snippetAllOffsets(pCursor);
 3236:     snippetText(pCursor, zStart, zEnd, zEllipsis);
 3237:     sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
 3238:                         pCursor->snippet.nSnippet, SQLITE_STATIC);
 3239:   }
 3240: }
 3241: 
 3242: /*
 3243: ** Implementation of the offsets() function for FTS1
 3244: */
 3245: static void snippetOffsetsFunc(
 3246:   sqlite3_context *pContext,
 3247:   int argc,
 3248:   sqlite3_value **argv
 3249: ){
 3250:   fulltext_cursor *pCursor;
 3251:   if( argc<1 ) return;
 3252:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
 3253:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
 3254:     sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
 3255:   }else{
 3256:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
 3257:     snippetAllOffsets(pCursor);
 3258:     snippetOffsetText(&pCursor->snippet);
 3259:     sqlite3_result_text(pContext,
 3260:                         pCursor->snippet.zOffset, pCursor->snippet.nOffset,
 3261:                         SQLITE_STATIC);
 3262:   }
 3263: }
 3264: 
 3265: /*
 3266: ** This routine implements the xFindFunction method for the FTS1
 3267: ** virtual table.
 3268: */
 3269: static int fulltextFindFunction(
 3270:   sqlite3_vtab *pVtab,
 3271:   int nArg,
 3272:   const char *zName,
 3273:   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
 3274:   void **ppArg
 3275: ){
 3276:   if( strcmp(zName,"snippet")==0 ){
 3277:     *pxFunc = snippetFunc;
 3278:     return 1;
 3279:   }else if( strcmp(zName,"offsets")==0 ){
 3280:     *pxFunc = snippetOffsetsFunc;
 3281:     return 1;
 3282:   }
 3283:   return 0;
 3284: }
 3285: 
 3286: /*
 3287: ** Rename an fts1 table.
 3288: */
 3289: static int fulltextRename(
 3290:   sqlite3_vtab *pVtab,
 3291:   const char *zName
 3292: ){
 3293:   fulltext_vtab *p = (fulltext_vtab *)pVtab;
 3294:   int rc = SQLITE_NOMEM;
 3295:   char *zSql = sqlite3_mprintf(
 3296:     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
 3297:     "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
 3298:     , p->zDb, p->zName, zName
 3299:     , p->zDb, p->zName, zName
 3300:   );
 3301:   if( zSql ){
 3302:     rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
 3303:     sqlite3_free(zSql);
 3304:   }
 3305:   return rc;
 3306: }
 3307: 
/*
** Method table of the FTS1 virtual table module.  sqlite3Fts1Init()
** registers it with SQLite under the module name "fts1".  The entries are
** positional, so their order must match the field order of struct
** sqlite3_module.  The transaction hooks xBegin, xSync, xCommit and
** xRollback are not implemented and are left as 0.
*/
static const sqlite3_module fulltextModule = {
  /* iVersion      */ 0,
  /* xCreate       */ fulltextCreate,
  /* xConnect      */ fulltextConnect,
  /* xBestIndex    */ fulltextBestIndex,
  /* xDisconnect   */ fulltextDisconnect,
  /* xDestroy      */ fulltextDestroy,
  /* xOpen         */ fulltextOpen,
  /* xClose        */ fulltextClose,
  /* xFilter       */ fulltextFilter,
  /* xNext         */ fulltextNext,
  /* xEof          */ fulltextEof,
  /* xColumn       */ fulltextColumn,
  /* xRowid        */ fulltextRowid,
  /* xUpdate       */ fulltextUpdate,
  /* xBegin        */ 0, 
  /* xSync         */ 0,
  /* xCommit       */ 0,
  /* xRollback     */ 0,
  /* xFindFunction */ fulltextFindFunction,
  /* xRename       */ fulltextRename,
};
 3330: 
 3331: int sqlite3Fts1Init(sqlite3 *db){
 3332:   sqlite3_overload_function(db, "snippet", -1);
 3333:   sqlite3_overload_function(db, "offsets", -1);
 3334:   return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
 3335: }
 3336: 
 3337: #if !SQLITE_CORE
 3338: int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
 3339:                            const sqlite3_api_routines *pApi){
 3340:   SQLITE_EXTENSION_INIT2(pApi)
 3341:   return sqlite3Fts1Init(db);
 3342: }
 3343: #endif
 3344: 
 3345: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>