Annotation of embedaddon/sqlite3/ext/fts2/fts2.c, revision 1.1

1.1     ! misho       1: /* fts2 has a design flaw which can lead to database corruption (see
        !             2: ** below).  It is recommended not to use it any longer, instead use
        !             3: ** fts3 (or higher).  If you believe that your use of fts2 is safe,
        !             4: ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS.
        !             5: */
        !             6: #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \
        !             7:         && !defined(SQLITE_ENABLE_BROKEN_FTS2)
        !             8: #error fts2 has a design flaw and has been deprecated.
        !             9: #endif
        !            10: /* The flaw is that fts2 uses the content table's unaliased rowid as
        !            11: ** the unique docid.  fts2 embeds the rowid in the index it builds,
        !            12: ** and expects the rowid to not change.  The SQLite VACUUM operation
        !            13: ** will renumber such rowids, thereby breaking fts2.  If you are using
        !            14: ** fts2 in a system which has disabled VACUUM, then you can continue
        !            15: ** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
        !            16: ** VACUUM, though systems using auto_vacuum are unlikely to invoke
        !            17: ** VACUUM.
        !            18: **
        !            19: ** Unlike fts1, which is safe across VACUUM if you never delete
        !            20: ** documents, fts2 has a second exposure to this flaw, in the segments
        !            21: ** table.  So fts2 should be considered unsafe across VACUUM in all
        !            22: ** cases.
        !            23: */
        !            24: 
        !            25: /*
        !            26: ** 2006 Oct 10
        !            27: **
        !            28: ** The author disclaims copyright to this source code.  In place of
        !            29: ** a legal notice, here is a blessing:
        !            30: **
        !            31: **    May you do good and not evil.
        !            32: **    May you find forgiveness for yourself and forgive others.
        !            33: **    May you share freely, never taking more than you give.
        !            34: **
        !            35: ******************************************************************************
        !            36: **
        !            37: ** This is an SQLite module implementing full-text search.
        !            38: */
        !            39: 
        !            40: /*
        !            41: ** The code in this file is only compiled if:
        !            42: **
        !            43: **     * The FTS2 module is being built as an extension
        !            44: **       (in which case SQLITE_CORE is not defined), or
        !            45: **
        !            46: **     * The FTS2 module is being built into the core of
        !            47: **       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
        !            48: */
        !            49: 
        !            50: /* TODO(shess) Consider exporting this comment to an HTML file or the
        !            51: ** wiki.
        !            52: */
        !            53: /* The full-text index is stored in a series of b+tree (-like)
        !            54: ** structures called segments which map terms to doclists.  The
        !            55: ** structures are like b+trees in layout, but are constructed from the
        !            56: ** bottom up in optimal fashion and are not updatable.  Since trees
        !            57: ** are built from the bottom up, things will be described from the
        !            58: ** bottom up.
        !            59: **
        !            60: **
        !            61: **** Varints ****
        !            62: ** The basic unit of encoding is a variable-length integer called a
        !            63: ** varint.  We encode variable-length integers in little-endian order
        !            64: ** using seven bits per byte as follows:
        !            65: **
        !            66: ** KEY:
        !            67: **         A = 0xxxxxxx    7 bits of data and one flag bit
        !            68: **         B = 1xxxxxxx    7 bits of data and one flag bit
        !            69: **
        !            70: **  7 bits - A
        !            71: ** 14 bits - BA
        !            72: ** 21 bits - BBA
        !            73: ** and so on.
        !            74: **
        !            75: ** This is identical to how sqlite encodes varints (see util.c).
        !            76: **
        !            77: **
        !            78: **** Document lists ****
        !            79: ** A doclist (document list) holds a docid-sorted list of hits for a
        !            80: ** given term.  Doclists hold docids, and can optionally associate
        !            81: ** token positions and offsets with docids.
        !            82: **
        !            83: ** A DL_POSITIONS_OFFSETS doclist is stored like this:
        !            84: **
        !            85: ** array {
        !            86: **   varint docid;
        !            87: **   array {                (position list for column 0)
        !            88: **     varint position;     (delta from previous position plus POS_BASE)
        !            89: **     varint startOffset;  (delta from previous startOffset)
        !            90: **     varint endOffset;    (delta from startOffset)
        !            91: **   }
        !            92: **   array {
        !            93: **     varint POS_COLUMN;   (marks start of position list for new column)
        !            94: **     varint column;       (index of new column)
        !            95: **     array {
        !            96: **       varint position;   (delta from previous position plus POS_BASE)
        !            97: **       varint startOffset;(delta from previous startOffset)
        !            98: **       varint endOffset;  (delta from startOffset)
        !            99: **     }
        !           100: **   }
        !           101: **   varint POS_END;        (marks end of positions for this document)
        !           102: ** }
        !           103: **
        !           104: ** Here, array { X } means zero or more occurrences of X, adjacent in
        !           105: ** memory.  A "position" is an index of a token in the token stream
        !           106: ** generated by the tokenizer, while an "offset" is a byte offset,
        !           107: ** both based at 0.  Note that POS_END and POS_COLUMN occur in the
        !           108: ** same logical place as the position element, and act as sentinels
        !           109: ** ending a position list array.
        !           110: **
        !           111: ** A DL_POSITIONS doclist omits the startOffset and endOffset
        !           112: ** information.  A DL_DOCIDS doclist omits both the position and
        !           113: ** offset information, becoming an array of varint-encoded docids.
        !           114: **
        !           115: ** On-disk data is stored as type DL_DEFAULT, so we don't serialize
        !           116: ** the type.  Due to how deletion is implemented in the segmentation
        !           117: ** system, on-disk doclists MUST store at least positions.
        !           118: **
        !           119: **
        !           120: **** Segment leaf nodes ****
        !           121: ** Segment leaf nodes store terms and doclists, ordered by term.  Leaf
        !           122: ** nodes are written using LeafWriter, and read using LeafReader (to
        !           123: ** iterate through a single leaf node's data) and LeavesReader (to
        !           124: ** iterate through a segment's entire leaf layer).  Leaf nodes have
        !           125: ** the format:
        !           126: **
        !           127: ** varint iHeight;             (height from leaf level, always 0)
        !           128: ** varint nTerm;               (length of first term)
        !           129: ** char pTerm[nTerm];          (content of first term)
        !           130: ** varint nDoclist;            (length of term's associated doclist)
        !           131: ** char pDoclist[nDoclist];    (content of doclist)
        !           132: ** array {
        !           133: **                             (further terms are delta-encoded)
        !           134: **   varint nPrefix;           (length of prefix shared with previous term)
        !           135: **   varint nSuffix;           (length of unshared suffix)
        !           136: **   char pTermSuffix[nSuffix];(unshared suffix of next term)
        !           137: **   varint nDoclist;          (length of term's associated doclist)
        !           138: **   char pDoclist[nDoclist];  (content of doclist)
        !           139: ** }
        !           140: **
        !           141: ** Here, array { X } means zero or more occurrences of X, adjacent in
        !           142: ** memory.
        !           143: **
        !           144: ** Leaf nodes are broken into blocks which are stored contiguously in
        !           145: ** the %_segments table in sorted order.  This means that when the end
        !           146: ** of a node is reached, the next term is in the node with the next
        !           147: ** greater node id.
        !           148: **
        !           149: ** New data is spilled to a new leaf node when the current node
        !           150: ** exceeds LEAF_MAX bytes (default 2048).  New data which itself is
        !           151: ** larger than STANDALONE_MIN (default 1024) is placed in a standalone
        !           152: ** node (a leaf node with a single term and doclist).  The goal of
        !           153: ** these settings is to pack together groups of small doclists while
        !           154: ** making it efficient to directly access large doclists.  The
        !           155: ** assumption is that large doclists represent terms which are more
        !           156: ** likely to be query targets.
        !           157: **
        !           158: ** TODO(shess) It may be useful for blocking decisions to be more
        !           159: ** dynamic.  For instance, it may make more sense to have a 2.5k leaf
        !           160: ** node rather than splitting into 2k and .5k nodes.  My intuition is
        !           161: ** that this might extend through 2x or 4x the pagesize.
        !           162: **
        !           163: **
        !           164: **** Segment interior nodes ****
        !           165: ** Segment interior nodes store blockids for subtree nodes and terms
        !           166: ** to describe what data is stored by each subtree.  Interior
        !           167: ** nodes are written using InteriorWriter, and read using
        !           168: ** InteriorReader.  InteriorWriters are created as needed when
        !           169: ** SegmentWriter creates new leaf nodes, or when an interior node
        !           170: ** itself grows too big and must be split.  The format of interior
        !           171: ** nodes:
        !           172: **
        !           173: ** varint iHeight;           (height from leaf level, always >0)
        !           174: ** varint iBlockid;          (block id of node's leftmost subtree)
        !           175: ** optional {
        !           176: **   varint nTerm;           (length of first term)
        !           177: **   char pTerm[nTerm];      (content of first term)
        !           178: **   array {
        !           179: **                                (further terms are delta-encoded)
        !           180: **     varint nPrefix;            (length of shared prefix with previous term)
        !           181: **     varint nSuffix;            (length of unshared suffix)
        !           182: **     char pTermSuffix[nSuffix]; (unshared suffix of next term)
        !           183: **   }
        !           184: ** }
        !           185: **
        !           186: ** Here, optional { X } means an optional element, while array { X }
        !           187: ** means zero or more occurrences of X, adjacent in memory.
        !           188: **
        !           189: ** An interior node encodes n terms separating n+1 subtrees.  The
        !           190: ** subtree blocks are contiguous, so only the first subtree's blockid
        !           191: ** is encoded.  The subtree at iBlockid will contain all terms less
        !           192: ** than the first term encoded (or all terms if no term is encoded).
        !           193: ** Otherwise, for terms greater than or equal to pTerm[i] but less
        !           194: ** than pTerm[i+1], the subtree for that term will be rooted at
        !           195: ** iBlockid+i.  Interior nodes only store enough term data to
        !           196: ** distinguish adjacent children (if the rightmost term of the left
        !           197: ** child is "something", and the leftmost term of the right child is
        !           198: ** "wicked", only "w" is stored).
        !           199: **
        !           200: ** New data is spilled to a new interior node at the same height when
        !           201: ** the current node exceeds INTERIOR_MAX bytes (default 2048).
        !           202: ** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
        !           203: ** interior nodes and making the tree too skinny.  The interior nodes
        !           204: ** at a given height are naturally tracked by interior nodes at
        !           205: ** height+1, and so on.
        !           206: **
        !           207: **
        !           208: **** Segment directory ****
        !           209: ** The segment directory in table %_segdir stores meta-information for
        !           210: ** merging and deleting segments, and also the root node of the
        !           211: ** segment's tree.
        !           212: **
        !           213: ** The root node is the top node of the segment's tree after encoding
        !           214: ** the entire segment, restricted to ROOT_MAX bytes (default 1024).
        !           215: ** This could be either a leaf node or an interior node.  If the top
        !           216: ** node requires more than ROOT_MAX bytes, it is flushed to %_segments
        !           217: ** and a new root interior node is generated (which should always fit
        !           218: ** within ROOT_MAX because it only needs space for 2 varints, the
        !           219: ** height and the blockid of the previous root).
        !           220: **
        !           221: ** The meta-information in the segment directory is:
        !           222: **   level               - segment level (see below)
        !           223: **   idx                 - index within level
        !           224: **                       - (level,idx uniquely identify a segment)
        !           225: **   start_block         - first leaf node
        !           226: **   leaves_end_block    - last leaf node
        !           227: **   end_block           - last block (including interior nodes)
        !           228: **   root                - contents of root node
        !           229: **
        !           230: ** If the root node is a leaf node, then start_block,
        !           231: ** leaves_end_block, and end_block are all 0.
        !           232: **
        !           233: **
        !           234: **** Segment merging ****
        !           235: ** To amortize update costs, segments are grouped into levels and
        !           236: ** merged in batches.  Each increase in level represents exponentially
        !           237: ** more documents.
        !           238: **
        !           239: ** New documents (actually, document updates) are tokenized and
        !           240: ** written individually (using LeafWriter) to a level 0 segment, with
        !           241: ** incrementing idx.  When idx reaches MERGE_COUNT (default 16), all
        !           242: ** level 0 segments are merged into a single level 1 segment.  Level 1
        !           243: ** is populated like level 0, and eventually MERGE_COUNT level 1
        !           244: ** segments are merged to a single level 2 segment (representing
        !           245: ** MERGE_COUNT^2 updates), and so on.
        !           246: **
        !           247: ** A segment merge traverses all segments at a given level in
        !           248: ** parallel, performing a straightforward sorted merge.  Since segment
        !           249: ** leaf nodes are written in to the %_segments table in order, this
        !           250: ** merge traverses the underlying sqlite disk structures efficiently.
        !           251: ** After the merge, all segment blocks from the merged level are
        !           252: ** deleted.
        !           253: **
        !           254: ** MERGE_COUNT controls how often we merge segments.  16 seems to be
        !           255: ** somewhat of a sweet spot for insertion performance.  32 and 64 show
        !           256: ** very similar performance numbers to 16 on insertion, though they're
        !           257: ** a tiny bit slower (perhaps due to more overhead in merge-time
        !           258: ** sorting).  8 is about 20% slower than 16, 4 about 50% slower than
        !           259: ** 16, 2 about 66% slower than 16.
        !           260: **
        !           261: ** At query time, high MERGE_COUNT increases the number of segments
        !           262: ** which need to be scanned and merged.  For instance, with 100k docs
        !           263: ** inserted:
        !           264: **
        !           265: **    MERGE_COUNT   segments
        !           266: **       16           25
        !           267: **        8           12
        !           268: **        4           10
        !           269: **        2            6
        !           270: **
        !           271: ** This appears to have only a moderate impact on queries for very
        !           272: ** frequent terms (which are somewhat dominated by segment merge
        !           273: ** costs), and infrequent and non-existent terms still seem to be fast
        !           274: ** even with many segments.
        !           275: **
        !           276: ** TODO(shess) That said, it would be nice to have a better query-side
        !           277: ** argument for MERGE_COUNT of 16.  Also, it is possible/likely that
        !           278: ** optimizations to things like doclist merging will swing the sweet
        !           279: ** spot around.
        !           280: **
        !           281: **
        !           282: **
        !           283: **** Handling of deletions and updates ****
        !           284: ** Since we're using a segmented structure, with no docid-oriented
        !           285: ** index into the term index, we clearly cannot simply update the term
        !           286: ** index when a document is deleted or updated.  For deletions, we
        !           287: ** write an empty doclist (varint(docid) varint(POS_END)), for updates
        !           288: ** we simply write the new doclist.  Segment merges overwrite older
        !           289: ** data for a particular docid with newer data, so deletes or updates
        !           290: ** will eventually overtake the earlier data and knock it out.  The
        !           291: ** query logic likewise merges doclists so that newer data knocks out
        !           292: ** older data.
        !           293: **
        !           294: ** TODO(shess) Provide a VACUUM type operation to clear out all
        !           295: ** deletions and duplications.  This would basically be a forced merge
        !           296: ** into a single segment.
        !           297: */
        !           298: 
        !           299: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
        !           300: 
        !           301: #if defined(SQLITE_ENABLE_FTS2) && !defined(SQLITE_CORE)
        !           302: # define SQLITE_CORE 1
        !           303: #endif
        !           304: 
        !           305: #include <assert.h>
        !           306: #include <stdlib.h>
        !           307: #include <stdio.h>
        !           308: #include <string.h>
        !           309: #include "fts2.h"
        !           310: #include "fts2_hash.h"
        !           311: #include "fts2_tokenizer.h"
        !           312: #include "sqlite3.h"
        !           313: #include "sqlite3ext.h"
        !           314: SQLITE_EXTENSION_INIT1
        !           315: 
        !           316: 
        !           317: /* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it
        !           318: ** would be nice to order the file better, perhaps something along the
        !           319: ** lines of:
        !           320: **
        !           321: **  - utility functions
        !           322: **  - table setup functions
        !           323: **  - table update functions
        !           324: **  - table query functions
        !           325: **
        !           326: ** Put the query functions last because they're likely to reference
        !           327: ** typedefs or functions from the table update section.
        !           328: */
        !           329: 
        /* Debug tracing.  Change "#if 0" to "#if 1" to have TRACE((fmt, ...))
        ** print to stdout and flush immediately.  The argument must carry its
        ** own parentheses, since it is pasted directly after printf.
        **
        ** The enabled form is wrapped in do{}while(0) so that it expands to
        ** exactly one statement; otherwise "if( x ) TRACE((...));" would run
        ** the fflush() unconditionally.
        */
        #if 0
        # define TRACE(A)  do{ printf A; fflush(stdout); }while(0)
        #else
        # define TRACE(A)
        #endif
        !           335: 
        !           336: /* It is not safe to call isspace(), tolower(), or isalnum() on
        !           337: ** hi-bit-set characters.  This is the same solution used in the
        !           338: ** tokenizer.
        !           339: */
        !           340: /* TODO(shess) The snippet-generation code should be using the
        !           341: ** tokenizer-generated tokens rather than doing its own local
        !           342: ** tokenization.
        !           343: */
        !           344: /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
        /* Return 1 if c is one of the six ASCII whitespace characters (space,
        ** tab, newline, carriage return, vertical tab, form feed), else 0.
        ** Unlike isspace(), this never touches the locale tables, so it is
        ** safe for bytes with the high bit set.
        */
        static int safe_isspace(char c){
          switch( c ){
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\v':
            case '\f':
              return 1;
            default:
              return 0;
          }
        }
        /* Map ASCII 'A'..'Z' to 'a'..'z'; every other value, including bytes
        ** with the high bit set, is returned unchanged.
        */
        static int safe_tolower(char c){
          int ch = c;
          if( ch<'A' || ch>'Z' ) return ch;
          return ch - 'A' + 'a';
        }
        /* Return 1 if c is an ASCII digit or letter, else 0.  Bytes with the
        ** high bit set are never considered alphanumeric.
        */
        static int safe_isalnum(char c){
          if( c>='0' && c<='9' ) return 1;
          if( c>='A' && c<='Z' ) return 1;
          if( c>='a' && c<='z' ) return 1;
          return 0;
        }
        !           354: 
        /* How much per-hit detail a doclist carries.  Each level includes
        ** everything stored by the level before it.
        */
        enum DocListType {
          DL_DOCIDS = 0,            /* docids only */
          DL_POSITIONS = 1,         /* docids + positions */
          DL_POSITIONS_OFFSETS = 2  /* docids + positions + offsets */
        };
        typedef enum DocListType DocListType;
        !           360: 
        /*
        ** By default, only positions and not offsets are stored in the doclists.
        ** To change this so that offsets are stored too, compile with
        **
        **          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
        **
        ** If DL_DEFAULT is set to DL_DOCIDS, your table can only be inserted
        ** into (no deletes or updates).
        **
        ** The #ifndef guard means a value supplied on the compiler command
        ** line takes precedence over the default chosen here.
        */
        #ifndef DL_DEFAULT
        # define DL_DEFAULT DL_POSITIONS
        #endif
        !           373: 
        /* Reserved values in the position slot of a position list.  Real
        ** positions are stored as a delta plus POS_BASE, so they can never
        ** collide with the two sentinel values.
        */
        enum {
          POS_END = 0,        /* end of this position list */
          POS_COLUMN = 1,     /* followed by new column number */
          POS_BASE = 2        /* offset added to every encoded position delta */
        };
        !           379: 
        /* MERGE_COUNT controls how often we merge segments (see comment at
        ** top of file): once a level holds MERGE_COUNT segments, they are all
        ** merged into a single segment at the next level.
        */
        #define MERGE_COUNT 16
        !           384: 
        !           385: /* utility functions */
        !           386: 
        !           387: /* CLEAR() and SCRAMBLE() abstract memset() on a pointer to a single
        !           388: ** record to prevent errors of the form:
        !           389: **
        !           390: ** my_function(SomeType *b){
        !           391: **   memset(b, '\0', sizeof(b));  // sizeof(b)!=sizeof(*b)
        !           392: ** }
        !           393: */
        !           394: /* TODO(shess) Obvious candidates for a header file. */
        /* Zero the single record that b points to.  The size is taken from
        ** the pointed-to object, never from the pointer itself. */
        #define CLEAR(b) memset((b), 0, sizeof(*(b)))
        !           396: 
        /* In debug builds, overwrite the record that b points to with the
        ** byte 0x55, so that use of a destroyed record is easier to notice.
        ** With NDEBUG defined this expands to nothing.
        */
        #ifdef NDEBUG
        #  define SCRAMBLE(b)
        #else
        #  define SCRAMBLE(b) memset((b), 0x55, sizeof(*(b)))
        #endif
        !           402: 
        /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer.
        ** Each byte carries 7 bits of payload, and 64 bits need ten 7-bit groups
        ** (nine groups hold only 63 bits). */
        #define VARINT_MAX 10
        !           405: 
        !           406: /* Write a 64-bit variable-length integer to memory starting at p[0].
        !           407:  * The length of data written will be between 1 and VARINT_MAX bytes.
        !           408:  * The number of bytes written is returned. */
        !           409: static int putVarint(char *p, sqlite_int64 v){
        !           410:   unsigned char *q = (unsigned char *) p;
        !           411:   sqlite_uint64 vu = v;
        !           412:   do{
        !           413:     *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
        !           414:     vu >>= 7;
        !           415:   }while( vu!=0 );
        !           416:   q[-1] &= 0x7f;  /* turn off high bit in final byte */
        !           417:   assert( q - (unsigned char *)p <= VARINT_MAX );
        !           418:   return (int) (q - (unsigned char *)p);
        !           419: }
        !           420: 
        !           421: /* Read a 64-bit variable-length integer from memory starting at p[0].
        !           422:  * Return the number of bytes read, or 0 on error.
        !           423:  * The value is stored in *v. */
        !           424: static int getVarint(const char *p, sqlite_int64 *v){
        !           425:   const unsigned char *q = (const unsigned char *) p;
        !           426:   sqlite_uint64 x = 0, y = 1;
        !           427:   while( (*q & 0x80) == 0x80 ){
        !           428:     x += y * (*q++ & 0x7f);
        !           429:     y <<= 7;
        !           430:     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
        !           431:       assert( 0 );
        !           432:       return 0;
        !           433:     }
        !           434:   }
        !           435:   x += y * (*q++);
        !           436:   *v = (sqlite_int64) x;
        !           437:   return (int) (q - (unsigned char *)p);
        !           438: }
        !           439: 
        !           440: static int getVarint32(const char *p, int *pi){
        !           441:  sqlite_int64 i;
        !           442:  int ret = getVarint(p, &i);
        !           443:  *pi = (int) i;
        !           444:  assert( *pi==i );
        !           445:  return ret;
        !           446: }
        !           447: 
        !           448: /*******************************************************************/
        !           449: /* DataBuffer is used to collect data into a buffer in piecemeal
        !           450: ** fashion.  It implements the usual distinction between amount of
        !           451: ** data currently stored (nData) and buffer capacity (nCapacity).
        !           452: **
        !           453: ** dataBufferInit - create a buffer with given initial capacity.
        !           454: ** dataBufferReset - forget buffer's data, retaining capacity.
        !           455: ** dataBufferDestroy - free buffer's data.
        !           456: ** dataBufferSwap - swap contents of two buffers.
        !           457: ** dataBufferExpand - expand capacity without adding data.
        !           458: ** dataBufferAppend - append data.
        !           459: ** dataBufferAppend2 - append two pieces of data at once.
        !           460: ** dataBufferReplace - replace buffer's data.
        !           461: */
        /* Growable byte buffer.  pData[0..nData) holds the collected bytes,
        ** and nCapacity records how many bytes pData can hold.
        */
        struct DataBuffer {
          char *pData;          /* Pointer to malloc'ed buffer (may be NULL). */
          int nCapacity;        /* Size of pData buffer. */
          int nData;            /* End of data loaded into pData. */
        };
        typedef struct DataBuffer DataBuffer;
        !           467: 
        !           468: static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){
        !           469:   assert( nCapacity>=0 );
        !           470:   pBuffer->nData = 0;
        !           471:   pBuffer->nCapacity = nCapacity;
        !           472:   pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity);
        !           473: }
        !           474: static void dataBufferReset(DataBuffer *pBuffer){
        !           475:   pBuffer->nData = 0;
        !           476: }
        !           477: static void dataBufferDestroy(DataBuffer *pBuffer){
        !           478:   if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData);
        !           479:   SCRAMBLE(pBuffer);
        !           480: }
        !           481: static void dataBufferSwap(DataBuffer *pBuffer1, DataBuffer *pBuffer2){
        !           482:   DataBuffer tmp = *pBuffer1;
        !           483:   *pBuffer1 = *pBuffer2;
        !           484:   *pBuffer2 = tmp;
        !           485: }
        !           486: static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){
        !           487:   assert( nAddCapacity>0 );
        !           488:   /* TODO(shess) Consider expanding more aggressively.  Note that the
        !           489:   ** underlying malloc implementation may take care of such things for
        !           490:   ** us already.
        !           491:   */
        !           492:   if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){
        !           493:     pBuffer->nCapacity = pBuffer->nData+nAddCapacity;
        !           494:     pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity);
        !           495:   }
        !           496: }
        !           497: static void dataBufferAppend(DataBuffer *pBuffer,
        !           498:                              const char *pSource, int nSource){
        !           499:   assert( nSource>0 && pSource!=NULL );
        !           500:   dataBufferExpand(pBuffer, nSource);
        !           501:   memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource);
        !           502:   pBuffer->nData += nSource;
        !           503: }
        !           504: static void dataBufferAppend2(DataBuffer *pBuffer,
        !           505:                               const char *pSource1, int nSource1,
        !           506:                               const char *pSource2, int nSource2){
        !           507:   assert( nSource1>0 && pSource1!=NULL );
        !           508:   assert( nSource2>0 && pSource2!=NULL );
        !           509:   dataBufferExpand(pBuffer, nSource1+nSource2);
        !           510:   memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1);
        !           511:   memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2);
        !           512:   pBuffer->nData += nSource1+nSource2;
        !           513: }
        !           514: static void dataBufferReplace(DataBuffer *pBuffer,
        !           515:                               const char *pSource, int nSource){
        !           516:   dataBufferReset(pBuffer);
        !           517:   dataBufferAppend(pBuffer, pSource, nSource);
        !           518: }
        !           519: 
        !           520: /* StringBuffer is a null-terminated version of DataBuffer. */
        !           521: typedef struct StringBuffer {
        !           522:   DataBuffer b;            /* Includes null terminator. */
        !           523: } StringBuffer;
        !           524: 
        !           525: static void initStringBuffer(StringBuffer *sb){
        !           526:   dataBufferInit(&sb->b, 100);
        !           527:   dataBufferReplace(&sb->b, "", 1);
        !           528: }
        !           529: static int stringBufferLength(StringBuffer *sb){
        !           530:   return sb->b.nData-1;
        !           531: }
        !           532: static char *stringBufferData(StringBuffer *sb){
        !           533:   return sb->b.pData;
        !           534: }
        !           535: static void stringBufferDestroy(StringBuffer *sb){
        !           536:   dataBufferDestroy(&sb->b);
        !           537: }
        !           538: 
        !           539: static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
        !           540:   assert( sb->b.nData>0 );
        !           541:   if( nFrom>0 ){
        !           542:     sb->b.nData--;
        !           543:     dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1);
        !           544:   }
        !           545: }
        !           546: static void append(StringBuffer *sb, const char *zFrom){
        !           547:   nappend(sb, zFrom, strlen(zFrom));
        !           548: }
        !           549: 
        !           550: /* Append a list of strings separated by commas. */
        !           551: static void appendList(StringBuffer *sb, int nString, char **azString){
        !           552:   int i;
        !           553:   for(i=0; i<nString; ++i){
        !           554:     if( i>0 ) append(sb, ", ");
        !           555:     append(sb, azString[i]);
        !           556:   }
        !           557: }
        !           558: 
        !           559: static int endsInWhiteSpace(StringBuffer *p){
        !           560:   return stringBufferLength(p)>0 &&
        !           561:     safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]);
        !           562: }
        !           563: 
        !           564: /* If the StringBuffer ends in something other than white space, add a
        !           565: ** single space character to the end.
        !           566: */
        !           567: static void appendWhiteSpace(StringBuffer *p){
        !           568:   if( stringBufferLength(p)==0 ) return;
        !           569:   if( !endsInWhiteSpace(p) ) append(p, " ");
        !           570: }
        !           571: 
        !           572: /* Remove white space from the end of the StringBuffer */
        !           573: static void trimWhiteSpace(StringBuffer *p){
        !           574:   while( endsInWhiteSpace(p) ){
        !           575:     p->b.pData[--p->b.nData-1] = '\0';
        !           576:   }
        !           577: }
        !           578: 
        !           579: /*******************************************************************/
        !           580: /* DLReader is used to read document elements from a doclist.  The
        !           581: ** current docid is cached, so dlrDocid() is fast.  DLReader does not
        !           582: ** own the doclist buffer.
        !           583: **
        !           584: ** dlrAtEnd - true if there's no more data to read.
        !           585: ** dlrDocid - docid of current document.
        !           586: ** dlrDocData - doclist data for current document (including docid).
        !           587: ** dlrDocDataBytes - length of same.
        !           588: ** dlrAllDataBytes - length of all remaining data.
        !           589: ** dlrPosData - position data for current document.
        !           590: ** dlrPosDataLen - length of pos data for current document (incl POS_END).
        !           591: ** dlrStep - step to the next document.
        !           592: ** dlrInit - initialize for doclist of given type against given data.
        !           593: ** dlrDestroy - clean up.
        !           594: **
        !           595: ** Expected usage is something like:
        !           596: **
        !           597: **   DLReader reader;
        !           598: **   dlrInit(&reader, iType, pData, nData);
        !           599: **   while( !dlrAtEnd(&reader) ){
        !           600: **     // calls to dlrDocid() and kin.
        !           601: **     dlrStep(&reader);
        !           602: **   }
        !           603: **   dlrDestroy(&reader);
        !           604: */
        !           605: typedef struct DLReader {
        !           606:   DocListType iType;
        !           607:   const char *pData;
        !           608:   int nData;
        !           609: 
        !           610:   sqlite_int64 iDocid;
        !           611:   int nElement;
        !           612: } DLReader;
        !           613: 
        !           614: static int dlrAtEnd(DLReader *pReader){
        !           615:   assert( pReader->nData>=0 );
        !           616:   return pReader->nData==0;
        !           617: }
        !           618: static sqlite_int64 dlrDocid(DLReader *pReader){
        !           619:   assert( !dlrAtEnd(pReader) );
        !           620:   return pReader->iDocid;
        !           621: }
        !           622: static const char *dlrDocData(DLReader *pReader){
        !           623:   assert( !dlrAtEnd(pReader) );
        !           624:   return pReader->pData;
        !           625: }
        !           626: static int dlrDocDataBytes(DLReader *pReader){
        !           627:   assert( !dlrAtEnd(pReader) );
        !           628:   return pReader->nElement;
        !           629: }
        !           630: static int dlrAllDataBytes(DLReader *pReader){
        !           631:   assert( !dlrAtEnd(pReader) );
        !           632:   return pReader->nData;
        !           633: }
        !           634: /* TODO(shess) Consider adding a field to track iDocid varint length
        !           635: ** to make these two functions faster.  This might matter (a tiny bit)
        !           636: ** for queries.
        !           637: */
        !           638: static const char *dlrPosData(DLReader *pReader){
        !           639:   sqlite_int64 iDummy;
        !           640:   int n = getVarint(pReader->pData, &iDummy);
        !           641:   assert( !dlrAtEnd(pReader) );
        !           642:   return pReader->pData+n;
        !           643: }
        !           644: static int dlrPosDataLen(DLReader *pReader){
        !           645:   sqlite_int64 iDummy;
        !           646:   int n = getVarint(pReader->pData, &iDummy);
        !           647:   assert( !dlrAtEnd(pReader) );
        !           648:   return pReader->nElement-n;
        !           649: }
        !           650: static void dlrStep(DLReader *pReader){
        !           651:   assert( !dlrAtEnd(pReader) );
        !           652: 
        !           653:   /* Skip past current doclist element. */
        !           654:   assert( pReader->nElement<=pReader->nData );
        !           655:   pReader->pData += pReader->nElement;
        !           656:   pReader->nData -= pReader->nElement;
        !           657: 
        !           658:   /* If there is more data, read the next doclist element. */
        !           659:   if( pReader->nData!=0 ){
        !           660:     sqlite_int64 iDocidDelta;
        !           661:     int iDummy, n = getVarint(pReader->pData, &iDocidDelta);
        !           662:     pReader->iDocid += iDocidDelta;
        !           663:     if( pReader->iType>=DL_POSITIONS ){
        !           664:       assert( n<pReader->nData );
        !           665:       while( 1 ){
        !           666:         n += getVarint32(pReader->pData+n, &iDummy);
        !           667:         assert( n<=pReader->nData );
        !           668:         if( iDummy==POS_END ) break;
        !           669:         if( iDummy==POS_COLUMN ){
        !           670:           n += getVarint32(pReader->pData+n, &iDummy);
        !           671:           assert( n<pReader->nData );
        !           672:         }else if( pReader->iType==DL_POSITIONS_OFFSETS ){
        !           673:           n += getVarint32(pReader->pData+n, &iDummy);
        !           674:           n += getVarint32(pReader->pData+n, &iDummy);
        !           675:           assert( n<pReader->nData );
        !           676:         }
        !           677:       }
        !           678:     }
        !           679:     pReader->nElement = n;
        !           680:     assert( pReader->nElement<=pReader->nData );
        !           681:   }
        !           682: }
        !           683: static void dlrInit(DLReader *pReader, DocListType iType,
        !           684:                     const char *pData, int nData){
        !           685:   assert( pData!=NULL && nData!=0 );
        !           686:   pReader->iType = iType;
        !           687:   pReader->pData = pData;
        !           688:   pReader->nData = nData;
        !           689:   pReader->nElement = 0;
        !           690:   pReader->iDocid = 0;
        !           691: 
        !           692:   /* Load the first element's data.  There must be a first element. */
        !           693:   dlrStep(pReader);
        !           694: }
        !           695: static void dlrDestroy(DLReader *pReader){
        !           696:   SCRAMBLE(pReader);
        !           697: }
        !           698: 
        !           699: #ifndef NDEBUG
        !           700: /* Verify that the doclist can be validly decoded.  Also returns the
        !           701: ** last docid found because it is convenient in other assertions for
        !           702: ** DLWriter.
        !           703: */
        !           704: static void docListValidate(DocListType iType, const char *pData, int nData,
        !           705:                             sqlite_int64 *pLastDocid){
        !           706:   sqlite_int64 iPrevDocid = 0;
        !           707:   assert( nData>0 );
        !           708:   assert( pData!=0 );
        !           709:   assert( pData+nData>pData );
        !           710:   while( nData!=0 ){
        !           711:     sqlite_int64 iDocidDelta;
        !           712:     int n = getVarint(pData, &iDocidDelta);
        !           713:     iPrevDocid += iDocidDelta;
        !           714:     if( iType>DL_DOCIDS ){
        !           715:       int iDummy;
        !           716:       while( 1 ){
        !           717:         n += getVarint32(pData+n, &iDummy);
        !           718:         if( iDummy==POS_END ) break;
        !           719:         if( iDummy==POS_COLUMN ){
        !           720:           n += getVarint32(pData+n, &iDummy);
        !           721:         }else if( iType>DL_POSITIONS ){
        !           722:           n += getVarint32(pData+n, &iDummy);
        !           723:           n += getVarint32(pData+n, &iDummy);
        !           724:         }
        !           725:         assert( n<=nData );
        !           726:       }
        !           727:     }
        !           728:     assert( n<=nData );
        !           729:     pData += n;
        !           730:     nData -= n;
        !           731:   }
        !           732:   if( pLastDocid ) *pLastDocid = iPrevDocid;
        !           733: }
        !           734: #define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o)
        !           735: #else
        !           736: #define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 )
        !           737: #endif
        !           738: 
        !           739: /*******************************************************************/
        !           740: /* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
        !           741: ** always appends to the buffer and does not own it.
        !           742: **
        !           743: ** dlwInit - initialize to write a given type doclist to a buffer.
        !           744: ** dlwDestroy - clear the writer's memory.  Does not free buffer.
        !           745: ** dlwAppend - append raw doclist data to buffer.
        !           746: ** dlwCopy - copy next doclist from reader to writer.
        !           747: ** dlwAdd - construct doclist element and append to buffer.
        !           748: **    Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).
        !           749: */
        !           750: typedef struct DLWriter {
        !           751:   DocListType iType;
        !           752:   DataBuffer *b;
        !           753:   sqlite_int64 iPrevDocid;
        !           754: #ifndef NDEBUG
        !           755:   int has_iPrevDocid;
        !           756: #endif
        !           757: } DLWriter;
        !           758: 
        !           759: static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){
        !           760:   pWriter->b = b;
        !           761:   pWriter->iType = iType;
        !           762:   pWriter->iPrevDocid = 0;
        !           763: #ifndef NDEBUG
        !           764:   pWriter->has_iPrevDocid = 0;
        !           765: #endif
        !           766: }
        !           767: static void dlwDestroy(DLWriter *pWriter){
        !           768:   SCRAMBLE(pWriter);
        !           769: }
        !           770: /* iFirstDocid is the first docid in the doclist in pData.  It is
        !           771: ** needed because pData may point within a larger doclist, in which
        !           772: ** case the first item would be delta-encoded.
        !           773: **
        !           774: ** iLastDocid is the final docid in the doclist in pData.  It is
        !           775: ** needed to create the new iPrevDocid for future delta-encoding.  The
        !           776: ** code could decode the passed doclist to recreate iLastDocid, but
        !           777: ** the only current user (docListMerge) already has decoded this
        !           778: ** information.
        !           779: */
        !           780: /* TODO(shess) This has become just a helper for docListMerge.
        !           781: ** Consider a refactor to make this cleaner.
        !           782: */
        !           783: static void dlwAppend(DLWriter *pWriter,
        !           784:                       const char *pData, int nData,
        !           785:                       sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){
        !           786:   sqlite_int64 iDocid = 0;
        !           787:   char c[VARINT_MAX];
        !           788:   int nFirstOld, nFirstNew;     /* Old and new varint len of first docid. */
        !           789: #ifndef NDEBUG
        !           790:   sqlite_int64 iLastDocidDelta;
        !           791: #endif
        !           792: 
        !           793:   /* Recode the initial docid as delta from iPrevDocid. */
        !           794:   nFirstOld = getVarint(pData, &iDocid);
        !           795:   assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
        !           796:   nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid);
        !           797: 
        !           798:   /* Verify that the incoming doclist is valid AND that it ends with
        !           799:   ** the expected docid.  This is essential because we'll trust this
        !           800:   ** docid in future delta-encoding.
        !           801:   */
        !           802:   ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta);
        !           803:   assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
        !           804: 
        !           805:   /* Append recoded initial docid and everything else.  Rest of docids
        !           806:   ** should have been delta-encoded from previous initial docid.
        !           807:   */
        !           808:   if( nFirstOld<nData ){
        !           809:     dataBufferAppend2(pWriter->b, c, nFirstNew,
        !           810:                       pData+nFirstOld, nData-nFirstOld);
        !           811:   }else{
        !           812:     dataBufferAppend(pWriter->b, c, nFirstNew);
        !           813:   }
        !           814:   pWriter->iPrevDocid = iLastDocid;
        !           815: }
        !           816: static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
        !           817:   dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
        !           818:             dlrDocid(pReader), dlrDocid(pReader));
        !           819: }
        !           820: static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
        !           821:   char c[VARINT_MAX];
        !           822:   int n = putVarint(c, iDocid-pWriter->iPrevDocid);
        !           823: 
        !           824:   /* Docids must ascend. */
        !           825:   assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );
        !           826:   assert( pWriter->iType==DL_DOCIDS );
        !           827: 
        !           828:   dataBufferAppend(pWriter->b, c, n);
        !           829:   pWriter->iPrevDocid = iDocid;
        !           830: #ifndef NDEBUG
        !           831:   pWriter->has_iPrevDocid = 1;
        !           832: #endif
        !           833: }
        !           834: 
        !           835: /*******************************************************************/
        !           836: /* PLReader is used to read data from a document's position list.  As
        !           837: ** the caller steps through the list, data is cached so that varints
        !           838: ** only need to be decoded once.
        !           839: **
        !           840: ** plrInit, plrDestroy - create/destroy a reader.
        !           841: ** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors
        !           842: ** plrAtEnd - at end of stream, only call plrDestroy once true.
        !           843: ** plrStep - step to the next element.
        !           844: */
        !           845: typedef struct PLReader {
        !           846:   /* These refer to the next position's data.  nData will reach 0 when
        !           847:   ** reading the last position, so plrStep() signals EOF by setting
        !           848:   ** pData to NULL.
        !           849:   */
        !           850:   const char *pData;
        !           851:   int nData;
        !           852: 
        !           853:   DocListType iType;
        !           854:   int iColumn;         /* the last column read */
        !           855:   int iPosition;       /* the last position read */
        !           856:   int iStartOffset;    /* the last start offset read */
        !           857:   int iEndOffset;      /* the last end offset read */
        !           858: } PLReader;
        !           859: 
        !           860: static int plrAtEnd(PLReader *pReader){
        !           861:   return pReader->pData==NULL;
        !           862: }
        !           863: static int plrColumn(PLReader *pReader){
        !           864:   assert( !plrAtEnd(pReader) );
        !           865:   return pReader->iColumn;
        !           866: }
        !           867: static int plrPosition(PLReader *pReader){
        !           868:   assert( !plrAtEnd(pReader) );
        !           869:   return pReader->iPosition;
        !           870: }
        !           871: static int plrStartOffset(PLReader *pReader){
        !           872:   assert( !plrAtEnd(pReader) );
        !           873:   return pReader->iStartOffset;
        !           874: }
        !           875: static int plrEndOffset(PLReader *pReader){
        !           876:   assert( !plrAtEnd(pReader) );
        !           877:   return pReader->iEndOffset;
        !           878: }
        !           879: static void plrStep(PLReader *pReader){
        !           880:   int i, n;
        !           881: 
        !           882:   assert( !plrAtEnd(pReader) );
        !           883: 
        !           884:   if( pReader->nData==0 ){
        !           885:     pReader->pData = NULL;
        !           886:     return;
        !           887:   }
        !           888: 
        !           889:   n = getVarint32(pReader->pData, &i);
        !           890:   if( i==POS_COLUMN ){
        !           891:     n += getVarint32(pReader->pData+n, &pReader->iColumn);
        !           892:     pReader->iPosition = 0;
        !           893:     pReader->iStartOffset = 0;
        !           894:     n += getVarint32(pReader->pData+n, &i);
        !           895:   }
        !           896:   /* Should never see adjacent column changes. */
        !           897:   assert( i!=POS_COLUMN );
        !           898: 
        !           899:   if( i==POS_END ){
        !           900:     pReader->nData = 0;
        !           901:     pReader->pData = NULL;
        !           902:     return;
        !           903:   }
        !           904: 
        !           905:   pReader->iPosition += i-POS_BASE;
        !           906:   if( pReader->iType==DL_POSITIONS_OFFSETS ){
        !           907:     n += getVarint32(pReader->pData+n, &i);
        !           908:     pReader->iStartOffset += i;
        !           909:     n += getVarint32(pReader->pData+n, &i);
        !           910:     pReader->iEndOffset = pReader->iStartOffset+i;
        !           911:   }
        !           912:   assert( n<=pReader->nData );
        !           913:   pReader->pData += n;
        !           914:   pReader->nData -= n;
        !           915: }
        !           916: 
        !           917: static void plrInit(PLReader *pReader, DLReader *pDLReader){
        !           918:   pReader->pData = dlrPosData(pDLReader);
        !           919:   pReader->nData = dlrPosDataLen(pDLReader);
        !           920:   pReader->iType = pDLReader->iType;
        !           921:   pReader->iColumn = 0;
        !           922:   pReader->iPosition = 0;
        !           923:   pReader->iStartOffset = 0;
        !           924:   pReader->iEndOffset = 0;
        !           925:   plrStep(pReader);
        !           926: }
        !           927: static void plrDestroy(PLReader *pReader){
        !           928:   SCRAMBLE(pReader);
        !           929: }
        !           930: 
        !           931: /*******************************************************************/
        !           932: /* PLWriter is used in constructing a document's position list.  As a
        !           933: ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.
        !           934: ** PLWriter writes to the associated DLWriter's buffer.
        !           935: **
        !           936: ** plwInit - init for writing a document's poslist.
        !           937: ** plwDestroy - clear a writer.
        !           938: ** plwAdd - append position and offset information.
        !           939: ** plwCopy - copy next position's data from reader to writer.
        !           940: ** plwTerminate - add any necessary doclist terminator.
        !           941: **
        !           942: ** Calling plwAdd() after plwTerminate() may result in a corrupt
        !           943: ** doclist.
        !           944: */
        !           945: /* TODO(shess) Until we've written the second item, we can cache the
        !           946: ** first item's information.  Then we'd have three states:
        !           947: **
        !           948: ** - initialized with docid, no positions.
        !           949: ** - docid and one position.
        !           950: ** - docid and multiple positions.
        !           951: **
        !           952: ** Only the last state needs to actually write to dlw->b, which would
        !           953: ** be an improvement in the DLCollector case.
        !           954: */
        !           955: typedef struct PLWriter {
        !           956:   DLWriter *dlw;
        !           957: 
        !           958:   int iColumn;    /* the last column written */
        !           959:   int iPos;       /* the last position written */
        !           960:   int iOffset;    /* the last start offset written */
        !           961: } PLWriter;
        !           962: 
        !           963: /* TODO(shess) In the case where the parent is reading these values
        !           964: ** from a PLReader, we could optimize to a copy if that PLReader has
        !           965: ** the same type as pWriter.
        !           966: */
        !           967: static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,
        !           968:                    int iStartOffset, int iEndOffset){
        !           969:   /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,
        !           970:   ** iStartOffsetDelta, and iEndOffsetDelta.
        !           971:   */
        !           972:   char c[5*VARINT_MAX];
        !           973:   int n = 0;
        !           974: 
        !           975:   /* Ban plwAdd() after plwTerminate(). */
        !           976:   assert( pWriter->iPos!=-1 );
        !           977: 
        !           978:   if( pWriter->dlw->iType==DL_DOCIDS ) return;
        !           979: 
        !           980:   if( iColumn!=pWriter->iColumn ){
        !           981:     n += putVarint(c+n, POS_COLUMN);
        !           982:     n += putVarint(c+n, iColumn);
        !           983:     pWriter->iColumn = iColumn;
        !           984:     pWriter->iPos = 0;
        !           985:     pWriter->iOffset = 0;
        !           986:   }
        !           987:   assert( iPos>=pWriter->iPos );
        !           988:   n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
        !           989:   pWriter->iPos = iPos;
        !           990:   if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){
        !           991:     assert( iStartOffset>=pWriter->iOffset );
        !           992:     n += putVarint(c+n, iStartOffset-pWriter->iOffset);
        !           993:     pWriter->iOffset = iStartOffset;
        !           994:     assert( iEndOffset>=iStartOffset );
        !           995:     n += putVarint(c+n, iEndOffset-iStartOffset);
        !           996:   }
        !           997:   dataBufferAppend(pWriter->dlw->b, c, n);
        !           998: }
        !           999: static void plwCopy(PLWriter *pWriter, PLReader *pReader){
        !          1000:   plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader),
        !          1001:          plrStartOffset(pReader), plrEndOffset(pReader));
        !          1002: }
        !          1003: static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){
        !          1004:   char c[VARINT_MAX];
        !          1005:   int n;
        !          1006: 
        !          1007:   pWriter->dlw = dlw;
        !          1008: 
        !          1009:   /* Docids must ascend. */
        !          1010:   assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid );
        !          1011:   n = putVarint(c, iDocid-pWriter->dlw->iPrevDocid);
        !          1012:   dataBufferAppend(pWriter->dlw->b, c, n);
        !          1013:   pWriter->dlw->iPrevDocid = iDocid;
        !          1014: #ifndef NDEBUG
        !          1015:   pWriter->dlw->has_iPrevDocid = 1;
        !          1016: #endif
        !          1017: 
        !          1018:   pWriter->iColumn = 0;
        !          1019:   pWriter->iPos = 0;
        !          1020:   pWriter->iOffset = 0;
        !          1021: }
        !          1022: /* TODO(shess) Should plwDestroy() also terminate the doclist?  But
        !          1023: ** then plwDestroy() would no longer be just a destructor, it would
        !          1024: ** also be doing work, which isn't consistent with the overall idiom.
        !          1025: ** Another option would be for plwAdd() to always append any necessary
        !          1026: ** terminator, so that the output is always correct.  But that would
        !          1027: ** add incremental work to the common case with the only benefit being
        !          1028: ** API elegance.  Punt for now.
        !          1029: */
        !          1030: static void plwTerminate(PLWriter *pWriter){
        !          1031:   if( pWriter->dlw->iType>DL_DOCIDS ){
        !          1032:     char c[VARINT_MAX];
        !          1033:     int n = putVarint(c, POS_END);
        !          1034:     dataBufferAppend(pWriter->dlw->b, c, n);
        !          1035:   }
        !          1036: #ifndef NDEBUG
        !          1037:   /* Mark as terminated for assert in plwAdd(). */
        !          1038:   pWriter->iPos = -1;
        !          1039: #endif
        !          1040: }
        !          1041: static void plwDestroy(PLWriter *pWriter){
        !          1042:   SCRAMBLE(pWriter);
        !          1043: }
        !          1044: 
        !          1045: /*******************************************************************/
        !          1046: /* DLCollector wraps PLWriter and DLWriter to provide a
        !          1047: ** dynamically-allocated doclist area to use during tokenization.
        !          1048: **
        !          1049: ** dlcNew - malloc up and initialize a collector.
        !          1050: ** dlcDelete - destroy a collector and all contained items.
        !          1051: ** dlcAddPos - append position and offset information.
        !          1052: ** dlcAddDoclist - add the collected doclist to the given buffer.
        !          1053: ** dlcNext - terminate the current document and open another.
        !          1054: */
        !          1055: typedef struct DLCollector {
        !          1056:   DataBuffer b;
        !          1057:   DLWriter dlw;
        !          1058:   PLWriter plw;
        !          1059: } DLCollector;
        !          1060: 
        !          1061: /* TODO(shess) This could also be done by calling plwTerminate() and
        !          1062: ** dataBufferAppend().  I tried that, expecting nominal performance
        !          1063: ** differences, but it seemed to pretty reliably be worth 1% to code
        !          1064: ** it this way.  I suspect it is the incremental malloc overhead (some
        !          1065: ** percentage of the plwTerminate() calls will cause a realloc), so
        !          1066: ** this might be worth revisiting if the DataBuffer implementation
        !          1067: ** changes.
        !          1068: */
        !          1069: static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
                           /* Append the bytes collected so far to b.  The collector's own
                           ** buffer is only read here, never modified.
                           */
        !          1070:   if( pCollector->dlw.iType>DL_DOCIDS ){
                             /* For types beyond DL_DOCIDS, a varint-encoded POS_END is
                             ** appended right after the collected data, in the same call.
                             */
        !          1071:     char c[VARINT_MAX];
        !          1072:     int n = putVarint(c, POS_END);
        !          1073:     dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);
        !          1074:   }else{
        !          1075:     dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
        !          1076:   }
        !          1077: }
        !          1078: static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){
                           /* Close out the position list of the current document, then
                           ** re-initialize the same PLWriter for iDocid on the same DLWriter.
                           */
        !          1079:   plwTerminate(&pCollector->plw);
        !          1080:   plwDestroy(&pCollector->plw);
        !          1081:   plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
        !          1082: }
        !          1083: static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
        !          1084:                       int iStartOffset, int iEndOffset){
                           /* Thin wrapper: forwards all arguments unchanged to plwAdd() on
                           ** the collector's position-list writer.
                           */
        !          1085:   plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
        !          1086: }
        !          1087: 
        !          1088: static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
                           /* Allocate a collector and initialize its members in dependency
                           ** order: the buffer, then the DLWriter over that buffer, then the
                           ** PLWriter for iDocid over that DLWriter.  dlcDelete() releases it.
                           **
                           ** NOTE(review): the sqlite3_malloc() result is used without a NULL
                           ** check, so an allocation failure is dereferenced below.
                           */
        !          1089:   DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector));
        !          1090:   dataBufferInit(&pCollector->b, 0);
        !          1091:   dlwInit(&pCollector->dlw, iType, &pCollector->b);
        !          1092:   plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
        !          1093:   return pCollector;
        !          1094: }
        !          1095: static void dlcDelete(DLCollector *pCollector){
                           /* Destroy the members in the reverse of the order dlcNew()
                           ** initialized them, then scramble and free the collector itself.
                           */
        !          1096:   plwDestroy(&pCollector->plw);
        !          1097:   dlwDestroy(&pCollector->dlw);
        !          1098:   dataBufferDestroy(&pCollector->b);
        !          1099:   SCRAMBLE(pCollector);
        !          1100:   sqlite3_free(pCollector);
        !          1101: }
        !          1102: 
        !          1103: 
        !          1104: /* Copy the doclist data of iType in pData/nData into *out, trimming
        !          1105: ** unnecessary data as we go.  Only columns matching iColumn are
        !          1106: ** copied, all columns copied if iColumn is -1.  Elements with no
        !          1107: ** matching columns are dropped.  The output is an iOutType doclist.
        !          1108: */
        !          1109: /* NOTE(shess) This code is only valid after all doclists are merged.
        !          1110: ** If this is run before merges, then doclist items which represent
        !          1111: ** deletion will be trimmed, and will thus not effect a deletion
        !          1112: ** during the merge.
        !          1113: */
        !          1114: static void docListTrim(DocListType iType, const char *pData, int nData,
        !          1115:                         int iColumn, DocListType iOutType, DataBuffer *out){
        !          1116:   DLReader dlReader;
        !          1117:   DLWriter dlWriter;
        !          1118: 
                           /* The output type may not exceed the input type. */
        !          1119:   assert( iOutType<=iType );
        !          1120: 
        !          1121:   dlrInit(&dlReader, iType, pData, nData);
        !          1122:   dlwInit(&dlWriter, iOutType, out);
        !          1123: 
                           /* Outer loop: one iteration per document in the input doclist. */
        !          1124:   while( !dlrAtEnd(&dlReader) ){
        !          1125:     PLReader plReader;
        !          1126:     PLWriter plWriter;
                             /* plWriter stays uninitialized until a matching position is
                             ** seen; match records whether plwInit() has been called.
                             */
        !          1127:     int match = 0;
        !          1128: 
        !          1129:     plrInit(&plReader, &dlReader);
        !          1130: 
        !          1131:     while( !plrAtEnd(&plReader) ){
        !          1132:       if( iColumn==-1 || plrColumn(&plReader)==iColumn ){
        !          1133:         if( !match ){
                                 /* First matching position: open the output element now. */
        !          1134:           plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));
        !          1135:           match = 1;
        !          1136:         }
        !          1137:         plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
        !          1138:                plrStartOffset(&plReader), plrEndOffset(&plReader));
        !          1139:       }
        !          1140:       plrStep(&plReader);
        !          1141:     }
                             /* Only touch plWriter if it was initialized above; documents
                             ** with no matching position write nothing to the output.
                             */
        !          1142:     if( match ){
        !          1143:       plwTerminate(&plWriter);
        !          1144:       plwDestroy(&plWriter);
        !          1145:     }
        !          1146: 
        !          1147:     plrDestroy(&plReader);
        !          1148:     dlrStep(&dlReader);
        !          1149:   }
        !          1150:   dlwDestroy(&dlWriter);
        !          1151:   dlrDestroy(&dlReader);
        !          1152: }
        !          1153: 
        !          1154: /* Used by docListMerge() to keep doclists in the ascending order by
        !          1155: ** docid, then ascending order by age (so the newest comes first).
        !          1156: */
        !          1157: typedef struct OrderedDLReader {
        !          1158:   DLReader *pReader;   /* Reader being ordered; not owned by this struct. */
        !          1159: 
        !          1160:   /* TODO(shess) If we assume that docListMerge pReaders is ordered by
        !          1161:   ** age (which we do), then we could use pReader comparisons to break
        !          1162:   ** ties.
        !          1163:   */
        !          1164:   int idx;             /* Index of pReader in docListMerge()'s pReaders array. */
        !          1165: } OrderedDLReader;
        !          1166: 
        !          1167: /* Order eof to end, then by docid asc, idx desc. */
        !          1168: static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){
                           /* Returns <0, 0 or >0 as r1 sorts before, equal to, or after r2.
                           ** A reader at end sorts after any reader that is not at end.
                           */
        !          1169:   if( dlrAtEnd(r1->pReader) ){
        !          1170:     if( dlrAtEnd(r2->pReader) ) return 0;  /* Both atEnd(). */
        !          1171:     return 1;                              /* Only r1 atEnd(). */
        !          1172:   }
        !          1173:   if( dlrAtEnd(r2->pReader) ) return -1;   /* Only r2 atEnd(). */
        !          1174: 
                           /* Neither reader is at end, so the docids can be compared. */
        !          1175:   if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1;
        !          1176:   if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1;
        !          1177: 
                           /* Same docid: the reader with the larger idx sorts first. */
        !          1178:   /* Descending on idx. */
        !          1179:   return r2->idx-r1->idx;
        !          1180: }
        !          1181: 
        !          1182: /* Bubble p[0] to appropriate place in p[1..n-1].  Assumes that
        !          1183: ** p[1..n-1] is already sorted.
        !          1184: */
        !          1185: /* TODO(shess) Is this frequent enough to warrant a binary search?
        !          1186: ** Before implementing that, instrument the code to check.  In most
        !          1187: ** current usage, I expect that p[0] will be less than p[1] a very
        !          1188: ** high proportion of the time.
        !          1189: */
        !          1190: static void orderedDLReaderReorder(OrderedDLReader *p, int n){
                           /* Swap the element with its right-hand neighbour for as long as
                           ** it compares greater, advancing the window by one each time.
                           ** Stops at the first neighbour it does not exceed, or at the end.
                           */
        !          1191:   while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){
        !          1192:     OrderedDLReader tmp = p[0];
        !          1193:     p[0] = p[1];
        !          1194:     p[1] = tmp;
        !          1195:     n--;
        !          1196:     p++;
        !          1197:   }
        !          1198: }
        !          1199: 
        !          1200: /* Given an array of doclist readers, merge their doclist elements
        !          1201: ** into out in sorted order (by docid), dropping elements from older
        !          1202: ** readers when there is a duplicate docid.  pReaders is assumed to be
        !          1203: ** ordered by age, oldest first.
        !          1204: */
        !          1205: /* TODO(shess) nReaders must be <= MERGE_COUNT.  This should probably
        !          1206: ** be fixed.
        !          1207: */
        !          1208: static void docListMerge(DataBuffer *out,
        !          1209:                          DLReader *pReaders, int nReaders){
        !          1210:   OrderedDLReader readers[MERGE_COUNT];
        !          1211:   DLWriter writer;
        !          1212:   int i, n;
                           /* pStart/nStart describe a pending run of contiguous input bytes
                           ** not yet written; iFirstDocid/iLastDocid are the docids of the
                           ** first and last elements in that run.
                           */
        !          1213:   const char *pStart = 0;
        !          1214:   int nStart = 0;
        !          1215:   sqlite_int64 iFirstDocid = 0, iLastDocid = 0;
        !          1216: 
        !          1217:   assert( nReaders>0 );
                           /* Single input: copy its data verbatim.  This returns before
                           ** writer is initialized, so there is nothing to destroy.
                           */
        !          1218:   if( nReaders==1 ){
        !          1219:     dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders));
        !          1220:     return;
        !          1221:   }
        !          1222: 
        !          1223:   assert( nReaders<=MERGE_COUNT );
        !          1224:   n = 0;
        !          1225:   for(i=0; i<nReaders; i++){
        !          1226:     assert( pReaders[i].iType==pReaders[0].iType );
        !          1227:     readers[i].pReader = pReaders+i;
        !          1228:     readers[i].idx = i;
        !          1229:     n += dlrAllDataBytes(&pReaders[i]);
        !          1230:   }
        !          1231:   /* Conservatively size output to sum of inputs.  Output should end
        !          1232:   ** up strictly smaller than input.
        !          1233:   */
        !          1234:   dataBufferExpand(out, n);
        !          1235: 
                           /* i==nReaders here, so this walks from the last slot down to
                           ** slot 0, inserting each element into the sorted tail after it.
                           */
        !          1236:   /* Get the readers into sorted order. */
        !          1237:   while( i-->0 ){
        !          1238:     orderedDLReaderReorder(readers+i, nReaders-i);
        !          1239:   }
        !          1240: 
        !          1241:   dlwInit(&writer, pReaders[0].iType, out);
                           /* readers[0] is always the smallest; once it is at end, all are. */
        !          1242:   while( !dlrAtEnd(readers[0].pReader) ){
        !          1243:     sqlite_int64 iDocid = dlrDocid(readers[0].pReader);
        !          1244: 
        !          1245:     /* If this is a continuation of the current buffer to copy, extend
        !          1246:     ** that buffer.  memcpy() seems to be more efficient if it has
        !          1247:     ** lots of data to copy.
        !          1248:     */
        !          1249:     if( dlrDocData(readers[0].pReader)==pStart+nStart ){
        !          1250:       nStart += dlrDocDataBytes(readers[0].pReader);
        !          1251:     }else{
                               /* Not contiguous: flush the pending run (if any), then start
                               ** a new run at the current element.
                               */
        !          1252:       if( pStart!=0 ){
        !          1253:         dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
        !          1254:       }
        !          1255:       pStart = dlrDocData(readers[0].pReader);
        !          1256:       nStart = dlrDocDataBytes(readers[0].pReader);
        !          1257:       iFirstDocid = iDocid;
        !          1258:     }
        !          1259:     iLastDocid = iDocid;
        !          1260:     dlrStep(readers[0].pReader);
        !          1261: 
        !          1262:     /* Drop all of the older elements with the same docid. */
        !          1263:     for(i=1; i<nReaders &&
        !          1264:              !dlrAtEnd(readers[i].pReader) &&
        !          1265:              dlrDocid(readers[i].pReader)==iDocid; i++){
        !          1266:       dlrStep(readers[i].pReader);
        !          1267:     }
        !          1268: 
                             /* Slots 0..i-1 were stepped above; re-insert them from the
                             ** highest stepped slot down to slot 0.
                             */
        !          1269:     /* Get the readers back into order. */
        !          1270:     while( i-->0 ){
        !          1271:       orderedDLReaderReorder(readers+i, nReaders-i);
        !          1272:     }
        !          1273:   }
        !          1274: 
        !          1275:   /* Copy over any remaining elements. */
        !          1276:   if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
        !          1277:   dlwDestroy(&writer);
        !          1278: }
        !          1279: 
        !          1280: /* Helper function for posListUnion().  Compares the current position
        !          1281: ** between left and right, returning as standard C idiom of <0 if
        !          1282: ** left<right, >0 if left>right, and 0 if left==right.  "End" always
        !          1283: ** compares greater.
        !          1284: */
        !          1285: static int posListCmp(PLReader *pLeft, PLReader *pRight){
        !          1286:   assert( pLeft->iType==pRight->iType );
                           /* For DL_DOCIDS readers the result is always 0; the at-end
                           ** checks below are not reached for that type.
                           */
        !          1287:   if( pLeft->iType==DL_DOCIDS ) return 0;
        !          1288: 
        !          1289:   if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1;
        !          1290:   if( plrAtEnd(pRight) ) return -1;
        !          1291: 
                           /* Compare by column first, then by position within the column. */
        !          1292:   if( plrColumn(pLeft)<plrColumn(pRight) ) return -1;
        !          1293:   if( plrColumn(pLeft)>plrColumn(pRight) ) return 1;
        !          1294: 
        !          1295:   if( plrPosition(pLeft)<plrPosition(pRight) ) return -1;
        !          1296:   if( plrPosition(pLeft)>plrPosition(pRight) ) return 1;
                           /* DL_POSITIONS comparison stops here; offsets are not consulted. */
        !          1297:   if( pLeft->iType==DL_POSITIONS ) return 0;
        !          1298: 
                           /* Remaining types: break ties on start offset, then end offset. */
        !          1299:   if( plrStartOffset(pLeft)<plrStartOffset(pRight) ) return -1;
        !          1300:   if( plrStartOffset(pLeft)>plrStartOffset(pRight) ) return 1;
        !          1301: 
        !          1302:   if( plrEndOffset(pLeft)<plrEndOffset(pRight) ) return -1;
        !          1303:   if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1;
        !          1304: 
        !          1305:   return 0;
        !          1306: }
        !          1307: 
        !          1308: /* Write the union of position lists in pLeft and pRight to pOut.
        !          1309: ** "Union" in this case meaning "All unique position tuples".  Should
        !          1310: ** work with any doclist type, though both inputs and the output
        !          1311: ** should be the same type.
        !          1312: */
        !          1313: static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
        !          1314:   PLReader left, right;
        !          1315:   PLWriter writer;
        !          1316: 
                           /* Both readers must sit on the same docid, and all three
                           ** participants must share one doclist type.
                           */
        !          1317:   assert( dlrDocid(pLeft)==dlrDocid(pRight) );
        !          1318:   assert( pLeft->iType==pRight->iType );
        !          1319:   assert( pLeft->iType==pOut->iType );
        !          1320: 
        !          1321:   plrInit(&left, pLeft);
        !          1322:   plrInit(&right, pRight);
        !          1323:   plwInit(&writer, pOut, dlrDocid(pLeft));
        !          1324: 
                           /* Two-way merge: copy whichever side compares smaller.  On a
                           ** tie the left entry is copied once and both sides advance, so
                           ** a tuple present in both inputs is written a single time.
                           */
        !          1325:   while( !plrAtEnd(&left) || !plrAtEnd(&right) ){
        !          1326:     int c = posListCmp(&left, &right);
        !          1327:     if( c<0 ){
        !          1328:       plwCopy(&writer, &left);
        !          1329:       plrStep(&left);
        !          1330:     }else if( c>0 ){
        !          1331:       plwCopy(&writer, &right);
        !          1332:       plrStep(&right);
        !          1333:     }else{
        !          1334:       plwCopy(&writer, &left);
        !          1335:       plrStep(&left);
        !          1336:       plrStep(&right);
        !          1337:     }
        !          1338:   }
        !          1339: 
        !          1340:   plwTerminate(&writer);
        !          1341:   plwDestroy(&writer);
        !          1342:   plrDestroy(&left);
        !          1343:   plrDestroy(&right);
        !          1344: }
        !          1345: 
        !          1346: /* Write the union of doclists in pLeft and pRight to pOut.  For
        !          1347: ** docids in common between the inputs, the union of the position
        !          1348: ** lists is written.  Inputs and outputs are always type DL_DEFAULT.
        !          1349: */
        !          1350: static void docListUnion(
        !          1351:   const char *pLeft, int nLeft,
        !          1352:   const char *pRight, int nRight,
        !          1353:   DataBuffer *pOut      /* Write the combined doclist here */
        !          1354: ){
        !          1355:   DLReader left, right;
        !          1356:   DLWriter writer;
        !          1357: 
                           /* If either input is empty the other is appended verbatim (or
                           ** nothing at all when both are empty); no readers are set up.
                           */
        !          1358:   if( nLeft==0 ){
        !          1359:     if( nRight!=0) dataBufferAppend(pOut, pRight, nRight);
        !          1360:     return;
        !          1361:   }
        !          1362:   if( nRight==0 ){
        !          1363:     dataBufferAppend(pOut, pLeft, nLeft);
        !          1364:     return;
        !          1365:   }
        !          1366: 
        !          1367:   dlrInit(&left, DL_DEFAULT, pLeft, nLeft);
        !          1368:   dlrInit(&right, DL_DEFAULT, pRight, nRight);
        !          1369:   dlwInit(&writer, DL_DEFAULT, pOut);
        !          1370: 
                           /* Two-way merge by docid.  The at-end tests come first so that
                           ** dlrDocid() is only called on readers that are not at end.
                           */
        !          1371:   while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
        !          1372:     if( dlrAtEnd(&right) ){
        !          1373:       dlwCopy(&writer, &left);
        !          1374:       dlrStep(&left);
        !          1375:     }else if( dlrAtEnd(&left) ){
        !          1376:       dlwCopy(&writer, &right);
        !          1377:       dlrStep(&right);
        !          1378:     }else if( dlrDocid(&left)<dlrDocid(&right) ){
        !          1379:       dlwCopy(&writer, &left);
        !          1380:       dlrStep(&left);
        !          1381:     }else if( dlrDocid(&left)>dlrDocid(&right) ){
        !          1382:       dlwCopy(&writer, &right);
        !          1383:       dlrStep(&right);
        !          1384:     }else{
                               /* Same docid on both sides: merge their position lists. */
        !          1385:       posListUnion(&left, &right, &writer);
        !          1386:       dlrStep(&left);
        !          1387:       dlrStep(&right);
        !          1388:     }
        !          1389:   }
        !          1390: 
        !          1391:   dlrDestroy(&left);
        !          1392:   dlrDestroy(&right);
        !          1393:   dlwDestroy(&writer);
        !          1394: }
        !          1395: 
        !          1396: /* pLeft and pRight are DLReaders positioned to the same docid.
        !          1397: **
        !          1398: ** If there are no instances in pLeft or pRight where the position
        !          1399: ** of pLeft is one less than the position of pRight, then this
        !          1400: ** routine adds nothing to pOut.
        !          1401: **
        !          1402: ** If there are one or more instances where positions from pLeft
        !          1403: ** are exactly one less than positions from pRight, then add a new
        !          1404: ** document record to pOut.  If pOut wants to hold positions, then
        !          1405: ** include the positions from pRight that are one more than a
        !          1406: ** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
        !          1407: */
        !          1408: static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight,
        !          1409:                                DLWriter *pOut){
        !          1410:   PLReader left, right;
        !          1411:   PLWriter writer;
                           /* writer is initialized lazily; match records whether it was. */
        !          1412:   int match = 0;
        !          1413: 
        !          1414:   assert( dlrDocid(pLeft)==dlrDocid(pRight) );
        !          1415:   assert( pOut->iType!=DL_POSITIONS_OFFSETS );
        !          1416: 
        !          1417:   plrInit(&left, pLeft);
        !          1418:   plrInit(&right, pRight);
        !          1419: 
                           /* Advance whichever side is behind: first by column, then by
                           ** comparing left position + 1 against the right position.
                           */
        !          1420:   while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
        !          1421:     if( plrColumn(&left)<plrColumn(&right) ){
        !          1422:       plrStep(&left);
        !          1423:     }else if( plrColumn(&left)>plrColumn(&right) ){
        !          1424:       plrStep(&right);
        !          1425:     }else if( plrPosition(&left)+1<plrPosition(&right) ){
        !          1426:       plrStep(&left);
        !          1427:     }else if( plrPosition(&left)+1>plrPosition(&right) ){
        !          1428:       plrStep(&right);
        !          1429:     }else{
                               /* Same column and right position == left position + 1. */
        !          1430:       if( !match ){
        !          1431:         plwInit(&writer, pOut, dlrDocid(pLeft));
        !          1432:         match = 1;
        !          1433:       }
                               /* Record the right-hand position; both offsets are passed
                               ** as 0 (the output type never carries offsets, see assert).
                               */
        !          1434:       plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
        !          1435:       plrStep(&left);
        !          1436:       plrStep(&right);
        !          1437:     }
        !          1438:   }
        !          1439: 
                           /* Only finish the writer if it was ever initialized. */
        !          1440:   if( match ){
        !          1441:     plwTerminate(&writer);
        !          1442:     plwDestroy(&writer);
        !          1443:   }
        !          1444: 
        !          1445:   plrDestroy(&left);
        !          1446:   plrDestroy(&right);
        !          1447: }
        !          1448: 
        !          1449: /* We have two doclists with positions:  pLeft and pRight.
        !          1450: ** Write the phrase intersection of these two doclists into pOut.
        !          1451: **
        !          1452: ** A phrase intersection means that two documents only match
        !          1453: ** if pLeft.iPos+1==pRight.iPos.
        !          1454: **
        !          1455: ** iType controls the type of data written to pOut.  If iType is
        !          1456: ** DL_POSITIONS, the positions are those from pRight.
        !          1457: */
        !          1458: static void docListPhraseMerge(
        !          1459:   const char *pLeft, int nLeft,
        !          1460:   const char *pRight, int nRight,
        !          1461:   DocListType iType,
        !          1462:   DataBuffer *pOut      /* Write the combined doclist here */
        !          1463: ){
        !          1464:   DLReader left, right;
        !          1465:   DLWriter writer;
        !          1466: 
                           /* An empty input on either side means nothing is written. */
        !          1467:   if( nLeft==0 || nRight==0 ) return;
        !          1468: 
        !          1469:   assert( iType!=DL_POSITIONS_OFFSETS );
        !          1470: 
                           /* Both inputs are read as DL_POSITIONS regardless of iType;
                           ** only the writer uses iType.
                           */
        !          1471:   dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
        !          1472:   dlrInit(&right, DL_POSITIONS, pRight, nRight);
        !          1473:   dlwInit(&writer, iType, pOut);
        !          1474: 
                           /* Advance the side with the smaller docid until they agree, then
                           ** let posListPhraseMerge() decide whether the document matches.
                           */
        !          1475:   while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
        !          1476:     if( dlrDocid(&left)<dlrDocid(&right) ){
        !          1477:       dlrStep(&left);
        !          1478:     }else if( dlrDocid(&right)<dlrDocid(&left) ){
        !          1479:       dlrStep(&right);
        !          1480:     }else{
        !          1481:       posListPhraseMerge(&left, &right, &writer);
        !          1482:       dlrStep(&left);
        !          1483:       dlrStep(&right);
        !          1484:     }
        !          1485:   }
        !          1486: 
        !          1487:   dlrDestroy(&left);
        !          1488:   dlrDestroy(&right);
        !          1489:   dlwDestroy(&writer);
        !          1490: }
        !          1491: 
        !          1492: /* We have two DL_DOCIDS doclists:  pLeft and pRight.
        !          1493: ** Write the intersection of these two doclists into pOut as a
        !          1494: ** DL_DOCIDS doclist.
        !          1495: */
        !          1496: static void docListAndMerge(
        !          1497:   const char *pLeft, int nLeft,
        !          1498:   const char *pRight, int nRight,
        !          1499:   DataBuffer *pOut      /* Write the combined doclist here */
        !          1500: ){
        !          1501:   DLReader left, right;
        !          1502:   DLWriter writer;
        !          1503: 
                           /* Intersecting with an empty list yields nothing. */
        !          1504:   if( nLeft==0 || nRight==0 ) return;
        !          1505: 
        !          1506:   dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
        !          1507:   dlrInit(&right, DL_DOCIDS, pRight, nRight);
        !          1508:   dlwInit(&writer, DL_DOCIDS, pOut);
        !          1509: 
                           /* Advance the side with the smaller docid; emit a docid only
                           ** when both sides agree on it.  Stops once either side ends.
                           */
        !          1510:   while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
        !          1511:     if( dlrDocid(&left)<dlrDocid(&right) ){
        !          1512:       dlrStep(&left);
        !          1513:     }else if( dlrDocid(&right)<dlrDocid(&left) ){
        !          1514:       dlrStep(&right);
        !          1515:     }else{
        !          1516:       dlwAdd(&writer, dlrDocid(&left));
        !          1517:       dlrStep(&left);
        !          1518:       dlrStep(&right);
        !          1519:     }
        !          1520:   }
        !          1521: 
        !          1522:   dlrDestroy(&left);
        !          1523:   dlrDestroy(&right);
        !          1524:   dlwDestroy(&writer);
        !          1525: }
        !          1526: 
        !          1527: /* We have two DL_DOCIDS doclists:  pLeft and pRight.
        !          1528: ** Write the union of these two doclists into pOut as a
        !          1529: ** DL_DOCIDS doclist.
        !          1530: */
        !          1531: static void docListOrMerge(
        !          1532:   const char *pLeft, int nLeft,
        !          1533:   const char *pRight, int nRight,
        !          1534:   DataBuffer *pOut      /* Write the combined doclist here */
        !          1535: ){
        !          1536:   DLReader left, right;
        !          1537:   DLWriter writer;
        !          1538: 
                           /* If either input is empty the other is appended verbatim (or
                           ** nothing at all when both are empty); no readers are set up.
                           */
        !          1539:   if( nLeft==0 ){
        !          1540:     if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight);
        !          1541:     return;
        !          1542:   }
        !          1543:   if( nRight==0 ){
        !          1544:     dataBufferAppend(pOut, pLeft, nLeft);
        !          1545:     return;
        !          1546:   }
        !          1547: 
        !          1548:   dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
        !          1549:   dlrInit(&right, DL_DOCIDS, pRight, nRight);
        !          1550:   dlwInit(&writer, DL_DOCIDS, pOut);
        !          1551: 
                           /* Two-way merge by docid.  The at-end tests come first so that
                           ** dlrDocid() is only called on readers that are not at end.
                           */
        !          1552:   while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
        !          1553:     if( dlrAtEnd(&right) ){
        !          1554:       dlwAdd(&writer, dlrDocid(&left));
        !          1555:       dlrStep(&left);
        !          1556:     }else if( dlrAtEnd(&left) ){
        !          1557:       dlwAdd(&writer, dlrDocid(&right));
        !          1558:       dlrStep(&right);
        !          1559:     }else if( dlrDocid(&left)<dlrDocid(&right) ){
        !          1560:       dlwAdd(&writer, dlrDocid(&left));
        !          1561:       dlrStep(&left);
        !          1562:     }else if( dlrDocid(&right)<dlrDocid(&left) ){
        !          1563:       dlwAdd(&writer, dlrDocid(&right));
        !          1564:       dlrStep(&right);
        !          1565:     }else{
                               /* Docid present on both sides: emit once, advance both. */
        !          1566:       dlwAdd(&writer, dlrDocid(&left));
        !          1567:       dlrStep(&left);
        !          1568:       dlrStep(&right);
        !          1569:     }
        !          1570:   }
        !          1571: 
        !          1572:   dlrDestroy(&left);
        !          1573:   dlrDestroy(&right);
        !          1574:   dlwDestroy(&writer);
        !          1575: }
        !          1576: 
        !          1577: /* We have two DL_DOCIDS doclists:  pLeft and pRight.
        !          1578: ** Write into pOut as DL_DOCIDS doclist containing all documents that
        !          1579: ** occur in pLeft but not in pRight.
        !          1580: */
        !          1581: static void docListExceptMerge(
        !          1582:   const char *pLeft, int nLeft,
        !          1583:   const char *pRight, int nRight,
        !          1584:   DataBuffer *pOut      /* Write the combined doclist here */
        !          1585: ){
        !          1586:   DLReader left, right;
        !          1587:   DLWriter writer;
        !          1588: 
                           /* Empty left: nothing to emit.  Empty right: nothing to remove,
                           ** so the left list is appended verbatim.
                           */
        !          1589:   if( nLeft==0 ) return;
        !          1590:   if( nRight==0 ){
        !          1591:     dataBufferAppend(pOut, pLeft, nLeft);
        !          1592:     return;
        !          1593:   }
        !          1594: 
        !          1595:   dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
        !          1596:   dlrInit(&right, DL_DOCIDS, pRight, nRight);
        !          1597:   dlwInit(&writer, DL_DOCIDS, pOut);
        !          1598: 
        !          1599:   while( !dlrAtEnd(&left) ){
                             /* Skip right-hand docids that are below the current left one. */
        !          1600:     while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
        !          1601:       dlrStep(&right);
        !          1602:     }
                             /* Emit the left docid unless the right side is sitting on the
                             ** same docid.  right is not advanced on equality; the skip
                             ** loop above moves it on the next iteration.
                             */
        !          1603:     if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
        !          1604:       dlwAdd(&writer, dlrDocid(&left));
        !          1605:     }
        !          1606:     dlrStep(&left);
        !          1607:   }
        !          1608: 
        !          1609:   dlrDestroy(&left);
        !          1610:   dlrDestroy(&right);
        !          1611:   dlwDestroy(&writer);
        !          1612: }
        !          1613: 
        !          1614: static char *string_dup_n(const char *s, int n){
                           /* Return a newly allocated, NUL-terminated copy of the first n
                           ** bytes of s.  The memory comes from sqlite3_malloc(), so it is
                           ** released with sqlite3_free().
                           **
                           ** NOTE(review): the allocation result is not checked for NULL
                           ** before memcpy() writes through it.
                           */
        !          1615:   char *str = sqlite3_malloc(n + 1);
        !          1616:   memcpy(str, s, n);
        !          1617:   str[n] = '\0';
        !          1618:   return str;
        !          1619: }
        !          1620: 
        !          1621: /* Duplicate a string; the caller must release the returned string
        !          1622:  * with sqlite3_free(), since string_dup_n() allocates it with
        !          1623:  * sqlite3_malloc().  (We don't use strdup() since it is not part of
                            * the standard C library and may not be available everywhere.) */
        !          1624: static char *string_dup(const char *s){
        !          1625:   return string_dup_n(s, strlen(s));
        !          1626: }
        !          1627: 
        !          1628: /* Format a string, replacing each occurrence of the % character with
        !          1629:  * zDb.zName.  This may be more convenient than sqlite3_mprintf()
        !          1630:  * when one string is used repeatedly in a format string.
        !          1631:  * The caller must release the returned string with sqlite3_free(). */
        !          1632: static char *string_format(const char *zFormat,
        !          1633:                            const char *zDb, const char *zName){
        !          1634:   const char *p;
        !          1635:   size_t len = 0;
        !          1636:   size_t nDb = strlen(zDb);
        !          1637:   size_t nName = strlen(zName);
                           /* Length of one substitution: zDb, a '.' separator, then zName. */
        !          1638:   size_t nFullTableName = nDb+1+nName;
        !          1639:   char *result;
        !          1640:   char *r;
        !          1641: 
        !          1642:   /* first compute length needed */
        !          1643:   for(p = zFormat ; *p ; ++p){
        !          1644:     len += (*p=='%' ? nFullTableName : 1);
        !          1645:   }
        !          1646:   len += 1;  /* for null terminator */
        !          1647: 
                           /* Second pass: write the output.  Every '%' is substituted; there
                           ** is no escape for a literal '%'.
                           ** NOTE(review): the sqlite3_malloc() result is not NULL-checked.
                           */
        !          1648:   r = result = sqlite3_malloc(len);
        !          1649:   for(p = zFormat; *p; ++p){
        !          1650:     if( *p=='%' ){
        !          1651:       memcpy(r, zDb, nDb);
        !          1652:       r += nDb;
        !          1653:       *r++ = '.';
        !          1654:       memcpy(r, zName, nName);
        !          1655:       r += nName;
        !          1656:     } else {
        !          1657:       *r++ = *p;
        !          1658:     }
        !          1659:   }
        !          1660:   *r++ = '\0';
                           /* The write cursor must land exactly at the computed length. */
        !          1661:   assert( r == result + len );
        !          1662:   return result;
        !          1663: }
        !          1664: 
        !          1665: static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
        !          1666:                     const char *zFormat){
        !          1667:   char *zCommand = string_format(zFormat, zDb, zName);
        !          1668:   int rc;
        !          1669:   TRACE(("FTS2 sql: %s\n", zCommand));
        !          1670:   rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
        !          1671:   sqlite3_free(zCommand);
        !          1672:   return rc;
        !          1673: }
        !          1674: 
        !          1675: static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
        !          1676:                        sqlite3_stmt **ppStmt, const char *zFormat){
        !          1677:   char *zCommand = string_format(zFormat, zDb, zName);
        !          1678:   int rc;
        !          1679:   TRACE(("FTS2 prepare: %s\n", zCommand));
        !          1680:   rc = sqlite3_prepare_v2(db, zCommand, -1, ppStmt, NULL);
        !          1681:   sqlite3_free(zCommand);
        !          1682:   return rc;
        !          1683: }
        !          1684: 
        !          1685: /* end utility functions */
        !          1686: 
        !          1687: /* Forward reference */
        !          1688: typedef struct fulltext_vtab fulltext_vtab;
        !          1689: 
        !          1690: /* A single term in a query is represented by an instance of
        !          1691: ** the following structure.
        !          1692: */
        !          1693: typedef struct QueryTerm {
        !          1694:   short int nPhrase; /* How many following terms are part of the same phrase */
        !          1695:   short int iPhrase; /* This is the i-th term of a phrase. */
        !          1696:   short int iColumn; /* Column of the index that must match this term */
        !          1697:   signed char isOr;  /* True if this term is preceded by "OR" */
        !          1698:   signed char isNot; /* True if this term is preceded by "-" */
        !          1699:   signed char isPrefix; /* True if this term is followed by "*" */
        !          1700:   char *pTerm;       /* Text of the term.  '\000' terminated.  malloced */
        !          1701:   int nTerm;         /* Number of bytes in pTerm[] */
        !          1702: } QueryTerm;
        !          1703: 
        !          1704: 
        !          1705: /* A query string is parsed into a Query structure.
        !          1706:  *
        !          1707:  * We could, in theory, allow query strings to be complicated
        !          1708:  * nested expressions with precedence determined by parentheses.
        !          1709:  * But none of the major search engines do this.  (Perhaps the
        !          1710:  * feeling is that a parenthesized expression is too complex of
        !          1711:  * an idea for the average user to grasp.)  Taking our lead from
        !          1712:  * the major search engines, we will allow queries to be a list
        !          1713:  * of terms (with an implied AND operator) or phrases in double-quotes,
        !          1714:  * with a single optional "-" before each non-phrase term to designate
        !          1715:  * negation and an optional OR connector.
        !          1716:  *
        !          1717:  * OR binds more tightly than the implied AND, which is what the
        !          1718:  * major search engines seem to do.  So, for example:
        !          1719:  * 
        !          1720:  *    [one two OR three]     ==>    one AND (two OR three)
        !          1721:  *    [one OR two three]     ==>    (one OR two) AND three
        !          1722:  *
        !          1723:  * A "-" before a term matches all entries that lack that term.
        !          1724:  * The "-" must occur immediately before the term with no intervening
        !          1725:  * space.  This is how the search engines do it.
        !          1726:  *
        !          1727:  * A NOT term cannot be the right-hand operand of an OR.  If this
        !          1728:  * occurs in the query string, the NOT is ignored:
        !          1729:  *
        !          1730:  *    [one OR -two]          ==>    one OR two
        !          1731:  *
        !          1732:  */
        !          1733: typedef struct Query {
        !          1734:   fulltext_vtab *pFts;  /* The full text index */
        !          1735:   int nTerms;           /* Number of terms in the query */
        !          1736:   QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
        !          1737:   int nextIsOr;         /* Set the isOr flag on the next inserted term */
        !          1738:   int nextColumn;       /* Next word parsed must be in this column */
        !          1739:   int dfltColumn;       /* The default column */
        !          1740: } Query;
        !          1741: 
        !          1742: 
        !          1743: /*
        !          1744: ** An instance of the following structure keeps track of generated
        !          1745: ** matching-word offset information and snippets for one result row.
        !          1746: */
        !          1747: typedef struct Snippet {
        !          1748:   int nMatch;     /* Total number of matches */
        !          1749:   int nAlloc;     /* Space allocated for aMatch[] */
        !          1750:   struct snippetMatch { /* One entry for each matching term */
        !          1751:     char snStatus;       /* Status flag for use while constructing snippets */
        !          1752:     short int iCol;      /* The column that contains the match */
        !          1753:     short int iTerm;     /* The index in Query.pTerms[] of the matching term */
        !          1754:     short int nByte;     /* Number of bytes in the term */
        !          1755:     int iStart;          /* The offset to the first character of the term */
        !          1756:   } *aMatch;      /* Match array; points to space obtained from malloc */
        !          1757:   char *zOffset;  /* Text rendering of aMatch[] */
        !          1758:   int nOffset;    /* strlen(zOffset) */
        !          1759:   char *zSnippet; /* Snippet text */
        !          1760:   int nSnippet;   /* strlen(zSnippet) */
        !          1761: } Snippet;
        !          1762: 
        !          1763: /* How a cursor finds its rows; kept in fulltext_cursor.iCursorType. */
        !          1764: typedef enum QueryType {
        !          1765:   QUERY_GENERIC,   /* table scan */
        !          1766:   QUERY_ROWID,     /* lookup by rowid */
        !          1767:   QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
        !          1768: } QueryType;
        !          1769: 
        !          1770: typedef enum fulltext_statement {
        !          1771:   CONTENT_INSERT_STMT,         /* built by contentInsertStatement() */
        !          1772:   CONTENT_SELECT_STMT,         /* read one %_content row by rowid */
        !          1773:   CONTENT_UPDATE_STMT,         /* built by contentUpdateStatement() */
        !          1774:   CONTENT_DELETE_STMT,         /* delete one %_content row by rowid */
        !          1775:   CONTENT_EXISTS_STMT,         /* select at most one rowid from %_content */
        !          1776: 
        !          1777:   BLOCK_INSERT_STMT,           /* add one block to %_segments */
        !          1778:   BLOCK_SELECT_STMT,           /* read one block by rowid */
        !          1779:   BLOCK_DELETE_STMT,           /* delete blocks in a rowid range */
        !          1780:   BLOCK_DELETE_ALL_STMT,       /* delete every row of %_segments */
        !          1781: 
        !          1782:   SEGDIR_MAX_INDEX_STMT,       /* max(idx) for one level */
        !          1783:   SEGDIR_SET_STMT,             /* insert one %_segdir row */
        !          1784:   SEGDIR_SELECT_LEVEL_STMT,    /* segments of one level, ordered by idx */
        !          1785:   SEGDIR_SPAN_STMT,            /* min(start_block), max(end_block) of a level */
        !          1786:   SEGDIR_DELETE_STMT,          /* delete the %_segdir rows of one level */
        !          1787:   SEGDIR_SELECT_SEGMENT_STMT,  /* one segment, by level and idx */
        !          1788:   SEGDIR_SELECT_ALL_STMT,      /* all segments, level desc then idx asc */
        !          1789:   SEGDIR_DELETE_ALL_STMT,      /* delete every row of %_segdir */
        !          1790:   SEGDIR_COUNT_STMT,           /* count(*) and max(level) of %_segdir */
        !          1791: 
        !          1792:   MAX_STMT                     /* Always at end! */
        !          1793: } fulltext_statement;
        !          1794: 
        !          1795: /* SQL text for each fulltext_statement.  These must exactly match the
        !          1796: ** enum above.  Each % is replaced with zDb.zName by string_format().
        !          1797: ** TODO(shess): Is there some risk that a statement will be used in two
        !          1798: ** cursors at once, e.g.  if a query joins a virtual table to itself?
        !          1799: ** If so perhaps we should move some of these to the cursor object. */
        !          1800: static const char *const fulltext_zStatement[MAX_STMT] = {
        !          1801:   /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
        !          1802:   /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
        !          1803:   /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
        !          1804:   /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
        !          1805:   /* CONTENT_EXISTS */ "select rowid from %_content limit 1",
        !          1806: 
        !          1807:   /* BLOCK_INSERT */ "insert into %_segments values (?)",
        !          1808:   /* BLOCK_SELECT */ "select block from %_segments where rowid = ?",
        !          1809:   /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?",
        !          1810:   /* BLOCK_DELETE_ALL */ "delete from %_segments",
        !          1811: 
        !          1812:   /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
        !          1813:   /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
        !          1814:   /* SEGDIR_SELECT_LEVEL */
        !          1815:   "select start_block, leaves_end_block, root from %_segdir "
        !          1816:   " where level = ? order by idx",
        !          1817:   /* SEGDIR_SPAN */
        !          1818:   "select min(start_block), max(end_block) from %_segdir "
        !          1819:   " where level = ? and start_block <> 0",
        !          1820:   /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
        !          1821: 
        !          1822:   /* NOTE(shess): The first three results of the following two
        !          1823:   ** statements must match.
        !          1824:   */
        !          1825:   /* SEGDIR_SELECT_SEGMENT */
        !          1826:   "select start_block, leaves_end_block, root from %_segdir "
        !          1827:   " where level = ? and idx = ?",
        !          1828:   /* SEGDIR_SELECT_ALL */
        !          1829:   "select start_block, leaves_end_block, root from %_segdir "
        !          1830:   " order by level desc, idx asc",
        !          1831:   /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
        !          1832:   /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir",
        !          1833: };
        !          1834: 
        !          1835: /*
        !          1836: ** A connection to a fulltext index is an instance of the following
        !          1837: ** structure.  The xCreate and xConnect methods create an instance
        !          1838: ** of this structure and xDestroy and xDisconnect free that instance.
        !          1839: ** All other methods receive a pointer to the structure as one of their
        !          1840: ** arguments.
        !          1841: */
        !          1842: struct fulltext_vtab {
        !          1843:   sqlite3_vtab base;               /* Base class used by SQLite core */
        !          1844:   sqlite3 *db;                     /* The database connection */
        !          1845:   const char *zDb;                 /* logical database name */
        !          1846:   const char *zName;               /* virtual table name */
        !          1847:   int nColumn;                     /* number of columns in virtual table */
        !          1848:   char **azColumn;                 /* column names.  malloced */
        !          1849:   char **azContentColumn;          /* column names in content table; malloced */
        !          1850:   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
        !          1851: 
        !          1852:   /* Precompiled statements which we keep as long as the table is
        !          1853:   ** open.  Indexed by fulltext_statement; sql_get_statement() prepares
        !          1854:   ** an entry the first time it is requested and resets it afterwards. */
        !          1855:   sqlite3_stmt *pFulltextStatements[MAX_STMT];
        !          1856: 
        !          1857:   /* Precompiled statements used for segment merges.  We run a
        !          1858:   ** separate select across the leaf level of each tree being merged.
        !          1859:   ** sql_get_leaf_statement() prepares a slot the first time it is used. */
        !          1860:   sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT];
        !          1861:   /* The statement used to prepare pLeafSelectStmts. */
        !          1862: #define LEAF_SELECT \
        !          1863:   "select block from %_segments where rowid between ? and ? order by rowid"
        !          1864: 
        !          1865:   /* These buffer pending index updates during transactions.
        !          1866:   ** nPendingData estimates the memory size of the pending data.  It
        !          1867:   ** doesn't include the hash-bucket overhead, nor any malloc
        !          1868:   ** overhead.  When nPendingData exceeds kPendingThreshold, the
        !          1869:   ** buffer is flushed even before the transaction closes.
        !          1870:   ** pendingTerms stores the data, and is only valid when nPendingData
        !          1871:   ** is >=0 (nPendingData<0 means pendingTerms has not been
        !          1872:   ** initialized).  iPrevDocid is the last docid written, used to make
        !          1873:   ** certain we're inserting in sorted order.
        !          1874:   */
        !          1875:   int nPendingData;
        !          1876: #define kPendingThreshold (1*1024*1024)
        !          1877:   sqlite_int64 iPrevDocid;
        !          1878:   fts2Hash pendingTerms;
        !          1879: };
        !          1880: 
        !          1881: /*
        !          1882: ** When the core wants to do a query, it creates a cursor using a
        !          1883: ** call to xOpen.  This structure is an instance of a cursor.  It
        !          1884: ** is destroyed by xClose.
        !          1885: */
        !          1886: typedef struct fulltext_cursor {
        !          1887:   sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
        !          1888:   QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
        !          1889:   sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
        !          1890:   int eof;                         /* True if at End Of Results */
        !          1891:   Query q;                         /* Parsed query string */
        !          1892:   Snippet snippet;                 /* Cached snippet for the current row */
        !          1893:   int iColumn;                     /* Column being searched */
        !          1894:   DataBuffer result;               /* Doclist results from fulltextQuery */
        !          1895:   DLReader reader;                 /* Result reader if result not empty */
        !          1896: } fulltext_cursor;
        !          1897: 
        !          1898: static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
        !          1899:   return (fulltext_vtab *) c->base.pVtab;  /* base.pVtab viewed as our vtab type */
        !          1900: }
        !          1901: 
        !          1902: static const sqlite3_module fts2Module;   /* forward declaration */
        !          1903: 
        !          1904: /* Build the SQL text
        !          1905:  *   insert into %_content (rowid, ...) values (?, ...)
        !          1906:  * with one extra "?" per content column.  Free it with sqlite3_free(). */
        !          1907: static const char *contentInsertStatement(fulltext_vtab *v){
        !          1908:   StringBuffer sql;
        !          1909:   int nLeft = v->nColumn;   /* "?" placeholders still to append */
        !          1910: 
        !          1911:   initStringBuffer(&sql);
        !          1912:   append(&sql, "insert into %_content (rowid, ");
        !          1913:   appendList(&sql, v->nColumn, v->azContentColumn);
        !          1914:   append(&sql, ") values (?");
        !          1915:   while( nLeft-->0 )
        !          1916:     append(&sql, ", ?");
        !          1917:   append(&sql, ")");
        !          1918:   return stringBufferData(&sql);
        !          1919: }
        !          1920: 
        !          1921: /* Build the SQL text
        !          1922:  *   update %_content set [col_0] = ?, [col_1] = ?, ...
        !          1923:  *                    where rowid = ?
        !          1924:  * from v->azContentColumn[].  Free the result with sqlite3_free(). */
        !          1925: static const char *contentUpdateStatement(fulltext_vtab *v){
        !          1926:   StringBuffer sql;
        !          1927:   const char *zSql;
        !          1928:   int iCol;
        !          1929: 
        !          1930:   initStringBuffer(&sql);
        !          1931:   append(&sql, "update %_content set ");
        !          1932:   for(iCol=0; iCol<v->nColumn; ++iCol){
        !          1933:     if( iCol!=0 ) append(&sql, ", ");  /* separator between assignments */
        !          1934:     append(&sql, v->azContentColumn[iCol]);
        !          1935:     append(&sql, " = ?");
        !          1936:   }
        !          1937:   append(&sql, " where rowid = ?");
        !          1938:   zSql = stringBufferData(&sql);
        !          1939:   return zSql;
        !          1940: }
        !          1941: 
        !          1942: /* Hands back in *ppStmt the cached statement for iStmt, ready to bind.
        !          1943: ** The first request prepares and caches it; later requests reset the
        !          1944: ** cached copy.  Any error from preparing or resetting is returned.
        !          1945: */
        !          1946: static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
        !          1947:                              sqlite3_stmt **ppStmt){
        !          1948:   sqlite3_stmt **ppCached;
        !          1949:   int rc;
        !          1950: 
        !          1951:   assert( iStmt<MAX_STMT );
        !          1952:   ppCached = &v->pFulltextStatements[iStmt];
        !          1953:   if( *ppCached!=NULL ){
        !          1954:     rc = sqlite3_reset(*ppCached);
        !          1955:   }else{
        !          1956:     /* Column-dependent SQL is built on demand; the rest is static text. */
        !          1957:     const char *zStatic = fulltext_zStatement[iStmt];
        !          1958:     const char *zSql = zStatic;
        !          1959:     if( iStmt==CONTENT_INSERT_STMT ){
        !          1960:       zSql = contentInsertStatement(v);
        !          1961:     }else if( iStmt==CONTENT_UPDATE_STMT ){
        !          1962:       zSql = contentUpdateStatement(v);
        !          1963:     }
        !          1964:     rc = sql_prepare(v->db, v->zDb, v->zName, ppCached, zSql);
        !          1965:     if( zSql!=zStatic ) sqlite3_free((void *) zSql);
        !          1966:   }
        !          1967:   if( rc!=SQLITE_OK ) return rc;
        !          1968: 
        !          1969:   *ppStmt = *ppCached;
        !          1970:   return SQLITE_OK;
        !          1971: }
        !          1972: 
        !          1973: /* Like sqlite3_step(), but convert SQLITE_DONE to SQLITE_OK and
        !          1974: ** SQLITE_ROW to SQLITE_ERROR.  Useful for statements like UPDATE,
        !          1975: ** where we expect no results.  Other result codes pass through. */
        !          1976: static int sql_single_step(sqlite3_stmt *s){
        !          1977:   int rc = sqlite3_step(s);
        !          1978:   if( rc==SQLITE_DONE ) return SQLITE_OK;
        !          1979:   return (rc==SQLITE_ROW) ? SQLITE_ERROR : rc;  /* a row here is unexpected */
        !          1980: }
        !          1981: 
        !          1982: /* Counterpart of sql_get_statement() for the replicated LEAF_SELECT
        !          1983: ** statements cached in v->pLeafSelectStmts[idx].  idx -1 is special:
        !          1984: ** an uncached statement is prepared directly into *ppStmt (used in
        !          1985: ** the optimize implementation). */
        !          1986: /* TODO(shess) Write version for generic statements and then share
        !          1987: ** that between the cached-statement functions.
        !          1988: */
        !          1989: static int sql_get_leaf_statement(fulltext_vtab *v, int idx,
        !          1990:                                   sqlite3_stmt **ppStmt){
        !          1991:   sqlite3_stmt **ppCached;
        !          1992:   int rc;
        !          1993:   assert( idx>=-1 && idx<MERGE_COUNT );
        !          1994:   if( idx==-1 ){
        !          1995:     return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT);
        !          1996:   }
        !          1997:   ppCached = &v->pLeafSelectStmts[idx];
        !          1998:   if( *ppCached!=NULL ){
        !          1999:     rc = sqlite3_reset(*ppCached);
        !          2000:   }else{
        !          2001:     rc = sql_prepare(v->db, v->zDb, v->zName, ppCached, LEAF_SELECT);
        !          2002:   }
        !          2003:   if( rc==SQLITE_OK ) *ppStmt = *ppCached;
        !          2004:   return rc;
        !          2005: }
        !          2006: 
        !          2007: /* Runs: insert into %_content (rowid, ...) values ([rowid], [pValues]) */
        !          2008: static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
        !          2009:                           sqlite3_value **pValues){
        !          2010:   sqlite3_stmt *pInsert;
        !          2011:   int iCol;
        !          2012:   int rc;
        !          2013: 
        !          2014:   rc = sql_get_statement(v, CONTENT_INSERT_STMT, &pInsert);
        !          2015:   if( rc!=SQLITE_OK ) return rc;
        !          2016: 
        !          2017:   /* Parameter 1 is the rowid; column iCol goes to parameter 2+iCol. */
        !          2018:   rc = sqlite3_bind_value(pInsert, 1, rowid);
        !          2019:   for(iCol=0; rc==SQLITE_OK && iCol<v->nColumn; ++iCol){
        !          2020:     rc = sqlite3_bind_value(pInsert, 2+iCol, pValues[iCol]);
        !          2021:   }
        !          2022:   if( rc!=SQLITE_OK ) return rc;
        !          2023:   return sql_single_step(pInsert);
        !          2024: }
        !          2025: 
        !          2026: /* Runs: update %_content set col0 = pValues[0], col1 = pValues[1], ...
        !          2027:  *                           where rowid = [iRowid] */
        !          2028: static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
        !          2029:                           sqlite_int64 iRowid){
        !          2030:   sqlite3_stmt *pUpdate;
        !          2031:   int iCol = 0;
        !          2032:   int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &pUpdate);
        !          2033: 
        !          2034:   /* Columns fill parameters 1..nColumn; stop at the first failure. */
        !          2035:   while( rc==SQLITE_OK && iCol<v->nColumn ){
        !          2036:     rc = sqlite3_bind_value(pUpdate, 1+iCol, pValues[iCol]);
        !          2037:     ++iCol;
        !          2038:   }
        !          2039: 
        !          2040:   /* The rowid of the "where" clause is the last parameter. */
        !          2041:   if( rc==SQLITE_OK ) rc = sqlite3_bind_int64(pUpdate, 1+v->nColumn, iRowid);
        !          2042:   if( rc!=SQLITE_OK ) return rc;
        !          2043:   return sql_single_step(pUpdate);
        !          2044: }
        !          2045: 
        !          2046: static void freeStringArray(int nString, const char **pString){
        !          2047:   int iStr = 0;  /* sqlite3_free() ignores NULL, so entries need no check */
        !          2048: 
        !          2049:   while( iStr<nString ){
        !          2050:     sqlite3_free((void *) pString[iStr++]);
        !          2051:   }
        !          2052:   sqlite3_free((void *) pString);
        !          2053: }
        !          2054: 
        !          2055: /* select * from %_content where rowid = [iRow]
        !          2056:  * On SQLITE_OK, *pValues is a new array of v->nColumn strings (NULL for
        !          2057:  * null fields) which the caller must release, e.g. with freeStringArray().
        !          2058:  * On any other result, including SQLITE_NOMEM, *pValues is left NULL.
        !          2059:  * TODO: Perhaps we should return pointer/length strings here for consistency
        !          2060:  * with other code which uses pointer/length. */
        !          2061: static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
        !          2062:                           const char ***pValues){
        !          2063:   sqlite3_stmt *s;
        !          2064:   const char **values;
        !          2065:   int i, rc;
        !          2066: 
        !          2067:   *pValues = NULL;
        !          2068:   rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
        !          2069:   if( rc!=SQLITE_OK ) return rc;
        !          2070:   rc = sqlite3_bind_int64(s, 1, iRow);
        !          2071:   if( rc!=SQLITE_OK ) return rc;
        !          2072:   rc = sqlite3_step(s);
        !          2073:   if( rc!=SQLITE_ROW ) return rc;
        !          2074: 
        !          2075:   values = (const char **) sqlite3_malloc(v->nColumn * sizeof(const char *));
        !          2076:   if( values==NULL ){
        !          2077:     sqlite3_reset(s);  /* do not leave the row (and its lock) pending */
        !          2078:     return SQLITE_NOMEM;
        !          2079:   }
        !          2080:   for(i=0; i<v->nColumn; ++i){
        !          2081:     values[i] = NULL;  /* every slot is set, so freeStringArray() is safe */
        !          2082:     if( sqlite3_column_type(s, i)!=SQLITE_NULL ){
        !          2083:       const char *zText = (const char *) sqlite3_column_text(s, i);
        !          2084:       if( zText!=NULL ) values[i] = string_dup(zText);
        !          2085:       if( values[i]==NULL ) rc = SQLITE_NOMEM;  /* text unavailable */
        !          2086:     }
        !          2087:   }
        !          2088:   /* We expect only one row.  We must execute another sqlite3_step()
        !          2089:    * to complete the iteration; otherwise the table will remain locked. */
        !          2090:   if( rc==SQLITE_ROW ) rc = sqlite3_step(s);
        !          2091:   if( rc==SQLITE_DONE ){
        !          2092:     *pValues = values;
        !          2093:     return SQLITE_OK;
        !          2094:   }
        !          2095:   sqlite3_reset(s);  /* failure: release the statement's hold on the table */
        !          2096:   freeStringArray(v->nColumn, values);
        !          2097:   return rc;
        !          2098: }
        !          2099: 
        !          2100: /* Runs: delete from %_content where rowid = [iRow] */
        !          2101: static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
        !          2102:   sqlite3_stmt *pDelete;
        !          2103:   int rc;
        !          2104: 
        !          2105:   rc = sql_get_statement(v, CONTENT_DELETE_STMT, &pDelete);
        !          2106:   if( rc==SQLITE_OK ) rc = sqlite3_bind_int64(pDelete, 1, iRow);
        !          2107:   if( rc==SQLITE_OK ) rc = sql_single_step(pDelete);
        !          2108: 
        !          2109:   return rc;
        !          2110: }
        !          2111: 
        !          2112: /* Reports whether %_content holds any row: SQLITE_ROW if it does,
        !          2113: ** SQLITE_DONE if it is empty, and an error code on failure.
        !          2114: */
        !          2115: static int content_exists(fulltext_vtab *v){
        !          2116:   sqlite3_stmt *pExists;
        !          2117:   int rc;
        !          2118: 
        !          2119:   rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &pExists);
        !          2120:   if( rc!=SQLITE_OK ) return rc;
        !          2121:   rc = sqlite3_step(pExists);
        !          2122:   if( rc!=SQLITE_ROW ) return rc;
        !          2123: 
        !          2124:   /* Only one row is expected.  Step once more to finish the statement;
        !          2125:    * otherwise the table would stay locked. */
        !          2126:   rc = sqlite3_step(pExists);
        !          2127:   if( rc==SQLITE_ROW ) return SQLITE_ERROR;
        !          2128:   return (rc==SQLITE_DONE) ? SQLITE_ROW : rc;
        !          2129: }
        !          2130: 
        !          2131: /* Runs: insert into %_segments values ([pData])
        !          2132: **   and stores the rowid assigned to the new block in *piBlockid.
        !          2133: */
        !          2134: static int block_insert(fulltext_vtab *v, const char *pData, int nData,
        !          2135:                         sqlite_int64 *piBlockid){
        !          2136:   sqlite3_stmt *pInsert;
        !          2137:   int rc;
        !          2138: 
        !          2139:   rc = sql_get_statement(v, BLOCK_INSERT_STMT, &pInsert);
        !          2140:   if( rc!=SQLITE_OK ) return rc;
        !          2141:   rc = sqlite3_bind_blob(pInsert, 1, pData, nData, SQLITE_STATIC);
        !          2142:   if( rc!=SQLITE_OK ) return rc;
        !          2143: 
        !          2144:   rc = sqlite3_step(pInsert);
        !          2145:   if( rc==SQLITE_DONE ){
        !          2146:     *piBlockid = sqlite3_last_insert_rowid(v->db);
        !          2147:     return SQLITE_OK;
        !          2148:   }
        !          2149:   return (rc==SQLITE_ROW) ? SQLITE_ERROR : rc;
        !          2150: }
        !          2150: 
        !          2151: /* Runs: delete from %_segments
        !          2152: **   where rowid between [iStartBlockid] and [iEndBlockid]
        !          2153: **
        !          2154: ** Both ends of the range are included.  This is how the blocks
        !          2155: ** which form a segment are deleted.
        !          2156: */
        !          2157: static int block_delete(fulltext_vtab *v,
        !          2158:                         sqlite_int64 iStartBlockid, sqlite_int64 iEndBlockid){
        !          2159:   sqlite3_stmt *pDelete;
        !          2160:   int rc;
        !          2161: 
        !          2162:   rc = sql_get_statement(v, BLOCK_DELETE_STMT, &pDelete);
        !          2163:   /* Parameters 1 and 2 are the bounds of the rowid range. */
        !          2164:   if( rc==SQLITE_OK ) rc = sqlite3_bind_int64(pDelete, 1, iStartBlockid);
        !          2165:   if( rc==SQLITE_OK ) rc = sqlite3_bind_int64(pDelete, 2, iEndBlockid);
        !          2166:   if( rc!=SQLITE_OK ) return rc;
        !          2167: 
        !          2168:   /* No result rows are expected from a delete. */
        !          2169:   return sql_single_step(pDelete);
        !          2170: }
        !          2171: 
        !          2172: /* Looks up the maximum segment idx at iLevel.  Returns SQLITE_ROW
        !          2173: ** with *pidx set when one exists, SQLITE_DONE when there are no
        !          2174: ** segments at iLevel, and an error code otherwise.
        !          2175: */
        !          2176: static int segdir_max_index(fulltext_vtab *v, int iLevel, int *pidx){
        !          2177:   sqlite3_stmt *pMax;
        !          2178:   int bFound;
        !          2179:   int rc;
        !          2180: 
        !          2181:   rc = sql_get_statement(v, SEGDIR_MAX_INDEX_STMT, &pMax);
        !          2182:   if( rc!=SQLITE_OK ) return rc;
        !          2183: 
        !          2184:   rc = sqlite3_bind_int(pMax, 1, iLevel);
        !          2185:   if( rc!=SQLITE_OK ) return rc;
        !          2186: 
        !          2187:   /* max() should always produce a row, but SQLITE_DONE is passed on
        !          2188:   ** like any other non-row result if it ever happens. */
        !          2189:   rc = sqlite3_step(pMax);
        !          2190:   if( rc!=SQLITE_ROW ) return rc;
        !          2191: 
        !          2192:   /* A NULL result means there were no inputs to max(), i.e. no
        !          2193:   ** segments at iLevel; *pidx is written only when a value exists. */
        !          2194:   bFound = sqlite3_column_type(pMax, 0)!=SQLITE_NULL;
        !          2195:   if( bFound ) *pidx = sqlite3_column_int(pMax, 0);
        !          2196: 
        !          2197:   /* Only one row is expected.  Step once more to finish the statement;
        !          2198:   ** otherwise the table would stay locked. */
        !          2199:   rc = sqlite3_step(pMax);
        !          2200:   if( rc==SQLITE_ROW ) return SQLITE_ERROR;
        !          2201:   if( rc!=SQLITE_DONE ) return rc;
        !          2202: 
        !          2203:   return bFound ? SQLITE_ROW : SQLITE_DONE;
        !          2204: }
        !          2205: 
        !          2206: /* insert into %_segdir values (
        !          2207: **   [iLevel], [idx],
        !          2208: **   [iStartBlockid], [iLeavesEndBlockid], [iEndBlockid],
        !          2209: **   [pRootData]
        !          2210: ** )
        !          2211: */
        !          2212: static int segdir_set(fulltext_vtab *v, int iLevel, int idx,
        !          2213:                       sqlite_int64 iStartBlockid,
        !          2214:                       sqlite_int64 iLeavesEndBlockid,
        !          2215:                       sqlite_int64 iEndBlockid,
        !          2216:                       const char *pRootData, int nRootData){
        !          2217:   sqlite3_stmt *s;
        !          2218:   int rc = sql_get_statement(v, SEGDIR_SET_STMT, &s);
        !          2219:   if( rc!=SQLITE_OK ) return rc;
        !          2220: 
        !          2221:   rc = sqlite3_bind_int(s, 1, iLevel);
        !          2222:   if( rc!=SQLITE_OK ) return rc;
        !          2223: 
        !          2224:   rc = sqlite3_bind_int(s, 2, idx);
        !          2225:   if( rc!=SQLITE_OK ) return rc;
        !          2226: 
        !          2227:   rc = sqlite3_bind_int64(s, 3, iStartBlockid);
        !          2228:   if( rc!=SQLITE_OK ) return rc;
        !          2229: 
        !          2230:   rc = sqlite3_bind_int64(s, 4, iLeavesEndBlockid);
        !          2231:   if( rc!=SQLITE_OK ) return rc;
        !          2232: 
        !          2233:   rc = sqlite3_bind_int64(s, 5, iEndBlockid);
        !          2234:   if( rc!=SQLITE_OK ) return rc;
        !          2235: 
        !          2236:   rc = sqlite3_bind_blob(s, 6, pRootData, nRootData, SQLITE_STATIC);
        !          2237:   if( rc!=SQLITE_OK ) return rc;
        !          2238: 
        !          2239:   return sql_single_step(s);
        !          2240: }
        !          2241: 
        !          2242: /* Queries %_segdir for the block span of the segments in level
        !          2243: ** iLevel.  Returns SQLITE_DONE if there are no blocks for iLevel,
        !          2244: ** SQLITE_ROW if there are blocks, else an error.
        !          2245: */
        !          2246: static int segdir_span(fulltext_vtab *v, int iLevel,
        !          2247:                        sqlite_int64 *piStartBlockid,
        !          2248:                        sqlite_int64 *piEndBlockid){
        !          2249:   sqlite3_stmt *s;
        !          2250:   int rc = sql_get_statement(v, SEGDIR_SPAN_STMT, &s);
        !          2251:   if( rc!=SQLITE_OK ) return rc;
        !          2252: 
        !          2253:   rc = sqlite3_bind_int(s, 1, iLevel);
        !          2254:   if( rc!=SQLITE_OK ) return rc;
        !          2255: 
        !          2256:   rc = sqlite3_step(s);
        !          2257:   if( rc==SQLITE_DONE ) return SQLITE_DONE;  /* Should never happen */
        !          2258:   if( rc!=SQLITE_ROW ) return rc;
        !          2259: 
        !          2260:   /* This happens if all segments at this level are entirely inline. */
        !          2261:   if( SQLITE_NULL==sqlite3_column_type(s, 0) ){
        !          2262:     /* We expect only one row.  We must execute another sqlite3_step()
        !          2263:      * to complete the iteration; otherwise the table will remain locked. */
        !          2264:     int rc2 = sqlite3_step(s);
        !          2265:     if( rc2==SQLITE_ROW ) return SQLITE_ERROR;
        !          2266:     return rc2;
        !          2267:   }
        !          2268: 
        !          2269:   *piStartBlockid = sqlite3_column_int64(s, 0);
        !          2270:   *piEndBlockid = sqlite3_column_int64(s, 1);
        !          2271: 
        !          2272:   /* We expect only one row.  We must execute another sqlite3_step()
        !          2273:    * to complete the iteration; otherwise the table will remain locked. */
        !          2274:   rc = sqlite3_step(s);
        !          2275:   if( rc==SQLITE_ROW ) return SQLITE_ERROR;
        !          2276:   if( rc!=SQLITE_DONE ) return rc;
        !          2277:   return SQLITE_ROW;
        !          2278: }
        !          2279: 
        !          2280: /* Delete the segment blocks and segment directory records for all
        !          2281: ** segments at iLevel.
        !          2282: */
        !          2283: static int segdir_delete(fulltext_vtab *v, int iLevel){
        !          2284:   sqlite3_stmt *s;
        !          2285:   sqlite_int64 iStartBlockid, iEndBlockid;
        !          2286:   int rc = segdir_span(v, iLevel, &iStartBlockid, &iEndBlockid);
        !          2287:   if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc;
        !          2288: 
        !          2289:   if( rc==SQLITE_ROW ){
        !          2290:     rc = block_delete(v, iStartBlockid, iEndBlockid);
        !          2291:     if( rc!=SQLITE_OK ) return rc;
        !          2292:   }
        !          2293: 
        !          2294:   /* Delete the segment directory itself. */
        !          2295:   rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s);
        !          2296:   if( rc!=SQLITE_OK ) return rc;
        !          2297: 
        !          2298:   rc = sqlite3_bind_int64(s, 1, iLevel);
        !          2299:   if( rc!=SQLITE_OK ) return rc;
        !          2300: 
        !          2301:   return sql_single_step(s);
        !          2302: }
        !          2303: 
        !          2304: /* Delete entire fts index, SQLITE_OK on success, relevant error on
        !          2305: ** failure.
        !          2306: */
        !          2307: static int segdir_delete_all(fulltext_vtab *v){
        !          2308:   sqlite3_stmt *s;
        !          2309:   int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s);
        !          2310:   if( rc!=SQLITE_OK ) return rc;
        !          2311: 
        !          2312:   rc = sql_single_step(s);
        !          2313:   if( rc!=SQLITE_OK ) return rc;
        !          2314: 
        !          2315:   rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
        !          2316:   if( rc!=SQLITE_OK ) return rc;
        !          2317: 
        !          2318:   return sql_single_step(s);
        !          2319: }
        !          2320: 
        !          2321: /* Returns SQLITE_OK with *pnSegments set to the number of entries in
        !          2322: ** %_segdir and *piMaxLevel set to the highest level which has a
        !          2323: ** segment.  Otherwise returns the SQLite error which caused failure.
        !          2324: */
        !          2325: static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){
        !          2326:   sqlite3_stmt *s;
        !          2327:   int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s);
        !          2328:   if( rc!=SQLITE_OK ) return rc;
        !          2329: 
        !          2330:   rc = sqlite3_step(s);
        !          2331:   /* TODO(shess): This case should not be possible?  Should stronger
        !          2332:   ** measures be taken if it happens?
        !          2333:   */
        !          2334:   if( rc==SQLITE_DONE ){
        !          2335:     *pnSegments = 0;
        !          2336:     *piMaxLevel = 0;
        !          2337:     return SQLITE_OK;
        !          2338:   }
        !          2339:   if( rc!=SQLITE_ROW ) return rc;
        !          2340: 
        !          2341:   *pnSegments = sqlite3_column_int(s, 0);
        !          2342:   *piMaxLevel = sqlite3_column_int(s, 1);
        !          2343: 
        !          2344:   /* We expect only one row.  We must execute another sqlite3_step()
        !          2345:    * to complete the iteration; otherwise the table will remain locked. */
        !          2346:   rc = sqlite3_step(s);
        !          2347:   if( rc==SQLITE_DONE ) return SQLITE_OK;
        !          2348:   if( rc==SQLITE_ROW ) return SQLITE_ERROR;
        !          2349:   return rc;
        !          2350: }
        !          2351: 
        !          2352: /* TODO(shess) clearPendingTerms() is far down the file because
        !          2353: ** writeZeroSegment() is far down the file because LeafWriter is far
        !          2354: ** down the file.  Consider refactoring the code to move the non-vtab
        !          2355: ** code above the vtab code so that we don't need this forward
        !          2356: ** reference.
        !          2357: */
        !          2358: static int clearPendingTerms(fulltext_vtab *v);
        !          2359: 
        !          2360: /*
        !          2361: ** Free the memory used to contain a fulltext_vtab structure.
        !          2362: */
        !          2363: static void fulltext_vtab_destroy(fulltext_vtab *v){
        !          2364:   int iStmt, i;
        !          2365: 
        !          2366:   TRACE(("FTS2 Destroy %p\n", v));
        !          2367:   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
        !          2368:     if( v->pFulltextStatements[iStmt]!=NULL ){
        !          2369:       sqlite3_finalize(v->pFulltextStatements[iStmt]);
        !          2370:       v->pFulltextStatements[iStmt] = NULL;
        !          2371:     }
        !          2372:   }
        !          2373: 
        !          2374:   for( i=0; i<MERGE_COUNT; i++ ){
        !          2375:     if( v->pLeafSelectStmts[i]!=NULL ){
        !          2376:       sqlite3_finalize(v->pLeafSelectStmts[i]);
        !          2377:       v->pLeafSelectStmts[i] = NULL;
        !          2378:     }
        !          2379:   }
        !          2380: 
        !          2381:   if( v->pTokenizer!=NULL ){
        !          2382:     v->pTokenizer->pModule->xDestroy(v->pTokenizer);
        !          2383:     v->pTokenizer = NULL;
        !          2384:   }
        !          2385: 
        !          2386:   clearPendingTerms(v);
        !          2387: 
        !          2388:   sqlite3_free(v->azColumn);
        !          2389:   for(i = 0; i < v->nColumn; ++i) {
        !          2390:     sqlite3_free(v->azContentColumn[i]);
        !          2391:   }
        !          2392:   sqlite3_free(v->azContentColumn);
        !          2393:   sqlite3_free(v);
        !          2394: }
        !          2395: 
/*
** Token types for parsing the arguments to xConnect or xCreate.
** getToken() stores one of these values in *tokenType.
*/
#define TOKEN_EOF         0    /* End of input (the '\000' terminator) */
#define TOKEN_SPACE       1    /* Any kind of whitespace */
#define TOKEN_ID          2    /* An identifier, including [bracketed] ones */
#define TOKEN_STRING      3    /* A string literal quoted with ', " or ` */
#define TOKEN_PUNCT       4    /* A single punctuation character */
        !          2404: 
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters,
** isIdChar[X-0x20] must be 1; the table covers 0x20 through 0x7f and
** anything below 0x20 is never an identifier character.
**
** Ticket #1066.  The SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** NOTE: IdChar() assigns its argument to a variable named "c", which
** must be declared in the scope where the macro is used.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
        !          2428: 
        !          2429: 
        !          2430: /*
        !          2431: ** Return the length of the token that begins at z[0]. 
        !          2432: ** Store the token type in *tokenType before returning.
        !          2433: */
        !          2434: static int getToken(const char *z, int *tokenType){
        !          2435:   int i, c;
        !          2436:   switch( *z ){
        !          2437:     case 0: {
        !          2438:       *tokenType = TOKEN_EOF;
        !          2439:       return 0;
        !          2440:     }
        !          2441:     case ' ': case '\t': case '\n': case '\f': case '\r': {
        !          2442:       for(i=1; safe_isspace(z[i]); i++){}
        !          2443:       *tokenType = TOKEN_SPACE;
        !          2444:       return i;
        !          2445:     }
        !          2446:     case '`':
        !          2447:     case '\'':
        !          2448:     case '"': {
        !          2449:       int delim = z[0];
        !          2450:       for(i=1; (c=z[i])!=0; i++){
        !          2451:         if( c==delim ){
        !          2452:           if( z[i+1]==delim ){
        !          2453:             i++;
        !          2454:           }else{
        !          2455:             break;
        !          2456:           }
        !          2457:         }
        !          2458:       }
        !          2459:       *tokenType = TOKEN_STRING;
        !          2460:       return i + (c!=0);
        !          2461:     }
        !          2462:     case '[': {
        !          2463:       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
        !          2464:       *tokenType = TOKEN_ID;
        !          2465:       return i;
        !          2466:     }
        !          2467:     default: {
        !          2468:       if( !IdChar(*z) ){
        !          2469:         break;
        !          2470:       }
        !          2471:       for(i=1; IdChar(z[i]); i++){}
        !          2472:       *tokenType = TOKEN_ID;
        !          2473:       return i;
        !          2474:     }
        !          2475:   }
        !          2476:   *tokenType = TOKEN_PUNCT;
        !          2477:   return 1;
        !          2478: }
        !          2479: 
/*
** A token extracted from a string is an instance of the following
** structure.  The text is referenced in place, not copied, so the
** token is only meaningful while the string it points into is.
*/
typedef struct Token {
  const char *z;       /* Pointer to token text.  Not '\000' terminated */
  short int n;         /* Length of the token text in bytes.  Being a
                       ** short int, large lengths cannot be stored. */
} Token;
        !          2488: 
        !          2489: /*
        !          2490: ** Given a input string (which is really one of the argv[] parameters
        !          2491: ** passed into xConnect or xCreate) split the string up into tokens.
        !          2492: ** Return an array of pointers to '\000' terminated strings, one string
        !          2493: ** for each non-whitespace token.
        !          2494: **
        !          2495: ** The returned array is terminated by a single NULL pointer.
        !          2496: **
        !          2497: ** Space to hold the returned array is obtained from a single
        !          2498: ** malloc and should be freed by passing the return value to free().
        !          2499: ** The individual strings within the token list are all a part of
        !          2500: ** the single memory allocation and will all be freed at once.
        !          2501: */
        !          2502: static char **tokenizeString(const char *z, int *pnToken){
        !          2503:   int nToken = 0;
        !          2504:   Token *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) );
        !          2505:   int n = 1;
        !          2506:   int e, i;
        !          2507:   int totalSize = 0;
        !          2508:   char **azToken;
        !          2509:   char *zCopy;
        !          2510:   while( n>0 ){
        !          2511:     n = getToken(z, &e);
        !          2512:     if( e!=TOKEN_SPACE ){
        !          2513:       aToken[nToken].z = z;
        !          2514:       aToken[nToken].n = n;
        !          2515:       nToken++;
        !          2516:       totalSize += n+1;
        !          2517:     }
        !          2518:     z += n;
        !          2519:   }
        !          2520:   azToken = (char**)sqlite3_malloc( nToken*sizeof(char*) + totalSize );
        !          2521:   zCopy = (char*)&azToken[nToken];
        !          2522:   nToken--;
        !          2523:   for(i=0; i<nToken; i++){
        !          2524:     azToken[i] = zCopy;
        !          2525:     n = aToken[i].n;
        !          2526:     memcpy(zCopy, aToken[i].z, n);
        !          2527:     zCopy[n] = 0;
        !          2528:     zCopy += n+1;
        !          2529:   }
        !          2530:   azToken[nToken] = 0;
        !          2531:   sqlite3_free(aToken);
        !          2532:   *pnToken = nToken;
        !          2533:   return azToken;
        !          2534: }
        !          2535: 
        !          2536: /*
        !          2537: ** Convert an SQL-style quoted string into a normal string by removing
        !          2538: ** the quote characters.  The conversion is done in-place.  If the
        !          2539: ** input does not begin with a quote character, then this routine
        !          2540: ** is a no-op.
        !          2541: **
        !          2542: ** Examples:
        !          2543: **
        !          2544: **     "abc"   becomes   abc
        !          2545: **     'xyz'   becomes   xyz
        !          2546: **     [pqr]   becomes   pqr
        !          2547: **     `mno`   becomes   mno
        !          2548: */
        !          2549: static void dequoteString(char *z){
        !          2550:   int quote;
        !          2551:   int i, j;
        !          2552:   if( z==0 ) return;
        !          2553:   quote = z[0];
        !          2554:   switch( quote ){
        !          2555:     case '\'':  break;
        !          2556:     case '"':   break;
        !          2557:     case '`':   break;                /* For MySQL compatibility */
        !          2558:     case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
        !          2559:     default:    return;
        !          2560:   }
        !          2561:   for(i=1, j=0; z[i]; i++){
        !          2562:     if( z[i]==quote ){
        !          2563:       if( z[i+1]==quote ){
        !          2564:         z[j++] = quote;
        !          2565:         i++;
        !          2566:       }else{
        !          2567:         z[j++] = 0;
        !          2568:         break;
        !          2569:       }
        !          2570:     }else{
        !          2571:       z[j++] = z[i];
        !          2572:     }
        !          2573:   }
        !          2574: }
        !          2575: 
        !          2576: /*
        !          2577: ** The input azIn is a NULL-terminated list of tokens.  Remove the first
        !          2578: ** token and all punctuation tokens.  Remove the quotes from
        !          2579: ** around string literal tokens.
        !          2580: **
        !          2581: ** Example:
        !          2582: **
        !          2583: **     input:      tokenize chinese ( 'simplifed' , 'mixed' )
        !          2584: **     output:     chinese simplifed mixed
        !          2585: **
        !          2586: ** Another example:
        !          2587: **
        !          2588: **     input:      delimiters ( '[' , ']' , '...' )
        !          2589: **     output:     [ ] ...
        !          2590: */
        !          2591: static void tokenListToIdList(char **azIn){
        !          2592:   int i, j;
        !          2593:   if( azIn ){
        !          2594:     for(i=0, j=-1; azIn[i]; i++){
        !          2595:       if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
        !          2596:         dequoteString(azIn[i]);
        !          2597:         if( j>=0 ){
        !          2598:           azIn[j] = azIn[i];
        !          2599:         }
        !          2600:         j++;
        !          2601:       }
        !          2602:     }
        !          2603:     azIn[j] = 0;
        !          2604:   }
        !          2605: }
        !          2606: 
        !          2607: 
        !          2608: /*
        !          2609: ** Find the first alphanumeric token in the string zIn.  Null-terminate
        !          2610: ** this token.  Remove any quotation marks.  And return a pointer to
        !          2611: ** the result.
        !          2612: */
        !          2613: static char *firstToken(char *zIn, char **pzTail){
        !          2614:   int n, ttype;
        !          2615:   while(1){
        !          2616:     n = getToken(zIn, &ttype);
        !          2617:     if( ttype==TOKEN_SPACE ){
        !          2618:       zIn += n;
        !          2619:     }else if( ttype==TOKEN_EOF ){
        !          2620:       *pzTail = zIn;
        !          2621:       return 0;
        !          2622:     }else{
        !          2623:       zIn[n] = 0;
        !          2624:       *pzTail = &zIn[1];
        !          2625:       dequoteString(zIn);
        !          2626:       return zIn;
        !          2627:     }
        !          2628:   }
        !          2629:   /*NOTREACHED*/
        !          2630: }
        !          2631: 
        !          2632: /* Return true if...
        !          2633: **
        !          2634: **   *  s begins with the string t, ignoring case
        !          2635: **   *  s is longer than t
        !          2636: **   *  The first character of s beyond t is not a alphanumeric
        !          2637: ** 
        !          2638: ** Ignore leading space in *s.
        !          2639: **
        !          2640: ** To put it another way, return true if the first token of
        !          2641: ** s[] is t[].
        !          2642: */
        !          2643: static int startsWith(const char *s, const char *t){
        !          2644:   while( safe_isspace(*s) ){ s++; }
        !          2645:   while( *t ){
        !          2646:     if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
        !          2647:   }
        !          2648:   return *s!='_' && !safe_isalnum(*s);
        !          2649: }
        !          2650: 
/*
** An instance of this structure defines the "spec" of a
** full text index.  This structure is populated by parseSpec
** and used by fulltextConnect and fulltextCreate.
**
** parseSpec() points zDb and zName into the same allocation as
** azColumn, so they stay valid only as long as azColumn does.
*/
typedef struct TableSpec {
  const char *zDb;         /* Logical database name */
  const char *zName;       /* Name of the full-text index */
  int nColumn;             /* Number of columns to be indexed */
  char **azColumn;         /* Original names of columns to be indexed */
  char **azContentColumn;  /* Column names for %_content */
  char **azTokenizer;      /* Name of tokenizer and its arguments */
} TableSpec;
        !          2664: 
        !          2665: /*
        !          2666: ** Reclaim all of the memory used by a TableSpec
        !          2667: */
        !          2668: static void clearTableSpec(TableSpec *p) {
        !          2669:   sqlite3_free(p->azColumn);
        !          2670:   sqlite3_free(p->azContentColumn);
        !          2671:   sqlite3_free(p->azTokenizer);
        !          2672: }
        !          2673: 
        !          2674: /* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
        !          2675:  *
        !          2676:  * CREATE VIRTUAL TABLE email
        !          2677:  *        USING fts2(subject, body, tokenize mytokenizer(myarg))
        !          2678:  *
        !          2679:  * We return parsed information in a TableSpec structure.
        !          2680:  * 
        !          2681:  */
        !          2682: static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
        !          2683:                      char**pzErr){
        !          2684:   int i, n;
        !          2685:   char *z, *zDummy;
        !          2686:   char **azArg;
        !          2687:   const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
        !          2688: 
        !          2689:   assert( argc>=3 );
        !          2690:   /* Current interface:
        !          2691:   ** argv[0] - module name
        !          2692:   ** argv[1] - database name
        !          2693:   ** argv[2] - table name
        !          2694:   ** argv[3..] - columns, optionally followed by tokenizer specification
        !          2695:   **             and snippet delimiters specification.
        !          2696:   */
        !          2697: 
        !          2698:   /* Make a copy of the complete argv[][] array in a single allocation.
        !          2699:   ** The argv[][] array is read-only and transient.  We can write to the
        !          2700:   ** copy in order to modify things and the copy is persistent.
        !          2701:   */
        !          2702:   CLEAR(pSpec);
        !          2703:   for(i=n=0; i<argc; i++){
        !          2704:     n += strlen(argv[i]) + 1;
        !          2705:   }
        !          2706:   azArg = sqlite3_malloc( sizeof(char*)*argc + n );
        !          2707:   if( azArg==0 ){
        !          2708:     return SQLITE_NOMEM;
        !          2709:   }
        !          2710:   z = (char*)&azArg[argc];
        !          2711:   for(i=0; i<argc; i++){
        !          2712:     azArg[i] = z;
        !          2713:     strcpy(z, argv[i]);
        !          2714:     z += strlen(z)+1;
        !          2715:   }
        !          2716: 
        !          2717:   /* Identify the column names and the tokenizer and delimiter arguments
        !          2718:   ** in the argv[][] array.
        !          2719:   */
        !          2720:   pSpec->zDb = azArg[1];
        !          2721:   pSpec->zName = azArg[2];
        !          2722:   pSpec->nColumn = 0;
        !          2723:   pSpec->azColumn = azArg;
        !          2724:   zTokenizer = "tokenize simple";
        !          2725:   for(i=3; i<argc; ++i){
        !          2726:     if( startsWith(azArg[i],"tokenize") ){
        !          2727:       zTokenizer = azArg[i];
        !          2728:     }else{
        !          2729:       z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
        !          2730:       pSpec->nColumn++;
        !          2731:     }
        !          2732:   }
        !          2733:   if( pSpec->nColumn==0 ){
        !          2734:     azArg[0] = "content";
        !          2735:     pSpec->nColumn = 1;
        !          2736:   }
        !          2737: 
        !          2738:   /*
        !          2739:   ** Construct the list of content column names.
        !          2740:   **
        !          2741:   ** Each content column name will be of the form cNNAAAA
        !          2742:   ** where NN is the column number and AAAA is the sanitized
        !          2743:   ** column name.  "sanitized" means that special characters are
        !          2744:   ** converted to "_".  The cNN prefix guarantees that all column
        !          2745:   ** names are unique.
        !          2746:   **
        !          2747:   ** The AAAA suffix is not strictly necessary.  It is included
        !          2748:   ** for the convenience of people who might examine the generated
        !          2749:   ** %_content table and wonder what the columns are used for.
        !          2750:   */
        !          2751:   pSpec->azContentColumn = sqlite3_malloc( pSpec->nColumn * sizeof(char *) );
        !          2752:   if( pSpec->azContentColumn==0 ){
        !          2753:     clearTableSpec(pSpec);
        !          2754:     return SQLITE_NOMEM;
        !          2755:   }
        !          2756:   for(i=0; i<pSpec->nColumn; i++){
        !          2757:     char *p;
        !          2758:     pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
        !          2759:     for (p = pSpec->azContentColumn[i]; *p ; ++p) {
        !          2760:       if( !safe_isalnum(*p) ) *p = '_';
        !          2761:     }
        !          2762:   }
        !          2763: 
        !          2764:   /*
        !          2765:   ** Parse the tokenizer specification string.
        !          2766:   */
        !          2767:   pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
        !          2768:   tokenListToIdList(pSpec->azTokenizer);
        !          2769: 
        !          2770:   return SQLITE_OK;
        !          2771: }
        !          2772: 
        !          2773: /*
        !          2774: ** Generate a CREATE TABLE statement that describes the schema of
        !          2775: ** the virtual table.  Return a pointer to this schema string.
        !          2776: **
        !          2777: ** Space is obtained from sqlite3_mprintf() and should be freed
        !          2778: ** using sqlite3_free().
        !          2779: */
        !          2780: static char *fulltextSchema(
        !          2781:   int nColumn,                  /* Number of columns */
        !          2782:   const char *const* azColumn,  /* List of columns */
        !          2783:   const char *zTableName        /* Name of the table */
        !          2784: ){
        !          2785:   int i;
        !          2786:   char *zSchema, *zNext;
        !          2787:   const char *zSep = "(";
        !          2788:   zSchema = sqlite3_mprintf("CREATE TABLE x");
        !          2789:   for(i=0; i<nColumn; i++){
        !          2790:     zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
        !          2791:     sqlite3_free(zSchema);
        !          2792:     zSchema = zNext;
        !          2793:     zSep = ",";
        !          2794:   }
        !          2795:   zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
        !          2796:   sqlite3_free(zSchema);
        !          2797:   return zNext;
        !          2798: }
        !          2799: 
        !          2800: /*
        !          2801: ** Build a new sqlite3_vtab structure that will describe the
        !          2802: ** fulltext index defined by spec.
        !          2803: */
        !          2804: static int constructVtab(
        !          2805:   sqlite3 *db,              /* The SQLite database connection */
        !          2806:   fts2Hash *pHash,          /* Hash table containing tokenizers */
        !          2807:   TableSpec *spec,          /* Parsed spec information from parseSpec() */
        !          2808:   sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
        !          2809:   char **pzErr              /* Write any error message here */
        !          2810: ){
        !          2811:   int rc;
        !          2812:   int n;
        !          2813:   fulltext_vtab *v = 0;
        !          2814:   const sqlite3_tokenizer_module *m = NULL;
        !          2815:   char *schema;
        !          2816: 
        !          2817:   char const *zTok;         /* Name of tokenizer to use for this fts table */
        !          2818:   int nTok;                 /* Length of zTok, including nul terminator */
        !          2819: 
        !          2820:   v = (fulltext_vtab *) sqlite3_malloc(sizeof(fulltext_vtab));
        !          2821:   if( v==0 ) return SQLITE_NOMEM;
        !          2822:   CLEAR(v);
        !          2823:   /* sqlite will initialize v->base */
        !          2824:   v->db = db;
        !          2825:   v->zDb = spec->zDb;       /* Freed when azColumn is freed */
        !          2826:   v->zName = spec->zName;   /* Freed when azColumn is freed */
        !          2827:   v->nColumn = spec->nColumn;
        !          2828:   v->azContentColumn = spec->azContentColumn;
        !          2829:   spec->azContentColumn = 0;
        !          2830:   v->azColumn = spec->azColumn;
        !          2831:   spec->azColumn = 0;
        !          2832: 
        !          2833:   if( spec->azTokenizer==0 ){
        !          2834:     return SQLITE_NOMEM;
        !          2835:   }
        !          2836: 
        !          2837:   zTok = spec->azTokenizer[0]; 
        !          2838:   if( !zTok ){
        !          2839:     zTok = "simple";
        !          2840:   }
        !          2841:   nTok = strlen(zTok)+1;
        !          2842: 
        !          2843:   m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok);
        !          2844:   if( !m ){
        !          2845:     *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
        !          2846:     rc = SQLITE_ERROR;
        !          2847:     goto err;
        !          2848:   }
        !          2849: 
        !          2850:   for(n=0; spec->azTokenizer[n]; n++){}
        !          2851:   if( n ){
        !          2852:     rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
        !          2853:                     &v->pTokenizer);
        !          2854:   }else{
        !          2855:     rc = m->xCreate(0, 0, &v->pTokenizer);
        !          2856:   }
        !          2857:   if( rc!=SQLITE_OK ) goto err;
        !          2858:   v->pTokenizer->pModule = m;
        !          2859: 
        !          2860:   /* TODO: verify the existence of backing tables foo_content, foo_term */
        !          2861: 
        !          2862:   schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
        !          2863:                           spec->zName);
        !          2864:   rc = sqlite3_declare_vtab(db, schema);
        !          2865:   sqlite3_free(schema);
        !          2866:   if( rc!=SQLITE_OK ) goto err;
        !          2867: 
        !          2868:   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
        !          2869: 
        !          2870:   /* Indicate that the buffer is not live. */
        !          2871:   v->nPendingData = -1;
        !          2872: 
        !          2873:   *ppVTab = &v->base;
        !          2874:   TRACE(("FTS2 Connect %p\n", v));
        !          2875: 
        !          2876:   return rc;
        !          2877: 
        !          2878: err:
        !          2879:   fulltext_vtab_destroy(v);
        !          2880:   return rc;
        !          2881: }
        !          2882: 
        !          2883: static int fulltextConnect(
        !          2884:   sqlite3 *db,
        !          2885:   void *pAux,
        !          2886:   int argc, const char *const*argv,
        !          2887:   sqlite3_vtab **ppVTab,
        !          2888:   char **pzErr
        !          2889: ){
        !          2890:   TableSpec spec;
        !          2891:   int rc = parseSpec(&spec, argc, argv, pzErr);
        !          2892:   if( rc!=SQLITE_OK ) return rc;
        !          2893: 
        !          2894:   rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
        !          2895:   clearTableSpec(&spec);
        !          2896:   return rc;
        !          2897: }
        !          2898: 
        !          2899: /* The %_content table holds the text of each document, with
        !          2900: ** the rowid used as the docid.
        !          2901: */
        !          2902: /* TODO(shess) This comment needs elaboration to match the updated
        !          2903: ** code.  Work it into the top-of-file comment at that time.
        !          2904: */
        !          2905: static int fulltextCreate(sqlite3 *db, void *pAux,
        !          2906:                           int argc, const char * const *argv,
        !          2907:                           sqlite3_vtab **ppVTab, char **pzErr){
        !          2908:   int rc;
        !          2909:   TableSpec spec;
        !          2910:   StringBuffer schema;
        !          2911:   TRACE(("FTS2 Create\n"));
        !          2912: 
        !          2913:   rc = parseSpec(&spec, argc, argv, pzErr);
        !          2914:   if( rc!=SQLITE_OK ) return rc;
        !          2915: 
        !          2916:   initStringBuffer(&schema);
        !          2917:   append(&schema, "CREATE TABLE %_content(");
        !          2918:   appendList(&schema, spec.nColumn, spec.azContentColumn);
        !          2919:   append(&schema, ")");
        !          2920:   rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema));
        !          2921:   stringBufferDestroy(&schema);
        !          2922:   if( rc!=SQLITE_OK ) goto out;
        !          2923: 
        !          2924:   rc = sql_exec(db, spec.zDb, spec.zName,
        !          2925:                 "create table %_segments(block blob);");
        !          2926:   if( rc!=SQLITE_OK ) goto out;
        !          2927: 
        !          2928:   rc = sql_exec(db, spec.zDb, spec.zName,
        !          2929:                 "create table %_segdir("
        !          2930:                 "  level integer,"
        !          2931:                 "  idx integer,"
        !          2932:                 "  start_block integer,"
        !          2933:                 "  leaves_end_block integer,"
        !          2934:                 "  end_block integer,"
        !          2935:                 "  root blob,"
        !          2936:                 "  primary key(level, idx)"
        !          2937:                 ");");
        !          2938:   if( rc!=SQLITE_OK ) goto out;
        !          2939: 
        !          2940:   rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
        !          2941: 
        !          2942: out:
        !          2943:   clearTableSpec(&spec);
        !          2944:   return rc;
        !          2945: }
        !          2946: 
        !          2947: /* Decide how to handle an SQL query. */
        !          2948: static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
        !          2949:   int i;
        !          2950:   TRACE(("FTS2 BestIndex\n"));
        !          2951: 
        !          2952:   for(i=0; i<pInfo->nConstraint; ++i){
        !          2953:     const struct sqlite3_index_constraint *pConstraint;
        !          2954:     pConstraint = &pInfo->aConstraint[i];
        !          2955:     if( pConstraint->usable ) {
        !          2956:       if( pConstraint->iColumn==-1 &&
        !          2957:           pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
        !          2958:         pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
        !          2959:         TRACE(("FTS2 QUERY_ROWID\n"));
        !          2960:       } else if( pConstraint->iColumn>=0 &&
        !          2961:                  pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
        !          2962:         /* full-text search */
        !          2963:         pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
        !          2964:         TRACE(("FTS2 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
        !          2965:       } else continue;
        !          2966: 
        !          2967:       pInfo->aConstraintUsage[i].argvIndex = 1;
        !          2968:       pInfo->aConstraintUsage[i].omit = 1;
        !          2969: 
        !          2970:       /* An arbitrary value for now.
        !          2971:        * TODO: Perhaps rowid matches should be considered cheaper than
        !          2972:        * full-text searches. */
        !          2973:       pInfo->estimatedCost = 1.0;   
        !          2974: 
        !          2975:       return SQLITE_OK;
        !          2976:     }
        !          2977:   }
        !          2978:   pInfo->idxNum = QUERY_GENERIC;
        !          2979:   return SQLITE_OK;
        !          2980: }
        !          2981: 
        !          2982: static int fulltextDisconnect(sqlite3_vtab *pVTab){
        !          2983:   TRACE(("FTS2 Disconnect %p\n", pVTab));
        !          2984:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
        !          2985:   return SQLITE_OK;
        !          2986: }
        !          2987: 
        !          2988: static int fulltextDestroy(sqlite3_vtab *pVTab){
        !          2989:   fulltext_vtab *v = (fulltext_vtab *)pVTab;
        !          2990:   int rc;
        !          2991: 
        !          2992:   TRACE(("FTS2 Destroy %p\n", pVTab));
        !          2993:   rc = sql_exec(v->db, v->zDb, v->zName,
        !          2994:                 "drop table if exists %_content;"
        !          2995:                 "drop table if exists %_segments;"
        !          2996:                 "drop table if exists %_segdir;"
        !          2997:                 );
        !          2998:   if( rc!=SQLITE_OK ) return rc;
        !          2999: 
        !          3000:   fulltext_vtab_destroy((fulltext_vtab *)pVTab);
        !          3001:   return SQLITE_OK;
        !          3002: }
        !          3003: 
        !          3004: static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
        !          3005:   fulltext_cursor *c;
        !          3006: 
        !          3007:   c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor));
        !          3008:   if( c ){
        !          3009:     memset(c, 0, sizeof(fulltext_cursor));
        !          3010:     /* sqlite will initialize c->base */
        !          3011:     *ppCursor = &c->base;
        !          3012:     TRACE(("FTS2 Open %p: %p\n", pVTab, c));
        !          3013:     return SQLITE_OK;
        !          3014:   }else{
        !          3015:     return SQLITE_NOMEM;
        !          3016:   }
        !          3017: }
        !          3018: 
        !          3019: 
        !          3020: /* Free all of the dynamically allocated memory held by *q
        !          3021: */
        !          3022: static void queryClear(Query *q){
        !          3023:   int i;
        !          3024:   for(i = 0; i < q->nTerms; ++i){
        !          3025:     sqlite3_free(q->pTerms[i].pTerm);
        !          3026:   }
        !          3027:   sqlite3_free(q->pTerms);
        !          3028:   CLEAR(q);
        !          3029: }
        !          3030: 
        !          3031: /* Free all of the dynamically allocated memory held by the
        !          3032: ** Snippet
        !          3033: */
        !          3034: static void snippetClear(Snippet *p){
        !          3035:   sqlite3_free(p->aMatch);
        !          3036:   sqlite3_free(p->zOffset);
        !          3037:   sqlite3_free(p->zSnippet);
        !          3038:   CLEAR(p);
        !          3039: }
        !          3040: /*
        !          3041: ** Append a single entry to the p->aMatch[] log.
        !          3042: */
        !          3043: static void snippetAppendMatch(
        !          3044:   Snippet *p,               /* Append the entry to this snippet */
        !          3045:   int iCol, int iTerm,      /* The column and query term */
        !          3046:   int iStart, int nByte     /* Offset and size of the match */
        !          3047: ){
        !          3048:   int i;
        !          3049:   struct snippetMatch *pMatch;
        !          3050:   if( p->nMatch+1>=p->nAlloc ){
        !          3051:     p->nAlloc = p->nAlloc*2 + 10;
        !          3052:     p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
        !          3053:     if( p->aMatch==0 ){
        !          3054:       p->nMatch = 0;
        !          3055:       p->nAlloc = 0;
        !          3056:       return;
        !          3057:     }
        !          3058:   }
        !          3059:   i = p->nMatch++;
        !          3060:   pMatch = &p->aMatch[i];
        !          3061:   pMatch->iCol = iCol;
        !          3062:   pMatch->iTerm = iTerm;
        !          3063:   pMatch->iStart = iStart;
        !          3064:   pMatch->nByte = nByte;
        !          3065: }
        !          3066: 
        !          3067: /*
        !          3068: ** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
        !          3069: */
        !          3070: #define FTS2_ROTOR_SZ   (32)
        !          3071: #define FTS2_ROTOR_MASK (FTS2_ROTOR_SZ-1)
        !          3072: 
        !          3073: /*
        !          3074: ** Add entries to pSnippet->aMatch[] for every match that occurs against
        !          3075: ** document zDoc[0..nDoc-1] which is stored in column iColumn.
        !          3076: */
        !          3077: static void snippetOffsetsOfColumn(
        !          3078:   Query *pQuery,
        !          3079:   Snippet *pSnippet,
        !          3080:   int iColumn,
        !          3081:   const char *zDoc,
        !          3082:   int nDoc
        !          3083: ){
        !          3084:   const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
        !          3085:   sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
        !          3086:   sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
        !          3087:   fulltext_vtab *pVtab;                /* The full text index */
        !          3088:   int nColumn;                         /* Number of columns in the index */
        !          3089:   const QueryTerm *aTerm;              /* Query string terms */
        !          3090:   int nTerm;                           /* Number of query string terms */  
        !          3091:   int i, j;                            /* Loop counters */
        !          3092:   int rc;                              /* Return code */
        !          3093:   unsigned int match, prevMatch;       /* Phrase search bitmasks */
        !          3094:   const char *zToken;                  /* Next token from the tokenizer */
        !          3095:   int nToken;                          /* Size of zToken */
        !          3096:   int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
        !          3097: 
        !          3098:   /* The following variables keep a circular buffer of the last
        !          3099:   ** few tokens */
        !          3100:   unsigned int iRotor = 0;             /* Index of current token */
        !          3101:   int iRotorBegin[FTS2_ROTOR_SZ];      /* Beginning offset of token */
        !          3102:   int iRotorLen[FTS2_ROTOR_SZ];        /* Length of token */
        !          3103: 
        !          3104:   pVtab = pQuery->pFts;
        !          3105:   nColumn = pVtab->nColumn;
        !          3106:   pTokenizer = pVtab->pTokenizer;
        !          3107:   pTModule = pTokenizer->pModule;
        !          3108:   rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
        !          3109:   if( rc ) return;
        !          3110:   pTCursor->pTokenizer = pTokenizer;
        !          3111:   aTerm = pQuery->pTerms;
        !          3112:   nTerm = pQuery->nTerms;
        !          3113:   if( nTerm>=FTS2_ROTOR_SZ ){
        !          3114:     nTerm = FTS2_ROTOR_SZ - 1;
        !          3115:   }
        !          3116:   prevMatch = 0;
        !          3117:   while(1){
        !          3118:     rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
        !          3119:     if( rc ) break;
        !          3120:     iRotorBegin[iRotor&FTS2_ROTOR_MASK] = iBegin;
        !          3121:     iRotorLen[iRotor&FTS2_ROTOR_MASK] = iEnd-iBegin;
        !          3122:     match = 0;
        !          3123:     for(i=0; i<nTerm; i++){
        !          3124:       int iCol;
        !          3125:       iCol = aTerm[i].iColumn;
        !          3126:       if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
        !          3127:       if( aTerm[i].nTerm>nToken ) continue;
        !          3128:       if( !aTerm[i].isPrefix && aTerm[i].nTerm<nToken ) continue;
        !          3129:       assert( aTerm[i].nTerm<=nToken );
        !          3130:       if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
        !          3131:       if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
        !          3132:       match |= 1<<i;
        !          3133:       if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
        !          3134:         for(j=aTerm[i].iPhrase-1; j>=0; j--){
        !          3135:           int k = (iRotor-j) & FTS2_ROTOR_MASK;
        !          3136:           snippetAppendMatch(pSnippet, iColumn, i-j,
        !          3137:                 iRotorBegin[k], iRotorLen[k]);
        !          3138:         }
        !          3139:       }
        !          3140:     }
        !          3141:     prevMatch = match<<1;
        !          3142:     iRotor++;
        !          3143:   }
        !          3144:   pTModule->xClose(pTCursor);  
        !          3145: }
        !          3146: 
        !          3147: 
        !          3148: /*
        !          3149: ** Compute all offsets for the current row of the query.  
        !          3150: ** If the offsets have already been computed, this routine is a no-op.
        !          3151: */
        !          3152: static void snippetAllOffsets(fulltext_cursor *p){
        !          3153:   int nColumn;
        !          3154:   int iColumn, i;
        !          3155:   int iFirst, iLast;
        !          3156:   fulltext_vtab *pFts;
        !          3157: 
        !          3158:   if( p->snippet.nMatch ) return;
        !          3159:   if( p->q.nTerms==0 ) return;
        !          3160:   pFts = p->q.pFts;
        !          3161:   nColumn = pFts->nColumn;
        !          3162:   iColumn = (p->iCursorType - QUERY_FULLTEXT);
        !          3163:   if( iColumn<0 || iColumn>=nColumn ){
        !          3164:     iFirst = 0;
        !          3165:     iLast = nColumn-1;
        !          3166:   }else{
        !          3167:     iFirst = iColumn;
        !          3168:     iLast = iColumn;
        !          3169:   }
        !          3170:   for(i=iFirst; i<=iLast; i++){
        !          3171:     const char *zDoc;
        !          3172:     int nDoc;
        !          3173:     zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
        !          3174:     nDoc = sqlite3_column_bytes(p->pStmt, i+1);
        !          3175:     snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
        !          3176:   }
        !          3177: }
        !          3178: 
        !          3179: /*
        !          3180: ** Convert the information in the aMatch[] array of the snippet
        !          3181: ** into the string zOffset[0..nOffset-1].
        !          3182: */
        !          3183: static void snippetOffsetText(Snippet *p){
        !          3184:   int i;
        !          3185:   int cnt = 0;
        !          3186:   StringBuffer sb;
        !          3187:   char zBuf[200];
        !          3188:   if( p->zOffset ) return;
        !          3189:   initStringBuffer(&sb);
        !          3190:   for(i=0; i<p->nMatch; i++){
        !          3191:     struct snippetMatch *pMatch = &p->aMatch[i];
        !          3192:     zBuf[0] = ' ';
        !          3193:     sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
        !          3194:         pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
        !          3195:     append(&sb, zBuf);
        !          3196:     cnt++;
        !          3197:   }
        !          3198:   p->zOffset = stringBufferData(&sb);
        !          3199:   p->nOffset = stringBufferLength(&sb);
        !          3200: }
        !          3201: 
        !          3202: /*
        !          3203: ** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
        !          3204: ** of matching words some of which might be in zDoc.  zDoc is column
        !          3205: ** number iCol.
        !          3206: **
        !          3207: ** iBreak is suggested spot in zDoc where we could begin or end an
        !          3208: ** excerpt.  Return a value similar to iBreak but possibly adjusted
        !          3209: ** to be a little left or right so that the break point is better.
        !          3210: */
        !          3211: static int wordBoundary(
        !          3212:   int iBreak,                   /* The suggested break point */
        !          3213:   const char *zDoc,             /* Document text */
        !          3214:   int nDoc,                     /* Number of bytes in zDoc[] */
        !          3215:   struct snippetMatch *aMatch,  /* Matching words */
        !          3216:   int nMatch,                   /* Number of entries in aMatch[] */
        !          3217:   int iCol                      /* The column number for zDoc[] */
        !          3218: ){
        !          3219:   int i;
        !          3220:   if( iBreak<=10 ){
        !          3221:     return 0;
        !          3222:   }
        !          3223:   if( iBreak>=nDoc-10 ){
        !          3224:     return nDoc;
        !          3225:   }
        !          3226:   for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
        !          3227:   while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
        !          3228:   if( i<nMatch ){
        !          3229:     if( aMatch[i].iStart<iBreak+10 ){
        !          3230:       return aMatch[i].iStart;
        !          3231:     }
        !          3232:     if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
        !          3233:       return aMatch[i-1].iStart;
        !          3234:     }
        !          3235:   }
        !          3236:   for(i=1; i<=10; i++){
        !          3237:     if( safe_isspace(zDoc[iBreak-i]) ){
        !          3238:       return iBreak - i + 1;
        !          3239:     }
        !          3240:     if( safe_isspace(zDoc[iBreak+i]) ){
        !          3241:       return iBreak + i + 1;
        !          3242:     }
        !          3243:   }
        !          3244:   return iBreak;
        !          3245: }
        !          3246: 
        !          3247: 
        !          3248: 
        !          3249: /*
        !          3250: ** Allowed values for Snippet.aMatch[].snStatus
        !          3251: */
        !          3252: #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
        !          3253: #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
        !          3254: 
        !          3255: /*
        !          3256: ** Generate the text of a snippet.
        !          3257: */
        !          3258: static void snippetText(
        !          3259:   fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
        !          3260:   const char *zStartMark,     /* Markup to appear before each match */
        !          3261:   const char *zEndMark,       /* Markup to appear after each match */
        !          3262:   const char *zEllipsis       /* Ellipsis mark */
        !          3263: ){
        !          3264:   int i, j;
        !          3265:   struct snippetMatch *aMatch;
        !          3266:   int nMatch;
        !          3267:   int nDesired;
        !          3268:   StringBuffer sb;
        !          3269:   int tailCol;
        !          3270:   int tailOffset;
        !          3271:   int iCol;
        !          3272:   int nDoc;
        !          3273:   const char *zDoc;
        !          3274:   int iStart, iEnd;
        !          3275:   int tailEllipsis = 0;
        !          3276:   int iMatch;
        !          3277:   
        !          3278: 
        !          3279:   sqlite3_free(pCursor->snippet.zSnippet);
        !          3280:   pCursor->snippet.zSnippet = 0;
        !          3281:   aMatch = pCursor->snippet.aMatch;
        !          3282:   nMatch = pCursor->snippet.nMatch;
        !          3283:   initStringBuffer(&sb);
        !          3284: 
        !          3285:   for(i=0; i<nMatch; i++){
        !          3286:     aMatch[i].snStatus = SNIPPET_IGNORE;
        !          3287:   }
        !          3288:   nDesired = 0;
        !          3289:   for(i=0; i<pCursor->q.nTerms; i++){
        !          3290:     for(j=0; j<nMatch; j++){
        !          3291:       if( aMatch[j].iTerm==i ){
        !          3292:         aMatch[j].snStatus = SNIPPET_DESIRED;
        !          3293:         nDesired++;
        !          3294:         break;
        !          3295:       }
        !          3296:     }
        !          3297:   }
        !          3298: 
        !          3299:   iMatch = 0;
        !          3300:   tailCol = -1;
        !          3301:   tailOffset = 0;
        !          3302:   for(i=0; i<nMatch && nDesired>0; i++){
        !          3303:     if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
        !          3304:     nDesired--;
        !          3305:     iCol = aMatch[i].iCol;
        !          3306:     zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
        !          3307:     nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
        !          3308:     iStart = aMatch[i].iStart - 40;
        !          3309:     iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
        !          3310:     if( iStart<=10 ){
        !          3311:       iStart = 0;
        !          3312:     }
        !          3313:     if( iCol==tailCol && iStart<=tailOffset+20 ){
        !          3314:       iStart = tailOffset;
        !          3315:     }
        !          3316:     if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
        !          3317:       trimWhiteSpace(&sb);
        !          3318:       appendWhiteSpace(&sb);
        !          3319:       append(&sb, zEllipsis);
        !          3320:       appendWhiteSpace(&sb);
        !          3321:     }
        !          3322:     iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
        !          3323:     iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
        !          3324:     if( iEnd>=nDoc-10 ){
        !          3325:       iEnd = nDoc;
        !          3326:       tailEllipsis = 0;
        !          3327:     }else{
        !          3328:       tailEllipsis = 1;
        !          3329:     }
        !          3330:     while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
        !          3331:     while( iStart<iEnd ){
        !          3332:       while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
        !          3333:              && aMatch[iMatch].iCol<=iCol ){
        !          3334:         iMatch++;
        !          3335:       }
        !          3336:       if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
        !          3337:              && aMatch[iMatch].iCol==iCol ){
        !          3338:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
        !          3339:         iStart = aMatch[iMatch].iStart;
        !          3340:         append(&sb, zStartMark);
        !          3341:         nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
        !          3342:         append(&sb, zEndMark);
        !          3343:         iStart += aMatch[iMatch].nByte;
        !          3344:         for(j=iMatch+1; j<nMatch; j++){
        !          3345:           if( aMatch[j].iTerm==aMatch[iMatch].iTerm
        !          3346:               && aMatch[j].snStatus==SNIPPET_DESIRED ){
        !          3347:             nDesired--;
        !          3348:             aMatch[j].snStatus = SNIPPET_IGNORE;
        !          3349:           }
        !          3350:         }
        !          3351:       }else{
        !          3352:         nappend(&sb, &zDoc[iStart], iEnd - iStart);
        !          3353:         iStart = iEnd;
        !          3354:       }
        !          3355:     }
        !          3356:     tailCol = iCol;
        !          3357:     tailOffset = iEnd;
        !          3358:   }
        !          3359:   trimWhiteSpace(&sb);
        !          3360:   if( tailEllipsis ){
        !          3361:     appendWhiteSpace(&sb);
        !          3362:     append(&sb, zEllipsis);
        !          3363:   }
        !          3364:   pCursor->snippet.zSnippet = stringBufferData(&sb);
        !          3365:   pCursor->snippet.nSnippet = stringBufferLength(&sb);
        !          3366: }
        !          3367: 
        !          3368: 
        !          3369: /*
        !          3370: ** Close the cursor.  For additional information see the documentation
        !          3371: ** on the xClose method of the virtual table interface.
        !          3372: */
        !          3373: static int fulltextClose(sqlite3_vtab_cursor *pCursor){
        !          3374:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          3375:   TRACE(("FTS2 Close %p\n", c));
        !          3376:   sqlite3_finalize(c->pStmt);
        !          3377:   queryClear(&c->q);
        !          3378:   snippetClear(&c->snippet);
        !          3379:   if( c->result.nData!=0 ) dlrDestroy(&c->reader);
        !          3380:   dataBufferDestroy(&c->result);
        !          3381:   sqlite3_free(c);
        !          3382:   return SQLITE_OK;
        !          3383: }
        !          3384: 
        !          3385: /* xNext method: advance the cursor to the next row, or set the eof flag.
        !          3386: ** Table scans and rowid lookups simply step the content statement.  A
        !          3387: ** full-text cursor instead takes the next docid from its result doclist
        !          3388: ** and re-runs the "where rowid=?" statement for it; a docid that has no
        !          3389: ** content row is reported as SQLITE_ERROR.
        !          3390: */
        !          3391: static int fulltextNext(sqlite3_vtab_cursor *pCursor){
        !          3392:   fulltext_cursor *pCsr = (fulltext_cursor *)pCursor;
        !          3393:   int rcStep;
        !          3394: 
        !          3395:   TRACE(("FTS2 Next %p\n", pCursor));
        !          3396:   snippetClear(&pCsr->snippet);
        !          3397: 
        !          3398:   if( pCsr->iCursorType>=QUERY_FULLTEXT ){
        !          3399:     int rcPrep = sqlite3_reset(pCsr->pStmt);
        !          3400:     if( rcPrep!=SQLITE_OK ) return rcPrep;
        !          3401: 
        !          3402:     /* No doclist at all, or every docid already consumed. */
        !          3403:     if( pCsr->result.nData==0 || dlrAtEnd(&pCsr->reader) ){
        !          3404:       pCsr->eof = 1;
        !          3405:       return SQLITE_OK;
        !          3406:     }
        !          3407: 
        !          3408:     /* The reader is advanced even when the bind fails. */
        !          3409:     rcPrep = sqlite3_bind_int64(pCsr->pStmt, 1, dlrDocid(&pCsr->reader));
        !          3410:     dlrStep(&pCsr->reader);
        !          3411:     if( rcPrep!=SQLITE_OK ) return rcPrep;
        !          3412:     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
        !          3413:     rcStep = sqlite3_step(pCsr->pStmt);
        !          3414:     if( rcStep!=SQLITE_ROW ){
        !          3415:       return rcStep==SQLITE_DONE ? SQLITE_ERROR : rcStep;
        !          3416:     }
        !          3417:     pCsr->eof = 0;
        !          3418:     return SQLITE_OK;
        !          3419:   }
        !          3420: 
        !          3421:   /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
        !          3422:   rcStep = sqlite3_step(pCsr->pStmt);
        !          3423:   pCsr->eof = (rcStep!=SQLITE_ROW);
        !          3424:   return (rcStep==SQLITE_ROW || rcStep==SQLITE_DONE) ? SQLITE_OK : rcStep;
        !          3425: }
        !          3426: 
        !          3427: 
        !          3428: /* TODO(shess) If we pushed LeafReader to the top of the file, or to
        !          3429: ** another file, term_select() could be pushed above
        !          3430: ** docListOfTerm().
        !          3431: */
        !          3432: static int termSelect(fulltext_vtab *v, int iColumn,
        !          3433:                       const char *pTerm, int nTerm, int isPrefix,
        !          3434:                       DocListType iType, DataBuffer *out);
        !          3435: 
        !          3436: /* Compute the doclist for the query term *pQTerm and store it in
        !          3437: ** *pResult, which is overwritten.  If pQTerm->nPhrase>0 the term starts
        !          3438: ** a phrase: the nPhrase terms following it in the array are merged in one
        !          3439: ** at a time, so the DL_DOCIDS result covers the whole phrase.  On error a
        !          3440: ** non-zero code is returned and *pResult is not written.
        !          3441: */
        !          3442: static int docListOfTerm(
        !          3443:   fulltext_vtab *v,   /* The full text index */
        !          3444:   int iColumn,        /* column to restrict to.  No restriction if >=nColumn */
        !          3445:   QueryTerm *pQTerm,  /* Term we are looking for, or 1st term of a phrase */
        !          3446:   DataBuffer *pResult /* Write the result here */
        !          3447: ){
        !          3448:   const int nFollow = pQTerm->nPhrase;  /* phrase terms after the first */
        !          3449:   DataBuffer acc;                       /* doclist accumulated so far */
        !          3450:   int iTerm, rc;
        !          3451: 
        !          3452:   /* No phrase search without position info, and no buffered updates. */
        !          3453:   assert( nFollow==0 || DL_DEFAULT!=DL_DOCIDS );
        !          3454:   assert( v->nPendingData<0 );
        !          3455:   /* Positions are only requested while phrase terms remain to be merged. */
        !          3456:   dataBufferInit(&acc, 0);
        !          3457:   rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
        !          3458:                   nFollow>0 ? DL_POSITIONS : DL_DOCIDS, &acc);
        !          3459:   if( rc ) return rc;
        !          3460: 
        !          3461:   for(iTerm=1; iTerm<=nFollow && acc.nData>0; iTerm++){
        !          3462:     QueryTerm *pNext = &pQTerm[iTerm];
        !          3463:     DataBuffer next, merged;
        !          3464:     dataBufferInit(&next, 0);
        !          3465:     rc = termSelect(v, iColumn, pNext->pTerm, pNext->nTerm,
        !          3466:                     pNext->isPrefix, DL_POSITIONS, &next);
        !          3467:     if( rc ){
        !          3468:       dataBufferDestroy(&acc);
        !          3469:       return rc;
        !          3470:     }
        !          3471:     dataBufferInit(&merged, 0);
        !          3472:     docListPhraseMerge(acc.pData, acc.nData, next.pData, next.nData,
        !          3473:                        iTerm<nFollow ? DL_POSITIONS : DL_DOCIDS, &merged);
        !          3474:     dataBufferDestroy(&acc);
        !          3475:     dataBufferDestroy(&next);
        !          3476:     acc = merged;
        !          3477:   }
        !          3478:   *pResult = acc;
        !          3479:   return SQLITE_OK;
        !          3480: }
        !          3480: 
        !          3481: /* Append a copy of the term pTerm[0..nTerm-1] to the query *q.  If memory
        !          3482: ** cannot be obtained *q is left unchanged (q->nTerms does not grow). */
        !          3483: static void queryAdd(Query *q, const char *pTerm, int nTerm){
        !          3484:   QueryTerm *aNew, *t;
        !          3485:   char *zCopy = sqlite3_malloc(nTerm+1);
        !          3486:   if( zCopy==0 ) return;
        !          3487:   /* Grow via a temporary so a failed realloc cannot lose the old array. */
        !          3488:   aNew = sqlite3_realloc(q->pTerms, (q->nTerms+1) * sizeof(q->pTerms[0]));
        !          3489:   if( aNew==0 ){ sqlite3_free(zCopy); return; }
        !          3490:   q->pTerms = aNew;
        !          3491:   t = &aNew[q->nTerms++];
        !          3492:   CLEAR(t);
        !          3493:   memcpy(zCopy, pTerm, nTerm);
        !          3494:   zCopy[nTerm] = 0;
        !          3495:   t->pTerm = zCopy;
        !          3496:   t->nTerm = nTerm;
        !          3497:   t->isOr = q->nextIsOr;
        !          3498:   t->isPrefix = 0;
        !          3499:   q->nextIsOr = 0;
        !          3500:   t->iColumn = q->nextColumn;
        !          3501:   q->nextColumn = q->dfltColumn;
        !          3502: }
        !          3503: 
        !          3504: /* Return the zero-based index of the column of the virtual table whose
        !          3505: ** name is exactly zToken[0..nToken-1], or -1 if no column has that name.
        !          3506: */
        !          3507: static int checkColumnSpecifier(
        !          3508:   fulltext_vtab *pVtab,    /* The virtual table */
        !          3509:   const char *zToken,      /* Text of the token */
        !          3510:   int nToken               /* Number of characters in the token */
        !          3511: ){
        !          3512:   int i;
        !          3513:   for(i=0; i<pVtab->nColumn; i++){
        !          3514:     const char *zName = pVtab->azColumn[i];
        !          3515:     /* Check the length first: memcmp() alone reads nToken bytes of zName
        !          3516:     ** even when the column name is shorter than the token. */
        !          3517:     if( (int)strlen(zName)==nToken && memcmp(zName, zToken, nToken)==0 ){
        !          3518:       return i;
        !          3519:     }
        !          3520:   }
        !          3521:   return -1;
        !          3522: }
        !          3523: 
        !          3524: /* Tokenize pSegment[0..nSegment-1] and append its terms to *pQuery.
        !          3525: ** If inPhrase is true the segment was inside double-quotes: the first
        !          3526: ** term is marked with the number of terms in the phrase less one, and OR
        !          3527: ** and "-" syntax is ignored.  Otherwise every term has nPhrase=0 and OR
        !          3528: ** and "-" syntax is significant.
        !          3529: **
        !          3530: ** Returns SQLITE_OK on success, SQLITE_NOMEM if a term could not be
        !          3531: ** added, or the error code reported by the tokenizer.
        !          3532: */
        !          3533: static int tokenizeSegment(
        !          3534:   sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
        !          3535:   const char *pSegment, int nSegment,     /* Query expression being parsed */
        !          3536:   int inPhrase,                           /* True if within "..." */
        !          3537:   Query *pQuery                           /* Append results here */
        !          3538: ){
        !          3539:   const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
        !          3540:   sqlite3_tokenizer_cursor *pCursor;
        !          3541:   int firstIndex = pQuery->nTerms;
        !          3542:   int nTerm = 1;
        !          3543:   int iCol, rc, rcClose;
        !          3544: 
        !          3545:   rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
        !          3546:   if( rc!=SQLITE_OK ) return rc;
        !          3547:   pCursor->pTokenizer = pTokenizer;
        !          3548: 
        !          3549:   while( 1 ){
        !          3550:     const char *pToken;
        !          3551:     int nToken, iBegin, iEnd, iPos, nBefore;
        !          3552:     QueryTerm *pNew;
        !          3553:     rc = pModule->xNext(pCursor, &pToken, &nToken, &iBegin, &iEnd, &iPos);
        !          3554:     if( rc!=SQLITE_OK ) break;
        !          3555:     /* iEnd may equal nSegment, so bound every look at pSegment[iEnd]. */
        !          3556:     if( !inPhrase && iEnd<nSegment && pSegment[iEnd]==':' &&
        !          3557:          (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
        !          3558:       pQuery->nextColumn = iCol;
        !          3559:       continue;
        !          3560:     }
        !          3561:     if( !inPhrase && pQuery->nTerms>0 && nToken==2
        !          3562:          && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
        !          3563:       pQuery->nextIsOr = 1;
        !          3564:       continue;
        !          3565:     }
        !          3566:     /* queryAdd() signals allocation failure only by not adding the term,
        !          3567:     ** in which case pTerms[nTerms-1] must not be touched. */
        !          3568:     nBefore = pQuery->nTerms;
        !          3569:     queryAdd(pQuery, pToken, nToken);
        !          3570:     if( pQuery->nTerms!=nBefore+1 ){
        !          3571:       rc = SQLITE_NOMEM;
        !          3572:       break;
        !          3573:     }
        !          3574:     pNew = &pQuery->pTerms[nBefore];
        !          3575:     if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ) pNew->isNot = 1;
        !          3576:     if( iEnd<nSegment && pSegment[iEnd]=='*' ) pNew->isPrefix = 1;
        !          3577:     pNew->iPhrase = nTerm;
        !          3578:     if( inPhrase ) nTerm++;
        !          3579:   }
        !          3580: 
        !          3581:   if( inPhrase && pQuery->nTerms>firstIndex ){
        !          3582:     pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
        !          3583:   }
        !          3584:   /* Only SQLITE_DONE from xNext() means the input was fully tokenized. */
        !          3585:   rcClose = pModule->xClose(pCursor);
        !          3586:   return rc==SQLITE_DONE ? rcClose : rc;
        !          3587: }
        !          3588: 
        !          3589: /* Parse a query string, yielding a Query object pQuery.
        !          3590: ** On success the calling function will need to queryClear() to clean up
        !          3591: ** the dynamically allocated memory held by pQuery.  On failure (an
        !          3592: ** unmatched quote, or an error from tokenizeSegment()) pQuery has
        !          3593: ** already been cleared here and the error code is returned.
        !          3594: */
        !          3595: static int parseQuery(
        !          3596:   fulltext_vtab *v,        /* The fulltext index */
        !          3597:   const char *zInput,      /* Input text of the query string */
        !          3598:   int nInput,              /* Size of the input text */
        !          3599:   int dfltColumn,          /* Default column of the index to match against */
        !          3600:   Query *pQuery            /* Write the parse results here. */
        !          3601: ){
        !          3602:   int iInput, inPhrase = 0;
        !          3603:   int rc = SQLITE_OK;
        !          3604: 
        !          3605:   if( zInput==0 ) nInput = 0;
        !          3606:   if( nInput<0 ) nInput = (int)strlen(zInput);
        !          3607:   pQuery->nTerms = 0;
        !          3608:   pQuery->pTerms = NULL;
        !          3609:   pQuery->nextIsOr = 0;
        !          3610:   pQuery->nextColumn = dfltColumn;
        !          3611:   pQuery->dfltColumn = dfltColumn;
        !          3612:   pQuery->pFts = v;
        !          3613: 
        !          3614:   for(iInput=0; rc==SQLITE_OK && iInput<nInput; ++iInput){
        !          3615:     int i;
        !          3616:     for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
        !          3617:     if( i>iInput ){
        !          3618:       rc = tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
        !          3619:                            pQuery);
        !          3620:     }
        !          3621:     iInput = i;
        !          3622:     if( i<nInput ){
        !          3623:       assert( zInput[i]=='"' );
        !          3624:       inPhrase = !inPhrase;
        !          3625:     }
        !          3626:   }
        !          3627: 
        !          3628:   /* A quote left open is a syntax error. */
        !          3629:   if( rc==SQLITE_OK && inPhrase ) rc = SQLITE_ERROR;
        !          3630:   if( rc!=SQLITE_OK ) queryClear(pQuery);
        !          3631:   return rc;
        !          3632: }
        !          3633: 
        !          3634: /* TODO(shess) Refactor the code to remove this forward decl. */
        !          3635: static int flushPendingTerms(fulltext_vtab *v);
        !          3636: 
        !          3637: /* Perform a full-text query using the search expression in
        !          3638: ** zInput[0..nInput-1].  Return a list of matching documents in pResult;
        !          3639: ** the parsed expression is left in pQuery.
        !          3640: **
        !          3641: ** Queries must match column iColumn.  Or if iColumn>=nColumn
        !          3642: ** they are allowed to match against any column.
        !          3643: */
        !          3644: static int fulltextQuery(
        !          3645:   fulltext_vtab *v,      /* The full text index */
        !          3646:   int iColumn,           /* Match against this column by default */
        !          3647:   const char *zInput,    /* The query string */
        !          3648:   int nInput,            /* Number of bytes in zInput[] */
        !          3649:   DataBuffer *pResult,   /* Write the result doclist here */
        !          3650:   Query *pQuery          /* Put parsed query string here */
        !          3651: ){
        !          3652:   DataBuffer acc, cur, alt, merged;
        !          3653:   QueryTerm *aTerm;
        !          3654:   int nNot = 0;
        !          3655:   int i, iNext, rc;
        !          3656: 
        !          3657:   /* TODO(shess) Instead of flushing pendingTerms, we could query for the
        !          3658:   ** relevant term and merge the doclist into what we receive from the
        !          3659:   ** database.  Wait and see if this is a common issue, first.  A good
        !          3660:   ** reason not to flush is to not generate update-related error codes
        !          3661:   ** from here.
        !          3662:   */
        !          3663:   /* Flush any buffered updates before executing the query. */
        !          3664:   rc = flushPendingTerms(v);
        !          3665:   if( rc!=SQLITE_OK ) return rc;
        !          3666: 
        !          3667:   /* TODO(shess) I think that the queryClear() calls below are not
        !          3668:   ** necessary, because fulltextClose() already clears the query. */
        !          3669:   rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
        !          3670:   if( rc!=SQLITE_OK ) return rc;
        !          3671: 
        !          3672:   /* Empty or NULL queries return no results. */
        !          3673:   if( pQuery->nTerms==0 ){
        !          3674:     dataBufferInit(pResult, 0);
        !          3675:     return SQLITE_OK;
        !          3676:   }
        !          3677: 
        !          3678:   /* Pass 1: AND together the terms that are not negated.  "i==nNot" holds
        !          3679:   ** only for the first such term (all terms before it were negated), so
        !          3680:   ** "i!=nNot" means acc already holds a doclist. */
        !          3681:   /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */
        !          3682:   aTerm = pQuery->pTerms;
        !          3683:   for(i=0; i<pQuery->nTerms; i=iNext){
        !          3684:     iNext = i + aTerm[i].nPhrase + 1;
        !          3685:     if( aTerm[i].isNot ){
        !          3686:       nNot++;                /* NOT terms are applied in pass 2 */
        !          3687:       continue;
        !          3688:     }
        !          3689:     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &cur);
        !          3690:     if( rc ){
        !          3691:       if( i!=nNot ) dataBufferDestroy(&acc);
        !          3692:       queryClear(pQuery);
        !          3693:       return rc;
        !          3694:     }
        !          3695:     /* Union in every OR alternative that directly follows. */
        !          3696:     while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
        !          3697:       QueryTerm *pOr = &aTerm[iNext];
        !          3698:       rc = docListOfTerm(v, pOr->iColumn, pOr, &alt);
        !          3699:       iNext += pOr->nPhrase + 1;
        !          3700:       if( rc ){
        !          3701:         if( i!=nNot ) dataBufferDestroy(&acc);
        !          3702:         dataBufferDestroy(&cur);
        !          3703:         queryClear(pQuery);
        !          3704:         return rc;
        !          3705:       }
        !          3706:       dataBufferInit(&merged, 0);
        !          3707:       docListOrMerge(cur.pData, cur.nData, alt.pData, alt.nData, &merged);
        !          3708:       dataBufferDestroy(&cur);
        !          3709:       dataBufferDestroy(&alt);
        !          3710:       cur = merged;
        !          3711:     }
        !          3712:     if( i!=nNot ){
        !          3713:       dataBufferInit(&merged, 0);
        !          3714:       docListAndMerge(acc.pData, acc.nData, cur.pData, cur.nData, &merged);
        !          3715:       dataBufferDestroy(&cur);
        !          3716:       dataBufferDestroy(&acc);
        !          3717:       acc = merged;
        !          3718:     }else{
        !          3719:       acc = cur;             /* first term processed. */
        !          3720:     }
        !          3721:   }
        !          3722: 
        !          3723:   if( nNot==pQuery->nTerms ){
        !          3724:     /* We do not yet know how to handle a query of only NOT terms */
        !          3725:     return SQLITE_ERROR;
        !          3726:   }
        !          3727: 
        !          3728:   /* Pass 2: remove the documents matched by each NOT term. */
        !          3729:   for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){
        !          3730:     if( aTerm[i].isNot ){
        !          3731:       rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &cur);
        !          3732:       if( rc ){
        !          3733:         queryClear(pQuery);
        !          3734:         dataBufferDestroy(&acc);
        !          3735:         return rc;
        !          3736:       }
        !          3737:       dataBufferInit(&merged, 0);
        !          3738:       docListExceptMerge(acc.pData, acc.nData, cur.pData, cur.nData, &merged);
        !          3739:       dataBufferDestroy(&cur);
        !          3740:       dataBufferDestroy(&acc);
        !          3741:       acc = merged;
        !          3742:     }
        !          3743:   }
        !          3744: 
        !          3745:   *pResult = acc;
        !          3746:   return rc;
        !          3747: }
        !          3748: 
        !          3749: /*
        !          3750: ** This is the xFilter interface for the virtual table.  See the virtual
        !          3751: ** table xFilter method documentation for additional information.
        !          3752: ** Returns SQLITE_OK, or an error code if the scan could not be started.
        !          3753: **
        !          3754: ** If idxNum==QUERY_GENERIC then do a full table scan against
        !          3755: ** the %_content table.
        !          3756: **
        !          3757: ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
        !          3758: ** in the %_content table.
        !          3759: **
        !          3760: ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
        !          3761: ** column on the left-hand side of the MATCH operator is column
        !          3762: ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
        !          3763: ** side of the MATCH operator.
        !          3764: */
        !          3765: /* TODO(shess) Upgrade the cursor initialization and destruction to
        !          3766: ** account for fulltextFilter() being called multiple times on the
        !          3767: ** same cursor.  The current solution is very fragile.  Apply fix to
        !          3768: ** fts2 as appropriate.
        !          3769: */
        !          3770: static int fulltextFilter(
        !          3771:   sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
        !          3772:   int idxNum, const char *idxStr,   /* Which indexing scheme to use */
        !          3773:   int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
        !          3774: ){
        !          3775:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          3776:   fulltext_vtab *v = cursor_vtab(c);
        !          3777:   int rc;
        !          3778: 
        !          3779:   TRACE(("FTS2 Filter %p\n",pCursor));
        !          3780: 
        !          3781:   /* If the cursor has a statement that was not prepared according to
        !          3782:   ** idxNum, clear it.  I believe all calls to fulltextFilter with a
        !          3783:   ** given cursor will have the same idxNum , but in this case it's
        !          3784:   ** easy to be safe.
        !          3785:   */
        !          3786:   if( c->pStmt && c->iCursorType!=idxNum ){
        !          3787:     sqlite3_finalize(c->pStmt);
        !          3788:     c->pStmt = NULL;
        !          3789:   }
        !          3790: 
        !          3791:   /* Get a fresh statement appropriate to idxNum. */
        !          3792:   /* TODO(shess): Add a prepared-statement cache in the vt structure.
        !          3793:   ** The cache must handle multiple open cursors.  Easier to cache the
        !          3794:   ** statement variants at the vt to reduce malloc/realloc/free here.
        !          3795:   ** Or we could have a StringBuffer variant which allowed stack
        !          3796:   ** construction for small values. */
        !          3797:   if( !c->pStmt ){
        !          3798:     char *zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
        !          3799:                                  idxNum==QUERY_GENERIC ? "" : "where rowid=?");
        !          3800:     if( zSql==0 ) return SQLITE_NOMEM;   /* sqlite3_mprintf() failed */
        !          3801:     rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
        !          3802:     sqlite3_free(zSql);
        !          3803:     if( rc!=SQLITE_OK ) return rc;
        !          3804:     c->iCursorType = idxNum;
        !          3805:   }else{
        !          3806:     sqlite3_reset(c->pStmt);
        !          3807:     assert( c->iCursorType==idxNum );
        !          3808:   }
        !          3809: 
        !          3810:   switch( idxNum ){
        !          3811:     case QUERY_GENERIC:
        !          3812:       break;
        !          3813: 
        !          3814:     case QUERY_ROWID:
        !          3815:       rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
        !          3816:       if( rc!=SQLITE_OK ) return rc;
        !          3817:       break;
        !          3818: 
        !          3819:     default:   /* full-text search */
        !          3820:     {
        !          3821:       const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
        !          3822:       assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
        !          3823:       assert( argc==1 );
        !          3824:       queryClear(&c->q);
        !          3825:       /* The same cursor may be filtered repeatedly.  fulltextQuery()
        !          3826:       ** overwrites c->result wholesale, so release whatever an earlier
        !          3827:       ** use left there (exactly as fulltextClose() does) rather than
        !          3828:       ** keeping the old allocation and then losing track of it. */
        !          3829:       if( c->result.nData!=0 ) dlrDestroy(&c->reader);
        !          3830:       dataBufferDestroy(&c->result);
        !          3831:       dataBufferInit(&c->result, 0);
        !          3832:       rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q);
        !          3833:       if( rc!=SQLITE_OK ) return rc;
        !          3834:       if( c->result.nData!=0 ){
        !          3835:         dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
        !          3836:       }
        !          3837:       break;
        !          3838:     }
        !          3839:   }
        !          3840: 
        !          3841:   return fulltextNext(pCursor);
        !          3842: }
        !          3843: 
        !          3844: /* xEof method of the virtual table.  The SQLite core calls this to find
        !          3845: ** out whether the cursor has run past the end of the result set.  The
        !          3846: ** flag it reports is maintained by fulltextNext().
        !          3847: */
        !          3848: static int fulltextEof(sqlite3_vtab_cursor *pCursor){
        !          3849:   const fulltext_cursor *pCsr = (const fulltext_cursor *)pCursor;
        !          3850:   return pCsr->eof;
        !          3851: }
        !          3852: 
        !          3853: /* This is the xColumn method of the virtual table.  It passes the value
        !          3854: ** of column idxCol of the current row to SQLite through pContext.
        !          3855: **
        !          3856: ** Columns 0..nColumn-1 come from the content statement, whose first
        !          3857: ** column is the rowid (hence idxCol+1).  Column nColumn, the extra column
        !          3858: ** whose name is the same as the table, returns a blob which is a pointer
        !          3859: ** to the cursor.  For any larger idxCol no result is set.
        !          3860: */
        !          3861: static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
        !          3862:                           sqlite3_context *pContext, int idxCol){
        !          3863:   fulltext_cursor *c = (fulltext_cursor *) pCursor;
        !          3864:   const int nUserCol = cursor_vtab(c)->nColumn;
        !          3865: 
        !          3866:   if( idxCol==nUserCol ){
        !          3867:     /* sizeof(c) bytes holding the address of the cursor itself. */
        !          3868:     sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
        !          3869:   }else if( idxCol<nUserCol ){
        !          3870:     sqlite3_result_value(pContext, sqlite3_column_value(c->pStmt, idxCol+1));
        !          3871:   }
        !          3872: 
        !          3873:   return SQLITE_OK;
        !          3874: }
        !          3875: 
        !          3876: /* This is the xRowid method.  The SQLite core calls this routine to
        !          3877: ** retrieve the rowid for the current row of the result set.  The rowid,
        !          3878: ** read from column 0 of the content statement, is written to *pRowid.
        !          3879: */
        !          3880: static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
        !          3881:   sqlite3_stmt *pContent = ((fulltext_cursor *)pCursor)->pStmt;
        !          3882: 
        !          3883:   *pRowid = sqlite3_column_int64(pContent, 0);
        !          3884:   return SQLITE_OK;
        !          3885: }
        !          3886: 
        !          3887: /* Tokenize [zText] and add its terms for document [iDocid] to the
        !          3888: ** pendingTerms table.  If [iColumn] >= 0, positions and offsets are also
        !          3889: ** stored in the hash table using that column number; deleteTerms()
        !          3890: ** passes -1 so that no positions are added.
        !          3891: ** v->nPendingData is increased by the size of everything added.  Returns
        !          3892: ** SQLITE_OK once the tokenizer reports SQLITE_DONE, otherwise an error.
        !          3893: */
        !          3894: static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid,
        !          3895:                       const char *zText, int iColumn){
        !          3896:   sqlite3_tokenizer *pTok = v->pTokenizer;
        !          3897:   const sqlite3_tokenizer_module *pMod = pTok->pModule;
        !          3898:   sqlite3_tokenizer_cursor *pCsr;
        !          3899:   int rc;
        !          3900: 
        !          3901:   rc = pMod->xOpen(pTok, zText, -1, &pCsr);
        !          3902:   if( rc!=SQLITE_OK ) return rc;
        !          3903:   pCsr->pTokenizer = pTok;
        !          3904: 
        !          3905:   for(;;){
        !          3906:     const char *pToken;          /* Text of the current token */
        !          3907:     int nToken;                  /* Size of pToken in bytes */
        !          3908:     int iStart, iEnd, iPos;      /* Offsets and position of the token */
        !          3909:     int nBefore;                 /* Size of doclist before our update. */
        !          3910:     DLCollector *p;
        !          3911: 
        !          3912:     rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos);
        !          3913:     if( rc!=SQLITE_OK ) break;
        !          3914:     /* Positions can't be negative; we use -1 as a terminator
        !          3915:      * internally.  Token can't be NULL or empty. */
        !          3916:     if( iPos<0 || pToken==NULL || nToken==0 ){
        !          3917:       rc = SQLITE_ERROR;
        !          3918:       break;
        !          3919:     }
        !          3920: 
        !          3921:     p = fts2HashFind(&v->pendingTerms, pToken, nToken);
        !          3922:     if( p!=NULL ){
        !          3923:       /* Existing collector: move on to iDocid unless already current. */
        !          3924:       nBefore = p->b.nData;
        !          3925:       if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid);
        !          3926:     }else{
        !          3927:       nBefore = 0;
        !          3928:       p = dlcNew(iDocid, DL_DEFAULT);
        !          3929:       fts2HashInsert(&v->pendingTerms, pToken, nToken, p);
        !          3930:       /* Overhead for our hash table entry, the key, and the value. */
        !          3931:       v->nPendingData += sizeof(struct fts2HashElem)+sizeof(*p)+nToken;
        !          3932:     }
        !          3933:     if( iColumn>=0 ) dlcAddPos(p, iColumn, iPos, iStart, iEnd);
        !          3934: 
        !          3935:     /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */
        !          3936:     v->nPendingData += p->b.nData-nBefore;
        !          3937:   }
        !          3938: 
        !          3939:   /* TODO(shess) Check return?  Should this be able to cause errors at
        !          3940:   ** this point?  Actually, same question about sqlite3_finalize(),
        !          3941:   ** though one could argue that failure there means that the data is
        !          3942:   ** not durable.  *ponder*
        !          3943:   */
        !          3944:   pMod->xClose(pCsr);
        !          3945:   return rc==SQLITE_DONE ? SQLITE_OK : rc;
        !          3946: }
        !          3947: 
        !          3948: /* Add doclists for all terms in [pValues] to pendingTerms table. */
        !          3949: static int insertTerms(fulltext_vtab *v, sqlite_int64 iRowid,
        !          3950:                        sqlite3_value **pValues){
        !          3951:   int i;
        !          3952:   for(i = 0; i < v->nColumn ; ++i){
        !          3953:     char *zText = (char*)sqlite3_value_text(pValues[i]);
        !          3954:     int rc = buildTerms(v, iRowid, zText, i);
        !          3955:     if( rc!=SQLITE_OK ) return rc;
        !          3956:   }
        !          3957:   return SQLITE_OK;
        !          3958: }
        !          3959: 
        !          3960: /* Add empty doclists for all terms in the given row's content to
        !          3961: ** pendingTerms.
        !          3962: */
        !          3963: static int deleteTerms(fulltext_vtab *v, sqlite_int64 iRowid){
        !          3964:   const char **pValues;
        !          3965:   int i, rc;
        !          3966: 
        !          3967:   /* TODO(shess) Should we allow such tables at all? */
        !          3968:   if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR;
        !          3969: 
        !          3970:   rc = content_select(v, iRowid, &pValues);
        !          3971:   if( rc!=SQLITE_OK ) return rc;
        !          3972: 
        !          3973:   for(i = 0 ; i < v->nColumn; ++i) {
        !          3974:     rc = buildTerms(v, iRowid, pValues[i], -1);
        !          3975:     if( rc!=SQLITE_OK ) break;
        !          3976:   }
        !          3977: 
        !          3978:   freeStringArray(v->nColumn, pValues);
        !          3979:   return SQLITE_OK;
        !          3980: }
        !          3981: 
        !          3982: /* TODO(shess) Refactor the code to remove this forward decl. */
        !          3983: static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid);
        !          3984: 
        !          3985: /* Insert a row into the %_content table; set *piRowid to be the ID of the
        !          3986: ** new row.  Add doclists for terms to pendingTerms.
        !          3987: */
        !          3988: static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
        !          3989:                         sqlite3_value **pValues, sqlite_int64 *piRowid){
        !          3990:   int rc;
        !          3991: 
        !          3992:   rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
        !          3993:   if( rc!=SQLITE_OK ) return rc;
        !          3994: 
        !          3995:   *piRowid = sqlite3_last_insert_rowid(v->db);
        !          3996:   rc = initPendingTerms(v, *piRowid);
        !          3997:   if( rc!=SQLITE_OK ) return rc;
        !          3998: 
        !          3999:   return insertTerms(v, *piRowid, pValues);
        !          4000: }
        !          4001: 
        !          4002: /* Delete a row from the %_content table; add empty doclists for terms
        !          4003: ** to pendingTerms.
        !          4004: */
        !          4005: static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
        !          4006:   int rc = initPendingTerms(v, iRow);
        !          4007:   if( rc!=SQLITE_OK ) return rc;
        !          4008: 
        !          4009:   rc = deleteTerms(v, iRow);
        !          4010:   if( rc!=SQLITE_OK ) return rc;
        !          4011: 
        !          4012:   return content_delete(v, iRow);  /* execute an SQL DELETE */
        !          4013: }
        !          4014: 
        !          4015: /* Update a row in the %_content table; add delete doclists to
        !          4016: ** pendingTerms for old terms not in the new data, add insert doclists
        !          4017: ** to pendingTerms for terms in the new data.
        !          4018: */
        !          4019: static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
        !          4020:                         sqlite3_value **pValues){
        !          4021:   int rc = initPendingTerms(v, iRow);
        !          4022:   if( rc!=SQLITE_OK ) return rc;
        !          4023: 
        !          4024:   /* Generate an empty doclist for each term that previously appeared in this
        !          4025:    * row. */
        !          4026:   rc = deleteTerms(v, iRow);
        !          4027:   if( rc!=SQLITE_OK ) return rc;
        !          4028: 
        !          4029:   rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
        !          4030:   if( rc!=SQLITE_OK ) return rc;
        !          4031: 
        !          4032:   /* Now add positions for terms which appear in the updated row. */
        !          4033:   return insertTerms(v, iRow, pValues);
        !          4034: }
        !          4035: 
        !          4036: /*******************************************************************/
        !          4037: /* InteriorWriter is used to collect terms and block references into
        !          4038: ** interior nodes in %_segments.  See commentary at top of file for
        !          4039: ** format.
        !          4040: */
        !          4041: 
        !          4042: /* How large interior nodes can grow. */
        !          4043: #define INTERIOR_MAX 2048
        !          4044: 
        !          4045: /* Minimum number of terms per interior node (except the root). This
        !          4046: ** prevents large terms from making the tree too skinny - must be >0
        !          4047: ** so that the tree always makes progress.  Note that the min tree
        !          4048: ** fanout will be INTERIOR_MIN_TERMS+1.
        !          4049: */
        !          4050: #define INTERIOR_MIN_TERMS 7
        !          4051: #if INTERIOR_MIN_TERMS<1
        !          4052: # error INTERIOR_MIN_TERMS must be greater than 0.
        !          4053: #endif
        !          4054: 
        !          4055: /* ROOT_MAX controls how much data is stored inline in the segment
        !          4056: ** directory.
        !          4057: */
        !          4058: /* TODO(shess) Push ROOT_MAX down to whoever is writing things.  It's
        !          4059: ** only here so that interiorWriterRootInfo() and leafWriterRootInfo()
        !          4060: ** can both see it, but if the caller passed it in, we wouldn't even
        !          4061: ** need a define.
        !          4062: */
        !          4063: #define ROOT_MAX 1024
        !          4064: #if ROOT_MAX<VARINT_MAX*2
        !          4065: # error ROOT_MAX must have enough space for a header.
        !          4066: #endif
        !          4067: 
        !          4068: /* InteriorBlock stores a linked-list of interior blocks while a lower
        !          4069: ** layer is being constructed.
        !          4070: */
        !          4071: typedef struct InteriorBlock {
        !          4072:   DataBuffer term;           /* Leftmost term in block's subtree. */
        !          4073:   DataBuffer data;           /* Accumulated data for the block. */
        !          4074:   struct InteriorBlock *next;
        !          4075: } InteriorBlock;
        !          4076: 
        !          4077: static InteriorBlock *interiorBlockNew(int iHeight, sqlite_int64 iChildBlock,
        !          4078:                                        const char *pTerm, int nTerm){
        !          4079:   InteriorBlock *block = sqlite3_malloc(sizeof(InteriorBlock));
        !          4080:   char c[VARINT_MAX+VARINT_MAX];
        !          4081:   int n;
        !          4082: 
        !          4083:   if( block ){
        !          4084:     memset(block, 0, sizeof(*block));
        !          4085:     dataBufferInit(&block->term, 0);
        !          4086:     dataBufferReplace(&block->term, pTerm, nTerm);
        !          4087: 
        !          4088:     n = putVarint(c, iHeight);
        !          4089:     n += putVarint(c+n, iChildBlock);
        !          4090:     dataBufferInit(&block->data, INTERIOR_MAX);
        !          4091:     dataBufferReplace(&block->data, c, n);
        !          4092:   }
        !          4093:   return block;
        !          4094: }
        !          4095: 
        !          4096: #ifndef NDEBUG
        !          4097: /* Verify that the data is readable as an interior node. */
        !          4098: static void interiorBlockValidate(InteriorBlock *pBlock){
        !          4099:   const char *pData = pBlock->data.pData;
        !          4100:   int nData = pBlock->data.nData;
        !          4101:   int n, iDummy;
        !          4102:   sqlite_int64 iBlockid;
        !          4103: 
        !          4104:   assert( nData>0 );
        !          4105:   assert( pData!=0 );
        !          4106:   assert( pData+nData>pData );
        !          4107: 
        !          4108:   /* Must lead with height of node as a varint(n), n>0 */
        !          4109:   n = getVarint32(pData, &iDummy);
        !          4110:   assert( n>0 );
        !          4111:   assert( iDummy>0 );
        !          4112:   assert( n<nData );
        !          4113:   pData += n;
        !          4114:   nData -= n;
        !          4115: 
        !          4116:   /* Must contain iBlockid. */
        !          4117:   n = getVarint(pData, &iBlockid);
        !          4118:   assert( n>0 );
        !          4119:   assert( n<=nData );
        !          4120:   pData += n;
        !          4121:   nData -= n;
        !          4122: 
        !          4123:   /* Zero or more terms of positive length */
        !          4124:   if( nData!=0 ){
        !          4125:     /* First term is not delta-encoded. */
        !          4126:     n = getVarint32(pData, &iDummy);
        !          4127:     assert( n>0 );
        !          4128:     assert( iDummy>0 );
        !          4129:     assert( n+iDummy>0);
        !          4130:     assert( n+iDummy<=nData );
        !          4131:     pData += n+iDummy;
        !          4132:     nData -= n+iDummy;
        !          4133: 
        !          4134:     /* Following terms delta-encoded. */
        !          4135:     while( nData!=0 ){
        !          4136:       /* Length of shared prefix. */
        !          4137:       n = getVarint32(pData, &iDummy);
        !          4138:       assert( n>0 );
        !          4139:       assert( iDummy>=0 );
        !          4140:       assert( n<nData );
        !          4141:       pData += n;
        !          4142:       nData -= n;
        !          4143: 
        !          4144:       /* Length and data of distinct suffix. */
        !          4145:       n = getVarint32(pData, &iDummy);
        !          4146:       assert( n>0 );
        !          4147:       assert( iDummy>0 );
        !          4148:       assert( n+iDummy>0);
        !          4149:       assert( n+iDummy<=nData );
        !          4150:       pData += n+iDummy;
        !          4151:       nData -= n+iDummy;
        !          4152:     }
        !          4153:   }
        !          4154: }
        !          4155: #define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x)
        !          4156: #else
        !          4157: #define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 )
        !          4158: #endif
        !          4159: 
        !          4160: typedef struct InteriorWriter {
        !          4161:   int iHeight;                   /* from 0 at leaves. */
        !          4162:   InteriorBlock *first, *last;
        !          4163:   struct InteriorWriter *parentWriter;
        !          4164: 
        !          4165:   DataBuffer term;               /* Last term written to block "last". */
        !          4166:   sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */
        !          4167: #ifndef NDEBUG
        !          4168:   sqlite_int64 iLastChildBlock;  /* for consistency checks. */
        !          4169: #endif
        !          4170: } InteriorWriter;
        !          4171: 
        !          4172: /* Initialize an interior node where pTerm[nTerm] marks the leftmost
        !          4173: ** term in the tree.  iChildBlock is the leftmost child block at the
        !          4174: ** next level down the tree.
        !          4175: */
        !          4176: static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm,
        !          4177:                                sqlite_int64 iChildBlock,
        !          4178:                                InteriorWriter *pWriter){
        !          4179:   InteriorBlock *block;
        !          4180:   assert( iHeight>0 );
        !          4181:   CLEAR(pWriter);
        !          4182: 
        !          4183:   pWriter->iHeight = iHeight;
        !          4184:   pWriter->iOpeningChildBlock = iChildBlock;
        !          4185: #ifndef NDEBUG
        !          4186:   pWriter->iLastChildBlock = iChildBlock;
        !          4187: #endif
        !          4188:   block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm);
        !          4189:   pWriter->last = pWriter->first = block;
        !          4190:   ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
        !          4191:   dataBufferInit(&pWriter->term, 0);
        !          4192: }
        !          4193: 
        !          4194: /* Append the child node rooted at iChildBlock to the interior node,
        !          4195: ** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree.
        !          4196: */
        !          4197: static void interiorWriterAppend(InteriorWriter *pWriter,
        !          4198:                                  const char *pTerm, int nTerm,
        !          4199:                                  sqlite_int64 iChildBlock){
        !          4200:   char c[VARINT_MAX+VARINT_MAX];
        !          4201:   int n, nPrefix = 0;
        !          4202: 
        !          4203:   ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
        !          4204: 
        !          4205:   /* The first term written into an interior node is actually
        !          4206:   ** associated with the second child added (the first child was added
        !          4207:   ** in interiorWriterInit, or in the if clause at the bottom of this
        !          4208:   ** function).  That term gets encoded straight up, with nPrefix left
        !          4209:   ** at 0.
        !          4210:   */
        !          4211:   if( pWriter->term.nData==0 ){
        !          4212:     n = putVarint(c, nTerm);
        !          4213:   }else{
        !          4214:     while( nPrefix<pWriter->term.nData &&
        !          4215:            pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
        !          4216:       nPrefix++;
        !          4217:     }
        !          4218: 
        !          4219:     n = putVarint(c, nPrefix);
        !          4220:     n += putVarint(c+n, nTerm-nPrefix);
        !          4221:   }
        !          4222: 
        !          4223: #ifndef NDEBUG
        !          4224:   pWriter->iLastChildBlock++;
        !          4225: #endif
        !          4226:   assert( pWriter->iLastChildBlock==iChildBlock );
        !          4227: 
        !          4228:   /* Overflow to a new block if the new term makes the current block
        !          4229:   ** too big, and the current block already has enough terms.
        !          4230:   */
        !          4231:   if( pWriter->last->data.nData+n+nTerm-nPrefix>INTERIOR_MAX &&
        !          4232:       iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){
        !          4233:     pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock,
        !          4234:                                            pTerm, nTerm);
        !          4235:     pWriter->last = pWriter->last->next;
        !          4236:     pWriter->iOpeningChildBlock = iChildBlock;
        !          4237:     dataBufferReset(&pWriter->term);
        !          4238:   }else{
        !          4239:     dataBufferAppend2(&pWriter->last->data, c, n,
        !          4240:                       pTerm+nPrefix, nTerm-nPrefix);
        !          4241:     dataBufferReplace(&pWriter->term, pTerm, nTerm);
        !          4242:   }
        !          4243:   ASSERT_VALID_INTERIOR_BLOCK(pWriter->last);
        !          4244: }
        !          4245: 
        !          4246: /* Free the space used by pWriter, including the linked-list of
        !          4247: ** InteriorBlocks, and parentWriter, if present.
        !          4248: */
        !          4249: static int interiorWriterDestroy(InteriorWriter *pWriter){
        !          4250:   InteriorBlock *block = pWriter->first;
        !          4251: 
        !          4252:   while( block!=NULL ){
        !          4253:     InteriorBlock *b = block;
        !          4254:     block = block->next;
        !          4255:     dataBufferDestroy(&b->term);
        !          4256:     dataBufferDestroy(&b->data);
        !          4257:     sqlite3_free(b);
        !          4258:   }
        !          4259:   if( pWriter->parentWriter!=NULL ){
        !          4260:     interiorWriterDestroy(pWriter->parentWriter);
        !          4261:     sqlite3_free(pWriter->parentWriter);
        !          4262:   }
        !          4263:   dataBufferDestroy(&pWriter->term);
        !          4264:   SCRAMBLE(pWriter);
        !          4265:   return SQLITE_OK;
        !          4266: }
        !          4267: 
        !          4268: /* If pWriter can fit entirely in ROOT_MAX, return it as the root info
        !          4269: ** directly, leaving *piEndBlockid unchanged.  Otherwise, flush
        !          4270: ** pWriter to %_segments, building a new layer of interior nodes, and
        !          4271: ** recursively ask for their root into.
        !          4272: */
        !          4273: static int interiorWriterRootInfo(fulltext_vtab *v, InteriorWriter *pWriter,
        !          4274:                                   char **ppRootInfo, int *pnRootInfo,
        !          4275:                                   sqlite_int64 *piEndBlockid){
        !          4276:   InteriorBlock *block = pWriter->first;
        !          4277:   sqlite_int64 iBlockid = 0;
        !          4278:   int rc;
        !          4279: 
        !          4280:   /* If we can fit the segment inline */
        !          4281:   if( block==pWriter->last && block->data.nData<ROOT_MAX ){
        !          4282:     *ppRootInfo = block->data.pData;
        !          4283:     *pnRootInfo = block->data.nData;
        !          4284:     return SQLITE_OK;
        !          4285:   }
        !          4286: 
        !          4287:   /* Flush the first block to %_segments, and create a new level of
        !          4288:   ** interior node.
        !          4289:   */
        !          4290:   ASSERT_VALID_INTERIOR_BLOCK(block);
        !          4291:   rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
        !          4292:   if( rc!=SQLITE_OK ) return rc;
        !          4293:   *piEndBlockid = iBlockid;
        !          4294: 
        !          4295:   pWriter->parentWriter = sqlite3_malloc(sizeof(*pWriter->parentWriter));
        !          4296:   interiorWriterInit(pWriter->iHeight+1,
        !          4297:                      block->term.pData, block->term.nData,
        !          4298:                      iBlockid, pWriter->parentWriter);
        !          4299: 
        !          4300:   /* Flush additional blocks and append to the higher interior
        !          4301:   ** node.
        !          4302:   */
        !          4303:   for(block=block->next; block!=NULL; block=block->next){
        !          4304:     ASSERT_VALID_INTERIOR_BLOCK(block);
        !          4305:     rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid);
        !          4306:     if( rc!=SQLITE_OK ) return rc;
        !          4307:     *piEndBlockid = iBlockid;
        !          4308: 
        !          4309:     interiorWriterAppend(pWriter->parentWriter,
        !          4310:                          block->term.pData, block->term.nData, iBlockid);
        !          4311:   }
        !          4312: 
        !          4313:   /* Parent node gets the chance to be the root. */
        !          4314:   return interiorWriterRootInfo(v, pWriter->parentWriter,
        !          4315:                                 ppRootInfo, pnRootInfo, piEndBlockid);
        !          4316: }
        !          4317: 
        !          4318: /****************************************************************/
        !          4319: /* InteriorReader is used to read off the data from an interior node
        !          4320: ** (see comment at top of file for the format).
        !          4321: */
        !          4322: typedef struct InteriorReader {
        !          4323:   const char *pData;
        !          4324:   int nData;
        !          4325: 
        !          4326:   DataBuffer term;          /* previous term, for decoding term delta. */
        !          4327: 
        !          4328:   sqlite_int64 iBlockid;
        !          4329: } InteriorReader;
        !          4330: 
        !          4331: static void interiorReaderDestroy(InteriorReader *pReader){
        !          4332:   dataBufferDestroy(&pReader->term);
        !          4333:   SCRAMBLE(pReader);
        !          4334: }
        !          4335: 
        !          4336: /* TODO(shess) The assertions are great, but what if we're in NDEBUG
        !          4337: ** and the blob is empty or otherwise contains suspect data?
        !          4338: */
        !          4339: static void interiorReaderInit(const char *pData, int nData,
        !          4340:                                InteriorReader *pReader){
        !          4341:   int n, nTerm;
        !          4342: 
        !          4343:   /* Require at least the leading flag byte */
        !          4344:   assert( nData>0 );
        !          4345:   assert( pData[0]!='\0' );
        !          4346: 
        !          4347:   CLEAR(pReader);
        !          4348: 
        !          4349:   /* Decode the base blockid, and set the cursor to the first term. */
        !          4350:   n = getVarint(pData+1, &pReader->iBlockid);
        !          4351:   assert( 1+n<=nData );
        !          4352:   pReader->pData = pData+1+n;
        !          4353:   pReader->nData = nData-(1+n);
        !          4354: 
        !          4355:   /* A single-child interior node (such as when a leaf node was too
        !          4356:   ** large for the segment directory) won't have any terms.
        !          4357:   ** Otherwise, decode the first term.
        !          4358:   */
        !          4359:   if( pReader->nData==0 ){
        !          4360:     dataBufferInit(&pReader->term, 0);
        !          4361:   }else{
        !          4362:     n = getVarint32(pReader->pData, &nTerm);
        !          4363:     dataBufferInit(&pReader->term, nTerm);
        !          4364:     dataBufferReplace(&pReader->term, pReader->pData+n, nTerm);
        !          4365:     assert( n+nTerm<=pReader->nData );
        !          4366:     pReader->pData += n+nTerm;
        !          4367:     pReader->nData -= n+nTerm;
        !          4368:   }
        !          4369: }
        !          4370: 
        !          4371: static int interiorReaderAtEnd(InteriorReader *pReader){
        !          4372:   return pReader->term.nData==0;
        !          4373: }
        !          4374: 
        !          4375: static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){
        !          4376:   return pReader->iBlockid;
        !          4377: }
        !          4378: 
        !          4379: static int interiorReaderTermBytes(InteriorReader *pReader){
        !          4380:   assert( !interiorReaderAtEnd(pReader) );
        !          4381:   return pReader->term.nData;
        !          4382: }
        !          4383: static const char *interiorReaderTerm(InteriorReader *pReader){
        !          4384:   assert( !interiorReaderAtEnd(pReader) );
        !          4385:   return pReader->term.pData;
        !          4386: }
        !          4387: 
        !          4388: /* Step forward to the next term in the node. */
        !          4389: static void interiorReaderStep(InteriorReader *pReader){
        !          4390:   assert( !interiorReaderAtEnd(pReader) );
        !          4391: 
        !          4392:   /* If the last term has been read, signal eof, else construct the
        !          4393:   ** next term.
        !          4394:   */
        !          4395:   if( pReader->nData==0 ){
        !          4396:     dataBufferReset(&pReader->term);
        !          4397:   }else{
        !          4398:     int n, nPrefix, nSuffix;
        !          4399: 
        !          4400:     n = getVarint32(pReader->pData, &nPrefix);
        !          4401:     n += getVarint32(pReader->pData+n, &nSuffix);
        !          4402: 
        !          4403:     /* Truncate the current term and append suffix data. */
        !          4404:     pReader->term.nData = nPrefix;
        !          4405:     dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);
        !          4406: 
        !          4407:     assert( n+nSuffix<=pReader->nData );
        !          4408:     pReader->pData += n+nSuffix;
        !          4409:     pReader->nData -= n+nSuffix;
        !          4410:   }
        !          4411:   pReader->iBlockid++;
        !          4412: }
        !          4413: 
        !          4414: /* Compare the current term to pTerm[nTerm], returning strcmp-style
        !          4415: ** results.  If isPrefix, equality means equal through nTerm bytes.
        !          4416: */
        !          4417: static int interiorReaderTermCmp(InteriorReader *pReader,
        !          4418:                                  const char *pTerm, int nTerm, int isPrefix){
        !          4419:   const char *pReaderTerm = interiorReaderTerm(pReader);
        !          4420:   int nReaderTerm = interiorReaderTermBytes(pReader);
        !          4421:   int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm;
        !          4422: 
        !          4423:   if( n==0 ){
        !          4424:     if( nReaderTerm>0 ) return -1;
        !          4425:     if( nTerm>0 ) return 1;
        !          4426:     return 0;
        !          4427:   }
        !          4428: 
        !          4429:   c = memcmp(pReaderTerm, pTerm, n);
        !          4430:   if( c!=0 ) return c;
        !          4431:   if( isPrefix && n==nTerm ) return 0;
        !          4432:   return nReaderTerm - nTerm;
        !          4433: }
        !          4434: 
        !          4435: /****************************************************************/
        !          4436: /* LeafWriter is used to collect terms and associated doclist data
        !          4437: ** into leaf blocks in %_segments (see top of file for format info).
        !          4438: ** Expected usage is:
        !          4439: **
        !          4440: ** LeafWriter writer;
        !          4441: ** leafWriterInit(0, 0, &writer);
        !          4442: ** while( sorted_terms_left_to_process ){
        !          4443: **   // data is doclist data for that term.
        !          4444: **   rc = leafWriterStep(v, &writer, pTerm, nTerm, pData, nData);
        !          4445: **   if( rc!=SQLITE_OK ) goto err;
        !          4446: ** }
        !          4447: ** rc = leafWriterFinalize(v, &writer);
        !          4448: **err:
        !          4449: ** leafWriterDestroy(&writer);
        !          4450: ** return rc;
        !          4451: **
        !          4452: ** leafWriterStep() may write a collected leaf out to %_segments.
        !          4453: ** leafWriterFinalize() finishes writing any buffered data and stores
        !          4454: ** a root node in %_segdir.  leafWriterDestroy() frees all buffers and
        !          4455: ** InteriorWriters allocated as part of writing this segment.
        !          4456: **
        !          4457: ** TODO(shess) Document leafWriterStepMerge().
        !          4458: */
        !          4459: 
        !          4460: /* Put terms with data this big in their own block. */
        !          4461: #define STANDALONE_MIN 1024
        !          4462: 
        !          4463: /* Keep leaf blocks below this size. */
        !          4464: #define LEAF_MAX 2048
        !          4465: 
        !          4466: typedef struct LeafWriter {
        !          4467:   int iLevel;
        !          4468:   int idx;
        !          4469:   sqlite_int64 iStartBlockid;     /* needed to create the root info */
        !          4470:   sqlite_int64 iEndBlockid;       /* when we're done writing. */
        !          4471: 
        !          4472:   DataBuffer term;                /* previous encoded term */
        !          4473:   DataBuffer data;                /* encoding buffer */
        !          4474: 
        !          4475:   /* bytes of first term in the current node which distinguishes that
        !          4476:   ** term from the last term of the previous node.
        !          4477:   */
        !          4478:   int nTermDistinct;
        !          4479: 
        !          4480:   InteriorWriter parentWriter;    /* if we overflow */
        !          4481:   int has_parent;
        !          4482: } LeafWriter;
        !          4483: 
        !          4484: static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){
        !          4485:   CLEAR(pWriter);
        !          4486:   pWriter->iLevel = iLevel;
        !          4487:   pWriter->idx = idx;
        !          4488: 
        !          4489:   dataBufferInit(&pWriter->term, 32);
        !          4490: 
        !          4491:   /* Start out with a reasonably sized block, though it can grow. */
        !          4492:   dataBufferInit(&pWriter->data, LEAF_MAX);
        !          4493: }
        !          4494: 
        !          4495: #ifndef NDEBUG
        !          4496: /* Verify that the data is readable as a leaf node.  Every check is an
                         ** assert(), and the function is only compiled when NDEBUG is not
                         ** defined.  An empty buffer (nData==0) is accepted without checks.
                         **
                         ** The layout walked here is:
                         **   varint(0)
                         **   varint(nTerm) term[nTerm] varint(nDoclist) doclist[nDoclist]
                         **   followed by zero or more of:
                         **     varint(nPrefix) varint(nSuffix) suffix[nSuffix]
                         **     varint(nDoclist) doclist[nDoclist]
                         ** which matches what leafWriterEncodeTerm() writes for terms.
                         */
        !          4497: static void leafNodeValidate(const char *pData, int nData){
        !          4498:   int n, iDummy;
        !          4499: 
        !          4500:   if( nData==0 ) return;
        !          4501:   assert( nData>0 );
        !          4502:   assert( pData!=0 );
        !          4503:   assert( pData+nData>pData );
        !          4504: 
        !          4505:   /* Must lead with a varint(0) */
        !          4506:   n = getVarint32(pData, &iDummy);
        !          4507:   assert( iDummy==0 );
        !          4508:   assert( n>0 );
        !          4509:   assert( n<nData );
        !          4510:   pData += n;
        !          4511:   nData -= n;
        !          4512: 
        !          4513:   /* Leading term length and data must fit in buffer. */
        !          4514:   n = getVarint32(pData, &iDummy);
        !          4515:   assert( n>0 );
        !          4516:   assert( iDummy>0 );
        !          4517:   assert( n+iDummy>0 );
        !          4518:   assert( n+iDummy<nData );
        !          4519:   pData += n+iDummy;
        !          4520:   nData -= n+iDummy;
        !          4521: 
        !          4522:   /* Leading term's doclist length and data must fit. */
        !          4523:   n = getVarint32(pData, &iDummy);
        !          4524:   assert( n>0 );
        !          4525:   assert( iDummy>0 );
        !          4526:   assert( n+iDummy>0 );
        !          4527:   assert( n+iDummy<=nData );
        !          4528:   ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
        !          4529:   pData += n+iDummy;
        !          4530:   nData -= n+iDummy;
        !          4531: 
        !          4532:   /* Verify that trailing terms and doclists also are readable. */
        !          4533:   while( nData!=0 ){
        !          4534:     n = getVarint32(pData, &iDummy);   /* shared-prefix length */
        !          4535:     assert( n>0 );
        !          4536:     assert( iDummy>=0 );               /* a prefix of 0 bytes is allowed */
        !          4537:     assert( n<nData );
        !          4538:     pData += n;
        !          4539:     nData -= n;
        !          4540:     n = getVarint32(pData, &iDummy);   /* suffix length, then suffix bytes */
        !          4541:     assert( n>0 );
        !          4542:     assert( iDummy>0 );
        !          4543:     assert( n+iDummy>0 );
        !          4544:     assert( n+iDummy<nData );
        !          4545:     pData += n+iDummy;
        !          4546:     nData -= n+iDummy;
        !          4547: 
        !          4548:     n = getVarint32(pData, &iDummy);   /* doclist length, then doclist */
        !          4549:     assert( n>0 );
        !          4550:     assert( iDummy>0 );
        !          4551:     assert( n+iDummy>0 );
        !          4552:     assert( n+iDummy<=nData );         /* the last doclist may end the node */
        !          4553:     ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL);
        !          4554:     pData += n+iDummy;
        !          4555:     nData -= n+iDummy;
        !          4556:   }
        !          4557: }
        !          4558: #define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n)
        !          4559: #else  /* NDEBUG defined: the check reduces to a trivially-true assert */
        !          4560: #define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 )
        !          4561: #endif
        !          4562: 
        !          4563: /* Flush the current leaf node to %_segments, and add the resulting
        !          4564: ** blockid and the starting term to the interior node which will
        !          4565: ** contain it.
                         **
                         ** The node written is the nData bytes of pWriter->data starting at
                         ** offset iData.  This function does not modify that buffer; callers
                         ** reset or compact it afterwards.  Returns the block_insert() error
                         ** code on failure (no writer state is updated in that case) and
                         ** SQLITE_OK otherwise.
        !          4566: */
        !          4567: static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter,
        !          4568:                                    int iData, int nData){
        !          4569:   sqlite_int64 iBlockid = 0;
        !          4570:   const char *pStartingTerm;
        !          4571:   int nStartingTerm, rc, n;
        !          4572: 
        !          4573:   /* Must have the leading varint(0) flag, plus at least some
        !          4574:   ** valid-looking data.
        !          4575:   */
        !          4576:   assert( nData>2 );
        !          4577:   assert( iData>=0 );
        !          4578:   assert( iData+nData<=pWriter->data.nData );
        !          4579:   ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData);
        !          4580: 
        !          4581:   rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid);
        !          4582:   if( rc!=SQLITE_OK ) return rc;
        !          4583:   assert( iBlockid!=0 );
        !          4584: 
        !          4585:   /* Reconstruct the first term in the leaf for purposes of building
        !          4586:   ** the interior node.
                           ** The +1 steps over the one-byte varint(0) node header.  Only the
                           ** first nTermDistinct bytes of the term are passed to the parent.
        !          4587:   */
        !          4588:   n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
        !          4589:   pStartingTerm = pWriter->data.pData+iData+1+n;
        !          4590:   assert( pWriter->data.nData>iData+1+n+nStartingTerm );
        !          4591:   assert( pWriter->nTermDistinct>0 );
        !          4592:   assert( pWriter->nTermDistinct<=nStartingTerm );
        !          4593:   nStartingTerm = pWriter->nTermDistinct;
        !          4594: 
        !          4595:   if( pWriter->has_parent ){
        !          4596:     interiorWriterAppend(&pWriter->parentWriter,
        !          4597:                          pStartingTerm, nStartingTerm, iBlockid);
        !          4598:   }else{
                             /* First leaf flushed: create the parent interior writer now. */
        !          4599:     interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid,
        !          4600:                        &pWriter->parentWriter);
        !          4601:     pWriter->has_parent = 1;
        !          4602:   }
        !          4603: 
        !          4604:   /* Track the span of this segment's leaf nodes. */
        !          4605:   if( pWriter->iEndBlockid==0 ){
        !          4606:     pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid;
        !          4607:   }else{
                             /* Later leaves are expected to receive consecutive blockids. */
        !          4608:     pWriter->iEndBlockid++;
        !          4609:     assert( iBlockid==pWriter->iEndBlockid );
        !          4610:   }
        !          4611: 
        !          4612:   return SQLITE_OK;
        !          4613: }
        !          4614: static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){
                           /* Write everything currently in pWriter->data as a single leaf
                           ** node, then reset the buffer.  If the write fails the error code
                           ** is returned and the buffer is not reset.
                           */
        !          4615:   int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData);
        !          4616:   if( rc!=SQLITE_OK ) return rc;
        !          4617: 
        !          4618:   /* Re-initialize the output buffer. */
        !          4619:   dataBufferReset(&pWriter->data);
        !          4620: 
        !          4621:   return SQLITE_OK;
        !          4622: }
        !          4623: 
        !          4623: 
        !          4624: /* Fetch the root info for the segment.  If the entire leaf fits
        !          4625: ** within ROOT_MAX, then it will be returned directly, otherwise it
        !          4626: ** will be flushed and the root info will be returned from the
        !          4627: ** interior node.  *piEndBlockid is set to the blockid of the last
        !          4628: ** interior or leaf node written to disk (0 if none are written at
        !          4629: ** all).
                         **
                         ** On the inline path *ppRootInfo points straight into pWriter->data
                         ** (no copy is made).  Returns SQLITE_OK on the inline path, the error
                         ** from leafWriterFlush() if flushing fails, and otherwise whatever
                         ** interiorWriterRootInfo() returns.
        !          4630: */
        !          4631: static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter,
        !          4632:                               char **ppRootInfo, int *pnRootInfo,
        !          4633:                               sqlite_int64 *piEndBlockid){
        !          4634:   /* we can fit the segment entirely inline */
        !          4635:   if( !pWriter->has_parent && pWriter->data.nData<ROOT_MAX ){
        !          4636:     *ppRootInfo = pWriter->data.pData;
        !          4637:     *pnRootInfo = pWriter->data.nData;
        !          4638:     *piEndBlockid = 0;
        !          4639:     return SQLITE_OK;
        !          4640:   }
        !          4641: 
        !          4642:   /* Flush remaining leaf data. */
        !          4643:   if( pWriter->data.nData>0 ){
        !          4644:     int rc = leafWriterFlush(v, pWriter);
        !          4645:     if( rc!=SQLITE_OK ) return rc;
        !          4646:   }
        !          4647: 
        !          4648:   /* We must have flushed a leaf at some point. */
        !          4649:   assert( pWriter->has_parent );
        !          4650: 
        !          4651:   /* Tentatively set the end leaf blockid as the end blockid.  If the
        !          4652:   ** interior node can be returned inline, this will be the final
        !          4653:   ** blockid, otherwise it will be overwritten by
        !          4654:   ** interiorWriterRootInfo().
        !          4655:   */
        !          4656:   *piEndBlockid = pWriter->iEndBlockid;
        !          4657: 
        !          4658:   return interiorWriterRootInfo(v, &pWriter->parentWriter,
        !          4659:                                 ppRootInfo, pnRootInfo, piEndBlockid);
        !          4660: }
        !          4661: 
        !          4662: /* Collect the rootInfo data and store it into the segment directory.
        !          4663: ** This has the effect of flushing the segment's leaf data to
        !          4664: ** %_segments, and also flushing any interior nodes to %_segments.
                         **
                         ** Returns the leafWriterRootInfo() error if that step fails, SQLITE_OK
                         ** without touching the segment directory when the segment is empty,
                         ** and otherwise the result of segdir_set().
        !          4665: */
        !          4666: static int leafWriterFinalize(fulltext_vtab *v, LeafWriter *pWriter){
        !          4667:   sqlite_int64 iEndBlockid;
        !          4668:   char *pRootInfo;
        !          4669:   int rc, nRootInfo;
        !          4670: 
        !          4671:   rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid);
        !          4672:   if( rc!=SQLITE_OK ) return rc;
        !          4673: 
        !          4674:   /* Don't bother storing an entirely empty segment. */
        !          4675:   if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK;
        !          4676: 
                           /* pWriter->iEndBlockid is the last leaf block; the local iEndBlockid
                           ** is the last block of any kind, as reported by leafWriterRootInfo().
                           */
        !          4677:   return segdir_set(v, pWriter->iLevel, pWriter->idx,
        !          4678:                     pWriter->iStartBlockid, pWriter->iEndBlockid,
        !          4679:                     iEndBlockid, pRootInfo, nRootInfo);
        !          4680: }
        !          4681: 
        !          4682: static void leafWriterDestroy(LeafWriter *pWriter){
                           /* Release the term and data buffers, plus the parent interior
                           ** writer if one was ever created (has_parent set).
                           */
        !          4683:   if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter);
        !          4684:   dataBufferDestroy(&pWriter->term);
        !          4685:   dataBufferDestroy(&pWriter->data);
        !          4686: }
        !          4687: 
        !          4688: /* Encode a term into the leafWriter, delta-encoding as appropriate.
        !          4689: ** Returns the length of the new term which distinguishes it from the
        !          4690: ** previous term, which can be used to set nTermDistinct when a node
        !          4691: ** boundary is crossed.
                         **
                         ** The encoding is appended to pWriter->data, and pWriter->term is
                         ** replaced with a copy of pTerm so the next call can compare against
                         ** it.
        !          4692: */
        !          4693: static int leafWriterEncodeTerm(LeafWriter *pWriter,
        !          4694:                                 const char *pTerm, int nTerm){
        !          4695:   char c[VARINT_MAX+VARINT_MAX];
        !          4696:   int n, nPrefix = 0;
        !          4697: 
        !          4698:   assert( nTerm>0 );
                           /* Count the leading bytes shared with the previously encoded term. */
        !          4699:   while( nPrefix<pWriter->term.nData &&
        !          4700:          pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
        !          4701:     nPrefix++;
        !          4702:     /* Failing this implies that the terms weren't in order. */
        !          4703:     assert( nPrefix<nTerm );
        !          4704:   }
        !          4705: 
        !          4706:   if( pWriter->data.nData==0 ){
        !          4707:     /* Encode the node header and leading term as:
        !          4708:     **  varint(0)
        !          4709:     **  varint(nTerm)
        !          4710:     **  char pTerm[nTerm]
                             ** The whole term is stored here; nPrefix is not written but still
                             ** determines the return value.
        !          4711:     */
        !          4712:     n = putVarint(c, '\0');
        !          4713:     n += putVarint(c+n, nTerm);
        !          4714:     dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm);
        !          4715:   }else{
        !          4716:     /* Delta-encode the term as:
        !          4717:     **  varint(nPrefix)
        !          4718:     **  varint(nSuffix)
        !          4719:     **  char pTermSuffix[nSuffix]
        !          4720:     */
        !          4721:     n = putVarint(c, nPrefix);
        !          4722:     n += putVarint(c+n, nTerm-nPrefix);
        !          4723:     dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
        !          4724:   }
        !          4725:   dataBufferReplace(&pWriter->term, pTerm, nTerm);
        !          4726: 
        !          4727:   return nPrefix+1;  /* shared prefix plus the first differing byte */
        !          4728: }
        !          4729: 
        !          4730: /* Used to avoid a memmove when a large amount of doclist data is in
        !          4731: ** the buffer.  This constructs a node and term header before
        !          4732: ** iDoclistData and flushes the resulting complete node using
        !          4733: ** leafWriterInternalFlush().
                         **
                         ** iDoclistData is an offset into pWriter->data.  The header
                         ** (varint(0), varint(nTerm), pTerm) is written into the bytes
                         ** immediately preceding that offset, overwriting what was there, and
                         ** everything from the header to the end of the buffer is flushed.
                         ** Returns the result of leafWriterInternalFlush().
        !          4734: */
        !          4735: static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter,
        !          4736:                                  const char *pTerm, int nTerm,
        !          4737:                                  int iDoclistData){
        !          4738:   char c[VARINT_MAX+VARINT_MAX];
        !          4739:   int iData, n = putVarint(c, 0);
        !          4740:   n += putVarint(c+n, nTerm);
        !          4741: 
        !          4742:   /* There should always be room for the header.  Even if pTerm shared
        !          4743:   ** a substantial prefix with the previous term, the entire prefix
        !          4744:   ** could be constructed from earlier data in the doclist, so there
        !          4745:   ** should be room.
        !          4746:   */
        !          4747:   assert( iDoclistData>=n+nTerm );
        !          4748: 
        !          4749:   iData = iDoclistData-(n+nTerm);  /* start offset of the rebuilt node */
        !          4750:   memcpy(pWriter->data.pData+iData, c, n);
        !          4751:   memcpy(pWriter->data.pData+iData+n, pTerm, nTerm);
        !          4752: 
        !          4753:   return leafWriterInternalFlush(v, pWriter, iData, pWriter->data.nData-iData);
        !          4754: }
        !          4755: 
        !          4756: /* Push pTerm[nTerm] along with the doclist data to the leaf layer of
        !          4757: ** %_segments.
                         **
                         ** The doclist is produced by docListMerge() over the nReaders readers
                         ** in pReaders and is built directly in pWriter->data.  Depending on
                         ** size, the term either stays buffered in the current node, is
                         ** flushed as a standalone leaf, or forces the preceding data to be
                         ** flushed so that this term starts a new node.  Returns SQLITE_OK or
                         ** the first error reported by a flush.
        !          4758: */
        !          4759: static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
        !          4760:                                const char *pTerm, int nTerm,
        !          4761:                                DLReader *pReaders, int nReaders){
        !          4762:   char c[VARINT_MAX+VARINT_MAX];
                           /* iTermData: offset at which this term's encoding begins, i.e. the
                           ** amount of data buffered before this call.
                           */
        !          4763:   int iTermData = pWriter->data.nData, iDoclistData;
        !          4764:   int i, nData, n, nActualData, nActual, rc, nTermDistinct;
        !          4765: 
        !          4766:   ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
        !          4767:   nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm);
        !          4768: 
        !          4769:   /* Remember nTermDistinct if opening a new node. */
        !          4770:   if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct;
        !          4771: 
                           /* Offset of the doclist length varint that is written next. */
        !          4772:   iDoclistData = pWriter->data.nData;
        !          4773: 
        !          4774:   /* Estimate the length of the merged doclist so we can leave space
        !          4775:   ** to encode it.
        !          4776:   */
        !          4777:   for(i=0, nData=0; i<nReaders; i++){
        !          4778:     nData += dlrAllDataBytes(&pReaders[i]);
        !          4779:   }
        !          4780:   n = putVarint(c, nData);
        !          4781:   dataBufferAppend(&pWriter->data, c, n);
        !          4782: 
        !          4783:   docListMerge(&pWriter->data, pReaders, nReaders);
        !          4784:   ASSERT_VALID_DOCLIST(DL_DEFAULT,
        !          4785:                        pWriter->data.pData+iDoclistData+n,
        !          4786:                        pWriter->data.nData-iDoclistData-n, NULL);
        !          4787: 
        !          4788:   /* The actual amount of doclist data at this point could be smaller
        !          4789:   ** than the length we encoded.  Additionally, the space required to
        !          4790:   ** encode this length could be smaller.  For small doclists, this is
        !          4791:   ** not a big deal, we can just use memmove() to adjust things.
        !          4792:   */
        !          4793:   nActualData = pWriter->data.nData-(iDoclistData+n);
        !          4794:   nActual = putVarint(c, nActualData);
        !          4795:   assert( nActualData<=nData );
        !          4796:   assert( nActual<=n );
        !          4797: 
        !          4798:   /* If the new doclist is big enough to force a standalone leaf
        !          4799:   ** node, we can immediately flush it inline without doing the
        !          4800:   ** memmove().
        !          4801:   */
        !          4802:   /* TODO(shess) This test matches leafWriterStep(), which does this
        !          4803:   ** test before it knows the cost to varint-encode the term and
        !          4804:   ** doclist lengths.  At some point, change to
        !          4805:   ** pWriter->data.nData-iTermData>STANDALONE_MIN.
        !          4806:   */
        !          4807:   if( nTerm+nActualData>STANDALONE_MIN ){
        !          4808:     /* Push leaf node from before this term. */
        !          4809:     if( iTermData>0 ){
        !          4810:       rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
        !          4811:       if( rc!=SQLITE_OK ) return rc;
        !          4812: 
        !          4813:       pWriter->nTermDistinct = nTermDistinct;
        !          4814:     }
        !          4815: 
        !          4816:     /* Fix the encoded doclist length. */
                             /* The shorter varint is placed directly against the doclist
                             ** bytes, so iDoclistData moves forward by the bytes saved.
                             */
        !          4817:     iDoclistData += n - nActual;
        !          4818:     memcpy(pWriter->data.pData+iDoclistData, c, nActual);
        !          4819: 
        !          4820:     /* Push the standalone leaf node. */
        !          4821:     rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData);
        !          4822:     if( rc!=SQLITE_OK ) return rc;
        !          4823: 
        !          4824:     /* Leave the node empty. */
        !          4825:     dataBufferReset(&pWriter->data);
        !          4826: 
        !          4827:     return rc;
        !          4828:   }
        !          4829: 
        !          4830:   /* At this point, we know that the doclist was small, so do the
        !          4831:   ** memmove if indicated.
        !          4832:   */
        !          4833:   if( nActual<n ){
        !          4834:     memmove(pWriter->data.pData+iDoclistData+nActual,
        !          4835:             pWriter->data.pData+iDoclistData+n,
        !          4836:             pWriter->data.nData-(iDoclistData+n));
        !          4837:     pWriter->data.nData -= n-nActual;
        !          4838:   }
        !          4839: 
        !          4840:   /* Replace written length with actual length. */
        !          4841:   memcpy(pWriter->data.pData+iDoclistData, c, nActual);
        !          4842: 
        !          4843:   /* If the node is too large, break things up. */
        !          4844:   /* TODO(shess) This test matches leafWriterStep(), which does this
        !          4845:   ** test before it knows the cost to varint-encode the term and
        !          4846:   ** doclist lengths.  At some point, change to
        !          4847:   ** pWriter->data.nData>LEAF_MAX.
        !          4848:   */
        !          4849:   if( iTermData+nTerm+nActualData>LEAF_MAX ){
        !          4850:     /* Flush out the leading data as a node */
        !          4851:     rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
        !          4852:     if( rc!=SQLITE_OK ) return rc;
        !          4853: 
        !          4854:     pWriter->nTermDistinct = nTermDistinct;
        !          4855: 
        !          4856:     /* Rebuild header using the current term */
                             /* The header is written at the start of the buffer, over the
                             ** data that was just flushed.
                             */
        !          4857:     n = putVarint(pWriter->data.pData, 0);
        !          4858:     n += putVarint(pWriter->data.pData+n, nTerm);
        !          4859:     memcpy(pWriter->data.pData+n, pTerm, nTerm);
        !          4860:     n += nTerm;
        !          4861: 
        !          4862:     /* There should always be room, because the previous encoding
        !          4863:     ** included all data necessary to construct the term.
        !          4864:     */
        !          4865:     assert( n<iDoclistData );
        !          4866:     /* So long as STANDALONE_MIN is half or less of LEAF_MAX, the
        !          4867:     ** following memcpy() is safe (as opposed to needing a memmove).
        !          4868:     */
        !          4869:     assert( 2*STANDALONE_MIN<=LEAF_MAX );
        !          4870:     assert( n+pWriter->data.nData-iDoclistData<iDoclistData );
        !          4871:     memcpy(pWriter->data.pData+n,
        !          4872:            pWriter->data.pData+iDoclistData,
        !          4873:            pWriter->data.nData-iDoclistData);
        !          4874:     pWriter->data.nData -= iDoclistData-n;
        !          4875:   }
        !          4876:   ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
        !          4877: 
        !          4878:   return SQLITE_OK;
        !          4879: }
        !          4880: 
        !          4881: /* Push pTerm[nTerm] along with the doclist data to the leaf layer of
        !          4882: ** %_segments.
                         **
                         ** Convenience form of leafWriterStepMerge() for a single doclist:
                         ** pData[nData] is wrapped in one DL_DEFAULT DLReader, which is
                         ** destroyed again before returning.  Returns leafWriterStepMerge()'s
                         ** result.
        !          4883: */
        !          4884: /* TODO(shess) Revise writeZeroSegment() so that doclists are
        !          4885: ** constructed directly in pWriter->data.
        !          4886: */
        !          4887: static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
        !          4888:                           const char *pTerm, int nTerm,
        !          4889:                           const char *pData, int nData){
        !          4890:   int rc;
        !          4891:   DLReader reader;
        !          4892: 
        !          4893:   dlrInit(&reader, DL_DEFAULT, pData, nData);
        !          4894:   rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1);
        !          4895:   dlrDestroy(&reader);
        !          4896: 
        !          4897:   return rc;
        !          4898: }
        !          4899: 
        !          4900: 
        !          4901: /****************************************************************/
        !          4902: /* LeafReader is used to iterate over an individual leaf node. */
        !          4903: typedef struct LeafReader {
        !          4904:   DataBuffer term;          /* copy of current term. */
        !          4905: 
        !          4906:   const char *pData;        /* data for current term. */
                                                     /* (begins with the doclist length varint) */
        !          4907:   int nData;                /* bytes from pData to the end of the leaf; */
                                                     /* <=0 means the reader is at end. */
        !          4908: } LeafReader;
        !          4909: 
        !          4910: static void leafReaderDestroy(LeafReader *pReader){
                           /* Free the term copy, then SCRAMBLE() the struct (presumably to
                           ** poison it against reuse - confirm against the macro definition).
                           */
        !          4911:   dataBufferDestroy(&pReader->term);
        !          4912:   SCRAMBLE(pReader);
        !          4913: }
        !          4914: 
        !          4915: static int leafReaderAtEnd(LeafReader *pReader){
                           /* Non-zero once no bytes remain at the reader's position. */
        !          4916:   return pReader->nData<=0;
        !          4917: }
        !          4918: 
        !          4919: /* Access the current term. */
        !          4920: static int leafReaderTermBytes(LeafReader *pReader){
                           /* Length in bytes of the current term copy. */
        !          4921:   return pReader->term.nData;
        !          4922: }
        !          4923: static const char *leafReaderTerm(LeafReader *pReader){
                           /* Pointer to the reader's own copy of the current term; its length
                           ** comes from leafReaderTermBytes().  The term must be non-empty.
                           */
        !          4924:   assert( pReader->term.nData>0 );
        !          4925:   return pReader->term.pData;
        !          4926: }
        !          4927: 
        !          4928: /* Access the doclist data for the current term. */
        !          4929: static int leafReaderDataBytes(LeafReader *pReader){
                           /* Decodes the length varint at pData and returns that length; the
                           ** varint's own size is not included.
                           */
        !          4930:   int nData;
        !          4931:   assert( pReader->term.nData>0 );
        !          4932:   getVarint32(pReader->pData, &nData);
        !          4933:   return nData;
        !          4934: }
        !          4935: static const char *leafReaderData(LeafReader *pReader){
                           /* Returns a pointer just past the length varint at pData, i.e. to
                           ** the first doclist byte.  The decoded length itself is discarded
                           ** here; use leafReaderDataBytes() to obtain it.
                           */
        !          4936:   int n, nData;
        !          4937:   assert( pReader->term.nData>0 );
        !          4938:   n = getVarint32(pReader->pData, &nData);
        !          4939:   return pReader->pData+n;
        !          4940: }
        !          4941: 
        !          4942: static void leafReaderInit(const char *pData, int nData,
        !          4943:                            LeafReader *pReader){
                           /* Position pReader on the first term of the leaf node pData[nData].
                           ** The first term is copied into pReader->term; pReader->pData keeps
                           ** pointing into the caller's buffer, which is not copied.
                           */
        !          4944:   int nTerm, n;
        !          4945: 
        !          4946:   assert( nData>0 );
        !          4947:   assert( pData[0]=='\0' );
        !          4948: 
        !          4949:   CLEAR(pReader);
        !          4950: 
        !          4951:   /* Read the first term, skipping the header byte. */
        !          4952:   n = getVarint32(pData+1, &nTerm);
        !          4953:   dataBufferInit(&pReader->term, nTerm);
        !          4954:   dataBufferReplace(&pReader->term, pData+1+n, nTerm);
        !          4955: 
        !          4956:   /* Position after the first term. */
        !          4957:   assert( 1+n+nTerm<nData );
        !          4958:   pReader->pData = pData+1+n+nTerm;
        !          4959:   pReader->nData = nData-1-n-nTerm;
        !          4960: }
        !          4961: 
        !          4962: /* Step the reader forward to the next term.  Must not be called once
                         ** leafReaderAtEnd() is true.  If the doclist just skipped was the last
                         ** one in the leaf, the reader becomes at-end and the term copy is left
                         ** unchanged.
                         */
        !          4963: static void leafReaderStep(LeafReader *pReader){
        !          4964:   int n, nData, nPrefix, nSuffix;
        !          4965:   assert( !leafReaderAtEnd(pReader) );
        !          4966: 
        !          4967:   /* Skip previous entry's data block. */
        !          4968:   n = getVarint32(pReader->pData, &nData);
        !          4969:   assert( n+nData<=pReader->nData );
        !          4970:   pReader->pData += n+nData;
        !          4971:   pReader->nData -= n+nData;
        !          4972: 
        !          4973:   if( !leafReaderAtEnd(pReader) ){
        !          4974:     /* Construct the new term using a prefix from the old term plus a
        !          4975:     ** suffix from the leaf data.
        !          4976:     */
        !          4977:     n = getVarint32(pReader->pData, &nPrefix);
        !          4978:     n += getVarint32(pReader->pData+n, &nSuffix);
        !          4979:     assert( n+nSuffix<pReader->nData );
        !          4980:     pReader->term.nData = nPrefix;  /* keep only the shared prefix */
        !          4981:     dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);
        !          4982: 
        !          4983:     pReader->pData += n+nSuffix;
        !          4984:     pReader->nData -= n+nSuffix;
        !          4985:   }
        !          4986: }
        !          4987: 
        !          4988: /* strcmp-style comparison of pReader's current term against pTerm.
        !          4989: ** If isPrefix, equality means equal through nTerm bytes.
                         **
                         ** When the common length is non-zero the bytes are compared with
                         ** memcmp(); on a tie the result is 0 for a prefix match, otherwise the
                         ** difference in lengths.
                         **
                         ** NOTE(review): when either term is empty (n==0) the sign returned is
                         ** the opposite of what the length-difference rule on the last line
                         ** would give, and isPrefix is not consulted.  Confirm whether empty
                         ** terms can reach this function before relying on that branch.
        !          4990: */
        !          4991: static int leafReaderTermCmp(LeafReader *pReader,
        !          4992:                              const char *pTerm, int nTerm, int isPrefix){
                           /* n is the number of bytes the two terms can be compared over. */
        !          4993:   int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm;
        !          4994:   if( n==0 ){
        !          4995:     if( pReader->term.nData>0 ) return -1;
        !          4996:     if(nTerm>0 ) return 1;
        !          4997:     return 0;
        !          4998:   }
        !          4999: 
        !          5000:   c = memcmp(pReader->term.pData, pTerm, n);
        !          5001:   if( c!=0 ) return c;
        !          5002:   if( isPrefix && n==nTerm ) return 0;
        !          5003:   return pReader->term.nData - nTerm;
        !          5004: }
        !          5005: 
        !          5006: 
        !          5007: /****************************************************************/
        !          5008: /* LeavesReader wraps LeafReader to allow iterating over the entire
        !          5009: ** leaf layer of the tree.
        !          5010: */
        !          5011: typedef struct LeavesReader {
        !          5012:   int idx;                  /* Index within the segment. */
                                                     /* (-1 marks a non-cached pStmt that */
                                                     /* leavesReaderDestroy() must finalize) */
        !          5013: 
        !          5014:   sqlite3_stmt *pStmt;      /* Statement we're streaming leaves from. */
        !          5015:   int eof;                  /* we've seen SQLITE_DONE from pStmt. */
        !          5016: 
        !          5017:   LeafReader leafReader;    /* reader for the current leaf. */
        !          5018:   DataBuffer rootData;      /* root data for inline. */
        !          5019: } LeavesReader;
        !          5020: 
        !          5021: /* Access the current term. */
        !          5022: static int leavesReaderTermBytes(LeavesReader *pReader){
                           /* Forwards to the current leaf's reader; invalid once eof is set. */
        !          5023:   assert( !pReader->eof );
        !          5024:   return leafReaderTermBytes(&pReader->leafReader);
        !          5025: }
        !          5026: static const char *leavesReaderTerm(LeavesReader *pReader){
                           /* Forwards to the current leaf's reader; invalid once eof is set. */
        !          5027:   assert( !pReader->eof );
        !          5028:   return leafReaderTerm(&pReader->leafReader);
        !          5029: }
        !          5030: 
        !          5031: /* Access the doclist data for the current term. */
        !          5032: static int leavesReaderDataBytes(LeavesReader *pReader){
                           /* Forwards to the current leaf's reader; invalid once eof is set. */
        !          5033:   assert( !pReader->eof );
        !          5034:   return leafReaderDataBytes(&pReader->leafReader);
        !          5035: }
        !          5036: static const char *leavesReaderData(LeavesReader *pReader){
                           /* Forwards to the current leaf's reader; invalid once eof is set. */
        !          5037:   assert( !pReader->eof );
        !          5038:   return leafReaderData(&pReader->leafReader);
        !          5039: }
        !          5040: 
        !          5041: static int leavesReaderAtEnd(LeavesReader *pReader){
                           /* Returns the eof flag (set once pStmt has reported SQLITE_DONE). */
        !          5042:   return pReader->eof;
        !          5043: }
        !          5044: 
        !          5045: /* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus
        !          5046: ** leaving the statement handle open, which locks the table.
                         ** This resets the reader's statement and returns sqlite3_reset()'s
                         ** result code.
        !          5047: */
        !          5048: /* TODO(shess) This "solution" is not satisfactory.  Really, there
        !          5049: ** should be a check-in function for all statement handles which
        !          5050: ** arranges to call sqlite3_reset().  This most likely will require
        !          5051: ** modification to control flow all over the place, though, so for now
        !          5052: ** just punt.
        !          5053: **
        !          5054: ** Note that the current system assumes that segment merges will run to
        !          5055: ** completion, which is why this particular problem hasn't arisen in
        !          5056: ** this case.  Probably a brittle assumption.
        !          5057: */
        !          5058: static int leavesReaderReset(LeavesReader *pReader){
        !          5059:   return sqlite3_reset(pReader->pStmt);
        !          5060: }
        !          5061: 
        !          5062: static void leavesReaderDestroy(LeavesReader *pReader){
                           /* Release everything the reader holds: the statement (only when it
                           ** is the non-cached kind), the current leaf reader and the copy of
                           ** the root data.  The struct is SCRAMBLE()d afterwards.
                           */
        !          5063:   /* If idx is -1, that means we're using a non-cached statement
        !          5064:   ** handle in the optimize() case, so we need to release it.
        !          5065:   */
        !          5066:   if( pReader->pStmt!=NULL && pReader->idx==-1 ){
        !          5067:     sqlite3_finalize(pReader->pStmt);
        !          5068:   }
        !          5069:   leafReaderDestroy(&pReader->leafReader);
        !          5070:   dataBufferDestroy(&pReader->rootData);
        !          5071:   SCRAMBLE(pReader);
        !          5072: }
        !          5073: 
        !          5074: /* Initialize pReader with the given root data (if iStartBlockid==0
        !          5075: ** the leaf data was entirely contained in the root), or from the
        !          5076: ** stream of blocks between iStartBlockid and iEndBlockid, inclusive.
        !          5077: */
        !          5078: static int leavesReaderInit(fulltext_vtab *v,
        !          5079:                             int idx,
        !          5080:                             sqlite_int64 iStartBlockid,
        !          5081:                             sqlite_int64 iEndBlockid,
        !          5082:                             const char *pRootData, int nRootData,
        !          5083:                             LeavesReader *pReader){
        !          5084:   CLEAR(pReader);
        !          5085:   pReader->idx = idx;
        !          5086: 
        !          5087:   dataBufferInit(&pReader->rootData, 0);
        !          5088:   if( iStartBlockid==0 ){
        !          5089:     /* Entire leaf level fit in root data. */
        !          5090:     dataBufferReplace(&pReader->rootData, pRootData, nRootData);
        !          5091:     leafReaderInit(pReader->rootData.pData, pReader->rootData.nData,
        !          5092:                    &pReader->leafReader);
        !          5093:   }else{
        !          5094:     sqlite3_stmt *s;
        !          5095:     int rc = sql_get_leaf_statement(v, idx, &s);
        !          5096:     if( rc!=SQLITE_OK ) return rc;
        !          5097: 
        !          5098:     rc = sqlite3_bind_int64(s, 1, iStartBlockid);
        !          5099:     if( rc!=SQLITE_OK ) return rc;
        !          5100: 
        !          5101:     rc = sqlite3_bind_int64(s, 2, iEndBlockid);
        !          5102:     if( rc!=SQLITE_OK ) return rc;
        !          5103: 
        !          5104:     rc = sqlite3_step(s);
        !          5105:     if( rc==SQLITE_DONE ){
        !          5106:       pReader->eof = 1;
        !          5107:       return SQLITE_OK;
        !          5108:     }
        !          5109:     if( rc!=SQLITE_ROW ) return rc;
        !          5110: 
        !          5111:     pReader->pStmt = s;
        !          5112:     leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
        !          5113:                    sqlite3_column_bytes(pReader->pStmt, 0),
        !          5114:                    &pReader->leafReader);
        !          5115:   }
        !          5116:   return SQLITE_OK;
        !          5117: }
        !          5118: 
        !          5119: /* Step the current leaf forward to the next term.  If we reach the
        !          5120: ** end of the current leaf, step forward to the next leaf block.
        !          5121: */
        !          5122: static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){
        !          5123:   assert( !leavesReaderAtEnd(pReader) );
        !          5124:   leafReaderStep(&pReader->leafReader);
        !          5125: 
        !          5126:   if( leafReaderAtEnd(&pReader->leafReader) ){
        !          5127:     int rc;
        !          5128:     if( pReader->rootData.pData ){
        !          5129:       pReader->eof = 1;
        !          5130:       return SQLITE_OK;
        !          5131:     }
        !          5132:     rc = sqlite3_step(pReader->pStmt);
        !          5133:     if( rc!=SQLITE_ROW ){
        !          5134:       pReader->eof = 1;
        !          5135:       return rc==SQLITE_DONE ? SQLITE_OK : rc;
        !          5136:     }
        !          5137:     leafReaderDestroy(&pReader->leafReader);
        !          5138:     leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
        !          5139:                    sqlite3_column_bytes(pReader->pStmt, 0),
        !          5140:                    &pReader->leafReader);
        !          5141:   }
        !          5142:   return SQLITE_OK;
        !          5143: }
        !          5144: 
        !          5145: /* Order LeavesReaders by their term, ignoring idx.  Readers at eof
        !          5146: ** always sort to the end.
        !          5147: */
        !          5148: static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){
        !          5149:   if( leavesReaderAtEnd(lr1) ){
        !          5150:     if( leavesReaderAtEnd(lr2) ) return 0;
        !          5151:     return 1;
        !          5152:   }
        !          5153:   if( leavesReaderAtEnd(lr2) ) return -1;
        !          5154: 
        !          5155:   return leafReaderTermCmp(&lr1->leafReader,
        !          5156:                            leavesReaderTerm(lr2), leavesReaderTermBytes(lr2),
        !          5157:                            0);
        !          5158: }
        !          5159: 
        !          5160: /* Similar to leavesReaderTermCmp(), with additional ordering by idx
        !          5161: ** so that older segments sort before newer segments.
        !          5162: */
        !          5163: static int leavesReaderCmp(LeavesReader *lr1, LeavesReader *lr2){
        !          5164:   int c = leavesReaderTermCmp(lr1, lr2);
        !          5165:   if( c!=0 ) return c;
        !          5166:   return lr1->idx-lr2->idx;
        !          5167: }
        !          5168: 
        !          5169: /* Assume that pLr[1]..pLr[nLr] are sorted.  Bubble pLr[0] into its
        !          5170: ** sorted position.
        !          5171: */
        !          5172: static void leavesReaderReorder(LeavesReader *pLr, int nLr){
        !          5173:   while( nLr>1 && leavesReaderCmp(pLr, pLr+1)>0 ){
        !          5174:     LeavesReader tmp = pLr[0];
        !          5175:     pLr[0] = pLr[1];
        !          5176:     pLr[1] = tmp;
        !          5177:     nLr--;
        !          5178:     pLr++;
        !          5179:   }
        !          5180: }
        !          5181: 
        !          5182: /* Initializes pReaders with the segments from level iLevel, returning
        !          5183: ** the number of segments in *piReaders.  Leaves pReaders in sorted
        !          5184: ** order.
        !          5185: */
        !          5186: static int leavesReadersInit(fulltext_vtab *v, int iLevel,
        !          5187:                              LeavesReader *pReaders, int *piReaders){
        !          5188:   sqlite3_stmt *s;
        !          5189:   int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s);
        !          5190:   if( rc!=SQLITE_OK ) return rc;
        !          5191: 
        !          5192:   rc = sqlite3_bind_int(s, 1, iLevel);
        !          5193:   if( rc!=SQLITE_OK ) return rc;
        !          5194: 
        !          5195:   i = 0;
        !          5196:   while( (rc = sqlite3_step(s))==SQLITE_ROW ){
        !          5197:     sqlite_int64 iStart = sqlite3_column_int64(s, 0);
        !          5198:     sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
        !          5199:     const char *pRootData = sqlite3_column_blob(s, 2);
        !          5200:     int nRootData = sqlite3_column_bytes(s, 2);
        !          5201: 
        !          5202:     assert( i<MERGE_COUNT );
        !          5203:     rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData,
        !          5204:                           &pReaders[i]);
        !          5205:     if( rc!=SQLITE_OK ) break;
        !          5206: 
        !          5207:     i++;
        !          5208:   }
        !          5209:   if( rc!=SQLITE_DONE ){
        !          5210:     while( i-->0 ){
        !          5211:       leavesReaderDestroy(&pReaders[i]);
        !          5212:     }
        !          5213:     return rc;
        !          5214:   }
        !          5215: 
        !          5216:   *piReaders = i;
        !          5217: 
        !          5218:   /* Leave our results sorted by term, then age. */
        !          5219:   while( i-- ){
        !          5220:     leavesReaderReorder(pReaders+i, *piReaders-i);
        !          5221:   }
        !          5222:   return SQLITE_OK;
        !          5223: }
        !          5224: 
        !          5225: /* Merge doclists from pReaders[nReaders] into a single doclist, which
        !          5226: ** is written to pWriter.  Assumes pReaders is ordered oldest to
        !          5227: ** newest.
        !          5228: */
        !          5229: /* TODO(shess) Consider putting this inline in segmentMerge(). */
        !          5230: static int leavesReadersMerge(fulltext_vtab *v,
        !          5231:                               LeavesReader *pReaders, int nReaders,
        !          5232:                               LeafWriter *pWriter){
        !          5233:   DLReader dlReaders[MERGE_COUNT];
        !          5234:   const char *pTerm = leavesReaderTerm(pReaders);
        !          5235:   int i, nTerm = leavesReaderTermBytes(pReaders);
        !          5236: 
        !          5237:   assert( nReaders<=MERGE_COUNT );
        !          5238: 
        !          5239:   for(i=0; i<nReaders; i++){
        !          5240:     dlrInit(&dlReaders[i], DL_DEFAULT,
        !          5241:             leavesReaderData(pReaders+i),
        !          5242:             leavesReaderDataBytes(pReaders+i));
        !          5243:   }
        !          5244: 
        !          5245:   return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders);
        !          5246: }
        !          5247: 
        !          5248: /* Forward ref due to mutual recursion with segdirNextIndex(). */
        !          5249: static int segmentMerge(fulltext_vtab *v, int iLevel);
        !          5250: 
        !          5251: /* Put the next available index at iLevel into *pidx.  If iLevel
        !          5252: ** already has MERGE_COUNT segments, they are merged to a higher
        !          5253: ** level to make room.
        !          5254: */
        !          5255: static int segdirNextIndex(fulltext_vtab *v, int iLevel, int *pidx){
        !          5256:   int rc = segdir_max_index(v, iLevel, pidx);
        !          5257:   if( rc==SQLITE_DONE ){              /* No segments at iLevel. */
        !          5258:     *pidx = 0;
        !          5259:   }else if( rc==SQLITE_ROW ){
        !          5260:     if( *pidx==(MERGE_COUNT-1) ){
        !          5261:       rc = segmentMerge(v, iLevel);
        !          5262:       if( rc!=SQLITE_OK ) return rc;
        !          5263:       *pidx = 0;
        !          5264:     }else{
        !          5265:       (*pidx)++;
        !          5266:     }
        !          5267:   }else{
        !          5268:     return rc;
        !          5269:   }
        !          5270:   return SQLITE_OK;
        !          5271: }
        !          5272: 
        !          5273: /* Merge MERGE_COUNT segments at iLevel into a new segment at
        !          5274: ** iLevel+1.  If iLevel+1 is already full of segments, those will be
        !          5275: ** merged to make room.
        !          5276: */
        !          5277: static int segmentMerge(fulltext_vtab *v, int iLevel){
        !          5278:   LeafWriter writer;
        !          5279:   LeavesReader lrs[MERGE_COUNT];
        !          5280:   int i, rc, idx = 0;
        !          5281: 
        !          5282:   /* Determine the next available segment index at the next level,
        !          5283:   ** merging as necessary.
        !          5284:   */
        !          5285:   rc = segdirNextIndex(v, iLevel+1, &idx);
        !          5286:   if( rc!=SQLITE_OK ) return rc;
        !          5287: 
        !          5288:   /* TODO(shess) This assumes that we'll always see exactly
        !          5289:   ** MERGE_COUNT segments to merge at a given level.  That will be
        !          5290:   ** broken if we allow the developer to request preemptive or
        !          5291:   ** deferred merging.
        !          5292:   */
        !          5293:   memset(&lrs, '\0', sizeof(lrs));
        !          5294:   rc = leavesReadersInit(v, iLevel, lrs, &i);
        !          5295:   if( rc!=SQLITE_OK ) return rc;
        !          5296:   assert( i==MERGE_COUNT );
        !          5297: 
        !          5298:   leafWriterInit(iLevel+1, idx, &writer);
        !          5299: 
        !          5300:   /* Since leavesReaderReorder() pushes readers at eof to the end,
        !          5301:   ** when the first reader is empty, all will be empty.
        !          5302:   */
        !          5303:   while( !leavesReaderAtEnd(lrs) ){
        !          5304:     /* Figure out how many readers share their next term. */
        !          5305:     for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){
        !          5306:       if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break;
        !          5307:     }
        !          5308: 
        !          5309:     rc = leavesReadersMerge(v, lrs, i, &writer);
        !          5310:     if( rc!=SQLITE_OK ) goto err;
        !          5311: 
        !          5312:     /* Step forward those that were merged. */
        !          5313:     while( i-->0 ){
        !          5314:       rc = leavesReaderStep(v, lrs+i);
        !          5315:       if( rc!=SQLITE_OK ) goto err;
        !          5316: 
        !          5317:       /* Reorder by term, then by age. */
        !          5318:       leavesReaderReorder(lrs+i, MERGE_COUNT-i);
        !          5319:     }
        !          5320:   }
        !          5321: 
        !          5322:   for(i=0; i<MERGE_COUNT; i++){
        !          5323:     leavesReaderDestroy(&lrs[i]);
        !          5324:   }
        !          5325: 
        !          5326:   rc = leafWriterFinalize(v, &writer);
        !          5327:   leafWriterDestroy(&writer);
        !          5328:   if( rc!=SQLITE_OK ) return rc;
        !          5329: 
        !          5330:   /* Delete the merged segment data. */
        !          5331:   return segdir_delete(v, iLevel);
        !          5332: 
        !          5333:  err:
        !          5334:   for(i=0; i<MERGE_COUNT; i++){
        !          5335:     leavesReaderDestroy(&lrs[i]);
        !          5336:   }
        !          5337:   leafWriterDestroy(&writer);
        !          5338:   return rc;
        !          5339: }
        !          5340: 
        !          5341: /* Accumulate the union of *acc and *pData into *acc. */
        !          5342: static void docListAccumulateUnion(DataBuffer *acc,
        !          5343:                                    const char *pData, int nData) {
        !          5344:   DataBuffer tmp = *acc;
        !          5345:   dataBufferInit(acc, tmp.nData+nData);
        !          5346:   docListUnion(tmp.pData, tmp.nData, pData, nData, acc);
        !          5347:   dataBufferDestroy(&tmp);
        !          5348: }
        !          5349: 
        !          5350: /* TODO(shess) It might be interesting to explore different merge
        !          5351: ** strategies, here.  For instance, since this is a sorted merge, we
        !          5352: ** could easily merge many doclists in parallel.  With some
        !          5353: ** comprehension of the storage format, we could merge all of the
        !          5354: ** doclists within a leaf node directly from the leaf node's storage.
        !          5355: ** It may be worthwhile to merge smaller doclists before larger
        !          5356: ** doclists, since they can be traversed more quickly - but the
        !          5357: ** results may have less overlap, making them more expensive in a
        !          5358: ** different way.
        !          5359: */
        !          5360: 
        !          5361: /* Scan pReader for pTerm/nTerm, and merge the term's doclist over
        !          5362: ** *out (any doclists with duplicate docids overwrite those in *out).
        !          5363: ** Internal function for loadSegmentLeaf().
        !          5364: */
        !          5365: static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader,
        !          5366:                                 const char *pTerm, int nTerm, int isPrefix,
        !          5367:                                 DataBuffer *out){
        !          5368:   /* doclist data is accumulated into pBuffers similar to how one does
        !          5369:   ** increment in binary arithmetic.  If index 0 is empty, the data is
        !          5370:   ** stored there.  If there is data there, it is merged and the
        !          5371:   ** results carried into position 1, with further merge-and-carry
        !          5372:   ** until an empty position is found.
        !          5373:   */
        !          5374:   DataBuffer *pBuffers = NULL;
        !          5375:   int nBuffers = 0, nMaxBuffers = 0, rc;
        !          5376: 
        !          5377:   assert( nTerm>0 );
        !          5378: 
        !          5379:   for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader);
        !          5380:       rc=leavesReaderStep(v, pReader)){
        !          5381:     /* TODO(shess) Really want leavesReaderTermCmp(), but that name is
        !          5382:     ** already taken to compare the terms of two LeavesReaders.  Think
        !          5383:     ** on a better name.  [Meanwhile, break encapsulation rather than
        !          5384:     ** use a confusing name.]
        !          5385:     */
        !          5386:     int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix);
        !          5387:     if( c>0 ) break;      /* Past any possible matches. */
        !          5388:     if( c==0 ){
        !          5389:       const char *pData = leavesReaderData(pReader);
        !          5390:       int iBuffer, nData = leavesReaderDataBytes(pReader);
        !          5391: 
        !          5392:       /* Find the first empty buffer. */
        !          5393:       for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){
        !          5394:         if( 0==pBuffers[iBuffer].nData ) break;
        !          5395:       }
        !          5396: 
        !          5397:       /* Out of buffers, add an empty one. */
        !          5398:       if( iBuffer==nBuffers ){
        !          5399:         if( nBuffers==nMaxBuffers ){
        !          5400:           DataBuffer *p;
        !          5401:           nMaxBuffers += 20;
        !          5402: 
        !          5403:           /* Manual realloc so we can handle NULL appropriately. */
        !          5404:           p = sqlite3_malloc(nMaxBuffers*sizeof(*pBuffers));
        !          5405:           if( p==NULL ){
        !          5406:             rc = SQLITE_NOMEM;
        !          5407:             break;
        !          5408:           }
        !          5409: 
        !          5410:           if( nBuffers>0 ){
        !          5411:             assert(pBuffers!=NULL);
        !          5412:             memcpy(p, pBuffers, nBuffers*sizeof(*pBuffers));
        !          5413:             sqlite3_free(pBuffers);
        !          5414:           }
        !          5415:           pBuffers = p;
        !          5416:         }
        !          5417:         dataBufferInit(&(pBuffers[nBuffers]), 0);
        !          5418:         nBuffers++;
        !          5419:       }
        !          5420: 
        !          5421:       /* At this point, must have an empty at iBuffer. */
        !          5422:       assert(iBuffer<nBuffers && pBuffers[iBuffer].nData==0);
        !          5423: 
        !          5424:       /* If empty was first buffer, no need for merge logic. */
        !          5425:       if( iBuffer==0 ){
        !          5426:         dataBufferReplace(&(pBuffers[0]), pData, nData);
        !          5427:       }else{
        !          5428:         /* pAcc is the empty buffer the merged data will end up in. */
        !          5429:         DataBuffer *pAcc = &(pBuffers[iBuffer]);
        !          5430:         DataBuffer *p = &(pBuffers[0]);
        !          5431: 
        !          5432:         /* Handle position 0 specially to avoid need to prime pAcc
        !          5433:         ** with pData/nData.
        !          5434:         */
        !          5435:         dataBufferSwap(p, pAcc);
        !          5436:         docListAccumulateUnion(pAcc, pData, nData);
        !          5437: 
        !          5438:         /* Accumulate remaining doclists into pAcc. */
        !          5439:         for(++p; p<pAcc; ++p){
        !          5440:           docListAccumulateUnion(pAcc, p->pData, p->nData);
        !          5441: 
        !          5442:           /* dataBufferReset() could allow a large doclist to blow up
        !          5443:           ** our memory requirements.
        !          5444:           */
        !          5445:           if( p->nCapacity<1024 ){
        !          5446:             dataBufferReset(p);
        !          5447:           }else{
        !          5448:             dataBufferDestroy(p);
        !          5449:             dataBufferInit(p, 0);
        !          5450:           }
        !          5451:         }
        !          5452:       }
        !          5453:     }
        !          5454:   }
        !          5455: 
        !          5456:   /* Union all the doclists together into *out. */
        !          5457:   /* TODO(shess) What if *out is big?  Sigh. */
        !          5458:   if( rc==SQLITE_OK && nBuffers>0 ){
        !          5459:     int iBuffer;
        !          5460:     for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){
        !          5461:       if( pBuffers[iBuffer].nData>0 ){
        !          5462:         if( out->nData==0 ){
        !          5463:           dataBufferSwap(out, &(pBuffers[iBuffer]));
        !          5464:         }else{
        !          5465:           docListAccumulateUnion(out, pBuffers[iBuffer].pData,
        !          5466:                                  pBuffers[iBuffer].nData);
        !          5467:         }
        !          5468:       }
        !          5469:     }
        !          5470:   }
        !          5471: 
        !          5472:   while( nBuffers-- ){
        !          5473:     dataBufferDestroy(&(pBuffers[nBuffers]));
        !          5474:   }
        !          5475:   if( pBuffers!=NULL ) sqlite3_free(pBuffers);
        !          5476: 
        !          5477:   return rc;
        !          5478: }
        !          5479: 
        !          5480: /* Call loadSegmentLeavesInt() with pData/nData as input. */
        !          5481: static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
        !          5482:                            const char *pTerm, int nTerm, int isPrefix,
        !          5483:                            DataBuffer *out){
        !          5484:   LeavesReader reader;
        !          5485:   int rc;
        !          5486: 
        !          5487:   assert( nData>1 );
        !          5488:   assert( *pData=='\0' );
        !          5489:   rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader);
        !          5490:   if( rc!=SQLITE_OK ) return rc;
        !          5491: 
        !          5492:   rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
        !          5493:   leavesReaderReset(&reader);
        !          5494:   leavesReaderDestroy(&reader);
        !          5495:   return rc;
        !          5496: }
        !          5497: 
        !          5498: /* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to
        !          5499: ** iEndLeaf (inclusive) as input, and merge the resulting doclist into
        !          5500: ** out.
        !          5501: */
        !          5502: static int loadSegmentLeaves(fulltext_vtab *v,
        !          5503:                              sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf,
        !          5504:                              const char *pTerm, int nTerm, int isPrefix,
        !          5505:                              DataBuffer *out){
        !          5506:   int rc;
        !          5507:   LeavesReader reader;
        !          5508: 
        !          5509:   assert( iStartLeaf<=iEndLeaf );
        !          5510:   rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader);
        !          5511:   if( rc!=SQLITE_OK ) return rc;
        !          5512: 
        !          5513:   rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out);
        !          5514:   leavesReaderReset(&reader);
        !          5515:   leavesReaderDestroy(&reader);
        !          5516:   return rc;
        !          5517: }
        !          5518: 
        !          5519: /* Taking pData/nData as an interior node, find the sequence of child
        !          5520: ** nodes which could include pTerm/nTerm/isPrefix.  Note that the
        !          5521: ** interior node terms logically come between the blocks, so there is
        !          5522: ** one more blockid than there are terms (that block contains terms >=
        !          5523: ** the last interior-node term).
        !          5524: */
        !          5525: /* TODO(shess) The calling code may already know that the end child is
        !          5526: ** not worth calculating, because the end may be in a later sibling
        !          5527: ** node.  Consider whether breaking symmetry is worthwhile.  I suspect
        !          5528: ** it is not worthwhile.
        !          5529: */
        !          5530: static void getChildrenContaining(const char *pData, int nData,
        !          5531:                                   const char *pTerm, int nTerm, int isPrefix,
        !          5532:                                   sqlite_int64 *piStartChild,
        !          5533:                                   sqlite_int64 *piEndChild){
        !          5534:   InteriorReader reader;
        !          5535: 
        !          5536:   assert( nData>1 );
        !          5537:   assert( *pData!='\0' );
        !          5538:   interiorReaderInit(pData, nData, &reader);
        !          5539: 
        !          5540:   /* Scan for the first child which could contain pTerm/nTerm. */
        !          5541:   while( !interiorReaderAtEnd(&reader) ){
        !          5542:     if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break;
        !          5543:     interiorReaderStep(&reader);
        !          5544:   }
        !          5545:   *piStartChild = interiorReaderCurrentBlockid(&reader);
        !          5546: 
        !          5547:   /* Keep scanning to find a term greater than our term, using prefix
        !          5548:   ** comparison if indicated.  If isPrefix is false, this will be the
        !          5549:   ** same blockid as the starting block.
        !          5550:   */
        !          5551:   while( !interiorReaderAtEnd(&reader) ){
        !          5552:     if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break;
        !          5553:     interiorReaderStep(&reader);
        !          5554:   }
        !          5555:   *piEndChild = interiorReaderCurrentBlockid(&reader);
        !          5556: 
        !          5557:   interiorReaderDestroy(&reader);
        !          5558: 
        !          5559:   /* Children must ascend, and if !prefix, both must be the same. */
        !          5560:   assert( *piEndChild>=*piStartChild );
        !          5561:   assert( isPrefix || *piStartChild==*piEndChild );
        !          5562: }
        !          5563: 
        !          5564: /* Read block at iBlockid and pass it with other params to
        !          5565: ** getChildrenContaining().
        !          5566: */
        !          5567: static int loadAndGetChildrenContaining(
        !          5568:   fulltext_vtab *v,
        !          5569:   sqlite_int64 iBlockid,
        !          5570:   const char *pTerm, int nTerm, int isPrefix,
        !          5571:   sqlite_int64 *piStartChild, sqlite_int64 *piEndChild
        !          5572: ){
        !          5573:   sqlite3_stmt *s = NULL;
        !          5574:   int rc;
        !          5575: 
        !          5576:   assert( iBlockid!=0 );
        !          5577:   assert( pTerm!=NULL );
        !          5578:   assert( nTerm!=0 );        /* TODO(shess) Why not allow this? */
        !          5579:   assert( piStartChild!=NULL );
        !          5580:   assert( piEndChild!=NULL );
        !          5581: 
        !          5582:   rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
        !          5583:   if( rc!=SQLITE_OK ) return rc;
        !          5584: 
        !          5585:   rc = sqlite3_bind_int64(s, 1, iBlockid);
        !          5586:   if( rc!=SQLITE_OK ) return rc;
        !          5587: 
        !          5588:   rc = sqlite3_step(s);
        !          5589:   if( rc==SQLITE_DONE ) return SQLITE_ERROR;
        !          5590:   if( rc!=SQLITE_ROW ) return rc;
        !          5591: 
        !          5592:   getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
        !          5593:                         pTerm, nTerm, isPrefix, piStartChild, piEndChild);
        !          5594: 
        !          5595:   /* We expect only one row.  We must execute another sqlite3_step()
        !          5596:    * to complete the iteration; otherwise the table will remain
        !          5597:    * locked. */
        !          5598:   rc = sqlite3_step(s);
        !          5599:   if( rc==SQLITE_ROW ) return SQLITE_ERROR;
        !          5600:   if( rc!=SQLITE_DONE ) return rc;
        !          5601: 
        !          5602:   return SQLITE_OK;
        !          5603: }
        !          5604: 
        !          5605: /* Locate the doclist for pTerm[nTerm] in the tree rooted at
        !          5606: ** pData[nData], placing it into *out.  Kept separate from
        !          5607: ** loadSegment() so that error handling there stays simple.
        !          5608: */
        !          5609: static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
        !          5610:                           sqlite_int64 iLeavesEnd,
        !          5611:                           const char *pTerm, int nTerm, int isPrefix,
        !          5612:                           DataBuffer *out){
        !          5613:   sqlite_int64 iFirst, iLast;   /* Range of child blocks still to visit. */
        !          5614:   int rc;
        !          5615: 
        !          5616:   /* A root that starts with a zero byte is itself a leaf; scan it. */
        !          5617:   if( *pData=='\0' ){
        !          5618:     return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out);
        !          5619:   }
        !          5620: 
        !          5621:   /* Otherwise the root is an interior node.  Narrow the child range
        !          5622:   ** one level at a time until it falls within the leaf blocks.
        !          5623:   */
        !          5624:   getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix,
        !          5625:                         &iFirst, &iLast);
        !          5626:   while( iFirst>iLeavesEnd ){
        !          5627:     sqlite_int64 iBelowFirst, iBelowLast;
        !          5628: 
        !          5629:     rc = loadAndGetChildrenContaining(v, iFirst, pTerm, nTerm, isPrefix,
        !          5630:                                       &iBelowFirst, &iBelowLast);
        !          5631:     if( rc!=SQLITE_OK ) return rc;
        !          5632: 
        !          5633:     /* A range spanning several nodes takes its end from the last one. */
        !          5634:     if( iFirst!=iLast ){
        !          5635:       sqlite_int64 iUnused;
        !          5636:       rc = loadAndGetChildrenContaining(v, iLast, pTerm, nTerm, isPrefix,
        !          5637:                                         &iUnused, &iBelowLast);
        !          5638:       if( rc!=SQLITE_OK ) return rc;
        !          5639:     }
        !          5640: 
        !          5641:     assert( iBelowFirst<=iBelowLast );
        !          5642:     iFirst = iBelowFirst;
        !          5643:     iLast = iBelowLast;
        !          5644:   }
        !          5645:   assert( iFirst<=iLeavesEnd );
        !          5646:   assert( iLast<=iLeavesEnd );
        !          5647: 
        !          5648:   /* Collect doclists from the leaf blocks iFirst..iLast. */
        !          5649:   return loadSegmentLeaves(v, iFirst, iLast, pTerm, nTerm, isPrefix, out);
        !          5650: }
        !          5651: 
        !          5652: /* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then
        !          5653: ** merge its doclist over *out (any duplicate doclists read from the
        !          5654: ** segment rooted at pData will overwrite those in *out).
        !          5655: */
        !          5656: /* TODO(shess) Consider changing this to determine the depth of the
        !          5657: ** leaves using either the first characters of interior nodes (when
        !          5658: ** ==1, we're one level above the leaves), or the first character of
        !          5659: ** the root (which will describe the height of the tree directly).
        !          5660: ** Either feels somewhat tricky to me.
        !          5661: */
        !          5662: /* TODO(shess) The current merge is likely to be slow for large
        !          5663: ** doclists (though it should process from newest/smallest to
        !          5664: ** oldest/largest, so it may not be that bad).  It might be useful to
        !          5665: ** modify things to allow for N-way merging.  This could either be
        !          5666: ** within a segment, with pairwise merges across segments, or across
        !          5667: ** all segments at once.
        !          5668: */
        !          5669: static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
        !          5670:                        sqlite_int64 iLeavesEnd,
        !          5671:                        const char *pTerm, int nTerm, int isPrefix,
        !          5672:                        DataBuffer *out){
        !          5673:   DataBuffer result;   /* Doclist data read from this segment. */
        !          5674:   int rc;
        !          5675: 
        !          5676:   assert( nData>1 );
        !          5677:   assert( v->nPendingData<0 );   /* No buffered updates allowed here. */
        !          5678: 
        !          5679:   dataBufferInit(&result, 0);
        !          5680:   rc = loadSegmentInt(v, pData, nData, iLeavesEnd,
        !          5681:                       pTerm, nTerm, isPrefix, &result);
        !          5682:   if( rc!=SQLITE_OK || result.nData<=0 ){
        !          5683:     dataBufferDestroy(&result);   /* Failed or found nothing: *out is kept. */
        !          5684:     return rc;
        !          5685:   }
        !          5686:   if( out->nData==0 ){
        !          5687:     /* Nothing accumulated yet: trade buffers rather than merging. */
        !          5688:     DataBuffer swap = *out;
        !          5689:     *out = result;
        !          5690:     result = swap;
        !          5691:   }else{
        !          5692:     DLReader readers[2];
        !          5693:     DataBuffer merged;
        !          5694:     dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData);
        !          5695:     dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData);
        !          5696:     dataBufferInit(&merged, out->nData+result.nData);
        !          5697:     docListMerge(&merged, readers, 2);
        !          5698:     dataBufferDestroy(out);
        !          5699:     *out = merged;
        !          5700:     dlrDestroy(&readers[0]);
        !          5701:     dlrDestroy(&readers[1]);
        !          5702:   }
        !          5703:   dataBufferDestroy(&result);
        !          5704:   return rc;
        !          5705: }
        !          5706: 
        !          5707: /* Gather the doclist for pTerm[nTerm] from every segment returned by
        !          5708: ** SEGDIR_SELECT_ALL_STMT, then trim it by iColumn and iType into *out.
        !          5709: */
        !          5710: static int termSelect(fulltext_vtab *v, int iColumn,
        !          5711:                       const char *pTerm, int nTerm, int isPrefix,
        !          5712:                       DocListType iType, DataBuffer *out){
        !          5713:   DataBuffer doclist;
        !          5714:   sqlite3_stmt *s;
        !          5715:   int rc;
        !          5716: 
        !          5717:   rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
        !          5718:   if( rc!=SQLITE_OK ) return rc;
        !          5719: 
        !          5720:   assert( v->nPendingData<0 );   /* No buffered updates allowed here. */
        !          5721: 
        !          5722:   /* Segments are visited oldest first, letting entries from newer
        !          5723:   ** segments replace older entries for the same docid.
        !          5724:   */
        !          5725:   dataBufferInit(&doclist, 0);
        !          5726:   for(rc=sqlite3_step(s); rc==SQLITE_ROW; rc=sqlite3_step(s)){
        !          5727:     const char *pRoot = sqlite3_column_blob(s, 2);
        !          5728:     const int nRoot = sqlite3_column_bytes(s, 2);
        !          5729:     const sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
        !          5730:     rc = loadSegment(v, pRoot, nRoot, iEnd, pTerm, nTerm, isPrefix, &doclist);
        !          5731:     if( rc!=SQLITE_OK ) goto err;
        !          5732:   }
        !          5733: 
        !          5734:   if( rc!=SQLITE_DONE ) goto err;   /* sqlite3_step() did not finish cleanly. */
        !          5735: 
        !          5736:   rc = SQLITE_OK;
        !          5737:   if( doclist.nData!=0 ){
        !          5738:     /* TODO(shess) The old term_select_all() code applied the column
        !          5739:     ** restrict as we merged segments, leading to smaller buffers.
        !          5740:     ** This is probably worthwhile to bring back, once the new storage
        !          5741:     ** system is checked in.
        !          5742:     */
        !          5743:     if( iColumn==v->nColumn ) iColumn = -1;
        !          5744:     docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
        !          5745:                 iColumn, iType, out);
        !          5746:   }
        !          5747: 
        !          5748:  err:
        !          5749:   dataBufferDestroy(&doclist);
        !          5750:   return rc;
        !          5751: }
        !          5752: 
        !          5753: /****************************************************************/
        !          5754: /* Used to hold hashtable data for sorting: one entry per hash element. */
        !          5755: typedef struct TermData {
        !          5756:   const char *pTerm;          /* Term bytes (the hash key). */
        !          5757:   int nTerm;                  /* Size of pTerm (the hash key size). */
        !          5758:   DLCollector *pCollector;    /* Hash data: the collector for the term. */
        !          5759: } TermData;
        !          5760: 
        !          5761: /* qsort() comparison for TermData: byte-wise by term, strcmp fashion
        !          5762: ** (<0, 0, >0).  When one term is a prefix of the other, shorter first.
        !          5763: */
        !          5764: static int termDataCmp(const void *av, const void *bv){
        !          5765:   const TermData *pLeft = (const TermData *)av;
        !          5766:   const TermData *pRight = (const TermData *)bv;
        !          5767:   int nLenDiff = pLeft->nTerm - pRight->nTerm;
        !          5768:   int nShared = nLenDiff<0 ? pLeft->nTerm : pRight->nTerm;
        !          5769:   int c = memcmp(pLeft->pTerm, pRight->pTerm, nShared);
        !          5770:   return c!=0 ? c : nLenDiff;
        !          5771: }
        !          5772: 
        !          5773: /* Sort the terms in pTerms and write them, with their doclists, as a
        !          5774: ** new level-0 segment using LeafWriter.  Returns SQLITE_OK, or the
        !          5775: ** SQLite error code (SQLITE_NOMEM included) that prevented success.
        !          5776: */
        !          5777: static int writeZeroSegment(fulltext_vtab *v, fts2Hash *pTerms){
        !          5778:   fts2HashElem *e;
        !          5779:   int idx, rc, i, n;
        !          5780:   TermData *pData;
        !          5781:   LeafWriter writer;
        !          5782:   DataBuffer dl;
        !          5783: 
        !          5784:   /* Determine the next index at level 0, merging as necessary. */
        !          5785:   rc = segdirNextIndex(v, 0, &idx);
        !          5786:   if( rc!=SQLITE_OK ) return rc;
        !          5787: 
        !          5788:   n = fts2HashCount(pTerms);
        !          5789:   pData = sqlite3_malloc(n*sizeof(*pData));
        !          5790:   /* sqlite3_malloc(0) also returns NULL, so only n>0 means failure. */
        !          5791:   if( n>0 && pData==0 ) return SQLITE_NOMEM;
        !          5792: 
        !          5793:   for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){
        !          5794:     assert( i<n );
        !          5795:     pData[i].pTerm = fts2HashKey(e);
        !          5796:     pData[i].nTerm = fts2HashKeysize(e);
        !          5797:     pData[i].pCollector = fts2HashData(e);
        !          5798:   }
        !          5799:   assert( i==n );
        !          5800: 
        !          5801:   /* TODO(shess) Should we allow user-defined collation sequences,
        !          5802:   ** here?  I think we only need that once we support prefix searches.
        !          5803:   */
        !          5804:   if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp);
        !          5805:   /* TODO(shess) Refactor so that we can write directly to the segment
        !          5806:   ** DataBuffer, as happens for segment merges.
        !          5807:   */
        !          5808:   leafWriterInit(0, idx, &writer);
        !          5809:   dataBufferInit(&dl, 0);
        !          5810:   for(i=0; i<n; i++){
        !          5811:     dataBufferReset(&dl);
        !          5812:     dlcAddDoclist(pData[i].pCollector, &dl);
        !          5813:     rc = leafWriterStep(v, &writer,
        !          5814:                         pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData);
        !          5815:     if( rc!=SQLITE_OK ) goto err;
        !          5816:   }
        !          5817:   rc = leafWriterFinalize(v, &writer);
        !          5818:  err:
        !          5819:   dataBufferDestroy(&dl);
        !          5820:   sqlite3_free(pData);
        !          5821:   leafWriterDestroy(&writer);
        !          5822:   return rc;
        !          5823: }
        !          5823: 
        !          5824: /* Discard the buffered terms, if any, without writing them out. */
        !          5825: static int clearPendingTerms(fulltext_vtab *v){
        !          5826:   fts2HashElem *pElem;
        !          5827: 
        !          5828:   if( v->nPendingData<0 ) return SQLITE_OK;   /* Nothing is buffered. */
        !          5829:   for(pElem=fts2HashFirst(&v->pendingTerms); pElem; pElem=fts2HashNext(pElem)){
        !          5830:     dlcDelete(fts2HashData(pElem));
        !          5831:   }
        !          5832:   fts2HashClear(&v->pendingTerms);
        !          5833:   v->nPendingData = -1;   /* Negative means pendingTerms is not set up. */
        !          5834:   return SQLITE_OK;
        !          5835: }
        !          5836: 
        !          5837: /* Write the buffered terms, if any, to a new level-0 segment and
        !          5838: ** then release them.  On failure the buffer is not cleared.
        !          5839: */
        !          5840: static int flushPendingTerms(fulltext_vtab *v){
        !          5841:   int rc = SQLITE_OK;
        !          5842:   if( v->nPendingData>=0 ){
        !          5843:     rc = writeZeroSegment(v, &v->pendingTerms);
        !          5844:     if( rc==SQLITE_OK ) clearPendingTerms(v);
        !          5845:   }
        !          5846:   return rc;
        !          5847: }
        !          5848: 
        !          5849: /* Flush pendingTerms when it has grown "too big" or when iDocid does
        !          5850: ** not follow the previous docid, then make sure it is ready for use. */
        !          5851: static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){
        !          5852:   int needFlush = iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold;
        !          5853:   /* TODO(shess) Explore whether partially flushing the buffer on
        !          5854:   ** forced-flush would provide better performance.  I suspect that if
        !          5855:   ** we ordered the doclists by size and flushed the largest until the
        !          5856:   ** buffer was half empty, that would let the less frequent terms
        !          5857:   ** generate longer doclists.
        !          5858:   */
        !          5859:   if( needFlush ){
        !          5860:     int rc = flushPendingTerms(v);
        !          5861:     if( rc!=SQLITE_OK ) return rc;
        !          5862:   }
        !          5863:   if( v->nPendingData<0 ){
        !          5864:     fts2HashInit(&v->pendingTerms, FTS2_HASH_STRING, 1);
        !          5865:     v->nPendingData = 0;
        !          5866:   }
        !          5867:   v->iPrevDocid = iDocid;
        !          5868:   return SQLITE_OK;
        !          5869: }
        !          5870: 
        !          5871: /* xUpdate callback: the single entry point through which rows of a
        !          5872:  * full-text table are deleted, updated or inserted.  Returns SQLITE_OK
        !          5873:  * or the error code of the step that failed. */
        !          5874: static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
        !          5875:                    sqlite_int64 *pRowid){
        !          5876:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
        !          5877:   sqlite_int64 rowid;
        !          5878:   int rc;
        !          5879: 
        !          5880:   TRACE(("FTS2 Update %p\n", pVtab));
        !          5881: 
        !          5882:   if( nArg<2 ){
        !          5883:     /* A delete: ppArg[0] = rowid of the row to remove. */
        !          5884:     rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
        !          5885:     if( rc!=SQLITE_OK ) return rc;
        !          5886: 
        !          5887:     rc = content_exists(v);
        !          5888:     if( rc==SQLITE_ROW ) return SQLITE_OK;   /* Rows remain. */
        !          5889:     /* Any result other than SQLITE_DONE is handed back unchanged. */
        !          5890:     if( rc!=SQLITE_DONE ) return rc;
        !          5891: 
        !          5892:     /* The last row is gone.  Drop the pending terms first so that no
        !          5893:     ** useless level-0 segment is flushed when the transaction closes,
        !          5894:     ** then clear out the index data.
        !          5895:     */
        !          5896:     rc = clearPendingTerms(v);
        !          5897:     if( rc!=SQLITE_OK ) return rc;
        !          5898:     return segdir_delete_all(v);
        !          5899:   }
        !          5900: 
        !          5901:   if( sqlite3_value_type(ppArg[0])==SQLITE_NULL ){
        !          5902:     /* An insert:
        !          5903:      * ppArg[1] = requested rowid
        !          5904:      * ppArg[2..2+v->nColumn-1] = values
        !          5905:      * ppArg[2+v->nColumn] = value for magic column (we ignore this)
        !          5906:      */
        !          5907:     assert( nArg==2+v->nColumn+1);
        !          5908:     return index_insert(v, ppArg[1], &ppArg[2], pRowid);
        !          5909:   }
        !          5910: 
        !          5911:   /* An update:
        !          5912:    * ppArg[0] = old rowid
        !          5913:    * ppArg[1] = new rowid
        !          5914:    * ppArg[2..2+v->nColumn-1] = values
        !          5915:    * ppArg[2+v->nColumn] = value for magic column (we ignore this)
        !          5916:    */
        !          5917:   rowid = sqlite3_value_int64(ppArg[0]);
        !          5918:   if( sqlite3_value_type(ppArg[1])!=SQLITE_INTEGER
        !          5919:    || sqlite3_value_int64(ppArg[1])!=rowid ){
        !          5920:     return SQLITE_ERROR;   /* Changing the rowid is not supported. */
        !          5921:   }
        !          5922: 
        !          5923:   assert( nArg==2+v->nColumn+1);
        !          5924:   return index_update(v, rowid, &ppArg[2]);
        !          5925: }
        !          5926: /* xSync callback: flush any buffered terms via flushPendingTerms(). */
        !          5927: static int fulltextSync(sqlite3_vtab *pVtab){
        !          5928:   TRACE(("FTS2 xSync()\n"));
        !          5929:   return flushPendingTerms((fulltext_vtab *)pVtab);
        !          5930: }
        !          5931: /* xBegin callback: a transaction starts with no buffered terms. */
        !          5932: static int fulltextBegin(sqlite3_vtab *pVtab){
        !          5933:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
        !          5934:   TRACE(("FTS2 xBegin()\n"));
        !          5935: 
        !          5936:   /* Any buffered updates should have been cleared by the previous
        !          5937:   ** transaction.
        !          5938:   */
        !          5939:   assert( v->nPendingData<0 );
        !          5940:   return clearPendingTerms(v);  /* Still clears if assert() is compiled out. */
        !          5941: }
        !          5942: /* xCommit callback: expects fulltextSync() to have emptied the buffer. */
        !          5943: static int fulltextCommit(sqlite3_vtab *pVtab){
        !          5944:   fulltext_vtab *v = (fulltext_vtab *) pVtab;
        !          5945:   TRACE(("FTS2 xCommit()\n"));
        !          5946: 
        !          5947:   /* Buffered updates should have been cleared by fulltextSync(). */
        !          5948:   assert( v->nPendingData<0 );
        !          5949:   return clearPendingTerms(v);  /* Still clears if assert() is compiled out. */
        !          5950: }
        !          5951: /* xRollback callback: drop buffered terms without writing them. */
        !          5952: static int fulltextRollback(sqlite3_vtab *pVtab){
        !          5953:   TRACE(("FTS2 xRollback()\n"));
        !          5954:   return clearPendingTerms((fulltext_vtab *)pVtab);
        !          5955: }
        !          5956: 
        !          5957: /*
        !          5958: ** Implementation of the snippet() function for FTS2.  argv[0] is the
        !          5959: ** cursor blob; argv[1..3] optionally replace the start mark, end mark
        !          5960: ** and ellipsis.  An argument whose text is NULL keeps the default.
        !          5961: */
        !          5962: static void snippetFunc(
        !          5963:   sqlite3_context *pContext,
        !          5964:   int argc,
        !          5965:   sqlite3_value **argv
        !          5966: ){
        !          5967:   const char *azMark[3] = { "<b>", "</b>", "<b>...</b>" };  /* Defaults. */
        !          5968:   fulltext_cursor *pCursor;
        !          5969:   int i;
        !          5970: 
        !          5971:   if( argc<1 ) return;
        !          5972:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
        !          5973:       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
        !          5974:     sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
        !          5975:     return;
        !          5976:   }
        !          5977:   memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
        !          5978:   /* sqlite3_value_text() returns NULL for an SQL NULL (or on OOM);
        !          5979:   ** keep the default then instead of handing NULL to snippetText().
        !          5980:   */
        !          5981:   for(i=0; i<3 && i+1<argc; i++){
        !          5982:     const char *zArg = (const char*)sqlite3_value_text(argv[i+1]);
        !          5983:     if( zArg ) azMark[i] = zArg;
        !          5984:   }
        !          5985:   snippetAllOffsets(pCursor);
        !          5986:   snippetText(pCursor, azMark[0], azMark[1], azMark[2]);
        !          5987:   sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
        !          5988:                       pCursor->snippet.nSnippet, SQLITE_STATIC);
        !          5989: }
        !          5990: 
        !          5991: /* Implementation of the offsets() function for FTS2.  argv[0] must
        !          5992: ** be a blob the size of a cursor pointer, else an error is reported.
        !          5993: */
        !          5994: static void snippetOffsetsFunc(
        !          5995:   sqlite3_context *pContext,
        !          5996:   int argc,
        !          5997:   sqlite3_value **argv
        !          5998: ){
        !          5999:   fulltext_cursor *pCursor;
        !          6000: 
        !          6001:   if( argc<1 ) return;
        !          6002:   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB
        !          6003:    || sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
        !          6004:     sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
        !          6005:     return;
        !          6006:   }
        !          6007:   memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
        !          6008:   snippetAllOffsets(pCursor);
        !          6009:   snippetOffsetText(&pCursor->snippet);
        !          6010:   sqlite3_result_text(pContext, pCursor->snippet.zOffset,
        !          6011:                       pCursor->snippet.nOffset, SQLITE_STATIC);
        !          6012: }
        !          6013: 
        !          6014: /* OptLeavesReader is nearly identical to LeavesReader, except that
        !          6015: ** where LeavesReader is geared towards the merging of complete
        !          6016: ** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader
        !          6017: ** is geared towards implementation of the optimize() function, and
        !          6018: ** can merge all segments simultaneously.  This version may be
        !          6019: ** somewhat less efficient than LeavesReader because it merges into an
        !          6020: ** accumulator rather than doing an N-way merge, but since segment
        !          6021: ** size grows exponentially (so segment count logarithmically) this is
        !          6022: ** probably not an immediate problem.
        !          6023: */
        !          6024: /* TODO(shess): Prove that assertion, or extend the merge code to
        !          6025: ** merge tree fashion (like the prefix-searching code does).
        !          6026: */
        !          6027: /* TODO(shess): OptLeavesReader and LeavesReader could probably be
        !          6028: ** merged with little or no loss of performance for LeavesReader.  The
        !          6029: ** merged code would need to handle >MERGE_COUNT segments, and would
        !          6030: ** also need to be able to optionally optimize away deletes.
        !          6031: */
        !          6032: typedef struct OptLeavesReader {
        !          6033:   /* Segment number, to order readers by age (see optLeavesReaderCmp()). */
        !          6034:   int segment;
        !          6035:   LeavesReader reader;   /* Wrapped reader; optLeavesReader*() delegate to it. */
        !          6036: } OptLeavesReader;
        !          6037: 
        !          6038: static int optLeavesReaderAtEnd(OptLeavesReader *pReader){
        !          6039:   return leavesReaderAtEnd(&pReader->reader);  /* eof test on wrapped reader */
        !          6040: }
        !          6041: static int optLeavesReaderTermBytes(OptLeavesReader *pReader){
        !          6042:   return leavesReaderTermBytes(&pReader->reader);  /* size of current term */
        !          6043: }
        !          6044: static const char *optLeavesReaderData(OptLeavesReader *pReader){
        !          6045:   return leavesReaderData(&pReader->reader);  /* current term's doclist data */
        !          6046: }
        !          6047: static int optLeavesReaderDataBytes(OptLeavesReader *pReader){
        !          6048:   return leavesReaderDataBytes(&pReader->reader);  /* size of that data */
        !          6049: }
        !          6050: static const char *optLeavesReaderTerm(OptLeavesReader *pReader){
        !          6051:   return leavesReaderTerm(&pReader->reader);  /* current term of wrapped reader */
        !          6052: }
        !          6053: static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){
        !          6054:   return leavesReaderStep(v, &pReader->reader);  /* step the wrapped reader */
        !          6055: }
        !          6056: static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
        !          6057:   return leavesReaderTermCmp(&lr1->reader, &lr2->reader);  /* terms only */
        !          6058: }
        !          6059: /* Order by term ascending, then by segment ascending (oldest segment
        !          6060: ** first) when the terms match.  Exhausted readers are meant to sort last.
        !          6061: */
        !          6062: static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){
        !          6063:   int nTermOrder = optLeavesReaderTermCmp(lr1, lr2);
        !          6064:   if( nTermOrder==0 ) return lr1->segment-lr2->segment;
        !          6065:   return nTermOrder;
        !          6066: }
        !          6067: /* Swap pLr[0] forward until it no longer compares greater than its
        !          6068: ** successor.  Assumes that pLr[1..nLr-1] is already sorted.
        !          6069: */
        !          6070: static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){
        !          6071:   int i = 0;
        !          6072:   while( i+1<nLr && optLeavesReaderCmp(&pLr[i], &pLr[i+1])>0 ){
        !          6073:     OptLeavesReader held = pLr[i];
        !          6074:     pLr[i] = pLr[i+1];
        !          6075:     pLr[i+1] = held;
        !          6076:     i++;
        !          6077:   }
        !          6078: }
        !          6079: 
        !          6080: /* optimize() helper function.  Put the readers in order and iterate
        !          6081: ** through them, merging doclists for matching terms into pWriter.
        !          6082: ** Reads readers[0] unconditionally, so nReaders must be at least 1.
        !          6083: ** Returns SQLITE_OK, or the SQLite error code which prevented success.
        !          6084: */
        !          6085: static int optimizeInternal(fulltext_vtab *v,
        !          6086:                             OptLeavesReader *readers, int nReaders,
        !          6087:                             LeafWriter *pWriter){
        !          6088:   int i, rc = SQLITE_OK;
        !          6089:   DataBuffer doclist, merged, tmp;
        !          6090: 
        !          6091:   /* Order the readers, inserting from the back of the array forward. */
        !          6092:   i = nReaders;
        !          6093:   while( i-- > 0 ){
        !          6094:     optLeavesReaderReorder(&readers[i], nReaders-i);
        !          6095:   }
        !          6096: 
        !          6097:   dataBufferInit(&doclist, LEAF_MAX);
        !          6098:   dataBufferInit(&merged, LEAF_MAX);
        !          6099: 
        !          6100:   /* Exhausted readers bubble to the end, so when the first reader is
        !          6101:   ** at eof, all are at eof.
        !          6102:   */
        !          6103:   while( !optLeavesReaderAtEnd(&readers[0]) ){
        !          6104: 
        !          6105:     /* Figure out how many readers share the next term. */
        !          6106:     for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
        !          6107:       if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
        !          6108:     }
        !          6109: 
        !          6110:     /* Special-case for no merge. */
        !          6111:     if( i==1 ){
        !          6112:       /* Trim deletions from the doclist. */
        !          6113:       dataBufferReset(&merged);
        !          6114:       docListTrim(DL_DEFAULT,
        !          6115:                   optLeavesReaderData(&readers[0]),
        !          6116:                   optLeavesReaderDataBytes(&readers[0]),
        !          6117:                   -1, DL_DEFAULT, &merged);
        !          6118:     }else{
        !          6119:       DLReader dlReaders[MERGE_COUNT];
        !          6120:       int iReader, nMerge;  /* nMerge: dlReaders[] slots used this pass. */
        !          6121: 
        !          6122:       /* Prime the pipeline with the first reader's doclist.  After
        !          6123:       ** one pass index 0 will reference the accumulated doclist.
        !          6124:       */
        !          6125:       dlrInit(&dlReaders[0], DL_DEFAULT,
        !          6126:               optLeavesReaderData(&readers[0]),
        !          6127:               optLeavesReaderDataBytes(&readers[0]));
        !          6128:       iReader = 1;
        !          6129: 
        !          6130:       assert( iReader<i );  /* Must execute the loop at least once. */
        !          6131:       while( iReader<i ){
        !          6132:         /* Merge up to MERGE_COUNT inputs per pass (slot 0 included). */
        !          6133:         for( nMerge=1; iReader<i && nMerge<MERGE_COUNT;
        !          6134:              iReader++, nMerge++ ){
        !          6135:           dlrInit(&dlReaders[nMerge], DL_DEFAULT,
        !          6136:                   optLeavesReaderData(&readers[iReader]),
        !          6137:                   optLeavesReaderDataBytes(&readers[iReader]));
        !          6138:         }
        !          6139: 
        !          6140:         /* Merge doclists and swap result into accumulator. */
        !          6141:         dataBufferReset(&merged);
        !          6142:         docListMerge(&merged, dlReaders, nMerge);
        !          6143:         tmp = merged;
        !          6144:         merged = doclist;
        !          6145:         doclist = tmp;
        !          6146: 
        !          6147:         while( nMerge-- > 0 ){
        !          6148:           dlrDestroy(&dlReaders[nMerge]);
        !          6149:         }
        !          6150: 
        !          6151:         /* Accumulated doclist to reader 0 for next pass. */
        !          6152:         dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
        !          6153:       }
        !          6154: 
        !          6155:       /* Destroy reader that was left in the pipeline. */
        !          6156:       dlrDestroy(&dlReaders[0]);
        !          6157: 
        !          6158:       /* Trim deletions from the doclist. */
        !          6159:       dataBufferReset(&merged);
        !          6160:       docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
        !          6161:                   -1, DL_DEFAULT, &merged);
        !          6162:     }
        !          6163: 
        !          6164:     /* Only pass doclists with hits (skip if all hits deleted). */
        !          6165:     if( merged.nData>0 ){
        !          6166:       rc = leafWriterStep(v, pWriter,
        !          6167:                           optLeavesReaderTerm(&readers[0]),
        !          6168:                           optLeavesReaderTermBytes(&readers[0]),
        !          6169:                           merged.pData, merged.nData);
        !          6170:       if( rc!=SQLITE_OK ) goto err;
        !          6171:     }
        !          6172: 
        !          6173:     /* Step merged readers to next term and reorder. */
        !          6174:     while( i-- > 0 ){
        !          6175:       rc = optLeavesReaderStep(v, &readers[i]);
        !          6176:       if( rc!=SQLITE_OK ) goto err;
        !          6177: 
        !          6178:       optLeavesReaderReorder(&readers[i], nReaders-i);
        !          6179:     }
        !          6180:   }
        !          6181: 
        !          6182:  err:
        !          6183:   dataBufferDestroy(&doclist);
        !          6184:   dataBufferDestroy(&merged);
        !          6185:   return rc;
        !          6186: }
        !          6187: 
        !          6188: /* Implements the optimize() function for fts2.  optimize(t) merges all
        !          6189: ** segments in the fts index into a single segment.  't' is the magic
        !          6190: ** table-named column, received as a blob holding the cursor pointer.
        !          6191: */
        !          6192: static void optimizeFunc(sqlite3_context *pContext,
        !          6193:                          int argc, sqlite3_value **argv){
        !          6194:   fulltext_cursor *pCursor;
        !          6195:   if( argc>1 ){
        !          6196:     sqlite3_result_error(pContext, "excess arguments to optimize()",-1);
        !          6197:   }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
        !          6198:             sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){  /* Must be a pointer-sized blob. */
        !          6199:     sqlite3_result_error(pContext, "illegal first argument to optimize",-1);
        !          6200:   }else{
        !          6201:     fulltext_vtab *v;
        !          6202:     int i, rc, iMaxLevel;
        !          6203:     OptLeavesReader *readers;
        !          6204:     int nReaders;
        !          6205:     LeafWriter writer;
        !          6206:     sqlite3_stmt *s;
        !          6207: 
        !          6208:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));  /* Recover the cursor pointer from the blob. */
        !          6209:     v = cursor_vtab(pCursor);
        !          6210: 
        !          6211:     /* Flush any buffered updates before optimizing. */
        !          6212:     rc = flushPendingTerms(v);
        !          6213:     if( rc!=SQLITE_OK ) goto err;
        !          6214: 
        !          6215:     rc = segdir_count(v, &nReaders, &iMaxLevel);
        !          6216:     if( rc!=SQLITE_OK ) goto err;
        !          6217:     if( nReaders==0 || nReaders==1 ){  /* Fewer than two readers: nothing to merge. */
        !          6218:       sqlite3_result_text(pContext, "Index already optimal", -1,
        !          6219:                           SQLITE_STATIC);
        !          6220:       return;
        !          6221:     }
        !          6222: 
        !          6223:     rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
        !          6224:     if( rc!=SQLITE_OK ) goto err;
        !          6225: 
        !          6226:     readers = sqlite3_malloc(nReaders*sizeof(readers[0]));
        !          6227:     if( readers==NULL ) goto err;  /* NOTE(review): rc is still SQLITE_OK here, so the text comes from sqlite3_errmsg() - confirm intended. */
        !          6228: 
        !          6229:     /* Note that there will already be a segment at this position
        !          6230:     ** until we call segdir_delete() on iMaxLevel.
        !          6231:     */
        !          6232:     leafWriterInit(iMaxLevel, 0, &writer);
        !          6233: 
        !          6234:     i = 0;  /* Counts the readers successfully initialized so far. */
        !          6235:     while( (rc = sqlite3_step(s))==SQLITE_ROW ){
        !          6236:       sqlite_int64 iStart = sqlite3_column_int64(s, 0);
        !          6237:       sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
        !          6238:       const char *pRootData = sqlite3_column_blob(s, 2);
        !          6239:       int nRootData = sqlite3_column_bytes(s, 2);
        !          6240: 
        !          6241:       assert( i<nReaders );
        !          6242:       rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
        !          6243:                             &readers[i].reader);
        !          6244:       if( rc!=SQLITE_OK ) break;  /* readers[0..i-1] are still destroyed below. */
        !          6245: 
        !          6246:       readers[i].segment = i;
        !          6247:       i++;
        !          6248:     }
        !          6249: 
        !          6250:     /* If we managed to successfully read them all, optimize them. */
        !          6251:     if( rc==SQLITE_DONE ){
        !          6252:       assert( i==nReaders );
        !          6253:       rc = optimizeInternal(v, readers, nReaders, &writer);
        !          6254:     }
        !          6255: 
        !          6256:     while( i-- > 0 ){  /* Tear down only the readers that were initialized. */
        !          6257:       leavesReaderDestroy(&readers[i].reader);
        !          6258:     }
        !          6259:     sqlite3_free(readers);
        !          6260: 
        !          6261:     /* If we've successfully gotten to here, delete the old segments
        !          6262:     ** and flush the interior structure of the new segment.
        !          6263:     */
        !          6264:     if( rc==SQLITE_OK ){
        !          6265:       for( i=0; i<=iMaxLevel; i++ ){  /* Every level from 0 through iMaxLevel. */
        !          6266:         rc = segdir_delete(v, i);
        !          6267:         if( rc!=SQLITE_OK ) break;
        !          6268:       }
        !          6269: 
        !          6270:       if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer);
        !          6271:     }
        !          6272: 
        !          6273:     leafWriterDestroy(&writer);  /* Runs on both the success and the failure path. */
        !          6274: 
        !          6275:     if( rc!=SQLITE_OK ) goto err;
        !          6276: 
        !          6277:     sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
        !          6278:     return;
        !          6279: 
        !          6280:     /* TODO(shess): Error-handling needs to be improved along the
        !          6281:     ** lines of the dump_ functions.
        !          6282:     */
        !          6283:  err:
        !          6284:     {
        !          6285:       char buf[512];  /* sqlite3_snprintf() below is bounded by sizeof(buf). */
        !          6286:       sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s",
        !          6287:                        sqlite3_errmsg(sqlite3_context_db_handle(pContext)));
        !          6288:       sqlite3_result_error(pContext, buf, -1);
        !          6289:     }
        !          6290:   }
        !          6291: }
        !          6292: 
        !          6293: #ifdef SQLITE_TEST
        !          6294: /* Set pContext's result to an error of the form "<prefix>: <msg>".  If
        !          6295: ** msg is NULL, use sqlite3_errmsg() of the context's db handle instead.
        !          6296: */
        !          6297: static void generateError(sqlite3_context *pContext,
        !          6298:                           const char *prefix, const char *msg){
        !          6299:   char buf[512];  /* Formatted message; the write below is bounded by sizeof(buf). */
        !          6300:   if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext));
        !          6301:   sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg);
        !          6302:   sqlite3_result_error(pContext, buf, -1);
        !          6303: }
        !          6304: 
        !          6305: /* Helper function to collect the set of terms in one segment into
        !          6306: ** pTerms.  The segment is described by the current row of s: columns 0
        !          6307: ** and 1 are the start and end leaf blockids (inclusive), and column 2
        !          6308: ** is the root data, which is the whole segment if iStartBlockid is 0.
        !          6309: ** Each term's hash value counts how often it was seen.  Returns an rc.
        !          6310: */
        !          6311: static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s,
        !          6312:                                fts2Hash *pTerms){
        !          6313:   const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0);
        !          6314:   const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1);
        !          6315:   const char *pRootData = sqlite3_column_blob(s, 2);
        !          6316:   const int nRootData = sqlite3_column_bytes(s, 2);
        !          6317:   LeavesReader reader;
        !          6318:   int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid,
        !          6319:                             pRootData, nRootData, &reader);
        !          6320:   if( rc!=SQLITE_OK ) return rc;  /* Returns without calling leavesReaderDestroy(). */
        !          6321: 
        !          6322:   while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){
        !          6323:     const char *pTerm = leavesReaderTerm(&reader);
        !          6324:     const int nTerm = leavesReaderTermBytes(&reader);
        !          6325:     void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm);
        !          6326:     void *newValue = (void *)((char *)oldValue+1);  /* The pointer value itself is used as a counter. */
        !          6327: 
        !          6328:     /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c,
        !          6329:     ** the data value passed is returned in case of malloc failure.
        !          6330:     */
        !          6331:     if( newValue==sqlite3Fts2HashInsert(pTerms, pTerm, nTerm, newValue) ){
        !          6332:       rc = SQLITE_NOMEM;
        !          6333:     }else{
        !          6334:       rc = leavesReaderStep(v, &reader);
        !          6335:     }
        !          6336:   }
        !          6337: 
        !          6338:   leavesReaderDestroy(&reader);
        !          6339:   return rc;
        !          6340: }
        !          6341: 
        !          6342: /* Helper for dump_terms(): sets the result to pTerms' keys, sorted and space-joined.  Returns SQLITE_OK or SQLITE_NOMEM. */
        !          6343: static int generateTermsResult(sqlite3_context *pContext, fts2Hash *pTerms){
        !          6344:   int iTerm, nTerms, nResultBytes, iByte;
        !          6345:   char *result;
        !          6346:   TermData *pData;
        !          6347:   fts2HashElem *e;
        !          6348: 
        !          6349:   /* Iterate pTerms to generate an array of terms in pData for
        !          6350:   ** sorting.
        !          6351:   */
        !          6352:   nTerms = fts2HashCount(pTerms);
        !          6353:   assert( nTerms>0 );
        !          6354:   pData = sqlite3_malloc(nTerms*sizeof(TermData));
        !          6355:   if( pData==NULL ) return SQLITE_NOMEM;
        !          6356: 
        !          6357:   nResultBytes = 0;
        !          6358:   for(iTerm = 0, e = fts2HashFirst(pTerms); e; iTerm++, e = fts2HashNext(e)){
        !          6359:     nResultBytes += fts2HashKeysize(e)+1;   /* Term plus trailing space */
        !          6360:     assert( iTerm<nTerms );
        !          6361:     pData[iTerm].pTerm = fts2HashKey(e);  /* Points at the hash's key; no copy is made here. */
        !          6362:     pData[iTerm].nTerm = fts2HashKeysize(e);
        !          6363:     pData[iTerm].pCollector = fts2HashData(e);  /* unused */
        !          6364:   }
        !          6365:   assert( iTerm==nTerms );
        !          6366: 
        !          6367:   assert( nResultBytes>0 );   /* nTerms>0, so nResultBytes must be, too. */
        !          6368:   result = sqlite3_malloc(nResultBytes);
        !          6369:   if( result==NULL ){
        !          6370:     sqlite3_free(pData);
        !          6371:     return SQLITE_NOMEM;
        !          6372:   }
        !          6373: 
        !          6374:   if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp);
        !          6375: 
        !          6376:   /* Read the terms in order to build the result. */
        !          6377:   iByte = 0;
        !          6378:   for(iTerm=0; iTerm<nTerms; ++iTerm){
        !          6379:     memcpy(result+iByte, pData[iTerm].pTerm, pData[iTerm].nTerm);
        !          6380:     iByte += pData[iTerm].nTerm;
        !          6381:     result[iByte++] = ' ';
        !          6382:   }
        !          6383:   assert( iByte==nResultBytes );
        !          6384:   assert( result[nResultBytes-1]==' ' );
        !          6385:   result[nResultBytes-1] = '\0';  /* The final separator becomes the terminator. */
        !          6386: 
        !          6387:   /* Passes away ownership of result. */
        !          6388:   sqlite3_result_text(pContext, result, nResultBytes-1, sqlite3_free);
        !          6389:   sqlite3_free(pData);
        !          6390:   return SQLITE_OK;
        !          6391: }
        !          6392: 
        !          6393: /* Implements dump_terms() for use in inspecting the fts2 index from
        !          6394: ** tests.  TEXT result containing the ordered list of terms joined by
        !          6395: ** spaces.  dump_terms(t, level, idx) dumps the terms for the segment
        !          6396: ** specified by level, idx (in %_segdir), while dump_terms(t) dumps
        !          6397: ** all terms in the index.  In both cases t is the fts table's magic
        !          6398: ** table-named column.  Failures are reported as "dump_terms: <msg>".
        !          6399: */
        !          6400: static void dumpTermsFunc(
        !          6401:   sqlite3_context *pContext,
        !          6402:   int argc, sqlite3_value **argv
        !          6403: ){
        !          6404:   fulltext_cursor *pCursor;
        !          6405:   if( argc!=3 && argc!=1 ){
        !          6406:     generateError(pContext, "dump_terms", "incorrect arguments");
        !          6407:   }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
        !          6408:             sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){  /* Must be a pointer-sized blob. */
        !          6409:     generateError(pContext, "dump_terms", "illegal first argument");
        !          6410:   }else{
        !          6411:     fulltext_vtab *v;
        !          6412:     fts2Hash terms;
        !          6413:     sqlite3_stmt *s = NULL;
        !          6414:     int rc;
        !          6415: 
        !          6416:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
        !          6417:     v = cursor_vtab(pCursor);
        !          6418: 
        !          6419:     /* If passed only the cursor column, get all segments.  Otherwise
        !          6420:     ** get the segment described by the following two arguments.
        !          6421:     */
        !          6422:     if( argc==1 ){
        !          6423:       rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
        !          6424:     }else{
        !          6425:       rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s);
        !          6426:       if( rc==SQLITE_OK ){
        !          6427:         rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[1]));
        !          6428:         if( rc==SQLITE_OK ){
        !          6429:           rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[2]));
        !          6430:         }
        !          6431:       }
        !          6432:     }
        !          6433: 
        !          6434:     if( rc!=SQLITE_OK ){
        !          6435:       generateError(pContext, "dump_terms", NULL);
        !          6436:       return;  /* terms has not been initialized yet, so there is nothing to clear. */
        !          6437:     }
        !          6438: 
        !          6439:     /* Collect the terms for each segment. */
        !          6440:     sqlite3Fts2HashInit(&terms, FTS2_HASH_STRING, 1);
        !          6441:     while( (rc = sqlite3_step(s))==SQLITE_ROW ){
        !          6442:       rc = collectSegmentTerms(v, s, &terms);
        !          6443:       if( rc!=SQLITE_OK ) break;
        !          6444:     }
        !          6445: 
        !          6446:     if( rc!=SQLITE_DONE ){
        !          6447:       sqlite3_reset(s);
        !          6448:       generateError(pContext, "dump_terms", NULL);  /* NOTE(review): SQLITE_NOMEM from collectSegmentTerms() also lands here and reports the db handle's message - confirm. */
        !          6449:     }else{
        !          6450:       const int nTerms = fts2HashCount(&terms);
        !          6451:       if( nTerms>0 ){
        !          6452:         rc = generateTermsResult(pContext, &terms);
        !          6453:         if( rc==SQLITE_NOMEM ){
        !          6454:           generateError(pContext, "dump_terms", "out of memory");
        !          6455:         }else{
        !          6456:           assert( rc==SQLITE_OK );
        !          6457:         }
        !          6458:       }else if( argc==3 ){
        !          6459:         /* The specific segment asked for could not be found. */
        !          6460:         generateError(pContext, "dump_terms", "segment not found");
        !          6461:       }else{
        !          6462:         /* No segments found. */
        !          6463:         /* TODO(shess): It should be impossible to reach this.  This
        !          6464:         ** case can only happen for an empty table, in which case
        !          6465:         ** SQLite has no rows to call this function on.
        !          6466:         */
        !          6467:         sqlite3_result_null(pContext);
        !          6468:       }
        !          6469:     }
        !          6470:     sqlite3Fts2HashClear(&terms);  /* Reached on every path that initialized terms. */
        !          6471:   }
        !          6472: }
        !          6473: 
        !          6474: /* Expand the DL_DEFAULT doclist in pData (nData bytes, must be non-empty)
        !          6475: ** into a text result in pContext, one bracketed "[docid ...]" group per document.
        !          6476: */
        !          6477: static void createDoclistResult(sqlite3_context *pContext,
        !          6478:                                 const char *pData, int nData){
        !          6479:   DataBuffer dump;
        !          6480:   DLReader dlReader;
        !          6481: 
        !          6482:   assert( pData!=NULL && nData>0 );
        !          6483: 
        !          6484:   dataBufferInit(&dump, 0);
        !          6485:   dlrInit(&dlReader, DL_DEFAULT, pData, nData);
        !          6486:   for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){
        !          6487:     char buf[256];  /* Scratch space for one formatted fragment at a time. */
        !          6488:     PLReader plReader;
        !          6489: 
        !          6490:     plrInit(&plReader, &dlReader);
        !          6491:     if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){
        !          6492:       sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader));
        !          6493:       dataBufferAppend(&dump, buf, strlen(buf));
        !          6494:     }else{
        !          6495:       int iColumn = plrColumn(&plReader);  /* Column of the group currently being written. */
        !          6496: 
        !          6497:       sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[",
        !          6498:                        dlrDocid(&dlReader), iColumn);
        !          6499:       dataBufferAppend(&dump, buf, strlen(buf));
        !          6500: 
        !          6501:       for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){
        !          6502:         if( plrColumn(&plReader)!=iColumn ){  /* Column changed: close the group and open the next. */
        !          6503:           iColumn = plrColumn(&plReader);
        !          6504:           sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn);
        !          6505:           assert( dump.nData>0 );
        !          6506:           dump.nData--;                     /* Overwrite trailing space. */
        !          6507:           assert( dump.pData[dump.nData]==' ');
        !          6508:           dataBufferAppend(&dump, buf, strlen(buf));
        !          6509:         }
        !          6510:         if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){
        !          6511:           sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ",
        !          6512:                            plrPosition(&plReader),
        !          6513:                            plrStartOffset(&plReader), plrEndOffset(&plReader));
        !          6514:         }else if( DL_DEFAULT==DL_POSITIONS ){
        !          6515:           sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader));
        !          6516:         }else{
        !          6517:           assert( NULL=="Unhandled DL_DEFAULT value");
        !          6518:         }
        !          6519:         dataBufferAppend(&dump, buf, strlen(buf));
        !          6520:       }
        !          6521:       plrDestroy(&plReader);  /* NOTE(review): only this branch calls plrDestroy(); the docid-only branch above does not - confirm. */
        !          6522: 
        !          6523:       assert( dump.nData>0 );
        !          6524:       dump.nData--;                     /* Overwrite trailing space. */
        !          6525:       assert( dump.pData[dump.nData]==' ');
        !          6526:       dataBufferAppend(&dump, "]] ", 3);
        !          6527:     }
        !          6528:   }
        !          6529:   dlrDestroy(&dlReader);
        !          6530: 
        !          6531:   assert( dump.nData>0 );
        !          6532:   dump.nData--;                     /* Overwrite trailing space. */
        !          6533:   assert( dump.pData[dump.nData]==' ');
        !          6534:   dump.pData[dump.nData] = '\0';  /* The terminator replaces the final separator. */
        !          6535:   assert( dump.nData>0 );
        !          6536: 
        !          6537:   /* Passes ownership of dump's buffer to pContext. */
        !          6538:   sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free);
        !          6539:   dump.pData = NULL;  /* The buffer now belongs to the result; drop our reference to it. */
        !          6540:   dump.nData = dump.nCapacity = 0;
        !          6541: }
        !          6542: 
        !          6543: /* Implements dump_doclist() for use in inspecting the fts2 index from
        !          6544: ** tests.  TEXT result containing a string representation of the
        !          6545: ** doclist for the indicated term.  dump_doclist(t, term, level, idx)
        !          6546: ** dumps the doclist for term from the segment specified by level, idx
        !          6547: ** (in %_segdir), while dump_doclist(t, term) dumps the logical
        !          6548: ** doclist for the term across all segments.  The per-segment doclist
        !          6549: ** can contain deletions, while the full-index doclist will not
        !          6550: ** (deletions are omitted).
        !          6551: **
        !          6552: ** Result formats differ with the setting of DL_DEFAULT.  Examples:
        !          6553: **
        !          6554: ** DL_DOCIDS: [1] [3] [7]
        !          6555: ** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]]
        !          6556: ** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]]
        !          6557: **
        !          6558: ** In each case the number after the outer '[' is the docid.  In the
        !          6559: ** latter two cases, the number before the inner '[' is the column
        !          6560: ** associated with the values within.  For DL_POSITIONS the numbers
        !          6561: ** within are the positions, for DL_POSITIONS_OFFSETS they are the
        !          6562: ** position, the start offset, and the end offset.
        !          6563: */
        !          6564: static void dumpDoclistFunc(
        !          6565:   sqlite3_context *pContext,
        !          6566:   int argc, sqlite3_value **argv
        !          6567: ){
        !          6568:   fulltext_cursor *pCursor;
        !          6569:   if( argc!=2 && argc!=4 ){
        !          6570:     generateError(pContext, "dump_doclist", "incorrect arguments");
        !          6571:   }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
        !          6572:             sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){  /* Must be a pointer-sized blob. */
        !          6573:     generateError(pContext, "dump_doclist", "illegal first argument");
        !          6574:   }else if( sqlite3_value_text(argv[1])==NULL ||
        !          6575:             sqlite3_value_text(argv[1])[0]=='\0' ){  /* Rejects a NULL or empty term. */
        !          6576:     generateError(pContext, "dump_doclist", "empty second argument");
        !          6577:   }else{
        !          6578:     const char *pTerm = (const char *)sqlite3_value_text(argv[1]);
        !          6579:     const int nTerm = strlen(pTerm);
        !          6580:     fulltext_vtab *v;
        !          6581:     int rc;
        !          6582:     DataBuffer doclist;
        !          6583: 
        !          6584:     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
        !          6585:     v = cursor_vtab(pCursor);
        !          6586: 
        !          6587:     dataBufferInit(&doclist, 0);
        !          6588: 
        !          6589:     /* termSelect() yields the same logical doclist that queries are
        !          6590:     ** run against.
        !          6591:     */
        !          6592:     if( argc==2 ){
        !          6593:       rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist);
        !          6594:     }else{
        !          6595:       sqlite3_stmt *s = NULL;
        !          6596: 
        !          6597:       /* Get our specific segment's information. */
        !          6598:       rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s);
        !          6599:       if( rc==SQLITE_OK ){
        !          6600:         rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2]));
        !          6601:         if( rc==SQLITE_OK ){
        !          6602:           rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3]));
        !          6603:         }
        !          6604:       }
        !          6605: 
        !          6606:       if( rc==SQLITE_OK ){
        !          6607:         rc = sqlite3_step(s);
        !          6608: 
        !          6609:         if( rc==SQLITE_DONE ){
        !          6610:           dataBufferDestroy(&doclist);
        !          6611:           generateError(pContext, "dump_doclist", "segment not found");
        !          6612:           return;  /* NOTE(review): s is not reset on this path, unlike the other early return below - confirm harmless. */
        !          6613:         }
        !          6614: 
        !          6615:         /* Found a segment, load it into doclist. */
        !          6616:         if( rc==SQLITE_ROW ){
        !          6617:           const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
        !          6618:           const char *pData = sqlite3_column_blob(s, 2);
        !          6619:           const int nData = sqlite3_column_bytes(s, 2);
        !          6620: 
        !          6621:           /* loadSegment() is used by termSelect() to load each
        !          6622:           ** segment's data.
        !          6623:           */
        !          6624:           rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0,
        !          6625:                            &doclist);
        !          6626:           if( rc==SQLITE_OK ){
        !          6627:             rc = sqlite3_step(s);
        !          6628: 
        !          6629:             /* Should not have more than one matching segment. */
        !          6630:             if( rc!=SQLITE_DONE ){
        !          6631:               sqlite3_reset(s);
        !          6632:               dataBufferDestroy(&doclist);
        !          6633:               generateError(pContext, "dump_doclist", "invalid segdir");
        !          6634:               return;
        !          6635:             }
        !          6636:             rc = SQLITE_OK;  /* SQLITE_DONE here means success; normalize for the checks below. */
        !          6637:           }
        !          6638:         }
        !          6639:       }
        !          6640: 
        !          6641:       sqlite3_reset(s);
        !          6642:     }
        !          6643: 
        !          6644:     if( rc==SQLITE_OK ){
        !          6645:       if( doclist.nData>0 ){
        !          6646:         createDoclistResult(pContext, doclist.pData, doclist.nData);
        !          6647:       }else{
        !          6648:         /* TODO(shess): This can happen if the term is not present, or
        !          6649:         ** if all instances of the term have been deleted and this is
        !          6650:         ** an all-index dump.  It may be interesting to distinguish
        !          6651:         ** these cases.
        !          6652:         */
        !          6653:         sqlite3_result_text(pContext, "", 0, SQLITE_STATIC);
        !          6654:       }
        !          6655:     }else if( rc==SQLITE_NOMEM ){
        !          6656:       /* Handle out-of-memory cases specially because if they are
        !          6657:       ** generated in fts2 code they may not be reflected in the db
        !          6658:       ** handle.
        !          6659:       */
        !          6660:       /* TODO(shess): Handle this more comprehensively.
        !          6661:       ** sqlite3ErrStr() has what I need, but is internal.
        !          6662:       */
        !          6663:       generateError(pContext, "dump_doclist", "out of memory");
        !          6664:     }else{
        !          6665:       generateError(pContext, "dump_doclist", NULL);
        !          6666:     }
        !          6667: 
        !          6668:     dataBufferDestroy(&doclist);
        !          6669:   }
        !          6670: }
        !          6671: #endif
        !          6672: 
        !          6673: /*
        !          6674: ** This routine implements the xFindFunction method for the FTS2 virtual
        !          6675: ** table.  Sets *pxFunc and returns 1 if zName is overloaded, else returns 0.
        !          6676: */
        !          6677: static int fulltextFindFunction(
        !          6678:   sqlite3_vtab *pVtab,
        !          6679:   int nArg,
        !          6680:   const char *zName,
        !          6681:   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
        !          6682:   void **ppArg
        !          6683: ){
        !          6684:   if( strcmp(zName,"snippet")==0 ){  /* Names are matched exactly; pVtab, nArg and ppArg are not used. */
        !          6685:     *pxFunc = snippetFunc;
        !          6686:     return 1;
        !          6687:   }else if( strcmp(zName,"offsets")==0 ){
        !          6688:     *pxFunc = snippetOffsetsFunc;
        !          6689:     return 1;
        !          6690:   }else if( strcmp(zName,"optimize")==0 ){
        !          6691:     *pxFunc = optimizeFunc;
        !          6692:     return 1;
        !          6693: #ifdef SQLITE_TEST
        !          6694:     /* NOTE(shess): These functions are present only for testing
        !          6695:     ** purposes.  No particular effort is made to optimize their
        !          6696:     ** execution or how they build their results.
        !          6697:     */
        !          6698:   }else if( strcmp(zName,"dump_terms")==0 ){
        !          6699:     /* fprintf(stderr, "Found dump_terms\n"); */
        !          6700:     *pxFunc = dumpTermsFunc;
        !          6701:     return 1;
        !          6702:   }else if( strcmp(zName,"dump_doclist")==0 ){
        !          6703:     /* fprintf(stderr, "Found dump_doclist\n"); */
        !          6704:     *pxFunc = dumpDoclistFunc;
        !          6705:     return 1;
        !          6706: #endif
        !          6707:   }
        !          6708:   return 0;  /* Not one of the names handled here; *pxFunc is left untouched. */
        !          6709: }
        !          6710: 
        !          6711: /*
        !          6712: ** Rename an fts2 table by renaming its %_content, %_segments and %_segdir tables to use zName.
        !          6713: */
        !          6714: static int fulltextRename(
        !          6715:   sqlite3_vtab *pVtab,
        !          6716:   const char *zName
        !          6717: ){
        !          6718:   fulltext_vtab *p = (fulltext_vtab *)pVtab;
        !          6719:   int rc = SQLITE_NOMEM;  /* Returned as-is if sqlite3_mprintf() yields NULL. */
        !          6720:   char *zSql = sqlite3_mprintf(
        !          6721:     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
        !          6722:     "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';"
        !          6723:     "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';"
        !          6724:     , p->zDb, p->zName, zName 
        !          6725:     , p->zDb, p->zName, zName 
        !          6726:     , p->zDb, p->zName, zName
        !          6727:   );
        !          6728:   if( zSql ){
        !          6729:     rc = sqlite3_exec(p->db, zSql, 0, 0, 0);  /* All three statements are run by this one call. */
        !          6730:     sqlite3_free(zSql);
        !          6731:   }
        !          6732:   return rc;
        !          6733: }
        !          6734: 
        !          6735: static const sqlite3_module fts2Module = {  /* sqlite3_module method table for fts2; entries are positional. */
        !          6736:   /* iVersion      */ 0,
        !          6737:   /* xCreate       */ fulltextCreate,
        !          6738:   /* xConnect      */ fulltextConnect,
        !          6739:   /* xBestIndex    */ fulltextBestIndex,
        !          6740:   /* xDisconnect   */ fulltextDisconnect,
        !          6741:   /* xDestroy      */ fulltextDestroy,
        !          6742:   /* xOpen         */ fulltextOpen,
        !          6743:   /* xClose        */ fulltextClose,
        !          6744:   /* xFilter       */ fulltextFilter,
        !          6745:   /* xNext         */ fulltextNext,
        !          6746:   /* xEof          */ fulltextEof,
        !          6747:   /* xColumn       */ fulltextColumn,
        !          6748:   /* xRowid        */ fulltextRowid,
        !          6749:   /* xUpdate       */ fulltextUpdate,
        !          6750:   /* xBegin        */ fulltextBegin,
        !          6751:   /* xSync         */ fulltextSync,
        !          6752:   /* xCommit       */ fulltextCommit,
        !          6753:   /* xRollback     */ fulltextRollback,
        !          6754:   /* xFindFunction */ fulltextFindFunction,
        !          6755:   /* xRename       */ fulltextRename,
        !          6756: };
        !          6757: 
        !          6758: static void hashDestroy(void *p){
        !          6759:   fts2Hash *pHash = (fts2Hash *)p;
        !          6760:   sqlite3Fts2HashClear(pHash);
        !          6761:   sqlite3_free(pHash);
        !          6762: }
        !          6763: 
        !          6764: /*
        !          6765: ** The fts2 built-in tokenizers - "simple" and "porter" - are implemented
        !          6766: ** in files fts2_tokenizer1.c and fts2_porter.c respectively. The following
        !          6767: ** two forward declarations are for functions declared in these files
        !          6768: ** used to retrieve the respective implementations.
        !          6769: **
        !          6770: ** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed
        !          6771: ** to by the argument to point a the "simple" tokenizer implementation.
        !          6772: ** Function ...PorterTokenizerModule() sets *pModule to point to the
        !          6773: ** porter tokenizer/stemmer implementation.
        !          6774: */
        !          6775: void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
        !          6776: void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
        !          6777: void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
        !          6778: 
        !          6779: int sqlite3Fts2InitHashTable(sqlite3 *, fts2Hash *, const char *);
        !          6780: 
        !          6781: /*
        !          6782: ** Initialise the fts2 extension. If this extension is built as part
        !          6783: ** of the sqlite library, then this function is called directly by
        !          6784: ** SQLite. If fts2 is built as a dynamically loadable extension, this
        !          6785: ** function is called by the sqlite3_extension_init() entry point.
        !          6786: */
        !          6787: int sqlite3Fts2Init(sqlite3 *db){
        !          6788:   int rc = SQLITE_OK;
        !          6789:   fts2Hash *pHash = 0;
        !          6790:   const sqlite3_tokenizer_module *pSimple = 0;
        !          6791:   const sqlite3_tokenizer_module *pPorter = 0;
        !          6792:   const sqlite3_tokenizer_module *pIcu = 0;
        !          6793: 
        !          6794:   sqlite3Fts2SimpleTokenizerModule(&pSimple);
        !          6795:   sqlite3Fts2PorterTokenizerModule(&pPorter);
        !          6796: #ifdef SQLITE_ENABLE_ICU
        !          6797:   sqlite3Fts2IcuTokenizerModule(&pIcu);
        !          6798: #endif
        !          6799: 
        !          6800:   /* Allocate and initialise the hash-table used to store tokenizers. */
        !          6801:   pHash = sqlite3_malloc(sizeof(fts2Hash));
        !          6802:   if( !pHash ){
        !          6803:     rc = SQLITE_NOMEM;
        !          6804:   }else{
        !          6805:     sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
        !          6806:   }
        !          6807: 
        !          6808:   /* Load the built-in tokenizers into the hash table */
        !          6809:   if( rc==SQLITE_OK ){
        !          6810:     if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple)
        !          6811:      || sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter) 
        !          6812:      || (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu))
        !          6813:     ){
        !          6814:       rc = SQLITE_NOMEM;
        !          6815:     }
        !          6816:   }
        !          6817: 
        !          6818:   /* Create the virtual table wrapper around the hash-table and overload 
        !          6819:   ** the two scalar functions. If this is successful, register the
        !          6820:   ** module with sqlite.
        !          6821:   */
        !          6822:   if( SQLITE_OK==rc 
        !          6823:    && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
        !          6824:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
        !          6825:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
        !          6826:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
        !          6827: #ifdef SQLITE_TEST
        !          6828:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
        !          6829:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
        !          6830: #endif
        !          6831:   ){
        !          6832:     return sqlite3_create_module_v2(
        !          6833:         db, "fts2", &fts2Module, (void *)pHash, hashDestroy
        !          6834:     );
        !          6835:   }
        !          6836: 
        !          6837:   /* An error has occurred. Delete the hash table and return the error code. */
        !          6838:   assert( rc!=SQLITE_OK );
        !          6839:   if( pHash ){
        !          6840:     sqlite3Fts2HashClear(pHash);
        !          6841:     sqlite3_free(pHash);
        !          6842:   }
        !          6843:   return rc;
        !          6844: }
        !          6845: 
        !          6846: #if !SQLITE_CORE
        !          6847: int sqlite3_extension_init(
        !          6848:   sqlite3 *db, 
        !          6849:   char **pzErrMsg,
        !          6850:   const sqlite3_api_routines *pApi
        !          6851: ){
        !          6852:   SQLITE_EXTENSION_INIT2(pApi)
        !          6853:   return sqlite3Fts2Init(db);
        !          6854: }
        !          6855: #endif
        !          6856: 
        !          6857: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>