File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / ext / fts3 / fts3.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:17 2012 UTC (13 years, 1 month ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: /*
    2: ** 2006 Oct 10
    3: **
    4: ** The author disclaims copyright to this source code.  In place of
    5: ** a legal notice, here is a blessing:
    6: **
    7: **    May you do good and not evil.
    8: **    May you find forgiveness for yourself and forgive others.
    9: **    May you share freely, never taking more than you give.
   10: **
   11: ******************************************************************************
   12: **
   13: ** This is an SQLite module implementing full-text search.
   14: */
   15: 
   16: /*
   17: ** The code in this file is only compiled if:
   18: **
   19: **     * The FTS3 module is being built as an extension
   20: **       (in which case SQLITE_CORE is not defined), or
   21: **
   22: **     * The FTS3 module is being built into the core of
   23: **       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
   24: */
   25: 
   26: /* The full-text index is stored in a series of b+tree (-like)
   27: ** structures called segments which map terms to doclists.  The
   28: ** structures are like b+trees in layout, but are constructed from the
   29: ** bottom up in optimal fashion and are not updatable.  Since trees
   30: ** are built from the bottom up, things will be described from the
   31: ** bottom up.
   32: **
   33: **
   34: **** Varints ****
   35: ** The basic unit of encoding is a variable-length integer called a
   36: ** varint.  We encode variable-length integers in little-endian order
   37: ** using seven bits * per byte as follows:
   38: **
   39: ** KEY:
   40: **         A = 0xxxxxxx    7 bits of data and one flag bit
   41: **         B = 1xxxxxxx    7 bits of data and one flag bit
   42: **
   43: **  7 bits - A
   44: ** 14 bits - BA
   45: ** 21 bits - BBA
   46: ** and so on.
   47: **
   48: ** This is similar in concept to how sqlite encodes "varints" but
   49: ** the encoding is not the same.  SQLite varints are big-endian
   50: ** are are limited to 9 bytes in length whereas FTS3 varints are
   51: ** little-endian and can be up to 10 bytes in length (in theory).
   52: **
   53: ** Example encodings:
   54: **
   55: **     1:    0x01
   56: **   127:    0x7f
   57: **   128:    0x81 0x00
   58: **
   59: **
   60: **** Document lists ****
   61: ** A doclist (document list) holds a docid-sorted list of hits for a
   62: ** given term.  Doclists hold docids and associated token positions.
   63: ** A docid is the unique integer identifier for a single document.
   64: ** A position is the index of a word within the document.  The first 
   65: ** word of the document has a position of 0.
   66: **
   67: ** FTS3 used to optionally store character offsets using a compile-time
   68: ** option.  But that functionality is no longer supported.
   69: **
   70: ** A doclist is stored like this:
   71: **
   72: ** array {
   73: **   varint docid;
   74: **   array {                (position list for column 0)
   75: **     varint position;     (2 more than the delta from previous position)
   76: **   }
   77: **   array {
   78: **     varint POS_COLUMN;   (marks start of position list for new column)
   79: **     varint column;       (index of new column)
   80: **     array {
   81: **       varint position;   (2 more than the delta from previous position)
   82: **     }
   83: **   }
   84: **   varint POS_END;        (marks end of positions for this document.
   85: ** }
   86: **
   87: ** Here, array { X } means zero or more occurrences of X, adjacent in
   88: ** memory.  A "position" is an index of a token in the token stream
   89: ** generated by the tokenizer. Note that POS_END and POS_COLUMN occur 
   90: ** in the same logical place as the position element, and act as sentinals
   91: ** ending a position list array.  POS_END is 0.  POS_COLUMN is 1.
   92: ** The positions numbers are not stored literally but rather as two more
   93: ** than the difference from the prior position, or the just the position plus
   94: ** 2 for the first position.  Example:
   95: **
   96: **   label:       A B C D E  F  G H   I  J K
   97: **   value:     123 5 9 1 1 14 35 0 234 72 0
   98: **
   99: ** The 123 value is the first docid.  For column zero in this document
  100: ** there are two matches at positions 3 and 10 (5-2 and 9-2+3).  The 1
  101: ** at D signals the start of a new column; the 1 at E indicates that the
  102: ** new column is column number 1.  There are two positions at 12 and 45
  103: ** (14-2 and 35-2+12).  The 0 at H indicate the end-of-document.  The
  104: ** 234 at I is the next docid.  It has one position 72 (72-2) and then
  105: ** terminates with the 0 at K.
  106: **
  107: ** A "position-list" is the list of positions for multiple columns for
  108: ** a single docid.  A "column-list" is the set of positions for a single
  109: ** column.  Hence, a position-list consists of one or more column-lists,
  110: ** a document record consists of a docid followed by a position-list and
  111: ** a doclist consists of one or more document records.
  112: **
  113: ** A bare doclist omits the position information, becoming an 
  114: ** array of varint-encoded docids.
  115: **
  116: **** Segment leaf nodes ****
  117: ** Segment leaf nodes store terms and doclists, ordered by term.  Leaf
  118: ** nodes are written using LeafWriter, and read using LeafReader (to
  119: ** iterate through a single leaf node's data) and LeavesReader (to
  120: ** iterate through a segment's entire leaf layer).  Leaf nodes have
  121: ** the format:
  122: **
  123: ** varint iHeight;             (height from leaf level, always 0)
  124: ** varint nTerm;               (length of first term)
  125: ** char pTerm[nTerm];          (content of first term)
  126: ** varint nDoclist;            (length of term's associated doclist)
  127: ** char pDoclist[nDoclist];    (content of doclist)
  128: ** array {
  129: **                             (further terms are delta-encoded)
  130: **   varint nPrefix;           (length of prefix shared with previous term)
  131: **   varint nSuffix;           (length of unshared suffix)
  132: **   char pTermSuffix[nSuffix];(unshared suffix of next term)
  133: **   varint nDoclist;          (length of term's associated doclist)
  134: **   char pDoclist[nDoclist];  (content of doclist)
  135: ** }
  136: **
  137: ** Here, array { X } means zero or more occurrences of X, adjacent in
  138: ** memory.
  139: **
  140: ** Leaf nodes are broken into blocks which are stored contiguously in
  141: ** the %_segments table in sorted order.  This means that when the end
  142: ** of a node is reached, the next term is in the node with the next
  143: ** greater node id.
  144: **
  145: ** New data is spilled to a new leaf node when the current node
  146: ** exceeds LEAF_MAX bytes (default 2048).  New data which itself is
  147: ** larger than STANDALONE_MIN (default 1024) is placed in a standalone
  148: ** node (a leaf node with a single term and doclist).  The goal of
  149: ** these settings is to pack together groups of small doclists while
  150: ** making it efficient to directly access large doclists.  The
  151: ** assumption is that large doclists represent terms which are more
  152: ** likely to be query targets.
  153: **
  154: ** TODO(shess) It may be useful for blocking decisions to be more
  155: ** dynamic.  For instance, it may make more sense to have a 2.5k leaf
  156: ** node rather than splitting into 2k and .5k nodes.  My intuition is
  157: ** that this might extend through 2x or 4x the pagesize.
  158: **
  159: **
  160: **** Segment interior nodes ****
  161: ** Segment interior nodes store blockids for subtree nodes and terms
  162: ** to describe what data is stored by the each subtree.  Interior
  163: ** nodes are written using InteriorWriter, and read using
  164: ** InteriorReader.  InteriorWriters are created as needed when
  165: ** SegmentWriter creates new leaf nodes, or when an interior node
  166: ** itself grows too big and must be split.  The format of interior
  167: ** nodes:
  168: **
  169: ** varint iHeight;           (height from leaf level, always >0)
  170: ** varint iBlockid;          (block id of node's leftmost subtree)
  171: ** optional {
  172: **   varint nTerm;           (length of first term)
  173: **   char pTerm[nTerm];      (content of first term)
  174: **   array {
  175: **                                (further terms are delta-encoded)
  176: **     varint nPrefix;            (length of shared prefix with previous term)
  177: **     varint nSuffix;            (length of unshared suffix)
  178: **     char pTermSuffix[nSuffix]; (unshared suffix of next term)
  179: **   }
  180: ** }
  181: **
  182: ** Here, optional { X } means an optional element, while array { X }
  183: ** means zero or more occurrences of X, adjacent in memory.
  184: **
  185: ** An interior node encodes n terms separating n+1 subtrees.  The
  186: ** subtree blocks are contiguous, so only the first subtree's blockid
  187: ** is encoded.  The subtree at iBlockid will contain all terms less
  188: ** than the first term encoded (or all terms if no term is encoded).
  189: ** Otherwise, for terms greater than or equal to pTerm[i] but less
  190: ** than pTerm[i+1], the subtree for that term will be rooted at
  191: ** iBlockid+i.  Interior nodes only store enough term data to
  192: ** distinguish adjacent children (if the rightmost term of the left
  193: ** child is "something", and the leftmost term of the right child is
  194: ** "wicked", only "w" is stored).
  195: **
  196: ** New data is spilled to a new interior node at the same height when
  197: ** the current node exceeds INTERIOR_MAX bytes (default 2048).
  198: ** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
  199: ** interior nodes and making the tree too skinny.  The interior nodes
  200: ** at a given height are naturally tracked by interior nodes at
  201: ** height+1, and so on.
  202: **
  203: **
  204: **** Segment directory ****
  205: ** The segment directory in table %_segdir stores meta-information for
  206: ** merging and deleting segments, and also the root node of the
  207: ** segment's tree.
  208: **
  209: ** The root node is the top node of the segment's tree after encoding
  210: ** the entire segment, restricted to ROOT_MAX bytes (default 1024).
  211: ** This could be either a leaf node or an interior node.  If the top
  212: ** node requires more than ROOT_MAX bytes, it is flushed to %_segments
  213: ** and a new root interior node is generated (which should always fit
  214: ** within ROOT_MAX because it only needs space for 2 varints, the
  215: ** height and the blockid of the previous root).
  216: **
  217: ** The meta-information in the segment directory is:
  218: **   level               - segment level (see below)
  219: **   idx                 - index within level
  220: **                       - (level,idx uniquely identify a segment)
  221: **   start_block         - first leaf node
  222: **   leaves_end_block    - last leaf node
  223: **   end_block           - last block (including interior nodes)
  224: **   root                - contents of root node
  225: **
  226: ** If the root node is a leaf node, then start_block,
  227: ** leaves_end_block, and end_block are all 0.
  228: **
  229: **
  230: **** Segment merging ****
  231: ** To amortize update costs, segments are grouped into levels and
  232: ** merged in batches.  Each increase in level represents exponentially
  233: ** more documents.
  234: **
  235: ** New documents (actually, document updates) are tokenized and
  236: ** written individually (using LeafWriter) to a level 0 segment, with
  237: ** incrementing idx.  When idx reaches MERGE_COUNT (default 16), all
  238: ** level 0 segments are merged into a single level 1 segment.  Level 1
  239: ** is populated like level 0, and eventually MERGE_COUNT level 1
  240: ** segments are merged to a single level 2 segment (representing
  241: ** MERGE_COUNT^2 updates), and so on.
  242: **
  243: ** A segment merge traverses all segments at a given level in
  244: ** parallel, performing a straightforward sorted merge.  Since segment
  245: ** leaf nodes are written in to the %_segments table in order, this
  246: ** merge traverses the underlying sqlite disk structures efficiently.
  247: ** After the merge, all segment blocks from the merged level are
  248: ** deleted.
  249: **
  250: ** MERGE_COUNT controls how often we merge segments.  16 seems to be
  251: ** somewhat of a sweet spot for insertion performance.  32 and 64 show
  252: ** very similar performance numbers to 16 on insertion, though they're
  253: ** a tiny bit slower (perhaps due to more overhead in merge-time
  254: ** sorting).  8 is about 20% slower than 16, 4 about 50% slower than
  255: ** 16, 2 about 66% slower than 16.
  256: **
  257: ** At query time, high MERGE_COUNT increases the number of segments
  258: ** which need to be scanned and merged.  For instance, with 100k docs
  259: ** inserted:
  260: **
  261: **    MERGE_COUNT   segments
  262: **       16           25
  263: **        8           12
  264: **        4           10
  265: **        2            6
  266: **
  267: ** This appears to have only a moderate impact on queries for very
  268: ** frequent terms (which are somewhat dominated by segment merge
  269: ** costs), and infrequent and non-existent terms still seem to be fast
  270: ** even with many segments.
  271: **
  272: ** TODO(shess) That said, it would be nice to have a better query-side
  273: ** argument for MERGE_COUNT of 16.  Also, it is possible/likely that
  274: ** optimizations to things like doclist merging will swing the sweet
  275: ** spot around.
  276: **
  277: **
  278: **
  279: **** Handling of deletions and updates ****
  280: ** Since we're using a segmented structure, with no docid-oriented
  281: ** index into the term index, we clearly cannot simply update the term
  282: ** index when a document is deleted or updated.  For deletions, we
  283: ** write an empty doclist (varint(docid) varint(POS_END)), for updates
  284: ** we simply write the new doclist.  Segment merges overwrite older
  285: ** data for a particular docid with newer data, so deletes or updates
  286: ** will eventually overtake the earlier data and knock it out.  The
  287: ** query logic likewise merges doclists so that newer data knocks out
  288: ** older data.
  289: **
  290: ** TODO(shess) Provide a VACUUM type operation to clear out all
  291: ** deletions and duplications.  This would basically be a forced merge
  292: ** into a single segment.
  293: */
  294: 
  295: #include "fts3Int.h"
  296: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
  297: 
  298: #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
  299: # define SQLITE_CORE 1
  300: #endif
  301: 
  302: #include <assert.h>
  303: #include <stdlib.h>
  304: #include <stddef.h>
  305: #include <stdio.h>
  306: #include <string.h>
  307: #include <stdarg.h>
  308: 
  309: #include "fts3.h"
  310: #ifndef SQLITE_CORE 
  311: # include "sqlite3ext.h"
  312:   SQLITE_EXTENSION_INIT1
  313: #endif
  314: 
  315: static int fts3EvalNext(Fts3Cursor *pCsr);
  316: static int fts3EvalStart(Fts3Cursor *pCsr);
  317: static int fts3TermSegReaderCursor(
  318:     Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
  319: 
  320: /* 
  321: ** Write a 64-bit variable-length integer to memory starting at p[0].
  322: ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
  323: ** The number of bytes written is returned.
  324: */
  325: int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
  326:   unsigned char *q = (unsigned char *) p;
  327:   sqlite_uint64 vu = v;
  328:   do{
  329:     *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
  330:     vu >>= 7;
  331:   }while( vu!=0 );
  332:   q[-1] &= 0x7f;  /* turn off high bit in final byte */
  333:   assert( q - (unsigned char *)p <= FTS3_VARINT_MAX );
  334:   return (int) (q - (unsigned char *)p);
  335: }
  336: 
  337: /* 
  338: ** Read a 64-bit variable-length integer from memory starting at p[0].
  339: ** Return the number of bytes read, or 0 on error.
  340: ** The value is stored in *v.
  341: */
  342: int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
  343:   const unsigned char *q = (const unsigned char *) p;
  344:   sqlite_uint64 x = 0, y = 1;
  345:   while( (*q&0x80)==0x80 && q-(unsigned char *)p<FTS3_VARINT_MAX ){
  346:     x += y * (*q++ & 0x7f);
  347:     y <<= 7;
  348:   }
  349:   x += y * (*q++);
  350:   *v = (sqlite_int64) x;
  351:   return (int) (q - (unsigned char *)p);
  352: }
  353: 
  354: /*
  355: ** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a
  356: ** 32-bit integer before it is returned.
  357: */
  358: int sqlite3Fts3GetVarint32(const char *p, int *pi){
  359:  sqlite_int64 i;
  360:  int ret = sqlite3Fts3GetVarint(p, &i);
  361:  *pi = (int) i;
  362:  return ret;
  363: }
  364: 
  365: /*
  366: ** Return the number of bytes required to encode v as a varint
  367: */
  368: int sqlite3Fts3VarintLen(sqlite3_uint64 v){
  369:   int i = 0;
  370:   do{
  371:     i++;
  372:     v >>= 7;
  373:   }while( v!=0 );
  374:   return i;
  375: }
  376: 
  377: /*
  378: ** Convert an SQL-style quoted string into a normal string by removing
  379: ** the quote characters.  The conversion is done in-place.  If the
  380: ** input does not begin with a quote character, then this routine
  381: ** is a no-op.
  382: **
  383: ** Examples:
  384: **
  385: **     "abc"   becomes   abc
  386: **     'xyz'   becomes   xyz
  387: **     [pqr]   becomes   pqr
  388: **     `mno`   becomes   mno
  389: **
  390: */
  391: void sqlite3Fts3Dequote(char *z){
  392:   char quote;                     /* Quote character (if any ) */
  393: 
  394:   quote = z[0];
  395:   if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
  396:     int iIn = 1;                  /* Index of next byte to read from input */
  397:     int iOut = 0;                 /* Index of next byte to write to output */
  398: 
  399:     /* If the first byte was a '[', then the close-quote character is a ']' */
  400:     if( quote=='[' ) quote = ']';  
  401: 
  402:     while( ALWAYS(z[iIn]) ){
  403:       if( z[iIn]==quote ){
  404:         if( z[iIn+1]!=quote ) break;
  405:         z[iOut++] = quote;
  406:         iIn += 2;
  407:       }else{
  408:         z[iOut++] = z[iIn++];
  409:       }
  410:     }
  411:     z[iOut] = '\0';
  412:   }
  413: }
  414: 
  415: /*
  416: ** Read a single varint from the doclist at *pp and advance *pp to point
  417: ** to the first byte past the end of the varint.  Add the value of the varint
  418: ** to *pVal.
  419: */
  420: static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
  421:   sqlite3_int64 iVal;
  422:   *pp += sqlite3Fts3GetVarint(*pp, &iVal);
  423:   *pVal += iVal;
  424: }
  425: 
  426: /*
  427: ** When this function is called, *pp points to the first byte following a
  428: ** varint that is part of a doclist (or position-list, or any other list
  429: ** of varints). This function moves *pp to point to the start of that varint,
  430: ** and sets *pVal by the varint value.
  431: **
  432: ** Argument pStart points to the first byte of the doclist that the
  433: ** varint is part of.
  434: */
  435: static void fts3GetReverseVarint(
  436:   char **pp, 
  437:   char *pStart, 
  438:   sqlite3_int64 *pVal
  439: ){
  440:   sqlite3_int64 iVal;
  441:   char *p;
  442: 
  443:   /* Pointer p now points at the first byte past the varint we are 
  444:   ** interested in. So, unless the doclist is corrupt, the 0x80 bit is
  445:   ** clear on character p[-1]. */
  446:   for(p = (*pp)-2; p>=pStart && *p&0x80; p--);
  447:   p++;
  448:   *pp = p;
  449: 
  450:   sqlite3Fts3GetVarint(p, &iVal);
  451:   *pVal = iVal;
  452: }
  453: 
  454: /*
  455: ** The xDisconnect() virtual table method.
  456: */
  457: static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
  458:   Fts3Table *p = (Fts3Table *)pVtab;
  459:   int i;
  460: 
  461:   assert( p->nPendingData==0 );
  462:   assert( p->pSegments==0 );
  463: 
  464:   /* Free any prepared statements held */
  465:   for(i=0; i<SizeofArray(p->aStmt); i++){
  466:     sqlite3_finalize(p->aStmt[i]);
  467:   }
  468:   sqlite3_free(p->zSegmentsTbl);
  469:   sqlite3_free(p->zReadExprlist);
  470:   sqlite3_free(p->zWriteExprlist);
  471:   sqlite3_free(p->zContentTbl);
  472: 
  473:   /* Invoke the tokenizer destructor to free the tokenizer. */
  474:   p->pTokenizer->pModule->xDestroy(p->pTokenizer);
  475: 
  476:   sqlite3_free(p);
  477:   return SQLITE_OK;
  478: }
  479: 
  480: /*
  481: ** Construct one or more SQL statements from the format string given
  482: ** and then evaluate those statements. The success code is written
  483: ** into *pRc.
  484: **
  485: ** If *pRc is initially non-zero then this routine is a no-op.
  486: */
  487: static void fts3DbExec(
  488:   int *pRc,              /* Success code */
  489:   sqlite3 *db,           /* Database in which to run SQL */
  490:   const char *zFormat,   /* Format string for SQL */
  491:   ...                    /* Arguments to the format string */
  492: ){
  493:   va_list ap;
  494:   char *zSql;
  495:   if( *pRc ) return;
  496:   va_start(ap, zFormat);
  497:   zSql = sqlite3_vmprintf(zFormat, ap);
  498:   va_end(ap);
  499:   if( zSql==0 ){
  500:     *pRc = SQLITE_NOMEM;
  501:   }else{
  502:     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
  503:     sqlite3_free(zSql);
  504:   }
  505: }
  506: 
  507: /*
  508: ** The xDestroy() virtual table method.
  509: */
  510: static int fts3DestroyMethod(sqlite3_vtab *pVtab){
  511:   Fts3Table *p = (Fts3Table *)pVtab;
  512:   int rc = SQLITE_OK;              /* Return code */
  513:   const char *zDb = p->zDb;        /* Name of database (e.g. "main", "temp") */
  514:   sqlite3 *db = p->db;             /* Database handle */
  515: 
  516:   /* Drop the shadow tables */
  517:   if( p->zContentTbl==0 ){
  518:     fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName);
  519:   }
  520:   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName);
  521:   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName);
  522:   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName);
  523:   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName);
  524: 
  525:   /* If everything has worked, invoke fts3DisconnectMethod() to free the
  526:   ** memory associated with the Fts3Table structure and return SQLITE_OK.
  527:   ** Otherwise, return an SQLite error code.
  528:   */
  529:   return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc);
  530: }
  531: 
  532: 
  533: /*
  534: ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
  535: ** passed as the first argument. This is done as part of the xConnect()
  536: ** and xCreate() methods.
  537: **
  538: ** If *pRc is non-zero when this function is called, it is a no-op. 
  539: ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
  540: ** before returning.
  541: */
  542: static void fts3DeclareVtab(int *pRc, Fts3Table *p){
  543:   if( *pRc==SQLITE_OK ){
  544:     int i;                        /* Iterator variable */
  545:     int rc;                       /* Return code */
  546:     char *zSql;                   /* SQL statement passed to declare_vtab() */
  547:     char *zCols;                  /* List of user defined columns */
  548: 
  549:     sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
  550: 
  551:     /* Create a list of user columns for the virtual table */
  552:     zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
  553:     for(i=1; zCols && i<p->nColumn; i++){
  554:       zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
  555:     }
  556: 
  557:     /* Create the whole "CREATE TABLE" statement to pass to SQLite */
  558:     zSql = sqlite3_mprintf(
  559:         "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
  560:     );
  561:     if( !zCols || !zSql ){
  562:       rc = SQLITE_NOMEM;
  563:     }else{
  564:       rc = sqlite3_declare_vtab(p->db, zSql);
  565:     }
  566: 
  567:     sqlite3_free(zSql);
  568:     sqlite3_free(zCols);
  569:     *pRc = rc;
  570:   }
  571: }
  572: 
  573: /*
  574: ** Create the backing store tables (%_content, %_segments and %_segdir)
  575: ** required by the FTS3 table passed as the only argument. This is done
  576: ** as part of the vtab xCreate() method.
  577: **
  578: ** If the p->bHasDocsize boolean is true (indicating that this is an
  579: ** FTS4 table, not an FTS3 table) then also create the %_docsize and
  580: ** %_stat tables required by FTS4.
  581: */
  582: static int fts3CreateTables(Fts3Table *p){
  583:   int rc = SQLITE_OK;             /* Return code */
  584:   int i;                          /* Iterator variable */
  585:   sqlite3 *db = p->db;            /* The database connection */
  586: 
  587:   if( p->zContentTbl==0 ){
  588:     char *zContentCols;           /* Columns of %_content table */
  589: 
  590:     /* Create a list of user columns for the content table */
  591:     zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
  592:     for(i=0; zContentCols && i<p->nColumn; i++){
  593:       char *z = p->azColumn[i];
  594:       zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
  595:     }
  596:     if( zContentCols==0 ) rc = SQLITE_NOMEM;
  597:   
  598:     /* Create the content table */
  599:     fts3DbExec(&rc, db, 
  600:        "CREATE TABLE %Q.'%q_content'(%s)",
  601:        p->zDb, p->zName, zContentCols
  602:     );
  603:     sqlite3_free(zContentCols);
  604:   }
  605: 
  606:   /* Create other tables */
  607:   fts3DbExec(&rc, db, 
  608:       "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
  609:       p->zDb, p->zName
  610:   );
  611:   fts3DbExec(&rc, db, 
  612:       "CREATE TABLE %Q.'%q_segdir'("
  613:         "level INTEGER,"
  614:         "idx INTEGER,"
  615:         "start_block INTEGER,"
  616:         "leaves_end_block INTEGER,"
  617:         "end_block INTEGER,"
  618:         "root BLOB,"
  619:         "PRIMARY KEY(level, idx)"
  620:       ");",
  621:       p->zDb, p->zName
  622:   );
  623:   if( p->bHasDocsize ){
  624:     fts3DbExec(&rc, db, 
  625:         "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
  626:         p->zDb, p->zName
  627:     );
  628:   }
  629:   if( p->bHasStat ){
  630:     fts3DbExec(&rc, db, 
  631:         "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
  632:         p->zDb, p->zName
  633:     );
  634:   }
  635:   return rc;
  636: }
  637: 
  638: /*
  639: ** Store the current database page-size in bytes in p->nPgsz.
  640: **
  641: ** If *pRc is non-zero when this function is called, it is a no-op. 
  642: ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
  643: ** before returning.
  644: */
  645: static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
  646:   if( *pRc==SQLITE_OK ){
  647:     int rc;                       /* Return code */
  648:     char *zSql;                   /* SQL text "PRAGMA %Q.page_size" */
  649:     sqlite3_stmt *pStmt;          /* Compiled "PRAGMA %Q.page_size" statement */
  650:   
  651:     zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
  652:     if( !zSql ){
  653:       rc = SQLITE_NOMEM;
  654:     }else{
  655:       rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
  656:       if( rc==SQLITE_OK ){
  657:         sqlite3_step(pStmt);
  658:         p->nPgsz = sqlite3_column_int(pStmt, 0);
  659:         rc = sqlite3_finalize(pStmt);
  660:       }else if( rc==SQLITE_AUTH ){
  661:         p->nPgsz = 1024;
  662:         rc = SQLITE_OK;
  663:       }
  664:     }
  665:     assert( p->nPgsz>0 || rc!=SQLITE_OK );
  666:     sqlite3_free(zSql);
  667:     *pRc = rc;
  668:   }
  669: }
  670: 
  671: /*
  672: ** "Special" FTS4 arguments are column specifications of the following form:
  673: **
  674: **   <key> = <value>
  675: **
  676: ** There may not be whitespace surrounding the "=" character. The <value> 
  677: ** term may be quoted, but the <key> may not.
  678: */
  679: static int fts3IsSpecialColumn(
  680:   const char *z, 
  681:   int *pnKey,
  682:   char **pzValue
  683: ){
  684:   char *zValue;
  685:   const char *zCsr = z;
  686: 
  687:   while( *zCsr!='=' ){
  688:     if( *zCsr=='\0' ) return 0;
  689:     zCsr++;
  690:   }
  691: 
  692:   *pnKey = (int)(zCsr-z);
  693:   zValue = sqlite3_mprintf("%s", &zCsr[1]);
  694:   if( zValue ){
  695:     sqlite3Fts3Dequote(zValue);
  696:   }
  697:   *pzValue = zValue;
  698:   return 1;
  699: }
  700: 
  701: /*
  702: ** Append the output of a printf() style formatting to an existing string.
  703: */
  704: static void fts3Appendf(
  705:   int *pRc,                       /* IN/OUT: Error code */
  706:   char **pz,                      /* IN/OUT: Pointer to string buffer */
  707:   const char *zFormat,            /* Printf format string to append */
  708:   ...                             /* Arguments for printf format string */
  709: ){
  710:   if( *pRc==SQLITE_OK ){
  711:     va_list ap;
  712:     char *z;
  713:     va_start(ap, zFormat);
  714:     z = sqlite3_vmprintf(zFormat, ap);
  715:     va_end(ap);
  716:     if( z && *pz ){
  717:       char *z2 = sqlite3_mprintf("%s%s", *pz, z);
  718:       sqlite3_free(z);
  719:       z = z2;
  720:     }
  721:     if( z==0 ) *pRc = SQLITE_NOMEM;
  722:     sqlite3_free(*pz);
  723:     *pz = z;
  724:   }
  725: }
  726: 
  727: /*
  728: ** Return a copy of input string zInput enclosed in double-quotes (") and
  729: ** with all double quote characters escaped. For example:
  730: **
  731: **     fts3QuoteId("un \"zip\"")   ->    "un \"\"zip\"\""
  732: **
  733: ** The pointer returned points to memory obtained from sqlite3_malloc(). It
  734: ** is the callers responsibility to call sqlite3_free() to release this
  735: ** memory.
  736: */
  737: static char *fts3QuoteId(char const *zInput){
  738:   int nRet;
  739:   char *zRet;
  740:   nRet = 2 + strlen(zInput)*2 + 1;
  741:   zRet = sqlite3_malloc(nRet);
  742:   if( zRet ){
  743:     int i;
  744:     char *z = zRet;
  745:     *(z++) = '"';
  746:     for(i=0; zInput[i]; i++){
  747:       if( zInput[i]=='"' ) *(z++) = '"';
  748:       *(z++) = zInput[i];
  749:     }
  750:     *(z++) = '"';
  751:     *(z++) = '\0';
  752:   }
  753:   return zRet;
  754: }
  755: 
  756: /*
  757: ** Return a list of comma separated SQL expressions and a FROM clause that 
  758: ** could be used in a SELECT statement such as the following:
  759: **
  760: **     SELECT <list of expressions> FROM %_content AS x ...
  761: **
  762: ** to return the docid, followed by each column of text data in order
  763: ** from left to write. If parameter zFunc is not NULL, then instead of
  764: ** being returned directly each column of text data is passed to an SQL
  765: ** function named zFunc first. For example, if zFunc is "unzip" and the
  766: ** table has the three user-defined columns "a", "b", and "c", the following
  767: ** string is returned:
  768: **
  769: **     "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
  770: **
  771: ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
  772: ** is the responsibility of the caller to eventually free it.
  773: **
  774: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
  775: ** a NULL pointer is returned). Otherwise, if an OOM error is encountered
  776: ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
  777: ** no error occurs, *pRc is left unmodified.
  778: */
  779: static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
  780:   char *zRet = 0;
  781:   char *zFree = 0;
  782:   char *zFunction;
  783:   int i;
  784: 
  785:   if( p->zContentTbl==0 ){
  786:     if( !zFunc ){
  787:       zFunction = "";
  788:     }else{
  789:       zFree = zFunction = fts3QuoteId(zFunc);
  790:     }
  791:     fts3Appendf(pRc, &zRet, "docid");
  792:     for(i=0; i<p->nColumn; i++){
  793:       fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
  794:     }
  795:     sqlite3_free(zFree);
  796:   }else{
  797:     fts3Appendf(pRc, &zRet, "rowid");
  798:     for(i=0; i<p->nColumn; i++){
  799:       fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
  800:     }
  801:   }
  802:   fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x", 
  803:       p->zDb,
  804:       (p->zContentTbl ? p->zContentTbl : p->zName),
  805:       (p->zContentTbl ? "" : "_content")
  806:   );
  807:   return zRet;
  808: }
  809: 
  810: /*
  811: ** Return a list of N comma separated question marks, where N is the number
  812: ** of columns in the %_content table (one for the docid plus one for each
  813: ** user-defined text column).
  814: **
  815: ** If argument zFunc is not NULL, then all but the first question mark
  816: ** is preceded by zFunc and an open bracket, and followed by a closed
  817: ** bracket. For example, if zFunc is "zip" and the FTS3 table has three 
  818: ** user-defined text columns, the following string is returned:
  819: **
  820: **     "?, zip(?), zip(?), zip(?)"
  821: **
  822: ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
  823: ** is the responsibility of the caller to eventually free it.
  824: **
  825: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
  826: ** a NULL pointer is returned). Otherwise, if an OOM error is encountered
  827: ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
  828: ** no error occurs, *pRc is left unmodified.
  829: */
  830: static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
  831:   char *zRet = 0;
  832:   char *zFree = 0;
  833:   char *zFunction;
  834:   int i;
  835: 
  836:   if( !zFunc ){
  837:     zFunction = "";
  838:   }else{
  839:     zFree = zFunction = fts3QuoteId(zFunc);
  840:   }
  841:   fts3Appendf(pRc, &zRet, "?");
  842:   for(i=0; i<p->nColumn; i++){
  843:     fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
  844:   }
  845:   sqlite3_free(zFree);
  846:   return zRet;
  847: }
  848: 
  849: /*
  850: ** This function interprets the string at (*pp) as a non-negative integer
  851: ** value. It reads the integer and sets *pnOut to the value read, then 
  852: ** sets *pp to point to the byte immediately following the last byte of
  853: ** the integer value.
  854: **
  855: ** Only decimal digits ('0'..'9') may be part of an integer value. 
  856: **
  857: ** If *pp does not being with a decimal digit SQLITE_ERROR is returned and
  858: ** the output value undefined. Otherwise SQLITE_OK is returned.
  859: **
  860: ** This function is used when parsing the "prefix=" FTS4 parameter.
  861: */
  862: static int fts3GobbleInt(const char **pp, int *pnOut){
  863:   const char *p;                  /* Iterator pointer */
  864:   int nInt = 0;                   /* Output value */
  865: 
  866:   for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
  867:     nInt = nInt * 10 + (p[0] - '0');
  868:   }
  869:   if( p==*pp ) return SQLITE_ERROR;
  870:   *pnOut = nInt;
  871:   *pp = p;
  872:   return SQLITE_OK;
  873: }
  874: 
  875: /*
  876: ** This function is called to allocate an array of Fts3Index structures
  877: ** representing the indexes maintained by the current FTS table. FTS tables
  878: ** always maintain the main "terms" index, but may also maintain one or
  879: ** more "prefix" indexes, depending on the value of the "prefix=" parameter
  880: ** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
  881: **
  882: ** Argument zParam is passed the value of the "prefix=" option if one was
  883: ** specified, or NULL otherwise.
  884: **
  885: ** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
  886: ** the allocated array. *pnIndex is set to the number of elements in the
  887: ** array. If an error does occur, an SQLite error code is returned.
  888: **
  889: ** Regardless of whether or not an error is returned, it is the responsibility
  890: ** of the caller to call sqlite3_free() on the output array to free it.
  891: */
  892: static int fts3PrefixParameter(
  893:   const char *zParam,             /* ABC in prefix=ABC parameter to parse */
  894:   int *pnIndex,                   /* OUT: size of *apIndex[] array */
  895:   struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
  896: ){
  897:   struct Fts3Index *aIndex;       /* Allocated array */
  898:   int nIndex = 1;                 /* Number of entries in array */
  899: 
  900:   if( zParam && zParam[0] ){
  901:     const char *p;
  902:     nIndex++;
  903:     for(p=zParam; *p; p++){
  904:       if( *p==',' ) nIndex++;
  905:     }
  906:   }
  907: 
  908:   aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
  909:   *apIndex = aIndex;
  910:   *pnIndex = nIndex;
  911:   if( !aIndex ){
  912:     return SQLITE_NOMEM;
  913:   }
  914: 
  915:   memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex);
  916:   if( zParam ){
  917:     const char *p = zParam;
  918:     int i;
  919:     for(i=1; i<nIndex; i++){
  920:       int nPrefix;
  921:       if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR;
  922:       aIndex[i].nPrefix = nPrefix;
  923:       p++;
  924:     }
  925:   }
  926: 
  927:   return SQLITE_OK;
  928: }
  929: 
  930: /*
  931: ** This function is called when initializing an FTS4 table that uses the
  932: ** content=xxx option. It determines the number of and names of the columns
  933: ** of the new FTS4 table.
  934: **
  935: ** The third argument passed to this function is the value passed to the
  936: ** config=xxx option (i.e. "xxx"). This function queries the database for
  937: ** a table of that name. If found, the output variables are populated
  938: ** as follows:
  939: **
  940: **   *pnCol:   Set to the number of columns table xxx has,
  941: **
  942: **   *pnStr:   Set to the total amount of space required to store a copy
  943: **             of each columns name, including the nul-terminator.
  944: **
  945: **   *pazCol:  Set to point to an array of *pnCol strings. Each string is
  946: **             the name of the corresponding column in table xxx. The array
  947: **             and its contents are allocated using a single allocation. It
  948: **             is the responsibility of the caller to free this allocation
  949: **             by eventually passing the *pazCol value to sqlite3_free().
  950: **
  951: ** If the table cannot be found, an error code is returned and the output
  952: ** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
  953: ** returned (and the output variables are undefined).
  954: */
  955: static int fts3ContentColumns(
  956:   sqlite3 *db,                    /* Database handle */
  957:   const char *zDb,                /* Name of db (i.e. "main", "temp" etc.) */
  958:   const char *zTbl,               /* Name of content table */
  959:   const char ***pazCol,           /* OUT: Malloc'd array of column names */
  960:   int *pnCol,                     /* OUT: Size of array *pazCol */
  961:   int *pnStr                      /* OUT: Bytes of string content */
  962: ){
  963:   int rc = SQLITE_OK;             /* Return code */
  964:   char *zSql;                     /* "SELECT *" statement on zTbl */  
  965:   sqlite3_stmt *pStmt = 0;        /* Compiled version of zSql */
  966: 
  967:   zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
  968:   if( !zSql ){
  969:     rc = SQLITE_NOMEM;
  970:   }else{
  971:     rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
  972:   }
  973:   sqlite3_free(zSql);
  974: 
  975:   if( rc==SQLITE_OK ){
  976:     const char **azCol;           /* Output array */
  977:     int nStr = 0;                 /* Size of all column names (incl. 0x00) */
  978:     int nCol;                     /* Number of table columns */
  979:     int i;                        /* Used to iterate through columns */
  980: 
  981:     /* Loop through the returned columns. Set nStr to the number of bytes of
  982:     ** space required to store a copy of each column name, including the
  983:     ** nul-terminator byte.  */
  984:     nCol = sqlite3_column_count(pStmt);
  985:     for(i=0; i<nCol; i++){
  986:       const char *zCol = sqlite3_column_name(pStmt, i);
  987:       nStr += strlen(zCol) + 1;
  988:     }
  989: 
  990:     /* Allocate and populate the array to return. */
  991:     azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
  992:     if( azCol==0 ){
  993:       rc = SQLITE_NOMEM;
  994:     }else{
  995:       char *p = (char *)&azCol[nCol];
  996:       for(i=0; i<nCol; i++){
  997:         const char *zCol = sqlite3_column_name(pStmt, i);
  998:         int n = strlen(zCol)+1;
  999:         memcpy(p, zCol, n);
 1000:         azCol[i] = p;
 1001:         p += n;
 1002:       }
 1003:     }
 1004:     sqlite3_finalize(pStmt);
 1005: 
 1006:     /* Set the output variables. */
 1007:     *pnCol = nCol;
 1008:     *pnStr = nStr;
 1009:     *pazCol = azCol;
 1010:   }
 1011: 
 1012:   return rc;
 1013: }
 1014: 
 1015: /*
 1016: ** This function is the implementation of both the xConnect and xCreate
 1017: ** methods of the FTS3 virtual table.
 1018: **
 1019: ** The argv[] array contains the following:
 1020: **
 1021: **   argv[0]   -> module name  ("fts3" or "fts4")
 1022: **   argv[1]   -> database name
 1023: **   argv[2]   -> table name
 1024: **   argv[...] -> "column name" and other module argument fields.
 1025: */
 1026: static int fts3InitVtab(
 1027:   int isCreate,                   /* True for xCreate, false for xConnect */
 1028:   sqlite3 *db,                    /* The SQLite database connection */
 1029:   void *pAux,                     /* Hash table containing tokenizers */
 1030:   int argc,                       /* Number of elements in argv array */
 1031:   const char * const *argv,       /* xCreate/xConnect argument array */
 1032:   sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
 1033:   char **pzErr                    /* Write any error message here */
 1034: ){
 1035:   Fts3Hash *pHash = (Fts3Hash *)pAux;
 1036:   Fts3Table *p = 0;               /* Pointer to allocated vtab */
 1037:   int rc = SQLITE_OK;             /* Return code */
 1038:   int i;                          /* Iterator variable */
 1039:   int nByte;                      /* Size of allocation used for *p */
 1040:   int iCol;                       /* Column index */
 1041:   int nString = 0;                /* Bytes required to hold all column names */
 1042:   int nCol = 0;                   /* Number of columns in the FTS table */
 1043:   char *zCsr;                     /* Space for holding column names */
 1044:   int nDb;                        /* Bytes required to hold database name */
 1045:   int nName;                      /* Bytes required to hold table name */
 1046:   int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
 1047:   const char **aCol;              /* Array of column names */
 1048:   sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */
 1049: 
 1050:   int nIndex;                     /* Size of aIndex[] array */
 1051:   struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */
 1052: 
 1053:   /* The results of parsing supported FTS4 key=value options: */
 1054:   int bNoDocsize = 0;             /* True to omit %_docsize table */
 1055:   int bDescIdx = 0;               /* True to store descending indexes */
 1056:   char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
 1057:   char *zCompress = 0;            /* compress=? parameter (or NULL) */
 1058:   char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
 1059:   char *zContent = 0;             /* content=? parameter (or NULL) */
 1060: 
 1061:   assert( strlen(argv[0])==4 );
 1062:   assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
 1063:        || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
 1064:   );
 1065: 
 1066:   nDb = (int)strlen(argv[1]) + 1;
 1067:   nName = (int)strlen(argv[2]) + 1;
 1068: 
 1069:   aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) );
 1070:   if( !aCol ) return SQLITE_NOMEM;
 1071:   memset((void *)aCol, 0, sizeof(const char *) * (argc-2));
 1072: 
 1073:   /* Loop through all of the arguments passed by the user to the FTS3/4
 1074:   ** module (i.e. all the column names and special arguments). This loop
 1075:   ** does the following:
 1076:   **
 1077:   **   + Figures out the number of columns the FTSX table will have, and
 1078:   **     the number of bytes of space that must be allocated to store copies
 1079:   **     of the column names.
 1080:   **
 1081:   **   + If there is a tokenizer specification included in the arguments,
 1082:   **     initializes the tokenizer pTokenizer.
 1083:   */
 1084:   for(i=3; rc==SQLITE_OK && i<argc; i++){
 1085:     char const *z = argv[i];
 1086:     int nKey;
 1087:     char *zVal;
 1088: 
 1089:     /* Check if this is a tokenizer specification */
 1090:     if( !pTokenizer 
 1091:      && strlen(z)>8
 1092:      && 0==sqlite3_strnicmp(z, "tokenize", 8) 
 1093:      && 0==sqlite3Fts3IsIdChar(z[8])
 1094:     ){
 1095:       rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
 1096:     }
 1097: 
 1098:     /* Check if it is an FTS4 special argument. */
 1099:     else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
 1100:       struct Fts4Option {
 1101:         const char *zOpt;
 1102:         int nOpt;
 1103:       } aFts4Opt[] = {
 1104:         { "matchinfo",   9 },     /* 0 -> MATCHINFO */
 1105:         { "prefix",      6 },     /* 1 -> PREFIX */
 1106:         { "compress",    8 },     /* 2 -> COMPRESS */
 1107:         { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
 1108:         { "order",       5 },     /* 4 -> ORDER */
 1109:         { "content",     7 }      /* 5 -> CONTENT */
 1110:       };
 1111: 
 1112:       int iOpt;
 1113:       if( !zVal ){
 1114:         rc = SQLITE_NOMEM;
 1115:       }else{
 1116:         for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
 1117:           struct Fts4Option *pOp = &aFts4Opt[iOpt];
 1118:           if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){
 1119:             break;
 1120:           }
 1121:         }
 1122:         if( iOpt==SizeofArray(aFts4Opt) ){
 1123:           *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
 1124:           rc = SQLITE_ERROR;
 1125:         }else{
 1126:           switch( iOpt ){
 1127:             case 0:               /* MATCHINFO */
 1128:               if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
 1129:                 *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
 1130:                 rc = SQLITE_ERROR;
 1131:               }
 1132:               bNoDocsize = 1;
 1133:               break;
 1134: 
 1135:             case 1:               /* PREFIX */
 1136:               sqlite3_free(zPrefix);
 1137:               zPrefix = zVal;
 1138:               zVal = 0;
 1139:               break;
 1140: 
 1141:             case 2:               /* COMPRESS */
 1142:               sqlite3_free(zCompress);
 1143:               zCompress = zVal;
 1144:               zVal = 0;
 1145:               break;
 1146: 
 1147:             case 3:               /* UNCOMPRESS */
 1148:               sqlite3_free(zUncompress);
 1149:               zUncompress = zVal;
 1150:               zVal = 0;
 1151:               break;
 1152: 
 1153:             case 4:               /* ORDER */
 1154:               if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) 
 1155:                && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) 
 1156:               ){
 1157:                 *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
 1158:                 rc = SQLITE_ERROR;
 1159:               }
 1160:               bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
 1161:               break;
 1162: 
 1163:             default:              /* CONTENT */
 1164:               assert( iOpt==5 );
 1165:               sqlite3_free(zUncompress);
 1166:               zContent = zVal;
 1167:               zVal = 0;
 1168:               break;
 1169:           }
 1170:         }
 1171:         sqlite3_free(zVal);
 1172:       }
 1173:     }
 1174: 
 1175:     /* Otherwise, the argument is a column name. */
 1176:     else {
 1177:       nString += (int)(strlen(z) + 1);
 1178:       aCol[nCol++] = z;
 1179:     }
 1180:   }
 1181: 
 1182:   /* If a content=xxx option was specified, the following:
 1183:   **
 1184:   **   1. Ignore any compress= and uncompress= options.
 1185:   **
 1186:   **   2. If no column names were specified as part of the CREATE VIRTUAL
 1187:   **      TABLE statement, use all columns from the content table.
 1188:   */
 1189:   if( rc==SQLITE_OK && zContent ){
 1190:     sqlite3_free(zCompress); 
 1191:     sqlite3_free(zUncompress); 
 1192:     zCompress = 0;
 1193:     zUncompress = 0;
 1194:     if( nCol==0 ){
 1195:       sqlite3_free((void*)aCol); 
 1196:       aCol = 0;
 1197:       rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
 1198:     }
 1199:     assert( rc!=SQLITE_OK || nCol>0 );
 1200:   }
 1201:   if( rc!=SQLITE_OK ) goto fts3_init_out;
 1202: 
 1203:   if( nCol==0 ){
 1204:     assert( nString==0 );
 1205:     aCol[0] = "content";
 1206:     nString = 8;
 1207:     nCol = 1;
 1208:   }
 1209: 
 1210:   if( pTokenizer==0 ){
 1211:     rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
 1212:     if( rc!=SQLITE_OK ) goto fts3_init_out;
 1213:   }
 1214:   assert( pTokenizer );
 1215: 
 1216:   rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
 1217:   if( rc==SQLITE_ERROR ){
 1218:     assert( zPrefix );
 1219:     *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
 1220:   }
 1221:   if( rc!=SQLITE_OK ) goto fts3_init_out;
 1222: 
 1223:   /* Allocate and populate the Fts3Table structure. */
 1224:   nByte = sizeof(Fts3Table) +                  /* Fts3Table */
 1225:           nCol * sizeof(char *) +              /* azColumn */
 1226:           nIndex * sizeof(struct Fts3Index) +  /* aIndex */
 1227:           nName +                              /* zName */
 1228:           nDb +                                /* zDb */
 1229:           nString;                             /* Space for azColumn strings */
 1230:   p = (Fts3Table*)sqlite3_malloc(nByte);
 1231:   if( p==0 ){
 1232:     rc = SQLITE_NOMEM;
 1233:     goto fts3_init_out;
 1234:   }
 1235:   memset(p, 0, nByte);
 1236:   p->db = db;
 1237:   p->nColumn = nCol;
 1238:   p->nPendingData = 0;
 1239:   p->azColumn = (char **)&p[1];
 1240:   p->pTokenizer = pTokenizer;
 1241:   p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
 1242:   p->bHasDocsize = (isFts4 && bNoDocsize==0);
 1243:   p->bHasStat = isFts4;
 1244:   p->bDescIdx = bDescIdx;
 1245:   p->zContentTbl = zContent;
 1246:   zContent = 0;
 1247:   TESTONLY( p->inTransaction = -1 );
 1248:   TESTONLY( p->mxSavepoint = -1 );
 1249: 
 1250:   p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
 1251:   memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
 1252:   p->nIndex = nIndex;
 1253:   for(i=0; i<nIndex; i++){
 1254:     fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
 1255:   }
 1256: 
 1257:   /* Fill in the zName and zDb fields of the vtab structure. */
 1258:   zCsr = (char *)&p->aIndex[nIndex];
 1259:   p->zName = zCsr;
 1260:   memcpy(zCsr, argv[2], nName);
 1261:   zCsr += nName;
 1262:   p->zDb = zCsr;
 1263:   memcpy(zCsr, argv[1], nDb);
 1264:   zCsr += nDb;
 1265: 
 1266:   /* Fill in the azColumn array */
 1267:   for(iCol=0; iCol<nCol; iCol++){
 1268:     char *z; 
 1269:     int n = 0;
 1270:     z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
 1271:     memcpy(zCsr, z, n);
 1272:     zCsr[n] = '\0';
 1273:     sqlite3Fts3Dequote(zCsr);
 1274:     p->azColumn[iCol] = zCsr;
 1275:     zCsr += n+1;
 1276:     assert( zCsr <= &((char *)p)[nByte] );
 1277:   }
 1278: 
 1279:   if( (zCompress==0)!=(zUncompress==0) ){
 1280:     char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
 1281:     rc = SQLITE_ERROR;
 1282:     *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss);
 1283:   }
 1284:   p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
 1285:   p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
 1286:   if( rc!=SQLITE_OK ) goto fts3_init_out;
 1287: 
 1288:   /* If this is an xCreate call, create the underlying tables in the 
 1289:   ** database. TODO: For xConnect(), it could verify that said tables exist.
 1290:   */
 1291:   if( isCreate ){
 1292:     rc = fts3CreateTables(p);
 1293:   }
 1294: 
 1295:   /* Figure out the page-size for the database. This is required in order to
 1296:   ** estimate the cost of loading large doclists from the database.  */
 1297:   fts3DatabasePageSize(&rc, p);
 1298:   p->nNodeSize = p->nPgsz-35;
 1299: 
 1300:   /* Declare the table schema to SQLite. */
 1301:   fts3DeclareVtab(&rc, p);
 1302: 
 1303: fts3_init_out:
 1304:   sqlite3_free(zPrefix);
 1305:   sqlite3_free(aIndex);
 1306:   sqlite3_free(zCompress);
 1307:   sqlite3_free(zUncompress);
 1308:   sqlite3_free(zContent);
 1309:   sqlite3_free((void *)aCol);
 1310:   if( rc!=SQLITE_OK ){
 1311:     if( p ){
 1312:       fts3DisconnectMethod((sqlite3_vtab *)p);
 1313:     }else if( pTokenizer ){
 1314:       pTokenizer->pModule->xDestroy(pTokenizer);
 1315:     }
 1316:   }else{
 1317:     assert( p->pSegments==0 );
 1318:     *ppVTab = &p->base;
 1319:   }
 1320:   return rc;
 1321: }
 1322: 
 1323: /*
 1324: ** The xConnect() and xCreate() methods for the virtual table. All the
 1325: ** work is done in function fts3InitVtab().
 1326: */
 1327: static int fts3ConnectMethod(
 1328:   sqlite3 *db,                    /* Database connection */
 1329:   void *pAux,                     /* Pointer to tokenizer hash table */
 1330:   int argc,                       /* Number of elements in argv array */
 1331:   const char * const *argv,       /* xCreate/xConnect argument array */
 1332:   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
 1333:   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
 1334: ){
 1335:   return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
 1336: }
 1337: static int fts3CreateMethod(
 1338:   sqlite3 *db,                    /* Database connection */
 1339:   void *pAux,                     /* Pointer to tokenizer hash table */
 1340:   int argc,                       /* Number of elements in argv array */
 1341:   const char * const *argv,       /* xCreate/xConnect argument array */
 1342:   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
 1343:   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
 1344: ){
 1345:   return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
 1346: }
 1347: 
 1348: /* 
 1349: ** Implementation of the xBestIndex method for FTS3 tables. There
 1350: ** are three possible strategies, in order of preference:
 1351: **
 1352: **   1. Direct lookup by rowid or docid. 
 1353: **   2. Full-text search using a MATCH operator on a non-docid column.
 1354: **   3. Linear scan of %_content table.
 1355: */
 1356: static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
 1357:   Fts3Table *p = (Fts3Table *)pVTab;
 1358:   int i;                          /* Iterator variable */
 1359:   int iCons = -1;                 /* Index of constraint to use */
 1360: 
 1361:   /* By default use a full table scan. This is an expensive option,
 1362:   ** so search through the constraints to see if a more efficient 
 1363:   ** strategy is possible.
 1364:   */
 1365:   pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
 1366:   pInfo->estimatedCost = 500000;
 1367:   for(i=0; i<pInfo->nConstraint; i++){
 1368:     struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
 1369:     if( pCons->usable==0 ) continue;
 1370: 
 1371:     /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
 1372:     if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
 1373:      && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
 1374:     ){
 1375:       pInfo->idxNum = FTS3_DOCID_SEARCH;
 1376:       pInfo->estimatedCost = 1.0;
 1377:       iCons = i;
 1378:     }
 1379: 
 1380:     /* A MATCH constraint. Use a full-text search.
 1381:     **
 1382:     ** If there is more than one MATCH constraint available, use the first
 1383:     ** one encountered. If there is both a MATCH constraint and a direct
 1384:     ** rowid/docid lookup, prefer the MATCH strategy. This is done even 
 1385:     ** though the rowid/docid lookup is faster than a MATCH query, selecting
 1386:     ** it would lead to an "unable to use function MATCH in the requested 
 1387:     ** context" error.
 1388:     */
 1389:     if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH 
 1390:      && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
 1391:     ){
 1392:       pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
 1393:       pInfo->estimatedCost = 2.0;
 1394:       iCons = i;
 1395:       break;
 1396:     }
 1397:   }
 1398: 
 1399:   if( iCons>=0 ){
 1400:     pInfo->aConstraintUsage[iCons].argvIndex = 1;
 1401:     pInfo->aConstraintUsage[iCons].omit = 1;
 1402:   } 
 1403: 
 1404:   /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
 1405:   ** docid) order. Both ascending and descending are possible. 
 1406:   */
 1407:   if( pInfo->nOrderBy==1 ){
 1408:     struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
 1409:     if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
 1410:       if( pOrder->desc ){
 1411:         pInfo->idxStr = "DESC";
 1412:       }else{
 1413:         pInfo->idxStr = "ASC";
 1414:       }
 1415:       pInfo->orderByConsumed = 1;
 1416:     }
 1417:   }
 1418: 
 1419:   assert( p->pSegments==0 );
 1420:   return SQLITE_OK;
 1421: }
 1422: 
 1423: /*
 1424: ** Implementation of xOpen method.
 1425: */
 1426: static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
 1427:   sqlite3_vtab_cursor *pCsr;               /* Allocated cursor */
 1428: 
 1429:   UNUSED_PARAMETER(pVTab);
 1430: 
 1431:   /* Allocate a buffer large enough for an Fts3Cursor structure. If the
 1432:   ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, 
 1433:   ** if the allocation fails, return SQLITE_NOMEM.
 1434:   */
 1435:   *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
 1436:   if( !pCsr ){
 1437:     return SQLITE_NOMEM;
 1438:   }
 1439:   memset(pCsr, 0, sizeof(Fts3Cursor));
 1440:   return SQLITE_OK;
 1441: }
 1442: 
 1443: /*
 1444: ** Close the cursor.  For additional information see the documentation
 1445: ** on the xClose method of the virtual table interface.
 1446: */
 1447: static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
 1448:   Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
 1449:   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
 1450:   sqlite3_finalize(pCsr->pStmt);
 1451:   sqlite3Fts3ExprFree(pCsr->pExpr);
 1452:   sqlite3Fts3FreeDeferredTokens(pCsr);
 1453:   sqlite3_free(pCsr->aDoclist);
 1454:   sqlite3_free(pCsr->aMatchinfo);
 1455:   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
 1456:   sqlite3_free(pCsr);
 1457:   return SQLITE_OK;
 1458: }
 1459: 
 1460: /*
 1461: ** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
 1462: ** compose and prepare an SQL statement of the form:
 1463: **
 1464: **    "SELECT <columns> FROM %_content WHERE rowid = ?"
 1465: **
 1466: ** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
 1467: ** it. If an error occurs, return an SQLite error code.
 1468: **
 1469: ** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK.
 1470: */
 1471: static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
 1472:   int rc = SQLITE_OK;
 1473:   if( pCsr->pStmt==0 ){
 1474:     Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
 1475:     char *zSql;
 1476:     zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist);
 1477:     if( !zSql ) return SQLITE_NOMEM;
 1478:     rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
 1479:     sqlite3_free(zSql);
 1480:   }
 1481:   *ppStmt = pCsr->pStmt;
 1482:   return rc;
 1483: }
 1484: 
 1485: /*
 1486: ** Position the pCsr->pStmt statement so that it is on the row
 1487: ** of the %_content table that contains the last match.  Return
 1488: ** SQLITE_OK on success.  
 1489: */
 1490: static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
 1491:   int rc = SQLITE_OK;
 1492:   if( pCsr->isRequireSeek ){
 1493:     sqlite3_stmt *pStmt = 0;
 1494: 
 1495:     rc = fts3CursorSeekStmt(pCsr, &pStmt);
 1496:     if( rc==SQLITE_OK ){
 1497:       sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
 1498:       pCsr->isRequireSeek = 0;
 1499:       if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
 1500:         return SQLITE_OK;
 1501:       }else{
 1502:         rc = sqlite3_reset(pCsr->pStmt);
 1503:         if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
 1504:           /* If no row was found and no error has occured, then the %_content
 1505:           ** table is missing a row that is present in the full-text index.
 1506:           ** The data structures are corrupt.  */
 1507:           rc = FTS_CORRUPT_VTAB;
 1508:           pCsr->isEof = 1;
 1509:         }
 1510:       }
 1511:     }
 1512:   }
 1513: 
 1514:   if( rc!=SQLITE_OK && pContext ){
 1515:     sqlite3_result_error_code(pContext, rc);
 1516:   }
 1517:   return rc;
 1518: }
 1519: 
 1520: /*
 1521: ** This function is used to process a single interior node when searching
 1522: ** a b-tree for a term or term prefix. The node data is passed to this 
 1523: ** function via the zNode/nNode parameters. The term to search for is
 1524: ** passed in zTerm/nTerm.
 1525: **
 1526: ** If piFirst is not NULL, then this function sets *piFirst to the blockid
 1527: ** of the child node that heads the sub-tree that may contain the term.
 1528: **
 1529: ** If piLast is not NULL, then *piLast is set to the right-most child node
 1530: ** that heads a sub-tree that may contain a term for which zTerm/nTerm is
 1531: ** a prefix.
 1532: **
 1533: ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
 1534: */
 1535: static int fts3ScanInteriorNode(
 1536:   const char *zTerm,              /* Term to select leaves for */
 1537:   int nTerm,                      /* Size of term zTerm in bytes */
 1538:   const char *zNode,              /* Buffer containing segment interior node */
 1539:   int nNode,                      /* Size of buffer at zNode */
 1540:   sqlite3_int64 *piFirst,         /* OUT: Selected child node */
 1541:   sqlite3_int64 *piLast           /* OUT: Selected child node */
 1542: ){
 1543:   int rc = SQLITE_OK;             /* Return code */
 1544:   const char *zCsr = zNode;       /* Cursor to iterate through node */
 1545:   const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
 1546:   char *zBuffer = 0;              /* Buffer to load terms into */
 1547:   int nAlloc = 0;                 /* Size of allocated buffer */
 1548:   int isFirstTerm = 1;            /* True when processing first term on page */
 1549:   sqlite3_int64 iChild;           /* Block id of child node to descend to */
 1550: 
 1551:   /* Skip over the 'height' varint that occurs at the start of every 
 1552:   ** interior node. Then load the blockid of the left-child of the b-tree
 1553:   ** node into variable iChild.  
 1554:   **
 1555:   ** Even if the data structure on disk is corrupted, this (reading two
 1556:   ** varints from the buffer) does not risk an overread. If zNode is a
 1557:   ** root node, then the buffer comes from a SELECT statement. SQLite does
 1558:   ** not make this guarantee explicitly, but in practice there are always
 1559:   ** either more than 20 bytes of allocated space following the nNode bytes of
 1560:   ** contents, or two zero bytes. Or, if the node is read from the %_segments
 1561:   ** table, then there are always 20 bytes of zeroed padding following the
 1562:   ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details).
 1563:   */
 1564:   zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
 1565:   zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
 1566:   if( zCsr>zEnd ){
 1567:     return FTS_CORRUPT_VTAB;
 1568:   }
 1569:   
 1570:   while( zCsr<zEnd && (piFirst || piLast) ){
 1571:     int cmp;                      /* memcmp() result */
 1572:     int nSuffix;                  /* Size of term suffix */
 1573:     int nPrefix = 0;              /* Size of term prefix */
 1574:     int nBuffer;                  /* Total term size */
 1575:   
 1576:     /* Load the next term on the node into zBuffer. Use realloc() to expand
 1577:     ** the size of zBuffer if required.  */
 1578:     if( !isFirstTerm ){
 1579:       zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
 1580:     }
 1581:     isFirstTerm = 0;
 1582:     zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
 1583:     
 1584:     if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){
 1585:       rc = FTS_CORRUPT_VTAB;
 1586:       goto finish_scan;
 1587:     }
 1588:     if( nPrefix+nSuffix>nAlloc ){
 1589:       char *zNew;
 1590:       nAlloc = (nPrefix+nSuffix) * 2;
 1591:       zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
 1592:       if( !zNew ){
 1593:         rc = SQLITE_NOMEM;
 1594:         goto finish_scan;
 1595:       }
 1596:       zBuffer = zNew;
 1597:     }
 1598:     assert( zBuffer );
 1599:     memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
 1600:     nBuffer = nPrefix + nSuffix;
 1601:     zCsr += nSuffix;
 1602: 
 1603:     /* Compare the term we are searching for with the term just loaded from
 1604:     ** the interior node. If the specified term is greater than or equal
 1605:     ** to the term from the interior node, then all terms on the sub-tree 
 1606:     ** headed by node iChild are smaller than zTerm. No need to search 
 1607:     ** iChild.
 1608:     **
 1609:     ** If the interior node term is larger than the specified term, then
 1610:     ** the tree headed by iChild may contain the specified term.
 1611:     */
 1612:     cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
 1613:     if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){
 1614:       *piFirst = iChild;
 1615:       piFirst = 0;
 1616:     }
 1617: 
 1618:     if( piLast && cmp<0 ){
 1619:       *piLast = iChild;
 1620:       piLast = 0;
 1621:     }
 1622: 
 1623:     iChild++;
 1624:   };
 1625: 
 1626:   if( piFirst ) *piFirst = iChild;
 1627:   if( piLast ) *piLast = iChild;
 1628: 
 1629:  finish_scan:
 1630:   sqlite3_free(zBuffer);
 1631:   return rc;
 1632: }
 1633: 
 1634: 
 1635: /*
 1636: ** The buffer pointed to by argument zNode (size nNode bytes) contains an
 1637: ** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes)
 1638: ** contains a term. This function searches the sub-tree headed by the zNode
 1639: ** node for the range of leaf nodes that may contain the specified term
 1640: ** or terms for which the specified term is a prefix.
 1641: **
 1642: ** If piLeaf is not NULL, then *piLeaf is set to the blockid of the 
 1643: ** left-most leaf node in the tree that may contain the specified term.
 1644: ** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the
 1645: ** right-most leaf node that may contain a term for which the specified
 1646: ** term is a prefix.
 1647: **
 1648: ** It is possible that the range of returned leaf nodes does not contain 
 1649: ** the specified term or any terms for which it is a prefix. However, if the 
 1650: ** segment does contain any such terms, they are stored within the identified
 1651: ** range. Because this function only inspects interior segment nodes (and
 1652: ** never loads leaf nodes into memory), it is not possible to be sure.
 1653: **
 1654: ** If an error occurs, an error code other than SQLITE_OK is returned.
 1655: */ 
 1656: static int fts3SelectLeaf(
 1657:   Fts3Table *p,                   /* Virtual table handle */
 1658:   const char *zTerm,              /* Term to select leaves for */
 1659:   int nTerm,                      /* Size of term zTerm in bytes */
 1660:   const char *zNode,              /* Buffer containing segment interior node */
 1661:   int nNode,                      /* Size of buffer at zNode */
 1662:   sqlite3_int64 *piLeaf,          /* Selected leaf node */
 1663:   sqlite3_int64 *piLeaf2          /* Selected leaf node */
 1664: ){
 1665:   int rc;                         /* Return code */
 1666:   int iHeight;                    /* Height of this node in tree */
 1667: 
 1668:   assert( piLeaf || piLeaf2 );
 1669: 
 1670:   sqlite3Fts3GetVarint32(zNode, &iHeight);
 1671:   rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
 1672:   assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );
 1673: 
 1674:   if( rc==SQLITE_OK && iHeight>1 ){
 1675:     char *zBlob = 0;              /* Blob read from %_segments table */
 1676:     int nBlob;                    /* Size of zBlob in bytes */
 1677: 
 1678:     if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
 1679:       rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0);
 1680:       if( rc==SQLITE_OK ){
 1681:         rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0);
 1682:       }
 1683:       sqlite3_free(zBlob);
 1684:       piLeaf = 0;
 1685:       zBlob = 0;
 1686:     }
 1687: 
 1688:     if( rc==SQLITE_OK ){
 1689:       rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0);
 1690:     }
 1691:     if( rc==SQLITE_OK ){
 1692:       rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2);
 1693:     }
 1694:     sqlite3_free(zBlob);
 1695:   }
 1696: 
 1697:   return rc;
 1698: }
 1699: 
 1700: /*
 1701: ** This function is used to create delta-encoded serialized lists of FTS3 
 1702: ** varints. Each call to this function appends a single varint to a list.
 1703: */
 1704: static void fts3PutDeltaVarint(
 1705:   char **pp,                      /* IN/OUT: Output pointer */
 1706:   sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
 1707:   sqlite3_int64 iVal              /* Write this value to the list */
 1708: ){
 1709:   assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) );
 1710:   *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev);
 1711:   *piPrev = iVal;
 1712: }
 1713: 
 1714: /*
 1715: ** When this function is called, *ppPoslist is assumed to point to the 
 1716: ** start of a position-list. After it returns, *ppPoslist points to the
 1717: ** first byte after the position-list.
 1718: **
 1719: ** A position list is list of positions (delta encoded) and columns for 
 1720: ** a single document record of a doclist.  So, in other words, this
 1721: ** routine advances *ppPoslist so that it points to the next docid in
 1722: ** the doclist, or to the first byte past the end of the doclist.
 1723: **
 1724: ** If pp is not NULL, then the contents of the position list are copied
 1725: ** to *pp. *pp is set to point to the first byte past the last byte copied
 1726: ** before this function returns.
 1727: */
 1728: static void fts3PoslistCopy(char **pp, char **ppPoslist){
 1729:   char *pEnd = *ppPoslist;
 1730:   char c = 0;
 1731: 
 1732:   /* The end of a position list is marked by a zero encoded as an FTS3 
 1733:   ** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by
 1734:   ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail
 1735:   ** of some other, multi-byte, value.
 1736:   **
 1737:   ** The following while-loop moves pEnd to point to the first byte that is not 
 1738:   ** immediately preceded by a byte with the 0x80 bit set. Then increments
 1739:   ** pEnd once more so that it points to the byte immediately following the
 1740:   ** last byte in the position-list.
 1741:   */
 1742:   while( *pEnd | c ){
 1743:     c = *pEnd++ & 0x80;
 1744:     testcase( c!=0 && (*pEnd)==0 );
 1745:   }
 1746:   pEnd++;  /* Advance past the POS_END terminator byte */
 1747: 
 1748:   if( pp ){
 1749:     int n = (int)(pEnd - *ppPoslist);
 1750:     char *p = *pp;
 1751:     memcpy(p, *ppPoslist, n);
 1752:     p += n;
 1753:     *pp = p;
 1754:   }
 1755:   *ppPoslist = pEnd;
 1756: }
 1757: 
 1758: /*
 1759: ** When this function is called, *ppPoslist is assumed to point to the 
 1760: ** start of a column-list. After it returns, *ppPoslist points to the
 1761: ** to the terminator (POS_COLUMN or POS_END) byte of the column-list.
 1762: **
 1763: ** A column-list is list of delta-encoded positions for a single column
 1764: ** within a single document within a doclist.
 1765: **
 1766: ** The column-list is terminated either by a POS_COLUMN varint (1) or
 1767: ** a POS_END varint (0).  This routine leaves *ppPoslist pointing to
 1768: ** the POS_COLUMN or POS_END that terminates the column-list.
 1769: **
 1770: ** If pp is not NULL, then the contents of the column-list are copied
 1771: ** to *pp. *pp is set to point to the first byte past the last byte copied
 1772: ** before this function returns.  The POS_COLUMN or POS_END terminator
 1773: ** is not copied into *pp.
 1774: */
 1775: static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
 1776:   char *pEnd = *ppPoslist;
 1777:   char c = 0;
 1778: 
 1779:   /* A column-list is terminated by either a 0x01 or 0x00 byte that is
 1780:   ** not part of a multi-byte varint.
 1781:   */
 1782:   while( 0xFE & (*pEnd | c) ){
 1783:     c = *pEnd++ & 0x80;
 1784:     testcase( c!=0 && ((*pEnd)&0xfe)==0 );
 1785:   }
 1786:   if( pp ){
 1787:     int n = (int)(pEnd - *ppPoslist);
 1788:     char *p = *pp;
 1789:     memcpy(p, *ppPoslist, n);
 1790:     p += n;
 1791:     *pp = p;
 1792:   }
 1793:   *ppPoslist = pEnd;
 1794: }
 1795: 
 1796: /*
 1797: ** Value used to signify the end of an position-list. This is safe because
 1798: ** it is not possible to have a document with 2^31 terms.
 1799: */
 1800: #define POSITION_LIST_END 0x7fffffff
 1801: 
 1802: /*
 1803: ** This function is used to help parse position-lists. When this function is
 1804: ** called, *pp may point to the start of the next varint in the position-list
 1805: ** being parsed, or it may point to 1 byte past the end of the position-list
 1806: ** (in which case **pp will be a terminator bytes POS_END (0) or
 1807: ** (1)).
 1808: **
 1809: ** If *pp points past the end of the current position-list, set *pi to 
 1810: ** POSITION_LIST_END and return. Otherwise, read the next varint from *pp,
 1811: ** increment the current value of *pi by the value read, and set *pp to
 1812: ** point to the next value before returning.
 1813: **
 1814: ** Before calling this routine *pi must be initialized to the value of
 1815: ** the previous position, or zero if we are reading the first position
 1816: ** in the position-list.  Because positions are delta-encoded, the value
 1817: ** of the previous position is needed in order to compute the value of
 1818: ** the next position.
 1819: */
 1820: static void fts3ReadNextPos(
 1821:   char **pp,                    /* IN/OUT: Pointer into position-list buffer */
 1822:   sqlite3_int64 *pi             /* IN/OUT: Value read from position-list */
 1823: ){
 1824:   if( (**pp)&0xFE ){
 1825:     fts3GetDeltaVarint(pp, pi);
 1826:     *pi -= 2;
 1827:   }else{
 1828:     *pi = POSITION_LIST_END;
 1829:   }
 1830: }
 1831: 
 1832: /*
 1833: ** If parameter iCol is not 0, write an POS_COLUMN (1) byte followed by
 1834: ** the value of iCol encoded as a varint to *pp.   This will start a new
 1835: ** column list.
 1836: **
 1837: ** Set *pp to point to the byte just after the last byte written before 
 1838: ** returning (do not modify it if iCol==0). Return the total number of bytes
 1839: ** written (0 if iCol==0).
 1840: */
 1841: static int fts3PutColNumber(char **pp, int iCol){
 1842:   int n = 0;                      /* Number of bytes written */
 1843:   if( iCol ){
 1844:     char *p = *pp;                /* Output pointer */
 1845:     n = 1 + sqlite3Fts3PutVarint(&p[1], iCol);
 1846:     *p = 0x01;
 1847:     *pp = &p[n];
 1848:   }
 1849:   return n;
 1850: }
 1851: 
 1852: /*
 1853: ** Compute the union of two position lists.  The output written
 1854: ** into *pp contains all positions of both *pp1 and *pp2 in sorted
 1855: ** order and with any duplicates removed.  All pointers are
 1856: ** updated appropriately.   The caller is responsible for insuring
 1857: ** that there is enough space in *pp to hold the complete output.
 1858: */
 1859: static void fts3PoslistMerge(
 1860:   char **pp,                      /* Output buffer */
 1861:   char **pp1,                     /* Left input list */
 1862:   char **pp2                      /* Right input list */
 1863: ){
 1864:   char *p = *pp;
 1865:   char *p1 = *pp1;
 1866:   char *p2 = *pp2;
 1867: 
 1868:   while( *p1 || *p2 ){
 1869:     int iCol1;         /* The current column index in pp1 */
 1870:     int iCol2;         /* The current column index in pp2 */
 1871: 
 1872:     if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);
 1873:     else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;
 1874:     else iCol1 = 0;
 1875: 
 1876:     if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);
 1877:     else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;
 1878:     else iCol2 = 0;
 1879: 
 1880:     if( iCol1==iCol2 ){
 1881:       sqlite3_int64 i1 = 0;       /* Last position from pp1 */
 1882:       sqlite3_int64 i2 = 0;       /* Last position from pp2 */
 1883:       sqlite3_int64 iPrev = 0;
 1884:       int n = fts3PutColNumber(&p, iCol1);
 1885:       p1 += n;
 1886:       p2 += n;
 1887: 
 1888:       /* At this point, both p1 and p2 point to the start of column-lists
 1889:       ** for the same column (the column with index iCol1 and iCol2).
 1890:       ** A column-list is a list of non-negative delta-encoded varints, each 
 1891:       ** incremented by 2 before being stored. Each list is terminated by a
 1892:       ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists
 1893:       ** and writes the results to buffer p. p is left pointing to the byte
 1894:       ** after the list written. No terminator (POS_END or POS_COLUMN) is
 1895:       ** written to the output.
 1896:       */
 1897:       fts3GetDeltaVarint(&p1, &i1);
 1898:       fts3GetDeltaVarint(&p2, &i2);
 1899:       do {
 1900:         fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2); 
 1901:         iPrev -= 2;
 1902:         if( i1==i2 ){
 1903:           fts3ReadNextPos(&p1, &i1);
 1904:           fts3ReadNextPos(&p2, &i2);
 1905:         }else if( i1<i2 ){
 1906:           fts3ReadNextPos(&p1, &i1);
 1907:         }else{
 1908:           fts3ReadNextPos(&p2, &i2);
 1909:         }
 1910:       }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END );
 1911:     }else if( iCol1<iCol2 ){
 1912:       p1 += fts3PutColNumber(&p, iCol1);
 1913:       fts3ColumnlistCopy(&p, &p1);
 1914:     }else{
 1915:       p2 += fts3PutColNumber(&p, iCol2);
 1916:       fts3ColumnlistCopy(&p, &p2);
 1917:     }
 1918:   }
 1919: 
 1920:   *p++ = POS_END;
 1921:   *pp = p;
 1922:   *pp1 = p1 + 1;
 1923:   *pp2 = p2 + 1;
 1924: }
 1925: 
 1926: /*
 1927: ** This function is used to merge two position lists into one. When it is
 1928: ** called, *pp1 and *pp2 must both point to position lists. A position-list is
 1929: ** the part of a doclist that follows each document id. For example, if a row
 1930: ** contains:
 1931: **
 1932: **     'a b c'|'x y z'|'a b b a'
 1933: **
 1934: ** Then the position list for this row for token 'b' would consist of:
 1935: **
 1936: **     0x02 0x01 0x02 0x03 0x03 0x00
 1937: **
 1938: ** When this function returns, both *pp1 and *pp2 are left pointing to the
 1939: ** byte following the 0x00 terminator of their respective position lists.
 1940: **
 1941: ** If isSaveLeft is 0, an entry is added to the output position list for 
 1942: ** each position in *pp2 for which there exists one or more positions in
 1943: ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
 1944: ** when the *pp1 token appears before the *pp2 token, but not more than nToken
 1945: ** slots before it.
 1946: **
 1947: ** e.g. nToken==1 searches for adjacent positions.
 1948: */
 1949: static int fts3PoslistPhraseMerge(
 1950:   char **pp,                      /* IN/OUT: Preallocated output buffer */
 1951:   int nToken,                     /* Maximum difference in token positions */
 1952:   int isSaveLeft,                 /* Save the left position */
 1953:   int isExact,                    /* If *pp1 is exactly nTokens before *pp2 */
 1954:   char **pp1,                     /* IN/OUT: Left input list */
 1955:   char **pp2                      /* IN/OUT: Right input list */
 1956: ){
 1957:   char *p = *pp;
 1958:   char *p1 = *pp1;
 1959:   char *p2 = *pp2;
 1960:   int iCol1 = 0;
 1961:   int iCol2 = 0;
 1962: 
 1963:   /* Never set both isSaveLeft and isExact for the same invocation. */
 1964:   assert( isSaveLeft==0 || isExact==0 );
 1965: 
 1966:   assert( p!=0 && *p1!=0 && *p2!=0 );
 1967:   if( *p1==POS_COLUMN ){ 
 1968:     p1++;
 1969:     p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
 1970:   }
 1971:   if( *p2==POS_COLUMN ){ 
 1972:     p2++;
 1973:     p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
 1974:   }
 1975: 
 1976:   while( 1 ){
 1977:     if( iCol1==iCol2 ){
 1978:       char *pSave = p;
 1979:       sqlite3_int64 iPrev = 0;
 1980:       sqlite3_int64 iPos1 = 0;
 1981:       sqlite3_int64 iPos2 = 0;
 1982: 
 1983:       if( iCol1 ){
 1984:         *p++ = POS_COLUMN;
 1985:         p += sqlite3Fts3PutVarint(p, iCol1);
 1986:       }
 1987: 
 1988:       assert( *p1!=POS_END && *p1!=POS_COLUMN );
 1989:       assert( *p2!=POS_END && *p2!=POS_COLUMN );
 1990:       fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
 1991:       fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
 1992: 
 1993:       while( 1 ){
 1994:         if( iPos2==iPos1+nToken 
 1995:          || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) 
 1996:         ){
 1997:           sqlite3_int64 iSave;
 1998:           iSave = isSaveLeft ? iPos1 : iPos2;
 1999:           fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
 2000:           pSave = 0;
 2001:           assert( p );
 2002:         }
 2003:         if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
 2004:           if( (*p2&0xFE)==0 ) break;
 2005:           fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
 2006:         }else{
 2007:           if( (*p1&0xFE)==0 ) break;
 2008:           fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
 2009:         }
 2010:       }
 2011: 
 2012:       if( pSave ){
 2013:         assert( pp && p );
 2014:         p = pSave;
 2015:       }
 2016: 
 2017:       fts3ColumnlistCopy(0, &p1);
 2018:       fts3ColumnlistCopy(0, &p2);
 2019:       assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
 2020:       if( 0==*p1 || 0==*p2 ) break;
 2021: 
 2022:       p1++;
 2023:       p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
 2024:       p2++;
 2025:       p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
 2026:     }
 2027: 
 2028:     /* Advance pointer p1 or p2 (whichever corresponds to the smaller of
 2029:     ** iCol1 and iCol2) so that it points to either the 0x00 that marks the
 2030:     ** end of the position list, or the 0x01 that precedes the next 
 2031:     ** column-number in the position list. 
 2032:     */
 2033:     else if( iCol1<iCol2 ){
 2034:       fts3ColumnlistCopy(0, &p1);
 2035:       if( 0==*p1 ) break;
 2036:       p1++;
 2037:       p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
 2038:     }else{
 2039:       fts3ColumnlistCopy(0, &p2);
 2040:       if( 0==*p2 ) break;
 2041:       p2++;
 2042:       p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
 2043:     }
 2044:   }
 2045: 
 2046:   fts3PoslistCopy(0, &p2);
 2047:   fts3PoslistCopy(0, &p1);
 2048:   *pp1 = p1;
 2049:   *pp2 = p2;
 2050:   if( *pp==p ){
 2051:     return 0;
 2052:   }
 2053:   *p++ = 0x00;
 2054:   *pp = p;
 2055:   return 1;
 2056: }
 2057: 
 2058: /*
 2059: ** Merge two position-lists as required by the NEAR operator. The argument
 2060: ** position lists correspond to the left and right phrases of an expression 
 2061: ** like:
 2062: **
 2063: **     "phrase 1" NEAR "phrase number 2"
 2064: **
 2065: ** Position list *pp1 corresponds to the left-hand side of the NEAR 
 2066: ** expression and *pp2 to the right. As usual, the indexes in the position 
 2067: ** lists are the offsets of the last token in each phrase (tokens "1" and "2" 
 2068: ** in the example above).
 2069: **
 2070: ** The output position list - written to *pp - is a copy of *pp2 with those
 2071: ** entries that are not sufficiently NEAR entries in *pp1 removed.
 2072: */
 2073: static int fts3PoslistNearMerge(
 2074:   char **pp,                      /* Output buffer */
 2075:   char *aTmp,                     /* Temporary buffer space */
 2076:   int nRight,                     /* Maximum difference in token positions */
 2077:   int nLeft,                      /* Maximum difference in token positions */
 2078:   char **pp1,                     /* IN/OUT: Left input list */
 2079:   char **pp2                      /* IN/OUT: Right input list */
 2080: ){
 2081:   char *p1 = *pp1;
 2082:   char *p2 = *pp2;
 2083: 
 2084:   char *pTmp1 = aTmp;
 2085:   char *pTmp2;
 2086:   char *aTmp2;
 2087:   int res = 1;
 2088: 
 2089:   fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
 2090:   aTmp2 = pTmp2 = pTmp1;
 2091:   *pp1 = p1;
 2092:   *pp2 = p2;
 2093:   fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
 2094:   if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
 2095:     fts3PoslistMerge(pp, &aTmp, &aTmp2);
 2096:   }else if( pTmp1!=aTmp ){
 2097:     fts3PoslistCopy(pp, &aTmp);
 2098:   }else if( pTmp2!=aTmp2 ){
 2099:     fts3PoslistCopy(pp, &aTmp2);
 2100:   }else{
 2101:     res = 0;
 2102:   }
 2103: 
 2104:   return res;
 2105: }
 2106: 
 2107: /* 
 2108: ** An instance of this function is used to merge together the (potentially
 2109: ** large number of) doclists for each term that matches a prefix query.
 2110: ** See function fts3TermSelectMerge() for details.
 2111: */
 2112: typedef struct TermSelect TermSelect;
 2113: struct TermSelect {
 2114:   char *aaOutput[16];             /* Malloc'd output buffers */
 2115:   int anOutput[16];               /* Size each output buffer in bytes */
 2116: };
 2117: 
 2118: /*
 2119: ** This function is used to read a single varint from a buffer. Parameter
 2120: ** pEnd points 1 byte past the end of the buffer. When this function is
 2121: ** called, if *pp points to pEnd or greater, then the end of the buffer
 2122: ** has been reached. In this case *pp is set to 0 and the function returns.
 2123: **
 2124: ** If *pp does not point to or past pEnd, then a single varint is read
 2125: ** from *pp. *pp is then set to point 1 byte past the end of the read varint.
 2126: **
 2127: ** If bDescIdx is false, the value read is added to *pVal before returning.
 2128: ** If it is true, the value read is subtracted from *pVal before this 
 2129: ** function returns.
 2130: */
 2131: static void fts3GetDeltaVarint3(
 2132:   char **pp,                      /* IN/OUT: Point to read varint from */
 2133:   char *pEnd,                     /* End of buffer */
 2134:   int bDescIdx,                   /* True if docids are descending */
 2135:   sqlite3_int64 *pVal             /* IN/OUT: Integer value */
 2136: ){
 2137:   if( *pp>=pEnd ){
 2138:     *pp = 0;
 2139:   }else{
 2140:     sqlite3_int64 iVal;
 2141:     *pp += sqlite3Fts3GetVarint(*pp, &iVal);
 2142:     if( bDescIdx ){
 2143:       *pVal -= iVal;
 2144:     }else{
 2145:       *pVal += iVal;
 2146:     }
 2147:   }
 2148: }
 2149: 
 2150: /*
 2151: ** This function is used to write a single varint to a buffer. The varint
 2152: ** is written to *pp. Before returning, *pp is set to point 1 byte past the
 2153: ** end of the value written.
 2154: **
 2155: ** If *pbFirst is zero when this function is called, the value written to
 2156: ** the buffer is that of parameter iVal. 
 2157: **
 2158: ** If *pbFirst is non-zero when this function is called, then the value 
 2159: ** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
 2160: ** (if bDescIdx is non-zero).
 2161: **
 2162: ** Before returning, this function always sets *pbFirst to 1 and *piPrev
 2163: ** to the value of parameter iVal.
 2164: */
 2165: static void fts3PutDeltaVarint3(
 2166:   char **pp,                      /* IN/OUT: Output pointer */
 2167:   int bDescIdx,                   /* True for descending docids */
 2168:   sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
 2169:   int *pbFirst,                   /* IN/OUT: True after first int written */
 2170:   sqlite3_int64 iVal              /* Write this value to the list */
 2171: ){
 2172:   sqlite3_int64 iWrite;
 2173:   if( bDescIdx==0 || *pbFirst==0 ){
 2174:     iWrite = iVal - *piPrev;
 2175:   }else{
 2176:     iWrite = *piPrev - iVal;
 2177:   }
 2178:   assert( *pbFirst || *piPrev==0 );
 2179:   assert( *pbFirst==0 || iWrite>0 );
 2180:   *pp += sqlite3Fts3PutVarint(*pp, iWrite);
 2181:   *piPrev = iVal;
 2182:   *pbFirst = 1;
 2183: }
 2184: 
 2185: 
 2186: /*
 2187: ** This macro is used by various functions that merge doclists. The two
 2188: ** arguments are 64-bit docid values. If the value of the stack variable
 2189: ** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). 
 2190: ** Otherwise, (i2-i1).
 2191: **
 2192: ** Using this makes it easier to write code that can merge doclists that are
 2193: ** sorted in either ascending or descending order.
 2194: */
 2195: #define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
 2196: 
 2197: /*
 2198: ** This function does an "OR" merge of two doclists (output contains all
 2199: ** positions contained in either argument doclist). If the docids in the 
 2200: ** input doclists are sorted in ascending order, parameter bDescDoclist
 2201: ** should be false. If they are sorted in ascending order, it should be
 2202: ** passed a non-zero value.
 2203: **
 2204: ** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
 2205: ** containing the output doclist and SQLITE_OK is returned. In this case
 2206: ** *pnOut is set to the number of bytes in the output doclist.
 2207: **
 2208: ** If an error occurs, an SQLite error code is returned. The output values
 2209: ** are undefined in this case.
 2210: */
 2211: static int fts3DoclistOrMerge(
 2212:   int bDescDoclist,               /* True if arguments are desc */
 2213:   char *a1, int n1,               /* First doclist */
 2214:   char *a2, int n2,               /* Second doclist */
 2215:   char **paOut, int *pnOut        /* OUT: Malloc'd doclist */
 2216: ){
 2217:   sqlite3_int64 i1 = 0;
 2218:   sqlite3_int64 i2 = 0;
 2219:   sqlite3_int64 iPrev = 0;
 2220:   char *pEnd1 = &a1[n1];
 2221:   char *pEnd2 = &a2[n2];
 2222:   char *p1 = a1;
 2223:   char *p2 = a2;
 2224:   char *p;
 2225:   char *aOut;
 2226:   int bFirstOut = 0;
 2227: 
 2228:   *paOut = 0;
 2229:   *pnOut = 0;
 2230: 
 2231:   /* Allocate space for the output. Both the input and output doclists
 2232:   ** are delta encoded. If they are in ascending order (bDescDoclist==0),
 2233:   ** then the first docid in each list is simply encoded as a varint. For
 2234:   ** each subsequent docid, the varint stored is the difference between the
 2235:   ** current and previous docid (a positive number - since the list is in
 2236:   ** ascending order).
 2237:   **
 2238:   ** The first docid written to the output is therefore encoded using the 
 2239:   ** same number of bytes as it is in whichever of the input lists it is
 2240:   ** read from. And each subsequent docid read from the same input list 
 2241:   ** consumes either the same or less bytes as it did in the input (since
 2242:   ** the difference between it and the previous value in the output must
 2243:   ** be a positive value less than or equal to the delta value read from 
 2244:   ** the input list). The same argument applies to all but the first docid
 2245:   ** read from the 'other' list. And to the contents of all position lists
 2246:   ** that will be copied and merged from the input to the output.
 2247:   **
 2248:   ** However, if the first docid copied to the output is a negative number,
 2249:   ** then the encoding of the first docid from the 'other' input list may
 2250:   ** be larger in the output than it was in the input (since the delta value
 2251:   ** may be a larger positive integer than the actual docid).
 2252:   **
 2253:   ** The space required to store the output is therefore the sum of the
 2254:   ** sizes of the two inputs, plus enough space for exactly one of the input
 2255:   ** docids to grow. 
 2256:   **
 2257:   ** A symetric argument may be made if the doclists are in descending 
 2258:   ** order.
 2259:   */
 2260:   aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1);
 2261:   if( !aOut ) return SQLITE_NOMEM;
 2262: 
 2263:   p = aOut;
 2264:   fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
 2265:   fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
 2266:   while( p1 || p2 ){
 2267:     sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
 2268: 
 2269:     if( p2 && p1 && iDiff==0 ){
 2270:       fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
 2271:       fts3PoslistMerge(&p, &p1, &p2);
 2272:       fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
 2273:       fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
 2274:     }else if( !p2 || (p1 && iDiff<0) ){
 2275:       fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
 2276:       fts3PoslistCopy(&p, &p1);
 2277:       fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
 2278:     }else{
 2279:       fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
 2280:       fts3PoslistCopy(&p, &p2);
 2281:       fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
 2282:     }
 2283:   }
 2284: 
 2285:   *paOut = aOut;
 2286:   *pnOut = (p-aOut);
 2287:   assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
 2288:   return SQLITE_OK;
 2289: }
 2290: 
 2291: /*
 2292: ** This function does a "phrase" merge of two doclists. In a phrase merge,
 2293: ** the output contains a copy of each position from the right-hand input
 2294: ** doclist for which there is a position in the left-hand input doclist
 2295: ** exactly nDist tokens before it.
 2296: **
 2297: ** If the docids in the input doclists are sorted in ascending order,
 2298: ** parameter bDescDoclist should be false. If they are sorted in ascending 
 2299: ** order, it should be passed a non-zero value.
 2300: **
 2301: ** The right-hand input doclist is overwritten by this function.
 2302: */
 2303: static void fts3DoclistPhraseMerge(
 2304:   int bDescDoclist,               /* True if arguments are desc */
 2305:   int nDist,                      /* Distance from left to right (1=adjacent) */
 2306:   char *aLeft, int nLeft,         /* Left doclist */
 2307:   char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
 2308: ){
 2309:   sqlite3_int64 i1 = 0;
 2310:   sqlite3_int64 i2 = 0;
 2311:   sqlite3_int64 iPrev = 0;
 2312:   char *pEnd1 = &aLeft[nLeft];
 2313:   char *pEnd2 = &aRight[*pnRight];
 2314:   char *p1 = aLeft;
 2315:   char *p2 = aRight;
 2316:   char *p;
 2317:   int bFirstOut = 0;
 2318:   char *aOut = aRight;
 2319: 
 2320:   assert( nDist>0 );
 2321: 
 2322:   p = aOut;
 2323:   fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
 2324:   fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
 2325: 
 2326:   while( p1 && p2 ){
 2327:     sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
 2328:     if( iDiff==0 ){
 2329:       char *pSave = p;
 2330:       sqlite3_int64 iPrevSave = iPrev;
 2331:       int bFirstOutSave = bFirstOut;
 2332: 
 2333:       fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
 2334:       if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
 2335:         p = pSave;
 2336:         iPrev = iPrevSave;
 2337:         bFirstOut = bFirstOutSave;
 2338:       }
 2339:       fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
 2340:       fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
 2341:     }else if( iDiff<0 ){
 2342:       fts3PoslistCopy(0, &p1);
 2343:       fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
 2344:     }else{
 2345:       fts3PoslistCopy(0, &p2);
 2346:       fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
 2347:     }
 2348:   }
 2349: 
 2350:   *pnRight = p - aOut;
 2351: }
 2352: 
 2353: /*
 2354: ** Argument pList points to a position list nList bytes in size. This
 2355: ** function checks to see if the position list contains any entries for
 2356: ** a token in position 0 (of any column). If so, it writes argument iDelta
 2357: ** to the output buffer pOut, followed by a position list consisting only
 2358: ** of the entries from pList at position 0, and terminated by an 0x00 byte.
 2359: ** The value returned is the number of bytes written to pOut (if any).
 2360: */
 2361: int sqlite3Fts3FirstFilter(
 2362:   sqlite3_int64 iDelta,           /* Varint that may be written to pOut */
 2363:   char *pList,                    /* Position list (no 0x00 term) */
 2364:   int nList,                      /* Size of pList in bytes */
 2365:   char *pOut                      /* Write output here */
 2366: ){
 2367:   int nOut = 0;
 2368:   int bWritten = 0;               /* True once iDelta has been written */
 2369:   char *p = pList;
 2370:   char *pEnd = &pList[nList];
 2371: 
 2372:   if( *p!=0x01 ){
 2373:     if( *p==0x02 ){
 2374:       nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
 2375:       pOut[nOut++] = 0x02;
 2376:       bWritten = 1;
 2377:     }
 2378:     fts3ColumnlistCopy(0, &p);
 2379:   }
 2380: 
 2381:   while( p<pEnd && *p==0x01 ){
 2382:     sqlite3_int64 iCol;
 2383:     p++;
 2384:     p += sqlite3Fts3GetVarint(p, &iCol);
 2385:     if( *p==0x02 ){
 2386:       if( bWritten==0 ){
 2387:         nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
 2388:         bWritten = 1;
 2389:       }
 2390:       pOut[nOut++] = 0x01;
 2391:       nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
 2392:       pOut[nOut++] = 0x02;
 2393:     }
 2394:     fts3ColumnlistCopy(0, &p);
 2395:   }
 2396:   if( bWritten ){
 2397:     pOut[nOut++] = 0x00;
 2398:   }
 2399: 
 2400:   return nOut;
 2401: }
 2402: 
 2403: 
 2404: /*
 2405: ** Merge all doclists in the TermSelect.aaOutput[] array into a single
 2406: ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
 2407: ** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
 2408: **
 2409: ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
 2410: ** the responsibility of the caller to free any doclists left in the
 2411: ** TermSelect.aaOutput[] array.
 2412: */
 2413: static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
 2414:   char *aOut = 0;
 2415:   int nOut = 0;
 2416:   int i;
 2417: 
 2418:   /* Loop through the doclists in the aaOutput[] array. Merge them all
 2419:   ** into a single doclist.
 2420:   */
 2421:   for(i=0; i<SizeofArray(pTS->aaOutput); i++){
 2422:     if( pTS->aaOutput[i] ){
 2423:       if( !aOut ){
 2424:         aOut = pTS->aaOutput[i];
 2425:         nOut = pTS->anOutput[i];
 2426:         pTS->aaOutput[i] = 0;
 2427:       }else{
 2428:         int nNew;
 2429:         char *aNew;
 2430: 
 2431:         int rc = fts3DoclistOrMerge(p->bDescIdx, 
 2432:             pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew
 2433:         );
 2434:         if( rc!=SQLITE_OK ){
 2435:           sqlite3_free(aOut);
 2436:           return rc;
 2437:         }
 2438: 
 2439:         sqlite3_free(pTS->aaOutput[i]);
 2440:         sqlite3_free(aOut);
 2441:         pTS->aaOutput[i] = 0;
 2442:         aOut = aNew;
 2443:         nOut = nNew;
 2444:       }
 2445:     }
 2446:   }
 2447: 
 2448:   pTS->aaOutput[0] = aOut;
 2449:   pTS->anOutput[0] = nOut;
 2450:   return SQLITE_OK;
 2451: }
 2452: 
 2453: /*
 2454: ** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
 2455: ** as the first argument. The merge is an "OR" merge (see function
 2456: ** fts3DoclistOrMerge() for details).
 2457: **
 2458: ** This function is called with the doclist for each term that matches
 2459: ** a queried prefix. It merges all these doclists into one, the doclist
 2460: ** for the specified prefix. Since there can be a very large number of
 2461: ** doclists to merge, the merging is done pair-wise using the TermSelect
 2462: ** object.
 2463: **
 2464: ** This function returns SQLITE_OK if the merge is successful, or an
 2465: ** SQLite error code (SQLITE_NOMEM) if an error occurs.
 2466: */
 2467: static int fts3TermSelectMerge(
 2468:   Fts3Table *p,                   /* FTS table handle */
 2469:   TermSelect *pTS,                /* TermSelect object to merge into */
 2470:   char *aDoclist,                 /* Pointer to doclist */
 2471:   int nDoclist                    /* Size of aDoclist in bytes */
 2472: ){
 2473:   if( pTS->aaOutput[0]==0 ){
 2474:     /* If this is the first term selected, copy the doclist to the output
 2475:     ** buffer using memcpy(). */
 2476:     pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
 2477:     pTS->anOutput[0] = nDoclist;
 2478:     if( pTS->aaOutput[0] ){
 2479:       memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
 2480:     }else{
 2481:       return SQLITE_NOMEM;
 2482:     }
 2483:   }else{
 2484:     char *aMerge = aDoclist;
 2485:     int nMerge = nDoclist;
 2486:     int iOut;
 2487: 
 2488:     for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
 2489:       if( pTS->aaOutput[iOut]==0 ){
 2490:         assert( iOut>0 );
 2491:         pTS->aaOutput[iOut] = aMerge;
 2492:         pTS->anOutput[iOut] = nMerge;
 2493:         break;
 2494:       }else{
 2495:         char *aNew;
 2496:         int nNew;
 2497: 
 2498:         int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, 
 2499:             pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew
 2500:         );
 2501:         if( rc!=SQLITE_OK ){
 2502:           if( aMerge!=aDoclist ) sqlite3_free(aMerge);
 2503:           return rc;
 2504:         }
 2505: 
 2506:         if( aMerge!=aDoclist ) sqlite3_free(aMerge);
 2507:         sqlite3_free(pTS->aaOutput[iOut]);
 2508:         pTS->aaOutput[iOut] = 0;
 2509:   
 2510:         aMerge = aNew;
 2511:         nMerge = nNew;
 2512:         if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
 2513:           pTS->aaOutput[iOut] = aMerge;
 2514:           pTS->anOutput[iOut] = nMerge;
 2515:         }
 2516:       }
 2517:     }
 2518:   }
 2519:   return SQLITE_OK;
 2520: }
 2521: 
 2522: /*
 2523: ** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
 2524: */
 2525: static int fts3SegReaderCursorAppend(
 2526:   Fts3MultiSegReader *pCsr, 
 2527:   Fts3SegReader *pNew
 2528: ){
 2529:   if( (pCsr->nSegment%16)==0 ){
 2530:     Fts3SegReader **apNew;
 2531:     int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
 2532:     apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
 2533:     if( !apNew ){
 2534:       sqlite3Fts3SegReaderFree(pNew);
 2535:       return SQLITE_NOMEM;
 2536:     }
 2537:     pCsr->apSegment = apNew;
 2538:   }
 2539:   pCsr->apSegment[pCsr->nSegment++] = pNew;
 2540:   return SQLITE_OK;
 2541: }
 2542: 
 2543: /*
 2544: ** Add seg-reader objects to the Fts3MultiSegReader object passed as the
 2545: ** 8th argument.
 2546: **
 2547: ** This function returns SQLITE_OK if successful, or an SQLite error code
 2548: ** otherwise.
 2549: */
 2550: static int fts3SegReaderCursor(
 2551:   Fts3Table *p,                   /* FTS3 table handle */
 2552:   int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
 2553:   int iLevel,                     /* Level of segments to scan */
 2554:   const char *zTerm,              /* Term to query for */
 2555:   int nTerm,                      /* Size of zTerm in bytes */
 2556:   int isPrefix,                   /* True for a prefix search */
 2557:   int isScan,                     /* True to scan from zTerm to EOF */
 2558:   Fts3MultiSegReader *pCsr        /* Cursor object to populate */
 2559: ){
 2560:   int rc = SQLITE_OK;             /* Error code */
 2561:   sqlite3_stmt *pStmt = 0;        /* Statement to iterate through segments */
 2562:   int rc2;                        /* Result of sqlite3_reset() */
 2563: 
 2564:   /* If iLevel is less than 0 and this is not a scan, include a seg-reader 
 2565:   ** for the pending-terms. If this is a scan, then this call must be being
 2566:   ** made by an fts4aux module, not an FTS table. In this case calling
 2567:   ** Fts3SegReaderPending might segfault, as the data structures used by 
 2568:   ** fts4aux are not completely populated. So it's easiest to filter these
 2569:   ** calls out here.  */
 2570:   if( iLevel<0 && p->aIndex ){
 2571:     Fts3SegReader *pSeg = 0;
 2572:     rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg);
 2573:     if( rc==SQLITE_OK && pSeg ){
 2574:       rc = fts3SegReaderCursorAppend(pCsr, pSeg);
 2575:     }
 2576:   }
 2577: 
 2578:   if( iLevel!=FTS3_SEGCURSOR_PENDING ){
 2579:     if( rc==SQLITE_OK ){
 2580:       rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
 2581:     }
 2582: 
 2583:     while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
 2584:       Fts3SegReader *pSeg = 0;
 2585: 
 2586:       /* Read the values returned by the SELECT into local variables. */
 2587:       sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
 2588:       sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
 2589:       sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
 2590:       int nRoot = sqlite3_column_bytes(pStmt, 4);
 2591:       char const *zRoot = sqlite3_column_blob(pStmt, 4);
 2592: 
 2593:       /* If zTerm is not NULL, and this segment is not stored entirely on its
 2594:       ** root node, the range of leaves scanned can be reduced. Do this. */
 2595:       if( iStartBlock && zTerm ){
 2596:         sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0);
 2597:         rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi);
 2598:         if( rc!=SQLITE_OK ) goto finished;
 2599:         if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock;
 2600:       }
 2601:  
 2602:       rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, 
 2603:           iStartBlock, iLeavesEndBlock, iEndBlock, zRoot, nRoot, &pSeg
 2604:       );
 2605:       if( rc!=SQLITE_OK ) goto finished;
 2606:       rc = fts3SegReaderCursorAppend(pCsr, pSeg);
 2607:     }
 2608:   }
 2609: 
 2610:  finished:
 2611:   rc2 = sqlite3_reset(pStmt);
 2612:   if( rc==SQLITE_DONE ) rc = rc2;
 2613: 
 2614:   return rc;
 2615: }
 2616: 
 2617: /*
 2618: ** Set up a cursor object for iterating through a full-text index or a 
 2619: ** single level therein.
 2620: */
 2621: int sqlite3Fts3SegReaderCursor(
 2622:   Fts3Table *p,                   /* FTS3 table handle */
 2623:   int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
 2624:   int iLevel,                     /* Level of segments to scan */
 2625:   const char *zTerm,              /* Term to query for */
 2626:   int nTerm,                      /* Size of zTerm in bytes */
 2627:   int isPrefix,                   /* True for a prefix search */
 2628:   int isScan,                     /* True to scan from zTerm to EOF */
 2629:   Fts3MultiSegReader *pCsr       /* Cursor object to populate */
 2630: ){
 2631:   assert( iIndex>=0 && iIndex<p->nIndex );
 2632:   assert( iLevel==FTS3_SEGCURSOR_ALL
 2633:       ||  iLevel==FTS3_SEGCURSOR_PENDING 
 2634:       ||  iLevel>=0
 2635:   );
 2636:   assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
 2637:   assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
 2638:   assert( isPrefix==0 || isScan==0 );
 2639: 
 2640:   /* "isScan" is only set to true by the ft4aux module, an ordinary
 2641:   ** full-text tables. */
 2642:   assert( isScan==0 || p->aIndex==0 );
 2643: 
 2644:   memset(pCsr, 0, sizeof(Fts3MultiSegReader));
 2645: 
 2646:   return fts3SegReaderCursor(
 2647:       p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
 2648:   );
 2649: }
 2650: 
 2651: /*
 2652: ** In addition to its current configuration, have the Fts3MultiSegReader
 2653: ** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
 2654: **
 2655: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 2656: */
 2657: static int fts3SegReaderCursorAddZero(
 2658:   Fts3Table *p,                   /* FTS virtual table handle */
 2659:   const char *zTerm,              /* Term to scan doclist of */
 2660:   int nTerm,                      /* Number of bytes in zTerm */
 2661:   Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
 2662: ){
 2663:   return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
 2664: }
 2665: 
 2666: /*
 2667: ** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
 2668: ** if isPrefix is true, to scan the doclist for all terms for which 
 2669: ** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
 2670: ** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
 2671: ** an SQLite error code.
 2672: **
 2673: ** It is the responsibility of the caller to free this object by eventually
 2674: ** passing it to fts3SegReaderCursorFree() 
 2675: **
 2676: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 2677: ** Output parameter *ppSegcsr is set to 0 if an error occurs.
 2678: */
 2679: static int fts3TermSegReaderCursor(
 2680:   Fts3Cursor *pCsr,               /* Virtual table cursor handle */
 2681:   const char *zTerm,              /* Term to query for */
 2682:   int nTerm,                      /* Size of zTerm in bytes */
 2683:   int isPrefix,                   /* True for a prefix search */
 2684:   Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
 2685: ){
 2686:   Fts3MultiSegReader *pSegcsr;    /* Object to allocate and return */
 2687:   int rc = SQLITE_NOMEM;          /* Return code */
 2688: 
 2689:   pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
 2690:   if( pSegcsr ){
 2691:     int i;
 2692:     int bFound = 0;               /* True once an index has been found */
 2693:     Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
 2694: 
 2695:     if( isPrefix ){
 2696:       for(i=1; bFound==0 && i<p->nIndex; i++){
 2697:         if( p->aIndex[i].nPrefix==nTerm ){
 2698:           bFound = 1;
 2699:           rc = sqlite3Fts3SegReaderCursor(
 2700:               p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);
 2701:           pSegcsr->bLookup = 1;
 2702:         }
 2703:       }
 2704: 
 2705:       for(i=1; bFound==0 && i<p->nIndex; i++){
 2706:         if( p->aIndex[i].nPrefix==nTerm+1 ){
 2707:           bFound = 1;
 2708:           rc = sqlite3Fts3SegReaderCursor(
 2709:               p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
 2710:           );
 2711:           if( rc==SQLITE_OK ){
 2712:             rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);
 2713:           }
 2714:         }
 2715:       }
 2716:     }
 2717: 
 2718:     if( bFound==0 ){
 2719:       rc = sqlite3Fts3SegReaderCursor(
 2720:           p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
 2721:       );
 2722:       pSegcsr->bLookup = !isPrefix;
 2723:     }
 2724:   }
 2725: 
 2726:   *ppSegcsr = pSegcsr;
 2727:   return rc;
 2728: }
 2729: 
 2730: /*
 2731: ** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
 2732: */
 2733: static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
 2734:   sqlite3Fts3SegReaderFinish(pSegcsr);
 2735:   sqlite3_free(pSegcsr);
 2736: }
 2737: 
 2738: /*
 2739: ** This function retreives the doclist for the specified term (or term
 2740: ** prefix) from the database.
 2741: */
 2742: static int fts3TermSelect(
 2743:   Fts3Table *p,                   /* Virtual table handle */
 2744:   Fts3PhraseToken *pTok,          /* Token to query for */
 2745:   int iColumn,                    /* Column to query (or -ve for all columns) */
 2746:   int *pnOut,                     /* OUT: Size of buffer at *ppOut */
 2747:   char **ppOut                    /* OUT: Malloced result buffer */
 2748: ){
 2749:   int rc;                         /* Return code */
 2750:   Fts3MultiSegReader *pSegcsr;    /* Seg-reader cursor for this term */
 2751:   TermSelect tsc;                 /* Object for pair-wise doclist merging */
 2752:   Fts3SegFilter filter;           /* Segment term filter configuration */
 2753: 
 2754:   pSegcsr = pTok->pSegcsr;
 2755:   memset(&tsc, 0, sizeof(TermSelect));
 2756: 
 2757:   filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
 2758:         | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
 2759:         | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
 2760:         | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
 2761:   filter.iCol = iColumn;
 2762:   filter.zTerm = pTok->z;
 2763:   filter.nTerm = pTok->n;
 2764: 
 2765:   rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
 2766:   while( SQLITE_OK==rc
 2767:       && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) 
 2768:   ){
 2769:     rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
 2770:   }
 2771: 
 2772:   if( rc==SQLITE_OK ){
 2773:     rc = fts3TermSelectFinishMerge(p, &tsc);
 2774:   }
 2775:   if( rc==SQLITE_OK ){
 2776:     *ppOut = tsc.aaOutput[0];
 2777:     *pnOut = tsc.anOutput[0];
 2778:   }else{
 2779:     int i;
 2780:     for(i=0; i<SizeofArray(tsc.aaOutput); i++){
 2781:       sqlite3_free(tsc.aaOutput[i]);
 2782:     }
 2783:   }
 2784: 
 2785:   fts3SegReaderCursorFree(pSegcsr);
 2786:   pTok->pSegcsr = 0;
 2787:   return rc;
 2788: }
 2789: 
 2790: /*
 2791: ** This function counts the total number of docids in the doclist stored
 2792: ** in buffer aList[], size nList bytes.
 2793: **
 2794: ** If the isPoslist argument is true, then it is assumed that the doclist
 2795: ** contains a position-list following each docid. Otherwise, it is assumed
 2796: ** that the doclist is simply a list of docids stored as delta encoded 
 2797: ** varints.
 2798: */
 2799: static int fts3DoclistCountDocids(char *aList, int nList){
 2800:   int nDoc = 0;                   /* Return value */
 2801:   if( aList ){
 2802:     char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
 2803:     char *p = aList;              /* Cursor */
 2804:     while( p<aEnd ){
 2805:       nDoc++;
 2806:       while( (*p++)&0x80 );     /* Skip docid varint */
 2807:       fts3PoslistCopy(0, &p);   /* Skip over position list */
 2808:     }
 2809:   }
 2810: 
 2811:   return nDoc;
 2812: }
 2813: 
 2814: /*
 2815: ** Advance the cursor to the next row in the %_content table that
 2816: ** matches the search criteria.  For a MATCH search, this will be
 2817: ** the next row that matches. For a full-table scan, this will be
 2818: ** simply the next row in the %_content table.  For a docid lookup,
 2819: ** this routine simply sets the EOF flag.
 2820: **
 2821: ** Return SQLITE_OK if nothing goes wrong.  SQLITE_OK is returned
 2822: ** even if we reach end-of-file.  The fts3EofMethod() will be called
 2823: ** subsequently to determine whether or not an EOF was hit.
 2824: */
 2825: static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
 2826:   int rc;
 2827:   Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
 2828:   if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){
 2829:     if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
 2830:       pCsr->isEof = 1;
 2831:       rc = sqlite3_reset(pCsr->pStmt);
 2832:     }else{
 2833:       pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
 2834:       rc = SQLITE_OK;
 2835:     }
 2836:   }else{
 2837:     rc = fts3EvalNext((Fts3Cursor *)pCursor);
 2838:   }
 2839:   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
 2840:   return rc;
 2841: }
 2842: 
 2843: /*
 2844: ** This is the xFilter interface for the virtual table.  See
 2845: ** the virtual table xFilter method documentation for additional
 2846: ** information.
 2847: **
 2848: ** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
 2849: ** the %_content table.
 2850: **
 2851: ** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
 2852: ** in the %_content table.
 2853: **
 2854: ** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index.  The
 2855: ** column on the left-hand side of the MATCH operator is column
 2856: ** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed.  argv[0] is the right-hand
 2857: ** side of the MATCH operator.
 2858: */
 2859: static int fts3FilterMethod(
 2860:   sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
 2861:   int idxNum,                     /* Strategy index */
 2862:   const char *idxStr,             /* Unused */
 2863:   int nVal,                       /* Number of elements in apVal */
 2864:   sqlite3_value **apVal           /* Arguments for the indexing scheme */
 2865: ){
 2866:   int rc;
 2867:   char *zSql;                     /* SQL statement used to access %_content */
 2868:   Fts3Table *p = (Fts3Table *)pCursor->pVtab;
 2869:   Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
 2870: 
 2871:   UNUSED_PARAMETER(idxStr);
 2872:   UNUSED_PARAMETER(nVal);
 2873: 
 2874:   assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
 2875:   assert( nVal==0 || nVal==1 );
 2876:   assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
 2877:   assert( p->pSegments==0 );
 2878: 
 2879:   /* In case the cursor has been used before, clear it now. */
 2880:   sqlite3_finalize(pCsr->pStmt);
 2881:   sqlite3_free(pCsr->aDoclist);
 2882:   sqlite3Fts3ExprFree(pCsr->pExpr);
 2883:   memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
 2884: 
 2885:   if( idxStr ){
 2886:     pCsr->bDesc = (idxStr[0]=='D');
 2887:   }else{
 2888:     pCsr->bDesc = p->bDescIdx;
 2889:   }
 2890:   pCsr->eSearch = (i16)idxNum;
 2891: 
 2892:   if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){
 2893:     int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
 2894:     const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
 2895: 
 2896:     if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
 2897:       return SQLITE_NOMEM;
 2898:     }
 2899: 
 2900:     rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, 
 2901:         p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
 2902:     );
 2903:     if( rc!=SQLITE_OK ){
 2904:       if( rc==SQLITE_ERROR ){
 2905:         static const char *zErr = "malformed MATCH expression: [%s]";
 2906:         p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
 2907:       }
 2908:       return rc;
 2909:     }
 2910: 
 2911:     rc = sqlite3Fts3ReadLock(p);
 2912:     if( rc!=SQLITE_OK ) return rc;
 2913: 
 2914:     rc = fts3EvalStart(pCsr);
 2915: 
 2916:     sqlite3Fts3SegmentsClose(p);
 2917:     if( rc!=SQLITE_OK ) return rc;
 2918:     pCsr->pNextId = pCsr->aDoclist;
 2919:     pCsr->iPrevId = 0;
 2920:   }
 2921: 
 2922:   /* Compile a SELECT statement for this cursor. For a full-table-scan, the
 2923:   ** statement loops through all rows of the %_content table. For a
 2924:   ** full-text query or docid lookup, the statement retrieves a single
 2925:   ** row by docid.
 2926:   */
 2927:   if( idxNum==FTS3_FULLSCAN_SEARCH ){
 2928:     zSql = sqlite3_mprintf(
 2929:         "SELECT %s ORDER BY rowid %s",
 2930:         p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
 2931:     );
 2932:     if( zSql ){
 2933:       rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
 2934:       sqlite3_free(zSql);
 2935:     }else{
 2936:       rc = SQLITE_NOMEM;
 2937:     }
 2938:   }else if( idxNum==FTS3_DOCID_SEARCH ){
 2939:     rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
 2940:     if( rc==SQLITE_OK ){
 2941:       rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
 2942:     }
 2943:   }
 2944:   if( rc!=SQLITE_OK ) return rc;
 2945: 
 2946:   return fts3NextMethod(pCursor);
 2947: }
 2948: 
 2949: /* 
 2950: ** This is the xEof method of the virtual table. SQLite calls this 
 2951: ** routine to find out if it has reached the end of a result set.
 2952: */
 2953: static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
 2954:   return ((Fts3Cursor *)pCursor)->isEof;
 2955: }
 2956: 
 2957: /* 
 2958: ** This is the xRowid method. The SQLite core calls this routine to
 2959: ** retrieve the rowid for the current row of the result set. fts3
 2960: ** exposes %_content.docid as the rowid for the virtual table. The
 2961: ** rowid should be written to *pRowid.
 2962: */
 2963: static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
 2964:   Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
 2965:   *pRowid = pCsr->iPrevId;
 2966:   return SQLITE_OK;
 2967: }
 2968: 
 2969: /* 
 2970: ** This is the xColumn method, called by SQLite to request a value from
 2971: ** the row that the supplied cursor currently points to.
 2972: */
 2973: static int fts3ColumnMethod(
 2974:   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
 2975:   sqlite3_context *pContext,      /* Context for sqlite3_result_xxx() calls */
 2976:   int iCol                        /* Index of column to read value from */
 2977: ){
 2978:   int rc = SQLITE_OK;             /* Return Code */
 2979:   Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
 2980:   Fts3Table *p = (Fts3Table *)pCursor->pVtab;
 2981: 
 2982:   /* The column value supplied by SQLite must be in range. */
 2983:   assert( iCol>=0 && iCol<=p->nColumn+1 );
 2984: 
 2985:   if( iCol==p->nColumn+1 ){
 2986:     /* This call is a request for the "docid" column. Since "docid" is an 
 2987:     ** alias for "rowid", use the xRowid() method to obtain the value.
 2988:     */
 2989:     sqlite3_result_int64(pContext, pCsr->iPrevId);
 2990:   }else if( iCol==p->nColumn ){
 2991:     /* The extra column whose name is the same as the table.
 2992:     ** Return a blob which is a pointer to the cursor.
 2993:     */
 2994:     sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
 2995:   }else{
 2996:     rc = fts3CursorSeek(0, pCsr);
 2997:     if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
 2998:       sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
 2999:     }
 3000:   }
 3001: 
 3002:   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
 3003:   return rc;
 3004: }
 3005: 
 3006: /* 
 3007: ** This function is the implementation of the xUpdate callback used by 
 3008: ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
 3009: ** inserted, updated or deleted.
 3010: */
 3011: static int fts3UpdateMethod(
 3012:   sqlite3_vtab *pVtab,            /* Virtual table handle */
 3013:   int nArg,                       /* Size of argument array */
 3014:   sqlite3_value **apVal,          /* Array of arguments */
 3015:   sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
 3016: ){
 3017:   return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
 3018: }
 3019: 
 3020: /*
 3021: ** Implementation of xSync() method. Flush the contents of the pending-terms
 3022: ** hash-table to the database.
 3023: */
 3024: static int fts3SyncMethod(sqlite3_vtab *pVtab){
 3025:   int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab);
 3026:   sqlite3Fts3SegmentsClose((Fts3Table *)pVtab);
 3027:   return rc;
 3028: }
 3029: 
 3030: /*
 3031: ** Implementation of xBegin() method. This is a no-op.
 3032: */
 3033: static int fts3BeginMethod(sqlite3_vtab *pVtab){
 3034:   TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
 3035:   UNUSED_PARAMETER(pVtab);
 3036:   assert( p->pSegments==0 );
 3037:   assert( p->nPendingData==0 );
 3038:   assert( p->inTransaction!=1 );
 3039:   TESTONLY( p->inTransaction = 1 );
 3040:   TESTONLY( p->mxSavepoint = -1; );
 3041:   return SQLITE_OK;
 3042: }
 3043: 
 3044: /*
 3045: ** Implementation of xCommit() method. This is a no-op. The contents of
 3046: ** the pending-terms hash-table have already been flushed into the database
 3047: ** by fts3SyncMethod().
 3048: */
 3049: static int fts3CommitMethod(sqlite3_vtab *pVtab){
 3050:   TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
 3051:   UNUSED_PARAMETER(pVtab);
 3052:   assert( p->nPendingData==0 );
 3053:   assert( p->inTransaction!=0 );
 3054:   assert( p->pSegments==0 );
 3055:   TESTONLY( p->inTransaction = 0 );
 3056:   TESTONLY( p->mxSavepoint = -1; );
 3057:   return SQLITE_OK;
 3058: }
 3059: 
 3060: /*
 3061: ** Implementation of xRollback(). Discard the contents of the pending-terms
 3062: ** hash-table. Any changes made to the database are reverted by SQLite.
 3063: */
 3064: static int fts3RollbackMethod(sqlite3_vtab *pVtab){
 3065:   Fts3Table *p = (Fts3Table*)pVtab;
 3066:   sqlite3Fts3PendingTermsClear(p);
 3067:   assert( p->inTransaction!=0 );
 3068:   TESTONLY( p->inTransaction = 0 );
 3069:   TESTONLY( p->mxSavepoint = -1; );
 3070:   return SQLITE_OK;
 3071: }
 3072: 
 3073: /*
 3074: ** When called, *ppPoslist must point to the byte immediately following the
 3075: ** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
 3076: ** moves *ppPoslist so that it instead points to the first byte of the
 3077: ** same position list.
 3078: */
 3079: static void fts3ReversePoslist(char *pStart, char **ppPoslist){
 3080:   char *p = &(*ppPoslist)[-2];
 3081:   char c = 0;
 3082: 
 3083:   while( p>pStart && (c=*p--)==0 );
 3084:   while( p>pStart && (*p & 0x80) | c ){ 
 3085:     c = *p--; 
 3086:   }
 3087:   if( p>pStart ){ p = &p[2]; }
 3088:   while( *p++&0x80 );
 3089:   *ppPoslist = p;
 3090: }
 3091: 
 3092: /*
 3093: ** Helper function used by the implementation of the overloaded snippet(),
 3094: ** offsets() and optimize() SQL functions.
 3095: **
 3096: ** If the value passed as the third argument is a blob of size
 3097: ** sizeof(Fts3Cursor*), then the blob contents are copied to the 
 3098: ** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
 3099: ** message is written to context pContext and SQLITE_ERROR returned. The
 3100: ** string passed via zFunc is used as part of the error message.
 3101: */
 3102: static int fts3FunctionArg(
 3103:   sqlite3_context *pContext,      /* SQL function call context */
 3104:   const char *zFunc,              /* Function name */
 3105:   sqlite3_value *pVal,            /* argv[0] passed to function */
 3106:   Fts3Cursor **ppCsr              /* OUT: Store cursor handle here */
 3107: ){
 3108:   Fts3Cursor *pRet;
 3109:   if( sqlite3_value_type(pVal)!=SQLITE_BLOB 
 3110:    || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *)
 3111:   ){
 3112:     char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
 3113:     sqlite3_result_error(pContext, zErr, -1);
 3114:     sqlite3_free(zErr);
 3115:     return SQLITE_ERROR;
 3116:   }
 3117:   memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *));
 3118:   *ppCsr = pRet;
 3119:   return SQLITE_OK;
 3120: }
 3121: 
 3122: /*
 3123: ** Implementation of the snippet() function for FTS3
 3124: */
 3125: static void fts3SnippetFunc(
 3126:   sqlite3_context *pContext,      /* SQLite function call context */
 3127:   int nVal,                       /* Size of apVal[] array */
 3128:   sqlite3_value **apVal           /* Array of arguments */
 3129: ){
 3130:   Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
 3131:   const char *zStart = "<b>";
 3132:   const char *zEnd = "</b>";
 3133:   const char *zEllipsis = "<b>...</b>";
 3134:   int iCol = -1;
 3135:   int nToken = 15;                /* Default number of tokens in snippet */
 3136: 
 3137:   /* There must be at least one argument passed to this function (otherwise
 3138:   ** the non-overloaded version would have been called instead of this one).
 3139:   */
 3140:   assert( nVal>=1 );
 3141: 
 3142:   if( nVal>6 ){
 3143:     sqlite3_result_error(pContext, 
 3144:         "wrong number of arguments to function snippet()", -1);
 3145:     return;
 3146:   }
 3147:   if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;
 3148: 
 3149:   switch( nVal ){
 3150:     case 6: nToken = sqlite3_value_int(apVal[5]);
 3151:     case 5: iCol = sqlite3_value_int(apVal[4]);
 3152:     case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
 3153:     case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]);
 3154:     case 2: zStart = (const char*)sqlite3_value_text(apVal[1]);
 3155:   }
 3156:   if( !zEllipsis || !zEnd || !zStart ){
 3157:     sqlite3_result_error_nomem(pContext);
 3158:   }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
 3159:     sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
 3160:   }
 3161: }
 3162: 
 3163: /*
 3164: ** Implementation of the offsets() function for FTS3
 3165: */
 3166: static void fts3OffsetsFunc(
 3167:   sqlite3_context *pContext,      /* SQLite function call context */
 3168:   int nVal,                       /* Size of argument array */
 3169:   sqlite3_value **apVal           /* Array of arguments */
 3170: ){
 3171:   Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
 3172: 
 3173:   UNUSED_PARAMETER(nVal);
 3174: 
 3175:   assert( nVal==1 );
 3176:   if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return;
 3177:   assert( pCsr );
 3178:   if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
 3179:     sqlite3Fts3Offsets(pContext, pCsr);
 3180:   }
 3181: }
 3182: 
 3183: /* 
 3184: ** Implementation of the special optimize() function for FTS3. This 
 3185: ** function merges all segments in the database to a single segment.
 3186: ** Example usage is:
 3187: **
 3188: **   SELECT optimize(t) FROM t LIMIT 1;
 3189: **
 3190: ** where 't' is the name of an FTS3 table.
 3191: */
 3192: static void fts3OptimizeFunc(
 3193:   sqlite3_context *pContext,      /* SQLite function call context */
 3194:   int nVal,                       /* Size of argument array */
 3195:   sqlite3_value **apVal           /* Array of arguments */
 3196: ){
 3197:   int rc;                         /* Return code */
 3198:   Fts3Table *p;                   /* Virtual table handle */
 3199:   Fts3Cursor *pCursor;            /* Cursor handle passed through apVal[0] */
 3200: 
 3201:   UNUSED_PARAMETER(nVal);
 3202: 
 3203:   assert( nVal==1 );
 3204:   if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;
 3205:   p = (Fts3Table *)pCursor->base.pVtab;
 3206:   assert( p );
 3207: 
 3208:   rc = sqlite3Fts3Optimize(p);
 3209: 
 3210:   switch( rc ){
 3211:     case SQLITE_OK:
 3212:       sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
 3213:       break;
 3214:     case SQLITE_DONE:
 3215:       sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
 3216:       break;
 3217:     default:
 3218:       sqlite3_result_error_code(pContext, rc);
 3219:       break;
 3220:   }
 3221: }
 3222: 
 3223: /*
 3224: ** Implementation of the matchinfo() function for FTS3
 3225: */
 3226: static void fts3MatchinfoFunc(
 3227:   sqlite3_context *pContext,      /* SQLite function call context */
 3228:   int nVal,                       /* Size of argument array */
 3229:   sqlite3_value **apVal           /* Array of arguments */
 3230: ){
 3231:   Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
 3232:   assert( nVal==1 || nVal==2 );
 3233:   if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
 3234:     const char *zArg = 0;
 3235:     if( nVal>1 ){
 3236:       zArg = (const char *)sqlite3_value_text(apVal[1]);
 3237:     }
 3238:     sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
 3239:   }
 3240: }
 3241: 
 3242: /*
 3243: ** This routine implements the xFindFunction method for the FTS3
 3244: ** virtual table.
 3245: */
 3246: static int fts3FindFunctionMethod(
 3247:   sqlite3_vtab *pVtab,            /* Virtual table handle */
 3248:   int nArg,                       /* Number of SQL function arguments */
 3249:   const char *zName,              /* Name of SQL function */
 3250:   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
 3251:   void **ppArg                    /* Unused */
 3252: ){
 3253:   struct Overloaded {
 3254:     const char *zName;
 3255:     void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
 3256:   } aOverload[] = {
 3257:     { "snippet", fts3SnippetFunc },
 3258:     { "offsets", fts3OffsetsFunc },
 3259:     { "optimize", fts3OptimizeFunc },
 3260:     { "matchinfo", fts3MatchinfoFunc },
 3261:   };
 3262:   int i;                          /* Iterator variable */
 3263: 
 3264:   UNUSED_PARAMETER(pVtab);
 3265:   UNUSED_PARAMETER(nArg);
 3266:   UNUSED_PARAMETER(ppArg);
 3267: 
 3268:   for(i=0; i<SizeofArray(aOverload); i++){
 3269:     if( strcmp(zName, aOverload[i].zName)==0 ){
 3270:       *pxFunc = aOverload[i].xFunc;
 3271:       return 1;
 3272:     }
 3273:   }
 3274: 
 3275:   /* No function of the specified name was found. Return 0. */
 3276:   return 0;
 3277: }
 3278: 
 3279: /*
 3280: ** Implementation of FTS3 xRename method. Rename an fts3 table.
 3281: */
 3282: static int fts3RenameMethod(
 3283:   sqlite3_vtab *pVtab,            /* Virtual table handle */
 3284:   const char *zName               /* New name of table */
 3285: ){
 3286:   Fts3Table *p = (Fts3Table *)pVtab;
 3287:   sqlite3 *db = p->db;            /* Database connection */
 3288:   int rc;                         /* Return Code */
 3289: 
 3290:   /* As it happens, the pending terms table is always empty here. This is
 3291:   ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction 
 3292:   ** always opens a savepoint transaction. And the xSavepoint() method 
 3293:   ** flushes the pending terms table. But leave the (no-op) call to
 3294:   ** PendingTermsFlush() in in case that changes.
 3295:   */
 3296:   assert( p->nPendingData==0 );
 3297:   rc = sqlite3Fts3PendingTermsFlush(p);
 3298: 
 3299:   if( p->zContentTbl==0 ){
 3300:     fts3DbExec(&rc, db,
 3301:       "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';",
 3302:       p->zDb, p->zName, zName
 3303:     );
 3304:   }
 3305: 
 3306:   if( p->bHasDocsize ){
 3307:     fts3DbExec(&rc, db,
 3308:       "ALTER TABLE %Q.'%q_docsize'  RENAME TO '%q_docsize';",
 3309:       p->zDb, p->zName, zName
 3310:     );
 3311:   }
 3312:   if( p->bHasStat ){
 3313:     fts3DbExec(&rc, db,
 3314:       "ALTER TABLE %Q.'%q_stat'  RENAME TO '%q_stat';",
 3315:       p->zDb, p->zName, zName
 3316:     );
 3317:   }
 3318:   fts3DbExec(&rc, db,
 3319:     "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';",
 3320:     p->zDb, p->zName, zName
 3321:   );
 3322:   fts3DbExec(&rc, db,
 3323:     "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';",
 3324:     p->zDb, p->zName, zName
 3325:   );
 3326:   return rc;
 3327: }
 3328: 
 3329: /*
 3330: ** The xSavepoint() method.
 3331: **
 3332: ** Flush the contents of the pending-terms table to disk.
 3333: */
 3334: static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
 3335:   UNUSED_PARAMETER(iSavepoint);
 3336:   assert( ((Fts3Table *)pVtab)->inTransaction );
 3337:   assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
 3338:   TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
 3339:   return fts3SyncMethod(pVtab);
 3340: }
 3341: 
 3342: /*
 3343: ** The xRelease() method.
 3344: **
 3345: ** This is a no-op.
 3346: */
 3347: static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
 3348:   TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
 3349:   UNUSED_PARAMETER(iSavepoint);
 3350:   UNUSED_PARAMETER(pVtab);
 3351:   assert( p->inTransaction );
 3352:   assert( p->mxSavepoint >= iSavepoint );
 3353:   TESTONLY( p->mxSavepoint = iSavepoint-1 );
 3354:   return SQLITE_OK;
 3355: }
 3356: 
 3357: /*
 3358: ** The xRollbackTo() method.
 3359: **
 3360: ** Discard the contents of the pending terms table.
 3361: */
 3362: static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
 3363:   Fts3Table *p = (Fts3Table*)pVtab;
 3364:   UNUSED_PARAMETER(iSavepoint);
 3365:   assert( p->inTransaction );
 3366:   assert( p->mxSavepoint >= iSavepoint );
 3367:   TESTONLY( p->mxSavepoint = iSavepoint );
 3368:   sqlite3Fts3PendingTermsClear(p);
 3369:   return SQLITE_OK;
 3370: }
 3371: 
 3372: static const sqlite3_module fts3Module = {
 3373:   /* iVersion      */ 2,
 3374:   /* xCreate       */ fts3CreateMethod,
 3375:   /* xConnect      */ fts3ConnectMethod,
 3376:   /* xBestIndex    */ fts3BestIndexMethod,
 3377:   /* xDisconnect   */ fts3DisconnectMethod,
 3378:   /* xDestroy      */ fts3DestroyMethod,
 3379:   /* xOpen         */ fts3OpenMethod,
 3380:   /* xClose        */ fts3CloseMethod,
 3381:   /* xFilter       */ fts3FilterMethod,
 3382:   /* xNext         */ fts3NextMethod,
 3383:   /* xEof          */ fts3EofMethod,
 3384:   /* xColumn       */ fts3ColumnMethod,
 3385:   /* xRowid        */ fts3RowidMethod,
 3386:   /* xUpdate       */ fts3UpdateMethod,
 3387:   /* xBegin        */ fts3BeginMethod,
 3388:   /* xSync         */ fts3SyncMethod,
 3389:   /* xCommit       */ fts3CommitMethod,
 3390:   /* xRollback     */ fts3RollbackMethod,
 3391:   /* xFindFunction */ fts3FindFunctionMethod,
 3392:   /* xRename */       fts3RenameMethod,
 3393:   /* xSavepoint    */ fts3SavepointMethod,
 3394:   /* xRelease      */ fts3ReleaseMethod,
 3395:   /* xRollbackTo   */ fts3RollbackToMethod,
 3396: };
 3397: 
 3398: /*
 3399: ** This function is registered as the module destructor (called when an
 3400: ** FTS3 enabled database connection is closed). It frees the memory
 3401: ** allocated for the tokenizer hash table.
 3402: */
 3403: static void hashDestroy(void *p){
 3404:   Fts3Hash *pHash = (Fts3Hash *)p;
 3405:   sqlite3Fts3HashClear(pHash);
 3406:   sqlite3_free(pHash);
 3407: }
 3408: 
 3409: /*
 3410: ** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are 
 3411: ** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c
 3412: ** respectively. The following three forward declarations are for functions
 3413: ** declared in these files used to retrieve the respective implementations.
 3414: **
 3415: ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
 3416: ** to by the argument to point to the "simple" tokenizer implementation.
 3417: ** And so on.
 3418: */
 3419: void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
 3420: void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
 3421: #ifdef SQLITE_ENABLE_ICU
 3422: void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
 3423: #endif
 3424: 
 3425: /*
 3426: ** Initialise the fts3 extension. If this extension is built as part
 3427: ** of the sqlite library, then this function is called directly by
 3428: ** SQLite. If fts3 is built as a dynamically loadable extension, this
 3429: ** function is called by the sqlite3_extension_init() entry point.
 3430: */
 3431: int sqlite3Fts3Init(sqlite3 *db){
 3432:   int rc = SQLITE_OK;
 3433:   Fts3Hash *pHash = 0;
 3434:   const sqlite3_tokenizer_module *pSimple = 0;
 3435:   const sqlite3_tokenizer_module *pPorter = 0;
 3436: 
 3437: #ifdef SQLITE_ENABLE_ICU
 3438:   const sqlite3_tokenizer_module *pIcu = 0;
 3439:   sqlite3Fts3IcuTokenizerModule(&pIcu);
 3440: #endif
 3441: 
 3442: #ifdef SQLITE_TEST
 3443:   rc = sqlite3Fts3InitTerm(db);
 3444:   if( rc!=SQLITE_OK ) return rc;
 3445: #endif
 3446: 
 3447:   rc = sqlite3Fts3InitAux(db);
 3448:   if( rc!=SQLITE_OK ) return rc;
 3449: 
 3450:   sqlite3Fts3SimpleTokenizerModule(&pSimple);
 3451:   sqlite3Fts3PorterTokenizerModule(&pPorter);
 3452: 
 3453:   /* Allocate and initialise the hash-table used to store tokenizers. */
 3454:   pHash = sqlite3_malloc(sizeof(Fts3Hash));
 3455:   if( !pHash ){
 3456:     rc = SQLITE_NOMEM;
 3457:   }else{
 3458:     sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
 3459:   }
 3460: 
 3461:   /* Load the built-in tokenizers into the hash table */
 3462:   if( rc==SQLITE_OK ){
 3463:     if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
 3464:      || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 
 3465: #ifdef SQLITE_ENABLE_ICU
 3466:      || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
 3467: #endif
 3468:     ){
 3469:       rc = SQLITE_NOMEM;
 3470:     }
 3471:   }
 3472: 
 3473: #ifdef SQLITE_TEST
 3474:   if( rc==SQLITE_OK ){
 3475:     rc = sqlite3Fts3ExprInitTestInterface(db);
 3476:   }
 3477: #endif
 3478: 
 3479:   /* Create the virtual table wrapper around the hash-table and overload 
 3480:   ** the two scalar functions. If this is successful, register the
 3481:   ** module with sqlite.
 3482:   */
 3483:   if( SQLITE_OK==rc 
 3484:    && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
 3485:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
 3486:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
 3487:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
 3488:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
 3489:    && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
 3490:   ){
 3491:     rc = sqlite3_create_module_v2(
 3492:         db, "fts3", &fts3Module, (void *)pHash, hashDestroy
 3493:     );
 3494:     if( rc==SQLITE_OK ){
 3495:       rc = sqlite3_create_module_v2(
 3496:           db, "fts4", &fts3Module, (void *)pHash, 0
 3497:       );
 3498:     }
 3499:     return rc;
 3500:   }
 3501: 
 3502:   /* An error has occurred. Delete the hash table and return the error code. */
 3503:   assert( rc!=SQLITE_OK );
 3504:   if( pHash ){
 3505:     sqlite3Fts3HashClear(pHash);
 3506:     sqlite3_free(pHash);
 3507:   }
 3508:   return rc;
 3509: }
 3510: 
 3511: /*
 3512: ** Allocate an Fts3MultiSegReader for each token in the expression headed
 3513: ** by pExpr. 
 3514: **
 3515: ** An Fts3SegReader object is a cursor that can seek or scan a range of
 3516: ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
 3517: ** Fts3SegReader objects internally to provide an interface to seek or scan
 3518: ** within the union of all segments of a b-tree. Hence the name.
 3519: **
 3520: ** If the allocated Fts3MultiSegReader just seeks to a single entry in a
 3521: ** segment b-tree (if the term is not a prefix or it is a prefix for which
 3522: ** there exists prefix b-tree of the right length) then it may be traversed
 3523: ** and merged incrementally. Otherwise, it has to be merged into an in-memory 
 3524: ** doclist and then traversed.
 3525: */
 3526: static void fts3EvalAllocateReaders(
 3527:   Fts3Cursor *pCsr,               /* FTS cursor handle */
 3528:   Fts3Expr *pExpr,                /* Allocate readers for this expression */
 3529:   int *pnToken,                   /* OUT: Total number of tokens in phrase. */
 3530:   int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
 3531:   int *pRc                        /* IN/OUT: Error code */
 3532: ){
 3533:   if( pExpr && SQLITE_OK==*pRc ){
 3534:     if( pExpr->eType==FTSQUERY_PHRASE ){
 3535:       int i;
 3536:       int nToken = pExpr->pPhrase->nToken;
 3537:       *pnToken += nToken;
 3538:       for(i=0; i<nToken; i++){
 3539:         Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
 3540:         int rc = fts3TermSegReaderCursor(pCsr, 
 3541:             pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
 3542:         );
 3543:         if( rc!=SQLITE_OK ){
 3544:           *pRc = rc;
 3545:           return;
 3546:         }
 3547:       }
 3548:       assert( pExpr->pPhrase->iDoclistToken==0 );
 3549:       pExpr->pPhrase->iDoclistToken = -1;
 3550:     }else{
 3551:       *pnOr += (pExpr->eType==FTSQUERY_OR);
 3552:       fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
 3553:       fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
 3554:     }
 3555:   }
 3556: }
 3557: 
 3558: /*
 3559: ** Arguments pList/nList contain the doclist for token iToken of phrase p.
 3560: ** It is merged into the main doclist stored in p->doclist.aAll/nAll.
 3561: **
 3562: ** This function assumes that pList points to a buffer allocated using
 3563: ** sqlite3_malloc(). This function takes responsibility for eventually
 3564: ** freeing the buffer.
 3565: */
 3566: static void fts3EvalPhraseMergeToken(
 3567:   Fts3Table *pTab,                /* FTS Table pointer */
 3568:   Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
 3569:   int iToken,                     /* Token pList/nList corresponds to */
 3570:   char *pList,                    /* Pointer to doclist */
 3571:   int nList                       /* Number of bytes in pList */
 3572: ){
 3573:   assert( iToken!=p->iDoclistToken );
 3574: 
 3575:   if( pList==0 ){
 3576:     sqlite3_free(p->doclist.aAll);
 3577:     p->doclist.aAll = 0;
 3578:     p->doclist.nAll = 0;
 3579:   }
 3580: 
 3581:   else if( p->iDoclistToken<0 ){
 3582:     p->doclist.aAll = pList;
 3583:     p->doclist.nAll = nList;
 3584:   }
 3585: 
 3586:   else if( p->doclist.aAll==0 ){
 3587:     sqlite3_free(pList);
 3588:   }
 3589: 
 3590:   else {
 3591:     char *pLeft;
 3592:     char *pRight;
 3593:     int nLeft;
 3594:     int nRight;
 3595:     int nDiff;
 3596: 
 3597:     if( p->iDoclistToken<iToken ){
 3598:       pLeft = p->doclist.aAll;
 3599:       nLeft = p->doclist.nAll;
 3600:       pRight = pList;
 3601:       nRight = nList;
 3602:       nDiff = iToken - p->iDoclistToken;
 3603:     }else{
 3604:       pRight = p->doclist.aAll;
 3605:       nRight = p->doclist.nAll;
 3606:       pLeft = pList;
 3607:       nLeft = nList;
 3608:       nDiff = p->iDoclistToken - iToken;
 3609:     }
 3610: 
 3611:     fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
 3612:     sqlite3_free(pLeft);
 3613:     p->doclist.aAll = pRight;
 3614:     p->doclist.nAll = nRight;
 3615:   }
 3616: 
 3617:   if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
 3618: }
 3619: 
 3620: /*
 3621: ** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
 3622: ** does not take deferred tokens into account.
 3623: **
 3624: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 3625: */
 3626: static int fts3EvalPhraseLoad(
 3627:   Fts3Cursor *pCsr,               /* FTS Cursor handle */
 3628:   Fts3Phrase *p                   /* Phrase object */
 3629: ){
 3630:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 3631:   int iToken;
 3632:   int rc = SQLITE_OK;
 3633: 
 3634:   for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
 3635:     Fts3PhraseToken *pToken = &p->aToken[iToken];
 3636:     assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
 3637: 
 3638:     if( pToken->pSegcsr ){
 3639:       int nThis = 0;
 3640:       char *pThis = 0;
 3641:       rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
 3642:       if( rc==SQLITE_OK ){
 3643:         fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
 3644:       }
 3645:     }
 3646:     assert( pToken->pSegcsr==0 );
 3647:   }
 3648: 
 3649:   return rc;
 3650: }
 3651: 
 3652: /*
 3653: ** This function is called on each phrase after the position lists for
 3654: ** any deferred tokens have been loaded into memory. It updates the phrases
 3655: ** current position list to include only those positions that are really
 3656: ** instances of the phrase (after considering deferred tokens). If this
 3657: ** means that the phrase does not appear in the current row, doclist.pList
 3658: ** and doclist.nList are both zeroed.
 3659: **
 3660: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 3661: */
 3662: static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
 3663:   int iToken;                     /* Used to iterate through phrase tokens */
 3664:   char *aPoslist = 0;             /* Position list for deferred tokens */
 3665:   int nPoslist = 0;               /* Number of bytes in aPoslist */
 3666:   int iPrev = -1;                 /* Token number of previous deferred token */
 3667: 
 3668:   assert( pPhrase->doclist.bFreeList==0 );
 3669: 
 3670:   for(iToken=0; iToken<pPhrase->nToken; iToken++){
 3671:     Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
 3672:     Fts3DeferredToken *pDeferred = pToken->pDeferred;
 3673: 
 3674:     if( pDeferred ){
 3675:       char *pList;
 3676:       int nList;
 3677:       int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
 3678:       if( rc!=SQLITE_OK ) return rc;
 3679: 
 3680:       if( pList==0 ){
 3681:         sqlite3_free(aPoslist);
 3682:         pPhrase->doclist.pList = 0;
 3683:         pPhrase->doclist.nList = 0;
 3684:         return SQLITE_OK;
 3685: 
 3686:       }else if( aPoslist==0 ){
 3687:         aPoslist = pList;
 3688:         nPoslist = nList;
 3689: 
 3690:       }else{
 3691:         char *aOut = pList;
 3692:         char *p1 = aPoslist;
 3693:         char *p2 = aOut;
 3694: 
 3695:         assert( iPrev>=0 );
 3696:         fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
 3697:         sqlite3_free(aPoslist);
 3698:         aPoslist = pList;
 3699:         nPoslist = aOut - aPoslist;
 3700:         if( nPoslist==0 ){
 3701:           sqlite3_free(aPoslist);
 3702:           pPhrase->doclist.pList = 0;
 3703:           pPhrase->doclist.nList = 0;
 3704:           return SQLITE_OK;
 3705:         }
 3706:       }
 3707:       iPrev = iToken;
 3708:     }
 3709:   }
 3710: 
 3711:   if( iPrev>=0 ){
 3712:     int nMaxUndeferred = pPhrase->iDoclistToken;
 3713:     if( nMaxUndeferred<0 ){
 3714:       pPhrase->doclist.pList = aPoslist;
 3715:       pPhrase->doclist.nList = nPoslist;
 3716:       pPhrase->doclist.iDocid = pCsr->iPrevId;
 3717:       pPhrase->doclist.bFreeList = 1;
 3718:     }else{
 3719:       int nDistance;
 3720:       char *p1;
 3721:       char *p2;
 3722:       char *aOut;
 3723: 
 3724:       if( nMaxUndeferred>iPrev ){
 3725:         p1 = aPoslist;
 3726:         p2 = pPhrase->doclist.pList;
 3727:         nDistance = nMaxUndeferred - iPrev;
 3728:       }else{
 3729:         p1 = pPhrase->doclist.pList;
 3730:         p2 = aPoslist;
 3731:         nDistance = iPrev - nMaxUndeferred;
 3732:       }
 3733: 
 3734:       aOut = (char *)sqlite3_malloc(nPoslist+8);
 3735:       if( !aOut ){
 3736:         sqlite3_free(aPoslist);
 3737:         return SQLITE_NOMEM;
 3738:       }
 3739:       
 3740:       pPhrase->doclist.pList = aOut;
 3741:       if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
 3742:         pPhrase->doclist.bFreeList = 1;
 3743:         pPhrase->doclist.nList = (aOut - pPhrase->doclist.pList);
 3744:       }else{
 3745:         sqlite3_free(aOut);
 3746:         pPhrase->doclist.pList = 0;
 3747:         pPhrase->doclist.nList = 0;
 3748:       }
 3749:       sqlite3_free(aPoslist);
 3750:     }
 3751:   }
 3752: 
 3753:   return SQLITE_OK;
 3754: }
 3755: 
 3756: /*
 3757: ** This function is called for each Fts3Phrase in a full-text query 
 3758: ** expression to initialize the mechanism for returning rows. Once this
 3759: ** function has been called successfully on an Fts3Phrase, it may be
 3760: ** used with fts3EvalPhraseNext() to iterate through the matching docids.
 3761: **
 3762: ** If parameter bOptOk is true, then the phrase may (or may not) use the
 3763: ** incremental loading strategy. Otherwise, the entire doclist is loaded into
 3764: ** memory within this call.
 3765: **
 3766: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 3767: */
 3768: static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
 3769:   int rc;                         /* Error code */
 3770:   Fts3PhraseToken *pFirst = &p->aToken[0];
 3771:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 3772: 
 3773:   if( pCsr->bDesc==pTab->bDescIdx 
 3774:    && bOptOk==1 
 3775:    && p->nToken==1 
 3776:    && pFirst->pSegcsr 
 3777:    && pFirst->pSegcsr->bLookup 
 3778:    && pFirst->bFirst==0
 3779:   ){
 3780:     /* Use the incremental approach. */
 3781:     int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
 3782:     rc = sqlite3Fts3MsrIncrStart(
 3783:         pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
 3784:     p->bIncr = 1;
 3785: 
 3786:   }else{
 3787:     /* Load the full doclist for the phrase into memory. */
 3788:     rc = fts3EvalPhraseLoad(pCsr, p);
 3789:     p->bIncr = 0;
 3790:   }
 3791: 
 3792:   assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
 3793:   return rc;
 3794: }
 3795: 
 3796: /*
 3797: ** This function is used to iterate backwards (from the end to start) 
 3798: ** through doclists. It is used by this module to iterate through phrase
 3799: ** doclists in reverse and by the fts3_write.c module to iterate through
 3800: ** pending-terms lists when writing to databases with "order=desc".
 3801: **
 3802: ** The doclist may be sorted in ascending (parameter bDescIdx==0) or 
 3803: ** descending (parameter bDescIdx==1) order of docid. Regardless, this
 3804: ** function iterates from the end of the doclist to the beginning.
 3805: */
 3806: void sqlite3Fts3DoclistPrev(
 3807:   int bDescIdx,                   /* True if the doclist is desc */
 3808:   char *aDoclist,                 /* Pointer to entire doclist */
 3809:   int nDoclist,                   /* Length of aDoclist in bytes */
 3810:   char **ppIter,                  /* IN/OUT: Iterator pointer */
 3811:   sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
 3812:   int *pnList,                    /* IN/OUT: List length pointer */
 3813:   u8 *pbEof                       /* OUT: End-of-file flag */
 3814: ){
 3815:   char *p = *ppIter;
 3816: 
 3817:   assert( nDoclist>0 );
 3818:   assert( *pbEof==0 );
 3819:   assert( p || *piDocid==0 );
 3820:   assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );
 3821: 
 3822:   if( p==0 ){
 3823:     sqlite3_int64 iDocid = 0;
 3824:     char *pNext = 0;
 3825:     char *pDocid = aDoclist;
 3826:     char *pEnd = &aDoclist[nDoclist];
 3827:     int iMul = 1;
 3828: 
 3829:     while( pDocid<pEnd ){
 3830:       sqlite3_int64 iDelta;
 3831:       pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
 3832:       iDocid += (iMul * iDelta);
 3833:       pNext = pDocid;
 3834:       fts3PoslistCopy(0, &pDocid);
 3835:       while( pDocid<pEnd && *pDocid==0 ) pDocid++;
 3836:       iMul = (bDescIdx ? -1 : 1);
 3837:     }
 3838: 
 3839:     *pnList = pEnd - pNext;
 3840:     *ppIter = pNext;
 3841:     *piDocid = iDocid;
 3842:   }else{
 3843:     int iMul = (bDescIdx ? -1 : 1);
 3844:     sqlite3_int64 iDelta;
 3845:     fts3GetReverseVarint(&p, aDoclist, &iDelta);
 3846:     *piDocid -= (iMul * iDelta);
 3847: 
 3848:     if( p==aDoclist ){
 3849:       *pbEof = 1;
 3850:     }else{
 3851:       char *pSave = p;
 3852:       fts3ReversePoslist(aDoclist, &p);
 3853:       *pnList = (pSave - p);
 3854:     }
 3855:     *ppIter = p;
 3856:   }
 3857: }
 3858: 
 3859: /*
 3860: ** Attempt to move the phrase iterator to point to the next matching docid. 
 3861: ** If an error occurs, return an SQLite error code. Otherwise, return 
 3862: ** SQLITE_OK.
 3863: **
 3864: ** If there is no "next" entry and no error occurs, then *pbEof is set to
 3865: ** 1 before returning. Otherwise, if no error occurs and the iterator is
 3866: ** successfully advanced, *pbEof is set to 0.
 3867: */
 3868: static int fts3EvalPhraseNext(
 3869:   Fts3Cursor *pCsr,               /* FTS Cursor handle */
 3870:   Fts3Phrase *p,                  /* Phrase object to advance to next docid */
 3871:   u8 *pbEof                       /* OUT: Set to 1 if EOF */
 3872: ){
 3873:   int rc = SQLITE_OK;
 3874:   Fts3Doclist *pDL = &p->doclist;
 3875:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 3876: 
 3877:   if( p->bIncr ){
 3878:     assert( p->nToken==1 );
 3879:     assert( pDL->pNextDocid==0 );
 3880:     rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
 3881:         &pDL->iDocid, &pDL->pList, &pDL->nList
 3882:     );
 3883:     if( rc==SQLITE_OK && !pDL->pList ){
 3884:       *pbEof = 1;
 3885:     }
 3886:   }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
 3887:     sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, 
 3888:         &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
 3889:     );
 3890:     pDL->pList = pDL->pNextDocid;
 3891:   }else{
 3892:     char *pIter;                            /* Used to iterate through aAll */
 3893:     char *pEnd = &pDL->aAll[pDL->nAll];     /* 1 byte past end of aAll */
 3894:     if( pDL->pNextDocid ){
 3895:       pIter = pDL->pNextDocid;
 3896:     }else{
 3897:       pIter = pDL->aAll;
 3898:     }
 3899: 
 3900:     if( pIter>=pEnd ){
 3901:       /* We have already reached the end of this doclist. EOF. */
 3902:       *pbEof = 1;
 3903:     }else{
 3904:       sqlite3_int64 iDelta;
 3905:       pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
 3906:       if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
 3907:         pDL->iDocid += iDelta;
 3908:       }else{
 3909:         pDL->iDocid -= iDelta;
 3910:       }
 3911:       pDL->pList = pIter;
 3912:       fts3PoslistCopy(0, &pIter);
 3913:       pDL->nList = (pIter - pDL->pList);
 3914: 
 3915:       /* pIter now points just past the 0x00 that terminates the position-
 3916:       ** list for document pDL->iDocid. However, if this position-list was
 3917:       ** edited in place by fts3EvalNearTrim(), then pIter may not actually
 3918:       ** point to the start of the next docid value. The following line deals
 3919:       ** with this case by advancing pIter past the zero-padding added by
 3920:       ** fts3EvalNearTrim().  */
 3921:       while( pIter<pEnd && *pIter==0 ) pIter++;
 3922: 
 3923:       pDL->pNextDocid = pIter;
 3924:       assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
 3925:       *pbEof = 0;
 3926:     }
 3927:   }
 3928: 
 3929:   return rc;
 3930: }
 3931: 
 3932: /*
 3933: **
 3934: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
 3935: ** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
 3936: ** expression. Also the Fts3Expr.bDeferred variable is set to true for any
 3937: ** expressions for which all descendent tokens are deferred.
 3938: **
 3939: ** If parameter bOptOk is zero, then it is guaranteed that the
 3940: ** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
 3941: ** each phrase in the expression (subject to deferred token processing).
 3942: ** Or, if bOptOk is non-zero, then one or more tokens within the expression
 3943: ** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
 3944: **
 3945: ** If an error occurs within this function, *pRc is set to an SQLite error
 3946: ** code before returning.
 3947: */
 3948: static void fts3EvalStartReaders(
 3949:   Fts3Cursor *pCsr,               /* FTS Cursor handle */
 3950:   Fts3Expr *pExpr,                /* Expression to initialize phrases in */
 3951:   int bOptOk,                     /* True to enable incremental loading */
 3952:   int *pRc                        /* IN/OUT: Error code */
 3953: ){
 3954:   if( pExpr && SQLITE_OK==*pRc ){
 3955:     if( pExpr->eType==FTSQUERY_PHRASE ){
 3956:       int i;
 3957:       int nToken = pExpr->pPhrase->nToken;
 3958:       for(i=0; i<nToken; i++){
 3959:         if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
 3960:       }
 3961:       pExpr->bDeferred = (i==nToken);
 3962:       *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
 3963:     }else{
 3964:       fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
 3965:       fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
 3966:       pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
 3967:     }
 3968:   }
 3969: }
 3970: 
 3971: /*
 3972: ** An array of the following structures is assembled as part of the process
 3973: ** of selecting tokens to defer before the query starts executing (as part
 3974: ** of the xFilter() method). There is one element in the array for each
 3975: ** token in the FTS expression.
 3976: **
 3977: ** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
 3978: ** to phrases that are connected only by AND and NEAR operators (not OR or
 3979: ** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
 3980: ** separately. The root of a tokens AND/NEAR cluster is stored in 
 3981: ** Fts3TokenAndCost.pRoot.
 3982: */
 3983: typedef struct Fts3TokenAndCost Fts3TokenAndCost;
 3984: struct Fts3TokenAndCost {
 3985:   Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
 3986:   int iToken;                     /* Position of token in phrase */
 3987:   Fts3PhraseToken *pToken;        /* The token itself */
 3988:   Fts3Expr *pRoot;                /* Root of NEAR/AND cluster */
 3989:   int nOvfl;                      /* Number of overflow pages to load doclist */
 3990:   int iCol;                       /* The column the token must match */
 3991: };
 3992: 
 3993: /*
 3994: ** This function is used to populate an allocated Fts3TokenAndCost array.
 3995: **
 3996: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
 3997: ** Otherwise, if an error occurs during execution, *pRc is set to an
 3998: ** SQLite error code.
 3999: */
 4000: static void fts3EvalTokenCosts(
 4001:   Fts3Cursor *pCsr,               /* FTS Cursor handle */
 4002:   Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
 4003:   Fts3Expr *pExpr,                /* Expression to consider */
 4004:   Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
 4005:   Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
 4006:   int *pRc                        /* IN/OUT: Error code */
 4007: ){
 4008:   if( *pRc==SQLITE_OK ){
 4009:     if( pExpr->eType==FTSQUERY_PHRASE ){
 4010:       Fts3Phrase *pPhrase = pExpr->pPhrase;
 4011:       int i;
 4012:       for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
 4013:         Fts3TokenAndCost *pTC = (*ppTC)++;
 4014:         pTC->pPhrase = pPhrase;
 4015:         pTC->iToken = i;
 4016:         pTC->pRoot = pRoot;
 4017:         pTC->pToken = &pPhrase->aToken[i];
 4018:         pTC->iCol = pPhrase->iColumn;
 4019:         *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
 4020:       }
 4021:     }else if( pExpr->eType!=FTSQUERY_NOT ){
 4022:       assert( pExpr->eType==FTSQUERY_OR
 4023:            || pExpr->eType==FTSQUERY_AND
 4024:            || pExpr->eType==FTSQUERY_NEAR
 4025:       );
 4026:       assert( pExpr->pLeft && pExpr->pRight );
 4027:       if( pExpr->eType==FTSQUERY_OR ){
 4028:         pRoot = pExpr->pLeft;
 4029:         **ppOr = pRoot;
 4030:         (*ppOr)++;
 4031:       }
 4032:       fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
 4033:       if( pExpr->eType==FTSQUERY_OR ){
 4034:         pRoot = pExpr->pRight;
 4035:         **ppOr = pRoot;
 4036:         (*ppOr)++;
 4037:       }
 4038:       fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
 4039:     }
 4040:   }
 4041: }
 4042: 
 4043: /*
 4044: ** Determine the average document (row) size in pages. If successful,
 4045: ** write this value to *pnPage and return SQLITE_OK. Otherwise, return
 4046: ** an SQLite error code.
 4047: **
 4048: ** The average document size in pages is calculated by first calculating 
 4049: ** determining the average size in bytes, B. If B is less than the amount
 4050: ** of data that will fit on a single leaf page of an intkey table in
 4051: ** this database, then the average docsize is 1. Otherwise, it is 1 plus
 4052: ** the number of overflow pages consumed by a record B bytes in size.
 4053: */
 4054: static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
 4055:   if( pCsr->nRowAvg==0 ){
 4056:     /* The average document size, which is required to calculate the cost
 4057:     ** of each doclist, has not yet been determined. Read the required 
 4058:     ** data from the %_stat table to calculate it.
 4059:     **
 4060:     ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
 4061:     ** varints, where nCol is the number of columns in the FTS3 table.
 4062:     ** The first varint is the number of documents currently stored in
 4063:     ** the table. The following nCol varints contain the total amount of
 4064:     ** data stored in all rows of each column of the table, from left
 4065:     ** to right.
 4066:     */
 4067:     int rc;
 4068:     Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
 4069:     sqlite3_stmt *pStmt;
 4070:     sqlite3_int64 nDoc = 0;
 4071:     sqlite3_int64 nByte = 0;
 4072:     const char *pEnd;
 4073:     const char *a;
 4074: 
 4075:     rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
 4076:     if( rc!=SQLITE_OK ) return rc;
 4077:     a = sqlite3_column_blob(pStmt, 0);
 4078:     assert( a );
 4079: 
 4080:     pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
 4081:     a += sqlite3Fts3GetVarint(a, &nDoc);
 4082:     while( a<pEnd ){
 4083:       a += sqlite3Fts3GetVarint(a, &nByte);
 4084:     }
 4085:     if( nDoc==0 || nByte==0 ){
 4086:       sqlite3_reset(pStmt);
 4087:       return FTS_CORRUPT_VTAB;
 4088:     }
 4089: 
 4090:     pCsr->nDoc = nDoc;
 4091:     pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
 4092:     assert( pCsr->nRowAvg>0 ); 
 4093:     rc = sqlite3_reset(pStmt);
 4094:     if( rc!=SQLITE_OK ) return rc;
 4095:   }
 4096: 
 4097:   *pnPage = pCsr->nRowAvg;
 4098:   return SQLITE_OK;
 4099: }
 4100: 
 4101: /*
 4102: ** This function is called to select the tokens (if any) that will be 
 4103: ** deferred. The array aTC[] has already been populated when this is
 4104: ** called.
 4105: **
 4106: ** This function is called once for each AND/NEAR cluster in the 
 4107: ** expression. Each invocation determines which tokens to defer within
 4108: ** the cluster with root node pRoot. See comments above the definition
 4109: ** of struct Fts3TokenAndCost for more details.
 4110: **
 4111: ** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
 4112: ** called on each token to defer. Otherwise, an SQLite error code is
 4113: ** returned.
 4114: */
 4115: static int fts3EvalSelectDeferred(
 4116:   Fts3Cursor *pCsr,               /* FTS Cursor handle */
 4117:   Fts3Expr *pRoot,                /* Consider tokens with this root node */
 4118:   Fts3TokenAndCost *aTC,          /* Array of expression tokens and costs */
 4119:   int nTC                         /* Number of entries in aTC[] */
 4120: ){
 4121:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 4122:   int nDocSize = 0;               /* Number of pages per doc loaded */
 4123:   int rc = SQLITE_OK;             /* Return code */
 4124:   int ii;                         /* Iterator variable for various purposes */
 4125:   int nOvfl = 0;                  /* Total overflow pages used by doclists */
 4126:   int nToken = 0;                 /* Total number of tokens in cluster */
 4127: 
 4128:   int nMinEst = 0;                /* The minimum count for any phrase so far. */
 4129:   int nLoad4 = 1;                 /* (Phrases that will be loaded)^4. */
 4130: 
 4131:   /* Tokens are never deferred for FTS tables created using the content=xxx
 4132:   ** option. The reason being that it is not guaranteed that the content
 4133:   ** table actually contains the same data as the index. To prevent this from
 4134:   ** causing any problems, the deferred token optimization is completely
 4135:   ** disabled for content=xxx tables. */
 4136:   if( pTab->zContentTbl ){
 4137:     return SQLITE_OK;
 4138:   }
 4139: 
 4140:   /* Count the tokens in this AND/NEAR cluster. If none of the doclists
 4141:   ** associated with the tokens spill onto overflow pages, or if there is
 4142:   ** only 1 token, exit early. No tokens to defer in this case. */
 4143:   for(ii=0; ii<nTC; ii++){
 4144:     if( aTC[ii].pRoot==pRoot ){
 4145:       nOvfl += aTC[ii].nOvfl;
 4146:       nToken++;
 4147:     }
 4148:   }
 4149:   if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
 4150: 
 4151:   /* Obtain the average docsize (in pages). */
 4152:   rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
 4153:   assert( rc!=SQLITE_OK || nDocSize>0 );
 4154: 
 4155: 
 4156:   /* Iterate through all tokens in this AND/NEAR cluster, in ascending order 
 4157:   ** of the number of overflow pages that will be loaded by the pager layer 
 4158:   ** to retrieve the entire doclist for the token from the full-text index.
 4159:   ** Load the doclists for tokens that are either:
 4160:   **
 4161:   **   a. The cheapest token in the entire query (i.e. the one visited by the
 4162:   **      first iteration of this loop), or
 4163:   **
 4164:   **   b. Part of a multi-token phrase.
 4165:   **
 4166:   ** After each token doclist is loaded, merge it with the others from the
 4167:   ** same phrase and count the number of documents that the merged doclist
 4168:   ** contains. Set variable "nMinEst" to the smallest number of documents in 
 4169:   ** any phrase doclist for which 1 or more token doclists have been loaded.
 4170:   ** Let nOther be the number of other phrases for which it is certain that
 4171:   ** one or more tokens will not be deferred.
 4172:   **
 4173:   ** Then, for each token, defer it if loading the doclist would result in
 4174:   ** loading N or more overflow pages into memory, where N is computed as:
 4175:   **
 4176:   **    (nMinEst + 4^nOther - 1) / (4^nOther)
 4177:   */
 4178:   for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
 4179:     int iTC;                      /* Used to iterate through aTC[] array. */
 4180:     Fts3TokenAndCost *pTC = 0;    /* Set to cheapest remaining token. */
 4181: 
 4182:     /* Set pTC to point to the cheapest remaining token. */
 4183:     for(iTC=0; iTC<nTC; iTC++){
 4184:       if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot 
 4185:        && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) 
 4186:       ){
 4187:         pTC = &aTC[iTC];
 4188:       }
 4189:     }
 4190:     assert( pTC );
 4191: 
 4192:     if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
 4193:       /* The number of overflow pages to load for this (and therefore all
 4194:       ** subsequent) tokens is greater than the estimated number of pages 
 4195:       ** that will be loaded if all subsequent tokens are deferred.
 4196:       */
 4197:       Fts3PhraseToken *pToken = pTC->pToken;
 4198:       rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
 4199:       fts3SegReaderCursorFree(pToken->pSegcsr);
 4200:       pToken->pSegcsr = 0;
 4201:     }else{
 4202:       /* Set nLoad4 to the value of (4^nOther) for the next iteration of the
 4203:       ** for-loop. Except, limit the value to 2^24 to prevent it from 
 4204:       ** overflowing the 32-bit integer it is stored in. */
 4205:       if( ii<12 ) nLoad4 = nLoad4*4;
 4206: 
 4207:       if( ii==0 || pTC->pPhrase->nToken>1 ){
 4208:         /* Either this is the cheapest token in the entire query, or it is
 4209:         ** part of a multi-token phrase. Either way, the entire doclist will
 4210:         ** (eventually) be loaded into memory. It may as well be now. */
 4211:         Fts3PhraseToken *pToken = pTC->pToken;
 4212:         int nList = 0;
 4213:         char *pList = 0;
 4214:         rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
 4215:         assert( rc==SQLITE_OK || pList==0 );
 4216:         if( rc==SQLITE_OK ){
 4217:           int nCount;
 4218:           fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
 4219:           nCount = fts3DoclistCountDocids(
 4220:               pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
 4221:           );
 4222:           if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
 4223:         }
 4224:       }
 4225:     }
 4226:     pTC->pToken = 0;
 4227:   }
 4228: 
 4229:   return rc;
 4230: }
 4231: 
 4232: /*
 4233: ** This function is called from within the xFilter method. It initializes
 4234: ** the full-text query currently stored in pCsr->pExpr. To iterate through
 4235: ** the results of a query, the caller does:
 4236: **
 4237: **    fts3EvalStart(pCsr);
 4238: **    while( 1 ){
 4239: **      fts3EvalNext(pCsr);
 4240: **      if( pCsr->bEof ) break;
 4241: **      ... return row pCsr->iPrevId to the caller ...
 4242: **    }
 4243: */
 4244: static int fts3EvalStart(Fts3Cursor *pCsr){
 4245:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 4246:   int rc = SQLITE_OK;
 4247:   int nToken = 0;
 4248:   int nOr = 0;
 4249: 
 4250:   /* Allocate a MultiSegReader for each token in the expression. */
 4251:   fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
 4252: 
 4253:   /* Determine which, if any, tokens in the expression should be deferred. */
 4254:   if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
 4255:     Fts3TokenAndCost *aTC;
 4256:     Fts3Expr **apOr;
 4257:     aTC = (Fts3TokenAndCost *)sqlite3_malloc(
 4258:         sizeof(Fts3TokenAndCost) * nToken
 4259:       + sizeof(Fts3Expr *) * nOr * 2
 4260:     );
 4261:     apOr = (Fts3Expr **)&aTC[nToken];
 4262: 
 4263:     if( !aTC ){
 4264:       rc = SQLITE_NOMEM;
 4265:     }else{
 4266:       int ii;
 4267:       Fts3TokenAndCost *pTC = aTC;
 4268:       Fts3Expr **ppOr = apOr;
 4269: 
 4270:       fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
 4271:       nToken = pTC-aTC;
 4272:       nOr = ppOr-apOr;
 4273: 
 4274:       if( rc==SQLITE_OK ){
 4275:         rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
 4276:         for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
 4277:           rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
 4278:         }
 4279:       }
 4280: 
 4281:       sqlite3_free(aTC);
 4282:     }
 4283:   }
 4284: 
 4285:   fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
 4286:   return rc;
 4287: }
 4288: 
 4289: /*
 4290: ** Invalidate the current position list for phrase pPhrase.
 4291: */
 4292: static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
 4293:   if( pPhrase->doclist.bFreeList ){
 4294:     sqlite3_free(pPhrase->doclist.pList);
 4295:   }
 4296:   pPhrase->doclist.pList = 0;
 4297:   pPhrase->doclist.nList = 0;
 4298:   pPhrase->doclist.bFreeList = 0;
 4299: }
 4300: 
 4301: /*
 4302: ** This function is called to edit the position list associated with
 4303: ** the phrase object passed as the fifth argument according to a NEAR
 4304: ** condition. For example:
 4305: **
 4306: **     abc NEAR/5 "def ghi"
 4307: **
 4308: ** Parameter nNear is passed the NEAR distance of the expression (5 in
 4309: ** the example above). When this function is called, *paPoslist points to
 4310: ** the position list, and *pnToken is the number of phrase tokens in, the
 4311: ** phrase on the other side of the NEAR operator to pPhrase. For example,
 4312: ** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
 4313: ** the position list associated with phrase "abc".
 4314: **
 4315: ** All positions in the pPhrase position list that are not sufficiently
 4316: ** close to a position in the *paPoslist position list are removed. If this
 4317: ** leaves 0 positions, zero is returned. Otherwise, non-zero.
 4318: **
 4319: ** Before returning, *paPoslist is set to point to the position lsit 
 4320: ** associated with pPhrase. And *pnToken is set to the number of tokens in
 4321: ** pPhrase.
 4322: */
 4323: static int fts3EvalNearTrim(
 4324:   int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
 4325:   char *aTmp,                     /* Temporary space to use */
 4326:   char **paPoslist,               /* IN/OUT: Position list */
 4327:   int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
 4328:   Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
 4329: ){
 4330:   int nParam1 = nNear + pPhrase->nToken;
 4331:   int nParam2 = nNear + *pnToken;
 4332:   int nNew;
 4333:   char *p2; 
 4334:   char *pOut; 
 4335:   int res;
 4336: 
 4337:   assert( pPhrase->doclist.pList );
 4338: 
 4339:   p2 = pOut = pPhrase->doclist.pList;
 4340:   res = fts3PoslistNearMerge(
 4341:     &pOut, aTmp, nParam1, nParam2, paPoslist, &p2
 4342:   );
 4343:   if( res ){
 4344:     nNew = (pOut - pPhrase->doclist.pList) - 1;
 4345:     assert( pPhrase->doclist.pList[nNew]=='\0' );
 4346:     assert( nNew<=pPhrase->doclist.nList && nNew>0 );
 4347:     memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
 4348:     pPhrase->doclist.nList = nNew;
 4349:     *paPoslist = pPhrase->doclist.pList;
 4350:     *pnToken = pPhrase->nToken;
 4351:   }
 4352: 
 4353:   return res;
 4354: }
 4355: 
 4356: /*
 4357: ** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
 4358: ** Otherwise, it advances the expression passed as the second argument to
 4359: ** point to the next matching row in the database. Expressions iterate through
 4360: ** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
 4361: ** or descending if it is non-zero.
 4362: **
 4363: ** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
 4364: ** successful, the following variables in pExpr are set:
 4365: **
 4366: **   Fts3Expr.bEof                (non-zero if EOF - there is no next row)
 4367: **   Fts3Expr.iDocid              (valid if bEof==0. The docid of the next row)
 4368: **
 4369: ** If the expression is of type FTSQUERY_PHRASE, and the expression is not
 4370: ** at EOF, then the following variables are populated with the position list
 4371: ** for the phrase for the visited row:
 4372: **
 4373: **   FTs3Expr.pPhrase->doclist.nList        (length of pList in bytes)
 4374: **   FTs3Expr.pPhrase->doclist.pList        (pointer to position list)
 4375: **
 4376: ** It says above that this function advances the expression to the next
 4377: ** matching row. This is usually true, but there are the following exceptions:
 4378: **
 4379: **   1. Deferred tokens are not taken into account. If a phrase consists
 4380: **      entirely of deferred tokens, it is assumed to match every row in
 4381: **      the db. In this case the position-list is not populated at all. 
 4382: **
 4383: **      Or, if a phrase contains one or more deferred tokens and one or
 4384: **      more non-deferred tokens, then the expression is advanced to the 
 4385: **      next possible match, considering only non-deferred tokens. In other
 4386: **      words, if the phrase is "A B C", and "B" is deferred, the expression
 4387: **      is advanced to the next row that contains an instance of "A * C", 
 4388: **      where "*" may match any single token. The position list in this case
 4389: **      is populated as for "A * C" before returning.
 4390: **
 4391: **   2. NEAR is treated as AND. If the expression is "x NEAR y", it is 
 4392: **      advanced to point to the next row that matches "x AND y".
 4393: ** 
 4394: ** See fts3EvalTestDeferredAndNear() for details on testing if a row is
 4395: ** really a match, taking into account deferred tokens and NEAR operators.
 4396: */
 4397: static void fts3EvalNextRow(
 4398:   Fts3Cursor *pCsr,               /* FTS Cursor handle */
 4399:   Fts3Expr *pExpr,                /* Expr. to advance to next matching row */
 4400:   int *pRc                        /* IN/OUT: Error code */
 4401: ){
 4402:   if( *pRc==SQLITE_OK ){
 4403:     int bDescDoclist = pCsr->bDesc;         /* Used by DOCID_CMP() macro */
 4404:     assert( pExpr->bEof==0 );
 4405:     pExpr->bStart = 1;
 4406: 
 4407:     switch( pExpr->eType ){
 4408:       case FTSQUERY_NEAR:
 4409:       case FTSQUERY_AND: {
 4410:         Fts3Expr *pLeft = pExpr->pLeft;
 4411:         Fts3Expr *pRight = pExpr->pRight;
 4412:         assert( !pLeft->bDeferred || !pRight->bDeferred );
 4413: 
 4414:         if( pLeft->bDeferred ){
 4415:           /* LHS is entirely deferred. So we assume it matches every row.
 4416:           ** Advance the RHS iterator to find the next row visited. */
 4417:           fts3EvalNextRow(pCsr, pRight, pRc);
 4418:           pExpr->iDocid = pRight->iDocid;
 4419:           pExpr->bEof = pRight->bEof;
 4420:         }else if( pRight->bDeferred ){
 4421:           /* RHS is entirely deferred. So we assume it matches every row.
 4422:           ** Advance the LHS iterator to find the next row visited. */
 4423:           fts3EvalNextRow(pCsr, pLeft, pRc);
 4424:           pExpr->iDocid = pLeft->iDocid;
 4425:           pExpr->bEof = pLeft->bEof;
 4426:         }else{
 4427:           /* Neither the RHS or LHS are deferred. */
 4428:           fts3EvalNextRow(pCsr, pLeft, pRc);
 4429:           fts3EvalNextRow(pCsr, pRight, pRc);
 4430:           while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
 4431:             sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
 4432:             if( iDiff==0 ) break;
 4433:             if( iDiff<0 ){
 4434:               fts3EvalNextRow(pCsr, pLeft, pRc);
 4435:             }else{
 4436:               fts3EvalNextRow(pCsr, pRight, pRc);
 4437:             }
 4438:           }
 4439:           pExpr->iDocid = pLeft->iDocid;
 4440:           pExpr->bEof = (pLeft->bEof || pRight->bEof);
 4441:         }
 4442:         break;
 4443:       }
 4444:   
 4445:       case FTSQUERY_OR: {
 4446:         Fts3Expr *pLeft = pExpr->pLeft;
 4447:         Fts3Expr *pRight = pExpr->pRight;
 4448:         sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
 4449: 
 4450:         assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
 4451:         assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
 4452: 
 4453:         if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
 4454:           fts3EvalNextRow(pCsr, pLeft, pRc);
 4455:         }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
 4456:           fts3EvalNextRow(pCsr, pRight, pRc);
 4457:         }else{
 4458:           fts3EvalNextRow(pCsr, pLeft, pRc);
 4459:           fts3EvalNextRow(pCsr, pRight, pRc);
 4460:         }
 4461: 
 4462:         pExpr->bEof = (pLeft->bEof && pRight->bEof);
 4463:         iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
 4464:         if( pRight->bEof || (pLeft->bEof==0 &&  iCmp<0) ){
 4465:           pExpr->iDocid = pLeft->iDocid;
 4466:         }else{
 4467:           pExpr->iDocid = pRight->iDocid;
 4468:         }
 4469: 
 4470:         break;
 4471:       }
 4472: 
 4473:       case FTSQUERY_NOT: {
 4474:         Fts3Expr *pLeft = pExpr->pLeft;
 4475:         Fts3Expr *pRight = pExpr->pRight;
 4476: 
 4477:         if( pRight->bStart==0 ){
 4478:           fts3EvalNextRow(pCsr, pRight, pRc);
 4479:           assert( *pRc!=SQLITE_OK || pRight->bStart );
 4480:         }
 4481: 
 4482:         fts3EvalNextRow(pCsr, pLeft, pRc);
 4483:         if( pLeft->bEof==0 ){
 4484:           while( !*pRc 
 4485:               && !pRight->bEof 
 4486:               && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
 4487:           ){
 4488:             fts3EvalNextRow(pCsr, pRight, pRc);
 4489:           }
 4490:         }
 4491:         pExpr->iDocid = pLeft->iDocid;
 4492:         pExpr->bEof = pLeft->bEof;
 4493:         break;
 4494:       }
 4495: 
 4496:       default: {
 4497:         Fts3Phrase *pPhrase = pExpr->pPhrase;
 4498:         fts3EvalInvalidatePoslist(pPhrase);
 4499:         *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
 4500:         pExpr->iDocid = pPhrase->doclist.iDocid;
 4501:         break;
 4502:       }
 4503:     }
 4504:   }
 4505: }
 4506: 
 4507: /*
 4508: ** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
 4509: ** cluster, then this function returns 1 immediately.
 4510: **
 4511: ** Otherwise, it checks if the current row really does match the NEAR 
 4512: ** expression, using the data currently stored in the position lists 
 4513: ** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. 
 4514: **
 4515: ** If the current row is a match, the position list associated with each
 4516: ** phrase in the NEAR expression is edited in place to contain only those
 4517: ** phrase instances sufficiently close to their peers to satisfy all NEAR
 4518: ** constraints. In this case it returns 1. If the NEAR expression does not 
 4519: ** match the current row, 0 is returned. The position lists may or may not
 4520: ** be edited if 0 is returned.
 4521: */
 4522: static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
 4523:   int res = 1;
 4524: 
 4525:   /* The following block runs if pExpr is the root of a NEAR query.
 4526:   ** For example, the query:
 4527:   **
 4528:   **         "w" NEAR "x" NEAR "y" NEAR "z"
 4529:   **
 4530:   ** which is represented in tree form as:
 4531:   **
 4532:   **                               |
 4533:   **                          +--NEAR--+      <-- root of NEAR query
 4534:   **                          |        |
 4535:   **                     +--NEAR--+   "z"
 4536:   **                     |        |
 4537:   **                +--NEAR--+   "y"
 4538:   **                |        |
 4539:   **               "w"      "x"
 4540:   **
 4541:   ** The right-hand child of a NEAR node is always a phrase. The 
 4542:   ** left-hand child may be either a phrase or a NEAR node. There are
 4543:   ** no exceptions to this - it's the way the parser in fts3_expr.c works.
 4544:   */
 4545:   if( *pRc==SQLITE_OK 
 4546:    && pExpr->eType==FTSQUERY_NEAR 
 4547:    && pExpr->bEof==0
 4548:    && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
 4549:   ){
 4550:     Fts3Expr *p; 
 4551:     int nTmp = 0;                 /* Bytes of temp space */
 4552:     char *aTmp;                   /* Temp space for PoslistNearMerge() */
 4553: 
 4554:     /* Allocate temporary working space. */
 4555:     for(p=pExpr; p->pLeft; p=p->pLeft){
 4556:       nTmp += p->pRight->pPhrase->doclist.nList;
 4557:     }
 4558:     nTmp += p->pPhrase->doclist.nList;
 4559:     aTmp = sqlite3_malloc(nTmp*2);
 4560:     if( !aTmp ){
 4561:       *pRc = SQLITE_NOMEM;
 4562:       res = 0;
 4563:     }else{
 4564:       char *aPoslist = p->pPhrase->doclist.pList;
 4565:       int nToken = p->pPhrase->nToken;
 4566: 
 4567:       for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
 4568:         Fts3Phrase *pPhrase = p->pRight->pPhrase;
 4569:         int nNear = p->nNear;
 4570:         res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
 4571:       }
 4572:   
 4573:       aPoslist = pExpr->pRight->pPhrase->doclist.pList;
 4574:       nToken = pExpr->pRight->pPhrase->nToken;
 4575:       for(p=pExpr->pLeft; p && res; p=p->pLeft){
 4576:         int nNear;
 4577:         Fts3Phrase *pPhrase;
 4578:         assert( p->pParent && p->pParent->pLeft==p );
 4579:         nNear = p->pParent->nNear;
 4580:         pPhrase = (
 4581:             p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
 4582:         );
 4583:         res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
 4584:       }
 4585:     }
 4586: 
 4587:     sqlite3_free(aTmp);
 4588:   }
 4589: 
 4590:   return res;
 4591: }
 4592: 
 4593: /*
 4594: ** This function is a helper function for fts3EvalTestDeferredAndNear().
 4595: ** Assuming no error occurs or has occurred, It returns non-zero if the
 4596: ** expression passed as the second argument matches the row that pCsr 
 4597: ** currently points to, or zero if it does not.
 4598: **
 4599: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
 4600: ** If an error occurs during execution of this function, *pRc is set to 
 4601: ** the appropriate SQLite error code. In this case the returned value is 
 4602: ** undefined.
 4603: */
 4604: static int fts3EvalTestExpr(
 4605:   Fts3Cursor *pCsr,               /* FTS cursor handle */
 4606:   Fts3Expr *pExpr,                /* Expr to test. May or may not be root. */
 4607:   int *pRc                        /* IN/OUT: Error code */
 4608: ){
 4609:   int bHit = 1;                   /* Return value */
 4610:   if( *pRc==SQLITE_OK ){
 4611:     switch( pExpr->eType ){
 4612:       case FTSQUERY_NEAR:
 4613:       case FTSQUERY_AND:
 4614:         bHit = (
 4615:             fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
 4616:          && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
 4617:          && fts3EvalNearTest(pExpr, pRc)
 4618:         );
 4619: 
 4620:         /* If the NEAR expression does not match any rows, zero the doclist for 
 4621:         ** all phrases involved in the NEAR. This is because the snippet(),
 4622:         ** offsets() and matchinfo() functions are not supposed to recognize 
 4623:         ** any instances of phrases that are part of unmatched NEAR queries. 
 4624:         ** For example if this expression:
 4625:         **
 4626:         **    ... MATCH 'a OR (b NEAR c)'
 4627:         **
 4628:         ** is matched against a row containing:
 4629:         **
 4630:         **        'a b d e'
 4631:         **
 4632:         ** then any snippet() should ony highlight the "a" term, not the "b"
 4633:         ** (as "b" is part of a non-matching NEAR clause).
 4634:         */
 4635:         if( bHit==0 
 4636:          && pExpr->eType==FTSQUERY_NEAR 
 4637:          && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
 4638:         ){
 4639:           Fts3Expr *p;
 4640:           for(p=pExpr; p->pPhrase==0; p=p->pLeft){
 4641:             if( p->pRight->iDocid==pCsr->iPrevId ){
 4642:               fts3EvalInvalidatePoslist(p->pRight->pPhrase);
 4643:             }
 4644:           }
 4645:           if( p->iDocid==pCsr->iPrevId ){
 4646:             fts3EvalInvalidatePoslist(p->pPhrase);
 4647:           }
 4648:         }
 4649: 
 4650:         break;
 4651: 
 4652:       case FTSQUERY_OR: {
 4653:         int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
 4654:         int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
 4655:         bHit = bHit1 || bHit2;
 4656:         break;
 4657:       }
 4658: 
 4659:       case FTSQUERY_NOT:
 4660:         bHit = (
 4661:             fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
 4662:          && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
 4663:         );
 4664:         break;
 4665: 
 4666:       default: {
 4667:         if( pCsr->pDeferred 
 4668:          && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
 4669:         ){
 4670:           Fts3Phrase *pPhrase = pExpr->pPhrase;
 4671:           assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
 4672:           if( pExpr->bDeferred ){
 4673:             fts3EvalInvalidatePoslist(pPhrase);
 4674:           }
 4675:           *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
 4676:           bHit = (pPhrase->doclist.pList!=0);
 4677:           pExpr->iDocid = pCsr->iPrevId;
 4678:         }else{
 4679:           bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
 4680:         }
 4681:         break;
 4682:       }
 4683:     }
 4684:   }
 4685:   return bHit;
 4686: }
 4687: 
 4688: /*
 4689: ** This function is called as the second part of each xNext operation when
 4690: ** iterating through the results of a full-text query. At this point the
 4691: ** cursor points to a row that matches the query expression, with the
 4692: ** following caveats:
 4693: **
 4694: **   * Up until this point, "NEAR" operators in the expression have been
 4695: **     treated as "AND".
 4696: **
 4697: **   * Deferred tokens have not yet been considered.
 4698: **
 4699: ** If *pRc is not SQLITE_OK when this function is called, it immediately
 4700: ** returns 0. Otherwise, it tests whether or not after considering NEAR
 4701: ** operators and deferred tokens the current row is still a match for the
 4702: ** expression. It returns 1 if both of the following are true:
 4703: **
 4704: **   1. *pRc is SQLITE_OK when this function returns, and
 4705: **
 4706: **   2. After scanning the current FTS table row for the deferred tokens,
 4707: **      it is determined that the row does *not* match the query.
 4708: **
 4709: ** Or, if no error occurs and it seems the current row does match the FTS
 4710: ** query, return 0.
 4711: */
 4712: static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
 4713:   int rc = *pRc;
 4714:   int bMiss = 0;
 4715:   if( rc==SQLITE_OK ){
 4716: 
 4717:     /* If there are one or more deferred tokens, load the current row into
 4718:     ** memory and scan it to determine the position list for each deferred
 4719:     ** token. Then, see if this row is really a match, considering deferred
 4720:     ** tokens and NEAR operators (neither of which were taken into account
 4721:     ** earlier, by fts3EvalNextRow()). 
 4722:     */
 4723:     if( pCsr->pDeferred ){
 4724:       rc = fts3CursorSeek(0, pCsr);
 4725:       if( rc==SQLITE_OK ){
 4726:         rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
 4727:       }
 4728:     }
 4729:     bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
 4730: 
 4731:     /* Free the position-lists accumulated for each deferred token above. */
 4732:     sqlite3Fts3FreeDeferredDoclists(pCsr);
 4733:     *pRc = rc;
 4734:   }
 4735:   return (rc==SQLITE_OK && bMiss);
 4736: }
 4737: 
 4738: /*
 4739: ** Advance to the next document that matches the FTS expression in
 4740: ** Fts3Cursor.pExpr.
 4741: */
 4742: static int fts3EvalNext(Fts3Cursor *pCsr){
 4743:   int rc = SQLITE_OK;             /* Return Code */
 4744:   Fts3Expr *pExpr = pCsr->pExpr;
 4745:   assert( pCsr->isEof==0 );
 4746:   if( pExpr==0 ){
 4747:     pCsr->isEof = 1;
 4748:   }else{
 4749:     do {
 4750:       if( pCsr->isRequireSeek==0 ){
 4751:         sqlite3_reset(pCsr->pStmt);
 4752:       }
 4753:       assert( sqlite3_data_count(pCsr->pStmt)==0 );
 4754:       fts3EvalNextRow(pCsr, pExpr, &rc);
 4755:       pCsr->isEof = pExpr->bEof;
 4756:       pCsr->isRequireSeek = 1;
 4757:       pCsr->isMatchinfoNeeded = 1;
 4758:       pCsr->iPrevId = pExpr->iDocid;
 4759:     }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
 4760:   }
 4761:   return rc;
 4762: }
 4763: 
 4764: /*
 4765: ** Restart interation for expression pExpr so that the next call to
 4766: ** fts3EvalNext() visits the first row. Do not allow incremental 
 4767: ** loading or merging of phrase doclists for this iteration.
 4768: **
 4769: ** If *pRc is other than SQLITE_OK when this function is called, it is
 4770: ** a no-op. If an error occurs within this function, *pRc is set to an
 4771: ** SQLite error code before returning.
 4772: */
 4773: static void fts3EvalRestart(
 4774:   Fts3Cursor *pCsr,
 4775:   Fts3Expr *pExpr,
 4776:   int *pRc
 4777: ){
 4778:   if( pExpr && *pRc==SQLITE_OK ){
 4779:     Fts3Phrase *pPhrase = pExpr->pPhrase;
 4780: 
 4781:     if( pPhrase ){
 4782:       fts3EvalInvalidatePoslist(pPhrase);
 4783:       if( pPhrase->bIncr ){
 4784:         assert( pPhrase->nToken==1 );
 4785:         assert( pPhrase->aToken[0].pSegcsr );
 4786:         sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
 4787:         *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
 4788:       }
 4789: 
 4790:       pPhrase->doclist.pNextDocid = 0;
 4791:       pPhrase->doclist.iDocid = 0;
 4792:     }
 4793: 
 4794:     pExpr->iDocid = 0;
 4795:     pExpr->bEof = 0;
 4796:     pExpr->bStart = 0;
 4797: 
 4798:     fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
 4799:     fts3EvalRestart(pCsr, pExpr->pRight, pRc);
 4800:   }
 4801: }
 4802: 
 4803: /*
 4804: ** After allocating the Fts3Expr.aMI[] array for each phrase in the 
 4805: ** expression rooted at pExpr, the cursor iterates through all rows matched
 4806: ** by pExpr, calling this function for each row. This function increments
 4807: ** the values in Fts3Expr.aMI[] according to the position-list currently
 4808: ** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase 
 4809: ** expression nodes.
 4810: */
 4811: static void fts3EvalUpdateCounts(Fts3Expr *pExpr){
 4812:   if( pExpr ){
 4813:     Fts3Phrase *pPhrase = pExpr->pPhrase;
 4814:     if( pPhrase && pPhrase->doclist.pList ){
 4815:       int iCol = 0;
 4816:       char *p = pPhrase->doclist.pList;
 4817: 
 4818:       assert( *p );
 4819:       while( 1 ){
 4820:         u8 c = 0;
 4821:         int iCnt = 0;
 4822:         while( 0xFE & (*p | c) ){
 4823:           if( (c&0x80)==0 ) iCnt++;
 4824:           c = *p++ & 0x80;
 4825:         }
 4826: 
 4827:         /* aMI[iCol*3 + 1] = Number of occurrences
 4828:         ** aMI[iCol*3 + 2] = Number of rows containing at least one instance
 4829:         */
 4830:         pExpr->aMI[iCol*3 + 1] += iCnt;
 4831:         pExpr->aMI[iCol*3 + 2] += (iCnt>0);
 4832:         if( *p==0x00 ) break;
 4833:         p++;
 4834:         p += sqlite3Fts3GetVarint32(p, &iCol);
 4835:       }
 4836:     }
 4837: 
 4838:     fts3EvalUpdateCounts(pExpr->pLeft);
 4839:     fts3EvalUpdateCounts(pExpr->pRight);
 4840:   }
 4841: }
 4842: 
 4843: /*
 4844: ** Expression pExpr must be of type FTSQUERY_PHRASE.
 4845: **
 4846: ** If it is not already allocated and populated, this function allocates and
 4847: ** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part
 4848: ** of a NEAR expression, then it also allocates and populates the same array
 4849: ** for all other phrases that are part of the NEAR expression.
 4850: **
 4851: ** SQLITE_OK is returned if the aMI[] array is successfully allocated and
 4852: ** populated. Otherwise, if an error occurs, an SQLite error code is returned.
 4853: */
 4854: static int fts3EvalGatherStats(
 4855:   Fts3Cursor *pCsr,               /* Cursor object */
 4856:   Fts3Expr *pExpr                 /* FTSQUERY_PHRASE expression */
 4857: ){
 4858:   int rc = SQLITE_OK;             /* Return code */
 4859: 
 4860:   assert( pExpr->eType==FTSQUERY_PHRASE );
 4861:   if( pExpr->aMI==0 ){
 4862:     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 4863:     Fts3Expr *pRoot;                /* Root of NEAR expression */
 4864:     Fts3Expr *p;                    /* Iterator used for several purposes */
 4865: 
 4866:     sqlite3_int64 iPrevId = pCsr->iPrevId;
 4867:     sqlite3_int64 iDocid;
 4868:     u8 bEof;
 4869: 
 4870:     /* Find the root of the NEAR expression */
 4871:     pRoot = pExpr;
 4872:     while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
 4873:       pRoot = pRoot->pParent;
 4874:     }
 4875:     iDocid = pRoot->iDocid;
 4876:     bEof = pRoot->bEof;
 4877:     assert( pRoot->bStart );
 4878: 
 4879:     /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */
 4880:     for(p=pRoot; p; p=p->pLeft){
 4881:       Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
 4882:       assert( pE->aMI==0 );
 4883:       pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32));
 4884:       if( !pE->aMI ) return SQLITE_NOMEM;
 4885:       memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
 4886:     }
 4887: 
 4888:     fts3EvalRestart(pCsr, pRoot, &rc);
 4889: 
 4890:     while( pCsr->isEof==0 && rc==SQLITE_OK ){
 4891: 
 4892:       do {
 4893:         /* Ensure the %_content statement is reset. */
 4894:         if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
 4895:         assert( sqlite3_data_count(pCsr->pStmt)==0 );
 4896: 
 4897:         /* Advance to the next document */
 4898:         fts3EvalNextRow(pCsr, pRoot, &rc);
 4899:         pCsr->isEof = pRoot->bEof;
 4900:         pCsr->isRequireSeek = 1;
 4901:         pCsr->isMatchinfoNeeded = 1;
 4902:         pCsr->iPrevId = pRoot->iDocid;
 4903:       }while( pCsr->isEof==0 
 4904:            && pRoot->eType==FTSQUERY_NEAR 
 4905:            && fts3EvalTestDeferredAndNear(pCsr, &rc) 
 4906:       );
 4907: 
 4908:       if( rc==SQLITE_OK && pCsr->isEof==0 ){
 4909:         fts3EvalUpdateCounts(pRoot);
 4910:       }
 4911:     }
 4912: 
 4913:     pCsr->isEof = 0;
 4914:     pCsr->iPrevId = iPrevId;
 4915: 
 4916:     if( bEof ){
 4917:       pRoot->bEof = bEof;
 4918:     }else{
 4919:       /* Caution: pRoot may iterate through docids in ascending or descending
 4920:       ** order. For this reason, even though it seems more defensive, the 
 4921:       ** do loop can not be written:
 4922:       **
 4923:       **   do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
 4924:       */
 4925:       fts3EvalRestart(pCsr, pRoot, &rc);
 4926:       do {
 4927:         fts3EvalNextRow(pCsr, pRoot, &rc);
 4928:         assert( pRoot->bEof==0 );
 4929:       }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
 4930:       fts3EvalTestDeferredAndNear(pCsr, &rc);
 4931:     }
 4932:   }
 4933:   return rc;
 4934: }
 4935: 
 4936: /*
 4937: ** This function is used by the matchinfo() module to query a phrase 
 4938: ** expression node for the following information:
 4939: **
 4940: **   1. The total number of occurrences of the phrase in each column of 
 4941: **      the FTS table (considering all rows), and
 4942: **
 4943: **   2. For each column, the number of rows in the table for which the
 4944: **      column contains at least one instance of the phrase.
 4945: **
 4946: ** If no error occurs, SQLITE_OK is returned and the values for each column
 4947: ** written into the array aiOut as follows:
 4948: **
 4949: **   aiOut[iCol*3 + 1] = Number of occurrences
 4950: **   aiOut[iCol*3 + 2] = Number of rows containing at least one instance
 4951: **
 4952: ** Caveats:
 4953: **
 4954: **   * If a phrase consists entirely of deferred tokens, then all output 
 4955: **     values are set to the number of documents in the table. In other
 4956: **     words we assume that very common tokens occur exactly once in each 
 4957: **     column of each row of the table.
 4958: **
 4959: **   * If a phrase contains some deferred tokens (and some non-deferred 
 4960: **     tokens), count the potential occurrence identified by considering
 4961: **     the non-deferred tokens instead of actual phrase occurrences.
 4962: **
 4963: **   * If the phrase is part of a NEAR expression, then only phrase instances
 4964: **     that meet the NEAR constraint are included in the counts.
 4965: */
 4966: int sqlite3Fts3EvalPhraseStats(
 4967:   Fts3Cursor *pCsr,               /* FTS cursor handle */
 4968:   Fts3Expr *pExpr,                /* Phrase expression */
 4969:   u32 *aiOut                      /* Array to write results into (see above) */
 4970: ){
 4971:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 4972:   int rc = SQLITE_OK;
 4973:   int iCol;
 4974: 
 4975:   if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){
 4976:     assert( pCsr->nDoc>0 );
 4977:     for(iCol=0; iCol<pTab->nColumn; iCol++){
 4978:       aiOut[iCol*3 + 1] = (u32)pCsr->nDoc;
 4979:       aiOut[iCol*3 + 2] = (u32)pCsr->nDoc;
 4980:     }
 4981:   }else{
 4982:     rc = fts3EvalGatherStats(pCsr, pExpr);
 4983:     if( rc==SQLITE_OK ){
 4984:       assert( pExpr->aMI );
 4985:       for(iCol=0; iCol<pTab->nColumn; iCol++){
 4986:         aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1];
 4987:         aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2];
 4988:       }
 4989:     }
 4990:   }
 4991: 
 4992:   return rc;
 4993: }
 4994: 
 4995: /*
 4996: ** The expression pExpr passed as the second argument to this function
 4997: ** must be of type FTSQUERY_PHRASE. 
 4998: **
 4999: ** The returned value is either NULL or a pointer to a buffer containing
 5000: ** a position-list indicating the occurrences of the phrase in column iCol
 5001: ** of the current row. 
 5002: **
 5003: ** More specifically, the returned buffer contains 1 varint for each 
 5004: ** occurence of the phrase in the column, stored using the normal (delta+2) 
 5005: ** compression and is terminated by either an 0x01 or 0x00 byte. For example,
 5006: ** if the requested column contains "a b X c d X X" and the position-list
 5007: ** for 'X' is requested, the buffer returned may contain:
 5008: **
 5009: **     0x04 0x05 0x03 0x01   or   0x04 0x05 0x03 0x00
 5010: **
 5011: ** This function works regardless of whether or not the phrase is deferred,
 5012: ** incremental, or neither.
 5013: */
 5014: char *sqlite3Fts3EvalPhrasePoslist(
 5015:   Fts3Cursor *pCsr,               /* FTS3 cursor object */
 5016:   Fts3Expr *pExpr,                /* Phrase to return doclist for */
 5017:   int iCol                        /* Column to return position list for */
 5018: ){
 5019:   Fts3Phrase *pPhrase = pExpr->pPhrase;
 5020:   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 5021:   char *pIter = pPhrase->doclist.pList;
 5022:   int iThis;
 5023: 
 5024:   assert( iCol>=0 && iCol<pTab->nColumn );
 5025:   if( !pIter 
 5026:    || pExpr->bEof 
 5027:    || pExpr->iDocid!=pCsr->iPrevId
 5028:    || (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) 
 5029:   ){
 5030:     return 0;
 5031:   }
 5032: 
 5033:   assert( pPhrase->doclist.nList>0 );
 5034:   if( *pIter==0x01 ){
 5035:     pIter++;
 5036:     pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
 5037:   }else{
 5038:     iThis = 0;
 5039:   }
 5040:   while( iThis<iCol ){
 5041:     fts3ColumnlistCopy(0, &pIter);
 5042:     if( *pIter==0x00 ) return 0;
 5043:     pIter++;
 5044:     pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
 5045:   }
 5046: 
 5047:   return ((iCol==iThis)?pIter:0);
 5048: }
 5049: 
 5050: /*
 5051: ** Free all components of the Fts3Phrase structure that were allocated by
 5052: ** the eval module. Specifically, this means to free:
 5053: **
 5054: **   * the contents of pPhrase->doclist, and
 5055: **   * any Fts3MultiSegReader objects held by phrase tokens.
 5056: */
 5057: void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
 5058:   if( pPhrase ){
 5059:     int i;
 5060:     sqlite3_free(pPhrase->doclist.aAll);
 5061:     fts3EvalInvalidatePoslist(pPhrase);
 5062:     memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
 5063:     for(i=0; i<pPhrase->nToken; i++){
 5064:       fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
 5065:       pPhrase->aToken[i].pSegcsr = 0;
 5066:     }
 5067:   }
 5068: }
 5069: 
 5070: /*
 5071: ** Return SQLITE_CORRUPT_VTAB.
 5072: */
 5073: #ifdef SQLITE_DEBUG
 5074: int sqlite3Fts3Corrupt(){
 5075:   return SQLITE_CORRUPT_VTAB;
 5076: }
 5077: #endif
 5078: 
 5079: #if !SQLITE_CORE
 5080: /*
 5081: ** Initialize API pointer table, if required.
 5082: */
 5083: int sqlite3_extension_init(
 5084:   sqlite3 *db, 
 5085:   char **pzErrMsg,
 5086:   const sqlite3_api_routines *pApi
 5087: ){
 5088:   SQLITE_EXTENSION_INIT2(pApi)
 5089:   return sqlite3Fts3Init(db);
 5090: }
 5091: #endif
 5092: 
 5093: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>