Annotation of embedaddon/sqlite3/ext/fts3/fts3.c, revision 1.1
1.1 ! misho 1: /*
! 2: ** 2006 Oct 10
! 3: **
! 4: ** The author disclaims copyright to this source code. In place of
! 5: ** a legal notice, here is a blessing:
! 6: **
! 7: ** May you do good and not evil.
! 8: ** May you find forgiveness for yourself and forgive others.
! 9: ** May you share freely, never taking more than you give.
! 10: **
! 11: ******************************************************************************
! 12: **
! 13: ** This is an SQLite module implementing full-text search.
! 14: */
! 15:
! 16: /*
! 17: ** The code in this file is only compiled if:
! 18: **
! 19: ** * The FTS3 module is being built as an extension
! 20: ** (in which case SQLITE_CORE is not defined), or
! 21: **
! 22: ** * The FTS3 module is being built into the core of
! 23: ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
! 24: */
! 25:
! 26: /* The full-text index is stored in a series of b+tree (-like)
! 27: ** structures called segments which map terms to doclists. The
! 28: ** structures are like b+trees in layout, but are constructed from the
! 29: ** bottom up in optimal fashion and are not updatable. Since trees
! 30: ** are built from the bottom up, things will be described from the
! 31: ** bottom up.
! 32: **
! 33: **
! 34: **** Varints ****
! 35: ** The basic unit of encoding is a variable-length integer called a
! 36: ** varint. We encode variable-length integers in little-endian order
! 37: ** using seven bits * per byte as follows:
! 38: **
! 39: ** KEY:
! 40: ** A = 0xxxxxxx 7 bits of data and one flag bit
! 41: ** B = 1xxxxxxx 7 bits of data and one flag bit
! 42: **
! 43: ** 7 bits - A
! 44: ** 14 bits - BA
! 45: ** 21 bits - BBA
! 46: ** and so on.
! 47: **
! 48: ** This is similar in concept to how sqlite encodes "varints" but
! 49: ** the encoding is not the same. SQLite varints are big-endian
! 50: ** are are limited to 9 bytes in length whereas FTS3 varints are
! 51: ** little-endian and can be up to 10 bytes in length (in theory).
! 52: **
! 53: ** Example encodings:
! 54: **
! 55: ** 1: 0x01
! 56: ** 127: 0x7f
! 57: ** 128: 0x81 0x00
! 58: **
! 59: **
! 60: **** Document lists ****
! 61: ** A doclist (document list) holds a docid-sorted list of hits for a
! 62: ** given term. Doclists hold docids and associated token positions.
! 63: ** A docid is the unique integer identifier for a single document.
! 64: ** A position is the index of a word within the document. The first
! 65: ** word of the document has a position of 0.
! 66: **
! 67: ** FTS3 used to optionally store character offsets using a compile-time
! 68: ** option. But that functionality is no longer supported.
! 69: **
! 70: ** A doclist is stored like this:
! 71: **
! 72: ** array {
! 73: ** varint docid;
! 74: ** array { (position list for column 0)
! 75: ** varint position; (2 more than the delta from previous position)
! 76: ** }
! 77: ** array {
! 78: ** varint POS_COLUMN; (marks start of position list for new column)
! 79: ** varint column; (index of new column)
! 80: ** array {
! 81: ** varint position; (2 more than the delta from previous position)
! 82: ** }
! 83: ** }
! 84: ** varint POS_END; (marks end of positions for this document.
! 85: ** }
! 86: **
! 87: ** Here, array { X } means zero or more occurrences of X, adjacent in
! 88: ** memory. A "position" is an index of a token in the token stream
! 89: ** generated by the tokenizer. Note that POS_END and POS_COLUMN occur
! 90: ** in the same logical place as the position element, and act as sentinals
! 91: ** ending a position list array. POS_END is 0. POS_COLUMN is 1.
! 92: ** The positions numbers are not stored literally but rather as two more
! 93: ** than the difference from the prior position, or the just the position plus
! 94: ** 2 for the first position. Example:
! 95: **
! 96: ** label: A B C D E F G H I J K
! 97: ** value: 123 5 9 1 1 14 35 0 234 72 0
! 98: **
! 99: ** The 123 value is the first docid. For column zero in this document
! 100: ** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1
! 101: ** at D signals the start of a new column; the 1 at E indicates that the
! 102: ** new column is column number 1. There are two positions at 12 and 45
! 103: ** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The
! 104: ** 234 at I is the next docid. It has one position 72 (72-2) and then
! 105: ** terminates with the 0 at K.
! 106: **
! 107: ** A "position-list" is the list of positions for multiple columns for
! 108: ** a single docid. A "column-list" is the set of positions for a single
! 109: ** column. Hence, a position-list consists of one or more column-lists,
! 110: ** a document record consists of a docid followed by a position-list and
! 111: ** a doclist consists of one or more document records.
! 112: **
! 113: ** A bare doclist omits the position information, becoming an
! 114: ** array of varint-encoded docids.
! 115: **
! 116: **** Segment leaf nodes ****
! 117: ** Segment leaf nodes store terms and doclists, ordered by term. Leaf
! 118: ** nodes are written using LeafWriter, and read using LeafReader (to
! 119: ** iterate through a single leaf node's data) and LeavesReader (to
! 120: ** iterate through a segment's entire leaf layer). Leaf nodes have
! 121: ** the format:
! 122: **
! 123: ** varint iHeight; (height from leaf level, always 0)
! 124: ** varint nTerm; (length of first term)
! 125: ** char pTerm[nTerm]; (content of first term)
! 126: ** varint nDoclist; (length of term's associated doclist)
! 127: ** char pDoclist[nDoclist]; (content of doclist)
! 128: ** array {
! 129: ** (further terms are delta-encoded)
! 130: ** varint nPrefix; (length of prefix shared with previous term)
! 131: ** varint nSuffix; (length of unshared suffix)
! 132: ** char pTermSuffix[nSuffix];(unshared suffix of next term)
! 133: ** varint nDoclist; (length of term's associated doclist)
! 134: ** char pDoclist[nDoclist]; (content of doclist)
! 135: ** }
! 136: **
! 137: ** Here, array { X } means zero or more occurrences of X, adjacent in
! 138: ** memory.
! 139: **
! 140: ** Leaf nodes are broken into blocks which are stored contiguously in
! 141: ** the %_segments table in sorted order. This means that when the end
! 142: ** of a node is reached, the next term is in the node with the next
! 143: ** greater node id.
! 144: **
! 145: ** New data is spilled to a new leaf node when the current node
! 146: ** exceeds LEAF_MAX bytes (default 2048). New data which itself is
! 147: ** larger than STANDALONE_MIN (default 1024) is placed in a standalone
! 148: ** node (a leaf node with a single term and doclist). The goal of
! 149: ** these settings is to pack together groups of small doclists while
! 150: ** making it efficient to directly access large doclists. The
! 151: ** assumption is that large doclists represent terms which are more
! 152: ** likely to be query targets.
! 153: **
! 154: ** TODO(shess) It may be useful for blocking decisions to be more
! 155: ** dynamic. For instance, it may make more sense to have a 2.5k leaf
! 156: ** node rather than splitting into 2k and .5k nodes. My intuition is
! 157: ** that this might extend through 2x or 4x the pagesize.
! 158: **
! 159: **
! 160: **** Segment interior nodes ****
! 161: ** Segment interior nodes store blockids for subtree nodes and terms
! 162: ** to describe what data is stored by the each subtree. Interior
! 163: ** nodes are written using InteriorWriter, and read using
! 164: ** InteriorReader. InteriorWriters are created as needed when
! 165: ** SegmentWriter creates new leaf nodes, or when an interior node
! 166: ** itself grows too big and must be split. The format of interior
! 167: ** nodes:
! 168: **
! 169: ** varint iHeight; (height from leaf level, always >0)
! 170: ** varint iBlockid; (block id of node's leftmost subtree)
! 171: ** optional {
! 172: ** varint nTerm; (length of first term)
! 173: ** char pTerm[nTerm]; (content of first term)
! 174: ** array {
! 175: ** (further terms are delta-encoded)
! 176: ** varint nPrefix; (length of shared prefix with previous term)
! 177: ** varint nSuffix; (length of unshared suffix)
! 178: ** char pTermSuffix[nSuffix]; (unshared suffix of next term)
! 179: ** }
! 180: ** }
! 181: **
! 182: ** Here, optional { X } means an optional element, while array { X }
! 183: ** means zero or more occurrences of X, adjacent in memory.
! 184: **
! 185: ** An interior node encodes n terms separating n+1 subtrees. The
! 186: ** subtree blocks are contiguous, so only the first subtree's blockid
! 187: ** is encoded. The subtree at iBlockid will contain all terms less
! 188: ** than the first term encoded (or all terms if no term is encoded).
! 189: ** Otherwise, for terms greater than or equal to pTerm[i] but less
! 190: ** than pTerm[i+1], the subtree for that term will be rooted at
! 191: ** iBlockid+i. Interior nodes only store enough term data to
! 192: ** distinguish adjacent children (if the rightmost term of the left
! 193: ** child is "something", and the leftmost term of the right child is
! 194: ** "wicked", only "w" is stored).
! 195: **
! 196: ** New data is spilled to a new interior node at the same height when
! 197: ** the current node exceeds INTERIOR_MAX bytes (default 2048).
! 198: ** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
! 199: ** interior nodes and making the tree too skinny. The interior nodes
! 200: ** at a given height are naturally tracked by interior nodes at
! 201: ** height+1, and so on.
! 202: **
! 203: **
! 204: **** Segment directory ****
! 205: ** The segment directory in table %_segdir stores meta-information for
! 206: ** merging and deleting segments, and also the root node of the
! 207: ** segment's tree.
! 208: **
! 209: ** The root node is the top node of the segment's tree after encoding
! 210: ** the entire segment, restricted to ROOT_MAX bytes (default 1024).
! 211: ** This could be either a leaf node or an interior node. If the top
! 212: ** node requires more than ROOT_MAX bytes, it is flushed to %_segments
! 213: ** and a new root interior node is generated (which should always fit
! 214: ** within ROOT_MAX because it only needs space for 2 varints, the
! 215: ** height and the blockid of the previous root).
! 216: **
! 217: ** The meta-information in the segment directory is:
! 218: ** level - segment level (see below)
! 219: ** idx - index within level
! 220: ** - (level,idx uniquely identify a segment)
! 221: ** start_block - first leaf node
! 222: ** leaves_end_block - last leaf node
! 223: ** end_block - last block (including interior nodes)
! 224: ** root - contents of root node
! 225: **
! 226: ** If the root node is a leaf node, then start_block,
! 227: ** leaves_end_block, and end_block are all 0.
! 228: **
! 229: **
! 230: **** Segment merging ****
! 231: ** To amortize update costs, segments are grouped into levels and
! 232: ** merged in batches. Each increase in level represents exponentially
! 233: ** more documents.
! 234: **
! 235: ** New documents (actually, document updates) are tokenized and
! 236: ** written individually (using LeafWriter) to a level 0 segment, with
! 237: ** incrementing idx. When idx reaches MERGE_COUNT (default 16), all
! 238: ** level 0 segments are merged into a single level 1 segment. Level 1
! 239: ** is populated like level 0, and eventually MERGE_COUNT level 1
! 240: ** segments are merged to a single level 2 segment (representing
! 241: ** MERGE_COUNT^2 updates), and so on.
! 242: **
! 243: ** A segment merge traverses all segments at a given level in
! 244: ** parallel, performing a straightforward sorted merge. Since segment
! 245: ** leaf nodes are written in to the %_segments table in order, this
! 246: ** merge traverses the underlying sqlite disk structures efficiently.
! 247: ** After the merge, all segment blocks from the merged level are
! 248: ** deleted.
! 249: **
! 250: ** MERGE_COUNT controls how often we merge segments. 16 seems to be
! 251: ** somewhat of a sweet spot for insertion performance. 32 and 64 show
! 252: ** very similar performance numbers to 16 on insertion, though they're
! 253: ** a tiny bit slower (perhaps due to more overhead in merge-time
! 254: ** sorting). 8 is about 20% slower than 16, 4 about 50% slower than
! 255: ** 16, 2 about 66% slower than 16.
! 256: **
! 257: ** At query time, high MERGE_COUNT increases the number of segments
! 258: ** which need to be scanned and merged. For instance, with 100k docs
! 259: ** inserted:
! 260: **
! 261: ** MERGE_COUNT segments
! 262: ** 16 25
! 263: ** 8 12
! 264: ** 4 10
! 265: ** 2 6
! 266: **
! 267: ** This appears to have only a moderate impact on queries for very
! 268: ** frequent terms (which are somewhat dominated by segment merge
! 269: ** costs), and infrequent and non-existent terms still seem to be fast
! 270: ** even with many segments.
! 271: **
! 272: ** TODO(shess) That said, it would be nice to have a better query-side
! 273: ** argument for MERGE_COUNT of 16. Also, it is possible/likely that
! 274: ** optimizations to things like doclist merging will swing the sweet
! 275: ** spot around.
! 276: **
! 277: **
! 278: **
! 279: **** Handling of deletions and updates ****
! 280: ** Since we're using a segmented structure, with no docid-oriented
! 281: ** index into the term index, we clearly cannot simply update the term
! 282: ** index when a document is deleted or updated. For deletions, we
! 283: ** write an empty doclist (varint(docid) varint(POS_END)), for updates
! 284: ** we simply write the new doclist. Segment merges overwrite older
! 285: ** data for a particular docid with newer data, so deletes or updates
! 286: ** will eventually overtake the earlier data and knock it out. The
! 287: ** query logic likewise merges doclists so that newer data knocks out
! 288: ** older data.
! 289: **
! 290: ** TODO(shess) Provide a VACUUM type operation to clear out all
! 291: ** deletions and duplications. This would basically be a forced merge
! 292: ** into a single segment.
! 293: */
! 294:
! 295: #include "fts3Int.h"
! 296: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
! 297:
! 298: #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
! 299: # define SQLITE_CORE 1
! 300: #endif
! 301:
! 302: #include <assert.h>
! 303: #include <stdlib.h>
! 304: #include <stddef.h>
! 305: #include <stdio.h>
! 306: #include <string.h>
! 307: #include <stdarg.h>
! 308:
! 309: #include "fts3.h"
! 310: #ifndef SQLITE_CORE
! 311: # include "sqlite3ext.h"
! 312: SQLITE_EXTENSION_INIT1
! 313: #endif
! 314:
! 315: static int fts3EvalNext(Fts3Cursor *pCsr);
! 316: static int fts3EvalStart(Fts3Cursor *pCsr);
! 317: static int fts3TermSegReaderCursor(
! 318: Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
! 319:
! 320: /*
! 321: ** Write a 64-bit variable-length integer to memory starting at p[0].
! 322: ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
! 323: ** The number of bytes written is returned.
! 324: */
! 325: int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
! 326: unsigned char *q = (unsigned char *) p;
! 327: sqlite_uint64 vu = v;
! 328: do{
! 329: *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
! 330: vu >>= 7;
! 331: }while( vu!=0 );
! 332: q[-1] &= 0x7f; /* turn off high bit in final byte */
! 333: assert( q - (unsigned char *)p <= FTS3_VARINT_MAX );
! 334: return (int) (q - (unsigned char *)p);
! 335: }
! 336:
! 337: /*
! 338: ** Read a 64-bit variable-length integer from memory starting at p[0].
! 339: ** Return the number of bytes read, or 0 on error.
! 340: ** The value is stored in *v.
! 341: */
! 342: int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
! 343: const unsigned char *q = (const unsigned char *) p;
! 344: sqlite_uint64 x = 0, y = 1;
! 345: while( (*q&0x80)==0x80 && q-(unsigned char *)p<FTS3_VARINT_MAX ){
! 346: x += y * (*q++ & 0x7f);
! 347: y <<= 7;
! 348: }
! 349: x += y * (*q++);
! 350: *v = (sqlite_int64) x;
! 351: return (int) (q - (unsigned char *)p);
! 352: }
! 353:
! 354: /*
! 355: ** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a
! 356: ** 32-bit integer before it is returned.
! 357: */
! 358: int sqlite3Fts3GetVarint32(const char *p, int *pi){
! 359: sqlite_int64 i;
! 360: int ret = sqlite3Fts3GetVarint(p, &i);
! 361: *pi = (int) i;
! 362: return ret;
! 363: }
! 364:
! 365: /*
! 366: ** Return the number of bytes required to encode v as a varint
! 367: */
! 368: int sqlite3Fts3VarintLen(sqlite3_uint64 v){
! 369: int i = 0;
! 370: do{
! 371: i++;
! 372: v >>= 7;
! 373: }while( v!=0 );
! 374: return i;
! 375: }
! 376:
! 377: /*
! 378: ** Convert an SQL-style quoted string into a normal string by removing
! 379: ** the quote characters. The conversion is done in-place. If the
! 380: ** input does not begin with a quote character, then this routine
! 381: ** is a no-op.
! 382: **
! 383: ** Examples:
! 384: **
! 385: ** "abc" becomes abc
! 386: ** 'xyz' becomes xyz
! 387: ** [pqr] becomes pqr
! 388: ** `mno` becomes mno
! 389: **
! 390: */
! 391: void sqlite3Fts3Dequote(char *z){
! 392: char quote; /* Quote character (if any ) */
! 393:
! 394: quote = z[0];
! 395: if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
! 396: int iIn = 1; /* Index of next byte to read from input */
! 397: int iOut = 0; /* Index of next byte to write to output */
! 398:
! 399: /* If the first byte was a '[', then the close-quote character is a ']' */
! 400: if( quote=='[' ) quote = ']';
! 401:
! 402: while( ALWAYS(z[iIn]) ){
! 403: if( z[iIn]==quote ){
! 404: if( z[iIn+1]!=quote ) break;
! 405: z[iOut++] = quote;
! 406: iIn += 2;
! 407: }else{
! 408: z[iOut++] = z[iIn++];
! 409: }
! 410: }
! 411: z[iOut] = '\0';
! 412: }
! 413: }
! 414:
! 415: /*
! 416: ** Read a single varint from the doclist at *pp and advance *pp to point
! 417: ** to the first byte past the end of the varint. Add the value of the varint
! 418: ** to *pVal.
! 419: */
! 420: static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
! 421: sqlite3_int64 iVal;
! 422: *pp += sqlite3Fts3GetVarint(*pp, &iVal);
! 423: *pVal += iVal;
! 424: }
! 425:
! 426: /*
! 427: ** When this function is called, *pp points to the first byte following a
! 428: ** varint that is part of a doclist (or position-list, or any other list
! 429: ** of varints). This function moves *pp to point to the start of that varint,
! 430: ** and sets *pVal by the varint value.
! 431: **
! 432: ** Argument pStart points to the first byte of the doclist that the
! 433: ** varint is part of.
! 434: */
! 435: static void fts3GetReverseVarint(
! 436: char **pp,
! 437: char *pStart,
! 438: sqlite3_int64 *pVal
! 439: ){
! 440: sqlite3_int64 iVal;
! 441: char *p;
! 442:
! 443: /* Pointer p now points at the first byte past the varint we are
! 444: ** interested in. So, unless the doclist is corrupt, the 0x80 bit is
! 445: ** clear on character p[-1]. */
! 446: for(p = (*pp)-2; p>=pStart && *p&0x80; p--);
! 447: p++;
! 448: *pp = p;
! 449:
! 450: sqlite3Fts3GetVarint(p, &iVal);
! 451: *pVal = iVal;
! 452: }
! 453:
! 454: /*
! 455: ** The xDisconnect() virtual table method.
! 456: */
! 457: static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
! 458: Fts3Table *p = (Fts3Table *)pVtab;
! 459: int i;
! 460:
! 461: assert( p->nPendingData==0 );
! 462: assert( p->pSegments==0 );
! 463:
! 464: /* Free any prepared statements held */
! 465: for(i=0; i<SizeofArray(p->aStmt); i++){
! 466: sqlite3_finalize(p->aStmt[i]);
! 467: }
! 468: sqlite3_free(p->zSegmentsTbl);
! 469: sqlite3_free(p->zReadExprlist);
! 470: sqlite3_free(p->zWriteExprlist);
! 471: sqlite3_free(p->zContentTbl);
! 472:
! 473: /* Invoke the tokenizer destructor to free the tokenizer. */
! 474: p->pTokenizer->pModule->xDestroy(p->pTokenizer);
! 475:
! 476: sqlite3_free(p);
! 477: return SQLITE_OK;
! 478: }
! 479:
! 480: /*
! 481: ** Construct one or more SQL statements from the format string given
! 482: ** and then evaluate those statements. The success code is written
! 483: ** into *pRc.
! 484: **
! 485: ** If *pRc is initially non-zero then this routine is a no-op.
! 486: */
! 487: static void fts3DbExec(
! 488: int *pRc, /* Success code */
! 489: sqlite3 *db, /* Database in which to run SQL */
! 490: const char *zFormat, /* Format string for SQL */
! 491: ... /* Arguments to the format string */
! 492: ){
! 493: va_list ap;
! 494: char *zSql;
! 495: if( *pRc ) return;
! 496: va_start(ap, zFormat);
! 497: zSql = sqlite3_vmprintf(zFormat, ap);
! 498: va_end(ap);
! 499: if( zSql==0 ){
! 500: *pRc = SQLITE_NOMEM;
! 501: }else{
! 502: *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
! 503: sqlite3_free(zSql);
! 504: }
! 505: }
! 506:
! 507: /*
! 508: ** The xDestroy() virtual table method.
! 509: */
! 510: static int fts3DestroyMethod(sqlite3_vtab *pVtab){
! 511: Fts3Table *p = (Fts3Table *)pVtab;
! 512: int rc = SQLITE_OK; /* Return code */
! 513: const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */
! 514: sqlite3 *db = p->db; /* Database handle */
! 515:
! 516: /* Drop the shadow tables */
! 517: if( p->zContentTbl==0 ){
! 518: fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName);
! 519: }
! 520: fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName);
! 521: fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName);
! 522: fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName);
! 523: fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName);
! 524:
! 525: /* If everything has worked, invoke fts3DisconnectMethod() to free the
! 526: ** memory associated with the Fts3Table structure and return SQLITE_OK.
! 527: ** Otherwise, return an SQLite error code.
! 528: */
! 529: return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc);
! 530: }
! 531:
! 532:
! 533: /*
! 534: ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
! 535: ** passed as the first argument. This is done as part of the xConnect()
! 536: ** and xCreate() methods.
! 537: **
! 538: ** If *pRc is non-zero when this function is called, it is a no-op.
! 539: ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
! 540: ** before returning.
! 541: */
! 542: static void fts3DeclareVtab(int *pRc, Fts3Table *p){
! 543: if( *pRc==SQLITE_OK ){
! 544: int i; /* Iterator variable */
! 545: int rc; /* Return code */
! 546: char *zSql; /* SQL statement passed to declare_vtab() */
! 547: char *zCols; /* List of user defined columns */
! 548:
! 549: sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
! 550:
! 551: /* Create a list of user columns for the virtual table */
! 552: zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
! 553: for(i=1; zCols && i<p->nColumn; i++){
! 554: zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
! 555: }
! 556:
! 557: /* Create the whole "CREATE TABLE" statement to pass to SQLite */
! 558: zSql = sqlite3_mprintf(
! 559: "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
! 560: );
! 561: if( !zCols || !zSql ){
! 562: rc = SQLITE_NOMEM;
! 563: }else{
! 564: rc = sqlite3_declare_vtab(p->db, zSql);
! 565: }
! 566:
! 567: sqlite3_free(zSql);
! 568: sqlite3_free(zCols);
! 569: *pRc = rc;
! 570: }
! 571: }
! 572:
! 573: /*
! 574: ** Create the backing store tables (%_content, %_segments and %_segdir)
! 575: ** required by the FTS3 table passed as the only argument. This is done
! 576: ** as part of the vtab xCreate() method.
! 577: **
! 578: ** If the p->bHasDocsize boolean is true (indicating that this is an
! 579: ** FTS4 table, not an FTS3 table) then also create the %_docsize and
! 580: ** %_stat tables required by FTS4.
! 581: */
! 582: static int fts3CreateTables(Fts3Table *p){
! 583: int rc = SQLITE_OK; /* Return code */
! 584: int i; /* Iterator variable */
! 585: sqlite3 *db = p->db; /* The database connection */
! 586:
! 587: if( p->zContentTbl==0 ){
! 588: char *zContentCols; /* Columns of %_content table */
! 589:
! 590: /* Create a list of user columns for the content table */
! 591: zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
! 592: for(i=0; zContentCols && i<p->nColumn; i++){
! 593: char *z = p->azColumn[i];
! 594: zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
! 595: }
! 596: if( zContentCols==0 ) rc = SQLITE_NOMEM;
! 597:
! 598: /* Create the content table */
! 599: fts3DbExec(&rc, db,
! 600: "CREATE TABLE %Q.'%q_content'(%s)",
! 601: p->zDb, p->zName, zContentCols
! 602: );
! 603: sqlite3_free(zContentCols);
! 604: }
! 605:
! 606: /* Create other tables */
! 607: fts3DbExec(&rc, db,
! 608: "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
! 609: p->zDb, p->zName
! 610: );
! 611: fts3DbExec(&rc, db,
! 612: "CREATE TABLE %Q.'%q_segdir'("
! 613: "level INTEGER,"
! 614: "idx INTEGER,"
! 615: "start_block INTEGER,"
! 616: "leaves_end_block INTEGER,"
! 617: "end_block INTEGER,"
! 618: "root BLOB,"
! 619: "PRIMARY KEY(level, idx)"
! 620: ");",
! 621: p->zDb, p->zName
! 622: );
! 623: if( p->bHasDocsize ){
! 624: fts3DbExec(&rc, db,
! 625: "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
! 626: p->zDb, p->zName
! 627: );
! 628: }
! 629: if( p->bHasStat ){
! 630: fts3DbExec(&rc, db,
! 631: "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
! 632: p->zDb, p->zName
! 633: );
! 634: }
! 635: return rc;
! 636: }
! 637:
! 638: /*
! 639: ** Store the current database page-size in bytes in p->nPgsz.
! 640: **
! 641: ** If *pRc is non-zero when this function is called, it is a no-op.
! 642: ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
! 643: ** before returning.
! 644: */
! 645: static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
! 646: if( *pRc==SQLITE_OK ){
! 647: int rc; /* Return code */
! 648: char *zSql; /* SQL text "PRAGMA %Q.page_size" */
! 649: sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */
! 650:
! 651: zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
! 652: if( !zSql ){
! 653: rc = SQLITE_NOMEM;
! 654: }else{
! 655: rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
! 656: if( rc==SQLITE_OK ){
! 657: sqlite3_step(pStmt);
! 658: p->nPgsz = sqlite3_column_int(pStmt, 0);
! 659: rc = sqlite3_finalize(pStmt);
! 660: }else if( rc==SQLITE_AUTH ){
! 661: p->nPgsz = 1024;
! 662: rc = SQLITE_OK;
! 663: }
! 664: }
! 665: assert( p->nPgsz>0 || rc!=SQLITE_OK );
! 666: sqlite3_free(zSql);
! 667: *pRc = rc;
! 668: }
! 669: }
! 670:
! 671: /*
! 672: ** "Special" FTS4 arguments are column specifications of the following form:
! 673: **
! 674: ** <key> = <value>
! 675: **
! 676: ** There may not be whitespace surrounding the "=" character. The <value>
! 677: ** term may be quoted, but the <key> may not.
! 678: */
! 679: static int fts3IsSpecialColumn(
! 680: const char *z,
! 681: int *pnKey,
! 682: char **pzValue
! 683: ){
! 684: char *zValue;
! 685: const char *zCsr = z;
! 686:
! 687: while( *zCsr!='=' ){
! 688: if( *zCsr=='\0' ) return 0;
! 689: zCsr++;
! 690: }
! 691:
! 692: *pnKey = (int)(zCsr-z);
! 693: zValue = sqlite3_mprintf("%s", &zCsr[1]);
! 694: if( zValue ){
! 695: sqlite3Fts3Dequote(zValue);
! 696: }
! 697: *pzValue = zValue;
! 698: return 1;
! 699: }
! 700:
! 701: /*
! 702: ** Append the output of a printf() style formatting to an existing string.
! 703: */
! 704: static void fts3Appendf(
! 705: int *pRc, /* IN/OUT: Error code */
! 706: char **pz, /* IN/OUT: Pointer to string buffer */
! 707: const char *zFormat, /* Printf format string to append */
! 708: ... /* Arguments for printf format string */
! 709: ){
! 710: if( *pRc==SQLITE_OK ){
! 711: va_list ap;
! 712: char *z;
! 713: va_start(ap, zFormat);
! 714: z = sqlite3_vmprintf(zFormat, ap);
! 715: va_end(ap);
! 716: if( z && *pz ){
! 717: char *z2 = sqlite3_mprintf("%s%s", *pz, z);
! 718: sqlite3_free(z);
! 719: z = z2;
! 720: }
! 721: if( z==0 ) *pRc = SQLITE_NOMEM;
! 722: sqlite3_free(*pz);
! 723: *pz = z;
! 724: }
! 725: }
! 726:
! 727: /*
! 728: ** Return a copy of input string zInput enclosed in double-quotes (") and
! 729: ** with all double quote characters escaped. For example:
! 730: **
! 731: ** fts3QuoteId("un \"zip\"") -> "un \"\"zip\"\""
! 732: **
! 733: ** The pointer returned points to memory obtained from sqlite3_malloc(). It
! 734: ** is the callers responsibility to call sqlite3_free() to release this
! 735: ** memory.
! 736: */
! 737: static char *fts3QuoteId(char const *zInput){
! 738: int nRet;
! 739: char *zRet;
! 740: nRet = 2 + strlen(zInput)*2 + 1;
! 741: zRet = sqlite3_malloc(nRet);
! 742: if( zRet ){
! 743: int i;
! 744: char *z = zRet;
! 745: *(z++) = '"';
! 746: for(i=0; zInput[i]; i++){
! 747: if( zInput[i]=='"' ) *(z++) = '"';
! 748: *(z++) = zInput[i];
! 749: }
! 750: *(z++) = '"';
! 751: *(z++) = '\0';
! 752: }
! 753: return zRet;
! 754: }
! 755:
! 756: /*
! 757: ** Return a list of comma separated SQL expressions and a FROM clause that
! 758: ** could be used in a SELECT statement such as the following:
! 759: **
! 760: ** SELECT <list of expressions> FROM %_content AS x ...
! 761: **
! 762: ** to return the docid, followed by each column of text data in order
! 763: ** from left to write. If parameter zFunc is not NULL, then instead of
! 764: ** being returned directly each column of text data is passed to an SQL
! 765: ** function named zFunc first. For example, if zFunc is "unzip" and the
! 766: ** table has the three user-defined columns "a", "b", and "c", the following
! 767: ** string is returned:
! 768: **
! 769: ** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
! 770: **
! 771: ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
! 772: ** is the responsibility of the caller to eventually free it.
! 773: **
! 774: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
! 775: ** a NULL pointer is returned). Otherwise, if an OOM error is encountered
! 776: ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
! 777: ** no error occurs, *pRc is left unmodified.
! 778: */
! 779: static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
! 780: char *zRet = 0;
! 781: char *zFree = 0;
! 782: char *zFunction;
! 783: int i;
! 784:
! 785: if( p->zContentTbl==0 ){
! 786: if( !zFunc ){
! 787: zFunction = "";
! 788: }else{
! 789: zFree = zFunction = fts3QuoteId(zFunc);
! 790: }
! 791: fts3Appendf(pRc, &zRet, "docid");
! 792: for(i=0; i<p->nColumn; i++){
! 793: fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
! 794: }
! 795: sqlite3_free(zFree);
! 796: }else{
! 797: fts3Appendf(pRc, &zRet, "rowid");
! 798: for(i=0; i<p->nColumn; i++){
! 799: fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
! 800: }
! 801: }
! 802: fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x",
! 803: p->zDb,
! 804: (p->zContentTbl ? p->zContentTbl : p->zName),
! 805: (p->zContentTbl ? "" : "_content")
! 806: );
! 807: return zRet;
! 808: }
! 809:
! 810: /*
! 811: ** Return a list of N comma separated question marks, where N is the number
! 812: ** of columns in the %_content table (one for the docid plus one for each
! 813: ** user-defined text column).
! 814: **
! 815: ** If argument zFunc is not NULL, then all but the first question mark
! 816: ** is preceded by zFunc and an open bracket, and followed by a closed
! 817: ** bracket. For example, if zFunc is "zip" and the FTS3 table has three
! 818: ** user-defined text columns, the following string is returned:
! 819: **
! 820: ** "?, zip(?), zip(?), zip(?)"
! 821: **
! 822: ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
! 823: ** is the responsibility of the caller to eventually free it.
! 824: **
! 825: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
! 826: ** a NULL pointer is returned). Otherwise, if an OOM error is encountered
! 827: ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
! 828: ** no error occurs, *pRc is left unmodified.
! 829: */
! 830: static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
! 831: char *zRet = 0;
! 832: char *zFree = 0;
! 833: char *zFunction;
! 834: int i;
! 835:
! 836: if( !zFunc ){
! 837: zFunction = "";
! 838: }else{
! 839: zFree = zFunction = fts3QuoteId(zFunc);
! 840: }
! 841: fts3Appendf(pRc, &zRet, "?");
! 842: for(i=0; i<p->nColumn; i++){
! 843: fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
! 844: }
! 845: sqlite3_free(zFree);
! 846: return zRet;
! 847: }
! 848:
! 849: /*
! 850: ** This function interprets the string at (*pp) as a non-negative integer
! 851: ** value. It reads the integer and sets *pnOut to the value read, then
! 852: ** sets *pp to point to the byte immediately following the last byte of
! 853: ** the integer value.
! 854: **
! 855: ** Only decimal digits ('0'..'9') may be part of an integer value.
! 856: **
! 857: ** If *pp does not being with a decimal digit SQLITE_ERROR is returned and
! 858: ** the output value undefined. Otherwise SQLITE_OK is returned.
! 859: **
! 860: ** This function is used when parsing the "prefix=" FTS4 parameter.
! 861: */
! 862: static int fts3GobbleInt(const char **pp, int *pnOut){
! 863: const char *p; /* Iterator pointer */
! 864: int nInt = 0; /* Output value */
! 865:
! 866: for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
! 867: nInt = nInt * 10 + (p[0] - '0');
! 868: }
! 869: if( p==*pp ) return SQLITE_ERROR;
! 870: *pnOut = nInt;
! 871: *pp = p;
! 872: return SQLITE_OK;
! 873: }
! 874:
! 875: /*
! 876: ** This function is called to allocate an array of Fts3Index structures
! 877: ** representing the indexes maintained by the current FTS table. FTS tables
! 878: ** always maintain the main "terms" index, but may also maintain one or
! 879: ** more "prefix" indexes, depending on the value of the "prefix=" parameter
! 880: ** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
! 881: **
! 882: ** Argument zParam is passed the value of the "prefix=" option if one was
! 883: ** specified, or NULL otherwise.
! 884: **
! 885: ** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
! 886: ** the allocated array. *pnIndex is set to the number of elements in the
! 887: ** array. If an error does occur, an SQLite error code is returned.
! 888: **
! 889: ** Regardless of whether or not an error is returned, it is the responsibility
! 890: ** of the caller to call sqlite3_free() on the output array to free it.
! 891: */
! 892: static int fts3PrefixParameter(
! 893: const char *zParam, /* ABC in prefix=ABC parameter to parse */
! 894: int *pnIndex, /* OUT: size of *apIndex[] array */
! 895: struct Fts3Index **apIndex /* OUT: Array of indexes for this table */
! 896: ){
! 897: struct Fts3Index *aIndex; /* Allocated array */
! 898: int nIndex = 1; /* Number of entries in array */
! 899:
! 900: if( zParam && zParam[0] ){
! 901: const char *p;
! 902: nIndex++;
! 903: for(p=zParam; *p; p++){
! 904: if( *p==',' ) nIndex++;
! 905: }
! 906: }
! 907:
! 908: aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
! 909: *apIndex = aIndex;
! 910: *pnIndex = nIndex;
! 911: if( !aIndex ){
! 912: return SQLITE_NOMEM;
! 913: }
! 914:
! 915: memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex);
! 916: if( zParam ){
! 917: const char *p = zParam;
! 918: int i;
! 919: for(i=1; i<nIndex; i++){
! 920: int nPrefix;
! 921: if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR;
! 922: aIndex[i].nPrefix = nPrefix;
! 923: p++;
! 924: }
! 925: }
! 926:
! 927: return SQLITE_OK;
! 928: }
! 929:
! 930: /*
! 931: ** This function is called when initializing an FTS4 table that uses the
! 932: ** content=xxx option. It determines the number of and names of the columns
! 933: ** of the new FTS4 table.
! 934: **
! 935: ** The third argument passed to this function is the value passed to the
! 936: ** config=xxx option (i.e. "xxx"). This function queries the database for
! 937: ** a table of that name. If found, the output variables are populated
! 938: ** as follows:
! 939: **
! 940: ** *pnCol: Set to the number of columns table xxx has,
! 941: **
! 942: ** *pnStr: Set to the total amount of space required to store a copy
! 943: ** of each columns name, including the nul-terminator.
! 944: **
! 945: ** *pazCol: Set to point to an array of *pnCol strings. Each string is
! 946: ** the name of the corresponding column in table xxx. The array
! 947: ** and its contents are allocated using a single allocation. It
! 948: ** is the responsibility of the caller to free this allocation
! 949: ** by eventually passing the *pazCol value to sqlite3_free().
! 950: **
! 951: ** If the table cannot be found, an error code is returned and the output
! 952: ** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
! 953: ** returned (and the output variables are undefined).
! 954: */
! 955: static int fts3ContentColumns(
! 956: sqlite3 *db, /* Database handle */
! 957: const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */
! 958: const char *zTbl, /* Name of content table */
! 959: const char ***pazCol, /* OUT: Malloc'd array of column names */
! 960: int *pnCol, /* OUT: Size of array *pazCol */
! 961: int *pnStr /* OUT: Bytes of string content */
! 962: ){
! 963: int rc = SQLITE_OK; /* Return code */
! 964: char *zSql; /* "SELECT *" statement on zTbl */
! 965: sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */
! 966:
! 967: zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
! 968: if( !zSql ){
! 969: rc = SQLITE_NOMEM;
! 970: }else{
! 971: rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
! 972: }
! 973: sqlite3_free(zSql);
! 974:
! 975: if( rc==SQLITE_OK ){
! 976: const char **azCol; /* Output array */
! 977: int nStr = 0; /* Size of all column names (incl. 0x00) */
! 978: int nCol; /* Number of table columns */
! 979: int i; /* Used to iterate through columns */
! 980:
! 981: /* Loop through the returned columns. Set nStr to the number of bytes of
! 982: ** space required to store a copy of each column name, including the
! 983: ** nul-terminator byte. */
! 984: nCol = sqlite3_column_count(pStmt);
! 985: for(i=0; i<nCol; i++){
! 986: const char *zCol = sqlite3_column_name(pStmt, i);
! 987: nStr += strlen(zCol) + 1;
! 988: }
! 989:
! 990: /* Allocate and populate the array to return. */
! 991: azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
! 992: if( azCol==0 ){
! 993: rc = SQLITE_NOMEM;
! 994: }else{
! 995: char *p = (char *)&azCol[nCol];
! 996: for(i=0; i<nCol; i++){
! 997: const char *zCol = sqlite3_column_name(pStmt, i);
! 998: int n = strlen(zCol)+1;
! 999: memcpy(p, zCol, n);
! 1000: azCol[i] = p;
! 1001: p += n;
! 1002: }
! 1003: }
! 1004: sqlite3_finalize(pStmt);
! 1005:
! 1006: /* Set the output variables. */
! 1007: *pnCol = nCol;
! 1008: *pnStr = nStr;
! 1009: *pazCol = azCol;
! 1010: }
! 1011:
! 1012: return rc;
! 1013: }
! 1014:
! 1015: /*
! 1016: ** This function is the implementation of both the xConnect and xCreate
! 1017: ** methods of the FTS3 virtual table.
! 1018: **
! 1019: ** The argv[] array contains the following:
! 1020: **
! 1021: ** argv[0] -> module name ("fts3" or "fts4")
! 1022: ** argv[1] -> database name
! 1023: ** argv[2] -> table name
! 1024: ** argv[...] -> "column name" and other module argument fields.
! 1025: */
! 1026: static int fts3InitVtab(
! 1027: int isCreate, /* True for xCreate, false for xConnect */
! 1028: sqlite3 *db, /* The SQLite database connection */
! 1029: void *pAux, /* Hash table containing tokenizers */
! 1030: int argc, /* Number of elements in argv array */
! 1031: const char * const *argv, /* xCreate/xConnect argument array */
! 1032: sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
! 1033: char **pzErr /* Write any error message here */
! 1034: ){
! 1035: Fts3Hash *pHash = (Fts3Hash *)pAux;
! 1036: Fts3Table *p = 0; /* Pointer to allocated vtab */
! 1037: int rc = SQLITE_OK; /* Return code */
! 1038: int i; /* Iterator variable */
! 1039: int nByte; /* Size of allocation used for *p */
! 1040: int iCol; /* Column index */
! 1041: int nString = 0; /* Bytes required to hold all column names */
! 1042: int nCol = 0; /* Number of columns in the FTS table */
! 1043: char *zCsr; /* Space for holding column names */
! 1044: int nDb; /* Bytes required to hold database name */
! 1045: int nName; /* Bytes required to hold table name */
! 1046: int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
! 1047: const char **aCol; /* Array of column names */
! 1048: sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
! 1049:
! 1050: int nIndex; /* Size of aIndex[] array */
! 1051: struct Fts3Index *aIndex = 0; /* Array of indexes for this table */
! 1052:
! 1053: /* The results of parsing supported FTS4 key=value options: */
! 1054: int bNoDocsize = 0; /* True to omit %_docsize table */
! 1055: int bDescIdx = 0; /* True to store descending indexes */
! 1056: char *zPrefix = 0; /* Prefix parameter value (or NULL) */
! 1057: char *zCompress = 0; /* compress=? parameter (or NULL) */
! 1058: char *zUncompress = 0; /* uncompress=? parameter (or NULL) */
! 1059: char *zContent = 0; /* content=? parameter (or NULL) */
! 1060:
! 1061: assert( strlen(argv[0])==4 );
! 1062: assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
! 1063: || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
! 1064: );
! 1065:
! 1066: nDb = (int)strlen(argv[1]) + 1;
! 1067: nName = (int)strlen(argv[2]) + 1;
! 1068:
! 1069: aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) );
! 1070: if( !aCol ) return SQLITE_NOMEM;
! 1071: memset((void *)aCol, 0, sizeof(const char *) * (argc-2));
! 1072:
! 1073: /* Loop through all of the arguments passed by the user to the FTS3/4
! 1074: ** module (i.e. all the column names and special arguments). This loop
! 1075: ** does the following:
! 1076: **
! 1077: ** + Figures out the number of columns the FTSX table will have, and
! 1078: ** the number of bytes of space that must be allocated to store copies
! 1079: ** of the column names.
! 1080: **
! 1081: ** + If there is a tokenizer specification included in the arguments,
! 1082: ** initializes the tokenizer pTokenizer.
! 1083: */
! 1084: for(i=3; rc==SQLITE_OK && i<argc; i++){
! 1085: char const *z = argv[i];
! 1086: int nKey;
! 1087: char *zVal;
! 1088:
! 1089: /* Check if this is a tokenizer specification */
! 1090: if( !pTokenizer
! 1091: && strlen(z)>8
! 1092: && 0==sqlite3_strnicmp(z, "tokenize", 8)
! 1093: && 0==sqlite3Fts3IsIdChar(z[8])
! 1094: ){
! 1095: rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
! 1096: }
! 1097:
! 1098: /* Check if it is an FTS4 special argument. */
! 1099: else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
! 1100: struct Fts4Option {
! 1101: const char *zOpt;
! 1102: int nOpt;
! 1103: } aFts4Opt[] = {
! 1104: { "matchinfo", 9 }, /* 0 -> MATCHINFO */
! 1105: { "prefix", 6 }, /* 1 -> PREFIX */
! 1106: { "compress", 8 }, /* 2 -> COMPRESS */
! 1107: { "uncompress", 10 }, /* 3 -> UNCOMPRESS */
! 1108: { "order", 5 }, /* 4 -> ORDER */
! 1109: { "content", 7 } /* 5 -> CONTENT */
! 1110: };
! 1111:
! 1112: int iOpt;
! 1113: if( !zVal ){
! 1114: rc = SQLITE_NOMEM;
! 1115: }else{
! 1116: for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
! 1117: struct Fts4Option *pOp = &aFts4Opt[iOpt];
! 1118: if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){
! 1119: break;
! 1120: }
! 1121: }
! 1122: if( iOpt==SizeofArray(aFts4Opt) ){
! 1123: *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
! 1124: rc = SQLITE_ERROR;
! 1125: }else{
! 1126: switch( iOpt ){
! 1127: case 0: /* MATCHINFO */
! 1128: if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
! 1129: *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
! 1130: rc = SQLITE_ERROR;
! 1131: }
! 1132: bNoDocsize = 1;
! 1133: break;
! 1134:
! 1135: case 1: /* PREFIX */
! 1136: sqlite3_free(zPrefix);
! 1137: zPrefix = zVal;
! 1138: zVal = 0;
! 1139: break;
! 1140:
! 1141: case 2: /* COMPRESS */
! 1142: sqlite3_free(zCompress);
! 1143: zCompress = zVal;
! 1144: zVal = 0;
! 1145: break;
! 1146:
! 1147: case 3: /* UNCOMPRESS */
! 1148: sqlite3_free(zUncompress);
! 1149: zUncompress = zVal;
! 1150: zVal = 0;
! 1151: break;
! 1152:
! 1153: case 4: /* ORDER */
! 1154: if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3))
! 1155: && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4))
! 1156: ){
! 1157: *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
! 1158: rc = SQLITE_ERROR;
! 1159: }
! 1160: bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
! 1161: break;
! 1162:
! 1163: default: /* CONTENT */
! 1164: assert( iOpt==5 );
! 1165: sqlite3_free(zUncompress);
! 1166: zContent = zVal;
! 1167: zVal = 0;
! 1168: break;
! 1169: }
! 1170: }
! 1171: sqlite3_free(zVal);
! 1172: }
! 1173: }
! 1174:
! 1175: /* Otherwise, the argument is a column name. */
! 1176: else {
! 1177: nString += (int)(strlen(z) + 1);
! 1178: aCol[nCol++] = z;
! 1179: }
! 1180: }
! 1181:
! 1182: /* If a content=xxx option was specified, the following:
! 1183: **
! 1184: ** 1. Ignore any compress= and uncompress= options.
! 1185: **
! 1186: ** 2. If no column names were specified as part of the CREATE VIRTUAL
! 1187: ** TABLE statement, use all columns from the content table.
! 1188: */
! 1189: if( rc==SQLITE_OK && zContent ){
! 1190: sqlite3_free(zCompress);
! 1191: sqlite3_free(zUncompress);
! 1192: zCompress = 0;
! 1193: zUncompress = 0;
! 1194: if( nCol==0 ){
! 1195: sqlite3_free((void*)aCol);
! 1196: aCol = 0;
! 1197: rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
! 1198: }
! 1199: assert( rc!=SQLITE_OK || nCol>0 );
! 1200: }
! 1201: if( rc!=SQLITE_OK ) goto fts3_init_out;
! 1202:
! 1203: if( nCol==0 ){
! 1204: assert( nString==0 );
! 1205: aCol[0] = "content";
! 1206: nString = 8;
! 1207: nCol = 1;
! 1208: }
! 1209:
! 1210: if( pTokenizer==0 ){
! 1211: rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
! 1212: if( rc!=SQLITE_OK ) goto fts3_init_out;
! 1213: }
! 1214: assert( pTokenizer );
! 1215:
! 1216: rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
! 1217: if( rc==SQLITE_ERROR ){
! 1218: assert( zPrefix );
! 1219: *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
! 1220: }
! 1221: if( rc!=SQLITE_OK ) goto fts3_init_out;
! 1222:
! 1223: /* Allocate and populate the Fts3Table structure. */
! 1224: nByte = sizeof(Fts3Table) + /* Fts3Table */
! 1225: nCol * sizeof(char *) + /* azColumn */
! 1226: nIndex * sizeof(struct Fts3Index) + /* aIndex */
! 1227: nName + /* zName */
! 1228: nDb + /* zDb */
! 1229: nString; /* Space for azColumn strings */
! 1230: p = (Fts3Table*)sqlite3_malloc(nByte);
! 1231: if( p==0 ){
! 1232: rc = SQLITE_NOMEM;
! 1233: goto fts3_init_out;
! 1234: }
! 1235: memset(p, 0, nByte);
! 1236: p->db = db;
! 1237: p->nColumn = nCol;
! 1238: p->nPendingData = 0;
! 1239: p->azColumn = (char **)&p[1];
! 1240: p->pTokenizer = pTokenizer;
! 1241: p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
! 1242: p->bHasDocsize = (isFts4 && bNoDocsize==0);
! 1243: p->bHasStat = isFts4;
! 1244: p->bDescIdx = bDescIdx;
! 1245: p->zContentTbl = zContent;
! 1246: zContent = 0;
! 1247: TESTONLY( p->inTransaction = -1 );
! 1248: TESTONLY( p->mxSavepoint = -1 );
! 1249:
! 1250: p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
! 1251: memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
! 1252: p->nIndex = nIndex;
! 1253: for(i=0; i<nIndex; i++){
! 1254: fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
! 1255: }
! 1256:
! 1257: /* Fill in the zName and zDb fields of the vtab structure. */
! 1258: zCsr = (char *)&p->aIndex[nIndex];
! 1259: p->zName = zCsr;
! 1260: memcpy(zCsr, argv[2], nName);
! 1261: zCsr += nName;
! 1262: p->zDb = zCsr;
! 1263: memcpy(zCsr, argv[1], nDb);
! 1264: zCsr += nDb;
! 1265:
! 1266: /* Fill in the azColumn array */
! 1267: for(iCol=0; iCol<nCol; iCol++){
! 1268: char *z;
! 1269: int n = 0;
! 1270: z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
! 1271: memcpy(zCsr, z, n);
! 1272: zCsr[n] = '\0';
! 1273: sqlite3Fts3Dequote(zCsr);
! 1274: p->azColumn[iCol] = zCsr;
! 1275: zCsr += n+1;
! 1276: assert( zCsr <= &((char *)p)[nByte] );
! 1277: }
! 1278:
! 1279: if( (zCompress==0)!=(zUncompress==0) ){
! 1280: char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
! 1281: rc = SQLITE_ERROR;
! 1282: *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss);
! 1283: }
! 1284: p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
! 1285: p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
! 1286: if( rc!=SQLITE_OK ) goto fts3_init_out;
! 1287:
! 1288: /* If this is an xCreate call, create the underlying tables in the
! 1289: ** database. TODO: For xConnect(), it could verify that said tables exist.
! 1290: */
! 1291: if( isCreate ){
! 1292: rc = fts3CreateTables(p);
! 1293: }
! 1294:
! 1295: /* Figure out the page-size for the database. This is required in order to
! 1296: ** estimate the cost of loading large doclists from the database. */
! 1297: fts3DatabasePageSize(&rc, p);
! 1298: p->nNodeSize = p->nPgsz-35;
! 1299:
! 1300: /* Declare the table schema to SQLite. */
! 1301: fts3DeclareVtab(&rc, p);
! 1302:
! 1303: fts3_init_out:
! 1304: sqlite3_free(zPrefix);
! 1305: sqlite3_free(aIndex);
! 1306: sqlite3_free(zCompress);
! 1307: sqlite3_free(zUncompress);
! 1308: sqlite3_free(zContent);
! 1309: sqlite3_free((void *)aCol);
! 1310: if( rc!=SQLITE_OK ){
! 1311: if( p ){
! 1312: fts3DisconnectMethod((sqlite3_vtab *)p);
! 1313: }else if( pTokenizer ){
! 1314: pTokenizer->pModule->xDestroy(pTokenizer);
! 1315: }
! 1316: }else{
! 1317: assert( p->pSegments==0 );
! 1318: *ppVTab = &p->base;
! 1319: }
! 1320: return rc;
! 1321: }
! 1322:
! 1323: /*
! 1324: ** The xConnect() and xCreate() methods for the virtual table. All the
! 1325: ** work is done in function fts3InitVtab().
! 1326: */
! 1327: static int fts3ConnectMethod(
! 1328: sqlite3 *db, /* Database connection */
! 1329: void *pAux, /* Pointer to tokenizer hash table */
! 1330: int argc, /* Number of elements in argv array */
! 1331: const char * const *argv, /* xCreate/xConnect argument array */
! 1332: sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
! 1333: char **pzErr /* OUT: sqlite3_malloc'd error message */
! 1334: ){
! 1335: return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
! 1336: }
! 1337: static int fts3CreateMethod(
! 1338: sqlite3 *db, /* Database connection */
! 1339: void *pAux, /* Pointer to tokenizer hash table */
! 1340: int argc, /* Number of elements in argv array */
! 1341: const char * const *argv, /* xCreate/xConnect argument array */
! 1342: sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
! 1343: char **pzErr /* OUT: sqlite3_malloc'd error message */
! 1344: ){
! 1345: return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
! 1346: }
! 1347:
! 1348: /*
! 1349: ** Implementation of the xBestIndex method for FTS3 tables. There
! 1350: ** are three possible strategies, in order of preference:
! 1351: **
! 1352: ** 1. Direct lookup by rowid or docid.
! 1353: ** 2. Full-text search using a MATCH operator on a non-docid column.
! 1354: ** 3. Linear scan of %_content table.
! 1355: */
! 1356: static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
! 1357: Fts3Table *p = (Fts3Table *)pVTab;
! 1358: int i; /* Iterator variable */
! 1359: int iCons = -1; /* Index of constraint to use */
! 1360:
! 1361: /* By default use a full table scan. This is an expensive option,
! 1362: ** so search through the constraints to see if a more efficient
! 1363: ** strategy is possible.
! 1364: */
! 1365: pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
! 1366: pInfo->estimatedCost = 500000;
! 1367: for(i=0; i<pInfo->nConstraint; i++){
! 1368: struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
! 1369: if( pCons->usable==0 ) continue;
! 1370:
! 1371: /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
! 1372: if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
! 1373: && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
! 1374: ){
! 1375: pInfo->idxNum = FTS3_DOCID_SEARCH;
! 1376: pInfo->estimatedCost = 1.0;
! 1377: iCons = i;
! 1378: }
! 1379:
! 1380: /* A MATCH constraint. Use a full-text search.
! 1381: **
! 1382: ** If there is more than one MATCH constraint available, use the first
! 1383: ** one encountered. If there is both a MATCH constraint and a direct
! 1384: ** rowid/docid lookup, prefer the MATCH strategy. This is done even
! 1385: ** though the rowid/docid lookup is faster than a MATCH query, selecting
! 1386: ** it would lead to an "unable to use function MATCH in the requested
! 1387: ** context" error.
! 1388: */
! 1389: if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH
! 1390: && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
! 1391: ){
! 1392: pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
! 1393: pInfo->estimatedCost = 2.0;
! 1394: iCons = i;
! 1395: break;
! 1396: }
! 1397: }
! 1398:
! 1399: if( iCons>=0 ){
! 1400: pInfo->aConstraintUsage[iCons].argvIndex = 1;
! 1401: pInfo->aConstraintUsage[iCons].omit = 1;
! 1402: }
! 1403:
! 1404: /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
! 1405: ** docid) order. Both ascending and descending are possible.
! 1406: */
! 1407: if( pInfo->nOrderBy==1 ){
! 1408: struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
! 1409: if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
! 1410: if( pOrder->desc ){
! 1411: pInfo->idxStr = "DESC";
! 1412: }else{
! 1413: pInfo->idxStr = "ASC";
! 1414: }
! 1415: pInfo->orderByConsumed = 1;
! 1416: }
! 1417: }
! 1418:
! 1419: assert( p->pSegments==0 );
! 1420: return SQLITE_OK;
! 1421: }
! 1422:
! 1423: /*
! 1424: ** Implementation of xOpen method.
! 1425: */
! 1426: static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
! 1427: sqlite3_vtab_cursor *pCsr; /* Allocated cursor */
! 1428:
! 1429: UNUSED_PARAMETER(pVTab);
! 1430:
! 1431: /* Allocate a buffer large enough for an Fts3Cursor structure. If the
! 1432: ** allocation succeeds, zero it and return SQLITE_OK. Otherwise,
! 1433: ** if the allocation fails, return SQLITE_NOMEM.
! 1434: */
! 1435: *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
! 1436: if( !pCsr ){
! 1437: return SQLITE_NOMEM;
! 1438: }
! 1439: memset(pCsr, 0, sizeof(Fts3Cursor));
! 1440: return SQLITE_OK;
! 1441: }
! 1442:
! 1443: /*
! 1444: ** Close the cursor. For additional information see the documentation
! 1445: ** on the xClose method of the virtual table interface.
! 1446: */
! 1447: static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
! 1448: Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
! 1449: assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
! 1450: sqlite3_finalize(pCsr->pStmt);
! 1451: sqlite3Fts3ExprFree(pCsr->pExpr);
! 1452: sqlite3Fts3FreeDeferredTokens(pCsr);
! 1453: sqlite3_free(pCsr->aDoclist);
! 1454: sqlite3_free(pCsr->aMatchinfo);
! 1455: assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
! 1456: sqlite3_free(pCsr);
! 1457: return SQLITE_OK;
! 1458: }
! 1459:
! 1460: /*
! 1461: ** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
! 1462: ** compose and prepare an SQL statement of the form:
! 1463: **
! 1464: ** "SELECT <columns> FROM %_content WHERE rowid = ?"
! 1465: **
! 1466: ** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
! 1467: ** it. If an error occurs, return an SQLite error code.
! 1468: **
! 1469: ** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK.
! 1470: */
! 1471: static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
! 1472: int rc = SQLITE_OK;
! 1473: if( pCsr->pStmt==0 ){
! 1474: Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
! 1475: char *zSql;
! 1476: zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist);
! 1477: if( !zSql ) return SQLITE_NOMEM;
! 1478: rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
! 1479: sqlite3_free(zSql);
! 1480: }
! 1481: *ppStmt = pCsr->pStmt;
! 1482: return rc;
! 1483: }
! 1484:
! 1485: /*
! 1486: ** Position the pCsr->pStmt statement so that it is on the row
! 1487: ** of the %_content table that contains the last match. Return
! 1488: ** SQLITE_OK on success.
! 1489: */
! 1490: static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
! 1491: int rc = SQLITE_OK;
! 1492: if( pCsr->isRequireSeek ){
! 1493: sqlite3_stmt *pStmt = 0;
! 1494:
! 1495: rc = fts3CursorSeekStmt(pCsr, &pStmt);
! 1496: if( rc==SQLITE_OK ){
! 1497: sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
! 1498: pCsr->isRequireSeek = 0;
! 1499: if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
! 1500: return SQLITE_OK;
! 1501: }else{
! 1502: rc = sqlite3_reset(pCsr->pStmt);
! 1503: if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
! 1504: /* If no row was found and no error has occured, then the %_content
! 1505: ** table is missing a row that is present in the full-text index.
! 1506: ** The data structures are corrupt. */
! 1507: rc = FTS_CORRUPT_VTAB;
! 1508: pCsr->isEof = 1;
! 1509: }
! 1510: }
! 1511: }
! 1512: }
! 1513:
! 1514: if( rc!=SQLITE_OK && pContext ){
! 1515: sqlite3_result_error_code(pContext, rc);
! 1516: }
! 1517: return rc;
! 1518: }
! 1519:
! 1520: /*
! 1521: ** This function is used to process a single interior node when searching
! 1522: ** a b-tree for a term or term prefix. The node data is passed to this
! 1523: ** function via the zNode/nNode parameters. The term to search for is
! 1524: ** passed in zTerm/nTerm.
! 1525: **
! 1526: ** If piFirst is not NULL, then this function sets *piFirst to the blockid
! 1527: ** of the child node that heads the sub-tree that may contain the term.
! 1528: **
! 1529: ** If piLast is not NULL, then *piLast is set to the right-most child node
! 1530: ** that heads a sub-tree that may contain a term for which zTerm/nTerm is
! 1531: ** a prefix.
! 1532: **
! 1533: ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
! 1534: */
! 1535: static int fts3ScanInteriorNode(
! 1536: const char *zTerm, /* Term to select leaves for */
! 1537: int nTerm, /* Size of term zTerm in bytes */
! 1538: const char *zNode, /* Buffer containing segment interior node */
! 1539: int nNode, /* Size of buffer at zNode */
! 1540: sqlite3_int64 *piFirst, /* OUT: Selected child node */
! 1541: sqlite3_int64 *piLast /* OUT: Selected child node */
! 1542: ){
! 1543: int rc = SQLITE_OK; /* Return code */
! 1544: const char *zCsr = zNode; /* Cursor to iterate through node */
! 1545: const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
! 1546: char *zBuffer = 0; /* Buffer to load terms into */
! 1547: int nAlloc = 0; /* Size of allocated buffer */
! 1548: int isFirstTerm = 1; /* True when processing first term on page */
! 1549: sqlite3_int64 iChild; /* Block id of child node to descend to */
! 1550:
! 1551: /* Skip over the 'height' varint that occurs at the start of every
! 1552: ** interior node. Then load the blockid of the left-child of the b-tree
! 1553: ** node into variable iChild.
! 1554: **
! 1555: ** Even if the data structure on disk is corrupted, this (reading two
! 1556: ** varints from the buffer) does not risk an overread. If zNode is a
! 1557: ** root node, then the buffer comes from a SELECT statement. SQLite does
! 1558: ** not make this guarantee explicitly, but in practice there are always
! 1559: ** either more than 20 bytes of allocated space following the nNode bytes of
! 1560: ** contents, or two zero bytes. Or, if the node is read from the %_segments
! 1561: ** table, then there are always 20 bytes of zeroed padding following the
! 1562: ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details).
! 1563: */
! 1564: zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
! 1565: zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
! 1566: if( zCsr>zEnd ){
! 1567: return FTS_CORRUPT_VTAB;
! 1568: }
! 1569:
! 1570: while( zCsr<zEnd && (piFirst || piLast) ){
! 1571: int cmp; /* memcmp() result */
! 1572: int nSuffix; /* Size of term suffix */
! 1573: int nPrefix = 0; /* Size of term prefix */
! 1574: int nBuffer; /* Total term size */
! 1575:
! 1576: /* Load the next term on the node into zBuffer. Use realloc() to expand
! 1577: ** the size of zBuffer if required. */
! 1578: if( !isFirstTerm ){
! 1579: zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
! 1580: }
! 1581: isFirstTerm = 0;
! 1582: zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
! 1583:
! 1584: if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){
! 1585: rc = FTS_CORRUPT_VTAB;
! 1586: goto finish_scan;
! 1587: }
! 1588: if( nPrefix+nSuffix>nAlloc ){
! 1589: char *zNew;
! 1590: nAlloc = (nPrefix+nSuffix) * 2;
! 1591: zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
! 1592: if( !zNew ){
! 1593: rc = SQLITE_NOMEM;
! 1594: goto finish_scan;
! 1595: }
! 1596: zBuffer = zNew;
! 1597: }
! 1598: assert( zBuffer );
! 1599: memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
! 1600: nBuffer = nPrefix + nSuffix;
! 1601: zCsr += nSuffix;
! 1602:
! 1603: /* Compare the term we are searching for with the term just loaded from
! 1604: ** the interior node. If the specified term is greater than or equal
! 1605: ** to the term from the interior node, then all terms on the sub-tree
! 1606: ** headed by node iChild are smaller than zTerm. No need to search
! 1607: ** iChild.
! 1608: **
! 1609: ** If the interior node term is larger than the specified term, then
! 1610: ** the tree headed by iChild may contain the specified term.
! 1611: */
! 1612: cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
! 1613: if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){
! 1614: *piFirst = iChild;
! 1615: piFirst = 0;
! 1616: }
! 1617:
! 1618: if( piLast && cmp<0 ){
! 1619: *piLast = iChild;
! 1620: piLast = 0;
! 1621: }
! 1622:
! 1623: iChild++;
! 1624: };
! 1625:
! 1626: if( piFirst ) *piFirst = iChild;
! 1627: if( piLast ) *piLast = iChild;
! 1628:
! 1629: finish_scan:
! 1630: sqlite3_free(zBuffer);
! 1631: return rc;
! 1632: }
! 1633:
! 1634:
! 1635: /*
! 1636: ** The buffer pointed to by argument zNode (size nNode bytes) contains an
! 1637: ** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes)
! 1638: ** contains a term. This function searches the sub-tree headed by the zNode
! 1639: ** node for the range of leaf nodes that may contain the specified term
! 1640: ** or terms for which the specified term is a prefix.
! 1641: **
! 1642: ** If piLeaf is not NULL, then *piLeaf is set to the blockid of the
! 1643: ** left-most leaf node in the tree that may contain the specified term.
! 1644: ** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the
! 1645: ** right-most leaf node that may contain a term for which the specified
! 1646: ** term is a prefix.
! 1647: **
! 1648: ** It is possible that the range of returned leaf nodes does not contain
! 1649: ** the specified term or any terms for which it is a prefix. However, if the
! 1650: ** segment does contain any such terms, they are stored within the identified
! 1651: ** range. Because this function only inspects interior segment nodes (and
! 1652: ** never loads leaf nodes into memory), it is not possible to be sure.
! 1653: **
! 1654: ** If an error occurs, an error code other than SQLITE_OK is returned.
! 1655: */
! 1656: static int fts3SelectLeaf(
! 1657: Fts3Table *p, /* Virtual table handle */
! 1658: const char *zTerm, /* Term to select leaves for */
! 1659: int nTerm, /* Size of term zTerm in bytes */
! 1660: const char *zNode, /* Buffer containing segment interior node */
! 1661: int nNode, /* Size of buffer at zNode */
! 1662: sqlite3_int64 *piLeaf, /* Selected leaf node */
! 1663: sqlite3_int64 *piLeaf2 /* Selected leaf node */
! 1664: ){
! 1665: int rc; /* Return code */
! 1666: int iHeight; /* Height of this node in tree */
! 1667:
! 1668: assert( piLeaf || piLeaf2 );
! 1669:
! 1670: sqlite3Fts3GetVarint32(zNode, &iHeight);
! 1671: rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
! 1672: assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );
! 1673:
! 1674: if( rc==SQLITE_OK && iHeight>1 ){
! 1675: char *zBlob = 0; /* Blob read from %_segments table */
! 1676: int nBlob; /* Size of zBlob in bytes */
! 1677:
! 1678: if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
! 1679: rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0);
! 1680: if( rc==SQLITE_OK ){
! 1681: rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0);
! 1682: }
! 1683: sqlite3_free(zBlob);
! 1684: piLeaf = 0;
! 1685: zBlob = 0;
! 1686: }
! 1687:
! 1688: if( rc==SQLITE_OK ){
! 1689: rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0);
! 1690: }
! 1691: if( rc==SQLITE_OK ){
! 1692: rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2);
! 1693: }
! 1694: sqlite3_free(zBlob);
! 1695: }
! 1696:
! 1697: return rc;
! 1698: }
! 1699:
! 1700: /*
! 1701: ** This function is used to create delta-encoded serialized lists of FTS3
! 1702: ** varints. Each call to this function appends a single varint to a list.
! 1703: */
! 1704: static void fts3PutDeltaVarint(
! 1705: char **pp, /* IN/OUT: Output pointer */
! 1706: sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */
! 1707: sqlite3_int64 iVal /* Write this value to the list */
! 1708: ){
! 1709: assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) );
! 1710: *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev);
! 1711: *piPrev = iVal;
! 1712: }
! 1713:
! 1714: /*
! 1715: ** When this function is called, *ppPoslist is assumed to point to the
! 1716: ** start of a position-list. After it returns, *ppPoslist points to the
! 1717: ** first byte after the position-list.
! 1718: **
! 1719: ** A position list is list of positions (delta encoded) and columns for
! 1720: ** a single document record of a doclist. So, in other words, this
! 1721: ** routine advances *ppPoslist so that it points to the next docid in
! 1722: ** the doclist, or to the first byte past the end of the doclist.
! 1723: **
! 1724: ** If pp is not NULL, then the contents of the position list are copied
! 1725: ** to *pp. *pp is set to point to the first byte past the last byte copied
! 1726: ** before this function returns.
! 1727: */
! 1728: static void fts3PoslistCopy(char **pp, char **ppPoslist){
! 1729: char *pEnd = *ppPoslist;
! 1730: char c = 0;
! 1731:
! 1732: /* The end of a position list is marked by a zero encoded as an FTS3
! 1733: ** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by
! 1734: ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail
! 1735: ** of some other, multi-byte, value.
! 1736: **
! 1737: ** The following while-loop moves pEnd to point to the first byte that is not
! 1738: ** immediately preceded by a byte with the 0x80 bit set. Then increments
! 1739: ** pEnd once more so that it points to the byte immediately following the
! 1740: ** last byte in the position-list.
! 1741: */
! 1742: while( *pEnd | c ){
! 1743: c = *pEnd++ & 0x80;
! 1744: testcase( c!=0 && (*pEnd)==0 );
! 1745: }
! 1746: pEnd++; /* Advance past the POS_END terminator byte */
! 1747:
! 1748: if( pp ){
! 1749: int n = (int)(pEnd - *ppPoslist);
! 1750: char *p = *pp;
! 1751: memcpy(p, *ppPoslist, n);
! 1752: p += n;
! 1753: *pp = p;
! 1754: }
! 1755: *ppPoslist = pEnd;
! 1756: }
! 1757:
! 1758: /*
! 1759: ** When this function is called, *ppPoslist is assumed to point to the
! 1760: ** start of a column-list. After it returns, *ppPoslist points to the
! 1761: ** to the terminator (POS_COLUMN or POS_END) byte of the column-list.
! 1762: **
! 1763: ** A column-list is list of delta-encoded positions for a single column
! 1764: ** within a single document within a doclist.
! 1765: **
! 1766: ** The column-list is terminated either by a POS_COLUMN varint (1) or
! 1767: ** a POS_END varint (0). This routine leaves *ppPoslist pointing to
! 1768: ** the POS_COLUMN or POS_END that terminates the column-list.
! 1769: **
! 1770: ** If pp is not NULL, then the contents of the column-list are copied
! 1771: ** to *pp. *pp is set to point to the first byte past the last byte copied
! 1772: ** before this function returns. The POS_COLUMN or POS_END terminator
! 1773: ** is not copied into *pp.
! 1774: */
! 1775: static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
! 1776: char *pEnd = *ppPoslist;
! 1777: char c = 0;
! 1778:
! 1779: /* A column-list is terminated by either a 0x01 or 0x00 byte that is
! 1780: ** not part of a multi-byte varint.
! 1781: */
! 1782: while( 0xFE & (*pEnd | c) ){
! 1783: c = *pEnd++ & 0x80;
! 1784: testcase( c!=0 && ((*pEnd)&0xfe)==0 );
! 1785: }
! 1786: if( pp ){
! 1787: int n = (int)(pEnd - *ppPoslist);
! 1788: char *p = *pp;
! 1789: memcpy(p, *ppPoslist, n);
! 1790: p += n;
! 1791: *pp = p;
! 1792: }
! 1793: *ppPoslist = pEnd;
! 1794: }
! 1795:
! 1796: /*
! 1797: ** Value used to signify the end of an position-list. This is safe because
! 1798: ** it is not possible to have a document with 2^31 terms.
! 1799: */
! 1800: #define POSITION_LIST_END 0x7fffffff
! 1801:
! 1802: /*
! 1803: ** This function is used to help parse position-lists. When this function is
! 1804: ** called, *pp may point to the start of the next varint in the position-list
! 1805: ** being parsed, or it may point to 1 byte past the end of the position-list
! 1806: ** (in which case **pp will be a terminator bytes POS_END (0) or
! 1807: ** (1)).
! 1808: **
! 1809: ** If *pp points past the end of the current position-list, set *pi to
! 1810: ** POSITION_LIST_END and return. Otherwise, read the next varint from *pp,
! 1811: ** increment the current value of *pi by the value read, and set *pp to
! 1812: ** point to the next value before returning.
! 1813: **
! 1814: ** Before calling this routine *pi must be initialized to the value of
! 1815: ** the previous position, or zero if we are reading the first position
! 1816: ** in the position-list. Because positions are delta-encoded, the value
! 1817: ** of the previous position is needed in order to compute the value of
! 1818: ** the next position.
! 1819: */
! 1820: static void fts3ReadNextPos(
! 1821: char **pp, /* IN/OUT: Pointer into position-list buffer */
! 1822: sqlite3_int64 *pi /* IN/OUT: Value read from position-list */
! 1823: ){
! 1824: if( (**pp)&0xFE ){
! 1825: fts3GetDeltaVarint(pp, pi);
! 1826: *pi -= 2;
! 1827: }else{
! 1828: *pi = POSITION_LIST_END;
! 1829: }
! 1830: }
! 1831:
! 1832: /*
! 1833: ** If parameter iCol is not 0, write an POS_COLUMN (1) byte followed by
! 1834: ** the value of iCol encoded as a varint to *pp. This will start a new
! 1835: ** column list.
! 1836: **
! 1837: ** Set *pp to point to the byte just after the last byte written before
! 1838: ** returning (do not modify it if iCol==0). Return the total number of bytes
! 1839: ** written (0 if iCol==0).
! 1840: */
! 1841: static int fts3PutColNumber(char **pp, int iCol){
! 1842: int n = 0; /* Number of bytes written */
! 1843: if( iCol ){
! 1844: char *p = *pp; /* Output pointer */
! 1845: n = 1 + sqlite3Fts3PutVarint(&p[1], iCol);
! 1846: *p = 0x01;
! 1847: *pp = &p[n];
! 1848: }
! 1849: return n;
! 1850: }
! 1851:
! 1852: /*
! 1853: ** Compute the union of two position lists. The output written
! 1854: ** into *pp contains all positions of both *pp1 and *pp2 in sorted
! 1855: ** order and with any duplicates removed. All pointers are
! 1856: ** updated appropriately. The caller is responsible for insuring
! 1857: ** that there is enough space in *pp to hold the complete output.
! 1858: */
! 1859: static void fts3PoslistMerge(
! 1860: char **pp, /* Output buffer */
! 1861: char **pp1, /* Left input list */
! 1862: char **pp2 /* Right input list */
! 1863: ){
! 1864: char *p = *pp;
! 1865: char *p1 = *pp1;
! 1866: char *p2 = *pp2;
! 1867:
! 1868: while( *p1 || *p2 ){
! 1869: int iCol1; /* The current column index in pp1 */
! 1870: int iCol2; /* The current column index in pp2 */
! 1871:
! 1872: if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);
! 1873: else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;
! 1874: else iCol1 = 0;
! 1875:
! 1876: if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);
! 1877: else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;
! 1878: else iCol2 = 0;
! 1879:
! 1880: if( iCol1==iCol2 ){
! 1881: sqlite3_int64 i1 = 0; /* Last position from pp1 */
! 1882: sqlite3_int64 i2 = 0; /* Last position from pp2 */
! 1883: sqlite3_int64 iPrev = 0;
! 1884: int n = fts3PutColNumber(&p, iCol1);
! 1885: p1 += n;
! 1886: p2 += n;
! 1887:
! 1888: /* At this point, both p1 and p2 point to the start of column-lists
! 1889: ** for the same column (the column with index iCol1 and iCol2).
! 1890: ** A column-list is a list of non-negative delta-encoded varints, each
! 1891: ** incremented by 2 before being stored. Each list is terminated by a
! 1892: ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists
! 1893: ** and writes the results to buffer p. p is left pointing to the byte
! 1894: ** after the list written. No terminator (POS_END or POS_COLUMN) is
! 1895: ** written to the output.
! 1896: */
! 1897: fts3GetDeltaVarint(&p1, &i1);
! 1898: fts3GetDeltaVarint(&p2, &i2);
! 1899: do {
! 1900: fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2);
! 1901: iPrev -= 2;
! 1902: if( i1==i2 ){
! 1903: fts3ReadNextPos(&p1, &i1);
! 1904: fts3ReadNextPos(&p2, &i2);
! 1905: }else if( i1<i2 ){
! 1906: fts3ReadNextPos(&p1, &i1);
! 1907: }else{
! 1908: fts3ReadNextPos(&p2, &i2);
! 1909: }
! 1910: }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END );
! 1911: }else if( iCol1<iCol2 ){
! 1912: p1 += fts3PutColNumber(&p, iCol1);
! 1913: fts3ColumnlistCopy(&p, &p1);
! 1914: }else{
! 1915: p2 += fts3PutColNumber(&p, iCol2);
! 1916: fts3ColumnlistCopy(&p, &p2);
! 1917: }
! 1918: }
! 1919:
! 1920: *p++ = POS_END;
! 1921: *pp = p;
! 1922: *pp1 = p1 + 1;
! 1923: *pp2 = p2 + 1;
! 1924: }
! 1925:
! 1926: /*
! 1927: ** This function is used to merge two position lists into one. When it is
! 1928: ** called, *pp1 and *pp2 must both point to position lists. A position-list is
! 1929: ** the part of a doclist that follows each document id. For example, if a row
! 1930: ** contains:
! 1931: **
! 1932: ** 'a b c'|'x y z'|'a b b a'
! 1933: **
! 1934: ** Then the position list for this row for token 'b' would consist of:
! 1935: **
! 1936: ** 0x02 0x01 0x02 0x03 0x03 0x00
! 1937: **
! 1938: ** When this function returns, both *pp1 and *pp2 are left pointing to the
! 1939: ** byte following the 0x00 terminator of their respective position lists.
! 1940: **
! 1941: ** If isSaveLeft is 0, an entry is added to the output position list for
! 1942: ** each position in *pp2 for which there exists one or more positions in
! 1943: ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
! 1944: ** when the *pp1 token appears before the *pp2 token, but not more than nToken
! 1945: ** slots before it.
! 1946: **
! 1947: ** e.g. nToken==1 searches for adjacent positions.
! 1948: */
! 1949: static int fts3PoslistPhraseMerge(
! 1950: char **pp, /* IN/OUT: Preallocated output buffer */
! 1951: int nToken, /* Maximum difference in token positions */
! 1952: int isSaveLeft, /* Save the left position */
! 1953: int isExact, /* If *pp1 is exactly nTokens before *pp2 */
! 1954: char **pp1, /* IN/OUT: Left input list */
! 1955: char **pp2 /* IN/OUT: Right input list */
! 1956: ){
! 1957: char *p = *pp;
! 1958: char *p1 = *pp1;
! 1959: char *p2 = *pp2;
! 1960: int iCol1 = 0;
! 1961: int iCol2 = 0;
! 1962:
! 1963: /* Never set both isSaveLeft and isExact for the same invocation. */
! 1964: assert( isSaveLeft==0 || isExact==0 );
! 1965:
! 1966: assert( p!=0 && *p1!=0 && *p2!=0 );
! 1967: if( *p1==POS_COLUMN ){
! 1968: p1++;
! 1969: p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
! 1970: }
! 1971: if( *p2==POS_COLUMN ){
! 1972: p2++;
! 1973: p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
! 1974: }
! 1975:
! 1976: while( 1 ){
! 1977: if( iCol1==iCol2 ){
! 1978: char *pSave = p;
! 1979: sqlite3_int64 iPrev = 0;
! 1980: sqlite3_int64 iPos1 = 0;
! 1981: sqlite3_int64 iPos2 = 0;
! 1982:
! 1983: if( iCol1 ){
! 1984: *p++ = POS_COLUMN;
! 1985: p += sqlite3Fts3PutVarint(p, iCol1);
! 1986: }
! 1987:
! 1988: assert( *p1!=POS_END && *p1!=POS_COLUMN );
! 1989: assert( *p2!=POS_END && *p2!=POS_COLUMN );
! 1990: fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
! 1991: fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
! 1992:
! 1993: while( 1 ){
! 1994: if( iPos2==iPos1+nToken
! 1995: || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)
! 1996: ){
! 1997: sqlite3_int64 iSave;
! 1998: iSave = isSaveLeft ? iPos1 : iPos2;
! 1999: fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
! 2000: pSave = 0;
! 2001: assert( p );
! 2002: }
! 2003: if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
! 2004: if( (*p2&0xFE)==0 ) break;
! 2005: fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
! 2006: }else{
! 2007: if( (*p1&0xFE)==0 ) break;
! 2008: fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
! 2009: }
! 2010: }
! 2011:
! 2012: if( pSave ){
! 2013: assert( pp && p );
! 2014: p = pSave;
! 2015: }
! 2016:
! 2017: fts3ColumnlistCopy(0, &p1);
! 2018: fts3ColumnlistCopy(0, &p2);
! 2019: assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
! 2020: if( 0==*p1 || 0==*p2 ) break;
! 2021:
! 2022: p1++;
! 2023: p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
! 2024: p2++;
! 2025: p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
! 2026: }
! 2027:
! 2028: /* Advance pointer p1 or p2 (whichever corresponds to the smaller of
! 2029: ** iCol1 and iCol2) so that it points to either the 0x00 that marks the
! 2030: ** end of the position list, or the 0x01 that precedes the next
! 2031: ** column-number in the position list.
! 2032: */
! 2033: else if( iCol1<iCol2 ){
! 2034: fts3ColumnlistCopy(0, &p1);
! 2035: if( 0==*p1 ) break;
! 2036: p1++;
! 2037: p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
! 2038: }else{
! 2039: fts3ColumnlistCopy(0, &p2);
! 2040: if( 0==*p2 ) break;
! 2041: p2++;
! 2042: p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
! 2043: }
! 2044: }
! 2045:
! 2046: fts3PoslistCopy(0, &p2);
! 2047: fts3PoslistCopy(0, &p1);
! 2048: *pp1 = p1;
! 2049: *pp2 = p2;
! 2050: if( *pp==p ){
! 2051: return 0;
! 2052: }
! 2053: *p++ = 0x00;
! 2054: *pp = p;
! 2055: return 1;
! 2056: }
! 2057:
! 2058: /*
! 2059: ** Merge two position-lists as required by the NEAR operator. The argument
! 2060: ** position lists correspond to the left and right phrases of an expression
! 2061: ** like:
! 2062: **
! 2063: ** "phrase 1" NEAR "phrase number 2"
! 2064: **
! 2065: ** Position list *pp1 corresponds to the left-hand side of the NEAR
! 2066: ** expression and *pp2 to the right. As usual, the indexes in the position
! 2067: ** lists are the offsets of the last token in each phrase (tokens "1" and "2"
! 2068: ** in the example above).
! 2069: **
! 2070: ** The output position list - written to *pp - is a copy of *pp2 with those
! 2071: ** entries that are not sufficiently NEAR entries in *pp1 removed.
! 2072: */
! 2073: static int fts3PoslistNearMerge(
! 2074: char **pp, /* Output buffer */
! 2075: char *aTmp, /* Temporary buffer space */
! 2076: int nRight, /* Maximum difference in token positions */
! 2077: int nLeft, /* Maximum difference in token positions */
! 2078: char **pp1, /* IN/OUT: Left input list */
! 2079: char **pp2 /* IN/OUT: Right input list */
! 2080: ){
! 2081: char *p1 = *pp1;
! 2082: char *p2 = *pp2;
! 2083:
! 2084: char *pTmp1 = aTmp;
! 2085: char *pTmp2;
! 2086: char *aTmp2;
! 2087: int res = 1;
! 2088:
! 2089: fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
! 2090: aTmp2 = pTmp2 = pTmp1;
! 2091: *pp1 = p1;
! 2092: *pp2 = p2;
! 2093: fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
! 2094: if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
! 2095: fts3PoslistMerge(pp, &aTmp, &aTmp2);
! 2096: }else if( pTmp1!=aTmp ){
! 2097: fts3PoslistCopy(pp, &aTmp);
! 2098: }else if( pTmp2!=aTmp2 ){
! 2099: fts3PoslistCopy(pp, &aTmp2);
! 2100: }else{
! 2101: res = 0;
! 2102: }
! 2103:
! 2104: return res;
! 2105: }
! 2106:
! 2107: /*
! 2108: ** An instance of this function is used to merge together the (potentially
! 2109: ** large number of) doclists for each term that matches a prefix query.
! 2110: ** See function fts3TermSelectMerge() for details.
! 2111: */
! 2112: typedef struct TermSelect TermSelect;
! 2113: struct TermSelect {
! 2114: char *aaOutput[16]; /* Malloc'd output buffers */
! 2115: int anOutput[16]; /* Size each output buffer in bytes */
! 2116: };
! 2117:
! 2118: /*
! 2119: ** This function is used to read a single varint from a buffer. Parameter
! 2120: ** pEnd points 1 byte past the end of the buffer. When this function is
! 2121: ** called, if *pp points to pEnd or greater, then the end of the buffer
! 2122: ** has been reached. In this case *pp is set to 0 and the function returns.
! 2123: **
! 2124: ** If *pp does not point to or past pEnd, then a single varint is read
! 2125: ** from *pp. *pp is then set to point 1 byte past the end of the read varint.
! 2126: **
! 2127: ** If bDescIdx is false, the value read is added to *pVal before returning.
! 2128: ** If it is true, the value read is subtracted from *pVal before this
! 2129: ** function returns.
! 2130: */
! 2131: static void fts3GetDeltaVarint3(
! 2132: char **pp, /* IN/OUT: Point to read varint from */
! 2133: char *pEnd, /* End of buffer */
! 2134: int bDescIdx, /* True if docids are descending */
! 2135: sqlite3_int64 *pVal /* IN/OUT: Integer value */
! 2136: ){
! 2137: if( *pp>=pEnd ){
! 2138: *pp = 0;
! 2139: }else{
! 2140: sqlite3_int64 iVal;
! 2141: *pp += sqlite3Fts3GetVarint(*pp, &iVal);
! 2142: if( bDescIdx ){
! 2143: *pVal -= iVal;
! 2144: }else{
! 2145: *pVal += iVal;
! 2146: }
! 2147: }
! 2148: }
! 2149:
! 2150: /*
! 2151: ** This function is used to write a single varint to a buffer. The varint
! 2152: ** is written to *pp. Before returning, *pp is set to point 1 byte past the
! 2153: ** end of the value written.
! 2154: **
! 2155: ** If *pbFirst is zero when this function is called, the value written to
! 2156: ** the buffer is that of parameter iVal.
! 2157: **
! 2158: ** If *pbFirst is non-zero when this function is called, then the value
! 2159: ** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
! 2160: ** (if bDescIdx is non-zero).
! 2161: **
! 2162: ** Before returning, this function always sets *pbFirst to 1 and *piPrev
! 2163: ** to the value of parameter iVal.
! 2164: */
! 2165: static void fts3PutDeltaVarint3(
! 2166: char **pp, /* IN/OUT: Output pointer */
! 2167: int bDescIdx, /* True for descending docids */
! 2168: sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */
! 2169: int *pbFirst, /* IN/OUT: True after first int written */
! 2170: sqlite3_int64 iVal /* Write this value to the list */
! 2171: ){
! 2172: sqlite3_int64 iWrite;
! 2173: if( bDescIdx==0 || *pbFirst==0 ){
! 2174: iWrite = iVal - *piPrev;
! 2175: }else{
! 2176: iWrite = *piPrev - iVal;
! 2177: }
! 2178: assert( *pbFirst || *piPrev==0 );
! 2179: assert( *pbFirst==0 || iWrite>0 );
! 2180: *pp += sqlite3Fts3PutVarint(*pp, iWrite);
! 2181: *piPrev = iVal;
! 2182: *pbFirst = 1;
! 2183: }
! 2184:
! 2185:
! 2186: /*
! 2187: ** This macro is used by various functions that merge doclists. The two
! 2188: ** arguments are 64-bit docid values. If the value of the stack variable
! 2189: ** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2).
! 2190: ** Otherwise, (i2-i1).
! 2191: **
! 2192: ** Using this makes it easier to write code that can merge doclists that are
! 2193: ** sorted in either ascending or descending order.
! 2194: */
! 2195: #define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
! 2196:
! 2197: /*
! 2198: ** This function does an "OR" merge of two doclists (output contains all
! 2199: ** positions contained in either argument doclist). If the docids in the
! 2200: ** input doclists are sorted in ascending order, parameter bDescDoclist
! 2201: ** should be false. If they are sorted in ascending order, it should be
! 2202: ** passed a non-zero value.
! 2203: **
! 2204: ** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
! 2205: ** containing the output doclist and SQLITE_OK is returned. In this case
! 2206: ** *pnOut is set to the number of bytes in the output doclist.
! 2207: **
! 2208: ** If an error occurs, an SQLite error code is returned. The output values
! 2209: ** are undefined in this case.
! 2210: */
! 2211: static int fts3DoclistOrMerge(
! 2212: int bDescDoclist, /* True if arguments are desc */
! 2213: char *a1, int n1, /* First doclist */
! 2214: char *a2, int n2, /* Second doclist */
! 2215: char **paOut, int *pnOut /* OUT: Malloc'd doclist */
! 2216: ){
! 2217: sqlite3_int64 i1 = 0;
! 2218: sqlite3_int64 i2 = 0;
! 2219: sqlite3_int64 iPrev = 0;
! 2220: char *pEnd1 = &a1[n1];
! 2221: char *pEnd2 = &a2[n2];
! 2222: char *p1 = a1;
! 2223: char *p2 = a2;
! 2224: char *p;
! 2225: char *aOut;
! 2226: int bFirstOut = 0;
! 2227:
! 2228: *paOut = 0;
! 2229: *pnOut = 0;
! 2230:
! 2231: /* Allocate space for the output. Both the input and output doclists
! 2232: ** are delta encoded. If they are in ascending order (bDescDoclist==0),
! 2233: ** then the first docid in each list is simply encoded as a varint. For
! 2234: ** each subsequent docid, the varint stored is the difference between the
! 2235: ** current and previous docid (a positive number - since the list is in
! 2236: ** ascending order).
! 2237: **
! 2238: ** The first docid written to the output is therefore encoded using the
! 2239: ** same number of bytes as it is in whichever of the input lists it is
! 2240: ** read from. And each subsequent docid read from the same input list
! 2241: ** consumes either the same or less bytes as it did in the input (since
! 2242: ** the difference between it and the previous value in the output must
! 2243: ** be a positive value less than or equal to the delta value read from
! 2244: ** the input list). The same argument applies to all but the first docid
! 2245: ** read from the 'other' list. And to the contents of all position lists
! 2246: ** that will be copied and merged from the input to the output.
! 2247: **
! 2248: ** However, if the first docid copied to the output is a negative number,
! 2249: ** then the encoding of the first docid from the 'other' input list may
! 2250: ** be larger in the output than it was in the input (since the delta value
! 2251: ** may be a larger positive integer than the actual docid).
! 2252: **
! 2253: ** The space required to store the output is therefore the sum of the
! 2254: ** sizes of the two inputs, plus enough space for exactly one of the input
! 2255: ** docids to grow.
! 2256: **
! 2257: ** A symetric argument may be made if the doclists are in descending
! 2258: ** order.
! 2259: */
! 2260: aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1);
! 2261: if( !aOut ) return SQLITE_NOMEM;
! 2262:
! 2263: p = aOut;
! 2264: fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
! 2265: fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
! 2266: while( p1 || p2 ){
! 2267: sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
! 2268:
! 2269: if( p2 && p1 && iDiff==0 ){
! 2270: fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
! 2271: fts3PoslistMerge(&p, &p1, &p2);
! 2272: fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
! 2273: fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
! 2274: }else if( !p2 || (p1 && iDiff<0) ){
! 2275: fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
! 2276: fts3PoslistCopy(&p, &p1);
! 2277: fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
! 2278: }else{
! 2279: fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
! 2280: fts3PoslistCopy(&p, &p2);
! 2281: fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
! 2282: }
! 2283: }
! 2284:
! 2285: *paOut = aOut;
! 2286: *pnOut = (p-aOut);
! 2287: assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
! 2288: return SQLITE_OK;
! 2289: }
! 2290:
! 2291: /*
! 2292: ** This function does a "phrase" merge of two doclists. In a phrase merge,
! 2293: ** the output contains a copy of each position from the right-hand input
! 2294: ** doclist for which there is a position in the left-hand input doclist
! 2295: ** exactly nDist tokens before it.
! 2296: **
! 2297: ** If the docids in the input doclists are sorted in ascending order,
! 2298: ** parameter bDescDoclist should be false. If they are sorted in ascending
! 2299: ** order, it should be passed a non-zero value.
! 2300: **
! 2301: ** The right-hand input doclist is overwritten by this function.
! 2302: */
! 2303: static void fts3DoclistPhraseMerge(
! 2304: int bDescDoclist, /* True if arguments are desc */
! 2305: int nDist, /* Distance from left to right (1=adjacent) */
! 2306: char *aLeft, int nLeft, /* Left doclist */
! 2307: char *aRight, int *pnRight /* IN/OUT: Right/output doclist */
! 2308: ){
! 2309: sqlite3_int64 i1 = 0;
! 2310: sqlite3_int64 i2 = 0;
! 2311: sqlite3_int64 iPrev = 0;
! 2312: char *pEnd1 = &aLeft[nLeft];
! 2313: char *pEnd2 = &aRight[*pnRight];
! 2314: char *p1 = aLeft;
! 2315: char *p2 = aRight;
! 2316: char *p;
! 2317: int bFirstOut = 0;
! 2318: char *aOut = aRight;
! 2319:
! 2320: assert( nDist>0 );
! 2321:
! 2322: p = aOut;
! 2323: fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
! 2324: fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
! 2325:
! 2326: while( p1 && p2 ){
! 2327: sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
! 2328: if( iDiff==0 ){
! 2329: char *pSave = p;
! 2330: sqlite3_int64 iPrevSave = iPrev;
! 2331: int bFirstOutSave = bFirstOut;
! 2332:
! 2333: fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
! 2334: if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
! 2335: p = pSave;
! 2336: iPrev = iPrevSave;
! 2337: bFirstOut = bFirstOutSave;
! 2338: }
! 2339: fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
! 2340: fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
! 2341: }else if( iDiff<0 ){
! 2342: fts3PoslistCopy(0, &p1);
! 2343: fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
! 2344: }else{
! 2345: fts3PoslistCopy(0, &p2);
! 2346: fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
! 2347: }
! 2348: }
! 2349:
! 2350: *pnRight = p - aOut;
! 2351: }
! 2352:
! 2353: /*
! 2354: ** Argument pList points to a position list nList bytes in size. This
! 2355: ** function checks to see if the position list contains any entries for
! 2356: ** a token in position 0 (of any column). If so, it writes argument iDelta
! 2357: ** to the output buffer pOut, followed by a position list consisting only
! 2358: ** of the entries from pList at position 0, and terminated by an 0x00 byte.
! 2359: ** The value returned is the number of bytes written to pOut (if any).
! 2360: */
! 2361: int sqlite3Fts3FirstFilter(
! 2362: sqlite3_int64 iDelta, /* Varint that may be written to pOut */
! 2363: char *pList, /* Position list (no 0x00 term) */
! 2364: int nList, /* Size of pList in bytes */
! 2365: char *pOut /* Write output here */
! 2366: ){
! 2367: int nOut = 0;
! 2368: int bWritten = 0; /* True once iDelta has been written */
! 2369: char *p = pList;
! 2370: char *pEnd = &pList[nList];
! 2371:
! 2372: if( *p!=0x01 ){
! 2373: if( *p==0x02 ){
! 2374: nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
! 2375: pOut[nOut++] = 0x02;
! 2376: bWritten = 1;
! 2377: }
! 2378: fts3ColumnlistCopy(0, &p);
! 2379: }
! 2380:
! 2381: while( p<pEnd && *p==0x01 ){
! 2382: sqlite3_int64 iCol;
! 2383: p++;
! 2384: p += sqlite3Fts3GetVarint(p, &iCol);
! 2385: if( *p==0x02 ){
! 2386: if( bWritten==0 ){
! 2387: nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
! 2388: bWritten = 1;
! 2389: }
! 2390: pOut[nOut++] = 0x01;
! 2391: nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
! 2392: pOut[nOut++] = 0x02;
! 2393: }
! 2394: fts3ColumnlistCopy(0, &p);
! 2395: }
! 2396: if( bWritten ){
! 2397: pOut[nOut++] = 0x00;
! 2398: }
! 2399:
! 2400: return nOut;
! 2401: }
! 2402:
! 2403:
! 2404: /*
! 2405: ** Merge all doclists in the TermSelect.aaOutput[] array into a single
! 2406: ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
! 2407: ** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
! 2408: **
! 2409: ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
! 2410: ** the responsibility of the caller to free any doclists left in the
! 2411: ** TermSelect.aaOutput[] array.
! 2412: */
! 2413: static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
! 2414: char *aOut = 0;
! 2415: int nOut = 0;
! 2416: int i;
! 2417:
! 2418: /* Loop through the doclists in the aaOutput[] array. Merge them all
! 2419: ** into a single doclist.
! 2420: */
! 2421: for(i=0; i<SizeofArray(pTS->aaOutput); i++){
! 2422: if( pTS->aaOutput[i] ){
! 2423: if( !aOut ){
! 2424: aOut = pTS->aaOutput[i];
! 2425: nOut = pTS->anOutput[i];
! 2426: pTS->aaOutput[i] = 0;
! 2427: }else{
! 2428: int nNew;
! 2429: char *aNew;
! 2430:
! 2431: int rc = fts3DoclistOrMerge(p->bDescIdx,
! 2432: pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew
! 2433: );
! 2434: if( rc!=SQLITE_OK ){
! 2435: sqlite3_free(aOut);
! 2436: return rc;
! 2437: }
! 2438:
! 2439: sqlite3_free(pTS->aaOutput[i]);
! 2440: sqlite3_free(aOut);
! 2441: pTS->aaOutput[i] = 0;
! 2442: aOut = aNew;
! 2443: nOut = nNew;
! 2444: }
! 2445: }
! 2446: }
! 2447:
! 2448: pTS->aaOutput[0] = aOut;
! 2449: pTS->anOutput[0] = nOut;
! 2450: return SQLITE_OK;
! 2451: }
! 2452:
! 2453: /*
! 2454: ** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
! 2455: ** as the first argument. The merge is an "OR" merge (see function
! 2456: ** fts3DoclistOrMerge() for details).
! 2457: **
! 2458: ** This function is called with the doclist for each term that matches
! 2459: ** a queried prefix. It merges all these doclists into one, the doclist
! 2460: ** for the specified prefix. Since there can be a very large number of
! 2461: ** doclists to merge, the merging is done pair-wise using the TermSelect
! 2462: ** object.
! 2463: **
! 2464: ** This function returns SQLITE_OK if the merge is successful, or an
! 2465: ** SQLite error code (SQLITE_NOMEM) if an error occurs.
! 2466: */
! 2467: static int fts3TermSelectMerge(
! 2468: Fts3Table *p, /* FTS table handle */
! 2469: TermSelect *pTS, /* TermSelect object to merge into */
! 2470: char *aDoclist, /* Pointer to doclist */
! 2471: int nDoclist /* Size of aDoclist in bytes */
! 2472: ){
! 2473: if( pTS->aaOutput[0]==0 ){
! 2474: /* If this is the first term selected, copy the doclist to the output
! 2475: ** buffer using memcpy(). */
! 2476: pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
! 2477: pTS->anOutput[0] = nDoclist;
! 2478: if( pTS->aaOutput[0] ){
! 2479: memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
! 2480: }else{
! 2481: return SQLITE_NOMEM;
! 2482: }
! 2483: }else{
! 2484: char *aMerge = aDoclist;
! 2485: int nMerge = nDoclist;
! 2486: int iOut;
! 2487:
! 2488: for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
! 2489: if( pTS->aaOutput[iOut]==0 ){
! 2490: assert( iOut>0 );
! 2491: pTS->aaOutput[iOut] = aMerge;
! 2492: pTS->anOutput[iOut] = nMerge;
! 2493: break;
! 2494: }else{
! 2495: char *aNew;
! 2496: int nNew;
! 2497:
! 2498: int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge,
! 2499: pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew
! 2500: );
! 2501: if( rc!=SQLITE_OK ){
! 2502: if( aMerge!=aDoclist ) sqlite3_free(aMerge);
! 2503: return rc;
! 2504: }
! 2505:
! 2506: if( aMerge!=aDoclist ) sqlite3_free(aMerge);
! 2507: sqlite3_free(pTS->aaOutput[iOut]);
! 2508: pTS->aaOutput[iOut] = 0;
! 2509:
! 2510: aMerge = aNew;
! 2511: nMerge = nNew;
! 2512: if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
! 2513: pTS->aaOutput[iOut] = aMerge;
! 2514: pTS->anOutput[iOut] = nMerge;
! 2515: }
! 2516: }
! 2517: }
! 2518: }
! 2519: return SQLITE_OK;
! 2520: }
! 2521:
! 2522: /*
! 2523: ** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
! 2524: */
! 2525: static int fts3SegReaderCursorAppend(
! 2526: Fts3MultiSegReader *pCsr,
! 2527: Fts3SegReader *pNew
! 2528: ){
! 2529: if( (pCsr->nSegment%16)==0 ){
! 2530: Fts3SegReader **apNew;
! 2531: int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
! 2532: apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
! 2533: if( !apNew ){
! 2534: sqlite3Fts3SegReaderFree(pNew);
! 2535: return SQLITE_NOMEM;
! 2536: }
! 2537: pCsr->apSegment = apNew;
! 2538: }
! 2539: pCsr->apSegment[pCsr->nSegment++] = pNew;
! 2540: return SQLITE_OK;
! 2541: }
! 2542:
! 2543: /*
! 2544: ** Add seg-reader objects to the Fts3MultiSegReader object passed as the
! 2545: ** 8th argument.
! 2546: **
! 2547: ** This function returns SQLITE_OK if successful, or an SQLite error code
! 2548: ** otherwise.
! 2549: */
! 2550: static int fts3SegReaderCursor(
! 2551: Fts3Table *p, /* FTS3 table handle */
! 2552: int iIndex, /* Index to search (from 0 to p->nIndex-1) */
! 2553: int iLevel, /* Level of segments to scan */
! 2554: const char *zTerm, /* Term to query for */
! 2555: int nTerm, /* Size of zTerm in bytes */
! 2556: int isPrefix, /* True for a prefix search */
! 2557: int isScan, /* True to scan from zTerm to EOF */
! 2558: Fts3MultiSegReader *pCsr /* Cursor object to populate */
! 2559: ){
! 2560: int rc = SQLITE_OK; /* Error code */
! 2561: sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */
! 2562: int rc2; /* Result of sqlite3_reset() */
! 2563:
! 2564: /* If iLevel is less than 0 and this is not a scan, include a seg-reader
! 2565: ** for the pending-terms. If this is a scan, then this call must be being
! 2566: ** made by an fts4aux module, not an FTS table. In this case calling
! 2567: ** Fts3SegReaderPending might segfault, as the data structures used by
! 2568: ** fts4aux are not completely populated. So it's easiest to filter these
! 2569: ** calls out here. */
! 2570: if( iLevel<0 && p->aIndex ){
! 2571: Fts3SegReader *pSeg = 0;
! 2572: rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg);
! 2573: if( rc==SQLITE_OK && pSeg ){
! 2574: rc = fts3SegReaderCursorAppend(pCsr, pSeg);
! 2575: }
! 2576: }
! 2577:
! 2578: if( iLevel!=FTS3_SEGCURSOR_PENDING ){
! 2579: if( rc==SQLITE_OK ){
! 2580: rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
! 2581: }
! 2582:
! 2583: while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
! 2584: Fts3SegReader *pSeg = 0;
! 2585:
! 2586: /* Read the values returned by the SELECT into local variables. */
! 2587: sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
! 2588: sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
! 2589: sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
! 2590: int nRoot = sqlite3_column_bytes(pStmt, 4);
! 2591: char const *zRoot = sqlite3_column_blob(pStmt, 4);
! 2592:
! 2593: /* If zTerm is not NULL, and this segment is not stored entirely on its
! 2594: ** root node, the range of leaves scanned can be reduced. Do this. */
! 2595: if( iStartBlock && zTerm ){
! 2596: sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0);
! 2597: rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi);
! 2598: if( rc!=SQLITE_OK ) goto finished;
! 2599: if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock;
! 2600: }
! 2601:
! 2602: rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1,
! 2603: iStartBlock, iLeavesEndBlock, iEndBlock, zRoot, nRoot, &pSeg
! 2604: );
! 2605: if( rc!=SQLITE_OK ) goto finished;
! 2606: rc = fts3SegReaderCursorAppend(pCsr, pSeg);
! 2607: }
! 2608: }
! 2609:
! 2610: finished:
! 2611: rc2 = sqlite3_reset(pStmt);
! 2612: if( rc==SQLITE_DONE ) rc = rc2;
! 2613:
! 2614: return rc;
! 2615: }
! 2616:
! 2617: /*
! 2618: ** Set up a cursor object for iterating through a full-text index or a
! 2619: ** single level therein.
! 2620: */
! 2621: int sqlite3Fts3SegReaderCursor(
! 2622: Fts3Table *p, /* FTS3 table handle */
! 2623: int iIndex, /* Index to search (from 0 to p->nIndex-1) */
! 2624: int iLevel, /* Level of segments to scan */
! 2625: const char *zTerm, /* Term to query for */
! 2626: int nTerm, /* Size of zTerm in bytes */
! 2627: int isPrefix, /* True for a prefix search */
! 2628: int isScan, /* True to scan from zTerm to EOF */
! 2629: Fts3MultiSegReader *pCsr /* Cursor object to populate */
! 2630: ){
! 2631: assert( iIndex>=0 && iIndex<p->nIndex );
! 2632: assert( iLevel==FTS3_SEGCURSOR_ALL
! 2633: || iLevel==FTS3_SEGCURSOR_PENDING
! 2634: || iLevel>=0
! 2635: );
! 2636: assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
! 2637: assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
! 2638: assert( isPrefix==0 || isScan==0 );
! 2639:
! 2640: /* "isScan" is only set to true by the ft4aux module, an ordinary
! 2641: ** full-text tables. */
! 2642: assert( isScan==0 || p->aIndex==0 );
! 2643:
! 2644: memset(pCsr, 0, sizeof(Fts3MultiSegReader));
! 2645:
! 2646: return fts3SegReaderCursor(
! 2647: p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
! 2648: );
! 2649: }
! 2650:
! 2651: /*
! 2652: ** In addition to its current configuration, have the Fts3MultiSegReader
! 2653: ** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
! 2654: **
! 2655: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
! 2656: */
! 2657: static int fts3SegReaderCursorAddZero(
! 2658: Fts3Table *p, /* FTS virtual table handle */
! 2659: const char *zTerm, /* Term to scan doclist of */
! 2660: int nTerm, /* Number of bytes in zTerm */
! 2661: Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
! 2662: ){
! 2663: return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
! 2664: }
! 2665:
! 2666: /*
! 2667: ** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
! 2668: ** if isPrefix is true, to scan the doclist for all terms for which
! 2669: ** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
! 2670: ** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
! 2671: ** an SQLite error code.
! 2672: **
! 2673: ** It is the responsibility of the caller to free this object by eventually
! 2674: ** passing it to fts3SegReaderCursorFree()
! 2675: **
! 2676: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
! 2677: ** Output parameter *ppSegcsr is set to 0 if an error occurs.
! 2678: */
! 2679: static int fts3TermSegReaderCursor(
! 2680: Fts3Cursor *pCsr, /* Virtual table cursor handle */
! 2681: const char *zTerm, /* Term to query for */
! 2682: int nTerm, /* Size of zTerm in bytes */
! 2683: int isPrefix, /* True for a prefix search */
! 2684: Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
! 2685: ){
! 2686: Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */
! 2687: int rc = SQLITE_NOMEM; /* Return code */
! 2688:
! 2689: pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
! 2690: if( pSegcsr ){
! 2691: int i;
! 2692: int bFound = 0; /* True once an index has been found */
! 2693: Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
! 2694:
! 2695: if( isPrefix ){
! 2696: for(i=1; bFound==0 && i<p->nIndex; i++){
! 2697: if( p->aIndex[i].nPrefix==nTerm ){
! 2698: bFound = 1;
! 2699: rc = sqlite3Fts3SegReaderCursor(
! 2700: p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);
! 2701: pSegcsr->bLookup = 1;
! 2702: }
! 2703: }
! 2704:
! 2705: for(i=1; bFound==0 && i<p->nIndex; i++){
! 2706: if( p->aIndex[i].nPrefix==nTerm+1 ){
! 2707: bFound = 1;
! 2708: rc = sqlite3Fts3SegReaderCursor(
! 2709: p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
! 2710: );
! 2711: if( rc==SQLITE_OK ){
! 2712: rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);
! 2713: }
! 2714: }
! 2715: }
! 2716: }
! 2717:
! 2718: if( bFound==0 ){
! 2719: rc = sqlite3Fts3SegReaderCursor(
! 2720: p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
! 2721: );
! 2722: pSegcsr->bLookup = !isPrefix;
! 2723: }
! 2724: }
! 2725:
! 2726: *ppSegcsr = pSegcsr;
! 2727: return rc;
! 2728: }
! 2729:
! 2730: /*
! 2731: ** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
! 2732: */
! 2733: static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
! 2734: sqlite3Fts3SegReaderFinish(pSegcsr);
! 2735: sqlite3_free(pSegcsr);
! 2736: }
! 2737:
! 2738: /*
! 2739: ** This function retreives the doclist for the specified term (or term
! 2740: ** prefix) from the database.
! 2741: */
! 2742: static int fts3TermSelect(
! 2743: Fts3Table *p, /* Virtual table handle */
! 2744: Fts3PhraseToken *pTok, /* Token to query for */
! 2745: int iColumn, /* Column to query (or -ve for all columns) */
! 2746: int *pnOut, /* OUT: Size of buffer at *ppOut */
! 2747: char **ppOut /* OUT: Malloced result buffer */
! 2748: ){
! 2749: int rc; /* Return code */
! 2750: Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */
! 2751: TermSelect tsc; /* Object for pair-wise doclist merging */
! 2752: Fts3SegFilter filter; /* Segment term filter configuration */
! 2753:
! 2754: pSegcsr = pTok->pSegcsr;
! 2755: memset(&tsc, 0, sizeof(TermSelect));
! 2756:
! 2757: filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
! 2758: | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
! 2759: | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
! 2760: | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
! 2761: filter.iCol = iColumn;
! 2762: filter.zTerm = pTok->z;
! 2763: filter.nTerm = pTok->n;
! 2764:
! 2765: rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
! 2766: while( SQLITE_OK==rc
! 2767: && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr))
! 2768: ){
! 2769: rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
! 2770: }
! 2771:
! 2772: if( rc==SQLITE_OK ){
! 2773: rc = fts3TermSelectFinishMerge(p, &tsc);
! 2774: }
! 2775: if( rc==SQLITE_OK ){
! 2776: *ppOut = tsc.aaOutput[0];
! 2777: *pnOut = tsc.anOutput[0];
! 2778: }else{
! 2779: int i;
! 2780: for(i=0; i<SizeofArray(tsc.aaOutput); i++){
! 2781: sqlite3_free(tsc.aaOutput[i]);
! 2782: }
! 2783: }
! 2784:
! 2785: fts3SegReaderCursorFree(pSegcsr);
! 2786: pTok->pSegcsr = 0;
! 2787: return rc;
! 2788: }
! 2789:
! 2790: /*
! 2791: ** This function counts the total number of docids in the doclist stored
! 2792: ** in buffer aList[], size nList bytes.
! 2793: **
! 2794: ** If the isPoslist argument is true, then it is assumed that the doclist
! 2795: ** contains a position-list following each docid. Otherwise, it is assumed
! 2796: ** that the doclist is simply a list of docids stored as delta encoded
! 2797: ** varints.
! 2798: */
! 2799: static int fts3DoclistCountDocids(char *aList, int nList){
! 2800: int nDoc = 0; /* Return value */
! 2801: if( aList ){
! 2802: char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */
! 2803: char *p = aList; /* Cursor */
! 2804: while( p<aEnd ){
! 2805: nDoc++;
! 2806: while( (*p++)&0x80 ); /* Skip docid varint */
! 2807: fts3PoslistCopy(0, &p); /* Skip over position list */
! 2808: }
! 2809: }
! 2810:
! 2811: return nDoc;
! 2812: }
! 2813:
! 2814: /*
! 2815: ** Advance the cursor to the next row in the %_content table that
! 2816: ** matches the search criteria. For a MATCH search, this will be
! 2817: ** the next row that matches. For a full-table scan, this will be
! 2818: ** simply the next row in the %_content table. For a docid lookup,
! 2819: ** this routine simply sets the EOF flag.
! 2820: **
! 2821: ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
! 2822: ** even if we reach end-of-file. The fts3EofMethod() will be called
! 2823: ** subsequently to determine whether or not an EOF was hit.
! 2824: */
! 2825: static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
! 2826: int rc;
! 2827: Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
! 2828: if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){
! 2829: if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
! 2830: pCsr->isEof = 1;
! 2831: rc = sqlite3_reset(pCsr->pStmt);
! 2832: }else{
! 2833: pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
! 2834: rc = SQLITE_OK;
! 2835: }
! 2836: }else{
! 2837: rc = fts3EvalNext((Fts3Cursor *)pCursor);
! 2838: }
! 2839: assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
! 2840: return rc;
! 2841: }
! 2842:
! 2843: /*
! 2844: ** This is the xFilter interface for the virtual table. See
! 2845: ** the virtual table xFilter method documentation for additional
! 2846: ** information.
! 2847: **
! 2848: ** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
! 2849: ** the %_content table.
! 2850: **
! 2851: ** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
! 2852: ** in the %_content table.
! 2853: **
! 2854: ** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The
! 2855: ** column on the left-hand side of the MATCH operator is column
! 2856: ** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand
! 2857: ** side of the MATCH operator.
! 2858: */
! 2859: static int fts3FilterMethod(
! 2860: sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
! 2861: int idxNum, /* Strategy index */
! 2862: const char *idxStr, /* Unused */
! 2863: int nVal, /* Number of elements in apVal */
! 2864: sqlite3_value **apVal /* Arguments for the indexing scheme */
! 2865: ){
! 2866: int rc;
! 2867: char *zSql; /* SQL statement used to access %_content */
! 2868: Fts3Table *p = (Fts3Table *)pCursor->pVtab;
! 2869: Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
! 2870:
! 2871: UNUSED_PARAMETER(idxStr);
! 2872: UNUSED_PARAMETER(nVal);
! 2873:
! 2874: assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
! 2875: assert( nVal==0 || nVal==1 );
! 2876: assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
! 2877: assert( p->pSegments==0 );
! 2878:
! 2879: /* In case the cursor has been used before, clear it now. */
! 2880: sqlite3_finalize(pCsr->pStmt);
! 2881: sqlite3_free(pCsr->aDoclist);
! 2882: sqlite3Fts3ExprFree(pCsr->pExpr);
! 2883: memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
! 2884:
! 2885: if( idxStr ){
! 2886: pCsr->bDesc = (idxStr[0]=='D');
! 2887: }else{
! 2888: pCsr->bDesc = p->bDescIdx;
! 2889: }
! 2890: pCsr->eSearch = (i16)idxNum;
! 2891:
! 2892: if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){
! 2893: int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
! 2894: const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
! 2895:
! 2896: if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
! 2897: return SQLITE_NOMEM;
! 2898: }
! 2899:
! 2900: rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat,
! 2901: p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
! 2902: );
! 2903: if( rc!=SQLITE_OK ){
! 2904: if( rc==SQLITE_ERROR ){
! 2905: static const char *zErr = "malformed MATCH expression: [%s]";
! 2906: p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
! 2907: }
! 2908: return rc;
! 2909: }
! 2910:
! 2911: rc = sqlite3Fts3ReadLock(p);
! 2912: if( rc!=SQLITE_OK ) return rc;
! 2913:
! 2914: rc = fts3EvalStart(pCsr);
! 2915:
! 2916: sqlite3Fts3SegmentsClose(p);
! 2917: if( rc!=SQLITE_OK ) return rc;
! 2918: pCsr->pNextId = pCsr->aDoclist;
! 2919: pCsr->iPrevId = 0;
! 2920: }
! 2921:
! 2922: /* Compile a SELECT statement for this cursor. For a full-table-scan, the
! 2923: ** statement loops through all rows of the %_content table. For a
! 2924: ** full-text query or docid lookup, the statement retrieves a single
! 2925: ** row by docid.
! 2926: */
! 2927: if( idxNum==FTS3_FULLSCAN_SEARCH ){
! 2928: zSql = sqlite3_mprintf(
! 2929: "SELECT %s ORDER BY rowid %s",
! 2930: p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
! 2931: );
! 2932: if( zSql ){
! 2933: rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
! 2934: sqlite3_free(zSql);
! 2935: }else{
! 2936: rc = SQLITE_NOMEM;
! 2937: }
! 2938: }else if( idxNum==FTS3_DOCID_SEARCH ){
! 2939: rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
! 2940: if( rc==SQLITE_OK ){
! 2941: rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
! 2942: }
! 2943: }
! 2944: if( rc!=SQLITE_OK ) return rc;
! 2945:
! 2946: return fts3NextMethod(pCursor);
! 2947: }
! 2948:
! 2949: /*
! 2950: ** This is the xEof method of the virtual table. SQLite calls this
! 2951: ** routine to find out if it has reached the end of a result set.
! 2952: */
! 2953: static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
! 2954: return ((Fts3Cursor *)pCursor)->isEof;
! 2955: }
! 2956:
! 2957: /*
! 2958: ** This is the xRowid method. The SQLite core calls this routine to
! 2959: ** retrieve the rowid for the current row of the result set. fts3
! 2960: ** exposes %_content.docid as the rowid for the virtual table. The
! 2961: ** rowid should be written to *pRowid.
! 2962: */
! 2963: static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
! 2964: Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
! 2965: *pRowid = pCsr->iPrevId;
! 2966: return SQLITE_OK;
! 2967: }
! 2968:
! 2969: /*
! 2970: ** This is the xColumn method, called by SQLite to request a value from
! 2971: ** the row that the supplied cursor currently points to.
! 2972: */
! 2973: static int fts3ColumnMethod(
! 2974: sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
! 2975: sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */
! 2976: int iCol /* Index of column to read value from */
! 2977: ){
! 2978: int rc = SQLITE_OK; /* Return Code */
! 2979: Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
! 2980: Fts3Table *p = (Fts3Table *)pCursor->pVtab;
! 2981:
! 2982: /* The column value supplied by SQLite must be in range. */
! 2983: assert( iCol>=0 && iCol<=p->nColumn+1 );
! 2984:
! 2985: if( iCol==p->nColumn+1 ){
! 2986: /* This call is a request for the "docid" column. Since "docid" is an
! 2987: ** alias for "rowid", use the xRowid() method to obtain the value.
! 2988: */
! 2989: sqlite3_result_int64(pContext, pCsr->iPrevId);
! 2990: }else if( iCol==p->nColumn ){
! 2991: /* The extra column whose name is the same as the table.
! 2992: ** Return a blob which is a pointer to the cursor.
! 2993: */
! 2994: sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
! 2995: }else{
! 2996: rc = fts3CursorSeek(0, pCsr);
! 2997: if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
! 2998: sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
! 2999: }
! 3000: }
! 3001:
! 3002: assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
! 3003: return rc;
! 3004: }
! 3005:
! 3006: /*
! 3007: ** This function is the implementation of the xUpdate callback used by
! 3008: ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
! 3009: ** inserted, updated or deleted.
! 3010: */
! 3011: static int fts3UpdateMethod(
! 3012: sqlite3_vtab *pVtab, /* Virtual table handle */
! 3013: int nArg, /* Size of argument array */
! 3014: sqlite3_value **apVal, /* Array of arguments */
! 3015: sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
! 3016: ){
! 3017: return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
! 3018: }
! 3019:
! 3020: /*
! 3021: ** Implementation of xSync() method. Flush the contents of the pending-terms
! 3022: ** hash-table to the database.
! 3023: */
! 3024: static int fts3SyncMethod(sqlite3_vtab *pVtab){
! 3025: int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab);
! 3026: sqlite3Fts3SegmentsClose((Fts3Table *)pVtab);
! 3027: return rc;
! 3028: }
! 3029:
! 3030: /*
! 3031: ** Implementation of xBegin() method. This is a no-op.
! 3032: */
! 3033: static int fts3BeginMethod(sqlite3_vtab *pVtab){
! 3034: TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
! 3035: UNUSED_PARAMETER(pVtab);
! 3036: assert( p->pSegments==0 );
! 3037: assert( p->nPendingData==0 );
! 3038: assert( p->inTransaction!=1 );
! 3039: TESTONLY( p->inTransaction = 1 );
! 3040: TESTONLY( p->mxSavepoint = -1; );
! 3041: return SQLITE_OK;
! 3042: }
! 3043:
! 3044: /*
! 3045: ** Implementation of xCommit() method. This is a no-op. The contents of
! 3046: ** the pending-terms hash-table have already been flushed into the database
! 3047: ** by fts3SyncMethod().
! 3048: */
! 3049: static int fts3CommitMethod(sqlite3_vtab *pVtab){
! 3050: TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
! 3051: UNUSED_PARAMETER(pVtab);
! 3052: assert( p->nPendingData==0 );
! 3053: assert( p->inTransaction!=0 );
! 3054: assert( p->pSegments==0 );
! 3055: TESTONLY( p->inTransaction = 0 );
! 3056: TESTONLY( p->mxSavepoint = -1; );
! 3057: return SQLITE_OK;
! 3058: }
! 3059:
! 3060: /*
! 3061: ** Implementation of xRollback(). Discard the contents of the pending-terms
! 3062: ** hash-table. Any changes made to the database are reverted by SQLite.
! 3063: */
! 3064: static int fts3RollbackMethod(sqlite3_vtab *pVtab){
! 3065: Fts3Table *p = (Fts3Table*)pVtab;
! 3066: sqlite3Fts3PendingTermsClear(p);
! 3067: assert( p->inTransaction!=0 );
! 3068: TESTONLY( p->inTransaction = 0 );
! 3069: TESTONLY( p->mxSavepoint = -1; );
! 3070: return SQLITE_OK;
! 3071: }
! 3072:
! 3073: /*
! 3074: ** When called, *ppPoslist must point to the byte immediately following the
! 3075: ** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
! 3076: ** moves *ppPoslist so that it instead points to the first byte of the
! 3077: ** same position list.
! 3078: */
! 3079: static void fts3ReversePoslist(char *pStart, char **ppPoslist){
! 3080: char *p = &(*ppPoslist)[-2];
! 3081: char c = 0;
! 3082:
! 3083: while( p>pStart && (c=*p--)==0 );
! 3084: while( p>pStart && (*p & 0x80) | c ){
! 3085: c = *p--;
! 3086: }
! 3087: if( p>pStart ){ p = &p[2]; }
! 3088: while( *p++&0x80 );
! 3089: *ppPoslist = p;
! 3090: }
! 3091:
! 3092: /*
! 3093: ** Helper function used by the implementation of the overloaded snippet(),
! 3094: ** offsets() and optimize() SQL functions.
! 3095: **
! 3096: ** If the value passed as the third argument is a blob of size
! 3097: ** sizeof(Fts3Cursor*), then the blob contents are copied to the
! 3098: ** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
! 3099: ** message is written to context pContext and SQLITE_ERROR returned. The
! 3100: ** string passed via zFunc is used as part of the error message.
! 3101: */
! 3102: static int fts3FunctionArg(
! 3103: sqlite3_context *pContext, /* SQL function call context */
! 3104: const char *zFunc, /* Function name */
! 3105: sqlite3_value *pVal, /* argv[0] passed to function */
! 3106: Fts3Cursor **ppCsr /* OUT: Store cursor handle here */
! 3107: ){
! 3108: Fts3Cursor *pRet;
! 3109: if( sqlite3_value_type(pVal)!=SQLITE_BLOB
! 3110: || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *)
! 3111: ){
! 3112: char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
! 3113: sqlite3_result_error(pContext, zErr, -1);
! 3114: sqlite3_free(zErr);
! 3115: return SQLITE_ERROR;
! 3116: }
! 3117: memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *));
! 3118: *ppCsr = pRet;
! 3119: return SQLITE_OK;
! 3120: }
! 3121:
! 3122: /*
! 3123: ** Implementation of the snippet() function for FTS3
! 3124: */
! 3125: static void fts3SnippetFunc(
! 3126: sqlite3_context *pContext, /* SQLite function call context */
! 3127: int nVal, /* Size of apVal[] array */
! 3128: sqlite3_value **apVal /* Array of arguments */
! 3129: ){
! 3130: Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
! 3131: const char *zStart = "<b>";
! 3132: const char *zEnd = "</b>";
! 3133: const char *zEllipsis = "<b>...</b>";
! 3134: int iCol = -1;
! 3135: int nToken = 15; /* Default number of tokens in snippet */
! 3136:
! 3137: /* There must be at least one argument passed to this function (otherwise
! 3138: ** the non-overloaded version would have been called instead of this one).
! 3139: */
! 3140: assert( nVal>=1 );
! 3141:
! 3142: if( nVal>6 ){
! 3143: sqlite3_result_error(pContext,
! 3144: "wrong number of arguments to function snippet()", -1);
! 3145: return;
! 3146: }
! 3147: if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;
! 3148:
! 3149: switch( nVal ){
! 3150: case 6: nToken = sqlite3_value_int(apVal[5]);
! 3151: case 5: iCol = sqlite3_value_int(apVal[4]);
! 3152: case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
! 3153: case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]);
! 3154: case 2: zStart = (const char*)sqlite3_value_text(apVal[1]);
! 3155: }
! 3156: if( !zEllipsis || !zEnd || !zStart ){
! 3157: sqlite3_result_error_nomem(pContext);
! 3158: }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
! 3159: sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
! 3160: }
! 3161: }
! 3162:
! 3163: /*
! 3164: ** Implementation of the offsets() function for FTS3
! 3165: */
! 3166: static void fts3OffsetsFunc(
! 3167: sqlite3_context *pContext, /* SQLite function call context */
! 3168: int nVal, /* Size of argument array */
! 3169: sqlite3_value **apVal /* Array of arguments */
! 3170: ){
! 3171: Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
! 3172:
! 3173: UNUSED_PARAMETER(nVal);
! 3174:
! 3175: assert( nVal==1 );
! 3176: if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return;
! 3177: assert( pCsr );
! 3178: if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
! 3179: sqlite3Fts3Offsets(pContext, pCsr);
! 3180: }
! 3181: }
! 3182:
! 3183: /*
! 3184: ** Implementation of the special optimize() function for FTS3. This
! 3185: ** function merges all segments in the database to a single segment.
! 3186: ** Example usage is:
! 3187: **
! 3188: ** SELECT optimize(t) FROM t LIMIT 1;
! 3189: **
! 3190: ** where 't' is the name of an FTS3 table.
! 3191: */
! 3192: static void fts3OptimizeFunc(
! 3193: sqlite3_context *pContext, /* SQLite function call context */
! 3194: int nVal, /* Size of argument array */
! 3195: sqlite3_value **apVal /* Array of arguments */
! 3196: ){
! 3197: int rc; /* Return code */
! 3198: Fts3Table *p; /* Virtual table handle */
! 3199: Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */
! 3200:
! 3201: UNUSED_PARAMETER(nVal);
! 3202:
! 3203: assert( nVal==1 );
! 3204: if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;
! 3205: p = (Fts3Table *)pCursor->base.pVtab;
! 3206: assert( p );
! 3207:
! 3208: rc = sqlite3Fts3Optimize(p);
! 3209:
! 3210: switch( rc ){
! 3211: case SQLITE_OK:
! 3212: sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
! 3213: break;
! 3214: case SQLITE_DONE:
! 3215: sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
! 3216: break;
! 3217: default:
! 3218: sqlite3_result_error_code(pContext, rc);
! 3219: break;
! 3220: }
! 3221: }
! 3222:
! 3223: /*
! 3224: ** Implementation of the matchinfo() function for FTS3
! 3225: */
! 3226: static void fts3MatchinfoFunc(
! 3227: sqlite3_context *pContext, /* SQLite function call context */
! 3228: int nVal, /* Size of argument array */
! 3229: sqlite3_value **apVal /* Array of arguments */
! 3230: ){
! 3231: Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
! 3232: assert( nVal==1 || nVal==2 );
! 3233: if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
! 3234: const char *zArg = 0;
! 3235: if( nVal>1 ){
! 3236: zArg = (const char *)sqlite3_value_text(apVal[1]);
! 3237: }
! 3238: sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
! 3239: }
! 3240: }
! 3241:
! 3242: /*
! 3243: ** This routine implements the xFindFunction method for the FTS3
! 3244: ** virtual table.
! 3245: */
! 3246: static int fts3FindFunctionMethod(
! 3247: sqlite3_vtab *pVtab, /* Virtual table handle */
! 3248: int nArg, /* Number of SQL function arguments */
! 3249: const char *zName, /* Name of SQL function */
! 3250: void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
! 3251: void **ppArg /* Unused */
! 3252: ){
! 3253: struct Overloaded {
! 3254: const char *zName;
! 3255: void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
! 3256: } aOverload[] = {
! 3257: { "snippet", fts3SnippetFunc },
! 3258: { "offsets", fts3OffsetsFunc },
! 3259: { "optimize", fts3OptimizeFunc },
! 3260: { "matchinfo", fts3MatchinfoFunc },
! 3261: };
! 3262: int i; /* Iterator variable */
! 3263:
! 3264: UNUSED_PARAMETER(pVtab);
! 3265: UNUSED_PARAMETER(nArg);
! 3266: UNUSED_PARAMETER(ppArg);
! 3267:
! 3268: for(i=0; i<SizeofArray(aOverload); i++){
! 3269: if( strcmp(zName, aOverload[i].zName)==0 ){
! 3270: *pxFunc = aOverload[i].xFunc;
! 3271: return 1;
! 3272: }
! 3273: }
! 3274:
! 3275: /* No function of the specified name was found. Return 0. */
! 3276: return 0;
! 3277: }
! 3278:
! 3279: /*
! 3280: ** Implementation of FTS3 xRename method. Rename an fts3 table.
! 3281: */
! 3282: static int fts3RenameMethod(
! 3283: sqlite3_vtab *pVtab, /* Virtual table handle */
! 3284: const char *zName /* New name of table */
! 3285: ){
! 3286: Fts3Table *p = (Fts3Table *)pVtab;
! 3287: sqlite3 *db = p->db; /* Database connection */
! 3288: int rc; /* Return Code */
! 3289:
! 3290: /* As it happens, the pending terms table is always empty here. This is
! 3291: ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
! 3292: ** always opens a savepoint transaction. And the xSavepoint() method
! 3293: ** flushes the pending terms table. But leave the (no-op) call to
! 3294: ** PendingTermsFlush() in in case that changes.
! 3295: */
! 3296: assert( p->nPendingData==0 );
! 3297: rc = sqlite3Fts3PendingTermsFlush(p);
! 3298:
! 3299: if( p->zContentTbl==0 ){
! 3300: fts3DbExec(&rc, db,
! 3301: "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';",
! 3302: p->zDb, p->zName, zName
! 3303: );
! 3304: }
! 3305:
! 3306: if( p->bHasDocsize ){
! 3307: fts3DbExec(&rc, db,
! 3308: "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
! 3309: p->zDb, p->zName, zName
! 3310: );
! 3311: }
! 3312: if( p->bHasStat ){
! 3313: fts3DbExec(&rc, db,
! 3314: "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';",
! 3315: p->zDb, p->zName, zName
! 3316: );
! 3317: }
! 3318: fts3DbExec(&rc, db,
! 3319: "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';",
! 3320: p->zDb, p->zName, zName
! 3321: );
! 3322: fts3DbExec(&rc, db,
! 3323: "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';",
! 3324: p->zDb, p->zName, zName
! 3325: );
! 3326: return rc;
! 3327: }
! 3328:
! 3329: /*
! 3330: ** The xSavepoint() method.
! 3331: **
! 3332: ** Flush the contents of the pending-terms table to disk.
! 3333: */
! 3334: static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
! 3335: UNUSED_PARAMETER(iSavepoint);
! 3336: assert( ((Fts3Table *)pVtab)->inTransaction );
! 3337: assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
! 3338: TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
! 3339: return fts3SyncMethod(pVtab);
! 3340: }
! 3341:
! 3342: /*
! 3343: ** The xRelease() method.
! 3344: **
! 3345: ** This is a no-op.
! 3346: */
! 3347: static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
! 3348: TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
! 3349: UNUSED_PARAMETER(iSavepoint);
! 3350: UNUSED_PARAMETER(pVtab);
! 3351: assert( p->inTransaction );
! 3352: assert( p->mxSavepoint >= iSavepoint );
! 3353: TESTONLY( p->mxSavepoint = iSavepoint-1 );
! 3354: return SQLITE_OK;
! 3355: }
! 3356:
! 3357: /*
! 3358: ** The xRollbackTo() method.
! 3359: **
! 3360: ** Discard the contents of the pending terms table.
! 3361: */
! 3362: static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
! 3363: Fts3Table *p = (Fts3Table*)pVtab;
! 3364: UNUSED_PARAMETER(iSavepoint);
! 3365: assert( p->inTransaction );
! 3366: assert( p->mxSavepoint >= iSavepoint );
! 3367: TESTONLY( p->mxSavepoint = iSavepoint );
! 3368: sqlite3Fts3PendingTermsClear(p);
! 3369: return SQLITE_OK;
! 3370: }
! 3371:
! 3372: static const sqlite3_module fts3Module = {
! 3373: /* iVersion */ 2,
! 3374: /* xCreate */ fts3CreateMethod,
! 3375: /* xConnect */ fts3ConnectMethod,
! 3376: /* xBestIndex */ fts3BestIndexMethod,
! 3377: /* xDisconnect */ fts3DisconnectMethod,
! 3378: /* xDestroy */ fts3DestroyMethod,
! 3379: /* xOpen */ fts3OpenMethod,
! 3380: /* xClose */ fts3CloseMethod,
! 3381: /* xFilter */ fts3FilterMethod,
! 3382: /* xNext */ fts3NextMethod,
! 3383: /* xEof */ fts3EofMethod,
! 3384: /* xColumn */ fts3ColumnMethod,
! 3385: /* xRowid */ fts3RowidMethod,
! 3386: /* xUpdate */ fts3UpdateMethod,
! 3387: /* xBegin */ fts3BeginMethod,
! 3388: /* xSync */ fts3SyncMethod,
! 3389: /* xCommit */ fts3CommitMethod,
! 3390: /* xRollback */ fts3RollbackMethod,
! 3391: /* xFindFunction */ fts3FindFunctionMethod,
! 3392: /* xRename */ fts3RenameMethod,
! 3393: /* xSavepoint */ fts3SavepointMethod,
! 3394: /* xRelease */ fts3ReleaseMethod,
! 3395: /* xRollbackTo */ fts3RollbackToMethod,
! 3396: };
! 3397:
! 3398: /*
! 3399: ** This function is registered as the module destructor (called when an
! 3400: ** FTS3 enabled database connection is closed). It frees the memory
! 3401: ** allocated for the tokenizer hash table.
! 3402: */
! 3403: static void hashDestroy(void *p){
! 3404: Fts3Hash *pHash = (Fts3Hash *)p;
! 3405: sqlite3Fts3HashClear(pHash);
! 3406: sqlite3_free(pHash);
! 3407: }
! 3408:
! 3409: /*
! 3410: ** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are
! 3411: ** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c
! 3412: ** respectively. The following three forward declarations are for functions
! 3413: ** declared in these files used to retrieve the respective implementations.
! 3414: **
! 3415: ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
! 3416: ** to by the argument to point to the "simple" tokenizer implementation.
! 3417: ** And so on.
! 3418: */
! 3419: void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
! 3420: void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
! 3421: #ifdef SQLITE_ENABLE_ICU
! 3422: void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
! 3423: #endif
! 3424:
! 3425: /*
! 3426: ** Initialise the fts3 extension. If this extension is built as part
! 3427: ** of the sqlite library, then this function is called directly by
! 3428: ** SQLite. If fts3 is built as a dynamically loadable extension, this
! 3429: ** function is called by the sqlite3_extension_init() entry point.
! 3430: */
! 3431: int sqlite3Fts3Init(sqlite3 *db){
! 3432: int rc = SQLITE_OK;
! 3433: Fts3Hash *pHash = 0;
! 3434: const sqlite3_tokenizer_module *pSimple = 0;
! 3435: const sqlite3_tokenizer_module *pPorter = 0;
! 3436:
! 3437: #ifdef SQLITE_ENABLE_ICU
! 3438: const sqlite3_tokenizer_module *pIcu = 0;
! 3439: sqlite3Fts3IcuTokenizerModule(&pIcu);
! 3440: #endif
! 3441:
! 3442: #ifdef SQLITE_TEST
! 3443: rc = sqlite3Fts3InitTerm(db);
! 3444: if( rc!=SQLITE_OK ) return rc;
! 3445: #endif
! 3446:
! 3447: rc = sqlite3Fts3InitAux(db);
! 3448: if( rc!=SQLITE_OK ) return rc;
! 3449:
! 3450: sqlite3Fts3SimpleTokenizerModule(&pSimple);
! 3451: sqlite3Fts3PorterTokenizerModule(&pPorter);
! 3452:
! 3453: /* Allocate and initialise the hash-table used to store tokenizers. */
! 3454: pHash = sqlite3_malloc(sizeof(Fts3Hash));
! 3455: if( !pHash ){
! 3456: rc = SQLITE_NOMEM;
! 3457: }else{
! 3458: sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
! 3459: }
! 3460:
! 3461: /* Load the built-in tokenizers into the hash table */
! 3462: if( rc==SQLITE_OK ){
! 3463: if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
! 3464: || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
! 3465: #ifdef SQLITE_ENABLE_ICU
! 3466: || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
! 3467: #endif
! 3468: ){
! 3469: rc = SQLITE_NOMEM;
! 3470: }
! 3471: }
! 3472:
! 3473: #ifdef SQLITE_TEST
! 3474: if( rc==SQLITE_OK ){
! 3475: rc = sqlite3Fts3ExprInitTestInterface(db);
! 3476: }
! 3477: #endif
! 3478:
! 3479: /* Create the virtual table wrapper around the hash-table and overload
! 3480: ** the two scalar functions. If this is successful, register the
! 3481: ** module with sqlite.
! 3482: */
! 3483: if( SQLITE_OK==rc
! 3484: && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
! 3485: && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
! 3486: && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
! 3487: && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
! 3488: && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
! 3489: && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
! 3490: ){
! 3491: rc = sqlite3_create_module_v2(
! 3492: db, "fts3", &fts3Module, (void *)pHash, hashDestroy
! 3493: );
! 3494: if( rc==SQLITE_OK ){
! 3495: rc = sqlite3_create_module_v2(
! 3496: db, "fts4", &fts3Module, (void *)pHash, 0
! 3497: );
! 3498: }
! 3499: return rc;
! 3500: }
! 3501:
! 3502: /* An error has occurred. Delete the hash table and return the error code. */
! 3503: assert( rc!=SQLITE_OK );
! 3504: if( pHash ){
! 3505: sqlite3Fts3HashClear(pHash);
! 3506: sqlite3_free(pHash);
! 3507: }
! 3508: return rc;
! 3509: }
! 3510:
! 3511: /*
! 3512: ** Allocate an Fts3MultiSegReader for each token in the expression headed
! 3513: ** by pExpr.
! 3514: **
! 3515: ** An Fts3SegReader object is a cursor that can seek or scan a range of
! 3516: ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
! 3517: ** Fts3SegReader objects internally to provide an interface to seek or scan
! 3518: ** within the union of all segments of a b-tree. Hence the name.
! 3519: **
! 3520: ** If the allocated Fts3MultiSegReader just seeks to a single entry in a
! 3521: ** segment b-tree (if the term is not a prefix or it is a prefix for which
! 3522: ** there exists prefix b-tree of the right length) then it may be traversed
! 3523: ** and merged incrementally. Otherwise, it has to be merged into an in-memory
! 3524: ** doclist and then traversed.
! 3525: */
! 3526: static void fts3EvalAllocateReaders(
! 3527: Fts3Cursor *pCsr, /* FTS cursor handle */
! 3528: Fts3Expr *pExpr, /* Allocate readers for this expression */
! 3529: int *pnToken, /* OUT: Total number of tokens in phrase. */
! 3530: int *pnOr, /* OUT: Total number of OR nodes in expr. */
! 3531: int *pRc /* IN/OUT: Error code */
! 3532: ){
! 3533: if( pExpr && SQLITE_OK==*pRc ){
! 3534: if( pExpr->eType==FTSQUERY_PHRASE ){
! 3535: int i;
! 3536: int nToken = pExpr->pPhrase->nToken;
! 3537: *pnToken += nToken;
! 3538: for(i=0; i<nToken; i++){
! 3539: Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
! 3540: int rc = fts3TermSegReaderCursor(pCsr,
! 3541: pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
! 3542: );
! 3543: if( rc!=SQLITE_OK ){
! 3544: *pRc = rc;
! 3545: return;
! 3546: }
! 3547: }
! 3548: assert( pExpr->pPhrase->iDoclistToken==0 );
! 3549: pExpr->pPhrase->iDoclistToken = -1;
! 3550: }else{
! 3551: *pnOr += (pExpr->eType==FTSQUERY_OR);
! 3552: fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
! 3553: fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
! 3554: }
! 3555: }
! 3556: }
! 3557:
! 3558: /*
! 3559: ** Arguments pList/nList contain the doclist for token iToken of phrase p.
! 3560: ** It is merged into the main doclist stored in p->doclist.aAll/nAll.
! 3561: **
! 3562: ** This function assumes that pList points to a buffer allocated using
! 3563: ** sqlite3_malloc(). This function takes responsibility for eventually
! 3564: ** freeing the buffer.
! 3565: */
! 3566: static void fts3EvalPhraseMergeToken(
! 3567: Fts3Table *pTab, /* FTS Table pointer */
! 3568: Fts3Phrase *p, /* Phrase to merge pList/nList into */
! 3569: int iToken, /* Token pList/nList corresponds to */
! 3570: char *pList, /* Pointer to doclist */
! 3571: int nList /* Number of bytes in pList */
! 3572: ){
! 3573: assert( iToken!=p->iDoclistToken );
! 3574:
! 3575: if( pList==0 ){
! 3576: sqlite3_free(p->doclist.aAll);
! 3577: p->doclist.aAll = 0;
! 3578: p->doclist.nAll = 0;
! 3579: }
! 3580:
! 3581: else if( p->iDoclistToken<0 ){
! 3582: p->doclist.aAll = pList;
! 3583: p->doclist.nAll = nList;
! 3584: }
! 3585:
! 3586: else if( p->doclist.aAll==0 ){
! 3587: sqlite3_free(pList);
! 3588: }
! 3589:
! 3590: else {
! 3591: char *pLeft;
! 3592: char *pRight;
! 3593: int nLeft;
! 3594: int nRight;
! 3595: int nDiff;
! 3596:
! 3597: if( p->iDoclistToken<iToken ){
! 3598: pLeft = p->doclist.aAll;
! 3599: nLeft = p->doclist.nAll;
! 3600: pRight = pList;
! 3601: nRight = nList;
! 3602: nDiff = iToken - p->iDoclistToken;
! 3603: }else{
! 3604: pRight = p->doclist.aAll;
! 3605: nRight = p->doclist.nAll;
! 3606: pLeft = pList;
! 3607: nLeft = nList;
! 3608: nDiff = p->iDoclistToken - iToken;
! 3609: }
! 3610:
! 3611: fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
! 3612: sqlite3_free(pLeft);
! 3613: p->doclist.aAll = pRight;
! 3614: p->doclist.nAll = nRight;
! 3615: }
! 3616:
! 3617: if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
! 3618: }
! 3619:
! 3620: /*
! 3621: ** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
! 3622: ** does not take deferred tokens into account.
! 3623: **
! 3624: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
! 3625: */
! 3626: static int fts3EvalPhraseLoad(
! 3627: Fts3Cursor *pCsr, /* FTS Cursor handle */
! 3628: Fts3Phrase *p /* Phrase object */
! 3629: ){
! 3630: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 3631: int iToken;
! 3632: int rc = SQLITE_OK;
! 3633:
! 3634: for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
! 3635: Fts3PhraseToken *pToken = &p->aToken[iToken];
! 3636: assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
! 3637:
! 3638: if( pToken->pSegcsr ){
! 3639: int nThis = 0;
! 3640: char *pThis = 0;
! 3641: rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
! 3642: if( rc==SQLITE_OK ){
! 3643: fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
! 3644: }
! 3645: }
! 3646: assert( pToken->pSegcsr==0 );
! 3647: }
! 3648:
! 3649: return rc;
! 3650: }
! 3651:
! 3652: /*
! 3653: ** This function is called on each phrase after the position lists for
! 3654: ** any deferred tokens have been loaded into memory. It updates the phrases
! 3655: ** current position list to include only those positions that are really
! 3656: ** instances of the phrase (after considering deferred tokens). If this
! 3657: ** means that the phrase does not appear in the current row, doclist.pList
! 3658: ** and doclist.nList are both zeroed.
! 3659: **
! 3660: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
! 3661: */
! 3662: static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
! 3663: int iToken; /* Used to iterate through phrase tokens */
! 3664: char *aPoslist = 0; /* Position list for deferred tokens */
! 3665: int nPoslist = 0; /* Number of bytes in aPoslist */
! 3666: int iPrev = -1; /* Token number of previous deferred token */
! 3667:
! 3668: assert( pPhrase->doclist.bFreeList==0 );
! 3669:
! 3670: for(iToken=0; iToken<pPhrase->nToken; iToken++){
! 3671: Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
! 3672: Fts3DeferredToken *pDeferred = pToken->pDeferred;
! 3673:
! 3674: if( pDeferred ){
! 3675: char *pList;
! 3676: int nList;
! 3677: int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
! 3678: if( rc!=SQLITE_OK ) return rc;
! 3679:
! 3680: if( pList==0 ){
! 3681: sqlite3_free(aPoslist);
! 3682: pPhrase->doclist.pList = 0;
! 3683: pPhrase->doclist.nList = 0;
! 3684: return SQLITE_OK;
! 3685:
! 3686: }else if( aPoslist==0 ){
! 3687: aPoslist = pList;
! 3688: nPoslist = nList;
! 3689:
! 3690: }else{
! 3691: char *aOut = pList;
! 3692: char *p1 = aPoslist;
! 3693: char *p2 = aOut;
! 3694:
! 3695: assert( iPrev>=0 );
! 3696: fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
! 3697: sqlite3_free(aPoslist);
! 3698: aPoslist = pList;
! 3699: nPoslist = aOut - aPoslist;
! 3700: if( nPoslist==0 ){
! 3701: sqlite3_free(aPoslist);
! 3702: pPhrase->doclist.pList = 0;
! 3703: pPhrase->doclist.nList = 0;
! 3704: return SQLITE_OK;
! 3705: }
! 3706: }
! 3707: iPrev = iToken;
! 3708: }
! 3709: }
! 3710:
! 3711: if( iPrev>=0 ){
! 3712: int nMaxUndeferred = pPhrase->iDoclistToken;
! 3713: if( nMaxUndeferred<0 ){
! 3714: pPhrase->doclist.pList = aPoslist;
! 3715: pPhrase->doclist.nList = nPoslist;
! 3716: pPhrase->doclist.iDocid = pCsr->iPrevId;
! 3717: pPhrase->doclist.bFreeList = 1;
! 3718: }else{
! 3719: int nDistance;
! 3720: char *p1;
! 3721: char *p2;
! 3722: char *aOut;
! 3723:
! 3724: if( nMaxUndeferred>iPrev ){
! 3725: p1 = aPoslist;
! 3726: p2 = pPhrase->doclist.pList;
! 3727: nDistance = nMaxUndeferred - iPrev;
! 3728: }else{
! 3729: p1 = pPhrase->doclist.pList;
! 3730: p2 = aPoslist;
! 3731: nDistance = iPrev - nMaxUndeferred;
! 3732: }
! 3733:
! 3734: aOut = (char *)sqlite3_malloc(nPoslist+8);
! 3735: if( !aOut ){
! 3736: sqlite3_free(aPoslist);
! 3737: return SQLITE_NOMEM;
! 3738: }
! 3739:
! 3740: pPhrase->doclist.pList = aOut;
! 3741: if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
! 3742: pPhrase->doclist.bFreeList = 1;
! 3743: pPhrase->doclist.nList = (aOut - pPhrase->doclist.pList);
! 3744: }else{
! 3745: sqlite3_free(aOut);
! 3746: pPhrase->doclist.pList = 0;
! 3747: pPhrase->doclist.nList = 0;
! 3748: }
! 3749: sqlite3_free(aPoslist);
! 3750: }
! 3751: }
! 3752:
! 3753: return SQLITE_OK;
! 3754: }
! 3755:
! 3756: /*
! 3757: ** This function is called for each Fts3Phrase in a full-text query
! 3758: ** expression to initialize the mechanism for returning rows. Once this
! 3759: ** function has been called successfully on an Fts3Phrase, it may be
! 3760: ** used with fts3EvalPhraseNext() to iterate through the matching docids.
! 3761: **
! 3762: ** If parameter bOptOk is true, then the phrase may (or may not) use the
! 3763: ** incremental loading strategy. Otherwise, the entire doclist is loaded into
! 3764: ** memory within this call.
! 3765: **
! 3766: ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
! 3767: */
! 3768: static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
! 3769: int rc; /* Error code */
! 3770: Fts3PhraseToken *pFirst = &p->aToken[0];
! 3771: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 3772:
! 3773: if( pCsr->bDesc==pTab->bDescIdx
! 3774: && bOptOk==1
! 3775: && p->nToken==1
! 3776: && pFirst->pSegcsr
! 3777: && pFirst->pSegcsr->bLookup
! 3778: && pFirst->bFirst==0
! 3779: ){
! 3780: /* Use the incremental approach. */
! 3781: int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
! 3782: rc = sqlite3Fts3MsrIncrStart(
! 3783: pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
! 3784: p->bIncr = 1;
! 3785:
! 3786: }else{
! 3787: /* Load the full doclist for the phrase into memory. */
! 3788: rc = fts3EvalPhraseLoad(pCsr, p);
! 3789: p->bIncr = 0;
! 3790: }
! 3791:
! 3792: assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
! 3793: return rc;
! 3794: }
! 3795:
! 3796: /*
! 3797: ** This function is used to iterate backwards (from the end to start)
! 3798: ** through doclists. It is used by this module to iterate through phrase
! 3799: ** doclists in reverse and by the fts3_write.c module to iterate through
! 3800: ** pending-terms lists when writing to databases with "order=desc".
! 3801: **
! 3802: ** The doclist may be sorted in ascending (parameter bDescIdx==0) or
! 3803: ** descending (parameter bDescIdx==1) order of docid. Regardless, this
! 3804: ** function iterates from the end of the doclist to the beginning.
! 3805: */
! 3806: void sqlite3Fts3DoclistPrev(
! 3807: int bDescIdx, /* True if the doclist is desc */
! 3808: char *aDoclist, /* Pointer to entire doclist */
! 3809: int nDoclist, /* Length of aDoclist in bytes */
! 3810: char **ppIter, /* IN/OUT: Iterator pointer */
! 3811: sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
! 3812: int *pnList, /* IN/OUT: List length pointer */
! 3813: u8 *pbEof /* OUT: End-of-file flag */
! 3814: ){
! 3815: char *p = *ppIter;
! 3816:
! 3817: assert( nDoclist>0 );
! 3818: assert( *pbEof==0 );
! 3819: assert( p || *piDocid==0 );
! 3820: assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );
! 3821:
! 3822: if( p==0 ){
! 3823: sqlite3_int64 iDocid = 0;
! 3824: char *pNext = 0;
! 3825: char *pDocid = aDoclist;
! 3826: char *pEnd = &aDoclist[nDoclist];
! 3827: int iMul = 1;
! 3828:
! 3829: while( pDocid<pEnd ){
! 3830: sqlite3_int64 iDelta;
! 3831: pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
! 3832: iDocid += (iMul * iDelta);
! 3833: pNext = pDocid;
! 3834: fts3PoslistCopy(0, &pDocid);
! 3835: while( pDocid<pEnd && *pDocid==0 ) pDocid++;
! 3836: iMul = (bDescIdx ? -1 : 1);
! 3837: }
! 3838:
! 3839: *pnList = pEnd - pNext;
! 3840: *ppIter = pNext;
! 3841: *piDocid = iDocid;
! 3842: }else{
! 3843: int iMul = (bDescIdx ? -1 : 1);
! 3844: sqlite3_int64 iDelta;
! 3845: fts3GetReverseVarint(&p, aDoclist, &iDelta);
! 3846: *piDocid -= (iMul * iDelta);
! 3847:
! 3848: if( p==aDoclist ){
! 3849: *pbEof = 1;
! 3850: }else{
! 3851: char *pSave = p;
! 3852: fts3ReversePoslist(aDoclist, &p);
! 3853: *pnList = (pSave - p);
! 3854: }
! 3855: *ppIter = p;
! 3856: }
! 3857: }
! 3858:
! 3859: /*
! 3860: ** Attempt to move the phrase iterator to point to the next matching docid.
! 3861: ** If an error occurs, return an SQLite error code. Otherwise, return
! 3862: ** SQLITE_OK.
! 3863: **
! 3864: ** If there is no "next" entry and no error occurs, then *pbEof is set to
! 3865: ** 1 before returning. Otherwise, if no error occurs and the iterator is
! 3866: ** successfully advanced, *pbEof is set to 0.
! 3867: */
! 3868: static int fts3EvalPhraseNext(
! 3869: Fts3Cursor *pCsr, /* FTS Cursor handle */
! 3870: Fts3Phrase *p, /* Phrase object to advance to next docid */
! 3871: u8 *pbEof /* OUT: Set to 1 if EOF */
! 3872: ){
! 3873: int rc = SQLITE_OK;
! 3874: Fts3Doclist *pDL = &p->doclist;
! 3875: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 3876:
! 3877: if( p->bIncr ){
! 3878: assert( p->nToken==1 );
! 3879: assert( pDL->pNextDocid==0 );
! 3880: rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
! 3881: &pDL->iDocid, &pDL->pList, &pDL->nList
! 3882: );
! 3883: if( rc==SQLITE_OK && !pDL->pList ){
! 3884: *pbEof = 1;
! 3885: }
! 3886: }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
! 3887: sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,
! 3888: &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
! 3889: );
! 3890: pDL->pList = pDL->pNextDocid;
! 3891: }else{
! 3892: char *pIter; /* Used to iterate through aAll */
! 3893: char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */
! 3894: if( pDL->pNextDocid ){
! 3895: pIter = pDL->pNextDocid;
! 3896: }else{
! 3897: pIter = pDL->aAll;
! 3898: }
! 3899:
! 3900: if( pIter>=pEnd ){
! 3901: /* We have already reached the end of this doclist. EOF. */
! 3902: *pbEof = 1;
! 3903: }else{
! 3904: sqlite3_int64 iDelta;
! 3905: pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
! 3906: if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
! 3907: pDL->iDocid += iDelta;
! 3908: }else{
! 3909: pDL->iDocid -= iDelta;
! 3910: }
! 3911: pDL->pList = pIter;
! 3912: fts3PoslistCopy(0, &pIter);
! 3913: pDL->nList = (pIter - pDL->pList);
! 3914:
! 3915: /* pIter now points just past the 0x00 that terminates the position-
! 3916: ** list for document pDL->iDocid. However, if this position-list was
! 3917: ** edited in place by fts3EvalNearTrim(), then pIter may not actually
! 3918: ** point to the start of the next docid value. The following line deals
! 3919: ** with this case by advancing pIter past the zero-padding added by
! 3920: ** fts3EvalNearTrim(). */
! 3921: while( pIter<pEnd && *pIter==0 ) pIter++;
! 3922:
! 3923: pDL->pNextDocid = pIter;
! 3924: assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
! 3925: *pbEof = 0;
! 3926: }
! 3927: }
! 3928:
! 3929: return rc;
! 3930: }
! 3931:
! 3932: /*
! 3933: **
! 3934: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
! 3935: ** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
! 3936: ** expression. Also the Fts3Expr.bDeferred variable is set to true for any
! 3937: ** expressions for which all descendent tokens are deferred.
! 3938: **
! 3939: ** If parameter bOptOk is zero, then it is guaranteed that the
! 3940: ** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
! 3941: ** each phrase in the expression (subject to deferred token processing).
! 3942: ** Or, if bOptOk is non-zero, then one or more tokens within the expression
! 3943: ** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
! 3944: **
! 3945: ** If an error occurs within this function, *pRc is set to an SQLite error
! 3946: ** code before returning.
! 3947: */
! 3948: static void fts3EvalStartReaders(
! 3949: Fts3Cursor *pCsr, /* FTS Cursor handle */
! 3950: Fts3Expr *pExpr, /* Expression to initialize phrases in */
! 3951: int bOptOk, /* True to enable incremental loading */
! 3952: int *pRc /* IN/OUT: Error code */
! 3953: ){
! 3954: if( pExpr && SQLITE_OK==*pRc ){
! 3955: if( pExpr->eType==FTSQUERY_PHRASE ){
! 3956: int i;
! 3957: int nToken = pExpr->pPhrase->nToken;
! 3958: for(i=0; i<nToken; i++){
! 3959: if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
! 3960: }
! 3961: pExpr->bDeferred = (i==nToken);
! 3962: *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
! 3963: }else{
! 3964: fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
! 3965: fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
! 3966: pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
! 3967: }
! 3968: }
! 3969: }
! 3970:
! 3971: /*
! 3972: ** An array of the following structures is assembled as part of the process
! 3973: ** of selecting tokens to defer before the query starts executing (as part
! 3974: ** of the xFilter() method). There is one element in the array for each
! 3975: ** token in the FTS expression.
! 3976: **
! 3977: ** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
! 3978: ** to phrases that are connected only by AND and NEAR operators (not OR or
! 3979: ** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
! 3980: ** separately. The root of a tokens AND/NEAR cluster is stored in
! 3981: ** Fts3TokenAndCost.pRoot.
! 3982: */
! 3983: typedef struct Fts3TokenAndCost Fts3TokenAndCost;
! 3984: struct Fts3TokenAndCost {
! 3985: Fts3Phrase *pPhrase; /* The phrase the token belongs to */
! 3986: int iToken; /* Position of token in phrase */
! 3987: Fts3PhraseToken *pToken; /* The token itself */
! 3988: Fts3Expr *pRoot; /* Root of NEAR/AND cluster */
! 3989: int nOvfl; /* Number of overflow pages to load doclist */
! 3990: int iCol; /* The column the token must match */
! 3991: };
! 3992:
! 3993: /*
! 3994: ** This function is used to populate an allocated Fts3TokenAndCost array.
! 3995: **
! 3996: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
! 3997: ** Otherwise, if an error occurs during execution, *pRc is set to an
! 3998: ** SQLite error code.
! 3999: */
! 4000: static void fts3EvalTokenCosts(
! 4001: Fts3Cursor *pCsr, /* FTS Cursor handle */
! 4002: Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */
! 4003: Fts3Expr *pExpr, /* Expression to consider */
! 4004: Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */
! 4005: Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */
! 4006: int *pRc /* IN/OUT: Error code */
! 4007: ){
! 4008: if( *pRc==SQLITE_OK ){
! 4009: if( pExpr->eType==FTSQUERY_PHRASE ){
! 4010: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 4011: int i;
! 4012: for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
! 4013: Fts3TokenAndCost *pTC = (*ppTC)++;
! 4014: pTC->pPhrase = pPhrase;
! 4015: pTC->iToken = i;
! 4016: pTC->pRoot = pRoot;
! 4017: pTC->pToken = &pPhrase->aToken[i];
! 4018: pTC->iCol = pPhrase->iColumn;
! 4019: *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
! 4020: }
! 4021: }else if( pExpr->eType!=FTSQUERY_NOT ){
! 4022: assert( pExpr->eType==FTSQUERY_OR
! 4023: || pExpr->eType==FTSQUERY_AND
! 4024: || pExpr->eType==FTSQUERY_NEAR
! 4025: );
! 4026: assert( pExpr->pLeft && pExpr->pRight );
! 4027: if( pExpr->eType==FTSQUERY_OR ){
! 4028: pRoot = pExpr->pLeft;
! 4029: **ppOr = pRoot;
! 4030: (*ppOr)++;
! 4031: }
! 4032: fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
! 4033: if( pExpr->eType==FTSQUERY_OR ){
! 4034: pRoot = pExpr->pRight;
! 4035: **ppOr = pRoot;
! 4036: (*ppOr)++;
! 4037: }
! 4038: fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
! 4039: }
! 4040: }
! 4041: }
! 4042:
! 4043: /*
! 4044: ** Determine the average document (row) size in pages. If successful,
! 4045: ** write this value to *pnPage and return SQLITE_OK. Otherwise, return
! 4046: ** an SQLite error code.
! 4047: **
! 4048: ** The average document size in pages is calculated by first calculating
! 4049: ** determining the average size in bytes, B. If B is less than the amount
! 4050: ** of data that will fit on a single leaf page of an intkey table in
! 4051: ** this database, then the average docsize is 1. Otherwise, it is 1 plus
! 4052: ** the number of overflow pages consumed by a record B bytes in size.
! 4053: */
! 4054: static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
! 4055: if( pCsr->nRowAvg==0 ){
! 4056: /* The average document size, which is required to calculate the cost
! 4057: ** of each doclist, has not yet been determined. Read the required
! 4058: ** data from the %_stat table to calculate it.
! 4059: **
! 4060: ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
! 4061: ** varints, where nCol is the number of columns in the FTS3 table.
! 4062: ** The first varint is the number of documents currently stored in
! 4063: ** the table. The following nCol varints contain the total amount of
! 4064: ** data stored in all rows of each column of the table, from left
! 4065: ** to right.
! 4066: */
! 4067: int rc;
! 4068: Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
! 4069: sqlite3_stmt *pStmt;
! 4070: sqlite3_int64 nDoc = 0;
! 4071: sqlite3_int64 nByte = 0;
! 4072: const char *pEnd;
! 4073: const char *a;
! 4074:
! 4075: rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
! 4076: if( rc!=SQLITE_OK ) return rc;
! 4077: a = sqlite3_column_blob(pStmt, 0);
! 4078: assert( a );
! 4079:
! 4080: pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
! 4081: a += sqlite3Fts3GetVarint(a, &nDoc);
! 4082: while( a<pEnd ){
! 4083: a += sqlite3Fts3GetVarint(a, &nByte);
! 4084: }
! 4085: if( nDoc==0 || nByte==0 ){
! 4086: sqlite3_reset(pStmt);
! 4087: return FTS_CORRUPT_VTAB;
! 4088: }
! 4089:
! 4090: pCsr->nDoc = nDoc;
! 4091: pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
! 4092: assert( pCsr->nRowAvg>0 );
! 4093: rc = sqlite3_reset(pStmt);
! 4094: if( rc!=SQLITE_OK ) return rc;
! 4095: }
! 4096:
! 4097: *pnPage = pCsr->nRowAvg;
! 4098: return SQLITE_OK;
! 4099: }
! 4100:
! 4101: /*
! 4102: ** This function is called to select the tokens (if any) that will be
! 4103: ** deferred. The array aTC[] has already been populated when this is
! 4104: ** called.
! 4105: **
! 4106: ** This function is called once for each AND/NEAR cluster in the
! 4107: ** expression. Each invocation determines which tokens to defer within
! 4108: ** the cluster with root node pRoot. See comments above the definition
! 4109: ** of struct Fts3TokenAndCost for more details.
! 4110: **
! 4111: ** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
! 4112: ** called on each token to defer. Otherwise, an SQLite error code is
! 4113: ** returned.
! 4114: */
! 4115: static int fts3EvalSelectDeferred(
! 4116: Fts3Cursor *pCsr, /* FTS Cursor handle */
! 4117: Fts3Expr *pRoot, /* Consider tokens with this root node */
! 4118: Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */
! 4119: int nTC /* Number of entries in aTC[] */
! 4120: ){
! 4121: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 4122: int nDocSize = 0; /* Number of pages per doc loaded */
! 4123: int rc = SQLITE_OK; /* Return code */
! 4124: int ii; /* Iterator variable for various purposes */
! 4125: int nOvfl = 0; /* Total overflow pages used by doclists */
! 4126: int nToken = 0; /* Total number of tokens in cluster */
! 4127:
! 4128: int nMinEst = 0; /* The minimum count for any phrase so far. */
! 4129: int nLoad4 = 1; /* (Phrases that will be loaded)^4. */
! 4130:
! 4131: /* Tokens are never deferred for FTS tables created using the content=xxx
! 4132: ** option. The reason being that it is not guaranteed that the content
! 4133: ** table actually contains the same data as the index. To prevent this from
! 4134: ** causing any problems, the deferred token optimization is completely
! 4135: ** disabled for content=xxx tables. */
! 4136: if( pTab->zContentTbl ){
! 4137: return SQLITE_OK;
! 4138: }
! 4139:
! 4140: /* Count the tokens in this AND/NEAR cluster. If none of the doclists
! 4141: ** associated with the tokens spill onto overflow pages, or if there is
! 4142: ** only 1 token, exit early. No tokens to defer in this case. */
! 4143: for(ii=0; ii<nTC; ii++){
! 4144: if( aTC[ii].pRoot==pRoot ){
! 4145: nOvfl += aTC[ii].nOvfl;
! 4146: nToken++;
! 4147: }
! 4148: }
! 4149: if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
! 4150:
! 4151: /* Obtain the average docsize (in pages). */
! 4152: rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
! 4153: assert( rc!=SQLITE_OK || nDocSize>0 );
! 4154:
! 4155:
! 4156: /* Iterate through all tokens in this AND/NEAR cluster, in ascending order
! 4157: ** of the number of overflow pages that will be loaded by the pager layer
! 4158: ** to retrieve the entire doclist for the token from the full-text index.
! 4159: ** Load the doclists for tokens that are either:
! 4160: **
! 4161: ** a. The cheapest token in the entire query (i.e. the one visited by the
! 4162: ** first iteration of this loop), or
! 4163: **
! 4164: ** b. Part of a multi-token phrase.
! 4165: **
! 4166: ** After each token doclist is loaded, merge it with the others from the
! 4167: ** same phrase and count the number of documents that the merged doclist
! 4168: ** contains. Set variable "nMinEst" to the smallest number of documents in
! 4169: ** any phrase doclist for which 1 or more token doclists have been loaded.
! 4170: ** Let nOther be the number of other phrases for which it is certain that
! 4171: ** one or more tokens will not be deferred.
! 4172: **
! 4173: ** Then, for each token, defer it if loading the doclist would result in
! 4174: ** loading N or more overflow pages into memory, where N is computed as:
! 4175: **
! 4176: ** (nMinEst + 4^nOther - 1) / (4^nOther)
! 4177: */
! 4178: for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
! 4179: int iTC; /* Used to iterate through aTC[] array. */
! 4180: Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */
! 4181:
! 4182: /* Set pTC to point to the cheapest remaining token. */
! 4183: for(iTC=0; iTC<nTC; iTC++){
! 4184: if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot
! 4185: && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl)
! 4186: ){
! 4187: pTC = &aTC[iTC];
! 4188: }
! 4189: }
! 4190: assert( pTC );
! 4191:
! 4192: if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
! 4193: /* The number of overflow pages to load for this (and therefore all
! 4194: ** subsequent) tokens is greater than the estimated number of pages
! 4195: ** that will be loaded if all subsequent tokens are deferred.
! 4196: */
! 4197: Fts3PhraseToken *pToken = pTC->pToken;
! 4198: rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
! 4199: fts3SegReaderCursorFree(pToken->pSegcsr);
! 4200: pToken->pSegcsr = 0;
! 4201: }else{
! 4202: /* Set nLoad4 to the value of (4^nOther) for the next iteration of the
! 4203: ** for-loop. Except, limit the value to 2^24 to prevent it from
! 4204: ** overflowing the 32-bit integer it is stored in. */
! 4205: if( ii<12 ) nLoad4 = nLoad4*4;
! 4206:
! 4207: if( ii==0 || pTC->pPhrase->nToken>1 ){
! 4208: /* Either this is the cheapest token in the entire query, or it is
! 4209: ** part of a multi-token phrase. Either way, the entire doclist will
! 4210: ** (eventually) be loaded into memory. It may as well be now. */
! 4211: Fts3PhraseToken *pToken = pTC->pToken;
! 4212: int nList = 0;
! 4213: char *pList = 0;
! 4214: rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
! 4215: assert( rc==SQLITE_OK || pList==0 );
! 4216: if( rc==SQLITE_OK ){
! 4217: int nCount;
! 4218: fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
! 4219: nCount = fts3DoclistCountDocids(
! 4220: pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
! 4221: );
! 4222: if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
! 4223: }
! 4224: }
! 4225: }
! 4226: pTC->pToken = 0;
! 4227: }
! 4228:
! 4229: return rc;
! 4230: }
! 4231:
! 4232: /*
! 4233: ** This function is called from within the xFilter method. It initializes
! 4234: ** the full-text query currently stored in pCsr->pExpr. To iterate through
! 4235: ** the results of a query, the caller does:
! 4236: **
! 4237: ** fts3EvalStart(pCsr);
! 4238: ** while( 1 ){
! 4239: ** fts3EvalNext(pCsr);
! 4240: ** if( pCsr->bEof ) break;
! 4241: ** ... return row pCsr->iPrevId to the caller ...
! 4242: ** }
! 4243: */
! 4244: static int fts3EvalStart(Fts3Cursor *pCsr){
! 4245: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 4246: int rc = SQLITE_OK;
! 4247: int nToken = 0;
! 4248: int nOr = 0;
! 4249:
! 4250: /* Allocate a MultiSegReader for each token in the expression. */
! 4251: fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
! 4252:
! 4253: /* Determine which, if any, tokens in the expression should be deferred. */
! 4254: if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
! 4255: Fts3TokenAndCost *aTC;
! 4256: Fts3Expr **apOr;
! 4257: aTC = (Fts3TokenAndCost *)sqlite3_malloc(
! 4258: sizeof(Fts3TokenAndCost) * nToken
! 4259: + sizeof(Fts3Expr *) * nOr * 2
! 4260: );
! 4261: apOr = (Fts3Expr **)&aTC[nToken];
! 4262:
! 4263: if( !aTC ){
! 4264: rc = SQLITE_NOMEM;
! 4265: }else{
! 4266: int ii;
! 4267: Fts3TokenAndCost *pTC = aTC;
! 4268: Fts3Expr **ppOr = apOr;
! 4269:
! 4270: fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
! 4271: nToken = pTC-aTC;
! 4272: nOr = ppOr-apOr;
! 4273:
! 4274: if( rc==SQLITE_OK ){
! 4275: rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
! 4276: for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
! 4277: rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
! 4278: }
! 4279: }
! 4280:
! 4281: sqlite3_free(aTC);
! 4282: }
! 4283: }
! 4284:
! 4285: fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
! 4286: return rc;
! 4287: }
! 4288:
! 4289: /*
! 4290: ** Invalidate the current position list for phrase pPhrase.
! 4291: */
! 4292: static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
! 4293: if( pPhrase->doclist.bFreeList ){
! 4294: sqlite3_free(pPhrase->doclist.pList);
! 4295: }
! 4296: pPhrase->doclist.pList = 0;
! 4297: pPhrase->doclist.nList = 0;
! 4298: pPhrase->doclist.bFreeList = 0;
! 4299: }
! 4300:
! 4301: /*
! 4302: ** This function is called to edit the position list associated with
! 4303: ** the phrase object passed as the fifth argument according to a NEAR
! 4304: ** condition. For example:
! 4305: **
! 4306: ** abc NEAR/5 "def ghi"
! 4307: **
! 4308: ** Parameter nNear is passed the NEAR distance of the expression (5 in
! 4309: ** the example above). When this function is called, *paPoslist points to
! 4310: ** the position list, and *pnToken is the number of phrase tokens in, the
! 4311: ** phrase on the other side of the NEAR operator to pPhrase. For example,
! 4312: ** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
! 4313: ** the position list associated with phrase "abc".
! 4314: **
! 4315: ** All positions in the pPhrase position list that are not sufficiently
! 4316: ** close to a position in the *paPoslist position list are removed. If this
! 4317: ** leaves 0 positions, zero is returned. Otherwise, non-zero.
! 4318: **
! 4319: ** Before returning, *paPoslist is set to point to the position lsit
! 4320: ** associated with pPhrase. And *pnToken is set to the number of tokens in
! 4321: ** pPhrase.
! 4322: */
! 4323: static int fts3EvalNearTrim(
! 4324: int nNear, /* NEAR distance. As in "NEAR/nNear". */
! 4325: char *aTmp, /* Temporary space to use */
! 4326: char **paPoslist, /* IN/OUT: Position list */
! 4327: int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */
! 4328: Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */
! 4329: ){
! 4330: int nParam1 = nNear + pPhrase->nToken;
! 4331: int nParam2 = nNear + *pnToken;
! 4332: int nNew;
! 4333: char *p2;
! 4334: char *pOut;
! 4335: int res;
! 4336:
! 4337: assert( pPhrase->doclist.pList );
! 4338:
! 4339: p2 = pOut = pPhrase->doclist.pList;
! 4340: res = fts3PoslistNearMerge(
! 4341: &pOut, aTmp, nParam1, nParam2, paPoslist, &p2
! 4342: );
! 4343: if( res ){
! 4344: nNew = (pOut - pPhrase->doclist.pList) - 1;
! 4345: assert( pPhrase->doclist.pList[nNew]=='\0' );
! 4346: assert( nNew<=pPhrase->doclist.nList && nNew>0 );
! 4347: memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
! 4348: pPhrase->doclist.nList = nNew;
! 4349: *paPoslist = pPhrase->doclist.pList;
! 4350: *pnToken = pPhrase->nToken;
! 4351: }
! 4352:
! 4353: return res;
! 4354: }
! 4355:
! 4356: /*
! 4357: ** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
! 4358: ** Otherwise, it advances the expression passed as the second argument to
! 4359: ** point to the next matching row in the database. Expressions iterate through
! 4360: ** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
! 4361: ** or descending if it is non-zero.
! 4362: **
! 4363: ** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
! 4364: ** successful, the following variables in pExpr are set:
! 4365: **
! 4366: ** Fts3Expr.bEof (non-zero if EOF - there is no next row)
! 4367: ** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row)
! 4368: **
! 4369: ** If the expression is of type FTSQUERY_PHRASE, and the expression is not
! 4370: ** at EOF, then the following variables are populated with the position list
! 4371: ** for the phrase for the visited row:
! 4372: **
! 4373: ** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes)
! 4374: ** FTs3Expr.pPhrase->doclist.pList (pointer to position list)
! 4375: **
! 4376: ** It says above that this function advances the expression to the next
! 4377: ** matching row. This is usually true, but there are the following exceptions:
! 4378: **
! 4379: ** 1. Deferred tokens are not taken into account. If a phrase consists
! 4380: ** entirely of deferred tokens, it is assumed to match every row in
! 4381: ** the db. In this case the position-list is not populated at all.
! 4382: **
! 4383: ** Or, if a phrase contains one or more deferred tokens and one or
! 4384: ** more non-deferred tokens, then the expression is advanced to the
! 4385: ** next possible match, considering only non-deferred tokens. In other
! 4386: ** words, if the phrase is "A B C", and "B" is deferred, the expression
! 4387: ** is advanced to the next row that contains an instance of "A * C",
! 4388: ** where "*" may match any single token. The position list in this case
! 4389: ** is populated as for "A * C" before returning.
! 4390: **
! 4391: ** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is
! 4392: ** advanced to point to the next row that matches "x AND y".
! 4393: **
! 4394: ** See fts3EvalTestDeferredAndNear() for details on testing if a row is
! 4395: ** really a match, taking into account deferred tokens and NEAR operators.
! 4396: */
! 4397: static void fts3EvalNextRow(
! 4398: Fts3Cursor *pCsr, /* FTS Cursor handle */
! 4399: Fts3Expr *pExpr, /* Expr. to advance to next matching row */
! 4400: int *pRc /* IN/OUT: Error code */
! 4401: ){
! 4402: if( *pRc==SQLITE_OK ){
! 4403: int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
! 4404: assert( pExpr->bEof==0 );
! 4405: pExpr->bStart = 1;
! 4406:
! 4407: switch( pExpr->eType ){
! 4408: case FTSQUERY_NEAR:
! 4409: case FTSQUERY_AND: {
! 4410: Fts3Expr *pLeft = pExpr->pLeft;
! 4411: Fts3Expr *pRight = pExpr->pRight;
! 4412: assert( !pLeft->bDeferred || !pRight->bDeferred );
! 4413:
! 4414: if( pLeft->bDeferred ){
! 4415: /* LHS is entirely deferred. So we assume it matches every row.
! 4416: ** Advance the RHS iterator to find the next row visited. */
! 4417: fts3EvalNextRow(pCsr, pRight, pRc);
! 4418: pExpr->iDocid = pRight->iDocid;
! 4419: pExpr->bEof = pRight->bEof;
! 4420: }else if( pRight->bDeferred ){
! 4421: /* RHS is entirely deferred. So we assume it matches every row.
! 4422: ** Advance the LHS iterator to find the next row visited. */
! 4423: fts3EvalNextRow(pCsr, pLeft, pRc);
! 4424: pExpr->iDocid = pLeft->iDocid;
! 4425: pExpr->bEof = pLeft->bEof;
! 4426: }else{
! 4427: /* Neither the RHS or LHS are deferred. */
! 4428: fts3EvalNextRow(pCsr, pLeft, pRc);
! 4429: fts3EvalNextRow(pCsr, pRight, pRc);
! 4430: while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
! 4431: sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
! 4432: if( iDiff==0 ) break;
! 4433: if( iDiff<0 ){
! 4434: fts3EvalNextRow(pCsr, pLeft, pRc);
! 4435: }else{
! 4436: fts3EvalNextRow(pCsr, pRight, pRc);
! 4437: }
! 4438: }
! 4439: pExpr->iDocid = pLeft->iDocid;
! 4440: pExpr->bEof = (pLeft->bEof || pRight->bEof);
! 4441: }
! 4442: break;
! 4443: }
! 4444:
! 4445: case FTSQUERY_OR: {
! 4446: Fts3Expr *pLeft = pExpr->pLeft;
! 4447: Fts3Expr *pRight = pExpr->pRight;
! 4448: sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
! 4449:
! 4450: assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
! 4451: assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
! 4452:
! 4453: if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
! 4454: fts3EvalNextRow(pCsr, pLeft, pRc);
! 4455: }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
! 4456: fts3EvalNextRow(pCsr, pRight, pRc);
! 4457: }else{
! 4458: fts3EvalNextRow(pCsr, pLeft, pRc);
! 4459: fts3EvalNextRow(pCsr, pRight, pRc);
! 4460: }
! 4461:
! 4462: pExpr->bEof = (pLeft->bEof && pRight->bEof);
! 4463: iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
! 4464: if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
! 4465: pExpr->iDocid = pLeft->iDocid;
! 4466: }else{
! 4467: pExpr->iDocid = pRight->iDocid;
! 4468: }
! 4469:
! 4470: break;
! 4471: }
! 4472:
! 4473: case FTSQUERY_NOT: {
! 4474: Fts3Expr *pLeft = pExpr->pLeft;
! 4475: Fts3Expr *pRight = pExpr->pRight;
! 4476:
! 4477: if( pRight->bStart==0 ){
! 4478: fts3EvalNextRow(pCsr, pRight, pRc);
! 4479: assert( *pRc!=SQLITE_OK || pRight->bStart );
! 4480: }
! 4481:
! 4482: fts3EvalNextRow(pCsr, pLeft, pRc);
! 4483: if( pLeft->bEof==0 ){
! 4484: while( !*pRc
! 4485: && !pRight->bEof
! 4486: && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0
! 4487: ){
! 4488: fts3EvalNextRow(pCsr, pRight, pRc);
! 4489: }
! 4490: }
! 4491: pExpr->iDocid = pLeft->iDocid;
! 4492: pExpr->bEof = pLeft->bEof;
! 4493: break;
! 4494: }
! 4495:
! 4496: default: {
! 4497: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 4498: fts3EvalInvalidatePoslist(pPhrase);
! 4499: *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
! 4500: pExpr->iDocid = pPhrase->doclist.iDocid;
! 4501: break;
! 4502: }
! 4503: }
! 4504: }
! 4505: }
! 4506:
! 4507: /*
! 4508: ** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
! 4509: ** cluster, then this function returns 1 immediately.
! 4510: **
! 4511: ** Otherwise, it checks if the current row really does match the NEAR
! 4512: ** expression, using the data currently stored in the position lists
! 4513: ** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression.
! 4514: **
! 4515: ** If the current row is a match, the position list associated with each
! 4516: ** phrase in the NEAR expression is edited in place to contain only those
! 4517: ** phrase instances sufficiently close to their peers to satisfy all NEAR
! 4518: ** constraints. In this case it returns 1. If the NEAR expression does not
! 4519: ** match the current row, 0 is returned. The position lists may or may not
! 4520: ** be edited if 0 is returned.
! 4521: */
! 4522: static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
! 4523: int res = 1;
! 4524:
! 4525: /* The following block runs if pExpr is the root of a NEAR query.
! 4526: ** For example, the query:
! 4527: **
! 4528: ** "w" NEAR "x" NEAR "y" NEAR "z"
! 4529: **
! 4530: ** which is represented in tree form as:
! 4531: **
! 4532: ** |
! 4533: ** +--NEAR--+ <-- root of NEAR query
! 4534: ** | |
! 4535: ** +--NEAR--+ "z"
! 4536: ** | |
! 4537: ** +--NEAR--+ "y"
! 4538: ** | |
! 4539: ** "w" "x"
! 4540: **
! 4541: ** The right-hand child of a NEAR node is always a phrase. The
! 4542: ** left-hand child may be either a phrase or a NEAR node. There are
! 4543: ** no exceptions to this - it's the way the parser in fts3_expr.c works.
! 4544: */
! 4545: if( *pRc==SQLITE_OK
! 4546: && pExpr->eType==FTSQUERY_NEAR
! 4547: && pExpr->bEof==0
! 4548: && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
! 4549: ){
! 4550: Fts3Expr *p;
! 4551: int nTmp = 0; /* Bytes of temp space */
! 4552: char *aTmp; /* Temp space for PoslistNearMerge() */
! 4553:
! 4554: /* Allocate temporary working space. */
! 4555: for(p=pExpr; p->pLeft; p=p->pLeft){
! 4556: nTmp += p->pRight->pPhrase->doclist.nList;
! 4557: }
! 4558: nTmp += p->pPhrase->doclist.nList;
! 4559: aTmp = sqlite3_malloc(nTmp*2);
! 4560: if( !aTmp ){
! 4561: *pRc = SQLITE_NOMEM;
! 4562: res = 0;
! 4563: }else{
! 4564: char *aPoslist = p->pPhrase->doclist.pList;
! 4565: int nToken = p->pPhrase->nToken;
! 4566:
! 4567: for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
! 4568: Fts3Phrase *pPhrase = p->pRight->pPhrase;
! 4569: int nNear = p->nNear;
! 4570: res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
! 4571: }
! 4572:
! 4573: aPoslist = pExpr->pRight->pPhrase->doclist.pList;
! 4574: nToken = pExpr->pRight->pPhrase->nToken;
! 4575: for(p=pExpr->pLeft; p && res; p=p->pLeft){
! 4576: int nNear;
! 4577: Fts3Phrase *pPhrase;
! 4578: assert( p->pParent && p->pParent->pLeft==p );
! 4579: nNear = p->pParent->nNear;
! 4580: pPhrase = (
! 4581: p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
! 4582: );
! 4583: res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
! 4584: }
! 4585: }
! 4586:
! 4587: sqlite3_free(aTmp);
! 4588: }
! 4589:
! 4590: return res;
! 4591: }
! 4592:
! 4593: /*
! 4594: ** This function is a helper function for fts3EvalTestDeferredAndNear().
! 4595: ** Assuming no error occurs or has occurred, It returns non-zero if the
! 4596: ** expression passed as the second argument matches the row that pCsr
! 4597: ** currently points to, or zero if it does not.
! 4598: **
! 4599: ** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
! 4600: ** If an error occurs during execution of this function, *pRc is set to
! 4601: ** the appropriate SQLite error code. In this case the returned value is
! 4602: ** undefined.
! 4603: */
! 4604: static int fts3EvalTestExpr(
! 4605: Fts3Cursor *pCsr, /* FTS cursor handle */
! 4606: Fts3Expr *pExpr, /* Expr to test. May or may not be root. */
! 4607: int *pRc /* IN/OUT: Error code */
! 4608: ){
! 4609: int bHit = 1; /* Return value */
! 4610: if( *pRc==SQLITE_OK ){
! 4611: switch( pExpr->eType ){
! 4612: case FTSQUERY_NEAR:
! 4613: case FTSQUERY_AND:
! 4614: bHit = (
! 4615: fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
! 4616: && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
! 4617: && fts3EvalNearTest(pExpr, pRc)
! 4618: );
! 4619:
! 4620: /* If the NEAR expression does not match any rows, zero the doclist for
! 4621: ** all phrases involved in the NEAR. This is because the snippet(),
! 4622: ** offsets() and matchinfo() functions are not supposed to recognize
! 4623: ** any instances of phrases that are part of unmatched NEAR queries.
! 4624: ** For example if this expression:
! 4625: **
! 4626: ** ... MATCH 'a OR (b NEAR c)'
! 4627: **
! 4628: ** is matched against a row containing:
! 4629: **
! 4630: ** 'a b d e'
! 4631: **
! 4632: ** then any snippet() should ony highlight the "a" term, not the "b"
! 4633: ** (as "b" is part of a non-matching NEAR clause).
! 4634: */
! 4635: if( bHit==0
! 4636: && pExpr->eType==FTSQUERY_NEAR
! 4637: && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
! 4638: ){
! 4639: Fts3Expr *p;
! 4640: for(p=pExpr; p->pPhrase==0; p=p->pLeft){
! 4641: if( p->pRight->iDocid==pCsr->iPrevId ){
! 4642: fts3EvalInvalidatePoslist(p->pRight->pPhrase);
! 4643: }
! 4644: }
! 4645: if( p->iDocid==pCsr->iPrevId ){
! 4646: fts3EvalInvalidatePoslist(p->pPhrase);
! 4647: }
! 4648: }
! 4649:
! 4650: break;
! 4651:
! 4652: case FTSQUERY_OR: {
! 4653: int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
! 4654: int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
! 4655: bHit = bHit1 || bHit2;
! 4656: break;
! 4657: }
! 4658:
! 4659: case FTSQUERY_NOT:
! 4660: bHit = (
! 4661: fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
! 4662: && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
! 4663: );
! 4664: break;
! 4665:
! 4666: default: {
! 4667: if( pCsr->pDeferred
! 4668: && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
! 4669: ){
! 4670: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 4671: assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
! 4672: if( pExpr->bDeferred ){
! 4673: fts3EvalInvalidatePoslist(pPhrase);
! 4674: }
! 4675: *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
! 4676: bHit = (pPhrase->doclist.pList!=0);
! 4677: pExpr->iDocid = pCsr->iPrevId;
! 4678: }else{
! 4679: bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
! 4680: }
! 4681: break;
! 4682: }
! 4683: }
! 4684: }
! 4685: return bHit;
! 4686: }
! 4687:
! 4688: /*
! 4689: ** This function is called as the second part of each xNext operation when
! 4690: ** iterating through the results of a full-text query. At this point the
! 4691: ** cursor points to a row that matches the query expression, with the
! 4692: ** following caveats:
! 4693: **
! 4694: ** * Up until this point, "NEAR" operators in the expression have been
! 4695: ** treated as "AND".
! 4696: **
! 4697: ** * Deferred tokens have not yet been considered.
! 4698: **
! 4699: ** If *pRc is not SQLITE_OK when this function is called, it immediately
! 4700: ** returns 0. Otherwise, it tests whether or not after considering NEAR
! 4701: ** operators and deferred tokens the current row is still a match for the
! 4702: ** expression. It returns 1 if both of the following are true:
! 4703: **
! 4704: ** 1. *pRc is SQLITE_OK when this function returns, and
! 4705: **
! 4706: ** 2. After scanning the current FTS table row for the deferred tokens,
! 4707: ** it is determined that the row does *not* match the query.
! 4708: **
! 4709: ** Or, if no error occurs and it seems the current row does match the FTS
! 4710: ** query, return 0.
! 4711: */
! 4712: static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
! 4713: int rc = *pRc;
! 4714: int bMiss = 0;
! 4715: if( rc==SQLITE_OK ){
! 4716:
! 4717: /* If there are one or more deferred tokens, load the current row into
! 4718: ** memory and scan it to determine the position list for each deferred
! 4719: ** token. Then, see if this row is really a match, considering deferred
! 4720: ** tokens and NEAR operators (neither of which were taken into account
! 4721: ** earlier, by fts3EvalNextRow()).
! 4722: */
! 4723: if( pCsr->pDeferred ){
! 4724: rc = fts3CursorSeek(0, pCsr);
! 4725: if( rc==SQLITE_OK ){
! 4726: rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
! 4727: }
! 4728: }
! 4729: bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
! 4730:
! 4731: /* Free the position-lists accumulated for each deferred token above. */
! 4732: sqlite3Fts3FreeDeferredDoclists(pCsr);
! 4733: *pRc = rc;
! 4734: }
! 4735: return (rc==SQLITE_OK && bMiss);
! 4736: }
! 4737:
! 4738: /*
! 4739: ** Advance to the next document that matches the FTS expression in
! 4740: ** Fts3Cursor.pExpr.
! 4741: */
! 4742: static int fts3EvalNext(Fts3Cursor *pCsr){
! 4743: int rc = SQLITE_OK; /* Return Code */
! 4744: Fts3Expr *pExpr = pCsr->pExpr;
! 4745: assert( pCsr->isEof==0 );
! 4746: if( pExpr==0 ){
! 4747: pCsr->isEof = 1;
! 4748: }else{
! 4749: do {
! 4750: if( pCsr->isRequireSeek==0 ){
! 4751: sqlite3_reset(pCsr->pStmt);
! 4752: }
! 4753: assert( sqlite3_data_count(pCsr->pStmt)==0 );
! 4754: fts3EvalNextRow(pCsr, pExpr, &rc);
! 4755: pCsr->isEof = pExpr->bEof;
! 4756: pCsr->isRequireSeek = 1;
! 4757: pCsr->isMatchinfoNeeded = 1;
! 4758: pCsr->iPrevId = pExpr->iDocid;
! 4759: }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
! 4760: }
! 4761: return rc;
! 4762: }
! 4763:
! 4764: /*
! 4765: ** Restart interation for expression pExpr so that the next call to
! 4766: ** fts3EvalNext() visits the first row. Do not allow incremental
! 4767: ** loading or merging of phrase doclists for this iteration.
! 4768: **
! 4769: ** If *pRc is other than SQLITE_OK when this function is called, it is
! 4770: ** a no-op. If an error occurs within this function, *pRc is set to an
! 4771: ** SQLite error code before returning.
! 4772: */
! 4773: static void fts3EvalRestart(
! 4774: Fts3Cursor *pCsr,
! 4775: Fts3Expr *pExpr,
! 4776: int *pRc
! 4777: ){
! 4778: if( pExpr && *pRc==SQLITE_OK ){
! 4779: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 4780:
! 4781: if( pPhrase ){
! 4782: fts3EvalInvalidatePoslist(pPhrase);
! 4783: if( pPhrase->bIncr ){
! 4784: assert( pPhrase->nToken==1 );
! 4785: assert( pPhrase->aToken[0].pSegcsr );
! 4786: sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
! 4787: *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
! 4788: }
! 4789:
! 4790: pPhrase->doclist.pNextDocid = 0;
! 4791: pPhrase->doclist.iDocid = 0;
! 4792: }
! 4793:
! 4794: pExpr->iDocid = 0;
! 4795: pExpr->bEof = 0;
! 4796: pExpr->bStart = 0;
! 4797:
! 4798: fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
! 4799: fts3EvalRestart(pCsr, pExpr->pRight, pRc);
! 4800: }
! 4801: }
! 4802:
! 4803: /*
! 4804: ** After allocating the Fts3Expr.aMI[] array for each phrase in the
! 4805: ** expression rooted at pExpr, the cursor iterates through all rows matched
! 4806: ** by pExpr, calling this function for each row. This function increments
! 4807: ** the values in Fts3Expr.aMI[] according to the position-list currently
! 4808: ** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase
! 4809: ** expression nodes.
! 4810: */
! 4811: static void fts3EvalUpdateCounts(Fts3Expr *pExpr){
! 4812: if( pExpr ){
! 4813: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 4814: if( pPhrase && pPhrase->doclist.pList ){
! 4815: int iCol = 0;
! 4816: char *p = pPhrase->doclist.pList;
! 4817:
! 4818: assert( *p );
! 4819: while( 1 ){
! 4820: u8 c = 0;
! 4821: int iCnt = 0;
! 4822: while( 0xFE & (*p | c) ){
! 4823: if( (c&0x80)==0 ) iCnt++;
! 4824: c = *p++ & 0x80;
! 4825: }
! 4826:
! 4827: /* aMI[iCol*3 + 1] = Number of occurrences
! 4828: ** aMI[iCol*3 + 2] = Number of rows containing at least one instance
! 4829: */
! 4830: pExpr->aMI[iCol*3 + 1] += iCnt;
! 4831: pExpr->aMI[iCol*3 + 2] += (iCnt>0);
! 4832: if( *p==0x00 ) break;
! 4833: p++;
! 4834: p += sqlite3Fts3GetVarint32(p, &iCol);
! 4835: }
! 4836: }
! 4837:
! 4838: fts3EvalUpdateCounts(pExpr->pLeft);
! 4839: fts3EvalUpdateCounts(pExpr->pRight);
! 4840: }
! 4841: }
! 4842:
! 4843: /*
! 4844: ** Expression pExpr must be of type FTSQUERY_PHRASE.
! 4845: **
! 4846: ** If it is not already allocated and populated, this function allocates and
! 4847: ** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part
! 4848: ** of a NEAR expression, then it also allocates and populates the same array
! 4849: ** for all other phrases that are part of the NEAR expression.
! 4850: **
! 4851: ** SQLITE_OK is returned if the aMI[] array is successfully allocated and
! 4852: ** populated. Otherwise, if an error occurs, an SQLite error code is returned.
! 4853: */
! 4854: static int fts3EvalGatherStats(
! 4855: Fts3Cursor *pCsr, /* Cursor object */
! 4856: Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */
! 4857: ){
! 4858: int rc = SQLITE_OK; /* Return code */
! 4859:
! 4860: assert( pExpr->eType==FTSQUERY_PHRASE );
! 4861: if( pExpr->aMI==0 ){
! 4862: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 4863: Fts3Expr *pRoot; /* Root of NEAR expression */
! 4864: Fts3Expr *p; /* Iterator used for several purposes */
! 4865:
! 4866: sqlite3_int64 iPrevId = pCsr->iPrevId;
! 4867: sqlite3_int64 iDocid;
! 4868: u8 bEof;
! 4869:
! 4870: /* Find the root of the NEAR expression */
! 4871: pRoot = pExpr;
! 4872: while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
! 4873: pRoot = pRoot->pParent;
! 4874: }
! 4875: iDocid = pRoot->iDocid;
! 4876: bEof = pRoot->bEof;
! 4877: assert( pRoot->bStart );
! 4878:
! 4879: /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */
! 4880: for(p=pRoot; p; p=p->pLeft){
! 4881: Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
! 4882: assert( pE->aMI==0 );
! 4883: pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32));
! 4884: if( !pE->aMI ) return SQLITE_NOMEM;
! 4885: memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
! 4886: }
! 4887:
! 4888: fts3EvalRestart(pCsr, pRoot, &rc);
! 4889:
! 4890: while( pCsr->isEof==0 && rc==SQLITE_OK ){
! 4891:
! 4892: do {
! 4893: /* Ensure the %_content statement is reset. */
! 4894: if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
! 4895: assert( sqlite3_data_count(pCsr->pStmt)==0 );
! 4896:
! 4897: /* Advance to the next document */
! 4898: fts3EvalNextRow(pCsr, pRoot, &rc);
! 4899: pCsr->isEof = pRoot->bEof;
! 4900: pCsr->isRequireSeek = 1;
! 4901: pCsr->isMatchinfoNeeded = 1;
! 4902: pCsr->iPrevId = pRoot->iDocid;
! 4903: }while( pCsr->isEof==0
! 4904: && pRoot->eType==FTSQUERY_NEAR
! 4905: && fts3EvalTestDeferredAndNear(pCsr, &rc)
! 4906: );
! 4907:
! 4908: if( rc==SQLITE_OK && pCsr->isEof==0 ){
! 4909: fts3EvalUpdateCounts(pRoot);
! 4910: }
! 4911: }
! 4912:
! 4913: pCsr->isEof = 0;
! 4914: pCsr->iPrevId = iPrevId;
! 4915:
! 4916: if( bEof ){
! 4917: pRoot->bEof = bEof;
! 4918: }else{
! 4919: /* Caution: pRoot may iterate through docids in ascending or descending
! 4920: ** order. For this reason, even though it seems more defensive, the
! 4921: ** do loop can not be written:
! 4922: **
! 4923: ** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
! 4924: */
! 4925: fts3EvalRestart(pCsr, pRoot, &rc);
! 4926: do {
! 4927: fts3EvalNextRow(pCsr, pRoot, &rc);
! 4928: assert( pRoot->bEof==0 );
! 4929: }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
! 4930: fts3EvalTestDeferredAndNear(pCsr, &rc);
! 4931: }
! 4932: }
! 4933: return rc;
! 4934: }
! 4935:
! 4936: /*
! 4937: ** This function is used by the matchinfo() module to query a phrase
! 4938: ** expression node for the following information:
! 4939: **
! 4940: ** 1. The total number of occurrences of the phrase in each column of
! 4941: ** the FTS table (considering all rows), and
! 4942: **
! 4943: ** 2. For each column, the number of rows in the table for which the
! 4944: ** column contains at least one instance of the phrase.
! 4945: **
! 4946: ** If no error occurs, SQLITE_OK is returned and the values for each column
! 4947: ** written into the array aiOut as follows:
! 4948: **
! 4949: ** aiOut[iCol*3 + 1] = Number of occurrences
! 4950: ** aiOut[iCol*3 + 2] = Number of rows containing at least one instance
! 4951: **
! 4952: ** Caveats:
! 4953: **
! 4954: ** * If a phrase consists entirely of deferred tokens, then all output
! 4955: ** values are set to the number of documents in the table. In other
! 4956: ** words we assume that very common tokens occur exactly once in each
! 4957: ** column of each row of the table.
! 4958: **
! 4959: ** * If a phrase contains some deferred tokens (and some non-deferred
! 4960: ** tokens), count the potential occurrence identified by considering
! 4961: ** the non-deferred tokens instead of actual phrase occurrences.
! 4962: **
! 4963: ** * If the phrase is part of a NEAR expression, then only phrase instances
! 4964: ** that meet the NEAR constraint are included in the counts.
! 4965: */
! 4966: int sqlite3Fts3EvalPhraseStats(
! 4967: Fts3Cursor *pCsr, /* FTS cursor handle */
! 4968: Fts3Expr *pExpr, /* Phrase expression */
! 4969: u32 *aiOut /* Array to write results into (see above) */
! 4970: ){
! 4971: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 4972: int rc = SQLITE_OK;
! 4973: int iCol;
! 4974:
! 4975: if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){
! 4976: assert( pCsr->nDoc>0 );
! 4977: for(iCol=0; iCol<pTab->nColumn; iCol++){
! 4978: aiOut[iCol*3 + 1] = (u32)pCsr->nDoc;
! 4979: aiOut[iCol*3 + 2] = (u32)pCsr->nDoc;
! 4980: }
! 4981: }else{
! 4982: rc = fts3EvalGatherStats(pCsr, pExpr);
! 4983: if( rc==SQLITE_OK ){
! 4984: assert( pExpr->aMI );
! 4985: for(iCol=0; iCol<pTab->nColumn; iCol++){
! 4986: aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1];
! 4987: aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2];
! 4988: }
! 4989: }
! 4990: }
! 4991:
! 4992: return rc;
! 4993: }
! 4994:
! 4995: /*
! 4996: ** The expression pExpr passed as the second argument to this function
! 4997: ** must be of type FTSQUERY_PHRASE.
! 4998: **
! 4999: ** The returned value is either NULL or a pointer to a buffer containing
! 5000: ** a position-list indicating the occurrences of the phrase in column iCol
! 5001: ** of the current row.
! 5002: **
! 5003: ** More specifically, the returned buffer contains 1 varint for each
! 5004: ** occurence of the phrase in the column, stored using the normal (delta+2)
! 5005: ** compression and is terminated by either an 0x01 or 0x00 byte. For example,
! 5006: ** if the requested column contains "a b X c d X X" and the position-list
! 5007: ** for 'X' is requested, the buffer returned may contain:
! 5008: **
! 5009: ** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00
! 5010: **
! 5011: ** This function works regardless of whether or not the phrase is deferred,
! 5012: ** incremental, or neither.
! 5013: */
! 5014: char *sqlite3Fts3EvalPhrasePoslist(
! 5015: Fts3Cursor *pCsr, /* FTS3 cursor object */
! 5016: Fts3Expr *pExpr, /* Phrase to return doclist for */
! 5017: int iCol /* Column to return position list for */
! 5018: ){
! 5019: Fts3Phrase *pPhrase = pExpr->pPhrase;
! 5020: Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
! 5021: char *pIter = pPhrase->doclist.pList;
! 5022: int iThis;
! 5023:
! 5024: assert( iCol>=0 && iCol<pTab->nColumn );
! 5025: if( !pIter
! 5026: || pExpr->bEof
! 5027: || pExpr->iDocid!=pCsr->iPrevId
! 5028: || (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol)
! 5029: ){
! 5030: return 0;
! 5031: }
! 5032:
! 5033: assert( pPhrase->doclist.nList>0 );
! 5034: if( *pIter==0x01 ){
! 5035: pIter++;
! 5036: pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
! 5037: }else{
! 5038: iThis = 0;
! 5039: }
! 5040: while( iThis<iCol ){
! 5041: fts3ColumnlistCopy(0, &pIter);
! 5042: if( *pIter==0x00 ) return 0;
! 5043: pIter++;
! 5044: pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
! 5045: }
! 5046:
! 5047: return ((iCol==iThis)?pIter:0);
! 5048: }
! 5049:
! 5050: /*
! 5051: ** Free all components of the Fts3Phrase structure that were allocated by
! 5052: ** the eval module. Specifically, this means to free:
! 5053: **
! 5054: ** * the contents of pPhrase->doclist, and
! 5055: ** * any Fts3MultiSegReader objects held by phrase tokens.
! 5056: */
! 5057: void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
! 5058: if( pPhrase ){
! 5059: int i;
! 5060: sqlite3_free(pPhrase->doclist.aAll);
! 5061: fts3EvalInvalidatePoslist(pPhrase);
! 5062: memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
! 5063: for(i=0; i<pPhrase->nToken; i++){
! 5064: fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
! 5065: pPhrase->aToken[i].pSegcsr = 0;
! 5066: }
! 5067: }
! 5068: }
! 5069:
! 5070: /*
! 5071: ** Return SQLITE_CORRUPT_VTAB.
! 5072: */
! 5073: #ifdef SQLITE_DEBUG
! 5074: int sqlite3Fts3Corrupt(){
! 5075: return SQLITE_CORRUPT_VTAB;
! 5076: }
! 5077: #endif
! 5078:
! 5079: #if !SQLITE_CORE
! 5080: /*
! 5081: ** Initialize API pointer table, if required.
! 5082: */
! 5083: int sqlite3_extension_init(
! 5084: sqlite3 *db,
! 5085: char **pzErrMsg,
! 5086: const sqlite3_api_routines *pApi
! 5087: ){
! 5088: SQLITE_EXTENSION_INIT2(pApi)
! 5089: return sqlite3Fts3Init(db);
! 5090: }
! 5091: #endif
! 5092:
! 5093: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>