Annotation of embedaddon/sqlite3/ext/fts3/fts3_tokenizer.h, revision 1.1.1.1

1.1       misho       1: /*
                      2: ** 2006 July 10
                      3: **
                      4: ** The author disclaims copyright to this source code.
                      5: **
                      6: *************************************************************************
                      7: ** Defines the interface to tokenizers used by fulltext-search.  There
                      8: ** are three basic components:
                      9: **
                     10: ** sqlite3_tokenizer_module is a singleton defining the tokenizer
                     11: ** interface functions.  This is essentially the class structure for
                     12: ** tokenizers.
                     13: **
                     14: ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
                     15: ** including customization information defined at creation time.
                     16: **
                     17: ** sqlite3_tokenizer_cursor is created by a tokenizer to step through
                     18: ** the tokens of a particular input.
                     19: */
                     20: #ifndef _FTS3_TOKENIZER_H_
                     21: #define _FTS3_TOKENIZER_H_
                     22: 
                     23: /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
                     24: ** If tokenizers are to be allowed to call sqlite3_*() functions, then
                     25: ** we will need a way to register the API consistently.
                     26: */
                     27: #include "sqlite3.h"
                     28: 
                     29: /*
                     30: ** Structures used by the tokenizer interface. When a new tokenizer
                     31: ** implementation is registered, the caller provides a pointer to
                     32: ** an sqlite3_tokenizer_module containing pointers to the callback
                     33: ** functions that make up an implementation.
                     34: **
                     35: ** When an fts3 table is created, the arguments given in the
                     36: ** tokenizer clause of the CREATE VIRTUAL TABLE statement are passed to
                     37: ** the sqlite3_tokenizer_module.xCreate() function of the requested
                     38: ** tokenizer implementation. The xCreate() function in turn returns an
                     39: ** sqlite3_tokenizer structure representing the specific tokenizer to
                     40: ** be used for the fts3 table (customized by the tokenizer clause arguments).
                     41: **
                     42: ** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
                     43: ** method is called. It returns an sqlite3_tokenizer_cursor object
                     44: ** that may be used to tokenize a specific input buffer based on
                     45: ** the tokenization rules supplied by a specific sqlite3_tokenizer
                     46: ** object.
                     47: */
                     48: typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;  /* Callback table: the "class" of a tokenizer */
                     49: typedef struct sqlite3_tokenizer sqlite3_tokenizer;                /* One particular tokenizer, as returned by xCreate() */
                     50: typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;  /* Cursor over one input buffer, as returned by xOpen() */
                     51: 
                     52: struct sqlite3_tokenizer_module {
                     53: 
                     54:   /*
                     55:   ** Structure version. Should always be set to 0.
                     56:   */
                     57:   int iVersion;
                     58: 
                     59:   /*
                     60:   ** Create a new tokenizer. The values in the argv[] array are the
                     61:   ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
                     62:   ** TABLE statement that created the fts3 table. For example, if
                     63:   ** the following SQL is executed:
                     64:   **
                     65:   **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
                     66:   **
                     67:   ** then argc is set to 2, and the argv[] array contains pointers
                     68:   ** to the strings "arg1" and "arg2".
                     69:   **
                     70:   ** This method should return either SQLITE_OK (0), or an SQLite error
                     71:   ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
                     72:   ** to point at the newly created tokenizer structure. The generic
                     73:   ** sqlite3_tokenizer.pModule variable should not be initialised by
                     74:   ** this callback. The caller will do so.
                     75:   */
                     76:   int (*xCreate)(
                     77:     int argc,                           /* Number of entries in argv[] */
                     78:     const char *const*argv,             /* Tokenizer argument strings */
                     79:     sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
                     80:   );
                     81: 
                     82:   /*
                     83:   ** Destroy an existing tokenizer. The fts3 module calls this method
                     84:   ** exactly once for each successful call to xCreate().
                     85:   */
                     86:   int (*xDestroy)(sqlite3_tokenizer *pTokenizer);  /* pTokenizer: tokenizer obtained from xCreate() */
                     87: 
                     88:   /*
                     89:   ** Create a tokenizer cursor to tokenize an input buffer. The caller
                     90:   ** is responsible for ensuring that the input buffer remains valid
                     91:   ** until the cursor is closed (using the xClose() method).
                     92:   */
                     93:   int (*xOpen)(
                     94:     sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
                     95:     const char *pInput, int nBytes,      /* Input buffer (see TODO at xNext: must be nul-terminated) */
                     96:     sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
                     97:   );
                     98: 
                     99:   /*
                    100:   ** Destroy an existing tokenizer cursor. The fts3 module calls this
                    101:   ** method exactly once for each successful call to xOpen().
                    102:   */
                    103:   int (*xClose)(sqlite3_tokenizer_cursor *pCursor);  /* pCursor: cursor obtained from xOpen() */
                    104: 
                    105:   /*
                    106:   ** Retrieve the next token from the tokenizer cursor pCursor. This
                    107:   ** method should either return SQLITE_OK and set the values of the
                    108:   ** "OUT" variables identified below, or SQLITE_DONE to indicate that
                    109:   ** the end of the buffer has been reached, or an SQLite error code.
                    110:   **
                    111:   ** *ppToken should be set to point at a buffer containing the
                    112:   ** normalized version of the token (i.e. after any case-folding and/or
                    113:   ** stemming has been performed). *pnBytes should be set to the length
                    114:   ** of this buffer in bytes. The input text that generated the token is
                    115:   ** identified by the byte offsets returned in *piStartOffset and
                    116:   ** *piEndOffset. *piStartOffset should be set to the index of the first
                    117:   ** byte of the token in the input buffer. *piEndOffset should be set
                    118:   ** to the index of the first byte just past the end of the token in
                    119:   ** the input buffer. *piPosition should be set to the number of tokens returned before this one.
                    120:   **
                    121:   ** The buffer *ppToken is set to point at is managed by the tokenizer
                    122:   ** implementation. It is only required to be valid until the next call
                    123:   ** to xNext() or xClose().
                    124:   */
                    125:   /* TODO(shess) The current implementation requires the pInput buffer
                    126:   ** given to xOpen() to be nul-terminated.  This should either be fixed,
                    127:   ** or pInput/nBytes should be converted to zInput.
                    128:   */
                    129:   int (*xNext)(
                    130:     sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
                    131:     const char **ppToken, int *pnBytes,  /* OUT: Normalized token text and its length in bytes */
                    132:     int *piStartOffset,  /* OUT: Byte offset of first byte of token in input buffer */
                    133:     int *piEndOffset,    /* OUT: Byte offset just past end of token in input buffer */
                    134:     int *piPosition      /* OUT: Number of tokens returned before this one */
                    135:   );
                    136: };
                    137: 
                    138: struct sqlite3_tokenizer {
                    139:   const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer; set by the caller of xCreate(), not by xCreate() */
                    140:   /* Tokenizer implementations will typically add additional fields */
                    141: };
                    142: 
                    143: struct sqlite3_tokenizer_cursor {
                    144:   sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. NOTE(review): this header does not say who initialises it - confirm */
                    145:   /* Tokenizer implementations will typically add additional fields */
                    146: };
                    147: 
                    148: int fts3_global_term_cnt(int iTerm, int iCol);  /* NOTE(review): not defined or described in this header; meaning of iTerm/iCol and the result - TODO confirm */
                    149: int fts3_term_cnt(int iTerm, int iCol);         /* NOTE(review): same caveat as fts3_global_term_cnt() on the preceding line */
                    150: 
                    151: 
                    152: #endif /* _FTS3_TOKENIZER_H_ */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>