Annotation of embedaddon/sqlite3/ext/fts1/tokenizer.h, revision 1.1.1.1

1.1       misho       1: /*
                      2: ** 2006 July 10
                      3: **
                      4: ** The author disclaims copyright to this source code.
                      5: **
                      6: *************************************************************************
                      7: ** Defines the interface to tokenizers used by fulltext-search.  There
                      8: ** are three basic components:
                      9: **
                     10: ** sqlite3_tokenizer_module is a singleton defining the tokenizer
                     11: ** interface functions.  This is essentially the class structure for
                     12: ** tokenizers.
                     13: **
                     14: ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
                     15: ** including customization information defined at creation time.
                     16: **
                     17: ** sqlite3_tokenizer_cursor is created by a tokenizer to produce
                     18: ** tokens from a particular input.
                     19: */
                     20: #ifndef _TOKENIZER_H_
                     21: #define _TOKENIZER_H_
                     22: 
                     23: /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
                     24: ** If tokenizers are to be allowed to call sqlite3_*() functions, then
                     25: ** we will need a way to register the API consistently.
                     26: */
                     27: #include "sqlite3.h"
                     28: 
                     29: /*
                     30: ** Structures used by the tokenizer interface.
                     31: */
                     32: typedef struct sqlite3_tokenizer sqlite3_tokenizer;                 /* one particular (possibly customized) tokenizer */
                     33: typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;   /* token stream over one particular input */
                     34: typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;   /* table of tokenizer interface functions */
                     35: 
                     36: struct sqlite3_tokenizer_module {  /* function table ("class structure") for one kind of tokenizer */
                     37:   int iVersion;                  /* module version number; currently 0 */
                     38: 
                     39:   /*
                     40:   ** Create and destroy a tokenizer.  argc/argv are passed down from
                     41:   ** the fulltext virtual table creation to allow customization.
                     42:   */
                     43:   int (*xCreate)(int argc, const char **argv,
                     44:                  sqlite3_tokenizer **ppTokenizer);  /* ppTokenizer: presumably receives the new tokenizer -- confirm against implementations */
                     45:   int (*xDestroy)(sqlite3_tokenizer *pTokenizer);   /* counterpart of xCreate() */
                     46: 
                     47:   /*
                     48:   ** Tokenize a particular input.  Call xOpen() to prepare to
                     49:   ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
                     50:   ** xClose() to free any internal state.  The pInput passed to
                     51:   ** xOpen() must exist until the cursor is closed.  The ppToken
                     52:   ** result from xNext() is only valid until the next call to xNext()
                     53:   ** or until xClose() is called.
                     54:   */
                     55:   /* TODO(shess) current implementation requires pInput to be
                     56:   ** nul-terminated.  This should either be fixed, or pInput/nBytes
                     57:   ** should be converted to zInput.
                     58:   */
                     59:   int (*xOpen)(sqlite3_tokenizer *pTokenizer,
                     60:                const char *pInput, int nBytes,      /* pInput is borrowed, not copied: it must outlive the cursor */
                     61:                sqlite3_tokenizer_cursor **ppCursor); /* ppCursor: presumably receives the new cursor -- confirm */
                     62:   int (*xClose)(sqlite3_tokenizer_cursor *pCursor);  /* frees the cursor's internal state */
                     63:   int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
                     64:                const char **ppToken, int *pnBytes,   /* token result; only valid until the next xNext() or xClose() */
                     65:                int *piStartOffset, int *piEndOffset, int *piPosition);  /* NOTE(review): offset/position units are not stated in this header -- check an implementation */
                     66: };
                     67: 
                     68: struct sqlite3_tokenizer {  /* common part of every tokenizer; identifies which module it belongs to */
                     69:   sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
                     70:   /* Tokenizer implementations will typically add additional fields (presumably by wrapping this struct in a larger one of their own -- confirm) */
                     71: };
                     72: 
                     73: struct sqlite3_tokenizer_cursor {  /* common part of every cursor; links back to its tokenizer */
                     74:   sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
                     75:   /* Tokenizer implementations will typically add additional fields (presumably by wrapping this struct in a larger one of their own -- confirm) */
                     76: };
                     77: 
                     78: /*
                     79: ** Get the module for a tokenizer which generates tokens based on a
                     80: ** set of non-token characters.  The default is to break tokens at any
                     81: ** non-alnum character, though the set of delimiters can also be
                     82: ** specified by the first argv argument to xCreate().
                     83: */
                     84: /* TODO(shess) This doesn't belong here.  Need some sort of
                     85: ** registration process.
                     86: */
                     87: void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);  /* no status is returned; the module is presumably handed back through *ppModule -- confirm in the implementation */
                     88: 
                     89: #endif /* _TOKENIZER_H_ */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>