Return to tokenizer.h CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / ext / fts1 |
1.1 ! misho 1: /* ! 2: ** 2006 July 10 ! 3: ** ! 4: ** The author disclaims copyright to this source code. ! 5: ** ! 6: ************************************************************************* ! 7: ** Defines the interface to tokenizers used by fulltext-search. There ! 8: ** are three basic components: ! 9: ** ! 10: ** sqlite3_tokenizer_module is a singleton defining the tokenizer ! 11: ** interface functions. This is essentially the class structure for ! 12: ** tokenizers. ! 13: ** ! 14: ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps ! 15: ** including customization information defined at creation time. ! 16: ** ! 17: ** sqlite3_tokenizer_cursor is generated by a tokenizer to generate ! 18: ** tokens from a particular input. ! 19: */ ! 20: #ifndef _TOKENIZER_H_ ! 21: #define _TOKENIZER_H_ ! 22: ! 23: /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. ! 24: ** If tokenizers are to be allowed to call sqlite3_*() functions, then ! 25: ** we will need a way to register the API consistently. ! 26: */ ! 27: #include "sqlite3.h" ! 28: ! 29: /* ! 30: ** Structures used by the tokenizer interface. ! 31: */ ! 32: typedef struct sqlite3_tokenizer sqlite3_tokenizer; ! 33: typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; ! 34: typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; ! 35: ! 36: struct sqlite3_tokenizer_module { ! 37: int iVersion; /* currently 0 */ ! 38: ! 39: /* ! 40: ** Create and destroy a tokenizer. argc/argv are passed down from ! 41: ** the fulltext virtual table creation to allow customization. ! 42: */ ! 43: int (*xCreate)(int argc, const char **argv, ! 44: sqlite3_tokenizer **ppTokenizer); ! 45: int (*xDestroy)(sqlite3_tokenizer *pTokenizer); ! 46: ! 47: /* ! 48: ** Tokenize a particular input. Call xOpen() to prepare to ! 49: ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then ! 50: ** xClose() to free any internal state. The pInput passed to ! 51: ** xOpen() must exist until the cursor is closed. The ppToken ! 52: ** result from xNext() is only valid until the next call to xNext() ! 53: ** or until xClose() is called. ! 54: */ ! 55: /* TODO(shess) current implementation requires pInput to be ! 56: ** nul-terminated. This should either be fixed, or pInput/nBytes ! 57: ** should be converted to zInput. ! 58: */ ! 59: int (*xOpen)(sqlite3_tokenizer *pTokenizer, ! 60: const char *pInput, int nBytes, ! 61: sqlite3_tokenizer_cursor **ppCursor); ! 62: int (*xClose)(sqlite3_tokenizer_cursor *pCursor); ! 63: int (*xNext)(sqlite3_tokenizer_cursor *pCursor, ! 64: const char **ppToken, int *pnBytes, ! 65: int *piStartOffset, int *piEndOffset, int *piPosition); ! 66: }; ! 67: ! 68: struct sqlite3_tokenizer { ! 69: sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ ! 70: /* Tokenizer implementations will typically add additional fields */ ! 71: }; ! 72: ! 73: struct sqlite3_tokenizer_cursor { ! 74: sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ ! 75: /* Tokenizer implementations will typically add additional fields */ ! 76: }; ! 77: ! 78: /* ! 79: ** Get the module for a tokenizer which generates tokens based on a ! 80: ** set of non-token characters. The default is to break tokens at any ! 81: ** non-alnum character, though the set of delimiters can also be ! 82: ** specified by the first argv argument to xCreate(). ! 83: */ ! 84: /* TODO(shess) This doesn't belong here. Need some sort of ! 85: ** registration process. ! 86: */ ! 87: void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule); ! 88: ! 89: #endif /* _TOKENIZER_H_ */