Annotation of embedaddon/sqlite3/ext/fts2/fts2_tokenizer.h, revision 1.1

1.1     ! misho       1: /*
        !             2: ** 2006 July 10
        !             3: **
        !             4: ** The author disclaims copyright to this source code.
        !             5: **
        !             6: *************************************************************************
        !             7: ** Defines the interface to tokenizers used by fulltext-search.  There
        !             8: ** are three basic components:
        !             9: **
        !            10: ** sqlite3_tokenizer_module is a singleton defining the tokenizer
        !            11: ** interface functions.  This is essentially the class structure for
        !            12: ** tokenizers.
        !            13: **
        !            14: ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
        !            15: ** including customization information defined at creation time.
        !            16: **
        !            17: ** sqlite3_tokenizer_cursor is created by a tokenizer to produce
        !            18: ** tokens from a particular input.
        !            19: */
        !            20: #ifndef _FTS2_TOKENIZER_H_
        !            21: #define _FTS2_TOKENIZER_H_
        !            22: 
        !            23: /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
        !            24: ** If tokenizers are to be allowed to call sqlite3_*() functions, then
        !            25: ** we will need a way to register the API consistently.
        !            26: */
        !            27: #include "sqlite3.h"
        !            28: 
        !            29: /*
        !            30: ** Structures used by the tokenizer interface. When a new tokenizer
        !            31: ** implementation is registered, the caller provides a pointer to
        !            32: ** an sqlite3_tokenizer_module containing pointers to the callback
        !            33: ** functions that make up the implementation.
        !            34: **
        !            35: ** When an fts2 table is created, the arguments given in the tokenizer
        !            36: ** clause of the CREATE VIRTUAL TABLE statement are passed to the
        !            37: ** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
        !            38: ** implementation. The xCreate() function in turn hands back (through
        !            39: ** *ppTokenizer) an sqlite3_tokenizer structure representing the tokenizer
        !            40: ** to be used for the fts2 table (customized by the tokenizer clause arguments).
        !            41: **
        !            42: ** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
        !            43: ** method is called. It hands back (through *ppCursor) an
        !            44: ** sqlite3_tokenizer_cursor object that is used to tokenize that one
        !            45: ** input buffer according to the rules of a specific sqlite3_tokenizer
        !            46: ** object.
        !            47: */
        !            48: typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
        !            49: typedef struct sqlite3_tokenizer sqlite3_tokenizer;
        !            50: typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
        !            51: 
        !            52: struct sqlite3_tokenizer_module {
        !            53: 
        !            54:   /*
        !            55:   ** Structure version. Should always be set to 0.
        !            56:   */
        !            57:   int iVersion;
        !            58: 
        !            59:   /*
        !            60:   ** Create a new tokenizer. The values in the argv[] array are the
        !            61:   ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
        !            62:   ** TABLE statement that created the fts2 table. For example, if
        !            63:   ** the following SQL is executed:
        !            64:   **
        !            65:   **   CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
        !            66:   **
        !            67:   ** then argc is set to 2, and the argv[] array contains pointers
        !            68:   ** to the strings "arg1" and "arg2".
        !            69:   **
        !            70:   ** This method should return either SQLITE_OK (0), or an SQLite error
        !            71:   ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
        !            72:   ** to point at the newly created tokenizer structure. The generic
        !            73:   ** sqlite3_tokenizer.pModule variable should not be initialised by
        !            74:   ** this callback. The caller will do so.
        !            75:   */
        !            76:   int (*xCreate)(
        !            77:     int argc,                           /* Size of argv array */
        !            78:     const char *const*argv,             /* Tokenizer argument strings */
        !            79:     sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
        !            80:   );
        !            81: 
        !            82:   /*
        !            83:   ** Destroy an existing tokenizer. The fts2 module calls this method
        !            84:   ** exactly once for each successful call to xCreate().
        !            85:   */
        !            86:   int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
        !            87: 
        !            88:   /*
        !            89:   ** Create a tokenizer cursor to tokenize the input buffer described by
        !            90:   ** pInput/nBytes; the new cursor is handed back through *ppCursor. The
        !            91:   ** caller must keep the buffer valid until the cursor is closed (xClose()).
        !            92:   */
        !            93:   int (*xOpen)(
        !            94:     sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
        !            95:     const char *pInput, int nBytes,      /* Input buffer */
        !            96:     sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
        !            97:   );
        !            98: 
        !            99:   /*
        !           100:   ** Destroy an existing tokenizer cursor. The fts2 module calls this
        !           101:   ** method exactly once for each successful call to xOpen().
        !           102:   */
        !           103:   int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
        !           104: 
        !           105:   /*
        !           106:   ** Retrieve the next token from the tokenizer cursor pCursor. This
        !           107:   ** method should either return SQLITE_OK and set the values of the
        !           108:   ** "OUT" variables identified below, or SQLITE_DONE to indicate that
        !           109:   ** the end of the buffer has been reached, or an SQLite error code.
        !           110:   **
        !           111:   ** *ppToken should be set to point at a buffer containing the
        !           112:   ** normalized version of the token (i.e. after any case-folding and/or
        !           113:   ** stemming has been performed). *pnBytes should be set to the length
        !           114:   ** of this buffer in bytes. The input text that generated the token is
        !           115:   ** identified by the byte offsets returned in *piStartOffset and
        !           116:   ** *piEndOffset.
        !           117:   **
        !           118:   ** The buffer *ppToken is set to point at is managed by the tokenizer
        !           119:   ** implementation. It is only required to be valid until the next call
        !           120:   ** to xNext() or xClose().
        !           121:   */
        !           122:   /* TODO(shess) current implementation requires the pInput buffer
        !           123:   ** passed to xOpen() to be nul-terminated.  This should either be
        !           124:   ** fixed, or pInput/nBytes should be converted to zInput.
        !           125:   */
        !           126:   int (*xNext)(
        !           127:     sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
        !           128:     const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
        !           129:     int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
        !           130:     int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
        !           131:     int *piPosition      /* OUT: Number of tokens returned before this one */
        !           132:   );
        !           133: };
        !           134: 
        !           135: struct sqlite3_tokenizer {
        !           136:   const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer; set by the caller of xCreate(), not by xCreate() itself */
        !           137:   /* Tokenizer implementations will typically add their own additional fields */
        !           138: };
        !           139: 
        !           140: struct sqlite3_tokenizer_cursor {
        !           141:   sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. NOTE(review): this header does not say whether xOpen() or its caller sets it - confirm */
        !           142:   /* Tokenizer implementations will typically add their own additional fields */
        !           143: };
        !           144: 
        !           145: #endif /* _FTS2_TOKENIZER_H_ */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>