Annotation of embedaddon/libxml2/include/libxml/encoding.h, revision 1.1.1.2

1.1       misho       1: /*
                      2:  * Summary: interface for the encoding conversion functions
                      3:  * Description: interface for the encoding conversion functions needed for
                      4:  *              XML basic encoding and iconv() support.
                      5:  *
                      6:  * Related specs are
                      7:  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
                      8:  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
                      9:  * [ISO-8859-1]   ISO Latin-1 characters codes.
                     10:  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
                     11:  *                Worldwide Character Encoding -- Version 1.0", Addison-
                     12:  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
                     13:  *                described in Unicode Technical Report #4.
                     14:  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
                     15:  *                Information Interchange, ANSI X3.4-1986.
                     16:  *
                     17:  * Copy: See Copyright for the status of this software.
                     18:  *
                     19:  * Author: Daniel Veillard
                     20:  */
                     21: 
                     22: #ifndef __XML_CHAR_ENCODING_H__
                     23: #define __XML_CHAR_ENCODING_H__
                     24: 
                     25: #include <libxml/xmlversion.h>
                     26: 
                     27: #ifdef LIBXML_ICONV_ENABLED
                     28: #include <iconv.h>
                     29: #endif
                     30: #ifdef LIBXML_ICU_ENABLED
                     31: #include <unicode/ucnv.h>
                     32: #endif
                     33: #ifdef __cplusplus
                     34: extern "C" {
                     35: #endif
                     36: 
                     37: /*
                     38:  * xmlCharEncoding:
                     39:  *
                     40:  * Predefined values for some standard encodings.
                     41:  * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
                     42:  * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
                     43:  *
                     44:  * Anything else would have to be translated to UTF8 before being
                     45:  * given to the parser itself. The BOM for UTF16 and the encoding
                     46:  * declaration are looked at and a converter is looked for at that
                     47:  * point. If not found the parser stops here as asked by the XML REC. A
                     48:  * converter can be registered by the user using xmlRegisterCharEncodingHandler
                     49:  * but the current form doesn't allow stateful transcoding (admittedly a
                     50:  * serious problem). If iconv has been found it will be used
                     51:  * automatically and allows stateful transcoding; the simplest approach
                     52:  * is then to enable iconv and to provide iconv libs for the encoding
                     53:  * support needed.
                     54:  *
                     55:  * Note that the generic "UTF-16" is not a predefined value.  Instead, only
                     56:  * the specific UTF-16LE and UTF-16BE are present.
                     57:  */
                     58: typedef enum {
                     59:     XML_CHAR_ENCODING_ERROR=   -1, /* Error value (presumably a failed lookup/detection; TODO confirm) */
                     60:     XML_CHAR_ENCODING_NONE=    0, /* No char encoding detected */
                     61:     XML_CHAR_ENCODING_UTF8=    1, /* UTF-8 */
                     62:     XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */
                     63:     XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */
                     64:     XML_CHAR_ENCODING_UCS4LE=  4, /* UCS-4 little endian */
                     65:     XML_CHAR_ENCODING_UCS4BE=  5, /* UCS-4 big endian */
                     66:     XML_CHAR_ENCODING_EBCDIC=  6, /* EBCDIC uh! */
                     67:     XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
                     68:     XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
                     69:     XML_CHAR_ENCODING_UCS2=    9, /* UCS-2 */
                     70:     XML_CHAR_ENCODING_8859_1=  10,/* ISO-8859-1 ISO Latin 1 */
                     71:     XML_CHAR_ENCODING_8859_2=  11,/* ISO-8859-2 ISO Latin 2 */
                     72:     XML_CHAR_ENCODING_8859_3=  12,/* ISO-8859-3 */
                     73:     XML_CHAR_ENCODING_8859_4=  13,/* ISO-8859-4 */
                     74:     XML_CHAR_ENCODING_8859_5=  14,/* ISO-8859-5 */
                     75:     XML_CHAR_ENCODING_8859_6=  15,/* ISO-8859-6 */
                     76:     XML_CHAR_ENCODING_8859_7=  16,/* ISO-8859-7 */
                     77:     XML_CHAR_ENCODING_8859_8=  17,/* ISO-8859-8 */
                     78:     XML_CHAR_ENCODING_8859_9=  18,/* ISO-8859-9 */
                     79:     XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
                     80:     XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
                     81:     XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
                     82:     XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
                     83: } xmlCharEncoding;
                     84: 
                     85: /**
                     86:  * xmlCharEncodingInputFunc:
                     87:  * @out:  a pointer to an array of bytes to store the UTF-8 result
                     88:  * @outlen:  the length of @out
                     89:  * @in:  a pointer to an array of chars in the original encoding
                     90:  * @inlen:  the length of @in
                     91:  *
                     92:  * Take a block of chars in the original encoding and try to convert
                     93:  * it to a UTF-8 block of chars out.
                     94:  *
                     95:  * Returns the number of bytes written, -1 if lack of space, or -2
                     96:  *     if the transcoding failed.
                     97:  * The value of @inlen after return is the number of octets consumed
                     98:  *     if the return value is positive, else unpredictable.
                     99:  * The value of @outlen after return is the number of octets produced.
                    100:  */
                    101: typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
                    102:                                          const unsigned char *in, int *inlen);
                    103: 
                    104: 
                    105: /**
                    106:  * xmlCharEncodingOutputFunc:
                    107:  * @out:  a pointer to an array of bytes to store the result
                    108:  * @outlen:  the length of @out
                    109:  * @in:  a pointer to an array of UTF-8 chars
                    110:  * @inlen:  the length of @in
                    111:  *
                    112:  * Take a block of UTF-8 chars in and try to convert it to another
                    113:  * encoding.
                    114:  * Note: a first call designed to produce heading info is called with
                    115:  * in = NULL. If stateful this should also initialize the encoder state.
                    116:  *
                    117:  * Returns the number of bytes written, -1 if lack of space, or -2
                    118:  *     if the transcoding failed.
                    119:  * The value of @inlen after return is the number of octets consumed
                    120:  *     if the return value is positive, else unpredictable.
                    121:  * The value of @outlen after return is the number of octets produced.
                    122:  */
                    123: typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
                    124:                                           const unsigned char *in, int *inlen);
                    125: 
                    126: 
                    127: /*
                    128:  * Block defining the handlers for non UTF-8 encodings.
                    129:  * If iconv and/or ICU is supported, each adds two extra fields.
                    130:  */
                    131: #ifdef LIBXML_ICU_ENABLED
                    132: struct _uconv_t { /* pair of ICU converters; only declared when LIBXML_ICU_ENABLED is defined */
                    133:   UConverter *uconv; /* converter between the handled encoding and UTF-16 */
                    134:   UConverter *utf8; /* converter between UTF-8 and UTF-16 */
                    135: };
                    136: typedef struct _uconv_t uconv_t;
                    137: #endif
                    138: 
                    139: typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
                    140: typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
                    141: struct _xmlCharEncodingHandler {
                    142:     char                       *name; /* presumably the name of the handled encoding -- TODO confirm */
                    143:     xmlCharEncodingInputFunc   input; /* callback converting from the original encoding to UTF-8 */
                    144:     xmlCharEncodingOutputFunc  output; /* callback converting from UTF-8 to another encoding */
                    145: #ifdef LIBXML_ICONV_ENABLED
                    146:     iconv_t                    iconv_in; /* iconv descriptor, presumably for the input direction -- confirm */
                    147:     iconv_t                    iconv_out; /* iconv descriptor, presumably for the output direction -- confirm */
                    148: #endif /* LIBXML_ICONV_ENABLED */
                    149: #ifdef LIBXML_ICU_ENABLED
                    150:     uconv_t                    *uconv_in; /* ICU converter pair, presumably for the input direction -- confirm */
                    151:     uconv_t                    *uconv_out; /* ICU converter pair, presumably for the output direction -- confirm */
                    152: #endif /* LIBXML_ICU_ENABLED */
                    153: };
                    154: 
                    155: #ifdef __cplusplus
                    156: }
                    157: #endif
                    158: #include <libxml/tree.h>
                    159: #ifdef __cplusplus
                    160: extern "C" {
                    161: #endif
                    162: 
                    163: /*
                    164:  * Interfaces for encoding handlers.
                    165:  */
1.1.1.2 ! misho     166: XMLPUBFUN void XMLCALL
1.1       misho     167:        xmlInitCharEncodingHandlers     (void);
1.1.1.2 ! misho     168: XMLPUBFUN void XMLCALL
1.1       misho     169:        xmlCleanupCharEncodingHandlers  (void);
1.1.1.2 ! misho     170: XMLPUBFUN void XMLCALL
1.1       misho     171:        xmlRegisterCharEncodingHandler  (xmlCharEncodingHandlerPtr handler);
                    172: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
                    173:        xmlGetCharEncodingHandler       (xmlCharEncoding enc);
                    174: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
                    175:        xmlFindCharEncodingHandler      (const char *name);
                    176: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
1.1.1.2 ! misho     177:        xmlNewCharEncodingHandler       (const char *name,
        !           178:                                         xmlCharEncodingInputFunc input,
        !           179:                                         xmlCharEncodingOutputFunc output);
1.1       misho     180: 
                    181: /*
                    182:  * Interfaces for encoding names and aliases.
                    183:  */
1.1.1.2 ! misho     184: XMLPUBFUN int XMLCALL
1.1       misho     185:        xmlAddEncodingAlias             (const char *name,
                    186:                                         const char *alias);
1.1.1.2 ! misho     187: XMLPUBFUN int XMLCALL
1.1       misho     188:        xmlDelEncodingAlias             (const char *alias);
                    189: XMLPUBFUN const char * XMLCALL
                    190:        xmlGetEncodingAlias             (const char *alias);
1.1.1.2 ! misho     191: XMLPUBFUN void XMLCALL
1.1       misho     192:        xmlCleanupEncodingAliases       (void);
                    193: XMLPUBFUN xmlCharEncoding XMLCALL
                    194:        xmlParseCharEncoding            (const char *name);
                    195: XMLPUBFUN const char * XMLCALL
                    196:        xmlGetCharEncodingName          (xmlCharEncoding enc);
                    197: 
                    198: /*
                    199:  * Interfaces directly used by the parsers.
                    200:  */
                    201: XMLPUBFUN xmlCharEncoding XMLCALL
                    202:        xmlDetectCharEncoding           (const unsigned char *in,
                    203:                                         int len);
                    204: 
1.1.1.2 ! misho     205: XMLPUBFUN int XMLCALL
1.1       misho     206:        xmlCharEncOutFunc               (xmlCharEncodingHandler *handler,
                    207:                                         xmlBufferPtr out,
                    208:                                         xmlBufferPtr in);
                    209: 
1.1.1.2 ! misho     210: XMLPUBFUN int XMLCALL
1.1       misho     211:        xmlCharEncInFunc                (xmlCharEncodingHandler *handler,
                    212:                                         xmlBufferPtr out,
                    213:                                         xmlBufferPtr in);
                    214: XMLPUBFUN int XMLCALL
                    215:        xmlCharEncFirstLine             (xmlCharEncodingHandler *handler,
                    216:                                         xmlBufferPtr out,
                    217:                                         xmlBufferPtr in);
1.1.1.2 ! misho     218: XMLPUBFUN int XMLCALL
1.1       misho     219:        xmlCharEncCloseFunc             (xmlCharEncodingHandler *handler);
                    220: 
                    221: /*
                    222:  * Export a few useful conversion functions (UTF-8 <-> ISO Latin-1).
                    223:  */
                    224: #ifdef LIBXML_OUTPUT_ENABLED
1.1.1.2 ! misho     225: XMLPUBFUN int XMLCALL
1.1       misho     226:        UTF8Toisolat1                   (unsigned char *out,
                    227:                                         int *outlen,
                    228:                                         const unsigned char *in,
                    229:                                         int *inlen);
                    230: #endif /* LIBXML_OUTPUT_ENABLED */
1.1.1.2 ! misho     231: XMLPUBFUN int XMLCALL
1.1       misho     232:        isolat1ToUTF8                   (unsigned char *out,
                    233:                                         int *outlen,
                    234:                                         const unsigned char *in,
                    235:                                         int *inlen);
                    236: #ifdef __cplusplus
                    237: }
                    238: #endif
                    239: 
                    240: #endif /* __XML_CHAR_ENCODING_H__ */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>