File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / include / libxml / encoding.h
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:38 2014 UTC (10 years, 2 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD
libxml2 2.9.1

    1: /*
    2:  * Summary: interface for the encoding conversion functions
    3:  * Description: interface for the encoding conversion functions needed for
    4:  *              XML basic encoding and iconv() support.
    5:  *
    6:  * Related specs are
    7:  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
    8:  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
    9:  * [ISO-8859-1]   ISO Latin-1 characters codes.
   10:  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
   11:  *                Worldwide Character Encoding -- Version 1.0", Addison-
   12:  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
   13:  *                described in Unicode Technical Report #4.
   14:  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
   15:  *                Information Interchange, ANSI X3.4-1986.
   16:  *
   17:  * Copy: See Copyright for the status of this software.
   18:  *
   19:  * Author: Daniel Veillard
   20:  */
   21: 
   22: #ifndef __XML_CHAR_ENCODING_H__
   23: #define __XML_CHAR_ENCODING_H__
   24: 
   25: #include <libxml/xmlversion.h>
   26: 
   27: #ifdef LIBXML_ICONV_ENABLED
   28: #include <iconv.h>
   29: #endif
   30: #ifdef LIBXML_ICU_ENABLED
   31: #include <unicode/ucnv.h>
   32: #endif
   33: #ifdef __cplusplus
   34: extern "C" {
   35: #endif
   36: 
   37: /*
   38:  * xmlCharEncoding:
   39:  *
   40:  * Predefined values for some standard encodings.
   41:  * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
   42:  * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
   43:  *
   44:  * Anything else would have to be translated to UTF8 before being
   45:  * given to the parser itself. The BOM for UTF16 and the encoding
   46:  * declaration are looked at and a converter is looked for at that
   47:  * point. If not found, the parser stops there as asked by the XML REC. A
   48:  * converter can be registered by the user using xmlRegisterCharEncodingHandler
   49:  * but the current form doesn't allow stateful transcoding (a serious
   50:  * problem, agreed!). If iconv has been found it will be used
   51:  * automatically and allow stateful transcoding; the simplest is then
   52:  * to be sure to enable iconv and to provide iconv libs for the encoding
   53:  * support needed.
   54:  *
   55:  * Note that the generic "UTF-16" is not a predefined value.  Instead, only
   56:  * the specific UTF-16LE and UTF-16BE are present.
   57:  */
   58: typedef enum {
   59:     XML_CHAR_ENCODING_ERROR=   -1, /* Error indicator, distinct from NONE -- TODO confirm exact meaning in encoding.c */
   60:     XML_CHAR_ENCODING_NONE=	0, /* No char encoding detected */
   61:     XML_CHAR_ENCODING_UTF8=	1, /* UTF-8 */
   62:     XML_CHAR_ENCODING_UTF16LE=	2, /* UTF-16 little endian */
   63:     XML_CHAR_ENCODING_UTF16BE=	3, /* UTF-16 big endian */
   64:     XML_CHAR_ENCODING_UCS4LE=	4, /* UCS-4 little endian */
   65:     XML_CHAR_ENCODING_UCS4BE=	5, /* UCS-4 big endian */
   66:     XML_CHAR_ENCODING_EBCDIC=	6, /* EBCDIC uh! */
   67:     XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
   68:     XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
   69:     XML_CHAR_ENCODING_UCS2=	9, /* UCS-2 */
   70:     XML_CHAR_ENCODING_8859_1=	10,/* ISO-8859-1 ISO Latin 1 */
   71:     XML_CHAR_ENCODING_8859_2=	11,/* ISO-8859-2 ISO Latin 2 */
   72:     XML_CHAR_ENCODING_8859_3=	12,/* ISO-8859-3 */
   73:     XML_CHAR_ENCODING_8859_4=	13,/* ISO-8859-4 */
   74:     XML_CHAR_ENCODING_8859_5=	14,/* ISO-8859-5 */
   75:     XML_CHAR_ENCODING_8859_6=	15,/* ISO-8859-6 */
   76:     XML_CHAR_ENCODING_8859_7=	16,/* ISO-8859-7 */
   77:     XML_CHAR_ENCODING_8859_8=	17,/* ISO-8859-8 */
   78:     XML_CHAR_ENCODING_8859_9=	18,/* ISO-8859-9 */
   79:     XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
   80:     XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
   81:     XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
   82:     XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
   83: } xmlCharEncoding;
   84: 
   85: /**
   86:  * xmlCharEncodingInputFunc:
   87:  * @out:  a pointer to an array of bytes to store the UTF-8 result
   88:  * @outlen:  the length of @out
   89:  * @in:  a pointer to an array of chars in the original encoding
   90:  * @inlen:  the length of @in
   91:  *
   92:  * Take a block of chars in the original encoding and try to convert
   93:  * it to a UTF-8 block of chars out.
   94:  *
   95:  * Returns the number of bytes written, -1 if lack of space, or -2
   96:  *     if the transcoding failed.
   97:  * The value of @inlen after return is the number of octets consumed
   98:  *     if the return value is positive, else unpredictable.
   99:  * The value of @outlen after return is the number of octets produced.
  100:  */
  101: typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
  102:                                          const unsigned char *in, int *inlen);
  103: 
  104: 
  105: /**
  106:  * xmlCharEncodingOutputFunc:
  107:  * @out:  a pointer to an array of bytes to store the result
  108:  * @outlen:  the length of @out
  109:  * @in:  a pointer to an array of UTF-8 chars
  110:  * @inlen:  the length of @in
  111:  *
  112:  * Take a block of UTF-8 chars in and try to convert it to another
  113:  * encoding.
  114:  * Note: a first call, designed to produce heading info, is made with
  115:  * in = NULL. If stateful, this should also initialize the encoder state.
  116:  *
  117:  * Returns the number of bytes written, -1 if lack of space, or -2
  118:  *     if the transcoding failed.
  119:  * The value of @inlen after return is the number of octets consumed
  120:  *     if the return value is positive, else unpredictable.
  121:  * The value of @outlen after return is the number of octets produced.
  122:  */
  123: typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
  124:                                           const unsigned char *in, int *inlen);
  125: 
  126: 
  127: /*
  128:  * Block defining the handlers for non UTF-8 encodings.
  129:  * If iconv or ICU support is compiled in, each adds two extra fields.
  130:  */
  131: #ifdef LIBXML_ICU_ENABLED
  132: struct _uconv_t { /* pair of ICU UConverter handles; declared only when LIBXML_ICU_ENABLED is defined */
  133:   UConverter *uconv; /* for conversion between an encoding and UTF-16 */
  134:   UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
  135: };
  136: typedef struct _uconv_t uconv_t;
  137: #endif /* LIBXML_ICU_ENABLED */
  138: 
  139: typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
  140: typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
  141: struct _xmlCharEncodingHandler { /* one conversion handler; layout depends on the iconv/ICU build macros below */
  142:     char                       *name; /* handler name (presumably the encoding name -- TODO confirm) */
  143:     xmlCharEncodingInputFunc   input; /* callback converting from the original encoding to UTF-8 */
  144:     xmlCharEncodingOutputFunc  output; /* callback converting from UTF-8 to the other encoding */
  145: #ifdef LIBXML_ICONV_ENABLED
  146:     iconv_t                    iconv_in; /* iconv descriptor; presumably for the input direction -- TODO confirm */
  147:     iconv_t                    iconv_out; /* iconv descriptor; presumably for the output direction -- TODO confirm */
  148: #endif /* LIBXML_ICONV_ENABLED */
  149: #ifdef LIBXML_ICU_ENABLED
  150:     uconv_t                    *uconv_in; /* ICU converter pair; presumably for the input direction -- TODO confirm */
  151:     uconv_t                    *uconv_out; /* ICU converter pair; presumably for the output direction -- TODO confirm */
  152: #endif /* LIBXML_ICU_ENABLED */
  153: };
  154: 
  155: #ifdef __cplusplus
  156: }
  157: #endif
  158: #include <libxml/tree.h>
  159: #ifdef __cplusplus
  160: extern "C" {
  161: #endif
  162: 
  163: /*
  164:  * Interfaces for encoding handlers: as the names indicate, global init/cleanup, registration, lookup by xmlCharEncoding value or by name, and creation from a name plus input/output callbacks.
  165:  */
  166: XMLPUBFUN void XMLCALL
  167: 	xmlInitCharEncodingHandlers	(void);
  168: XMLPUBFUN void XMLCALL
  169: 	xmlCleanupCharEncodingHandlers	(void);
  170: XMLPUBFUN void XMLCALL
  171: 	xmlRegisterCharEncodingHandler	(xmlCharEncodingHandlerPtr handler);
  172: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
  173: 	xmlGetCharEncodingHandler	(xmlCharEncoding enc);
  174: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
  175: 	xmlFindCharEncodingHandler	(const char *name);
  176: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
  177: 	xmlNewCharEncodingHandler	(const char *name,
  178: 					 xmlCharEncodingInputFunc input,
  179: 					 xmlCharEncodingOutputFunc output);
  180: 
  181: /*
  182:  * Interfaces for encoding names and aliases. xmlParseCharEncoding takes a name and returns an xmlCharEncoding value; xmlGetCharEncodingName takes an xmlCharEncoding value and returns a const string.
  183:  */
  184: XMLPUBFUN int XMLCALL
  185: 	xmlAddEncodingAlias		(const char *name,
  186: 					 const char *alias);
  187: XMLPUBFUN int XMLCALL
  188: 	xmlDelEncodingAlias		(const char *alias);
  189: XMLPUBFUN const char * XMLCALL
  190: 	xmlGetEncodingAlias		(const char *alias);
  191: XMLPUBFUN void XMLCALL
  192: 	xmlCleanupEncodingAliases	(void);
  193: XMLPUBFUN xmlCharEncoding XMLCALL
  194: 	xmlParseCharEncoding		(const char *name);
  195: XMLPUBFUN const char * XMLCALL
  196: 	xmlGetCharEncodingName		(xmlCharEncoding enc);
  197: 
  198: /*
  199:  * Interfaces directly used by the parsers. The three conversion entry points take a handler plus xmlBufferPtr out/in buffers (xmlBufferPtr is presumably declared by <libxml/tree.h>, included above -- TODO confirm).
  200:  */
  201: XMLPUBFUN xmlCharEncoding XMLCALL
  202: 	xmlDetectCharEncoding		(const unsigned char *in,
  203: 					 int len);
  204: 
  205: XMLPUBFUN int XMLCALL
  206: 	xmlCharEncOutFunc		(xmlCharEncodingHandler *handler,
  207: 					 xmlBufferPtr out,
  208: 					 xmlBufferPtr in);
  209: 
  210: XMLPUBFUN int XMLCALL
  211: 	xmlCharEncInFunc		(xmlCharEncodingHandler *handler,
  212: 					 xmlBufferPtr out,
  213: 					 xmlBufferPtr in);
  214: XMLPUBFUN int XMLCALL
  215: 	xmlCharEncFirstLine		(xmlCharEncodingHandler *handler,
  216: 					 xmlBufferPtr out,
  217: 					 xmlBufferPtr in);
  218: XMLPUBFUN int XMLCALL
  219: 	xmlCharEncCloseFunc		(xmlCharEncodingHandler *handler);
  220: 
  221: /*
  222:  * Export a few useful functions. Both have the (out, outlen, in, inlen) signature of the xmlCharEncodingInputFunc/xmlCharEncodingOutputFunc callbacks; UTF8Toisolat1 is declared only when LIBXML_OUTPUT_ENABLED is defined.
  223:  */
  224: #ifdef LIBXML_OUTPUT_ENABLED
  225: XMLPUBFUN int XMLCALL
  226: 	UTF8Toisolat1			(unsigned char *out,
  227: 					 int *outlen,
  228: 					 const unsigned char *in,
  229: 					 int *inlen);
  230: #endif /* LIBXML_OUTPUT_ENABLED */
  231: XMLPUBFUN int XMLCALL
  232: 	isolat1ToUTF8			(unsigned char *out,
  233: 					 int *outlen,
  234: 					 const unsigned char *in,
  235: 					 int *inlen);
  236: #ifdef __cplusplus
  237: }
  238: #endif
  239: 
  240: #endif /* __XML_CHAR_ENCODING_H__ */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>