Return to encoding.h CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / include / libxml |
1.1 ! misho 1: /* ! 2: * Summary: interface for the encoding conversion functions ! 3: * Description: interface for the encoding conversion functions needed for ! 4: * XML basic encoding and iconv() support. ! 5: * ! 6: * Related specs are ! 7: * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies ! 8: * [ISO-10646] UTF-8 and UTF-16 in Annexes ! 9: * [ISO-8859-1] ISO Latin-1 characters codes. ! 10: * [UNICODE] The Unicode Consortium, "The Unicode Standard -- ! 11: * Worldwide Character Encoding -- Version 1.0", Addison- ! 12: * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is ! 13: * described in Unicode Technical Report #4. ! 14: * [US-ASCII] Coded Character Set--7-bit American Standard Code for ! 15: * Information Interchange, ANSI X3.4-1986. ! 16: * ! 17: * Copy: See Copyright for the status of this software. ! 18: * ! 19: * Author: Daniel Veillard ! 20: */ ! 21: ! 22: #ifndef __XML_CHAR_ENCODING_H__ ! 23: #define __XML_CHAR_ENCODING_H__ ! 24: ! 25: #include <libxml/xmlversion.h> ! 26: ! 27: #ifdef LIBXML_ICONV_ENABLED ! 28: #include <iconv.h> ! 29: #endif ! 30: #ifdef LIBXML_ICU_ENABLED ! 31: #include <unicode/ucnv.h> ! 32: #endif ! 33: #ifdef __cplusplus ! 34: extern "C" { ! 35: #endif ! 36: ! 37: /* ! 38: * xmlCharEncoding: ! 39: * ! 40: * Predefined values for some standard encodings. ! 41: * Libxml does not do beforehand translation on UTF8 and ISOLatinX. ! 42: * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default. ! 43: * ! 44: * Anything else would have to be translated to UTF8 before being ! 45: * given to the parser itself. The BOM for UTF16 and the encoding ! 46: * declaration are looked at and a converter is looked for at that ! 47: * point. If not found the parser stops here as asked by the XML REC. A ! 48: * converter can be registered by the user using xmlRegisterCharEncodingHandler ! 49: * but the current form doesn't allow stateful transcoding (a serious ! 50: * problem agreed !). If iconv has been found it will be used ! 51: * automatically and allow stateful transcoding, the simplest is then ! 52: * to be sure to enable iconv and to provide iconv libs for the encoding ! 53: * support needed. ! 54: * ! 55: * Note that the generic "UTF-16" is not a predefined value. Instead, only ! 56: * the specific UTF-16LE and UTF-16BE are present. ! 57: */ ! 58: typedef enum { ! 59: XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ ! 60: XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ ! 61: XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ ! 62: XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ ! 63: XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ ! 64: XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ ! 65: XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ ! 66: XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ ! 67: XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ ! 68: XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ ! 69: XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ ! 70: XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ ! 71: XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ ! 72: XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ ! 73: XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ ! 74: XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ ! 75: XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ ! 76: XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ ! 77: XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ ! 78: XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ ! 79: XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ ! 80: XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ ! 81: XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ ! 82: XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ ! 83: } xmlCharEncoding; ! 84: ! 85: /** ! 86: * xmlCharEncodingInputFunc: ! 87: * @out: a pointer to an array of bytes to store the UTF-8 result ! 88: * @outlen: the length of @out ! 89: * @in: a pointer to an array of chars in the original encoding ! 90: * @inlen: the length of @in ! 91: * ! 92: * Take a block of chars in the original encoding and try to convert ! 93: * it to an UTF-8 block of chars out. ! 94: * ! 95: * Returns the number of bytes written, -1 if lack of space, or -2 ! 96: * if the transcoding failed. ! 97: * The value of @inlen after return is the number of octets consumed ! 98: * if the return value is positive, else unpredictiable. ! 99: * The value of @outlen after return is the number of octets consumed. ! 100: */ ! 101: typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen, ! 102: const unsigned char *in, int *inlen); ! 103: ! 104: ! 105: /** ! 106: * xmlCharEncodingOutputFunc: ! 107: * @out: a pointer to an array of bytes to store the result ! 108: * @outlen: the length of @out ! 109: * @in: a pointer to an array of UTF-8 chars ! 110: * @inlen: the length of @in ! 111: * ! 112: * Take a block of UTF-8 chars in and try to convert it to another ! 113: * encoding. ! 114: * Note: a first call designed to produce heading info is called with ! 115: * in = NULL. If stateful this should also initialize the encoder state. ! 116: * ! 117: * Returns the number of bytes written, -1 if lack of space, or -2 ! 118: * if the transcoding failed. ! 119: * The value of @inlen after return is the number of octets consumed ! 120: * if the return value is positive, else unpredictiable. ! 121: * The value of @outlen after return is the number of octets produced. ! 122: */ ! 123: typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, ! 124: const unsigned char *in, int *inlen); ! 125: ! 126: ! 127: /* ! 128: * Block defining the handlers for non UTF-8 encodings. ! 129: * If iconv is supported, there are two extra fields. ! 130: */ ! 131: #ifdef LIBXML_ICU_ENABLED ! 132: struct _uconv_t { ! 133: UConverter *uconv; /* for conversion between an encoding and UTF-16 */ ! 134: UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ ! 135: }; ! 136: typedef struct _uconv_t uconv_t; ! 137: #endif ! 138: ! 139: typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; ! 140: typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; ! 141: struct _xmlCharEncodingHandler { ! 142: char *name; ! 143: xmlCharEncodingInputFunc input; ! 144: xmlCharEncodingOutputFunc output; ! 145: #ifdef LIBXML_ICONV_ENABLED ! 146: iconv_t iconv_in; ! 147: iconv_t iconv_out; ! 148: #endif /* LIBXML_ICONV_ENABLED */ ! 149: #ifdef LIBXML_ICU_ENABLED ! 150: uconv_t *uconv_in; ! 151: uconv_t *uconv_out; ! 152: #endif /* LIBXML_ICU_ENABLED */ ! 153: }; ! 154: ! 155: #ifdef __cplusplus ! 156: } ! 157: #endif ! 158: #include <libxml/tree.h> ! 159: #ifdef __cplusplus ! 160: extern "C" { ! 161: #endif ! 162: ! 163: /* ! 164: * Interfaces for encoding handlers. ! 165: */ ! 166: XMLPUBFUN void XMLCALL ! 167: xmlInitCharEncodingHandlers (void); ! 168: XMLPUBFUN void XMLCALL ! 169: xmlCleanupCharEncodingHandlers (void); ! 170: XMLPUBFUN void XMLCALL ! 171: xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); ! 172: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL ! 173: xmlGetCharEncodingHandler (xmlCharEncoding enc); ! 174: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL ! 175: xmlFindCharEncodingHandler (const char *name); ! 176: XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL ! 177: xmlNewCharEncodingHandler (const char *name, ! 178: xmlCharEncodingInputFunc input, ! 179: xmlCharEncodingOutputFunc output); ! 180: ! 181: /* ! 182: * Interfaces for encoding names and aliases. ! 183: */ ! 184: XMLPUBFUN int XMLCALL ! 185: xmlAddEncodingAlias (const char *name, ! 186: const char *alias); ! 187: XMLPUBFUN int XMLCALL ! 188: xmlDelEncodingAlias (const char *alias); ! 189: XMLPUBFUN const char * XMLCALL ! 190: xmlGetEncodingAlias (const char *alias); ! 191: XMLPUBFUN void XMLCALL ! 192: xmlCleanupEncodingAliases (void); ! 193: XMLPUBFUN xmlCharEncoding XMLCALL ! 194: xmlParseCharEncoding (const char *name); ! 195: XMLPUBFUN const char * XMLCALL ! 196: xmlGetCharEncodingName (xmlCharEncoding enc); ! 197: ! 198: /* ! 199: * Interfaces directly used by the parsers. ! 200: */ ! 201: XMLPUBFUN xmlCharEncoding XMLCALL ! 202: xmlDetectCharEncoding (const unsigned char *in, ! 203: int len); ! 204: ! 205: XMLPUBFUN int XMLCALL ! 206: xmlCharEncOutFunc (xmlCharEncodingHandler *handler, ! 207: xmlBufferPtr out, ! 208: xmlBufferPtr in); ! 209: ! 210: XMLPUBFUN int XMLCALL ! 211: xmlCharEncInFunc (xmlCharEncodingHandler *handler, ! 212: xmlBufferPtr out, ! 213: xmlBufferPtr in); ! 214: XMLPUBFUN int XMLCALL ! 215: xmlCharEncFirstLine (xmlCharEncodingHandler *handler, ! 216: xmlBufferPtr out, ! 217: xmlBufferPtr in); ! 218: XMLPUBFUN int XMLCALL ! 219: xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); ! 220: ! 221: /* ! 222: * Export a few useful functions ! 223: */ ! 224: #ifdef LIBXML_OUTPUT_ENABLED ! 225: XMLPUBFUN int XMLCALL ! 226: UTF8Toisolat1 (unsigned char *out, ! 227: int *outlen, ! 228: const unsigned char *in, ! 229: int *inlen); ! 230: #endif /* LIBXML_OUTPUT_ENABLED */ ! 231: XMLPUBFUN int XMLCALL ! 232: isolat1ToUTF8 (unsigned char *out, ! 233: int *outlen, ! 234: const unsigned char *in, ! 235: int *inlen); ! 236: #ifdef __cplusplus ! 237: } ! 238: #endif ! 239: ! 240: #endif /* __XML_CHAR_ENCODING_H__ */