File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / encoding.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:29 2014 UTC (10 years ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD
libxml2 2.9.1

    1: /*
    2:  * encoding.c : implements the encoding conversion functions needed for XML
    3:  *
    4:  * Related specs:
    5:  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
    6:  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
    7:  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
    8:  * [ISO-8859-1]   ISO Latin-1 characters codes.
    9:  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
   10:  *                Worldwide Character Encoding -- Version 1.0", Addison-
   11:  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
   12:  *                described in Unicode Technical Report #4.
   13:  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
   14:  *                Information Interchange, ANSI X3.4-1986.
   15:  *
   16:  * See Copyright for the status of this software.
   17:  *
   18:  * daniel@veillard.com
   19:  *
   20:  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
   21:  */
   22: 
   23: #define IN_LIBXML
   24: #include "libxml.h"
   25: 
   26: #include <string.h>
   27: #include <limits.h>
   28: 
   29: #ifdef HAVE_CTYPE_H
   30: #include <ctype.h>
   31: #endif
   32: #ifdef HAVE_STDLIB_H
   33: #include <stdlib.h>
   34: #endif
   35: #ifdef LIBXML_ICONV_ENABLED
   36: #ifdef HAVE_ERRNO_H
   37: #include <errno.h>
   38: #endif
   39: #endif
   40: #include <libxml/encoding.h>
   41: #include <libxml/xmlmemory.h>
   42: #ifdef LIBXML_HTML_ENABLED
   43: #include <libxml/HTMLparser.h>
   44: #endif
   45: #include <libxml/globals.h>
   46: #include <libxml/xmlerror.h>
   47: 
   48: #include "buf.h"
   49: #include "enc.h"
   50: 
/*
 * Handler pointers for the two UTF-16 byte orders. Both start out NULL;
 * the code that assigns them is not part of this section.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/* One entry of the encoding alias table: an encoding name and its alias. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/*
 * State of the alias table: the array plus two counters.
 * NOTE(review): presumably Nb is the number of entries in use and Max the
 * allocated capacity -- confirm against the alias registration code.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
/* Forward declaration, only compiled when neither iconv nor ICU is enabled. */
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Host byte-order flag, initialised to 1 (little endian).
 * NOTE(review): presumably corrected during library initialisation -- the
 * code doing so is not visible in this section.
 */
static int xmlLittleEndian = 1;
   76: 
/**
 * xmlEncodingErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition by reporting XML_ERR_NO_MEMORY in the
 * XML_FROM_I18N domain through __xmlSimpleError.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
   88: 
/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message
 * @val:  a string accompanying the message; it is handed to
 *        __xmlRaiseError twice, once before the numeric arguments and
 *        once after @msg
 *
 * Report an encoding error through __xmlRaiseError, using the
 * XML_FROM_I18N domain and the XML_ERR_FATAL level.
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
  103: 
  104: #ifdef LIBXML_ICU_ENABLED
  105: static uconv_t*
  106: openIcuConverter(const char* name, int toUnicode)
  107: {
  108:   UErrorCode status = U_ZERO_ERROR;
  109:   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
  110:   if (conv == NULL)
  111:     return NULL;
  112: 
  113:   conv->uconv = ucnv_open(name, &status);
  114:   if (U_FAILURE(status))
  115:     goto error;
  116: 
  117:   status = U_ZERO_ERROR;
  118:   if (toUnicode) {
  119:     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
  120:                         NULL, NULL, NULL, &status);
  121:   }
  122:   else {
  123:     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
  124:                         NULL, NULL, NULL, &status);
  125:   }
  126:   if (U_FAILURE(status))
  127:     goto error;
  128: 
  129:   status = U_ZERO_ERROR;
  130:   conv->utf8 = ucnv_open("UTF-8", &status);
  131:   if (U_SUCCESS(status))
  132:     return conv;
  133: 
  134: error:
  135:   if (conv->uconv)
  136:     ucnv_close(conv->uconv);
  137:   xmlFree(conv);
  138:   return NULL;
  139: }
  140: 
  141: static void
  142: closeIcuConverter(uconv_t *conv)
  143: {
  144:   if (conv != NULL) {
  145:     ucnv_close(conv->uconv);
  146:     ucnv_close(conv->utf8);
  147:     xmlFree(conv);
  148:   }
  149: }
  150: #endif /* LIBXML_ICU_ENABLED */
  151: 
  152: /************************************************************************
  153:  *									*
  154:  *		Conversions To/From UTF8 encoding			*
  155:  *									*
  156:  ************************************************************************/
  157: 
  158: /**
  159:  * asciiToUTF8:
  160:  * @out:  a pointer to an array of bytes to store the result
  161:  * @outlen:  the length of @out
  162:  * @in:  a pointer to an array of ASCII chars
  163:  * @inlen:  the length of @in
  164:  *
  165:  * Take a block of ASCII chars in and try to convert it to an UTF-8
  166:  * block of chars out.
  167:  * Returns 0 if success, or -1 otherwise
  168:  * The value of @inlen after return is the number of octets consumed
  169:  *     if the return value is positive, else unpredictable.
  170:  * The value of @outlen after return is the number of octets consumed.
  171:  */
  172: static int
  173: asciiToUTF8(unsigned char* out, int *outlen,
  174:               const unsigned char* in, int *inlen) {
  175:     unsigned char* outstart = out;
  176:     const unsigned char* base = in;
  177:     const unsigned char* processed = in;
  178:     unsigned char* outend = out + *outlen;
  179:     const unsigned char* inend;
  180:     unsigned int c;
  181: 
  182:     inend = in + (*inlen);
  183:     while ((in < inend) && (out - outstart + 5 < *outlen)) {
  184: 	c= *in++;
  185: 
  186:         if (out >= outend)
  187: 	    break;
  188:         if (c < 0x80) {
  189: 	    *out++ = c;
  190: 	} else {
  191: 	    *outlen = out - outstart;
  192: 	    *inlen = processed - base;
  193: 	    return(-1);
  194: 	}
  195: 
  196: 	processed = (const unsigned char*) in;
  197:     }
  198:     *outlen = out - outstart;
  199:     *inlen = processed - base;
  200:     return(*outlen);
  201: }
  202: 
  203: #ifdef LIBXML_OUTPUT_ENABLED
  204: /**
  205:  * UTF8Toascii:
  206:  * @out:  a pointer to an array of bytes to store the result
  207:  * @outlen:  the length of @out
  208:  * @in:  a pointer to an array of UTF-8 chars
  209:  * @inlen:  the length of @in
  210:  *
  211:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
  212:  * block of chars out.
  213:  *
  214:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
  215:  * The value of @inlen after return is the number of octets consumed
  216:  *     if the return value is positive, else unpredictable.
  217:  * The value of @outlen after return is the number of octets consumed.
  218:  */
  219: static int
  220: UTF8Toascii(unsigned char* out, int *outlen,
  221:               const unsigned char* in, int *inlen) {
  222:     const unsigned char* processed = in;
  223:     const unsigned char* outend;
  224:     const unsigned char* outstart = out;
  225:     const unsigned char* instart = in;
  226:     const unsigned char* inend;
  227:     unsigned int c, d;
  228:     int trailing;
  229: 
  230:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
  231:     if (in == NULL) {
  232:         /*
  233: 	 * initialization nothing to do
  234: 	 */
  235: 	*outlen = 0;
  236: 	*inlen = 0;
  237: 	return(0);
  238:     }
  239:     inend = in + (*inlen);
  240:     outend = out + (*outlen);
  241:     while (in < inend) {
  242: 	d = *in++;
  243: 	if      (d < 0x80)  { c= d; trailing= 0; }
  244: 	else if (d < 0xC0) {
  245: 	    /* trailing byte in leading position */
  246: 	    *outlen = out - outstart;
  247: 	    *inlen = processed - instart;
  248: 	    return(-2);
  249:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
  250:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
  251:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
  252: 	else {
  253: 	    /* no chance for this in Ascii */
  254: 	    *outlen = out - outstart;
  255: 	    *inlen = processed - instart;
  256: 	    return(-2);
  257: 	}
  258: 
  259: 	if (inend - in < trailing) {
  260: 	    break;
  261: 	}
  262: 
  263: 	for ( ; trailing; trailing--) {
  264: 	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
  265: 		break;
  266: 	    c <<= 6;
  267: 	    c |= d & 0x3F;
  268: 	}
  269: 
  270: 	/* assertion: c is a single UTF-4 value */
  271: 	if (c < 0x80) {
  272: 	    if (out >= outend)
  273: 		break;
  274: 	    *out++ = c;
  275: 	} else {
  276: 	    /* no chance for this in Ascii */
  277: 	    *outlen = out - outstart;
  278: 	    *inlen = processed - instart;
  279: 	    return(-2);
  280: 	}
  281: 	processed = in;
  282:     }
  283:     *outlen = out - outstart;
  284:     *inlen = processed - instart;
  285:     return(*outlen);
  286: }
  287: #endif /* LIBXML_OUTPUT_ENABLED */
  288: 
  289: /**
  290:  * isolat1ToUTF8:
  291:  * @out:  a pointer to an array of bytes to store the result
  292:  * @outlen:  the length of @out
  293:  * @in:  a pointer to an array of ISO Latin 1 chars
  294:  * @inlen:  the length of @in
  295:  *
  296:  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
  297:  * block of chars out.
  298:  * Returns the number of bytes written if success, or -1 otherwise
  299:  * The value of @inlen after return is the number of octets consumed
  300:  *     if the return value is positive, else unpredictable.
  301:  * The value of @outlen after return is the number of octets consumed.
  302:  */
  303: int
  304: isolat1ToUTF8(unsigned char* out, int *outlen,
  305:               const unsigned char* in, int *inlen) {
  306:     unsigned char* outstart = out;
  307:     const unsigned char* base = in;
  308:     unsigned char* outend;
  309:     const unsigned char* inend;
  310:     const unsigned char* instop;
  311: 
  312:     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
  313: 	return(-1);
  314: 
  315:     outend = out + *outlen;
  316:     inend = in + (*inlen);
  317:     instop = inend;
  318: 
  319:     while ((in < inend) && (out < outend - 1)) {
  320: 	if (*in >= 0x80) {
  321: 	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
  322:             *out++ = ((*in) & 0x3F) | 0x80;
  323: 	    ++in;
  324: 	}
  325: 	if ((instop - in) > (outend - out)) instop = in + (outend - out);
  326: 	while ((in < instop) && (*in < 0x80)) {
  327: 	    *out++ = *in++;
  328: 	}
  329:     }
  330:     if ((in < inend) && (out < outend) && (*in < 0x80)) {
  331:         *out++ = *in++;
  332:     }
  333:     *outlen = out - outstart;
  334:     *inlen = in - base;
  335:     return(*outlen);
  336: }
  337: 
  338: /**
  339:  * UTF8ToUTF8:
  340:  * @out:  a pointer to an array of bytes to store the result
  341:  * @outlen:  the length of @out
  342:  * @inb:  a pointer to an array of UTF-8 chars
  343:  * @inlenb:  the length of @in in UTF-8 chars
  344:  *
  345:  * No op copy operation for UTF8 handling.
  346:  *
  347:  * Returns the number of bytes written, or -1 if lack of space.
  348:  *     The value of *inlen after return is the number of octets consumed
  349:  *     if the return value is positive, else unpredictable.
  350:  */
  351: static int
  352: UTF8ToUTF8(unsigned char* out, int *outlen,
  353:            const unsigned char* inb, int *inlenb)
  354: {
  355:     int len;
  356: 
  357:     if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
  358: 	return(-1);
  359:     if (*outlen > *inlenb) {
  360: 	len = *inlenb;
  361:     } else {
  362: 	len = *outlen;
  363:     }
  364:     if (len < 0)
  365: 	return(-1);
  366: 
  367:     memcpy(out, inb, len);
  368: 
  369:     *outlen = len;
  370:     *inlenb = len;
  371:     return(*outlen);
  372: }
  373: 
  374: 
  375: #ifdef LIBXML_OUTPUT_ENABLED
  376: /**
  377:  * UTF8Toisolat1:
  378:  * @out:  a pointer to an array of bytes to store the result
  379:  * @outlen:  the length of @out
  380:  * @in:  a pointer to an array of UTF-8 chars
  381:  * @inlen:  the length of @in
  382:  *
  383:  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
  384:  * block of chars out.
  385:  *
  386:  * Returns the number of bytes written if success, -2 if the transcoding fails,
  387:            or -1 otherwise
  388:  * The value of @inlen after return is the number of octets consumed
  389:  *     if the return value is positive, else unpredictable.
  390:  * The value of @outlen after return is the number of octets consumed.
  391:  */
  392: int
  393: UTF8Toisolat1(unsigned char* out, int *outlen,
  394:               const unsigned char* in, int *inlen) {
  395:     const unsigned char* processed = in;
  396:     const unsigned char* outend;
  397:     const unsigned char* outstart = out;
  398:     const unsigned char* instart = in;
  399:     const unsigned char* inend;
  400:     unsigned int c, d;
  401:     int trailing;
  402: 
  403:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
  404:     if (in == NULL) {
  405:         /*
  406: 	 * initialization nothing to do
  407: 	 */
  408: 	*outlen = 0;
  409: 	*inlen = 0;
  410: 	return(0);
  411:     }
  412:     inend = in + (*inlen);
  413:     outend = out + (*outlen);
  414:     while (in < inend) {
  415: 	d = *in++;
  416: 	if      (d < 0x80)  { c= d; trailing= 0; }
  417: 	else if (d < 0xC0) {
  418: 	    /* trailing byte in leading position */
  419: 	    *outlen = out - outstart;
  420: 	    *inlen = processed - instart;
  421: 	    return(-2);
  422:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
  423:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
  424:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
  425: 	else {
  426: 	    /* no chance for this in IsoLat1 */
  427: 	    *outlen = out - outstart;
  428: 	    *inlen = processed - instart;
  429: 	    return(-2);
  430: 	}
  431: 
  432: 	if (inend - in < trailing) {
  433: 	    break;
  434: 	}
  435: 
  436: 	for ( ; trailing; trailing--) {
  437: 	    if (in >= inend)
  438: 		break;
  439: 	    if (((d= *in++) & 0xC0) != 0x80) {
  440: 		*outlen = out - outstart;
  441: 		*inlen = processed - instart;
  442: 		return(-2);
  443: 	    }
  444: 	    c <<= 6;
  445: 	    c |= d & 0x3F;
  446: 	}
  447: 
  448: 	/* assertion: c is a single UTF-4 value */
  449: 	if (c <= 0xFF) {
  450: 	    if (out >= outend)
  451: 		break;
  452: 	    *out++ = c;
  453: 	} else {
  454: 	    /* no chance for this in IsoLat1 */
  455: 	    *outlen = out - outstart;
  456: 	    *inlen = processed - instart;
  457: 	    return(-2);
  458: 	}
  459: 	processed = in;
  460:     }
  461:     *outlen = out - outstart;
  462:     *inlen = processed - instart;
  463:     return(*outlen);
  464: }
  465: #endif /* LIBXML_OUTPUT_ENABLED */
  466: 
  467: /**
  468:  * UTF16LEToUTF8:
  469:  * @out:  a pointer to an array of bytes to store the result
  470:  * @outlen:  the length of @out
  471:  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
  472:  * @inlenb:  the length of @in in UTF-16LE chars
  473:  *
  474:  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
  475:  * block of chars out. This function assumes the endian property
  476:  * is the same between the native type of this machine and the
  477:  * inputed one.
  478:  *
  479:  * Returns the number of bytes written, or -1 if lack of space, or -2
  480:  *     if the transcoding fails (if *in is not a valid utf16 string)
  481:  *     The value of *inlen after return is the number of octets consumed
  482:  *     if the return value is positive, else unpredictable.
  483:  */
  484: static int
  485: UTF16LEToUTF8(unsigned char* out, int *outlen,
  486:             const unsigned char* inb, int *inlenb)
  487: {
  488:     unsigned char* outstart = out;
  489:     const unsigned char* processed = inb;
  490:     unsigned char* outend = out + *outlen;
  491:     unsigned short* in = (unsigned short*) inb;
  492:     unsigned short* inend;
  493:     unsigned int c, d, inlen;
  494:     unsigned char *tmp;
  495:     int bits;
  496: 
  497:     if ((*inlenb % 2) == 1)
  498:         (*inlenb)--;
  499:     inlen = *inlenb / 2;
  500:     inend = in + inlen;
  501:     while ((in < inend) && (out - outstart + 5 < *outlen)) {
  502:         if (xmlLittleEndian) {
  503: 	    c= *in++;
  504: 	} else {
  505: 	    tmp = (unsigned char *) in;
  506: 	    c = *tmp++;
  507: 	    c = c | (((unsigned int)*tmp) << 8);
  508: 	    in++;
  509: 	}
  510:         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
  511: 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
  512: 		break;
  513: 	    }
  514: 	    if (xmlLittleEndian) {
  515: 		d = *in++;
  516: 	    } else {
  517: 		tmp = (unsigned char *) in;
  518: 		d = *tmp++;
  519: 		d = d | (((unsigned int)*tmp) << 8);
  520: 		in++;
  521: 	    }
  522:             if ((d & 0xFC00) == 0xDC00) {
  523:                 c &= 0x03FF;
  524:                 c <<= 10;
  525:                 c |= d & 0x03FF;
  526:                 c += 0x10000;
  527:             }
  528:             else {
  529: 		*outlen = out - outstart;
  530: 		*inlenb = processed - inb;
  531: 	        return(-2);
  532: 	    }
  533:         }
  534: 
  535: 	/* assertion: c is a single UTF-4 value */
  536:         if (out >= outend)
  537: 	    break;
  538:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
  539:         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
  540:         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
  541:         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
  542: 
  543:         for ( ; bits >= 0; bits-= 6) {
  544:             if (out >= outend)
  545: 	        break;
  546:             *out++= ((c >> bits) & 0x3F) | 0x80;
  547:         }
  548: 	processed = (const unsigned char*) in;
  549:     }
  550:     *outlen = out - outstart;
  551:     *inlenb = processed - inb;
  552:     return(*outlen);
  553: }
  554: 
  555: #ifdef LIBXML_OUTPUT_ENABLED
  556: /**
  557:  * UTF8ToUTF16LE:
  558:  * @outb:  a pointer to an array of bytes to store the result
  559:  * @outlen:  the length of @outb
  560:  * @in:  a pointer to an array of UTF-8 chars
  561:  * @inlen:  the length of @in
  562:  *
  563:  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
  564:  * block of chars out.
  565:  *
  566:  * Returns the number of bytes written, or -1 if lack of space, or -2
  567:  *     if the transcoding failed.
  568:  */
  569: static int
  570: UTF8ToUTF16LE(unsigned char* outb, int *outlen,
  571:             const unsigned char* in, int *inlen)
  572: {
  573:     unsigned short* out = (unsigned short*) outb;
  574:     const unsigned char* processed = in;
  575:     const unsigned char *const instart = in;
  576:     unsigned short* outstart= out;
  577:     unsigned short* outend;
  578:     const unsigned char* inend;
  579:     unsigned int c, d;
  580:     int trailing;
  581:     unsigned char *tmp;
  582:     unsigned short tmp1, tmp2;
  583: 
  584:     /* UTF16LE encoding has no BOM */
  585:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
  586:     if (in == NULL) {
  587: 	*outlen = 0;
  588: 	*inlen = 0;
  589: 	return(0);
  590:     }
  591:     inend= in + *inlen;
  592:     outend = out + (*outlen / 2);
  593:     while (in < inend) {
  594:       d= *in++;
  595:       if      (d < 0x80)  { c= d; trailing= 0; }
  596:       else if (d < 0xC0) {
  597:           /* trailing byte in leading position */
  598: 	  *outlen = (out - outstart) * 2;
  599: 	  *inlen = processed - instart;
  600: 	  return(-2);
  601:       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
  602:       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
  603:       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
  604:       else {
  605: 	/* no chance for this in UTF-16 */
  606: 	*outlen = (out - outstart) * 2;
  607: 	*inlen = processed - instart;
  608: 	return(-2);
  609:       }
  610: 
  611:       if (inend - in < trailing) {
  612:           break;
  613:       }
  614: 
  615:       for ( ; trailing; trailing--) {
  616:           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
  617: 	      break;
  618:           c <<= 6;
  619:           c |= d & 0x3F;
  620:       }
  621: 
  622:       /* assertion: c is a single UTF-4 value */
  623:         if (c < 0x10000) {
  624:             if (out >= outend)
  625: 	        break;
  626: 	    if (xmlLittleEndian) {
  627: 		*out++ = c;
  628: 	    } else {
  629: 		tmp = (unsigned char *) out;
  630: 		*tmp = c ;
  631: 		*(tmp + 1) = c >> 8 ;
  632: 		out++;
  633: 	    }
  634:         }
  635:         else if (c < 0x110000) {
  636:             if (out+1 >= outend)
  637: 	        break;
  638:             c -= 0x10000;
  639: 	    if (xmlLittleEndian) {
  640: 		*out++ = 0xD800 | (c >> 10);
  641: 		*out++ = 0xDC00 | (c & 0x03FF);
  642: 	    } else {
  643: 		tmp1 = 0xD800 | (c >> 10);
  644: 		tmp = (unsigned char *) out;
  645: 		*tmp = (unsigned char) tmp1;
  646: 		*(tmp + 1) = tmp1 >> 8;
  647: 		out++;
  648: 
  649: 		tmp2 = 0xDC00 | (c & 0x03FF);
  650: 		tmp = (unsigned char *) out;
  651: 		*tmp  = (unsigned char) tmp2;
  652: 		*(tmp + 1) = tmp2 >> 8;
  653: 		out++;
  654: 	    }
  655:         }
  656:         else
  657: 	    break;
  658: 	processed = in;
  659:     }
  660:     *outlen = (out - outstart) * 2;
  661:     *inlen = processed - instart;
  662:     return(*outlen);
  663: }
  664: 
  665: /**
  666:  * UTF8ToUTF16:
  667:  * @outb:  a pointer to an array of bytes to store the result
  668:  * @outlen:  the length of @outb
  669:  * @in:  a pointer to an array of UTF-8 chars
  670:  * @inlen:  the length of @in
  671:  *
  672:  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
  673:  * block of chars out.
  674:  *
  675:  * Returns the number of bytes written, or -1 if lack of space, or -2
  676:  *     if the transcoding failed.
  677:  */
  678: static int
  679: UTF8ToUTF16(unsigned char* outb, int *outlen,
  680:             const unsigned char* in, int *inlen)
  681: {
  682:     if (in == NULL) {
  683: 	/*
  684: 	 * initialization, add the Byte Order Mark for UTF-16LE
  685: 	 */
  686:         if (*outlen >= 2) {
  687: 	    outb[0] = 0xFF;
  688: 	    outb[1] = 0xFE;
  689: 	    *outlen = 2;
  690: 	    *inlen = 0;
  691: #ifdef DEBUG_ENCODING
  692:             xmlGenericError(xmlGenericErrorContext,
  693: 		    "Added FFFE Byte Order Mark\n");
  694: #endif
  695: 	    return(2);
  696: 	}
  697: 	*outlen = 0;
  698: 	*inlen = 0;
  699: 	return(0);
  700:     }
  701:     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
  702: }
  703: #endif /* LIBXML_OUTPUT_ENABLED */
  704: 
  705: /**
  706:  * UTF16BEToUTF8:
  707:  * @out:  a pointer to an array of bytes to store the result
  708:  * @outlen:  the length of @out
  709:  * @inb:  a pointer to an array of UTF-16 passed as a byte array
  710:  * @inlenb:  the length of @in in UTF-16 chars
  711:  *
  712:  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
  713:  * block of chars out. This function assumes the endian property
  714:  * is the same between the native type of this machine and the
  715:  * inputed one.
  716:  *
  717:  * Returns the number of bytes written, or -1 if lack of space, or -2
  718:  *     if the transcoding fails (if *in is not a valid utf16 string)
  719:  * The value of *inlen after return is the number of octets consumed
  720:  *     if the return value is positive, else unpredictable.
  721:  */
  722: static int
  723: UTF16BEToUTF8(unsigned char* out, int *outlen,
  724:             const unsigned char* inb, int *inlenb)
  725: {
  726:     unsigned char* outstart = out;
  727:     const unsigned char* processed = inb;
  728:     unsigned char* outend = out + *outlen;
  729:     unsigned short* in = (unsigned short*) inb;
  730:     unsigned short* inend;
  731:     unsigned int c, d, inlen;
  732:     unsigned char *tmp;
  733:     int bits;
  734: 
  735:     if ((*inlenb % 2) == 1)
  736:         (*inlenb)--;
  737:     inlen = *inlenb / 2;
  738:     inend= in + inlen;
  739:     while (in < inend) {
  740: 	if (xmlLittleEndian) {
  741: 	    tmp = (unsigned char *) in;
  742: 	    c = *tmp++;
  743: 	    c = c << 8;
  744: 	    c = c | (unsigned int) *tmp;
  745: 	    in++;
  746: 	} else {
  747: 	    c= *in++;
  748: 	}
  749:         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
  750: 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
  751: 		*outlen = out - outstart;
  752: 		*inlenb = processed - inb;
  753: 	        return(-2);
  754: 	    }
  755: 	    if (xmlLittleEndian) {
  756: 		tmp = (unsigned char *) in;
  757: 		d = *tmp++;
  758: 		d = d << 8;
  759: 		d = d | (unsigned int) *tmp;
  760: 		in++;
  761: 	    } else {
  762: 		d= *in++;
  763: 	    }
  764:             if ((d & 0xFC00) == 0xDC00) {
  765:                 c &= 0x03FF;
  766:                 c <<= 10;
  767:                 c |= d & 0x03FF;
  768:                 c += 0x10000;
  769:             }
  770:             else {
  771: 		*outlen = out - outstart;
  772: 		*inlenb = processed - inb;
  773: 	        return(-2);
  774: 	    }
  775:         }
  776: 
  777: 	/* assertion: c is a single UTF-4 value */
  778:         if (out >= outend)
  779: 	    break;
  780:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
  781:         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
  782:         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
  783:         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
  784: 
  785:         for ( ; bits >= 0; bits-= 6) {
  786:             if (out >= outend)
  787: 	        break;
  788:             *out++= ((c >> bits) & 0x3F) | 0x80;
  789:         }
  790: 	processed = (const unsigned char*) in;
  791:     }
  792:     *outlen = out - outstart;
  793:     *inlenb = processed - inb;
  794:     return(*outlen);
  795: }
  796: 
  797: #ifdef LIBXML_OUTPUT_ENABLED
  798: /**
  799:  * UTF8ToUTF16BE:
  800:  * @outb:  a pointer to an array of bytes to store the result
  801:  * @outlen:  the length of @outb
  802:  * @in:  a pointer to an array of UTF-8 chars
  803:  * @inlen:  the length of @in
  804:  *
  805:  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
  806:  * block of chars out.
  807:  *
  808:  * Returns the number of byte written, or -1 by lack of space, or -2
  809:  *     if the transcoding failed.
  810:  */
  811: static int
  812: UTF8ToUTF16BE(unsigned char* outb, int *outlen,
  813:             const unsigned char* in, int *inlen)
  814: {
  815:     unsigned short* out = (unsigned short*) outb;
  816:     const unsigned char* processed = in;
  817:     const unsigned char *const instart = in;
  818:     unsigned short* outstart= out;
  819:     unsigned short* outend;
  820:     const unsigned char* inend;
  821:     unsigned int c, d;
  822:     int trailing;
  823:     unsigned char *tmp;
  824:     unsigned short tmp1, tmp2;
  825: 
  826:     /* UTF-16BE has no BOM */
  827:     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
  828:     if (in == NULL) {
  829: 	*outlen = 0;
  830: 	*inlen = 0;
  831: 	return(0);
  832:     }
  833:     inend= in + *inlen;
  834:     outend = out + (*outlen / 2);
  835:     while (in < inend) {
  836:       d= *in++;
  837:       if      (d < 0x80)  { c= d; trailing= 0; }
  838:       else if (d < 0xC0)  {
  839:           /* trailing byte in leading position */
  840: 	  *outlen = out - outstart;
  841: 	  *inlen = processed - instart;
  842: 	  return(-2);
  843:       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
  844:       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
  845:       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
  846:       else {
  847:           /* no chance for this in UTF-16 */
  848: 	  *outlen = out - outstart;
  849: 	  *inlen = processed - instart;
  850: 	  return(-2);
  851:       }
  852: 
  853:       if (inend - in < trailing) {
  854:           break;
  855:       }
  856: 
  857:       for ( ; trailing; trailing--) {
  858:           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
  859:           c <<= 6;
  860:           c |= d & 0x3F;
  861:       }
  862: 
  863:       /* assertion: c is a single UTF-4 value */
  864:         if (c < 0x10000) {
  865:             if (out >= outend)  break;
  866: 	    if (xmlLittleEndian) {
  867: 		tmp = (unsigned char *) out;
  868: 		*tmp = c >> 8;
  869: 		*(tmp + 1) = c;
  870: 		out++;
  871: 	    } else {
  872: 		*out++ = c;
  873: 	    }
  874:         }
  875:         else if (c < 0x110000) {
  876:             if (out+1 >= outend)  break;
  877:             c -= 0x10000;
  878: 	    if (xmlLittleEndian) {
  879: 		tmp1 = 0xD800 | (c >> 10);
  880: 		tmp = (unsigned char *) out;
  881: 		*tmp = tmp1 >> 8;
  882: 		*(tmp + 1) = (unsigned char) tmp1;
  883: 		out++;
  884: 
  885: 		tmp2 = 0xDC00 | (c & 0x03FF);
  886: 		tmp = (unsigned char *) out;
  887: 		*tmp = tmp2 >> 8;
  888: 		*(tmp + 1) = (unsigned char) tmp2;
  889: 		out++;
  890: 	    } else {
  891: 		*out++ = 0xD800 | (c >> 10);
  892: 		*out++ = 0xDC00 | (c & 0x03FF);
  893: 	    }
  894:         }
  895:         else
  896: 	    break;
  897: 	processed = in;
  898:     }
  899:     *outlen = (out - outstart) * 2;
  900:     *inlen = processed - instart;
  901:     return(*outlen);
  902: }
  903: #endif /* LIBXML_OUTPUT_ENABLED */
  904: 
  905: /************************************************************************
  906:  *									*
  907:  *		Generic encoding handling routines			*
  908:  *									*
  909:  ************************************************************************/
  910: 
  911: /**
  912:  * xmlDetectCharEncoding:
  913:  * @in:  a pointer to the first bytes of the XML entity, must be at least
  914:  *       2 bytes long (at least 4 if encoding is UTF4 variant).
  915:  * @len:  pointer to the length of the buffer
  916:  *
  917:  * Guess the encoding of the entity using the first bytes of the entity content
  918:  * according to the non-normative appendix F of the XML-1.0 recommendation.
  919:  *
  920:  * Returns one of the XML_CHAR_ENCODING_... values.
  921:  */
  922: xmlCharEncoding
  923: xmlDetectCharEncoding(const unsigned char* in, int len)
  924: {
  925:     if (in == NULL)
  926:         return(XML_CHAR_ENCODING_NONE);
  927:     if (len >= 4) {
  928: 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
  929: 	    (in[2] == 0x00) && (in[3] == 0x3C))
  930: 	    return(XML_CHAR_ENCODING_UCS4BE);
  931: 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
  932: 	    (in[2] == 0x00) && (in[3] == 0x00))
  933: 	    return(XML_CHAR_ENCODING_UCS4LE);
  934: 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
  935: 	    (in[2] == 0x3C) && (in[3] == 0x00))
  936: 	    return(XML_CHAR_ENCODING_UCS4_2143);
  937: 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
  938: 	    (in[2] == 0x00) && (in[3] == 0x00))
  939: 	    return(XML_CHAR_ENCODING_UCS4_3412);
  940: 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
  941: 	    (in[2] == 0xA7) && (in[3] == 0x94))
  942: 	    return(XML_CHAR_ENCODING_EBCDIC);
  943: 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
  944: 	    (in[2] == 0x78) && (in[3] == 0x6D))
  945: 	    return(XML_CHAR_ENCODING_UTF8);
  946: 	/*
  947: 	 * Although not part of the recommendation, we also
  948: 	 * attempt an "auto-recognition" of UTF-16LE and
  949: 	 * UTF-16BE encodings.
  950: 	 */
  951: 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
  952: 	    (in[2] == 0x3F) && (in[3] == 0x00))
  953: 	    return(XML_CHAR_ENCODING_UTF16LE);
  954: 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
  955: 	    (in[2] == 0x00) && (in[3] == 0x3F))
  956: 	    return(XML_CHAR_ENCODING_UTF16BE);
  957:     }
  958:     if (len >= 3) {
  959: 	/*
  960: 	 * Errata on XML-1.0 June 20 2001
  961: 	 * We now allow an UTF8 encoded BOM
  962: 	 */
  963: 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
  964: 	    (in[2] == 0xBF))
  965: 	    return(XML_CHAR_ENCODING_UTF8);
  966:     }
  967:     /* For UTF-16 we can recognize by the BOM */
  968:     if (len >= 2) {
  969: 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
  970: 	    return(XML_CHAR_ENCODING_UTF16BE);
  971: 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
  972: 	    return(XML_CHAR_ENCODING_UTF16LE);
  973:     }
  974:     return(XML_CHAR_ENCODING_NONE);
  975: }
  976: 
  977: /**
  978:  * xmlCleanupEncodingAliases:
  979:  *
  980:  * Unregisters all aliases
  981:  */
  982: void
  983: xmlCleanupEncodingAliases(void) {
  984:     int i;
  985: 
  986:     if (xmlCharEncodingAliases == NULL)
  987: 	return;
  988: 
  989:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
  990: 	if (xmlCharEncodingAliases[i].name != NULL)
  991: 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
  992: 	if (xmlCharEncodingAliases[i].alias != NULL)
  993: 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
  994:     }
  995:     xmlCharEncodingAliasesNb = 0;
  996:     xmlCharEncodingAliasesMax = 0;
  997:     xmlFree(xmlCharEncodingAliases);
  998:     xmlCharEncodingAliases = NULL;
  999: }
 1000: 
 1001: /**
 1002:  * xmlGetEncodingAlias:
 1003:  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
 1004:  *
 1005:  * Lookup an encoding name for the given alias.
 1006:  *
 1007:  * Returns NULL if not found, otherwise the original name
 1008:  */
 1009: const char *
 1010: xmlGetEncodingAlias(const char *alias) {
 1011:     int i;
 1012:     char upper[100];
 1013: 
 1014:     if (alias == NULL)
 1015: 	return(NULL);
 1016: 
 1017:     if (xmlCharEncodingAliases == NULL)
 1018: 	return(NULL);
 1019: 
 1020:     for (i = 0;i < 99;i++) {
 1021:         upper[i] = toupper(alias[i]);
 1022: 	if (upper[i] == 0) break;
 1023:     }
 1024:     upper[i] = 0;
 1025: 
 1026:     /*
 1027:      * Walk down the list looking for a definition of the alias
 1028:      */
 1029:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 1030: 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
 1031: 	    return(xmlCharEncodingAliases[i].name);
 1032: 	}
 1033:     }
 1034:     return(NULL);
 1035: }
 1036: 
 1037: /**
 1038:  * xmlAddEncodingAlias:
 1039:  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
 1040:  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
 1041:  *
 1042:  * Registers an alias @alias for an encoding named @name. Existing alias
 1043:  * will be overwritten.
 1044:  *
 1045:  * Returns 0 in case of success, -1 in case of error
 1046:  */
 1047: int
 1048: xmlAddEncodingAlias(const char *name, const char *alias) {
 1049:     int i;
 1050:     char upper[100];
 1051: 
 1052:     if ((name == NULL) || (alias == NULL))
 1053: 	return(-1);
 1054: 
 1055:     for (i = 0;i < 99;i++) {
 1056:         upper[i] = toupper(alias[i]);
 1057: 	if (upper[i] == 0) break;
 1058:     }
 1059:     upper[i] = 0;
 1060: 
 1061:     if (xmlCharEncodingAliases == NULL) {
 1062: 	xmlCharEncodingAliasesNb = 0;
 1063: 	xmlCharEncodingAliasesMax = 20;
 1064: 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
 1065: 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
 1066: 	if (xmlCharEncodingAliases == NULL)
 1067: 	    return(-1);
 1068:     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
 1069: 	xmlCharEncodingAliasesMax *= 2;
 1070: 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
 1071: 	      xmlRealloc(xmlCharEncodingAliases,
 1072: 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
 1073:     }
 1074:     /*
 1075:      * Walk down the list looking for a definition of the alias
 1076:      */
 1077:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 1078: 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
 1079: 	    /*
 1080: 	     * Replace the definition.
 1081: 	     */
 1082: 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
 1083: 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
 1084: 	    return(0);
 1085: 	}
 1086:     }
 1087:     /*
 1088:      * Add the definition
 1089:      */
 1090:     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
 1091:     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
 1092:     xmlCharEncodingAliasesNb++;
 1093:     return(0);
 1094: }
 1095: 
 1096: /**
 1097:  * xmlDelEncodingAlias:
 1098:  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
 1099:  *
 1100:  * Unregisters an encoding alias @alias
 1101:  *
 1102:  * Returns 0 in case of success, -1 in case of error
 1103:  */
 1104: int
 1105: xmlDelEncodingAlias(const char *alias) {
 1106:     int i;
 1107: 
 1108:     if (alias == NULL)
 1109: 	return(-1);
 1110: 
 1111:     if (xmlCharEncodingAliases == NULL)
 1112: 	return(-1);
 1113:     /*
 1114:      * Walk down the list looking for a definition of the alias
 1115:      */
 1116:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 1117: 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
 1118: 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
 1119: 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
 1120: 	    xmlCharEncodingAliasesNb--;
 1121: 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
 1122: 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
 1123: 	    return(0);
 1124: 	}
 1125:     }
 1126:     return(-1);
 1127: }
 1128: 
 1129: /**
 1130:  * xmlParseCharEncoding:
 1131:  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
 1132:  *
 1133:  * Compare the string to the encoding schemes already known. Note
 1134:  * that the comparison is case insensitive accordingly to the section
 1135:  * [XML] 4.3.3 Character Encoding in Entities.
 1136:  *
 1137:  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
 1138:  * if not recognized.
 1139:  */
 1140: xmlCharEncoding
 1141: xmlParseCharEncoding(const char* name)
 1142: {
 1143:     const char *alias;
 1144:     char upper[500];
 1145:     int i;
 1146: 
 1147:     if (name == NULL)
 1148: 	return(XML_CHAR_ENCODING_NONE);
 1149: 
 1150:     /*
 1151:      * Do the alias resolution
 1152:      */
 1153:     alias = xmlGetEncodingAlias(name);
 1154:     if (alias != NULL)
 1155: 	name = alias;
 1156: 
 1157:     for (i = 0;i < 499;i++) {
 1158:         upper[i] = toupper(name[i]);
 1159: 	if (upper[i] == 0) break;
 1160:     }
 1161:     upper[i] = 0;
 1162: 
 1163:     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
 1164:     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
 1165:     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
 1166: 
 1167:     /*
 1168:      * NOTE: if we were able to parse this, the endianness of UTF16 is
 1169:      *       already found and in use
 1170:      */
 1171:     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
 1172:     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
 1173: 
 1174:     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
 1175:     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
 1176:     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
 1177: 
 1178:     /*
 1179:      * NOTE: if we were able to parse this, the endianness of UCS4 is
 1180:      *       already found and in use
 1181:      */
 1182:     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
 1183:     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
 1184:     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
 1185: 
 1186: 
 1187:     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
 1188:     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
 1189:     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
 1190: 
 1191:     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
 1192:     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
 1193:     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
 1194: 
 1195:     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
 1196:     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
 1197:     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
 1198:     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
 1199:     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
 1200:     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
 1201:     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
 1202: 
 1203:     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
 1204:     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
 1205:     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
 1206: 
 1207: #ifdef DEBUG_ENCODING
 1208:     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
 1209: #endif
 1210:     return(XML_CHAR_ENCODING_ERROR);
 1211: }
 1212: 
 1213: /**
 1214:  * xmlGetCharEncodingName:
 1215:  * @enc:  the encoding
 1216:  *
 1217:  * The "canonical" name for XML encoding.
 1218:  * C.f. http://www.w3.org/TR/REC-xml#charencoding
 1219:  * Section 4.3.3  Character Encoding in Entities
 1220:  *
 1221:  * Returns the canonical name for the given encoding
 1222:  */
 1223: 
 1224: const char*
 1225: xmlGetCharEncodingName(xmlCharEncoding enc) {
 1226:     switch (enc) {
 1227:         case XML_CHAR_ENCODING_ERROR:
 1228: 	    return(NULL);
 1229:         case XML_CHAR_ENCODING_NONE:
 1230: 	    return(NULL);
 1231:         case XML_CHAR_ENCODING_UTF8:
 1232: 	    return("UTF-8");
 1233:         case XML_CHAR_ENCODING_UTF16LE:
 1234: 	    return("UTF-16");
 1235:         case XML_CHAR_ENCODING_UTF16BE:
 1236: 	    return("UTF-16");
 1237:         case XML_CHAR_ENCODING_EBCDIC:
 1238:             return("EBCDIC");
 1239:         case XML_CHAR_ENCODING_UCS4LE:
 1240:             return("ISO-10646-UCS-4");
 1241:         case XML_CHAR_ENCODING_UCS4BE:
 1242:             return("ISO-10646-UCS-4");
 1243:         case XML_CHAR_ENCODING_UCS4_2143:
 1244:             return("ISO-10646-UCS-4");
 1245:         case XML_CHAR_ENCODING_UCS4_3412:
 1246:             return("ISO-10646-UCS-4");
 1247:         case XML_CHAR_ENCODING_UCS2:
 1248:             return("ISO-10646-UCS-2");
 1249:         case XML_CHAR_ENCODING_8859_1:
 1250: 	    return("ISO-8859-1");
 1251:         case XML_CHAR_ENCODING_8859_2:
 1252: 	    return("ISO-8859-2");
 1253:         case XML_CHAR_ENCODING_8859_3:
 1254: 	    return("ISO-8859-3");
 1255:         case XML_CHAR_ENCODING_8859_4:
 1256: 	    return("ISO-8859-4");
 1257:         case XML_CHAR_ENCODING_8859_5:
 1258: 	    return("ISO-8859-5");
 1259:         case XML_CHAR_ENCODING_8859_6:
 1260: 	    return("ISO-8859-6");
 1261:         case XML_CHAR_ENCODING_8859_7:
 1262: 	    return("ISO-8859-7");
 1263:         case XML_CHAR_ENCODING_8859_8:
 1264: 	    return("ISO-8859-8");
 1265:         case XML_CHAR_ENCODING_8859_9:
 1266: 	    return("ISO-8859-9");
 1267:         case XML_CHAR_ENCODING_2022_JP:
 1268:             return("ISO-2022-JP");
 1269:         case XML_CHAR_ENCODING_SHIFT_JIS:
 1270:             return("Shift-JIS");
 1271:         case XML_CHAR_ENCODING_EUC_JP:
 1272:             return("EUC-JP");
 1273: 	case XML_CHAR_ENCODING_ASCII:
 1274: 	    return(NULL);
 1275:     }
 1276:     return(NULL);
 1277: }
 1278: 
 1279: /************************************************************************
 1280:  *									*
 1281:  *			Char encoding handlers				*
 1282:  *									*
 1283:  ************************************************************************/
 1284: 
 1285: 
/*
 * Registry of the encoding handlers. The table is allocated by
 * xmlInitCharEncodingHandlers() with MAX_ENCODING_HANDLERS slots and is
 * not grown afterwards: xmlRegisterCharEncodingHandler() rejects any
 * registration once it is full. nbCharEncodingHandler counts the slots
 * in use. The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL. It is what
 * xmlFindCharEncodingHandler() returns for a NULL or empty name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
 1297: 
 1298: /**
 1299:  * xmlNewCharEncodingHandler:
 1300:  * @name:  the encoding name, in UTF-8 format (ASCII actually)
 1301:  * @input:  the xmlCharEncodingInputFunc to read that encoding
 1302:  * @output:  the xmlCharEncodingOutputFunc to write that encoding
 1303:  *
 1304:  * Create and registers an xmlCharEncodingHandler.
 1305:  *
 1306:  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
 1307:  */
 1308: xmlCharEncodingHandlerPtr
 1309: xmlNewCharEncodingHandler(const char *name,
 1310:                           xmlCharEncodingInputFunc input,
 1311:                           xmlCharEncodingOutputFunc output) {
 1312:     xmlCharEncodingHandlerPtr handler;
 1313:     const char *alias;
 1314:     char upper[500];
 1315:     int i;
 1316:     char *up = NULL;
 1317: 
 1318:     /*
 1319:      * Do the alias resolution
 1320:      */
 1321:     alias = xmlGetEncodingAlias(name);
 1322:     if (alias != NULL)
 1323: 	name = alias;
 1324: 
 1325:     /*
 1326:      * Keep only the uppercase version of the encoding.
 1327:      */
 1328:     if (name == NULL) {
 1329:         xmlEncodingErr(XML_I18N_NO_NAME,
 1330: 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
 1331: 	return(NULL);
 1332:     }
 1333:     for (i = 0;i < 499;i++) {
 1334:         upper[i] = toupper(name[i]);
 1335: 	if (upper[i] == 0) break;
 1336:     }
 1337:     upper[i] = 0;
 1338:     up = xmlMemStrdup(upper);
 1339:     if (up == NULL) {
 1340:         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
 1341: 	return(NULL);
 1342:     }
 1343: 
 1344:     /*
 1345:      * allocate and fill-up an handler block.
 1346:      */
 1347:     handler = (xmlCharEncodingHandlerPtr)
 1348:               xmlMalloc(sizeof(xmlCharEncodingHandler));
 1349:     if (handler == NULL) {
 1350:         xmlFree(up);
 1351:         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
 1352: 	return(NULL);
 1353:     }
 1354:     memset(handler, 0, sizeof(xmlCharEncodingHandler));
 1355:     handler->input = input;
 1356:     handler->output = output;
 1357:     handler->name = up;
 1358: 
 1359: #ifdef LIBXML_ICONV_ENABLED
 1360:     handler->iconv_in = NULL;
 1361:     handler->iconv_out = NULL;
 1362: #endif
 1363: #ifdef LIBXML_ICU_ENABLED
 1364:     handler->uconv_in = NULL;
 1365:     handler->uconv_out = NULL;
 1366: #endif
 1367: 
 1368:     /*
 1369:      * registers and returns the handler.
 1370:      */
 1371:     xmlRegisterCharEncodingHandler(handler);
 1372: #ifdef DEBUG_ENCODING
 1373:     xmlGenericError(xmlGenericErrorContext,
 1374: 	    "Registered encoding handler for %s\n", name);
 1375: #endif
 1376:     return(handler);
 1377: }
 1378: 
 1379: /**
 1380:  * xmlInitCharEncodingHandlers:
 1381:  *
 1382:  * Initialize the char encoding support, it registers the default
 1383:  * encoding supported.
 1384:  * NOTE: while public, this function usually doesn't need to be called
 1385:  *       in normal processing.
 1386:  */
 1387: void
 1388: xmlInitCharEncodingHandlers(void) {
 1389:     unsigned short int tst = 0x1234;
 1390:     unsigned char *ptr = (unsigned char *) &tst;
 1391: 
 1392:     if (handlers != NULL) return;
 1393: 
 1394:     handlers = (xmlCharEncodingHandlerPtr *)
 1395:         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
 1396: 
 1397:     if (*ptr == 0x12) xmlLittleEndian = 0;
 1398:     else if (*ptr == 0x34) xmlLittleEndian = 1;
 1399:     else {
 1400:         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
 1401: 	               "Odd problem at endianness detection\n", NULL);
 1402:     }
 1403: 
 1404:     if (handlers == NULL) {
 1405:         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
 1406: 	return;
 1407:     }
 1408:     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
 1409: #ifdef LIBXML_OUTPUT_ENABLED
 1410:     xmlUTF16LEHandler =
 1411:           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
 1412:     xmlUTF16BEHandler =
 1413:           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
 1414:     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
 1415:     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
 1416:     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
 1417:     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
 1418: #ifdef LIBXML_HTML_ENABLED
 1419:     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
 1420: #endif
 1421: #else
 1422:     xmlUTF16LEHandler =
 1423:           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
 1424:     xmlUTF16BEHandler =
 1425:           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
 1426:     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
 1427:     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
 1428:     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
 1429:     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
 1430: #endif /* LIBXML_OUTPUT_ENABLED */
 1431: #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
 1432: #ifdef LIBXML_ISO8859X_ENABLED
 1433:     xmlRegisterCharEncodingHandlersISO8859x ();
 1434: #endif
 1435: #endif
 1436: 
 1437: }
 1438: 
 1439: /**
 1440:  * xmlCleanupCharEncodingHandlers:
 1441:  *
 1442:  * Cleanup the memory allocated for the char encoding support, it
 1443:  * unregisters all the encoding handlers and the aliases.
 1444:  */
 1445: void
 1446: xmlCleanupCharEncodingHandlers(void) {
 1447:     xmlCleanupEncodingAliases();
 1448: 
 1449:     if (handlers == NULL) return;
 1450: 
 1451:     for (;nbCharEncodingHandler > 0;) {
 1452:         nbCharEncodingHandler--;
 1453: 	if (handlers[nbCharEncodingHandler] != NULL) {
 1454: 	    if (handlers[nbCharEncodingHandler]->name != NULL)
 1455: 		xmlFree(handlers[nbCharEncodingHandler]->name);
 1456: 	    xmlFree(handlers[nbCharEncodingHandler]);
 1457: 	}
 1458:     }
 1459:     xmlFree(handlers);
 1460:     handlers = NULL;
 1461:     nbCharEncodingHandler = 0;
 1462:     xmlDefaultCharEncodingHandler = NULL;
 1463: }
 1464: 
 1465: /**
 1466:  * xmlRegisterCharEncodingHandler:
 1467:  * @handler:  the xmlCharEncodingHandlerPtr handler block
 1468:  *
 1469:  * Register the char encoding handler, surprising, isn't it ?
 1470:  */
 1471: void
 1472: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
 1473:     if (handlers == NULL) xmlInitCharEncodingHandlers();
 1474:     if ((handler == NULL) || (handlers == NULL)) {
 1475:         xmlEncodingErr(XML_I18N_NO_HANDLER,
 1476: 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
 1477: 	return;
 1478:     }
 1479: 
 1480:     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
 1481:         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
 1482: 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
 1483: 	               "MAX_ENCODING_HANDLERS");
 1484: 	return;
 1485:     }
 1486:     handlers[nbCharEncodingHandler++] = handler;
 1487: }
 1488: 
 1489: /**
 1490:  * xmlGetCharEncodingHandler:
 1491:  * @enc:  an xmlCharEncoding value.
 1492:  *
 1493:  * Search in the registered set the handler able to read/write that encoding.
 1494:  *
 1495:  * Returns the handler or NULL if not found
 1496:  */
 1497: xmlCharEncodingHandlerPtr
 1498: xmlGetCharEncodingHandler(xmlCharEncoding enc) {
 1499:     xmlCharEncodingHandlerPtr handler;
 1500: 
 1501:     if (handlers == NULL) xmlInitCharEncodingHandlers();
 1502:     switch (enc) {
 1503:         case XML_CHAR_ENCODING_ERROR:
 1504: 	    return(NULL);
 1505:         case XML_CHAR_ENCODING_NONE:
 1506: 	    return(NULL);
 1507:         case XML_CHAR_ENCODING_UTF8:
 1508: 	    return(NULL);
 1509:         case XML_CHAR_ENCODING_UTF16LE:
 1510: 	    return(xmlUTF16LEHandler);
 1511:         case XML_CHAR_ENCODING_UTF16BE:
 1512: 	    return(xmlUTF16BEHandler);
 1513:         case XML_CHAR_ENCODING_EBCDIC:
 1514:             handler = xmlFindCharEncodingHandler("EBCDIC");
 1515:             if (handler != NULL) return(handler);
 1516:             handler = xmlFindCharEncodingHandler("ebcdic");
 1517:             if (handler != NULL) return(handler);
 1518:             handler = xmlFindCharEncodingHandler("EBCDIC-US");
 1519:             if (handler != NULL) return(handler);
 1520:             handler = xmlFindCharEncodingHandler("IBM-037");
 1521:             if (handler != NULL) return(handler);
 1522: 	    break;
 1523:         case XML_CHAR_ENCODING_UCS4BE:
 1524:             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
 1525:             if (handler != NULL) return(handler);
 1526:             handler = xmlFindCharEncodingHandler("UCS-4");
 1527:             if (handler != NULL) return(handler);
 1528:             handler = xmlFindCharEncodingHandler("UCS4");
 1529:             if (handler != NULL) return(handler);
 1530: 	    break;
 1531:         case XML_CHAR_ENCODING_UCS4LE:
 1532:             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
 1533:             if (handler != NULL) return(handler);
 1534:             handler = xmlFindCharEncodingHandler("UCS-4");
 1535:             if (handler != NULL) return(handler);
 1536:             handler = xmlFindCharEncodingHandler("UCS4");
 1537:             if (handler != NULL) return(handler);
 1538: 	    break;
 1539:         case XML_CHAR_ENCODING_UCS4_2143:
 1540: 	    break;
 1541:         case XML_CHAR_ENCODING_UCS4_3412:
 1542: 	    break;
 1543:         case XML_CHAR_ENCODING_UCS2:
 1544:             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
 1545:             if (handler != NULL) return(handler);
 1546:             handler = xmlFindCharEncodingHandler("UCS-2");
 1547:             if (handler != NULL) return(handler);
 1548:             handler = xmlFindCharEncodingHandler("UCS2");
 1549:             if (handler != NULL) return(handler);
 1550: 	    break;
 1551: 
 1552: 	    /*
 1553: 	     * We used to keep ISO Latin encodings native in the
 1554: 	     * generated data. This led to so many problems that
 1555: 	     * this has been removed. One can still change this
 1556: 	     * back by registering no-ops encoders for those
 1557: 	     */
 1558:         case XML_CHAR_ENCODING_8859_1:
 1559: 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
 1560: 	    if (handler != NULL) return(handler);
 1561: 	    break;
 1562:         case XML_CHAR_ENCODING_8859_2:
 1563: 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
 1564: 	    if (handler != NULL) return(handler);
 1565: 	    break;
 1566:         case XML_CHAR_ENCODING_8859_3:
 1567: 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
 1568: 	    if (handler != NULL) return(handler);
 1569: 	    break;
 1570:         case XML_CHAR_ENCODING_8859_4:
 1571: 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
 1572: 	    if (handler != NULL) return(handler);
 1573: 	    break;
 1574:         case XML_CHAR_ENCODING_8859_5:
 1575: 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
 1576: 	    if (handler != NULL) return(handler);
 1577: 	    break;
 1578:         case XML_CHAR_ENCODING_8859_6:
 1579: 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
 1580: 	    if (handler != NULL) return(handler);
 1581: 	    break;
 1582:         case XML_CHAR_ENCODING_8859_7:
 1583: 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
 1584: 	    if (handler != NULL) return(handler);
 1585: 	    break;
 1586:         case XML_CHAR_ENCODING_8859_8:
 1587: 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
 1588: 	    if (handler != NULL) return(handler);
 1589: 	    break;
 1590:         case XML_CHAR_ENCODING_8859_9:
 1591: 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
 1592: 	    if (handler != NULL) return(handler);
 1593: 	    break;
 1594: 
 1595: 
 1596:         case XML_CHAR_ENCODING_2022_JP:
 1597:             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
 1598:             if (handler != NULL) return(handler);
 1599: 	    break;
 1600:         case XML_CHAR_ENCODING_SHIFT_JIS:
 1601:             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
 1602:             if (handler != NULL) return(handler);
 1603:             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
 1604:             if (handler != NULL) return(handler);
 1605:             handler = xmlFindCharEncodingHandler("Shift_JIS");
 1606:             if (handler != NULL) return(handler);
 1607: 	    break;
 1608:         case XML_CHAR_ENCODING_EUC_JP:
 1609:             handler = xmlFindCharEncodingHandler("EUC-JP");
 1610:             if (handler != NULL) return(handler);
 1611: 	    break;
 1612: 	default:
 1613: 	    break;
 1614:     }
 1615: 
 1616: #ifdef DEBUG_ENCODING
 1617:     xmlGenericError(xmlGenericErrorContext,
 1618: 	    "No handler found for encoding %d\n", enc);
 1619: #endif
 1620:     return(NULL);
 1621: }
 1622: 
 1623: /**
 1624:  * xmlFindCharEncodingHandler:
 1625:  * @name:  a string describing the char encoding.
 1626:  *
 1627:  * Search in the registered set the handler able to read/write that encoding.
 1628:  *
 1629:  * Returns the handler or NULL if not found
 1630:  */
 1631: xmlCharEncodingHandlerPtr
 1632: xmlFindCharEncodingHandler(const char *name) {
 1633:     const char *nalias;
 1634:     const char *norig;
 1635:     xmlCharEncoding alias;
 1636: #ifdef LIBXML_ICONV_ENABLED
 1637:     xmlCharEncodingHandlerPtr enc;
 1638:     iconv_t icv_in, icv_out;
 1639: #endif /* LIBXML_ICONV_ENABLED */
 1640: #ifdef LIBXML_ICU_ENABLED
 1641:     xmlCharEncodingHandlerPtr encu;
 1642:     uconv_t *ucv_in, *ucv_out;
 1643: #endif /* LIBXML_ICU_ENABLED */
 1644:     char upper[100];
 1645:     int i;
 1646: 
 1647:     if (handlers == NULL) xmlInitCharEncodingHandlers();
 1648:     if (name == NULL) return(xmlDefaultCharEncodingHandler);
 1649:     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
 1650: 
 1651:     /*
 1652:      * Do the alias resolution
 1653:      */
 1654:     norig = name;
 1655:     nalias = xmlGetEncodingAlias(name);
 1656:     if (nalias != NULL)
 1657: 	name = nalias;
 1658: 
 1659:     /*
 1660:      * Check first for directly registered encoding names
 1661:      */
 1662:     for (i = 0;i < 99;i++) {
 1663:         upper[i] = toupper(name[i]);
 1664: 	if (upper[i] == 0) break;
 1665:     }
 1666:     upper[i] = 0;
 1667: 
 1668:     if (handlers != NULL) {
 1669:         for (i = 0;i < nbCharEncodingHandler; i++) {
 1670:             if (!strcmp(upper, handlers[i]->name)) {
 1671: #ifdef DEBUG_ENCODING
 1672:                 xmlGenericError(xmlGenericErrorContext,
 1673:                         "Found registered handler for encoding %s\n", name);
 1674: #endif
 1675:                 return(handlers[i]);
 1676:             }
 1677:         }
 1678:     }
 1679: 
 1680: #ifdef LIBXML_ICONV_ENABLED
 1681:     /* check whether iconv can handle this */
 1682:     icv_in = iconv_open("UTF-8", name);
 1683:     icv_out = iconv_open(name, "UTF-8");
 1684:     if (icv_in == (iconv_t) -1) {
 1685:         icv_in = iconv_open("UTF-8", upper);
 1686:     }
 1687:     if (icv_out == (iconv_t) -1) {
 1688: 	icv_out = iconv_open(upper, "UTF-8");
 1689:     }
 1690:     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
 1691: 	    enc = (xmlCharEncodingHandlerPtr)
 1692: 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
 1693: 	    if (enc == NULL) {
 1694: 	        iconv_close(icv_in);
 1695: 	        iconv_close(icv_out);
 1696: 		return(NULL);
 1697: 	    }
 1698:             memset(enc, 0, sizeof(xmlCharEncodingHandler));
 1699: 	    enc->name = xmlMemStrdup(name);
 1700: 	    enc->input = NULL;
 1701: 	    enc->output = NULL;
 1702: 	    enc->iconv_in = icv_in;
 1703: 	    enc->iconv_out = icv_out;
 1704: #ifdef DEBUG_ENCODING
 1705:             xmlGenericError(xmlGenericErrorContext,
 1706: 		    "Found iconv handler for encoding %s\n", name);
 1707: #endif
 1708: 	    return enc;
 1709:     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
 1710: 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
 1711: 		    "iconv : problems with filters for '%s'\n", name);
 1712:     }
 1713: #endif /* LIBXML_ICONV_ENABLED */
 1714: #ifdef LIBXML_ICU_ENABLED
 1715:     /* check whether icu can handle this */
 1716:     ucv_in = openIcuConverter(name, 1);
 1717:     ucv_out = openIcuConverter(name, 0);
 1718:     if (ucv_in != NULL && ucv_out != NULL) {
 1719: 	    encu = (xmlCharEncodingHandlerPtr)
 1720: 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
 1721: 	    if (encu == NULL) {
 1722:                 closeIcuConverter(ucv_in);
 1723:                 closeIcuConverter(ucv_out);
 1724: 		return(NULL);
 1725: 	    }
 1726:             memset(encu, 0, sizeof(xmlCharEncodingHandler));
 1727: 	    encu->name = xmlMemStrdup(name);
 1728: 	    encu->input = NULL;
 1729: 	    encu->output = NULL;
 1730: 	    encu->uconv_in = ucv_in;
 1731: 	    encu->uconv_out = ucv_out;
 1732: #ifdef DEBUG_ENCODING
 1733:             xmlGenericError(xmlGenericErrorContext,
 1734: 		    "Found ICU converter handler for encoding %s\n", name);
 1735: #endif
 1736: 	    return encu;
 1737:     } else if (ucv_in != NULL || ucv_out != NULL) {
 1738:             closeIcuConverter(ucv_in);
 1739:             closeIcuConverter(ucv_out);
 1740: 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
 1741: 		    "ICU converter : problems with filters for '%s'\n", name);
 1742:     }
 1743: #endif /* LIBXML_ICU_ENABLED */
 1744: 
 1745: #ifdef DEBUG_ENCODING
 1746:     xmlGenericError(xmlGenericErrorContext,
 1747: 	    "No handler found for encoding %s\n", name);
 1748: #endif
 1749: 
 1750:     /*
 1751:      * Fallback using the canonical names
 1752:      */
 1753:     alias = xmlParseCharEncoding(norig);
 1754:     if (alias != XML_CHAR_ENCODING_ERROR) {
 1755:         const char* canon;
 1756:         canon = xmlGetCharEncodingName(alias);
 1757:         if ((canon != NULL) && (strcmp(name, canon))) {
 1758: 	    return(xmlFindCharEncodingHandler(canon));
 1759:         }
 1760:     }
 1761: 
 1762:     /* If "none of the above", give up */
 1763:     return(NULL);
 1764: }
 1765: 
 1766: /************************************************************************
 1767:  *									*
 1768:  *		ICONV based generic conversion functions		*
 1769:  *									*
 1770:  ************************************************************************/
 1771: 
 1772: #ifdef LIBXML_ICONV_ENABLED
 1773: /**
 1774:  * xmlIconvWrapper:
 1775:  * @cd:		iconv converter data structure
 1776:  * @out:  a pointer to an array of bytes to store the result
 1777:  * @outlen:  the length of @out
 1778:  * @in:  a pointer to an array of ISO Latin 1 chars
 1779:  * @inlen:  the length of @in
 1780:  *
 1781:  * Returns 0 if success, or
 1782:  *     -1 by lack of space, or
 1783:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 1784:  *        the result of transformation can't fit into the encoding we want), or
 1785:  *     -3 if there the last byte can't form a single output char.
 1786:  *
 1787:  * The value of @inlen after return is the number of octets consumed
 1788:  *     as the return value is positive, else unpredictable.
 1789:  * The value of @outlen after return is the number of ocetes consumed.
 1790:  */
 1791: static int
 1792: xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
 1793:                 const unsigned char *in, int *inlen) {
 1794:     size_t icv_inlen, icv_outlen;
 1795:     const char *icv_in = (const char *) in;
 1796:     char *icv_out = (char *) out;
 1797:     int ret;
 1798: 
 1799:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
 1800:         if (outlen != NULL) *outlen = 0;
 1801:         return(-1);
 1802:     }
 1803:     icv_inlen = *inlen;
 1804:     icv_outlen = *outlen;
 1805:     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
 1806:     *inlen -= icv_inlen;
 1807:     *outlen -= icv_outlen;
 1808:     if ((icv_inlen != 0) || (ret == -1)) {
 1809: #ifdef EILSEQ
 1810:         if (errno == EILSEQ) {
 1811:             return -2;
 1812:         } else
 1813: #endif
 1814: #ifdef E2BIG
 1815:         if (errno == E2BIG) {
 1816:             return -1;
 1817:         } else
 1818: #endif
 1819: #ifdef EINVAL
 1820:         if (errno == EINVAL) {
 1821:             return -3;
 1822:         } else
 1823: #endif
 1824:         {
 1825:             return -3;
 1826:         }
 1827:     }
 1828:     return 0;
 1829: }
 1830: #endif /* LIBXML_ICONV_ENABLED */
 1831: 
 1832: /************************************************************************
 1833:  *									*
 1834:  *		ICU based generic conversion functions		*
 1835:  *									*
 1836:  ************************************************************************/
 1837: 
 1838: #ifdef LIBXML_ICU_ENABLED
 1839: /**
 1840:  * xmlUconvWrapper:
 1841:  * @cd: ICU uconverter data structure
 1842:  * @toUnicode : non-zero if toUnicode. 0 otherwise.
 1843:  * @out:  a pointer to an array of bytes to store the result
 1844:  * @outlen:  the length of @out
 1845:  * @in:  a pointer to an array of ISO Latin 1 chars
 1846:  * @inlen:  the length of @in
 1847:  *
 1848:  * Returns 0 if success, or
 1849:  *     -1 by lack of space, or
 1850:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 1851:  *        the result of transformation can't fit into the encoding we want), or
 1852:  *     -3 if there the last byte can't form a single output char.
 1853:  *
 1854:  * The value of @inlen after return is the number of octets consumed
 1855:  *     as the return value is positive, else unpredictable.
 1856:  * The value of @outlen after return is the number of ocetes consumed.
 1857:  */
 1858: static int
 1859: xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
 1860:                 const unsigned char *in, int *inlen) {
 1861:     const char *ucv_in = (const char *) in;
 1862:     char *ucv_out = (char *) out;
 1863:     UErrorCode err = U_ZERO_ERROR;
 1864: 
 1865:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
 1866:         if (outlen != NULL) *outlen = 0;
 1867:         return(-1);
 1868:     }
 1869: 
 1870:     /*
 1871:      * TODO(jungshik)
 1872:      * 1. is ucnv_convert(To|From)Algorithmic better?
 1873:      * 2. had we better use an explicit pivot buffer?
 1874:      * 3. error returned comes from 'fromUnicode' only even
 1875:      *    when toUnicode is true !
 1876:      */
 1877:     if (toUnicode) {
 1878:         /* encoding => UTF-16 => UTF-8 */
 1879:         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
 1880:                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
 1881:                        0, TRUE, &err);
 1882:     } else {
 1883:         /* UTF-8 => UTF-16 => encoding */
 1884:         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
 1885:                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
 1886:                        0, TRUE, &err);
 1887:     }
 1888:     *inlen = ucv_in - (const char*) in;
 1889:     *outlen = ucv_out - (char *) out;
 1890:     if (U_SUCCESS(err))
 1891:         return 0;
 1892:     if (err == U_BUFFER_OVERFLOW_ERROR)
 1893:         return -1;
 1894:     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
 1895:         return -2;
 1896:     /* if (err == U_TRUNCATED_CHAR_FOUND) */
 1897:     return -3;
 1898: }
 1899: #endif /* LIBXML_ICU_ENABLED */
 1900: 
 1901: /************************************************************************
 1902:  *									*
 1903:  *		The real API used by libxml for on-the-fly conversion	*
 1904:  *									*
 1905:  ************************************************************************/
 1906: 
 1907: /**
 1908:  * xmlCharEncFirstLineInt:
 1909:  * @handler:	char enconding transformation data structure
 1910:  * @out:  an xmlBuffer for the output.
 1911:  * @in:  an xmlBuffer for the input
 1912:  * @len:  number of bytes to convert for the first line, or -1
 1913:  *
 1914:  * Front-end for the encoding handler input function, but handle only
 1915:  * the very first line, i.e. limit itself to 45 chars.
 1916:  *
 1917:  * Returns the number of byte written if success, or
 1918:  *     -1 general error
 1919:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 1920:  *        the result of transformation can't fit into the encoding we want), or
 1921:  */
 1922: int
 1923: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
 1924:                        xmlBufferPtr in, int len) {
 1925:     int ret = -2;
 1926:     int written;
 1927:     int toconv;
 1928: 
 1929:     if (handler == NULL) return(-1);
 1930:     if (out == NULL) return(-1);
 1931:     if (in == NULL) return(-1);
 1932: 
 1933:     /* calculate space available */
 1934:     written = out->size - out->use - 1; /* count '\0' */
 1935:     toconv = in->use;
 1936:     /*
 1937:      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
 1938:      * 45 chars should be sufficient to reach the end of the encoding
 1939:      * declaration without going too far inside the document content.
 1940:      * on UTF-16 this means 90bytes, on UCS4 this means 180
 1941:      * The actual value depending on guessed encoding is passed as @len
 1942:      * if provided
 1943:      */
 1944:     if (len >= 0) {
 1945:         if (toconv > len)
 1946:             toconv = len;
 1947:     } else {
 1948:         if (toconv > 180)
 1949:             toconv = 180;
 1950:     }
 1951:     if (toconv * 2 >= written) {
 1952:         xmlBufferGrow(out, toconv * 2);
 1953: 	written = out->size - out->use - 1;
 1954:     }
 1955: 
 1956:     if (handler->input != NULL) {
 1957: 	ret = handler->input(&out->content[out->use], &written,
 1958: 	                     in->content, &toconv);
 1959: 	xmlBufferShrink(in, toconv);
 1960: 	out->use += written;
 1961: 	out->content[out->use] = 0;
 1962:     }
 1963: #ifdef LIBXML_ICONV_ENABLED
 1964:     else if (handler->iconv_in != NULL) {
 1965: 	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
 1966: 	                      &written, in->content, &toconv);
 1967: 	xmlBufferShrink(in, toconv);
 1968: 	out->use += written;
 1969: 	out->content[out->use] = 0;
 1970: 	if (ret == -1) ret = -3;
 1971:     }
 1972: #endif /* LIBXML_ICONV_ENABLED */
 1973: #ifdef LIBXML_ICU_ENABLED
 1974:     else if (handler->uconv_in != NULL) {
 1975: 	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
 1976: 	                      &written, in->content, &toconv);
 1977: 	xmlBufferShrink(in, toconv);
 1978: 	out->use += written;
 1979: 	out->content[out->use] = 0;
 1980: 	if (ret == -1) ret = -3;
 1981:     }
 1982: #endif /* LIBXML_ICU_ENABLED */
 1983: #ifdef DEBUG_ENCODING
 1984:     switch (ret) {
 1985:         case 0:
 1986: 	    xmlGenericError(xmlGenericErrorContext,
 1987: 		    "converted %d bytes to %d bytes of input\n",
 1988: 	            toconv, written);
 1989: 	    break;
 1990:         case -1:
 1991: 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
 1992: 	            toconv, written, in->use);
 1993: 	    break;
 1994:         case -2:
 1995: 	    xmlGenericError(xmlGenericErrorContext,
 1996: 		    "input conversion failed due to input error\n");
 1997: 	    break;
 1998:         case -3:
 1999: 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
 2000: 	            toconv, written, in->use);
 2001: 	    break;
 2002: 	default:
 2003: 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
 2004:     }
 2005: #endif /* DEBUG_ENCODING */
 2006:     /*
 2007:      * Ignore when input buffer is not on a boundary
 2008:      */
 2009:     if (ret == -3) ret = 0;
 2010:     if (ret == -1) ret = 0;
 2011:     return(ret);
 2012: }
 2013: 
 2014: /**
 2015:  * xmlCharEncFirstLine:
 2016:  * @handler:	char enconding transformation data structure
 2017:  * @out:  an xmlBuffer for the output.
 2018:  * @in:  an xmlBuffer for the input
 2019:  *
 2020:  * Front-end for the encoding handler input function, but handle only
 2021:  * the very first line, i.e. limit itself to 45 chars.
 2022:  *
 2023:  * Returns the number of byte written if success, or
 2024:  *     -1 general error
 2025:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 2026:  *        the result of transformation can't fit into the encoding we want), or
 2027:  */
 2028: int
 2029: xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
 2030:                  xmlBufferPtr in) {
 2031:     return(xmlCharEncFirstLineInt(handler, out, in, -1));
 2032: }
 2033: 
 2034: /**
 2035:  * xmlCharEncFirstLineInput:
 2036:  * @input: a parser input buffer
 2037:  * @len:  number of bytes to convert for the first line, or -1
 2038:  *
 2039:  * Front-end for the encoding handler input function, but handle only
 2040:  * the very first line. Point is that this is based on autodetection
 2041:  * of the encoding and once that first line is converted we may find
 2042:  * out that a different decoder is needed to process the input.
 2043:  *
 2044:  * Returns the number of byte written if success, or
 2045:  *     -1 general error
 2046:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 2047:  *        the result of transformation can't fit into the encoding we want), or
 2048:  */
 2049: int
 2050: xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
 2051: {
 2052:     int ret = -2;
 2053:     size_t written;
 2054:     size_t toconv;
 2055:     int c_in;
 2056:     int c_out;
 2057:     xmlBufPtr in;
 2058:     xmlBufPtr out;
 2059: 
 2060:     if ((input == NULL) || (input->encoder == NULL) ||
 2061:         (input->buffer == NULL) || (input->raw == NULL))
 2062:         return (-1);
 2063:     out = input->buffer;
 2064:     in = input->raw;
 2065: 
 2066:     toconv = xmlBufUse(in);
 2067:     if (toconv == 0)
 2068:         return (0);
 2069:     written = xmlBufAvail(out) - 1; /* count '\0' */
 2070:     /*
 2071:      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
 2072:      * 45 chars should be sufficient to reach the end of the encoding
 2073:      * declaration without going too far inside the document content.
 2074:      * on UTF-16 this means 90bytes, on UCS4 this means 180
 2075:      * The actual value depending on guessed encoding is passed as @len
 2076:      * if provided
 2077:      */
 2078:     if (len >= 0) {
 2079:         if (toconv > (unsigned int) len)
 2080:             toconv = len;
 2081:     } else {
 2082:         if (toconv > 180)
 2083:             toconv = 180;
 2084:     }
 2085:     if (toconv * 2 >= written) {
 2086:         xmlBufGrow(out, toconv * 2);
 2087:         written = xmlBufAvail(out) - 1;
 2088:     }
 2089:     if (written > 360)
 2090:         written = 360;
 2091: 
 2092:     c_in = toconv;
 2093:     c_out = written;
 2094:     if (input->encoder->input != NULL) {
 2095:         ret = input->encoder->input(xmlBufEnd(out), &c_out,
 2096:                                     xmlBufContent(in), &c_in);
 2097:         xmlBufShrink(in, c_in);
 2098:         xmlBufAddLen(out, c_out);
 2099:     }
 2100: #ifdef LIBXML_ICONV_ENABLED
 2101:     else if (input->encoder->iconv_in != NULL) {
 2102:         ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
 2103:                               &c_out, xmlBufContent(in), &c_in);
 2104:         xmlBufShrink(in, c_in);
 2105:         xmlBufAddLen(out, c_out);
 2106:         if (ret == -1)
 2107:             ret = -3;
 2108:     }
 2109: #endif /* LIBXML_ICONV_ENABLED */
 2110: #ifdef LIBXML_ICU_ENABLED
 2111:     else if (input->encoder->uconv_in != NULL) {
 2112:         ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
 2113:                               &c_out, xmlBufContent(in), &c_in);
 2114:         xmlBufShrink(in, c_in);
 2115:         xmlBufAddLen(out, c_out);
 2116:         if (ret == -1)
 2117:             ret = -3;
 2118:     }
 2119: #endif /* LIBXML_ICU_ENABLED */
 2120:     switch (ret) {
 2121:         case 0:
 2122: #ifdef DEBUG_ENCODING
 2123:             xmlGenericError(xmlGenericErrorContext,
 2124:                             "converted %d bytes to %d bytes of input\n",
 2125:                             c_in, c_out);
 2126: #endif
 2127:             break;
 2128:         case -1:
 2129: #ifdef DEBUG_ENCODING
 2130:             xmlGenericError(xmlGenericErrorContext,
 2131:                          "converted %d bytes to %d bytes of input, %d left\n",
 2132:                             c_in, c_out, (int)xmlBufUse(in));
 2133: #endif
 2134:             break;
 2135:         case -3:
 2136: #ifdef DEBUG_ENCODING
 2137:             xmlGenericError(xmlGenericErrorContext,
 2138:                         "converted %d bytes to %d bytes of input, %d left\n",
 2139:                             c_in, c_out, (int)xmlBufUse(in));
 2140: #endif
 2141:             break;
 2142:         case -2: {
 2143:             char buf[50];
 2144:             const xmlChar *content = xmlBufContent(in);
 2145: 
 2146: 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
 2147: 		     content[0], content[1],
 2148: 		     content[2], content[3]);
 2149: 	    buf[49] = 0;
 2150: 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
 2151: 		    "input conversion failed due to input error, bytes %s\n",
 2152: 		           buf);
 2153:         }
 2154:     }
 2155:     /*
 2156:      * Ignore when input buffer is not on a boundary
 2157:      */
 2158:     if (ret == -3) ret = 0;
 2159:     if (ret == -1) ret = 0;
 2160:     return(ret);
 2161: }
 2162: 
 2163: /**
 2164:  * xmlCharEncInput:
 2165:  * @input: a parser input buffer
 2166:  * @flush: try to flush all the raw buffer
 2167:  *
 2168:  * Generic front-end for the encoding handler on parser input
 2169:  *
 2170:  * Returns the number of byte written if success, or
 2171:  *     -1 general error
 2172:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 2173:  *        the result of transformation can't fit into the encoding we want), or
 2174:  */
 2175: int
 2176: xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
 2177: {
 2178:     int ret = -2;
 2179:     size_t written;
 2180:     size_t toconv;
 2181:     int c_in;
 2182:     int c_out;
 2183:     xmlBufPtr in;
 2184:     xmlBufPtr out;
 2185: 
 2186:     if ((input == NULL) || (input->encoder == NULL) ||
 2187:         (input->buffer == NULL) || (input->raw == NULL))
 2188:         return (-1);
 2189:     out = input->buffer;
 2190:     in = input->raw;
 2191: 
 2192:     toconv = xmlBufUse(in);
 2193:     if (toconv == 0)
 2194:         return (0);
 2195:     if ((toconv > 64 * 1024) && (flush == 0))
 2196:         toconv = 64 * 1024;
 2197:     written = xmlBufAvail(out);
 2198:     if (written > 0)
 2199:         written--; /* count '\0' */
 2200:     if (toconv * 2 >= written) {
 2201:         xmlBufGrow(out, toconv * 2);
 2202:         written = xmlBufAvail(out);
 2203:         if (written > 0)
 2204:             written--; /* count '\0' */
 2205:     }
 2206:     if ((written > 128 * 1024) && (flush == 0))
 2207:         written = 128 * 1024;
 2208: 
 2209:     c_in = toconv;
 2210:     c_out = written;
 2211:     if (input->encoder->input != NULL) {
 2212:         ret = input->encoder->input(xmlBufEnd(out), &c_out,
 2213:                                     xmlBufContent(in), &c_in);
 2214:         xmlBufShrink(in, c_in);
 2215:         xmlBufAddLen(out, c_out);
 2216:     }
 2217: #ifdef LIBXML_ICONV_ENABLED
 2218:     else if (input->encoder->iconv_in != NULL) {
 2219:         ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
 2220:                               &c_out, xmlBufContent(in), &c_in);
 2221:         xmlBufShrink(in, c_in);
 2222:         xmlBufAddLen(out, c_out);
 2223:         if (ret == -1)
 2224:             ret = -3;
 2225:     }
 2226: #endif /* LIBXML_ICONV_ENABLED */
 2227: #ifdef LIBXML_ICU_ENABLED
 2228:     else if (input->encoder->uconv_in != NULL) {
 2229:         ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
 2230:                               &c_out, xmlBufContent(in), &c_in);
 2231:         xmlBufShrink(in, c_in);
 2232:         xmlBufAddLen(out, c_out);
 2233:         if (ret == -1)
 2234:             ret = -3;
 2235:     }
 2236: #endif /* LIBXML_ICU_ENABLED */
 2237:     switch (ret) {
 2238:         case 0:
 2239: #ifdef DEBUG_ENCODING
 2240:             xmlGenericError(xmlGenericErrorContext,
 2241:                             "converted %d bytes to %d bytes of input\n",
 2242:                             c_in, c_out);
 2243: #endif
 2244:             break;
 2245:         case -1:
 2246: #ifdef DEBUG_ENCODING
 2247:             xmlGenericError(xmlGenericErrorContext,
 2248:                          "converted %d bytes to %d bytes of input, %d left\n",
 2249:                             c_in, c_out, (int)xmlBufUse(in));
 2250: #endif
 2251:             break;
 2252:         case -3:
 2253: #ifdef DEBUG_ENCODING
 2254:             xmlGenericError(xmlGenericErrorContext,
 2255:                         "converted %d bytes to %d bytes of input, %d left\n",
 2256:                             c_in, c_out, (int)xmlBufUse(in));
 2257: #endif
 2258:             break;
 2259:         case -2: {
 2260:             char buf[50];
 2261:             const xmlChar *content = xmlBufContent(in);
 2262: 
 2263: 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
 2264: 		     content[0], content[1],
 2265: 		     content[2], content[3]);
 2266: 	    buf[49] = 0;
 2267: 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
 2268: 		    "input conversion failed due to input error, bytes %s\n",
 2269: 		           buf);
 2270:         }
 2271:     }
 2272:     /*
 2273:      * Ignore when input buffer is not on a boundary
 2274:      */
 2275:     if (ret == -3)
 2276:         ret = 0;
 2277:     return (c_out? c_out : ret);
 2278: }
 2279: 
 2280: /**
 2281:  * xmlCharEncInFunc:
 2282:  * @handler:	char encoding transformation data structure
 2283:  * @out:  an xmlBuffer for the output.
 2284:  * @in:  an xmlBuffer for the input
 2285:  *
 2286:  * Generic front-end for the encoding handler input function
 2287:  *
 2288:  * Returns the number of byte written if success, or
 2289:  *     -1 general error
 2290:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 2291:  *        the result of transformation can't fit into the encoding we want), or
 2292:  */
 2293: int
 2294: xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
 2295:                  xmlBufferPtr in)
 2296: {
 2297:     int ret = -2;
 2298:     int written;
 2299:     int toconv;
 2300: 
 2301:     if (handler == NULL)
 2302:         return (-1);
 2303:     if (out == NULL)
 2304:         return (-1);
 2305:     if (in == NULL)
 2306:         return (-1);
 2307: 
 2308:     toconv = in->use;
 2309:     if (toconv == 0)
 2310:         return (0);
 2311:     written = out->size - out->use -1; /* count '\0' */
 2312:     if (toconv * 2 >= written) {
 2313:         xmlBufferGrow(out, out->size + toconv * 2);
 2314:         written = out->size - out->use - 1;
 2315:     }
 2316:     if (handler->input != NULL) {
 2317:         ret = handler->input(&out->content[out->use], &written,
 2318:                              in->content, &toconv);
 2319:         xmlBufferShrink(in, toconv);
 2320:         out->use += written;
 2321:         out->content[out->use] = 0;
 2322:     }
 2323: #ifdef LIBXML_ICONV_ENABLED
 2324:     else if (handler->iconv_in != NULL) {
 2325:         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
 2326:                               &written, in->content, &toconv);
 2327:         xmlBufferShrink(in, toconv);
 2328:         out->use += written;
 2329:         out->content[out->use] = 0;
 2330:         if (ret == -1)
 2331:             ret = -3;
 2332:     }
 2333: #endif /* LIBXML_ICONV_ENABLED */
 2334: #ifdef LIBXML_ICU_ENABLED
 2335:     else if (handler->uconv_in != NULL) {
 2336:         ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
 2337:                               &written, in->content, &toconv);
 2338:         xmlBufferShrink(in, toconv);
 2339:         out->use += written;
 2340:         out->content[out->use] = 0;
 2341:         if (ret == -1)
 2342:             ret = -3;
 2343:     }
 2344: #endif /* LIBXML_ICU_ENABLED */
 2345:     switch (ret) {
 2346:         case 0:
 2347: #ifdef DEBUG_ENCODING
 2348:             xmlGenericError(xmlGenericErrorContext,
 2349:                             "converted %d bytes to %d bytes of input\n",
 2350:                             toconv, written);
 2351: #endif
 2352:             break;
 2353:         case -1:
 2354: #ifdef DEBUG_ENCODING
 2355:             xmlGenericError(xmlGenericErrorContext,
 2356:                          "converted %d bytes to %d bytes of input, %d left\n",
 2357:                             toconv, written, in->use);
 2358: #endif
 2359:             break;
 2360:         case -3:
 2361: #ifdef DEBUG_ENCODING
 2362:             xmlGenericError(xmlGenericErrorContext,
 2363:                         "converted %d bytes to %d bytes of input, %d left\n",
 2364:                             toconv, written, in->use);
 2365: #endif
 2366:             break;
 2367:         case -2: {
 2368:             char buf[50];
 2369: 
 2370: 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
 2371: 		     in->content[0], in->content[1],
 2372: 		     in->content[2], in->content[3]);
 2373: 	    buf[49] = 0;
 2374: 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
 2375: 		    "input conversion failed due to input error, bytes %s\n",
 2376: 		           buf);
 2377:         }
 2378:     }
 2379:     /*
 2380:      * Ignore when input buffer is not on a boundary
 2381:      */
 2382:     if (ret == -3)
 2383:         ret = 0;
 2384:     return (written? written : ret);
 2385: }
 2386: 
 2387: /**
 2388:  * xmlCharEncOutput:
 2389:  * @output: a parser output buffer
 2390:  * @init: is this an initialization call without data
 2391:  *
 2392:  * Generic front-end for the encoding handler on parser output
 2393:  * a first call with @init == 1 has to be made first to initiate the
 2394:  * output in case of non-stateless encoding needing to initiate their
 2395:  * state or the output (like the BOM in UTF16).
 2396:  * In case of UTF8 sequence conversion errors for the given encoder,
 2397:  * the content will be automatically remapped to a CharRef sequence.
 2398:  *
 2399:  * Returns the number of byte written if success, or
 2400:  *     -1 general error
 2401:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 2402:  *        the result of transformation can't fit into the encoding we want), or
 2403:  */
 2404: int
 2405: xmlCharEncOutput(xmlOutputBufferPtr output, int init)
 2406: {
 2407:     int ret = -2;
 2408:     size_t written;
 2409:     size_t writtentot = 0;
 2410:     size_t toconv;
 2411:     int c_in;
 2412:     int c_out;
 2413:     xmlBufPtr in;
 2414:     xmlBufPtr out;
 2415:     int charref_len = 0;
 2416: 
 2417:     if ((output == NULL) || (output->encoder == NULL) ||
 2418:         (output->buffer == NULL) || (output->conv == NULL))
 2419:         return (-1);
 2420:     out = output->conv;
 2421:     in = output->buffer;
 2422: 
 2423: retry:
 2424: 
 2425:     written = xmlBufAvail(out);
 2426:     if (written > 0)
 2427:         written--; /* count '\0' */
 2428: 
 2429:     /*
 2430:      * First specific handling of the initialization call
 2431:      */
 2432:     if (init) {
 2433:         c_in = 0;
 2434:         c_out = written;
 2435:         if (output->encoder->output != NULL) {
 2436:             ret = output->encoder->output(xmlBufEnd(out), &c_out,
 2437:                                           NULL, &c_in);
 2438:             if (ret > 0) /* Gennady: check return value */
 2439:                 xmlBufAddLen(out, c_out);
 2440:         }
 2441: #ifdef LIBXML_ICONV_ENABLED
 2442:         else if (output->encoder->iconv_out != NULL) {
 2443:             ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
 2444:                                   &c_out, NULL, &c_in);
 2445:             xmlBufAddLen(out, c_out);
 2446:         }
 2447: #endif /* LIBXML_ICONV_ENABLED */
 2448: #ifdef LIBXML_ICU_ENABLED
 2449:         else if (output->encoder->uconv_out != NULL) {
 2450:             ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
 2451:                                   &c_out, NULL, &c_in);
 2452:             xmlBufAddLen(out, c_out);
 2453:         }
 2454: #endif /* LIBXML_ICU_ENABLED */
 2455: #ifdef DEBUG_ENCODING
 2456: 	xmlGenericError(xmlGenericErrorContext,
 2457: 		"initialized encoder\n");
 2458: #endif
 2459:         return(0);
 2460:     }
 2461: 
 2462:     /*
 2463:      * Conversion itself.
 2464:      */
 2465:     toconv = xmlBufUse(in);
 2466:     if (toconv == 0)
 2467:         return (0);
 2468:     if (toconv > 64 * 1024)
 2469:         toconv = 64 * 1024;
 2470:     if (toconv * 4 >= written) {
 2471:         xmlBufGrow(out, toconv * 4);
 2472:         written = xmlBufAvail(out) - 1;
 2473:     }
 2474:     if (written > 256 * 1024)
 2475:         written = 256 * 1024;
 2476: 
 2477:     c_in = toconv;
 2478:     c_out = written;
 2479:     if (output->encoder->output != NULL) {
 2480:         ret = output->encoder->output(xmlBufEnd(out), &c_out,
 2481:                                       xmlBufContent(in), &c_in);
 2482:         if (c_out > 0) {
 2483:             xmlBufShrink(in, c_in);
 2484:             xmlBufAddLen(out, c_out);
 2485:             writtentot += c_out;
 2486:         }
 2487:     }
 2488: #ifdef LIBXML_ICONV_ENABLED
 2489:     else if (output->encoder->iconv_out != NULL) {
 2490:         ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
 2491:                               &c_out, xmlBufContent(in), &c_in);
 2492:         xmlBufShrink(in, c_in);
 2493:         xmlBufAddLen(out, c_out);
 2494:         writtentot += c_out;
 2495:         if (ret == -1) {
 2496:             if (c_out > 0) {
 2497:                 /*
 2498:                  * Can be a limitation of iconv
 2499:                  */
 2500:                 charref_len = 0;
 2501:                 goto retry;
 2502:             }
 2503:             ret = -3;
 2504:         }
 2505:     }
 2506: #endif /* LIBXML_ICONV_ENABLED */
 2507: #ifdef LIBXML_ICU_ENABLED
 2508:     else if (output->encoder->uconv_out != NULL) {
 2509:         ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
 2510:                               &c_out, xmlBufContent(in), &c_in);
 2511:         xmlBufShrink(in, c_in);
 2512:         xmlBufAddLen(out, c_out);
 2513:         writtentot += c_out;
 2514:         if (ret == -1) {
 2515:             if (c_out > 0) {
 2516:                 /*
 2517:                  * Can be a limitation of uconv
 2518:                  */
 2519:                 charref_len = 0;
 2520:                 goto retry;
 2521:             }
 2522:             ret = -3;
 2523:         }
 2524:     }
 2525: #endif /* LIBXML_ICU_ENABLED */
 2526:     else {
 2527:         xmlEncodingErr(XML_I18N_NO_OUTPUT,
 2528:                        "xmlCharEncOutFunc: no output function !\n", NULL);
 2529:         return(-1);
 2530:     }
 2531: 
 2532:     if (ret >= 0) output += ret;
 2533: 
 2534:     /*
 2535:      * Attempt to handle error cases
 2536:      */
 2537:     switch (ret) {
 2538:         case 0:
 2539: #ifdef DEBUG_ENCODING
 2540: 	    xmlGenericError(xmlGenericErrorContext,
 2541: 		    "converted %d bytes to %d bytes of output\n",
 2542: 	            c_in, c_out);
 2543: #endif
 2544: 	    break;
 2545:         case -1:
 2546: #ifdef DEBUG_ENCODING
 2547: 	    xmlGenericError(xmlGenericErrorContext,
 2548: 		    "output conversion failed by lack of space\n");
 2549: #endif
 2550: 	    break;
 2551:         case -3:
 2552: #ifdef DEBUG_ENCODING
 2553: 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
 2554: 	            c_in, c_out, (int) xmlBufUse(in));
 2555: #endif
 2556: 	    break;
 2557:         case -2: {
 2558: 	    int len = (int) xmlBufUse(in);
 2559:             xmlChar *content = xmlBufContent(in);
 2560: 	    int cur;
 2561: 
 2562: 	    cur = xmlGetUTF8Char(content, &len);
 2563: 	    if ((charref_len != 0) && (c_out < charref_len)) {
 2564: 		/*
 2565: 		 * We attempted to insert a character reference and failed.
 2566: 		 * Undo what was written and skip the remaining charref.
 2567: 		 */
 2568:                 xmlBufErase(out, c_out);
 2569: 		writtentot -= c_out;
 2570: 		xmlBufShrink(in, charref_len - c_out);
 2571: 		charref_len = 0;
 2572: 
 2573: 		ret = -1;
 2574:                 break;
 2575: 	    } else if (cur > 0) {
 2576: 		xmlChar charref[20];
 2577: 
 2578: #ifdef DEBUG_ENCODING
 2579: 		xmlGenericError(xmlGenericErrorContext,
 2580: 			"handling output conversion error\n");
 2581: 		xmlGenericError(xmlGenericErrorContext,
 2582: 			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
 2583: 			content[0], content[1],
 2584: 			content[2], content[3]);
 2585: #endif
 2586: 		/*
 2587: 		 * Removes the UTF8 sequence, and replace it by a charref
 2588: 		 * and continue the transcoding phase, hoping the error
 2589: 		 * did not mangle the encoder state.
 2590: 		 */
 2591: 		charref_len = snprintf((char *) &charref[0], sizeof(charref),
 2592: 				 "&#%d;", cur);
 2593: 		xmlBufShrink(in, len);
 2594: 		xmlBufAddHead(in, charref, -1);
 2595: 
 2596: 		goto retry;
 2597: 	    } else {
 2598: 		char buf[50];
 2599: 
 2600: 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
 2601: 			 content[0], content[1],
 2602: 			 content[2], content[3]);
 2603: 		buf[49] = 0;
 2604: 		xmlEncodingErr(XML_I18N_CONV_FAILED,
 2605: 		    "output conversion failed due to conv error, bytes %s\n",
 2606: 			       buf);
 2607: 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
 2608: 		    content[0] = ' ';
 2609: 	    }
 2610: 	    break;
 2611: 	}
 2612:     }
 2613:     return(ret);
 2614: }
 2615: 
 2616: /**
 2617:  * xmlCharEncOutFunc:
 2618:  * @handler:	char enconding transformation data structure
 2619:  * @out:  an xmlBuffer for the output.
 2620:  * @in:  an xmlBuffer for the input
 2621:  *
 2622:  * Generic front-end for the encoding handler output function
 2623:  * a first call with @in == NULL has to be made firs to initiate the
 2624:  * output in case of non-stateless encoding needing to initiate their
 2625:  * state or the output (like the BOM in UTF16).
 2626:  * In case of UTF8 sequence conversion errors for the given encoder,
 2627:  * the content will be automatically remapped to a CharRef sequence.
 2628:  *
 2629:  * Returns the number of byte written if success, or
 2630:  *     -1 general error
 2631:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
 2632:  *        the result of transformation can't fit into the encoding we want), or
 2633:  */
 2634: int
 2635: xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
 2636:                   xmlBufferPtr in) {
 2637:     int ret = -2;
 2638:     int written;
 2639:     int writtentot = 0;
 2640:     int toconv;
 2641:     int output = 0;
 2642:     int charref_len = 0;
 2643: 
 2644:     if (handler == NULL) return(-1);
 2645:     if (out == NULL) return(-1);
 2646: 
 2647: retry:
 2648: 
 2649:     written = out->size - out->use;
 2650: 
 2651:     if (written > 0)
 2652: 	written--; /* Gennady: count '/0' */
 2653: 
 2654:     /*
 2655:      * First specific handling of in = NULL, i.e. the initialization call
 2656:      */
 2657:     if (in == NULL) {
 2658:         toconv = 0;
 2659: 	if (handler->output != NULL) {
 2660: 	    ret = handler->output(&out->content[out->use], &written,
 2661: 				  NULL, &toconv);
 2662: 	    if (ret >= 0) { /* Gennady: check return value */
 2663: 		out->use += written;
 2664: 		out->content[out->use] = 0;
 2665: 	    }
 2666: 	}
 2667: #ifdef LIBXML_ICONV_ENABLED
 2668: 	else if (handler->iconv_out != NULL) {
 2669: 	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
 2670: 				  &written, NULL, &toconv);
 2671: 	    out->use += written;
 2672: 	    out->content[out->use] = 0;
 2673: 	}
 2674: #endif /* LIBXML_ICONV_ENABLED */
 2675: #ifdef LIBXML_ICU_ENABLED
 2676: 	else if (handler->uconv_out != NULL) {
 2677: 	    ret = xmlUconvWrapper(handler->uconv_out, 0,
 2678:                               &out->content[out->use],
 2679: 				              &written, NULL, &toconv);
 2680: 	    out->use += written;
 2681: 	    out->content[out->use] = 0;
 2682: 	}
 2683: #endif /* LIBXML_ICU_ENABLED */
 2684: #ifdef DEBUG_ENCODING
 2685: 	xmlGenericError(xmlGenericErrorContext,
 2686: 		"initialized encoder\n");
 2687: #endif
 2688:         return(0);
 2689:     }
 2690: 
 2691:     /*
 2692:      * Conversion itself.
 2693:      */
 2694:     toconv = in->use;
 2695:     if (toconv == 0)
 2696: 	return(0);
 2697:     if (toconv * 4 >= written) {
 2698:         xmlBufferGrow(out, toconv * 4);
 2699: 	written = out->size - out->use - 1;
 2700:     }
 2701:     if (handler->output != NULL) {
 2702: 	ret = handler->output(&out->content[out->use], &written,
 2703: 	                      in->content, &toconv);
 2704: 	if (written > 0) {
 2705: 	    xmlBufferShrink(in, toconv);
 2706: 	    out->use += written;
 2707: 	    writtentot += written;
 2708: 	}
 2709: 	out->content[out->use] = 0;
 2710:     }
 2711: #ifdef LIBXML_ICONV_ENABLED
 2712:     else if (handler->iconv_out != NULL) {
 2713: 	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
 2714: 	                      &written, in->content, &toconv);
 2715: 	xmlBufferShrink(in, toconv);
 2716: 	out->use += written;
 2717: 	writtentot += written;
 2718: 	out->content[out->use] = 0;
 2719: 	if (ret == -1) {
 2720: 	    if (written > 0) {
 2721: 		/*
 2722: 		 * Can be a limitation of iconv
 2723: 		 */
 2724:                 charref_len = 0;
 2725: 		goto retry;
 2726: 	    }
 2727: 	    ret = -3;
 2728: 	}
 2729:     }
 2730: #endif /* LIBXML_ICONV_ENABLED */
 2731: #ifdef LIBXML_ICU_ENABLED
 2732:     else if (handler->uconv_out != NULL) {
 2733: 	ret = xmlUconvWrapper(handler->uconv_out, 0,
 2734:                               &out->content[out->use],
 2735: 	                      &written, in->content, &toconv);
 2736: 	xmlBufferShrink(in, toconv);
 2737: 	out->use += written;
 2738: 	writtentot += written;
 2739: 	out->content[out->use] = 0;
 2740: 	if (ret == -1) {
 2741: 	    if (written > 0) {
 2742: 		/*
 2743: 		 * Can be a limitation of iconv
 2744: 		 */
 2745:                 charref_len = 0;
 2746: 		goto retry;
 2747: 	    }
 2748: 	    ret = -3;
 2749: 	}
 2750:     }
 2751: #endif /* LIBXML_ICU_ENABLED */
 2752:     else {
 2753: 	xmlEncodingErr(XML_I18N_NO_OUTPUT,
 2754: 		       "xmlCharEncOutFunc: no output function !\n", NULL);
 2755: 	return(-1);
 2756:     }
 2757: 
 2758:     if (ret >= 0) output += ret;
 2759: 
 2760:     /*
 2761:      * Attempt to handle error cases
 2762:      */
 2763:     switch (ret) {
 2764:         case 0:
 2765: #ifdef DEBUG_ENCODING
 2766: 	    xmlGenericError(xmlGenericErrorContext,
 2767: 		    "converted %d bytes to %d bytes of output\n",
 2768: 	            toconv, written);
 2769: #endif
 2770: 	    break;
 2771:         case -1:
 2772: #ifdef DEBUG_ENCODING
 2773: 	    xmlGenericError(xmlGenericErrorContext,
 2774: 		    "output conversion failed by lack of space\n");
 2775: #endif
 2776: 	    break;
 2777:         case -3:
 2778: #ifdef DEBUG_ENCODING
 2779: 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
 2780: 	            toconv, written, in->use);
 2781: #endif
 2782: 	    break;
 2783:         case -2: {
 2784: 	    int len = in->use;
 2785: 	    const xmlChar *utf = (const xmlChar *) in->content;
 2786: 	    int cur;
 2787: 
 2788: 	    cur = xmlGetUTF8Char(utf, &len);
 2789: 	    if ((charref_len != 0) && (written < charref_len)) {
 2790: 		/*
 2791: 		 * We attempted to insert a character reference and failed.
 2792: 		 * Undo what was written and skip the remaining charref.
 2793: 		 */
 2794: 		out->use -= written;
 2795: 		writtentot -= written;
 2796: 		xmlBufferShrink(in, charref_len - written);
 2797: 		charref_len = 0;
 2798: 
 2799: 		ret = -1;
 2800:                 break;
 2801: 	    } else if (cur > 0) {
 2802: 		xmlChar charref[20];
 2803: 
 2804: #ifdef DEBUG_ENCODING
 2805: 		xmlGenericError(xmlGenericErrorContext,
 2806: 			"handling output conversion error\n");
 2807: 		xmlGenericError(xmlGenericErrorContext,
 2808: 			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
 2809: 			in->content[0], in->content[1],
 2810: 			in->content[2], in->content[3]);
 2811: #endif
 2812: 		/*
 2813: 		 * Removes the UTF8 sequence, and replace it by a charref
 2814: 		 * and continue the transcoding phase, hoping the error
 2815: 		 * did not mangle the encoder state.
 2816: 		 */
 2817: 		charref_len = snprintf((char *) &charref[0], sizeof(charref),
 2818: 				 "&#%d;", cur);
 2819: 		xmlBufferShrink(in, len);
 2820: 		xmlBufferAddHead(in, charref, -1);
 2821: 
 2822: 		goto retry;
 2823: 	    } else {
 2824: 		char buf[50];
 2825: 
 2826: 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
 2827: 			 in->content[0], in->content[1],
 2828: 			 in->content[2], in->content[3]);
 2829: 		buf[49] = 0;
 2830: 		xmlEncodingErr(XML_I18N_CONV_FAILED,
 2831: 		    "output conversion failed due to conv error, bytes %s\n",
 2832: 			       buf);
 2833: 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
 2834: 		    in->content[0] = ' ';
 2835: 	    }
 2836: 	    break;
 2837: 	}
 2838:     }
 2839:     return(ret);
 2840: }
 2841: 
 2842: /**
 2843:  * xmlCharEncCloseFunc:
 2844:  * @handler:	char enconding transformation data structure
 2845:  *
 2846:  * Generic front-end for encoding handler close function
 2847:  *
 2848:  * Returns 0 if success, or -1 in case of error
 2849:  */
 2850: int
 2851: xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
 2852:     int ret = 0;
 2853:     int tofree = 0;
 2854:     if (handler == NULL) return(-1);
 2855:     if (handler->name == NULL) return(-1);
 2856: #ifdef LIBXML_ICONV_ENABLED
 2857:     /*
 2858:      * Iconv handlers can be used only once, free the whole block.
 2859:      * and the associated icon resources.
 2860:      */
 2861:     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
 2862:         tofree = 1;
 2863: 	if (handler->iconv_out != NULL) {
 2864: 	    if (iconv_close(handler->iconv_out))
 2865: 		ret = -1;
 2866: 	    handler->iconv_out = NULL;
 2867: 	}
 2868: 	if (handler->iconv_in != NULL) {
 2869: 	    if (iconv_close(handler->iconv_in))
 2870: 		ret = -1;
 2871: 	    handler->iconv_in = NULL;
 2872: 	}
 2873:     }
 2874: #endif /* LIBXML_ICONV_ENABLED */
 2875: #ifdef LIBXML_ICU_ENABLED
 2876:     if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
 2877:         tofree = 1;
 2878: 	if (handler->uconv_out != NULL) {
 2879: 	    closeIcuConverter(handler->uconv_out);
 2880: 	    handler->uconv_out = NULL;
 2881: 	}
 2882: 	if (handler->uconv_in != NULL) {
 2883: 	    closeIcuConverter(handler->uconv_in);
 2884: 	    handler->uconv_in = NULL;
 2885: 	}
 2886:     }
 2887: #endif
 2888:     if (tofree) {
 2889:         /* free up only dynamic handlers iconv/uconv */
 2890:         if (handler->name != NULL)
 2891:             xmlFree(handler->name);
 2892:         handler->name = NULL;
 2893:         xmlFree(handler);
 2894:     }
 2895: #ifdef DEBUG_ENCODING
 2896:     if (ret)
 2897:         xmlGenericError(xmlGenericErrorContext,
 2898: 		"failed to close the encoding handler\n");
 2899:     else
 2900:         xmlGenericError(xmlGenericErrorContext,
 2901: 		"closed the encoding handler\n");
 2902: #endif
 2903: 
 2904:     return(ret);
 2905: }
 2906: 
 2907: /**
 2908:  * xmlByteConsumed:
 2909:  * @ctxt: an XML parser context
 2910:  *
 2911:  * This function provides the current index of the parser relative
 2912:  * to the start of the current entity. This function is computed in
 2913:  * bytes from the beginning starting at zero and finishing at the
 2914:  * size in byte of the file if parsing a file. The function is
 2915:  * of constant cost if the input is UTF-8 but can be costly if run
 2916:  * on non-UTF-8 input.
 2917:  *
 2918:  * Returns the index in bytes from the beginning of the entity or -1
 2919:  *         in case the index could not be computed.
 2920:  */
 2921: long
 2922: xmlByteConsumed(xmlParserCtxtPtr ctxt) {
 2923:     xmlParserInputPtr in;
 2924: 
 2925:     if (ctxt == NULL) return(-1);
 2926:     in = ctxt->input;
 2927:     if (in == NULL)  return(-1);
 2928:     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
 2929:         unsigned int unused = 0;
 2930: 	xmlCharEncodingHandler * handler = in->buf->encoder;
 2931:         /*
 2932: 	 * Encoding conversion, compute the number of unused original
 2933: 	 * bytes from the input not consumed and substract that from
 2934: 	 * the raw consumed value, this is not a cheap operation
 2935: 	 */
 2936:         if (in->end - in->cur > 0) {
 2937: 	    unsigned char convbuf[32000];
 2938: 	    const unsigned char *cur = (const unsigned char *)in->cur;
 2939: 	    int toconv = in->end - in->cur, written = 32000;
 2940: 
 2941: 	    int ret;
 2942: 
 2943: 	    if (handler->output != NULL) {
 2944: 	        do {
 2945: 		    toconv = in->end - cur;
 2946: 		    written = 32000;
 2947: 		    ret = handler->output(&convbuf[0], &written,
 2948: 				      cur, &toconv);
 2949: 		    if (ret == -1) return(-1);
 2950: 		    unused += written;
 2951: 		    cur += toconv;
 2952: 		} while (ret == -2);
 2953: #ifdef LIBXML_ICONV_ENABLED
 2954: 	    } else if (handler->iconv_out != NULL) {
 2955: 	        do {
 2956: 		    toconv = in->end - cur;
 2957: 		    written = 32000;
 2958: 		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
 2959: 	                      &written, cur, &toconv);
 2960: 		    if (ret < 0) {
 2961: 		        if (written > 0)
 2962: 			    ret = -2;
 2963: 			else
 2964: 			    return(-1);
 2965: 		    }
 2966: 		    unused += written;
 2967: 		    cur += toconv;
 2968: 		} while (ret == -2);
 2969: #endif
 2970: #ifdef LIBXML_ICU_ENABLED
 2971: 	    } else if (handler->uconv_out != NULL) {
 2972: 	        do {
 2973: 		    toconv = in->end - cur;
 2974: 		    written = 32000;
 2975: 		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
 2976: 	                      &written, cur, &toconv);
 2977: 		    if (ret < 0) {
 2978: 		        if (written > 0)
 2979: 			    ret = -2;
 2980: 			else
 2981: 			    return(-1);
 2982: 		    }
 2983: 		    unused += written;
 2984: 		    cur += toconv;
 2985: 		} while (ret == -2);
 2986: #endif
 2987:             } else {
 2988: 	        /* could not find a converter */
 2989: 	        return(-1);
 2990: 	    }
 2991: 	}
 2992: 	if (in->buf->rawconsumed < unused)
 2993: 	    return(-1);
 2994: 	return(in->buf->rawconsumed - unused);
 2995:     }
 2996:     return(in->consumed + (in->cur - in->base));
 2997: }
 2998: 
 2999: #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
 3000: #ifdef LIBXML_ISO8859X_ENABLED
 3001: 
 3002: /**
 3003:  * UTF8ToISO8859x:
 3004:  * @out:  a pointer to an array of bytes to store the result
 3005:  * @outlen:  the length of @out
 3006:  * @in:  a pointer to an array of UTF-8 chars
 3007:  * @inlen:  the length of @in
 3008:  * @xlattable: the 2-level transcoding table
 3009:  *
 3010:  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 3011:  * block of chars out.
 3012:  *
 3013:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 3014:  * The value of @inlen after return is the number of octets consumed
 3015:  *     as the return value is positive, else unpredictable.
 3016:  * The value of @outlen after return is the number of ocetes consumed.
 3017:  */
 3018: static int
 3019: UTF8ToISO8859x(unsigned char* out, int *outlen,
 3020:               const unsigned char* in, int *inlen,
 3021:               unsigned char const *xlattable) {
 3022:     const unsigned char* outstart = out;
 3023:     const unsigned char* inend;
 3024:     const unsigned char* instart = in;
 3025:     const unsigned char* processed = in;
 3026: 
 3027:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
 3028:         (xlattable == NULL))
 3029: 	return(-1);
 3030:     if (in == NULL) {
 3031:         /*
 3032:         * initialization nothing to do
 3033:         */
 3034:         *outlen = 0;
 3035:         *inlen = 0;
 3036:         return(0);
 3037:     }
 3038:     inend = in + (*inlen);
 3039:     while (in < inend) {
 3040:         unsigned char d = *in++;
 3041:         if  (d < 0x80)  {
 3042:             *out++ = d;
 3043:         } else if (d < 0xC0) {
 3044:             /* trailing byte in leading position */
 3045:             *outlen = out - outstart;
 3046:             *inlen = processed - instart;
 3047:             return(-2);
 3048:         } else if (d < 0xE0) {
 3049:             unsigned char c;
 3050:             if (!(in < inend)) {
 3051:                 /* trailing byte not in input buffer */
 3052:                 *outlen = out - outstart;
 3053:                 *inlen = processed - instart;
 3054:                 return(-3);
 3055:             }
 3056:             c = *in++;
 3057:             if ((c & 0xC0) != 0x80) {
 3058:                 /* not a trailing byte */
 3059:                 *outlen = out - outstart;
 3060:                 *inlen = processed - instart;
 3061:                 return(-2);
 3062:             }
 3063:             c = c & 0x3F;
 3064:             d = d & 0x1F;
 3065:             d = xlattable [48 + c + xlattable [d] * 64];
 3066:             if (d == 0) {
 3067:                 /* not in character set */
 3068:                 *outlen = out - outstart;
 3069:                 *inlen = processed - instart;
 3070:                 return(-2);
 3071:             }
 3072:             *out++ = d;
 3073:         } else if (d < 0xF0) {
 3074:             unsigned char c1;
 3075:             unsigned char c2;
 3076:             if (!(in < inend - 1)) {
 3077:                 /* trailing bytes not in input buffer */
 3078:                 *outlen = out - outstart;
 3079:                 *inlen = processed - instart;
 3080:                 return(-3);
 3081:             }
 3082:             c1 = *in++;
 3083:             if ((c1 & 0xC0) != 0x80) {
 3084:                 /* not a trailing byte (c1) */
 3085:                 *outlen = out - outstart;
 3086:                 *inlen = processed - instart;
 3087:                 return(-2);
 3088:             }
 3089:             c2 = *in++;
 3090:             if ((c2 & 0xC0) != 0x80) {
 3091:                 /* not a trailing byte (c2) */
 3092:                 *outlen = out - outstart;
 3093:                 *inlen = processed - instart;
 3094:                 return(-2);
 3095:             }
 3096:             c1 = c1 & 0x3F;
 3097:             c2 = c2 & 0x3F;
 3098: 	    d = d & 0x0F;
 3099: 	    d = xlattable [48 + c2 + xlattable [48 + c1 +
 3100: 			xlattable [32 + d] * 64] * 64];
 3101:             if (d == 0) {
 3102:                 /* not in character set */
 3103:                 *outlen = out - outstart;
 3104:                 *inlen = processed - instart;
 3105:                 return(-2);
 3106:             }
 3107:             *out++ = d;
 3108:         } else {
 3109:             /* cannot transcode >= U+010000 */
 3110:             *outlen = out - outstart;
 3111:             *inlen = processed - instart;
 3112:             return(-2);
 3113:         }
 3114:         processed = in;
 3115:     }
 3116:     *outlen = out - outstart;
 3117:     *inlen = processed - instart;
 3118:     return(*outlen);
 3119: }
 3120: 
 3121: /**
 3122:  * ISO8859xToUTF8
 3123:  * @out:  a pointer to an array of bytes to store the result
 3124:  * @outlen:  the length of @out
 3125:  * @in:  a pointer to an array of ISO Latin 1 chars
 3126:  * @inlen:  the length of @in
 3127:  *
 3128:  * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 3129:  * block of chars out.
 3130:  * Returns 0 if success, or -1 otherwise
 3131:  * The value of @inlen after return is the number of octets consumed
 3132:  * The value of @outlen after return is the number of ocetes produced.
 3133:  */
 3134: static int
 3135: ISO8859xToUTF8(unsigned char* out, int *outlen,
 3136:               const unsigned char* in, int *inlen,
 3137:               unsigned short const *unicodetable) {
 3138:     unsigned char* outstart = out;
 3139:     unsigned char* outend;
 3140:     const unsigned char* instart = in;
 3141:     const unsigned char* inend;
 3142:     const unsigned char* instop;
 3143:     unsigned int c;
 3144: 
 3145:     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
 3146:         (in == NULL) || (unicodetable == NULL))
 3147: 	return(-1);
 3148:     outend = out + *outlen;
 3149:     inend = in + *inlen;
 3150:     instop = inend;
 3151: 
 3152:     while ((in < inend) && (out < outend - 2)) {
 3153:         if (*in >= 0x80) {
 3154:             c = unicodetable [*in - 0x80];
 3155:             if (c == 0) {
 3156:                 /* undefined code point */
 3157:                 *outlen = out - outstart;
 3158:                 *inlen = in - instart;
 3159:                 return (-1);
 3160:             }
 3161:             if (c < 0x800) {
 3162:                 *out++ = ((c >>  6) & 0x1F) | 0xC0;
 3163:                 *out++ = (c & 0x3F) | 0x80;
 3164:             } else {
 3165:                 *out++ = ((c >>  12) & 0x0F) | 0xE0;
 3166:                 *out++ = ((c >>  6) & 0x3F) | 0x80;
 3167:                 *out++ = (c & 0x3F) | 0x80;
 3168:             }
 3169:             ++in;
 3170:         }
 3171:         if (instop - in > outend - out) instop = in + (outend - out);
 3172:         while ((*in < 0x80) && (in < instop)) {
 3173:             *out++ = *in++;
 3174:         }
 3175:     }
 3176:     if ((in < inend) && (out < outend) && (*in < 0x80)) {
 3177:         *out++ =  *in++;
 3178:     }
 3179:     if ((in < inend) && (out < outend) && (*in < 0x80)) {
 3180:         *out++ =  *in++;
 3181:     }
 3182:     *outlen = out - outstart;
 3183:     *inlen = in - instart;
 3184:     return (*outlen);
 3185: }
 3186: 
 3187: 
 3188: /************************************************************************
 3189:  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
 3190:  ************************************************************************/
 3191: 
 3192: static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
 3193:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3194:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3195:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3196:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3197:     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
 3198:     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
 3199:     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
 3200:     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
 3201:     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
 3202:     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
 3203:     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
 3204:     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
 3205:     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
 3206:     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
 3207:     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
 3208:     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
 3209: };
 3210: 
 3211: static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
 3212:     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
 3213:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3214:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3215:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3216:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3217:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3218:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3219:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3220:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3221:     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
 3222:     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
 3223:     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
 3224:     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
 3225:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3226:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
 3227:     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
 3228:     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
 3229:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3230:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3231:     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
 3232:     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
 3233:     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
 3234:     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
 3235:     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
 3236:     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
 3237:     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
 3238:     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
 3239: };
 3240: 
 3241: static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
 3242:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3243:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3244:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3245:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3246:     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
 3247:     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
 3248:     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
 3249:     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
 3250:     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
 3251:     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
 3252:     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
 3253:     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
 3254:     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
 3255:     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
 3256:     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
 3257:     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
 3258: };
 3259: 
 3260: static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
 3261:     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
 3262:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3263:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3264:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3265:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3266:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3267:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3268:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3269:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3270:     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
 3271:     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
 3272:     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
 3273:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
 3274:     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
 3275:     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3276:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3277:     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
 3278:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3279:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3280:     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3281:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3282:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3283:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3284:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3285:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
 3286:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
 3287:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
 3288:     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3289:     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
 3290:     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3291:     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
 3292: };
 3293: 
 3294: static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
 3295:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3296:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3297:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3298:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3299:     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
 3300:     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
 3301:     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
 3302:     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
 3303:     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
 3304:     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
 3305:     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
 3306:     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
 3307:     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
 3308:     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
 3309:     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
 3310:     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
 3311: };
 3312: 
 3313: static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
 3314:     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
 3315:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3316:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3317:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3318:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3319:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3320:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3321:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3322:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3323:     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
 3324:     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
 3325:     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
 3326:     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
 3327:     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
 3328:     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
 3329:     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
 3330:     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
 3331:     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
 3332:     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
 3333:     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
 3334:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
 3335:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3336:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3337:     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
 3338:     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
 3339:     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
 3340:     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
 3341: };
 3342: 
 /*
  * xmlunicodetable_ISO8859_5:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; most of the remaining entries lie in the
  * 0x04xx range.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-5 byte; the lookup code is outside this
  * part of the file - confirm there.
  */
 3343: static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
 3344:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3345:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3346:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3347:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3348:     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
 3349:     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
 3350:     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
 3351:     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
 3352:     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
 3353:     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
 3354:     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
 3355:     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
 3356:     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
 3357:     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
 3358:     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
 3359:     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
 3360: };
 3361: 
 /*
  * xmltranscodetable_ISO8859_5:
  *
  * Byte table of 48 + 6 * 64 = 432 entries, written as 27 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-5) lookup: a 48-byte index area holding small
  * block numbers, followed by six 64-byte blocks, with 0x00 presumably
  * meaning "no mapping". The code that reads it is outside this part
  * of the file - confirm there.
  */
 3362: static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
 3363:     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3364:     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3365:     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3366:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3367:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3368:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3369:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3370:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3371:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3372:     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
 3373:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3374:     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
 3375:     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
 3376:     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3377:     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
 3378:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3379:     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
 3380:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3381:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3382:     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3383:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3384:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3385:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3386:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3387:     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3388:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3389:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3390: };
 3391: 
 /*
  * xmlunicodetable_ISO8859_6:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; the non-zero later entries are mostly in
  * the 0x06xx range, and many entries are 0x0000.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-6 byte, with 0x0000 marking bytes that
  * have no assignment; the lookup code is outside this part of the
  * file - confirm there.
  */
 3392: static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
 3393:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3394:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3395:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3396:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3397:     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
 3398:     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
 3399:     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3400:     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
 3401:     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
 3402:     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
 3403:     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
 3404:     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3405:     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
 3406:     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
 3407:     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3408:     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3409: };
 3410: 
 /*
  * xmltranscodetable_ISO8859_6:
  *
  * Byte table of 48 + 5 * 64 = 368 entries, written as 23 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-6) lookup: a 48-byte index area holding small
  * block numbers, followed by five 64-byte blocks, with 0x00 presumably
  * meaning "no mapping". The code that reads it is outside this part
  * of the file - confirm there.
  */
 3411: static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
 3412:     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3413:     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
 3414:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3415:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3416:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3417:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3418:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3419:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3420:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3421:     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
 3422:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3423:     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3424:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3425:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3426:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3427:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
 3428:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
 3429:     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3430:     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
 3431:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3432:     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3433:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3434:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3435: };
 3436: 
 /*
  * xmlunicodetable_ISO8859_7:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; the later rows are mostly in the 0x03xx
  * range, with a few 0x20xx values and a few 0x0000 entries.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-7 byte, with 0x0000 marking bytes that
  * have no assignment; the lookup code is outside this part of the
  * file - confirm there.
  */
 3437: static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
 3438:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3439:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3440:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3441:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3442:     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
 3443:     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
 3444:     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
 3445:     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
 3446:     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
 3447:     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
 3448:     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
 3449:     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
 3450:     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
 3451:     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
 3452:     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
 3453:     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
 3454: };
 3455: 
 /*
  * xmltranscodetable_ISO8859_7:
  *
  * Byte table of 48 + 7 * 64 = 496 entries, written as 31 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-7) lookup: a 48-byte index area holding small
  * block numbers, followed by seven 64-byte blocks, with 0x00
  * presumably meaning "no mapping". The code that reads it is outside
  * this part of the file - confirm there.
  */
 3456: static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
 3457:     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
 3458:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3459:     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3460:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3461:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3462:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3463:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3464:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3465:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3466:     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
 3467:     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
 3468:     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3469:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3470:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3471:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3472:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3473:     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
 3474:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3475:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3476:     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3477:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3478:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3479:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3480:     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
 3481:     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3482:     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
 3483:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3484:     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
 3485:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3486:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3487:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3488: };
 3489: 
 /*
  * xmlunicodetable_ISO8859_8:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; later rows hold 0x00xx values, a long run
  * of 0x0000 entries, then values in the 0x05xx range and a few 0x20xx.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-8 byte, with 0x0000 marking bytes that
  * have no assignment; the lookup code is outside this part of the
  * file - confirm there.
  */
 3490: static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
 3491:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3492:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3493:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3494:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3495:     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
 3496:     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
 3497:     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
 3498:     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
 3499:     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3500:     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3501:     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 3502:     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
 3503:     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
 3504:     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
 3505:     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
 3506:     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
 3507: };
 3508: 
 /*
  * xmltranscodetable_ISO8859_8:
  *
  * Byte table of 48 + 7 * 64 = 496 entries, written as 31 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-8) lookup: a 48-byte index area holding small
  * block numbers, followed by seven 64-byte blocks, with 0x00
  * presumably meaning "no mapping". The code that reads it is outside
  * this part of the file - confirm there.
  */
 3509: static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
 3510:     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3511:     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
 3512:     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3513:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3514:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3515:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3516:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3517:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3518:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3519:     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
 3520:     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
 3521:     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3522:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3523:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3524:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3525:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3526:     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
 3527:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3528:     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
 3529:     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3530:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3531:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3532:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3533:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
 3534:     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
 3535:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3536:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3537:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3538:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3539:     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
 3540:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3541: };
 3542: 
 /*
  * xmlunicodetable_ISO8859_9:
  *
  * 128 16-bit values, eight per row. Every entry equals its own
  * position plus 0x80 except six: positions 0x50, 0x5d, 0x5e, 0x70,
  * 0x5d + 0x20 and 0x5e + 0x20 hold 0x011e, 0x0130, 0x015e, 0x011f,
  * 0x0131 and 0x015f respectively.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-9 byte; the lookup code is outside this
  * part of the file - confirm there.
  */
 3543: static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
 3544:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3545:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3546:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3547:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3548:     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
 3549:     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
 3550:     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
 3551:     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
 3552:     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
 3553:     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
 3554:     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
 3555:     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
 3556:     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
 3557:     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
 3558:     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
 3559:     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
 3560: };
 3561: 
 /*
  * xmltranscodetable_ISO8859_9:
  *
  * Byte table of 48 + 5 * 64 = 368 entries, written as 23 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-9) lookup: a 48-byte index area holding small
  * block numbers, followed by five 64-byte blocks, with 0x00 presumably
  * meaning "no mapping". The code that reads it is outside this part
  * of the file - confirm there.
  */
 3562: static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
 3563:     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3564:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3565:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3566:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3567:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3568:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3569:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3570:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3571:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3572:     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
 3573:     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
 3574:     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3575:     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
 3576:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3577:     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
 3578:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3579:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
 3580:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3581:     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3582:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3583:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
 3584:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3585:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3586: };
 3587: 
 /*
  * xmlunicodetable_ISO8859_10:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; the later rows mix 0x00xx and 0x01xx
  * values plus a single 0x2015.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-10 byte; the lookup code is outside this
  * part of the file - confirm there.
  */
 3588: static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
 3589:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3590:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3591:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3592:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3593:     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
 3594:     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
 3595:     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
 3596:     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
 3597:     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
 3598:     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
 3599:     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
 3600:     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
 3601:     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
 3602:     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
 3603:     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
 3604:     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
 3605: };
 3606: 
 /*
  * xmltranscodetable_ISO8859_10:
  *
  * Byte table of 48 + 7 * 64 = 496 entries, written as 31 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-10) lookup: a 48-byte index area holding small
  * block numbers, followed by seven 64-byte blocks, with 0x00
  * presumably meaning "no mapping". The code that reads it is outside
  * this part of the file - confirm there.
  */
 3607: static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
 3608:     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3609:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3610:     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3611:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3612:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3613:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3614:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3615:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3616:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3617:     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
 3618:     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
 3619:     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
 3620:     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
 3621:     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
 3622:     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
 3623:     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
 3624:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3625:     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
 3626:     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
 3627:     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3628:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3629:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3630:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3631:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3632:     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3633:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3634:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3635:     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
 3636:     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
 3637:     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
 3638:     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
 3639: };
 3640: 
 /*
  * xmlunicodetable_ISO8859_11:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; after 0x00a0 the entries run through the
  * 0x0exx range, with two groups of four 0x0000 entries.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-11 byte, with 0x0000 marking bytes that
  * have no assignment; the lookup code is outside this part of the
  * file - confirm there.
  */
 3641: static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
 3642:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3643:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3644:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3645:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3646:     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
 3647:     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
 3648:     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
 3649:     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
 3650:     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
 3651:     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
 3652:     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
 3653:     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
 3654:     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
 3655:     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
 3656:     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
 3657:     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
 3658: };
 3659: 
 /*
  * xmltranscodetable_ISO8859_11:
  *
  * Byte table of 48 + 6 * 64 = 432 entries, written as 27 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-11) lookup: a 48-byte index area holding small
  * block numbers, followed by six 64-byte blocks, with 0x00 presumably
  * meaning "no mapping". The code that reads it is outside this part
  * of the file - confirm there.
  */
 3660: static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
 3661:     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3662:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3663:     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3664:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3665:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3666:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3667:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3668:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3669:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3670:     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3671:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3672:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3673:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3674:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3675:     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
 3676:     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
 3677:     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
 3678:     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3679:     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
 3680:     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3681:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3682:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3683:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3684:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3685:     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
 3686:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3687:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3688: };
 3689: 
 /*
  * xmlunicodetable_ISO8859_13:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; the later rows mix 0x00xx and 0x01xx
  * values plus a few in the 0x20xx range.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-13 byte; the lookup code is outside this
  * part of the file - confirm there.
  */
 3690: static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
 3691:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3692:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3693:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3694:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3695:     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
 3696:     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
 3697:     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
 3698:     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
 3699:     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
 3700:     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
 3701:     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
 3702:     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
 3703:     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
 3704:     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
 3705:     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
 3706:     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
 3707: };
 3708: 
 /*
  * xmltranscodetable_ISO8859_13:
  *
  * Byte table of 48 + 7 * 64 = 496 entries, written as 31 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-13) lookup: a 48-byte index area holding small
  * block numbers, followed by seven 64-byte blocks, with 0x00
  * presumably meaning "no mapping". The code that reads it is outside
  * this part of the file - confirm there.
  */
 3709: static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
 3710:     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3711:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3712:     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3713:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3714:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3715:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3716:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3717:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3718:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3719:     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
 3720:     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
 3721:     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3722:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3723:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3724:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3725:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3726:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
 3727:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3728:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3729:     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
 3730:     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
 3731:     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
 3732:     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
 3733:     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
 3734:     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
 3735:     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
 3736:     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
 3737:     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
 3738:     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
 3739:     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
 3740:     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
 3741: };
 3742: 
 /*
  * xmlunicodetable_ISO8859_14:
  *
  * 128 16-bit values, eight per row. The first 32 entries are
  * 0x0080..0x009f unchanged; the next 32 mix 0x00xx, 0x01xx and 0x1exx
  * values, and the last 64 are mostly 0x00c0..0x00ff with a few
  * 0x017x and 0x1e6x substitutions.
  * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
  * code point of an ISO-8859-14 byte; the lookup code is outside this
  * part of the file - confirm there.
  */
 3743: static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
 3744:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3745:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3746:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3747:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3748:     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
 3749:     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
 3750:     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
 3751:     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
 3752:     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
 3753:     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
 3754:     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
 3755:     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
 3756:     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
 3757:     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
 3758:     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
 3759:     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
 3760: };
 3761: 
 /*
  * xmltranscodetable_ISO8859_14:
  *
  * Byte table of 48 + 10 * 64 = 688 entries, written as 43 adjacent
  * 16-byte string literals. The declared size equals the total literal
  * length, so no terminating NUL is stored in the array.
  * NOTE(review): by name and layout this looks like the reverse
  * (UTF-8 to ISO-8859-14) lookup: a 48-byte index area holding small
  * block numbers, followed by ten 64-byte blocks, with 0x00 presumably
  * meaning "no mapping". The code that reads it is outside this part
  * of the file - confirm there.
  */
 3762: static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
 3763:     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3764:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3765:     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3766:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3767:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3768:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3769:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3770:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3771:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3772:     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
 3773:     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3774:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3775:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3776:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3777:     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
 3778:     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
 3779:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
 3780:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3781:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3782:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
 3783:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3784:     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3785:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3786:     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3787:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3788:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3789:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3790:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3791:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3792:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3793:     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3794:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3795:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3796:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3797:     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
 3798:     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3799:     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
 3800:     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
 3801:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3802:     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3803:     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
 3804:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3805:     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
 3806: };
 3807: 
 3808: static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
 3809:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3810:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3811:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3812:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3813:     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
 3814:     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
 3815:     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
 3816:     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
 3817:     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
 3818:     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
 3819:     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
 3820:     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
 3821:     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
 3822:     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
 3823:     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
 3824:     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
 3825: };
 3826: 
 3827: static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
 3828:     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3829:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3830:     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3831:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3832:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3833:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3834:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3835:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3836:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3837:     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
 3838:     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
 3839:     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3840:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3841:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3842:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3843:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3844:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3845:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
 3846:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3847:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3848:     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3849:     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3850:     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
 3851:     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3852:     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
 3853:     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3854:     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
 3855: };
 3856: 
 3857: static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
 3858:     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
 3859:     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
 3860:     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
 3861:     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
 3862:     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
 3863:     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
 3864:     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
 3865:     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
 3866:     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
 3867:     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
 3868:     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
 3869:     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
 3870:     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
 3871:     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
 3872:     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
 3873:     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
 3874: };
 3875: 
 3876: static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
 3877:     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
 3878:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3879:     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3880:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3881:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3882:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3883:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3884:     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
 3885:     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
 3886:     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
 3887:     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
 3888:     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
 3889:     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
 3890:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3891:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3892:     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3893:     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
 3894:     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3895:     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
 3896:     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3897:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3898:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3899:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3900:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3901:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3902:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
 3903:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3904:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3905:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
 3906:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3907:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3908:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3909:     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
 3910:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3911:     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 3912:     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
 3913:     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
 3914:     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 3915:     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
 3916: };
 3917: 
 3918: 
 3919: /*
 3920:  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 3921:  */
 3922: 
 3923: static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
 3924:     const unsigned char* in, int *inlen) {
 3925:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
 3926: }
 3927: static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
 3928:     const unsigned char* in, int *inlen) {
 3929:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
 3930: }
 3931: 
 3932: static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
 3933:     const unsigned char* in, int *inlen) {
 3934:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
 3935: }
 3936: static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
 3937:     const unsigned char* in, int *inlen) {
 3938:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
 3939: }
 3940: 
 3941: static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
 3942:     const unsigned char* in, int *inlen) {
 3943:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
 3944: }
 3945: static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
 3946:     const unsigned char* in, int *inlen) {
 3947:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
 3948: }
 3949: 
 3950: static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
 3951:     const unsigned char* in, int *inlen) {
 3952:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
 3953: }
 3954: static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
 3955:     const unsigned char* in, int *inlen) {
 3956:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
 3957: }
 3958: 
 3959: static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
 3960:     const unsigned char* in, int *inlen) {
 3961:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
 3962: }
 3963: static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
 3964:     const unsigned char* in, int *inlen) {
 3965:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
 3966: }
 3967: 
 3968: static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
 3969:     const unsigned char* in, int *inlen) {
 3970:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
 3971: }
 3972: static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
 3973:     const unsigned char* in, int *inlen) {
 3974:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
 3975: }
 3976: 
 3977: static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
 3978:     const unsigned char* in, int *inlen) {
 3979:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
 3980: }
 3981: static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
 3982:     const unsigned char* in, int *inlen) {
 3983:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
 3984: }
 3985: 
 3986: static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
 3987:     const unsigned char* in, int *inlen) {
 3988:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
 3989: }
 3990: static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
 3991:     const unsigned char* in, int *inlen) {
 3992:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
 3993: }
 3994: 
 3995: static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
 3996:     const unsigned char* in, int *inlen) {
 3997:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
 3998: }
 3999: static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
 4000:     const unsigned char* in, int *inlen) {
 4001:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
 4002: }
 4003: 
 4004: static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
 4005:     const unsigned char* in, int *inlen) {
 4006:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
 4007: }
 4008: static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
 4009:     const unsigned char* in, int *inlen) {
 4010:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
 4011: }
 4012: 
 4013: static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
 4014:     const unsigned char* in, int *inlen) {
 4015:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
 4016: }
 4017: static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
 4018:     const unsigned char* in, int *inlen) {
 4019:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
 4020: }
 4021: 
 4022: static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
 4023:     const unsigned char* in, int *inlen) {
 4024:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
 4025: }
 4026: static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
 4027:     const unsigned char* in, int *inlen) {
 4028:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
 4029: }
 4030: 
 4031: static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
 4032:     const unsigned char* in, int *inlen) {
 4033:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
 4034: }
 4035: static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
 4036:     const unsigned char* in, int *inlen) {
 4037:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
 4038: }
 4039: 
 4040: static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
 4041:     const unsigned char* in, int *inlen) {
 4042:     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
 4043: }
 4044: static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
 4045:     const unsigned char* in, int *inlen) {
 4046:     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
 4047: }
 4048: 
 4049: static void
 4050: xmlRegisterCharEncodingHandlersISO8859x (void) {
 4051:     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
 4052:     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
 4053:     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
 4054:     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
 4055:     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
 4056:     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
 4057:     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
 4058:     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
 4059:     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
 4060:     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
 4061:     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
 4062:     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
 4063:     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
 4064:     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
 4065: }
 4066: 
 4067: #endif
 4068: #endif
 4069: 
 4070: #define bottom_encoding
 4071: #include "elfgcchack.h"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>