embedaddon/libxml2/encoding.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / encoding.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:29 2014 UTC (10 years ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD

libxml2 2.9.1

1: /* 2: * encoding.c : implements the encoding conversion functions needed for XML 3: * 4: * Related specs: 5: * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6: * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7: * [ISO-10646] UTF-8 and UTF-16 in Annexes 8: * [ISO-8859-1] ISO Latin-1 characters codes. 9: * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10: * Worldwide Character Encoding -- Version 1.0", Addison- 11: * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12: * described in Unicode Technical Report #4. 13: * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14: * Information Interchange, ANSI X3.4-1986. 15: * 16: * See Copyright for the status of this software. 17: * 18: * daniel@veillard.com 19: * 20: * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org> 21: */ 22: 23: #define IN_LIBXML 24: #include "libxml.h" 25: 26: #include <string.h> 27: #include <limits.h> 28: 29: #ifdef HAVE_CTYPE_H 30: #include <ctype.h> 31: #endif 32: #ifdef HAVE_STDLIB_H 33: #include <stdlib.h> 34: #endif 35: #ifdef LIBXML_ICONV_ENABLED 36: #ifdef HAVE_ERRNO_H 37: #include <errno.h> 38: #endif 39: #endif 40: #include <libxml/encoding.h> 41: #include <libxml/xmlmemory.h> 42: #ifdef LIBXML_HTML_ENABLED 43: #include <libxml/HTMLparser.h> 44: #endif 45: #include <libxml/globals.h> 46: #include <libxml/xmlerror.h> 47: 48: #include "buf.h" 49: #include "enc.h" 50: 51: static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 52: static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 53: 54: typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 55: typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 56: struct _xmlCharEncodingAlias { 57: const char *name; 58: const char *alias; 59: }; 60: 61: static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 62: static int xmlCharEncodingAliasesNb = 0; 63: static int xmlCharEncodingAliasesMax = 0; 64: 65: #if defined(LIBXML_ICONV_ENABLED) || 
defined(LIBXML_ICU_ENABLED) 66: #if 0 67: #define DEBUG_ENCODING /* Define this to get encoding traces */ 68: #endif 69: #else 70: #ifdef LIBXML_ISO8859X_ENABLED 71: static void xmlRegisterCharEncodingHandlersISO8859x (void); 72: #endif 73: #endif 74: 75: static int xmlLittleEndian = 1; 76: 77: /** 78: * xmlEncodingErrMemory: 79: * @extra: extra informations 80: * 81: * Handle an out of memory condition 82: */ 83: static void 84: xmlEncodingErrMemory(const char *extra) 85: { 86: __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); 87: } 88: 89: /** 90: * xmlErrEncoding: 91: * @error: the error number 92: * @msg: the error message 93: * 94: * n encoding error 95: */ 96: static void 97: xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) 98: { 99: __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, 100: XML_FROM_I18N, error, XML_ERR_FATAL, 101: NULL, 0, val, NULL, NULL, 0, 0, msg, val); 102: } 103: 104: #ifdef LIBXML_ICU_ENABLED 105: static uconv_t* 106: openIcuConverter(const char* name, int toUnicode) 107: { 108: UErrorCode status = U_ZERO_ERROR; 109: uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); 110: if (conv == NULL) 111: return NULL; 112: 113: conv->uconv = ucnv_open(name, &status); 114: if (U_FAILURE(status)) 115: goto error; 116: 117: status = U_ZERO_ERROR; 118: if (toUnicode) { 119: ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 120: NULL, NULL, NULL, &status); 121: } 122: else { 123: ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 124: NULL, NULL, NULL, &status); 125: } 126: if (U_FAILURE(status)) 127: goto error; 128: 129: status = U_ZERO_ERROR; 130: conv->utf8 = ucnv_open("UTF-8", &status); 131: if (U_SUCCESS(status)) 132: return conv; 133: 134: error: 135: if (conv->uconv) 136: ucnv_close(conv->uconv); 137: xmlFree(conv); 138: return NULL; 139: } 140: 141: static void 142: closeIcuConverter(uconv_t *conv) 143: { 144: if (conv != NULL) { 145: ucnv_close(conv->uconv); 146: 
ucnv_close(conv->utf8); 147: xmlFree(conv); 148: } 149: } 150: #endif /* LIBXML_ICU_ENABLED */ 151: 152: /************************************************************************ 153: * * 154: * Conversions To/From UTF8 encoding * 155: * * 156: ************************************************************************/ 157: 158: /** 159: * asciiToUTF8: 160: * @out: a pointer to an array of bytes to store the result 161: * @outlen: the length of @out 162: * @in: a pointer to an array of ASCII chars 163: * @inlen: the length of @in 164: * 165: * Take a block of ASCII chars in and try to convert it to an UTF-8 166: * block of chars out. 167: * Returns 0 if success, or -1 otherwise 168: * The value of @inlen after return is the number of octets consumed 169: * if the return value is positive, else unpredictable. 170: * The value of @outlen after return is the number of octets consumed. 171: */ 172: static int 173: asciiToUTF8(unsigned char* out, int *outlen, 174: const unsigned char* in, int *inlen) { 175: unsigned char* outstart = out; 176: const unsigned char* base = in; 177: const unsigned char* processed = in; 178: unsigned char* outend = out + *outlen; 179: const unsigned char* inend; 180: unsigned int c; 181: 182: inend = in + (*inlen); 183: while ((in < inend) && (out - outstart + 5 < *outlen)) { 184: c= *in++; 185: 186: if (out >= outend) 187: break; 188: if (c < 0x80) { 189: *out++ = c; 190: } else { 191: *outlen = out - outstart; 192: *inlen = processed - base; 193: return(-1); 194: } 195: 196: processed = (const unsigned char*) in; 197: } 198: *outlen = out - outstart; 199: *inlen = processed - base; 200: return(*outlen); 201: } 202: 203: #ifdef LIBXML_OUTPUT_ENABLED 204: /** 205: * UTF8Toascii: 206: * @out: a pointer to an array of bytes to store the result 207: * @outlen: the length of @out 208: * @in: a pointer to an array of UTF-8 chars 209: * @inlen: the length of @in 210: * 211: * Take a block of UTF-8 chars in and try to convert it to an ASCII 212: * 
block of chars out. 213: * 214: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 215: * The value of @inlen after return is the number of octets consumed 216: * if the return value is positive, else unpredictable. 217: * The value of @outlen after return is the number of octets consumed. 218: */ 219: static int 220: UTF8Toascii(unsigned char* out, int *outlen, 221: const unsigned char* in, int *inlen) { 222: const unsigned char* processed = in; 223: const unsigned char* outend; 224: const unsigned char* outstart = out; 225: const unsigned char* instart = in; 226: const unsigned char* inend; 227: unsigned int c, d; 228: int trailing; 229: 230: if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 231: if (in == NULL) { 232: /* 233: * initialization nothing to do 234: */ 235: *outlen = 0; 236: *inlen = 0; 237: return(0); 238: } 239: inend = in + (*inlen); 240: outend = out + (*outlen); 241: while (in < inend) { 242: d = *in++; 243: if (d < 0x80) { c= d; trailing= 0; } 244: else if (d < 0xC0) { 245: /* trailing byte in leading position */ 246: *outlen = out - outstart; 247: *inlen = processed - instart; 248: return(-2); 249: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 250: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 251: else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 252: else { 253: /* no chance for this in Ascii */ 254: *outlen = out - outstart; 255: *inlen = processed - instart; 256: return(-2); 257: } 258: 259: if (inend - in < trailing) { 260: break; 261: } 262: 263: for ( ; trailing; trailing--) { 264: if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 265: break; 266: c <<= 6; 267: c |= d & 0x3F; 268: } 269: 270: /* assertion: c is a single UTF-4 value */ 271: if (c < 0x80) { 272: if (out >= outend) 273: break; 274: *out++ = c; 275: } else { 276: /* no chance for this in Ascii */ 277: *outlen = out - outstart; 278: *inlen = processed - instart; 279: return(-2); 280: } 281: processed = in; 282: } 283: *outlen = 
out - outstart; 284: *inlen = processed - instart; 285: return(*outlen); 286: } 287: #endif /* LIBXML_OUTPUT_ENABLED */ 288: 289: /** 290: * isolat1ToUTF8: 291: * @out: a pointer to an array of bytes to store the result 292: * @outlen: the length of @out 293: * @in: a pointer to an array of ISO Latin 1 chars 294: * @inlen: the length of @in 295: * 296: * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 297: * block of chars out. 298: * Returns the number of bytes written if success, or -1 otherwise 299: * The value of @inlen after return is the number of octets consumed 300: * if the return value is positive, else unpredictable. 301: * The value of @outlen after return is the number of octets consumed. 302: */ 303: int 304: isolat1ToUTF8(unsigned char* out, int *outlen, 305: const unsigned char* in, int *inlen) { 306: unsigned char* outstart = out; 307: const unsigned char* base = in; 308: unsigned char* outend; 309: const unsigned char* inend; 310: const unsigned char* instop; 311: 312: if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) 313: return(-1); 314: 315: outend = out + *outlen; 316: inend = in + (*inlen); 317: instop = inend; 318: 319: while ((in < inend) && (out < outend - 1)) { 320: if (*in >= 0x80) { 321: *out++ = (((*in) >> 6) & 0x1F) | 0xC0; 322: *out++ = ((*in) & 0x3F) | 0x80; 323: ++in; 324: } 325: if ((instop - in) > (outend - out)) instop = in + (outend - out); 326: while ((in < instop) && (*in < 0x80)) { 327: *out++ = *in++; 328: } 329: } 330: if ((in < inend) && (out < outend) && (*in < 0x80)) { 331: *out++ = *in++; 332: } 333: *outlen = out - outstart; 334: *inlen = in - base; 335: return(*outlen); 336: } 337: 338: /** 339: * UTF8ToUTF8: 340: * @out: a pointer to an array of bytes to store the result 341: * @outlen: the length of @out 342: * @inb: a pointer to an array of UTF-8 chars 343: * @inlenb: the length of @in in UTF-8 chars 344: * 345: * No op copy operation for UTF8 handling. 
346: * 347: * Returns the number of bytes written, or -1 if lack of space. 348: * The value of *inlen after return is the number of octets consumed 349: * if the return value is positive, else unpredictable. 350: */ 351: static int 352: UTF8ToUTF8(unsigned char* out, int *outlen, 353: const unsigned char* inb, int *inlenb) 354: { 355: int len; 356: 357: if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL)) 358: return(-1); 359: if (*outlen > *inlenb) { 360: len = *inlenb; 361: } else { 362: len = *outlen; 363: } 364: if (len < 0) 365: return(-1); 366: 367: memcpy(out, inb, len); 368: 369: *outlen = len; 370: *inlenb = len; 371: return(*outlen); 372: } 373: 374: 375: #ifdef LIBXML_OUTPUT_ENABLED 376: /** 377: * UTF8Toisolat1: 378: * @out: a pointer to an array of bytes to store the result 379: * @outlen: the length of @out 380: * @in: a pointer to an array of UTF-8 chars 381: * @inlen: the length of @in 382: * 383: * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 384: * block of chars out. 385: * 386: * Returns the number of bytes written if success, -2 if the transcoding fails, 387: or -1 otherwise 388: * The value of @inlen after return is the number of octets consumed 389: * if the return value is positive, else unpredictable. 390: * The value of @outlen after return is the number of octets consumed. 
391: */ 392: int 393: UTF8Toisolat1(unsigned char* out, int *outlen, 394: const unsigned char* in, int *inlen) { 395: const unsigned char* processed = in; 396: const unsigned char* outend; 397: const unsigned char* outstart = out; 398: const unsigned char* instart = in; 399: const unsigned char* inend; 400: unsigned int c, d; 401: int trailing; 402: 403: if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 404: if (in == NULL) { 405: /* 406: * initialization nothing to do 407: */ 408: *outlen = 0; 409: *inlen = 0; 410: return(0); 411: } 412: inend = in + (*inlen); 413: outend = out + (*outlen); 414: while (in < inend) { 415: d = *in++; 416: if (d < 0x80) { c= d; trailing= 0; } 417: else if (d < 0xC0) { 418: /* trailing byte in leading position */ 419: *outlen = out - outstart; 420: *inlen = processed - instart; 421: return(-2); 422: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 423: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 424: else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 425: else { 426: /* no chance for this in IsoLat1 */ 427: *outlen = out - outstart; 428: *inlen = processed - instart; 429: return(-2); 430: } 431: 432: if (inend - in < trailing) { 433: break; 434: } 435: 436: for ( ; trailing; trailing--) { 437: if (in >= inend) 438: break; 439: if (((d= *in++) & 0xC0) != 0x80) { 440: *outlen = out - outstart; 441: *inlen = processed - instart; 442: return(-2); 443: } 444: c <<= 6; 445: c |= d & 0x3F; 446: } 447: 448: /* assertion: c is a single UTF-4 value */ 449: if (c <= 0xFF) { 450: if (out >= outend) 451: break; 452: *out++ = c; 453: } else { 454: /* no chance for this in IsoLat1 */ 455: *outlen = out - outstart; 456: *inlen = processed - instart; 457: return(-2); 458: } 459: processed = in; 460: } 461: *outlen = out - outstart; 462: *inlen = processed - instart; 463: return(*outlen); 464: } 465: #endif /* LIBXML_OUTPUT_ENABLED */ 466: 467: /** 468: * UTF16LEToUTF8: 469: * @out: a pointer to an array of bytes to store the 
result 470: * @outlen: the length of @out 471: * @inb: a pointer to an array of UTF-16LE passwd as a byte array 472: * @inlenb: the length of @in in UTF-16LE chars 473: * 474: * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 475: * block of chars out. This function assumes the endian property 476: * is the same between the native type of this machine and the 477: * inputed one. 478: * 479: * Returns the number of bytes written, or -1 if lack of space, or -2 480: * if the transcoding fails (if *in is not a valid utf16 string) 481: * The value of *inlen after return is the number of octets consumed 482: * if the return value is positive, else unpredictable. 483: */ 484: static int 485: UTF16LEToUTF8(unsigned char* out, int *outlen, 486: const unsigned char* inb, int *inlenb) 487: { 488: unsigned char* outstart = out; 489: const unsigned char* processed = inb; 490: unsigned char* outend = out + *outlen; 491: unsigned short* in = (unsigned short*) inb; 492: unsigned short* inend; 493: unsigned int c, d, inlen; 494: unsigned char *tmp; 495: int bits; 496: 497: if ((*inlenb % 2) == 1) 498: (*inlenb)--; 499: inlen = *inlenb / 2; 500: inend = in + inlen; 501: while ((in < inend) && (out - outstart + 5 < *outlen)) { 502: if (xmlLittleEndian) { 503: c= *in++; 504: } else { 505: tmp = (unsigned char *) in; 506: c = *tmp++; 507: c = c | (((unsigned int)*tmp) << 8); 508: in++; 509: } 510: if ((c & 0xFC00) == 0xD800) { /* surrogates */ 511: if (in >= inend) { /* (in > inend) shouldn't happens */ 512: break; 513: } 514: if (xmlLittleEndian) { 515: d = *in++; 516: } else { 517: tmp = (unsigned char *) in; 518: d = *tmp++; 519: d = d | (((unsigned int)*tmp) << 8); 520: in++; 521: } 522: if ((d & 0xFC00) == 0xDC00) { 523: c &= 0x03FF; 524: c <<= 10; 525: c |= d & 0x03FF; 526: c += 0x10000; 527: } 528: else { 529: *outlen = out - outstart; 530: *inlenb = processed - inb; 531: return(-2); 532: } 533: } 534: 535: /* assertion: c is a single UTF-4 value */ 536: if 
(out >= outend) 537: break; 538: if (c < 0x80) { *out++= c; bits= -6; } 539: else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 540: else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 541: else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 542: 543: for ( ; bits >= 0; bits-= 6) { 544: if (out >= outend) 545: break; 546: *out++= ((c >> bits) & 0x3F) | 0x80; 547: } 548: processed = (const unsigned char*) in; 549: } 550: *outlen = out - outstart; 551: *inlenb = processed - inb; 552: return(*outlen); 553: } 554: 555: #ifdef LIBXML_OUTPUT_ENABLED 556: /** 557: * UTF8ToUTF16LE: 558: * @outb: a pointer to an array of bytes to store the result 559: * @outlen: the length of @outb 560: * @in: a pointer to an array of UTF-8 chars 561: * @inlen: the length of @in 562: * 563: * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE 564: * block of chars out. 565: * 566: * Returns the number of bytes written, or -1 if lack of space, or -2 567: * if the transcoding failed. 
568: */ 569: static int 570: UTF8ToUTF16LE(unsigned char* outb, int *outlen, 571: const unsigned char* in, int *inlen) 572: { 573: unsigned short* out = (unsigned short*) outb; 574: const unsigned char* processed = in; 575: const unsigned char *const instart = in; 576: unsigned short* outstart= out; 577: unsigned short* outend; 578: const unsigned char* inend; 579: unsigned int c, d; 580: int trailing; 581: unsigned char *tmp; 582: unsigned short tmp1, tmp2; 583: 584: /* UTF16LE encoding has no BOM */ 585: if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 586: if (in == NULL) { 587: *outlen = 0; 588: *inlen = 0; 589: return(0); 590: } 591: inend= in + *inlen; 592: outend = out + (*outlen / 2); 593: while (in < inend) { 594: d= *in++; 595: if (d < 0x80) { c= d; trailing= 0; } 596: else if (d < 0xC0) { 597: /* trailing byte in leading position */ 598: *outlen = (out - outstart) * 2; 599: *inlen = processed - instart; 600: return(-2); 601: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 602: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 603: else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 604: else { 605: /* no chance for this in UTF-16 */ 606: *outlen = (out - outstart) * 2; 607: *inlen = processed - instart; 608: return(-2); 609: } 610: 611: if (inend - in < trailing) { 612: break; 613: } 614: 615: for ( ; trailing; trailing--) { 616: if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 617: break; 618: c <<= 6; 619: c |= d & 0x3F; 620: } 621: 622: /* assertion: c is a single UTF-4 value */ 623: if (c < 0x10000) { 624: if (out >= outend) 625: break; 626: if (xmlLittleEndian) { 627: *out++ = c; 628: } else { 629: tmp = (unsigned char *) out; 630: *tmp = c ; 631: *(tmp + 1) = c >> 8 ; 632: out++; 633: } 634: } 635: else if (c < 0x110000) { 636: if (out+1 >= outend) 637: break; 638: c -= 0x10000; 639: if (xmlLittleEndian) { 640: *out++ = 0xD800 | (c >> 10); 641: *out++ = 0xDC00 | (c & 0x03FF); 642: } else { 643: tmp1 = 0xD800 | (c >> 10); 644: 
tmp = (unsigned char *) out; 645: *tmp = (unsigned char) tmp1; 646: *(tmp + 1) = tmp1 >> 8; 647: out++; 648: 649: tmp2 = 0xDC00 | (c & 0x03FF); 650: tmp = (unsigned char *) out; 651: *tmp = (unsigned char) tmp2; 652: *(tmp + 1) = tmp2 >> 8; 653: out++; 654: } 655: } 656: else 657: break; 658: processed = in; 659: } 660: *outlen = (out - outstart) * 2; 661: *inlen = processed - instart; 662: return(*outlen); 663: } 664: 665: /** 666: * UTF8ToUTF16: 667: * @outb: a pointer to an array of bytes to store the result 668: * @outlen: the length of @outb 669: * @in: a pointer to an array of UTF-8 chars 670: * @inlen: the length of @in 671: * 672: * Take a block of UTF-8 chars in and try to convert it to an UTF-16 673: * block of chars out. 674: * 675: * Returns the number of bytes written, or -1 if lack of space, or -2 676: * if the transcoding failed. 677: */ 678: static int 679: UTF8ToUTF16(unsigned char* outb, int *outlen, 680: const unsigned char* in, int *inlen) 681: { 682: if (in == NULL) { 683: /* 684: * initialization, add the Byte Order Mark for UTF-16LE 685: */ 686: if (*outlen >= 2) { 687: outb[0] = 0xFF; 688: outb[1] = 0xFE; 689: *outlen = 2; 690: *inlen = 0; 691: #ifdef DEBUG_ENCODING 692: xmlGenericError(xmlGenericErrorContext, 693: "Added FFFE Byte Order Mark\n"); 694: #endif 695: return(2); 696: } 697: *outlen = 0; 698: *inlen = 0; 699: return(0); 700: } 701: return (UTF8ToUTF16LE(outb, outlen, in, inlen)); 702: } 703: #endif /* LIBXML_OUTPUT_ENABLED */ 704: 705: /** 706: * UTF16BEToUTF8: 707: * @out: a pointer to an array of bytes to store the result 708: * @outlen: the length of @out 709: * @inb: a pointer to an array of UTF-16 passed as a byte array 710: * @inlenb: the length of @in in UTF-16 chars 711: * 712: * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 713: * block of chars out. This function assumes the endian property 714: * is the same between the native type of this machine and the 715: * inputed one. 
716: * 717: * Returns the number of bytes written, or -1 if lack of space, or -2 718: * if the transcoding fails (if *in is not a valid utf16 string) 719: * The value of *inlen after return is the number of octets consumed 720: * if the return value is positive, else unpredictable. 721: */ 722: static int 723: UTF16BEToUTF8(unsigned char* out, int *outlen, 724: const unsigned char* inb, int *inlenb) 725: { 726: unsigned char* outstart = out; 727: const unsigned char* processed = inb; 728: unsigned char* outend = out + *outlen; 729: unsigned short* in = (unsigned short*) inb; 730: unsigned short* inend; 731: unsigned int c, d, inlen; 732: unsigned char *tmp; 733: int bits; 734: 735: if ((*inlenb % 2) == 1) 736: (*inlenb)--; 737: inlen = *inlenb / 2; 738: inend= in + inlen; 739: while (in < inend) { 740: if (xmlLittleEndian) { 741: tmp = (unsigned char *) in; 742: c = *tmp++; 743: c = c << 8; 744: c = c | (unsigned int) *tmp; 745: in++; 746: } else { 747: c= *in++; 748: } 749: if ((c & 0xFC00) == 0xD800) { /* surrogates */ 750: if (in >= inend) { /* (in > inend) shouldn't happens */ 751: *outlen = out - outstart; 752: *inlenb = processed - inb; 753: return(-2); 754: } 755: if (xmlLittleEndian) { 756: tmp = (unsigned char *) in; 757: d = *tmp++; 758: d = d << 8; 759: d = d | (unsigned int) *tmp; 760: in++; 761: } else { 762: d= *in++; 763: } 764: if ((d & 0xFC00) == 0xDC00) { 765: c &= 0x03FF; 766: c <<= 10; 767: c |= d & 0x03FF; 768: c += 0x10000; 769: } 770: else { 771: *outlen = out - outstart; 772: *inlenb = processed - inb; 773: return(-2); 774: } 775: } 776: 777: /* assertion: c is a single UTF-4 value */ 778: if (out >= outend) 779: break; 780: if (c < 0x80) { *out++= c; bits= -6; } 781: else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 782: else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 783: else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 784: 785: for ( ; bits >= 0; bits-= 6) { 786: if (out >= outend) 787: break; 
788: *out++= ((c >> bits) & 0x3F) | 0x80; 789: } 790: processed = (const unsigned char*) in; 791: } 792: *outlen = out - outstart; 793: *inlenb = processed - inb; 794: return(*outlen); 795: } 796: 797: #ifdef LIBXML_OUTPUT_ENABLED 798: /** 799: * UTF8ToUTF16BE: 800: * @outb: a pointer to an array of bytes to store the result 801: * @outlen: the length of @outb 802: * @in: a pointer to an array of UTF-8 chars 803: * @inlen: the length of @in 804: * 805: * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE 806: * block of chars out. 807: * 808: * Returns the number of byte written, or -1 by lack of space, or -2 809: * if the transcoding failed. 810: */ 811: static int 812: UTF8ToUTF16BE(unsigned char* outb, int *outlen, 813: const unsigned char* in, int *inlen) 814: { 815: unsigned short* out = (unsigned short*) outb; 816: const unsigned char* processed = in; 817: const unsigned char *const instart = in; 818: unsigned short* outstart= out; 819: unsigned short* outend; 820: const unsigned char* inend; 821: unsigned int c, d; 822: int trailing; 823: unsigned char *tmp; 824: unsigned short tmp1, tmp2; 825: 826: /* UTF-16BE has no BOM */ 827: if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 828: if (in == NULL) { 829: *outlen = 0; 830: *inlen = 0; 831: return(0); 832: } 833: inend= in + *inlen; 834: outend = out + (*outlen / 2); 835: while (in < inend) { 836: d= *in++; 837: if (d < 0x80) { c= d; trailing= 0; } 838: else if (d < 0xC0) { 839: /* trailing byte in leading position */ 840: *outlen = out - outstart; 841: *inlen = processed - instart; 842: return(-2); 843: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 844: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 845: else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 846: else { 847: /* no chance for this in UTF-16 */ 848: *outlen = out - outstart; 849: *inlen = processed - instart; 850: return(-2); 851: } 852: 853: if (inend - in < trailing) { 854: break; 855: } 856: 857: for 
( ; trailing; trailing--) { 858: if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; 859: c <<= 6; 860: c |= d & 0x3F; 861: } 862: 863: /* assertion: c is a single UTF-4 value */ 864: if (c < 0x10000) { 865: if (out >= outend) break; 866: if (xmlLittleEndian) { 867: tmp = (unsigned char *) out; 868: *tmp = c >> 8; 869: *(tmp + 1) = c; 870: out++; 871: } else { 872: *out++ = c; 873: } 874: } 875: else if (c < 0x110000) { 876: if (out+1 >= outend) break; 877: c -= 0x10000; 878: if (xmlLittleEndian) { 879: tmp1 = 0xD800 | (c >> 10); 880: tmp = (unsigned char *) out; 881: *tmp = tmp1 >> 8; 882: *(tmp + 1) = (unsigned char) tmp1; 883: out++; 884: 885: tmp2 = 0xDC00 | (c & 0x03FF); 886: tmp = (unsigned char *) out; 887: *tmp = tmp2 >> 8; 888: *(tmp + 1) = (unsigned char) tmp2; 889: out++; 890: } else { 891: *out++ = 0xD800 | (c >> 10); 892: *out++ = 0xDC00 | (c & 0x03FF); 893: } 894: } 895: else 896: break; 897: processed = in; 898: } 899: *outlen = (out - outstart) * 2; 900: *inlen = processed - instart; 901: return(*outlen); 902: } 903: #endif /* LIBXML_OUTPUT_ENABLED */ 904: 905: /************************************************************************ 906: * * 907: * Generic encoding handling routines * 908: * * 909: ************************************************************************/ 910: 911: /** 912: * xmlDetectCharEncoding: 913: * @in: a pointer to the first bytes of the XML entity, must be at least 914: * 2 bytes long (at least 4 if encoding is UTF4 variant). 915: * @len: pointer to the length of the buffer 916: * 917: * Guess the encoding of the entity using the first bytes of the entity content 918: * according to the non-normative appendix F of the XML-1.0 recommendation. 919: * 920: * Returns one of the XML_CHAR_ENCODING_... values. 
921: */ 922: xmlCharEncoding 923: xmlDetectCharEncoding(const unsigned char* in, int len) 924: { 925: if (in == NULL) 926: return(XML_CHAR_ENCODING_NONE); 927: if (len >= 4) { 928: if ((in[0] == 0x00) && (in[1] == 0x00) && 929: (in[2] == 0x00) && (in[3] == 0x3C)) 930: return(XML_CHAR_ENCODING_UCS4BE); 931: if ((in[0] == 0x3C) && (in[1] == 0x00) && 932: (in[2] == 0x00) && (in[3] == 0x00)) 933: return(XML_CHAR_ENCODING_UCS4LE); 934: if ((in[0] == 0x00) && (in[1] == 0x00) && 935: (in[2] == 0x3C) && (in[3] == 0x00)) 936: return(XML_CHAR_ENCODING_UCS4_2143); 937: if ((in[0] == 0x00) && (in[1] == 0x3C) && 938: (in[2] == 0x00) && (in[3] == 0x00)) 939: return(XML_CHAR_ENCODING_UCS4_3412); 940: if ((in[0] == 0x4C) && (in[1] == 0x6F) && 941: (in[2] == 0xA7) && (in[3] == 0x94)) 942: return(XML_CHAR_ENCODING_EBCDIC); 943: if ((in[0] == 0x3C) && (in[1] == 0x3F) && 944: (in[2] == 0x78) && (in[3] == 0x6D)) 945: return(XML_CHAR_ENCODING_UTF8); 946: /* 947: * Although not part of the recommendation, we also 948: * attempt an "auto-recognition" of UTF-16LE and 949: * UTF-16BE encodings. 
950: */ 951: if ((in[0] == 0x3C) && (in[1] == 0x00) && 952: (in[2] == 0x3F) && (in[3] == 0x00)) 953: return(XML_CHAR_ENCODING_UTF16LE); 954: if ((in[0] == 0x00) && (in[1] == 0x3C) && 955: (in[2] == 0x00) && (in[3] == 0x3F)) 956: return(XML_CHAR_ENCODING_UTF16BE); 957: } 958: if (len >= 3) { 959: /* 960: * Errata on XML-1.0 June 20 2001 961: * We now allow an UTF8 encoded BOM 962: */ 963: if ((in[0] == 0xEF) && (in[1] == 0xBB) && 964: (in[2] == 0xBF)) 965: return(XML_CHAR_ENCODING_UTF8); 966: } 967: /* For UTF-16 we can recognize by the BOM */ 968: if (len >= 2) { 969: if ((in[0] == 0xFE) && (in[1] == 0xFF)) 970: return(XML_CHAR_ENCODING_UTF16BE); 971: if ((in[0] == 0xFF) && (in[1] == 0xFE)) 972: return(XML_CHAR_ENCODING_UTF16LE); 973: } 974: return(XML_CHAR_ENCODING_NONE); 975: } 976: 977: /** 978: * xmlCleanupEncodingAliases: 979: * 980: * Unregisters all aliases 981: */ 982: void 983: xmlCleanupEncodingAliases(void) { 984: int i; 985: 986: if (xmlCharEncodingAliases == NULL) 987: return; 988: 989: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 990: if (xmlCharEncodingAliases[i].name != NULL) 991: xmlFree((char *) xmlCharEncodingAliases[i].name); 992: if (xmlCharEncodingAliases[i].alias != NULL) 993: xmlFree((char *) xmlCharEncodingAliases[i].alias); 994: } 995: xmlCharEncodingAliasesNb = 0; 996: xmlCharEncodingAliasesMax = 0; 997: xmlFree(xmlCharEncodingAliases); 998: xmlCharEncodingAliases = NULL; 999: } 1000: 1001: /** 1002: * xmlGetEncodingAlias: 1003: * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1004: * 1005: * Lookup an encoding name for the given alias. 
1006: * 1007: * Returns NULL if not found, otherwise the original name 1008: */ 1009: const char * 1010: xmlGetEncodingAlias(const char *alias) { 1011: int i; 1012: char upper[100]; 1013: 1014: if (alias == NULL) 1015: return(NULL); 1016: 1017: if (xmlCharEncodingAliases == NULL) 1018: return(NULL); 1019: 1020: for (i = 0;i < 99;i++) { 1021: upper[i] = toupper(alias[i]); 1022: if (upper[i] == 0) break; 1023: } 1024: upper[i] = 0; 1025: 1026: /* 1027: * Walk down the list looking for a definition of the alias 1028: */ 1029: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1030: if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1031: return(xmlCharEncodingAliases[i].name); 1032: } 1033: } 1034: return(NULL); 1035: } 1036: 1037: /** 1038: * xmlAddEncodingAlias: 1039: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1040: * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1041: * 1042: * Registers an alias @alias for an encoding named @name. Existing alias 1043: * will be overwritten. 
1044: * 1045: * Returns 0 in case of success, -1 in case of error 1046: */ 1047: int 1048: xmlAddEncodingAlias(const char *name, const char *alias) { 1049: int i; 1050: char upper[100]; 1051: 1052: if ((name == NULL) || (alias == NULL)) 1053: return(-1); 1054: 1055: for (i = 0;i < 99;i++) { 1056: upper[i] = toupper(alias[i]); 1057: if (upper[i] == 0) break; 1058: } 1059: upper[i] = 0; 1060: 1061: if (xmlCharEncodingAliases == NULL) { 1062: xmlCharEncodingAliasesNb = 0; 1063: xmlCharEncodingAliasesMax = 20; 1064: xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1065: xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1066: if (xmlCharEncodingAliases == NULL) 1067: return(-1); 1068: } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1069: xmlCharEncodingAliasesMax *= 2; 1070: xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1071: xmlRealloc(xmlCharEncodingAliases, 1072: xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1073: } 1074: /* 1075: * Walk down the list looking for a definition of the alias 1076: */ 1077: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1078: if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1079: /* 1080: * Replace the definition. 
1081: */ 1082: xmlFree((char *) xmlCharEncodingAliases[i].name); 1083: xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1084: return(0); 1085: } 1086: } 1087: /* 1088: * Add the definition 1089: */ 1090: xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1091: xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1092: xmlCharEncodingAliasesNb++; 1093: return(0); 1094: } 1095: 1096: /** 1097: * xmlDelEncodingAlias: 1098: * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1099: * 1100: * Unregisters an encoding alias @alias 1101: * 1102: * Returns 0 in case of success, -1 in case of error 1103: */ 1104: int 1105: xmlDelEncodingAlias(const char *alias) { 1106: int i; 1107: 1108: if (alias == NULL) 1109: return(-1); 1110: 1111: if (xmlCharEncodingAliases == NULL) 1112: return(-1); 1113: /* 1114: * Walk down the list looking for a definition of the alias 1115: */ 1116: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1117: if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1118: xmlFree((char *) xmlCharEncodingAliases[i].name); 1119: xmlFree((char *) xmlCharEncodingAliases[i].alias); 1120: xmlCharEncodingAliasesNb--; 1121: memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1122: sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1123: return(0); 1124: } 1125: } 1126: return(-1); 1127: } 1128: 1129: /** 1130: * xmlParseCharEncoding: 1131: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1132: * 1133: * Compare the string to the encoding schemes already known. Note 1134: * that the comparison is case insensitive accordingly to the section 1135: * [XML] 4.3.3 Character Encoding in Entities. 1136: * 1137: * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1138: * if not recognized. 
1139: */ 1140: xmlCharEncoding 1141: xmlParseCharEncoding(const char* name) 1142: { 1143: const char *alias; 1144: char upper[500]; 1145: int i; 1146: 1147: if (name == NULL) 1148: return(XML_CHAR_ENCODING_NONE); 1149: 1150: /* 1151: * Do the alias resolution 1152: */ 1153: alias = xmlGetEncodingAlias(name); 1154: if (alias != NULL) 1155: name = alias; 1156: 1157: for (i = 0;i < 499;i++) { 1158: upper[i] = toupper(name[i]); 1159: if (upper[i] == 0) break; 1160: } 1161: upper[i] = 0; 1162: 1163: if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1164: if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1165: if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1166: 1167: /* 1168: * NOTE: if we were able to parse this, the endianness of UTF16 is 1169: * already found and in use 1170: */ 1171: if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1172: if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1173: 1174: if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1175: if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1176: if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1177: 1178: /* 1179: * NOTE: if we were able to parse this, the endianness of UCS4 is 1180: * already found and in use 1181: */ 1182: if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1183: if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1184: if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1185: 1186: 1187: if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1188: if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1189: if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1190: 1191: if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1192: if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1193: if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1194: 1195: if 
(!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3); 1196: if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1197: if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1198: if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1199: if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1200: if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1201: if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1202: 1203: if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1204: if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1205: if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1206: 1207: #ifdef DEBUG_ENCODING 1208: xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1209: #endif 1210: return(XML_CHAR_ENCODING_ERROR); 1211: } 1212: 1213: /** 1214: * xmlGetCharEncodingName: 1215: * @enc: the encoding 1216: * 1217: * The "canonical" name for XML encoding. 1218: * C.f. 
http://www.w3.org/TR/REC-xml#charencoding 1219: * Section 4.3.3 Character Encoding in Entities 1220: * 1221: * Returns the canonical name for the given encoding 1222: */ 1223: 1224: const char* 1225: xmlGetCharEncodingName(xmlCharEncoding enc) { 1226: switch (enc) { 1227: case XML_CHAR_ENCODING_ERROR: 1228: return(NULL); 1229: case XML_CHAR_ENCODING_NONE: 1230: return(NULL); 1231: case XML_CHAR_ENCODING_UTF8: 1232: return("UTF-8"); 1233: case XML_CHAR_ENCODING_UTF16LE: 1234: return("UTF-16"); 1235: case XML_CHAR_ENCODING_UTF16BE: 1236: return("UTF-16"); 1237: case XML_CHAR_ENCODING_EBCDIC: 1238: return("EBCDIC"); 1239: case XML_CHAR_ENCODING_UCS4LE: 1240: return("ISO-10646-UCS-4"); 1241: case XML_CHAR_ENCODING_UCS4BE: 1242: return("ISO-10646-UCS-4"); 1243: case XML_CHAR_ENCODING_UCS4_2143: 1244: return("ISO-10646-UCS-4"); 1245: case XML_CHAR_ENCODING_UCS4_3412: 1246: return("ISO-10646-UCS-4"); 1247: case XML_CHAR_ENCODING_UCS2: 1248: return("ISO-10646-UCS-2"); 1249: case XML_CHAR_ENCODING_8859_1: 1250: return("ISO-8859-1"); 1251: case XML_CHAR_ENCODING_8859_2: 1252: return("ISO-8859-2"); 1253: case XML_CHAR_ENCODING_8859_3: 1254: return("ISO-8859-3"); 1255: case XML_CHAR_ENCODING_8859_4: 1256: return("ISO-8859-4"); 1257: case XML_CHAR_ENCODING_8859_5: 1258: return("ISO-8859-5"); 1259: case XML_CHAR_ENCODING_8859_6: 1260: return("ISO-8859-6"); 1261: case XML_CHAR_ENCODING_8859_7: 1262: return("ISO-8859-7"); 1263: case XML_CHAR_ENCODING_8859_8: 1264: return("ISO-8859-8"); 1265: case XML_CHAR_ENCODING_8859_9: 1266: return("ISO-8859-9"); 1267: case XML_CHAR_ENCODING_2022_JP: 1268: return("ISO-2022-JP"); 1269: case XML_CHAR_ENCODING_SHIFT_JIS: 1270: return("Shift-JIS"); 1271: case XML_CHAR_ENCODING_EUC_JP: 1272: return("EUC-JP"); 1273: case XML_CHAR_ENCODING_ASCII: 1274: return(NULL); 1275: } 1276: return(NULL); 1277: } 1278: 1279: /************************************************************************ 1280: * * 1281: * Char encoding handlers * 1282: * * 1283: 
************************************************************************/ 1284: 1285: 1286: /* the size should be growable, but it's not a big deal ... */ 1287: #define MAX_ENCODING_HANDLERS 50 1288: static xmlCharEncodingHandlerPtr *handlers = NULL; 1289: static int nbCharEncodingHandler = 0; 1290: 1291: /* 1292: * The default is UTF-8 for XML, that's also the default used for the 1293: * parser internals, so the default encoding handler is NULL 1294: */ 1295: 1296: static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1297: 1298: /** 1299: * xmlNewCharEncodingHandler: 1300: * @name: the encoding name, in UTF-8 format (ASCII actually) 1301: * @input: the xmlCharEncodingInputFunc to read that encoding 1302: * @output: the xmlCharEncodingOutputFunc to write that encoding 1303: * 1304: * Create and registers an xmlCharEncodingHandler. 1305: * 1306: * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 1307: */ 1308: xmlCharEncodingHandlerPtr 1309: xmlNewCharEncodingHandler(const char *name, 1310: xmlCharEncodingInputFunc input, 1311: xmlCharEncodingOutputFunc output) { 1312: xmlCharEncodingHandlerPtr handler; 1313: const char *alias; 1314: char upper[500]; 1315: int i; 1316: char *up = NULL; 1317: 1318: /* 1319: * Do the alias resolution 1320: */ 1321: alias = xmlGetEncodingAlias(name); 1322: if (alias != NULL) 1323: name = alias; 1324: 1325: /* 1326: * Keep only the uppercase version of the encoding. 1327: */ 1328: if (name == NULL) { 1329: xmlEncodingErr(XML_I18N_NO_NAME, 1330: "xmlNewCharEncodingHandler : no name !\n", NULL); 1331: return(NULL); 1332: } 1333: for (i = 0;i < 499;i++) { 1334: upper[i] = toupper(name[i]); 1335: if (upper[i] == 0) break; 1336: } 1337: upper[i] = 0; 1338: up = xmlMemStrdup(upper); 1339: if (up == NULL) { 1340: xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1341: return(NULL); 1342: } 1343: 1344: /* 1345: * allocate and fill-up an handler block. 
1346: */ 1347: handler = (xmlCharEncodingHandlerPtr) 1348: xmlMalloc(sizeof(xmlCharEncodingHandler)); 1349: if (handler == NULL) { 1350: xmlFree(up); 1351: xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1352: return(NULL); 1353: } 1354: memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1355: handler->input = input; 1356: handler->output = output; 1357: handler->name = up; 1358: 1359: #ifdef LIBXML_ICONV_ENABLED 1360: handler->iconv_in = NULL; 1361: handler->iconv_out = NULL; 1362: #endif 1363: #ifdef LIBXML_ICU_ENABLED 1364: handler->uconv_in = NULL; 1365: handler->uconv_out = NULL; 1366: #endif 1367: 1368: /* 1369: * registers and returns the handler. 1370: */ 1371: xmlRegisterCharEncodingHandler(handler); 1372: #ifdef DEBUG_ENCODING 1373: xmlGenericError(xmlGenericErrorContext, 1374: "Registered encoding handler for %s\n", name); 1375: #endif 1376: return(handler); 1377: } 1378: 1379: /** 1380: * xmlInitCharEncodingHandlers: 1381: * 1382: * Initialize the char encoding support, it registers the default 1383: * encoding supported. 1384: * NOTE: while public, this function usually doesn't need to be called 1385: * in normal processing. 
1386: */ 1387: void 1388: xmlInitCharEncodingHandlers(void) { 1389: unsigned short int tst = 0x1234; 1390: unsigned char *ptr = (unsigned char *) &tst; 1391: 1392: if (handlers != NULL) return; 1393: 1394: handlers = (xmlCharEncodingHandlerPtr *) 1395: xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1396: 1397: if (*ptr == 0x12) xmlLittleEndian = 0; 1398: else if (*ptr == 0x34) xmlLittleEndian = 1; 1399: else { 1400: xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1401: "Odd problem at endianness detection\n", NULL); 1402: } 1403: 1404: if (handlers == NULL) { 1405: xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1406: return; 1407: } 1408: xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1409: #ifdef LIBXML_OUTPUT_ENABLED 1410: xmlUTF16LEHandler = 1411: xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1412: xmlUTF16BEHandler = 1413: xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1414: xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1415: xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1416: xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1417: xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1418: #ifdef LIBXML_HTML_ENABLED 1419: xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1420: #endif 1421: #else 1422: xmlUTF16LEHandler = 1423: xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1424: xmlUTF16BEHandler = 1425: xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 1426: xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1427: xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1428: xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1429: xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1430: #endif /* LIBXML_OUTPUT_ENABLED */ 1431: #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1432: #ifdef LIBXML_ISO8859X_ENABLED 1433: 
xmlRegisterCharEncodingHandlersISO8859x (); 1434: #endif 1435: #endif 1436: 1437: } 1438: 1439: /** 1440: * xmlCleanupCharEncodingHandlers: 1441: * 1442: * Cleanup the memory allocated for the char encoding support, it 1443: * unregisters all the encoding handlers and the aliases. 1444: */ 1445: void 1446: xmlCleanupCharEncodingHandlers(void) { 1447: xmlCleanupEncodingAliases(); 1448: 1449: if (handlers == NULL) return; 1450: 1451: for (;nbCharEncodingHandler > 0;) { 1452: nbCharEncodingHandler--; 1453: if (handlers[nbCharEncodingHandler] != NULL) { 1454: if (handlers[nbCharEncodingHandler]->name != NULL) 1455: xmlFree(handlers[nbCharEncodingHandler]->name); 1456: xmlFree(handlers[nbCharEncodingHandler]); 1457: } 1458: } 1459: xmlFree(handlers); 1460: handlers = NULL; 1461: nbCharEncodingHandler = 0; 1462: xmlDefaultCharEncodingHandler = NULL; 1463: } 1464: 1465: /** 1466: * xmlRegisterCharEncodingHandler: 1467: * @handler: the xmlCharEncodingHandlerPtr handler block 1468: * 1469: * Register the char encoding handler, surprising, isn't it ? 1470: */ 1471: void 1472: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1473: if (handlers == NULL) xmlInitCharEncodingHandlers(); 1474: if ((handler == NULL) || (handlers == NULL)) { 1475: xmlEncodingErr(XML_I18N_NO_HANDLER, 1476: "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1477: return; 1478: } 1479: 1480: if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1481: xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1482: "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1483: "MAX_ENCODING_HANDLERS"); 1484: return; 1485: } 1486: handlers[nbCharEncodingHandler++] = handler; 1487: } 1488: 1489: /** 1490: * xmlGetCharEncodingHandler: 1491: * @enc: an xmlCharEncoding value. 1492: * 1493: * Search in the registered set the handler able to read/write that encoding. 
1494: * 1495: * Returns the handler or NULL if not found 1496: */ 1497: xmlCharEncodingHandlerPtr 1498: xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1499: xmlCharEncodingHandlerPtr handler; 1500: 1501: if (handlers == NULL) xmlInitCharEncodingHandlers(); 1502: switch (enc) { 1503: case XML_CHAR_ENCODING_ERROR: 1504: return(NULL); 1505: case XML_CHAR_ENCODING_NONE: 1506: return(NULL); 1507: case XML_CHAR_ENCODING_UTF8: 1508: return(NULL); 1509: case XML_CHAR_ENCODING_UTF16LE: 1510: return(xmlUTF16LEHandler); 1511: case XML_CHAR_ENCODING_UTF16BE: 1512: return(xmlUTF16BEHandler); 1513: case XML_CHAR_ENCODING_EBCDIC: 1514: handler = xmlFindCharEncodingHandler("EBCDIC"); 1515: if (handler != NULL) return(handler); 1516: handler = xmlFindCharEncodingHandler("ebcdic"); 1517: if (handler != NULL) return(handler); 1518: handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1519: if (handler != NULL) return(handler); 1520: handler = xmlFindCharEncodingHandler("IBM-037"); 1521: if (handler != NULL) return(handler); 1522: break; 1523: case XML_CHAR_ENCODING_UCS4BE: 1524: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1525: if (handler != NULL) return(handler); 1526: handler = xmlFindCharEncodingHandler("UCS-4"); 1527: if (handler != NULL) return(handler); 1528: handler = xmlFindCharEncodingHandler("UCS4"); 1529: if (handler != NULL) return(handler); 1530: break; 1531: case XML_CHAR_ENCODING_UCS4LE: 1532: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1533: if (handler != NULL) return(handler); 1534: handler = xmlFindCharEncodingHandler("UCS-4"); 1535: if (handler != NULL) return(handler); 1536: handler = xmlFindCharEncodingHandler("UCS4"); 1537: if (handler != NULL) return(handler); 1538: break; 1539: case XML_CHAR_ENCODING_UCS4_2143: 1540: break; 1541: case XML_CHAR_ENCODING_UCS4_3412: 1542: break; 1543: case XML_CHAR_ENCODING_UCS2: 1544: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1545: if (handler != NULL) return(handler); 1546: handler = 
xmlFindCharEncodingHandler("UCS-2"); 1547: if (handler != NULL) return(handler); 1548: handler = xmlFindCharEncodingHandler("UCS2"); 1549: if (handler != NULL) return(handler); 1550: break; 1551: 1552: /* 1553: * We used to keep ISO Latin encodings native in the 1554: * generated data. This led to so many problems that 1555: * this has been removed. One can still change this 1556: * back by registering no-ops encoders for those 1557: */ 1558: case XML_CHAR_ENCODING_8859_1: 1559: handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1560: if (handler != NULL) return(handler); 1561: break; 1562: case XML_CHAR_ENCODING_8859_2: 1563: handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1564: if (handler != NULL) return(handler); 1565: break; 1566: case XML_CHAR_ENCODING_8859_3: 1567: handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1568: if (handler != NULL) return(handler); 1569: break; 1570: case XML_CHAR_ENCODING_8859_4: 1571: handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1572: if (handler != NULL) return(handler); 1573: break; 1574: case XML_CHAR_ENCODING_8859_5: 1575: handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1576: if (handler != NULL) return(handler); 1577: break; 1578: case XML_CHAR_ENCODING_8859_6: 1579: handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1580: if (handler != NULL) return(handler); 1581: break; 1582: case XML_CHAR_ENCODING_8859_7: 1583: handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1584: if (handler != NULL) return(handler); 1585: break; 1586: case XML_CHAR_ENCODING_8859_8: 1587: handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1588: if (handler != NULL) return(handler); 1589: break; 1590: case XML_CHAR_ENCODING_8859_9: 1591: handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1592: if (handler != NULL) return(handler); 1593: break; 1594: 1595: 1596: case XML_CHAR_ENCODING_2022_JP: 1597: handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1598: if (handler != NULL) return(handler); 1599: break; 1600: case 
XML_CHAR_ENCODING_SHIFT_JIS: 1601: handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1602: if (handler != NULL) return(handler); 1603: handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1604: if (handler != NULL) return(handler); 1605: handler = xmlFindCharEncodingHandler("Shift_JIS"); 1606: if (handler != NULL) return(handler); 1607: break; 1608: case XML_CHAR_ENCODING_EUC_JP: 1609: handler = xmlFindCharEncodingHandler("EUC-JP"); 1610: if (handler != NULL) return(handler); 1611: break; 1612: default: 1613: break; 1614: } 1615: 1616: #ifdef DEBUG_ENCODING 1617: xmlGenericError(xmlGenericErrorContext, 1618: "No handler found for encoding %d\n", enc); 1619: #endif 1620: return(NULL); 1621: } 1622: 1623: /** 1624: * xmlFindCharEncodingHandler: 1625: * @name: a string describing the char encoding. 1626: * 1627: * Search in the registered set the handler able to read/write that encoding. 1628: * 1629: * Returns the handler or NULL if not found 1630: */ 1631: xmlCharEncodingHandlerPtr 1632: xmlFindCharEncodingHandler(const char *name) { 1633: const char *nalias; 1634: const char *norig; 1635: xmlCharEncoding alias; 1636: #ifdef LIBXML_ICONV_ENABLED 1637: xmlCharEncodingHandlerPtr enc; 1638: iconv_t icv_in, icv_out; 1639: #endif /* LIBXML_ICONV_ENABLED */ 1640: #ifdef LIBXML_ICU_ENABLED 1641: xmlCharEncodingHandlerPtr encu; 1642: uconv_t *ucv_in, *ucv_out; 1643: #endif /* LIBXML_ICU_ENABLED */ 1644: char upper[100]; 1645: int i; 1646: 1647: if (handlers == NULL) xmlInitCharEncodingHandlers(); 1648: if (name == NULL) return(xmlDefaultCharEncodingHandler); 1649: if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1650: 1651: /* 1652: * Do the alias resolution 1653: */ 1654: norig = name; 1655: nalias = xmlGetEncodingAlias(name); 1656: if (nalias != NULL) 1657: name = nalias; 1658: 1659: /* 1660: * Check first for directly registered encoding names 1661: */ 1662: for (i = 0;i < 99;i++) { 1663: upper[i] = toupper(name[i]); 1664: if (upper[i] == 0) break; 1665: } 1666: 
upper[i] = 0; 1667: 1668: if (handlers != NULL) { 1669: for (i = 0;i < nbCharEncodingHandler; i++) { 1670: if (!strcmp(upper, handlers[i]->name)) { 1671: #ifdef DEBUG_ENCODING 1672: xmlGenericError(xmlGenericErrorContext, 1673: "Found registered handler for encoding %s\n", name); 1674: #endif 1675: return(handlers[i]); 1676: } 1677: } 1678: } 1679: 1680: #ifdef LIBXML_ICONV_ENABLED 1681: /* check whether iconv can handle this */ 1682: icv_in = iconv_open("UTF-8", name); 1683: icv_out = iconv_open(name, "UTF-8"); 1684: if (icv_in == (iconv_t) -1) { 1685: icv_in = iconv_open("UTF-8", upper); 1686: } 1687: if (icv_out == (iconv_t) -1) { 1688: icv_out = iconv_open(upper, "UTF-8"); 1689: } 1690: if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1691: enc = (xmlCharEncodingHandlerPtr) 1692: xmlMalloc(sizeof(xmlCharEncodingHandler)); 1693: if (enc == NULL) { 1694: iconv_close(icv_in); 1695: iconv_close(icv_out); 1696: return(NULL); 1697: } 1698: memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1699: enc->name = xmlMemStrdup(name); 1700: enc->input = NULL; 1701: enc->output = NULL; 1702: enc->iconv_in = icv_in; 1703: enc->iconv_out = icv_out; 1704: #ifdef DEBUG_ENCODING 1705: xmlGenericError(xmlGenericErrorContext, 1706: "Found iconv handler for encoding %s\n", name); 1707: #endif 1708: return enc; 1709: } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1710: xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1711: "iconv : problems with filters for '%s'\n", name); 1712: } 1713: #endif /* LIBXML_ICONV_ENABLED */ 1714: #ifdef LIBXML_ICU_ENABLED 1715: /* check whether icu can handle this */ 1716: ucv_in = openIcuConverter(name, 1); 1717: ucv_out = openIcuConverter(name, 0); 1718: if (ucv_in != NULL && ucv_out != NULL) { 1719: encu = (xmlCharEncodingHandlerPtr) 1720: xmlMalloc(sizeof(xmlCharEncodingHandler)); 1721: if (encu == NULL) { 1722: closeIcuConverter(ucv_in); 1723: closeIcuConverter(ucv_out); 1724: return(NULL); 1725: } 1726: memset(encu, 0, 
sizeof(xmlCharEncodingHandler)); 1727: encu->name = xmlMemStrdup(name); 1728: encu->input = NULL; 1729: encu->output = NULL; 1730: encu->uconv_in = ucv_in; 1731: encu->uconv_out = ucv_out; 1732: #ifdef DEBUG_ENCODING 1733: xmlGenericError(xmlGenericErrorContext, 1734: "Found ICU converter handler for encoding %s\n", name); 1735: #endif 1736: return encu; 1737: } else if (ucv_in != NULL || ucv_out != NULL) { 1738: closeIcuConverter(ucv_in); 1739: closeIcuConverter(ucv_out); 1740: xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1741: "ICU converter : problems with filters for '%s'\n", name); 1742: } 1743: #endif /* LIBXML_ICU_ENABLED */ 1744: 1745: #ifdef DEBUG_ENCODING 1746: xmlGenericError(xmlGenericErrorContext, 1747: "No handler found for encoding %s\n", name); 1748: #endif 1749: 1750: /* 1751: * Fallback using the canonical names 1752: */ 1753: alias = xmlParseCharEncoding(norig); 1754: if (alias != XML_CHAR_ENCODING_ERROR) { 1755: const char* canon; 1756: canon = xmlGetCharEncodingName(alias); 1757: if ((canon != NULL) && (strcmp(name, canon))) { 1758: return(xmlFindCharEncodingHandler(canon)); 1759: } 1760: } 1761: 1762: /* If "none of the above", give up */ 1763: return(NULL); 1764: } 1765: 1766: /************************************************************************ 1767: * * 1768: * ICONV based generic conversion functions * 1769: * * 1770: ************************************************************************/ 1771: 1772: #ifdef LIBXML_ICONV_ENABLED 1773: /** 1774: * xmlIconvWrapper: 1775: * @cd: iconv converter data structure 1776: * @out: a pointer to an array of bytes to store the result 1777: * @outlen: the length of @out 1778: * @in: a pointer to an array of ISO Latin 1 chars 1779: * @inlen: the length of @in 1780: * 1781: * Returns 0 if success, or 1782: * -1 by lack of space, or 1783: * -2 if the transcoding fails (for *in is not valid utf8 string or 1784: * the result of transformation can't fit into the encoding we want), or 1785: * -3 if there the 
last byte can't form a single output char. 1786: * 1787: * The value of @inlen after return is the number of octets consumed 1788: * as the return value is positive, else unpredictable. 1789: * The value of @outlen after return is the number of ocetes consumed. 1790: */ 1791: static int 1792: xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1793: const unsigned char *in, int *inlen) { 1794: size_t icv_inlen, icv_outlen; 1795: const char *icv_in = (const char *) in; 1796: char *icv_out = (char *) out; 1797: int ret; 1798: 1799: if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1800: if (outlen != NULL) *outlen = 0; 1801: return(-1); 1802: } 1803: icv_inlen = *inlen; 1804: icv_outlen = *outlen; 1805: ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1806: *inlen -= icv_inlen; 1807: *outlen -= icv_outlen; 1808: if ((icv_inlen != 0) || (ret == -1)) { 1809: #ifdef EILSEQ 1810: if (errno == EILSEQ) { 1811: return -2; 1812: } else 1813: #endif 1814: #ifdef E2BIG 1815: if (errno == E2BIG) { 1816: return -1; 1817: } else 1818: #endif 1819: #ifdef EINVAL 1820: if (errno == EINVAL) { 1821: return -3; 1822: } else 1823: #endif 1824: { 1825: return -3; 1826: } 1827: } 1828: return 0; 1829: } 1830: #endif /* LIBXML_ICONV_ENABLED */ 1831: 1832: /************************************************************************ 1833: * * 1834: * ICU based generic conversion functions * 1835: * * 1836: ************************************************************************/ 1837: 1838: #ifdef LIBXML_ICU_ENABLED 1839: /** 1840: * xmlUconvWrapper: 1841: * @cd: ICU uconverter data structure 1842: * @toUnicode : non-zero if toUnicode. 0 otherwise. 
1843: * @out: a pointer to an array of bytes to store the result 1844: * @outlen: the length of @out 1845: * @in: a pointer to an array of ISO Latin 1 chars 1846: * @inlen: the length of @in 1847: * 1848: * Returns 0 if success, or 1849: * -1 by lack of space, or 1850: * -2 if the transcoding fails (for *in is not valid utf8 string or 1851: * the result of transformation can't fit into the encoding we want), or 1852: * -3 if there the last byte can't form a single output char. 1853: * 1854: * The value of @inlen after return is the number of octets consumed 1855: * as the return value is positive, else unpredictable. 1856: * The value of @outlen after return is the number of ocetes consumed. 1857: */ 1858: static int 1859: xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1860: const unsigned char *in, int *inlen) { 1861: const char *ucv_in = (const char *) in; 1862: char *ucv_out = (char *) out; 1863: UErrorCode err = U_ZERO_ERROR; 1864: 1865: if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1866: if (outlen != NULL) *outlen = 0; 1867: return(-1); 1868: } 1869: 1870: /* 1871: * TODO(jungshik) 1872: * 1. is ucnv_convert(To|From)Algorithmic better? 1873: * 2. had we better use an explicit pivot buffer? 1874: * 3. error returned comes from 'fromUnicode' only even 1875: * when toUnicode is true ! 
1876: */ 1877: if (toUnicode) { 1878: /* encoding => UTF-16 => UTF-8 */ 1879: ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1880: &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1881: 0, TRUE, &err); 1882: } else { 1883: /* UTF-8 => UTF-16 => encoding */ 1884: ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1885: &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1886: 0, TRUE, &err); 1887: } 1888: *inlen = ucv_in - (const char*) in; 1889: *outlen = ucv_out - (char *) out; 1890: if (U_SUCCESS(err)) 1891: return 0; 1892: if (err == U_BUFFER_OVERFLOW_ERROR) 1893: return -1; 1894: if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1895: return -2; 1896: /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1897: return -3; 1898: } 1899: #endif /* LIBXML_ICU_ENABLED */ 1900: 1901: /************************************************************************ 1902: * * 1903: * The real API used by libxml for on-the-fly conversion * 1904: * * 1905: ************************************************************************/ 1906: 1907: /** 1908: * xmlCharEncFirstLineInt: 1909: * @handler: char enconding transformation data structure 1910: * @out: an xmlBuffer for the output. 1911: * @in: an xmlBuffer for the input 1912: * @len: number of bytes to convert for the first line, or -1 1913: * 1914: * Front-end for the encoding handler input function, but handle only 1915: * the very first line, i.e. limit itself to 45 chars. 
1916: * 1917: * Returns the number of byte written if success, or 1918: * -1 general error 1919: * -2 if the transcoding fails (for *in is not valid utf8 string or 1920: * the result of transformation can't fit into the encoding we want), or 1921: */ 1922: int 1923: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1924: xmlBufferPtr in, int len) { 1925: int ret = -2; 1926: int written; 1927: int toconv; 1928: 1929: if (handler == NULL) return(-1); 1930: if (out == NULL) return(-1); 1931: if (in == NULL) return(-1); 1932: 1933: /* calculate space available */ 1934: written = out->size - out->use - 1; /* count '\0' */ 1935: toconv = in->use; 1936: /* 1937: * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1938: * 45 chars should be sufficient to reach the end of the encoding 1939: * declaration without going too far inside the document content. 1940: * on UTF-16 this means 90bytes, on UCS4 this means 180 1941: * The actual value depending on guessed encoding is passed as @len 1942: * if provided 1943: */ 1944: if (len >= 0) { 1945: if (toconv > len) 1946: toconv = len; 1947: } else { 1948: if (toconv > 180) 1949: toconv = 180; 1950: } 1951: if (toconv * 2 >= written) { 1952: xmlBufferGrow(out, toconv * 2); 1953: written = out->size - out->use - 1; 1954: } 1955: 1956: if (handler->input != NULL) { 1957: ret = handler->input(&out->content[out->use], &written, 1958: in->content, &toconv); 1959: xmlBufferShrink(in, toconv); 1960: out->use += written; 1961: out->content[out->use] = 0; 1962: } 1963: #ifdef LIBXML_ICONV_ENABLED 1964: else if (handler->iconv_in != NULL) { 1965: ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1966: &written, in->content, &toconv); 1967: xmlBufferShrink(in, toconv); 1968: out->use += written; 1969: out->content[out->use] = 0; 1970: if (ret == -1) ret = -3; 1971: } 1972: #endif /* LIBXML_ICONV_ENABLED */ 1973: #ifdef LIBXML_ICU_ENABLED 1974: else if (handler->uconv_in != NULL) { 1975: ret = 
xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 1976: &written, in->content, &toconv); 1977: xmlBufferShrink(in, toconv); 1978: out->use += written; 1979: out->content[out->use] = 0; 1980: if (ret == -1) ret = -3; 1981: } 1982: #endif /* LIBXML_ICU_ENABLED */ 1983: #ifdef DEBUG_ENCODING 1984: switch (ret) { 1985: case 0: 1986: xmlGenericError(xmlGenericErrorContext, 1987: "converted %d bytes to %d bytes of input\n", 1988: toconv, written); 1989: break; 1990: case -1: 1991: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1992: toconv, written, in->use); 1993: break; 1994: case -2: 1995: xmlGenericError(xmlGenericErrorContext, 1996: "input conversion failed due to input error\n"); 1997: break; 1998: case -3: 1999: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2000: toconv, written, in->use); 2001: break; 2002: default: 2003: xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2004: } 2005: #endif /* DEBUG_ENCODING */ 2006: /* 2007: * Ignore when input buffer is not on a boundary 2008: */ 2009: if (ret == -3) ret = 0; 2010: if (ret == -1) ret = 0; 2011: return(ret); 2012: } 2013: 2014: /** 2015: * xmlCharEncFirstLine: 2016: * @handler: char enconding transformation data structure 2017: * @out: an xmlBuffer for the output. 2018: * @in: an xmlBuffer for the input 2019: * 2020: * Front-end for the encoding handler input function, but handle only 2021: * the very first line, i.e. limit itself to 45 chars. 
2022: * 2023: * Returns the number of byte written if success, or 2024: * -1 general error 2025: * -2 if the transcoding fails (for *in is not valid utf8 string or 2026: * the result of transformation can't fit into the encoding we want), or 2027: */ 2028: int 2029: xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2030: xmlBufferPtr in) { 2031: return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2032: } 2033: 2034: /** 2035: * xmlCharEncFirstLineInput: 2036: * @input: a parser input buffer 2037: * @len: number of bytes to convert for the first line, or -1 2038: * 2039: * Front-end for the encoding handler input function, but handle only 2040: * the very first line. Point is that this is based on autodetection 2041: * of the encoding and once that first line is converted we may find 2042: * out that a different decoder is needed to process the input. 2043: * 2044: * Returns the number of byte written if success, or 2045: * -1 general error 2046: * -2 if the transcoding fails (for *in is not valid utf8 string or 2047: * the result of transformation can't fit into the encoding we want), or 2048: */ 2049: int 2050: xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) 2051: { 2052: int ret = -2; 2053: size_t written; 2054: size_t toconv; 2055: int c_in; 2056: int c_out; 2057: xmlBufPtr in; 2058: xmlBufPtr out; 2059: 2060: if ((input == NULL) || (input->encoder == NULL) || 2061: (input->buffer == NULL) || (input->raw == NULL)) 2062: return (-1); 2063: out = input->buffer; 2064: in = input->raw; 2065: 2066: toconv = xmlBufUse(in); 2067: if (toconv == 0) 2068: return (0); 2069: written = xmlBufAvail(out) - 1; /* count '\0' */ 2070: /* 2071: * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2072: * 45 chars should be sufficient to reach the end of the encoding 2073: * declaration without going too far inside the document content. 
2074: * on UTF-16 this means 90bytes, on UCS4 this means 180 2075: * The actual value depending on guessed encoding is passed as @len 2076: * if provided 2077: */ 2078: if (len >= 0) { 2079: if (toconv > (unsigned int) len) 2080: toconv = len; 2081: } else { 2082: if (toconv > 180) 2083: toconv = 180; 2084: } 2085: if (toconv * 2 >= written) { 2086: xmlBufGrow(out, toconv * 2); 2087: written = xmlBufAvail(out) - 1; 2088: } 2089: if (written > 360) 2090: written = 360; 2091: 2092: c_in = toconv; 2093: c_out = written; 2094: if (input->encoder->input != NULL) { 2095: ret = input->encoder->input(xmlBufEnd(out), &c_out, 2096: xmlBufContent(in), &c_in); 2097: xmlBufShrink(in, c_in); 2098: xmlBufAddLen(out, c_out); 2099: } 2100: #ifdef LIBXML_ICONV_ENABLED 2101: else if (input->encoder->iconv_in != NULL) { 2102: ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2103: &c_out, xmlBufContent(in), &c_in); 2104: xmlBufShrink(in, c_in); 2105: xmlBufAddLen(out, c_out); 2106: if (ret == -1) 2107: ret = -3; 2108: } 2109: #endif /* LIBXML_ICONV_ENABLED */ 2110: #ifdef LIBXML_ICU_ENABLED 2111: else if (input->encoder->uconv_in != NULL) { 2112: ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2113: &c_out, xmlBufContent(in), &c_in); 2114: xmlBufShrink(in, c_in); 2115: xmlBufAddLen(out, c_out); 2116: if (ret == -1) 2117: ret = -3; 2118: } 2119: #endif /* LIBXML_ICU_ENABLED */ 2120: switch (ret) { 2121: case 0: 2122: #ifdef DEBUG_ENCODING 2123: xmlGenericError(xmlGenericErrorContext, 2124: "converted %d bytes to %d bytes of input\n", 2125: c_in, c_out); 2126: #endif 2127: break; 2128: case -1: 2129: #ifdef DEBUG_ENCODING 2130: xmlGenericError(xmlGenericErrorContext, 2131: "converted %d bytes to %d bytes of input, %d left\n", 2132: c_in, c_out, (int)xmlBufUse(in)); 2133: #endif 2134: break; 2135: case -3: 2136: #ifdef DEBUG_ENCODING 2137: xmlGenericError(xmlGenericErrorContext, 2138: "converted %d bytes to %d bytes of input, %d left\n", 2139: c_in, 
c_out, (int)xmlBufUse(in)); 2140: #endif 2141: break; 2142: case -2: { 2143: char buf[50]; 2144: const xmlChar *content = xmlBufContent(in); 2145: 2146: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2147: content[0], content[1], 2148: content[2], content[3]); 2149: buf[49] = 0; 2150: xmlEncodingErr(XML_I18N_CONV_FAILED, 2151: "input conversion failed due to input error, bytes %s\n", 2152: buf); 2153: } 2154: } 2155: /* 2156: * Ignore when input buffer is not on a boundary 2157: */ 2158: if (ret == -3) ret = 0; 2159: if (ret == -1) ret = 0; 2160: return(ret); 2161: } 2162: 2163: /** 2164: * xmlCharEncInput: 2165: * @input: a parser input buffer 2166: * @flush: try to flush all the raw buffer 2167: * 2168: * Generic front-end for the encoding handler on parser input 2169: * 2170: * Returns the number of byte written if success, or 2171: * -1 general error 2172: * -2 if the transcoding fails (for *in is not valid utf8 string or 2173: * the result of transformation can't fit into the encoding we want), or 2174: */ 2175: int 2176: xmlCharEncInput(xmlParserInputBufferPtr input, int flush) 2177: { 2178: int ret = -2; 2179: size_t written; 2180: size_t toconv; 2181: int c_in; 2182: int c_out; 2183: xmlBufPtr in; 2184: xmlBufPtr out; 2185: 2186: if ((input == NULL) || (input->encoder == NULL) || 2187: (input->buffer == NULL) || (input->raw == NULL)) 2188: return (-1); 2189: out = input->buffer; 2190: in = input->raw; 2191: 2192: toconv = xmlBufUse(in); 2193: if (toconv == 0) 2194: return (0); 2195: if ((toconv > 64 * 1024) && (flush == 0)) 2196: toconv = 64 * 1024; 2197: written = xmlBufAvail(out); 2198: if (written > 0) 2199: written--; /* count '\0' */ 2200: if (toconv * 2 >= written) { 2201: xmlBufGrow(out, toconv * 2); 2202: written = xmlBufAvail(out); 2203: if (written > 0) 2204: written--; /* count '\0' */ 2205: } 2206: if ((written > 128 * 1024) && (flush == 0)) 2207: written = 128 * 1024; 2208: 2209: c_in = toconv; 2210: c_out = written; 2211: if 
(input->encoder->input != NULL) { 2212: ret = input->encoder->input(xmlBufEnd(out), &c_out, 2213: xmlBufContent(in), &c_in); 2214: xmlBufShrink(in, c_in); 2215: xmlBufAddLen(out, c_out); 2216: } 2217: #ifdef LIBXML_ICONV_ENABLED 2218: else if (input->encoder->iconv_in != NULL) { 2219: ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2220: &c_out, xmlBufContent(in), &c_in); 2221: xmlBufShrink(in, c_in); 2222: xmlBufAddLen(out, c_out); 2223: if (ret == -1) 2224: ret = -3; 2225: } 2226: #endif /* LIBXML_ICONV_ENABLED */ 2227: #ifdef LIBXML_ICU_ENABLED 2228: else if (input->encoder->uconv_in != NULL) { 2229: ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2230: &c_out, xmlBufContent(in), &c_in); 2231: xmlBufShrink(in, c_in); 2232: xmlBufAddLen(out, c_out); 2233: if (ret == -1) 2234: ret = -3; 2235: } 2236: #endif /* LIBXML_ICU_ENABLED */ 2237: switch (ret) { 2238: case 0: 2239: #ifdef DEBUG_ENCODING 2240: xmlGenericError(xmlGenericErrorContext, 2241: "converted %d bytes to %d bytes of input\n", 2242: c_in, c_out); 2243: #endif 2244: break; 2245: case -1: 2246: #ifdef DEBUG_ENCODING 2247: xmlGenericError(xmlGenericErrorContext, 2248: "converted %d bytes to %d bytes of input, %d left\n", 2249: c_in, c_out, (int)xmlBufUse(in)); 2250: #endif 2251: break; 2252: case -3: 2253: #ifdef DEBUG_ENCODING 2254: xmlGenericError(xmlGenericErrorContext, 2255: "converted %d bytes to %d bytes of input, %d left\n", 2256: c_in, c_out, (int)xmlBufUse(in)); 2257: #endif 2258: break; 2259: case -2: { 2260: char buf[50]; 2261: const xmlChar *content = xmlBufContent(in); 2262: 2263: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2264: content[0], content[1], 2265: content[2], content[3]); 2266: buf[49] = 0; 2267: xmlEncodingErr(XML_I18N_CONV_FAILED, 2268: "input conversion failed due to input error, bytes %s\n", 2269: buf); 2270: } 2271: } 2272: /* 2273: * Ignore when input buffer is not on a boundary 2274: */ 2275: if (ret == -3) 2276: ret = 0; 2277: 
return (c_out? c_out : ret); 2278: } 2279: 2280: /** 2281: * xmlCharEncInFunc: 2282: * @handler: char encoding transformation data structure 2283: * @out: an xmlBuffer for the output. 2284: * @in: an xmlBuffer for the input 2285: * 2286: * Generic front-end for the encoding handler input function 2287: * 2288: * Returns the number of byte written if success, or 2289: * -1 general error 2290: * -2 if the transcoding fails (for *in is not valid utf8 string or 2291: * the result of transformation can't fit into the encoding we want), or 2292: */ 2293: int 2294: xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2295: xmlBufferPtr in) 2296: { 2297: int ret = -2; 2298: int written; 2299: int toconv; 2300: 2301: if (handler == NULL) 2302: return (-1); 2303: if (out == NULL) 2304: return (-1); 2305: if (in == NULL) 2306: return (-1); 2307: 2308: toconv = in->use; 2309: if (toconv == 0) 2310: return (0); 2311: written = out->size - out->use -1; /* count '\0' */ 2312: if (toconv * 2 >= written) { 2313: xmlBufferGrow(out, out->size + toconv * 2); 2314: written = out->size - out->use - 1; 2315: } 2316: if (handler->input != NULL) { 2317: ret = handler->input(&out->content[out->use], &written, 2318: in->content, &toconv); 2319: xmlBufferShrink(in, toconv); 2320: out->use += written; 2321: out->content[out->use] = 0; 2322: } 2323: #ifdef LIBXML_ICONV_ENABLED 2324: else if (handler->iconv_in != NULL) { 2325: ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 2326: &written, in->content, &toconv); 2327: xmlBufferShrink(in, toconv); 2328: out->use += written; 2329: out->content[out->use] = 0; 2330: if (ret == -1) 2331: ret = -3; 2332: } 2333: #endif /* LIBXML_ICONV_ENABLED */ 2334: #ifdef LIBXML_ICU_ENABLED 2335: else if (handler->uconv_in != NULL) { 2336: ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 2337: &written, in->content, &toconv); 2338: xmlBufferShrink(in, toconv); 2339: out->use += written; 2340: 
out->content[out->use] = 0; 2341: if (ret == -1) 2342: ret = -3; 2343: } 2344: #endif /* LIBXML_ICU_ENABLED */ 2345: switch (ret) { 2346: case 0: 2347: #ifdef DEBUG_ENCODING 2348: xmlGenericError(xmlGenericErrorContext, 2349: "converted %d bytes to %d bytes of input\n", 2350: toconv, written); 2351: #endif 2352: break; 2353: case -1: 2354: #ifdef DEBUG_ENCODING 2355: xmlGenericError(xmlGenericErrorContext, 2356: "converted %d bytes to %d bytes of input, %d left\n", 2357: toconv, written, in->use); 2358: #endif 2359: break; 2360: case -3: 2361: #ifdef DEBUG_ENCODING 2362: xmlGenericError(xmlGenericErrorContext, 2363: "converted %d bytes to %d bytes of input, %d left\n", 2364: toconv, written, in->use); 2365: #endif 2366: break; 2367: case -2: { 2368: char buf[50]; 2369: 2370: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2371: in->content[0], in->content[1], 2372: in->content[2], in->content[3]); 2373: buf[49] = 0; 2374: xmlEncodingErr(XML_I18N_CONV_FAILED, 2375: "input conversion failed due to input error, bytes %s\n", 2376: buf); 2377: } 2378: } 2379: /* 2380: * Ignore when input buffer is not on a boundary 2381: */ 2382: if (ret == -3) 2383: ret = 0; 2384: return (written? written : ret); 2385: } 2386: 2387: /** 2388: * xmlCharEncOutput: 2389: * @output: a parser output buffer 2390: * @init: is this an initialization call without data 2391: * 2392: * Generic front-end for the encoding handler on parser output 2393: * a first call with @init == 1 has to be made first to initiate the 2394: * output in case of non-stateless encoding needing to initiate their 2395: * state or the output (like the BOM in UTF16). 2396: * In case of UTF8 sequence conversion errors for the given encoder, 2397: * the content will be automatically remapped to a CharRef sequence. 
2398: * 2399: * Returns the number of byte written if success, or 2400: * -1 general error 2401: * -2 if the transcoding fails (for *in is not valid utf8 string or 2402: * the result of transformation can't fit into the encoding we want), or 2403: */ 2404: int 2405: xmlCharEncOutput(xmlOutputBufferPtr output, int init) 2406: { 2407: int ret = -2; 2408: size_t written; 2409: size_t writtentot = 0; 2410: size_t toconv; 2411: int c_in; 2412: int c_out; 2413: xmlBufPtr in; 2414: xmlBufPtr out; 2415: int charref_len = 0; 2416: 2417: if ((output == NULL) || (output->encoder == NULL) || 2418: (output->buffer == NULL) || (output->conv == NULL)) 2419: return (-1); 2420: out = output->conv; 2421: in = output->buffer; 2422: 2423: retry: 2424: 2425: written = xmlBufAvail(out); 2426: if (written > 0) 2427: written--; /* count '\0' */ 2428: 2429: /* 2430: * First specific handling of the initialization call 2431: */ 2432: if (init) { 2433: c_in = 0; 2434: c_out = written; 2435: if (output->encoder->output != NULL) { 2436: ret = output->encoder->output(xmlBufEnd(out), &c_out, 2437: NULL, &c_in); 2438: if (ret > 0) /* Gennady: check return value */ 2439: xmlBufAddLen(out, c_out); 2440: } 2441: #ifdef LIBXML_ICONV_ENABLED 2442: else if (output->encoder->iconv_out != NULL) { 2443: ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2444: &c_out, NULL, &c_in); 2445: xmlBufAddLen(out, c_out); 2446: } 2447: #endif /* LIBXML_ICONV_ENABLED */ 2448: #ifdef LIBXML_ICU_ENABLED 2449: else if (output->encoder->uconv_out != NULL) { 2450: ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2451: &c_out, NULL, &c_in); 2452: xmlBufAddLen(out, c_out); 2453: } 2454: #endif /* LIBXML_ICU_ENABLED */ 2455: #ifdef DEBUG_ENCODING 2456: xmlGenericError(xmlGenericErrorContext, 2457: "initialized encoder\n"); 2458: #endif 2459: return(0); 2460: } 2461: 2462: /* 2463: * Conversion itself. 
2464: */ 2465: toconv = xmlBufUse(in); 2466: if (toconv == 0) 2467: return (0); 2468: if (toconv > 64 * 1024) 2469: toconv = 64 * 1024; 2470: if (toconv * 4 >= written) { 2471: xmlBufGrow(out, toconv * 4); 2472: written = xmlBufAvail(out) - 1; 2473: } 2474: if (written > 256 * 1024) 2475: written = 256 * 1024; 2476: 2477: c_in = toconv; 2478: c_out = written; 2479: if (output->encoder->output != NULL) { 2480: ret = output->encoder->output(xmlBufEnd(out), &c_out, 2481: xmlBufContent(in), &c_in); 2482: if (c_out > 0) { 2483: xmlBufShrink(in, c_in); 2484: xmlBufAddLen(out, c_out); 2485: writtentot += c_out; 2486: } 2487: } 2488: #ifdef LIBXML_ICONV_ENABLED 2489: else if (output->encoder->iconv_out != NULL) { 2490: ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2491: &c_out, xmlBufContent(in), &c_in); 2492: xmlBufShrink(in, c_in); 2493: xmlBufAddLen(out, c_out); 2494: writtentot += c_out; 2495: if (ret == -1) { 2496: if (c_out > 0) { 2497: /* 2498: * Can be a limitation of iconv 2499: */ 2500: charref_len = 0; 2501: goto retry; 2502: } 2503: ret = -3; 2504: } 2505: } 2506: #endif /* LIBXML_ICONV_ENABLED */ 2507: #ifdef LIBXML_ICU_ENABLED 2508: else if (output->encoder->uconv_out != NULL) { 2509: ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2510: &c_out, xmlBufContent(in), &c_in); 2511: xmlBufShrink(in, c_in); 2512: xmlBufAddLen(out, c_out); 2513: writtentot += c_out; 2514: if (ret == -1) { 2515: if (c_out > 0) { 2516: /* 2517: * Can be a limitation of uconv 2518: */ 2519: charref_len = 0; 2520: goto retry; 2521: } 2522: ret = -3; 2523: } 2524: } 2525: #endif /* LIBXML_ICU_ENABLED */ 2526: else { 2527: xmlEncodingErr(XML_I18N_NO_OUTPUT, 2528: "xmlCharEncOutFunc: no output function !\n", NULL); 2529: return(-1); 2530: } 2531: 2532: if (ret >= 0) output += ret; 2533: 2534: /* 2535: * Attempt to handle error cases 2536: */ 2537: switch (ret) { 2538: case 0: 2539: #ifdef DEBUG_ENCODING 2540: xmlGenericError(xmlGenericErrorContext, 
2541: "converted %d bytes to %d bytes of output\n", 2542: c_in, c_out); 2543: #endif 2544: break; 2545: case -1: 2546: #ifdef DEBUG_ENCODING 2547: xmlGenericError(xmlGenericErrorContext, 2548: "output conversion failed by lack of space\n"); 2549: #endif 2550: break; 2551: case -3: 2552: #ifdef DEBUG_ENCODING 2553: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2554: c_in, c_out, (int) xmlBufUse(in)); 2555: #endif 2556: break; 2557: case -2: { 2558: int len = (int) xmlBufUse(in); 2559: xmlChar *content = xmlBufContent(in); 2560: int cur; 2561: 2562: cur = xmlGetUTF8Char(content, &len); 2563: if ((charref_len != 0) && (c_out < charref_len)) { 2564: /* 2565: * We attempted to insert a character reference and failed. 2566: * Undo what was written and skip the remaining charref. 2567: */ 2568: xmlBufErase(out, c_out); 2569: writtentot -= c_out; 2570: xmlBufShrink(in, charref_len - c_out); 2571: charref_len = 0; 2572: 2573: ret = -1; 2574: break; 2575: } else if (cur > 0) { 2576: xmlChar charref[20]; 2577: 2578: #ifdef DEBUG_ENCODING 2579: xmlGenericError(xmlGenericErrorContext, 2580: "handling output conversion error\n"); 2581: xmlGenericError(xmlGenericErrorContext, 2582: "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2583: content[0], content[1], 2584: content[2], content[3]); 2585: #endif 2586: /* 2587: * Removes the UTF8 sequence, and replace it by a charref 2588: * and continue the transcoding phase, hoping the error 2589: * did not mangle the encoder state. 
2590: */ 2591: charref_len = snprintf((char *) &charref[0], sizeof(charref), 2592: "&#%d;", cur); 2593: xmlBufShrink(in, len); 2594: xmlBufAddHead(in, charref, -1); 2595: 2596: goto retry; 2597: } else { 2598: char buf[50]; 2599: 2600: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2601: content[0], content[1], 2602: content[2], content[3]); 2603: buf[49] = 0; 2604: xmlEncodingErr(XML_I18N_CONV_FAILED, 2605: "output conversion failed due to conv error, bytes %s\n", 2606: buf); 2607: if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) 2608: content[0] = ' '; 2609: } 2610: break; 2611: } 2612: } 2613: return(ret); 2614: } 2615: 2616: /** 2617: * xmlCharEncOutFunc: 2618: * @handler: char enconding transformation data structure 2619: * @out: an xmlBuffer for the output. 2620: * @in: an xmlBuffer for the input 2621: * 2622: * Generic front-end for the encoding handler output function 2623: * a first call with @in == NULL has to be made firs to initiate the 2624: * output in case of non-stateless encoding needing to initiate their 2625: * state or the output (like the BOM in UTF16). 2626: * In case of UTF8 sequence conversion errors for the given encoder, 2627: * the content will be automatically remapped to a CharRef sequence. 
2628: * 2629: * Returns the number of byte written if success, or 2630: * -1 general error 2631: * -2 if the transcoding fails (for *in is not valid utf8 string or 2632: * the result of transformation can't fit into the encoding we want), or 2633: */ 2634: int 2635: xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2636: xmlBufferPtr in) { 2637: int ret = -2; 2638: int written; 2639: int writtentot = 0; 2640: int toconv; 2641: int output = 0; 2642: int charref_len = 0; 2643: 2644: if (handler == NULL) return(-1); 2645: if (out == NULL) return(-1); 2646: 2647: retry: 2648: 2649: written = out->size - out->use; 2650: 2651: if (written > 0) 2652: written--; /* Gennady: count '/0' */ 2653: 2654: /* 2655: * First specific handling of in = NULL, i.e. the initialization call 2656: */ 2657: if (in == NULL) { 2658: toconv = 0; 2659: if (handler->output != NULL) { 2660: ret = handler->output(&out->content[out->use], &written, 2661: NULL, &toconv); 2662: if (ret >= 0) { /* Gennady: check return value */ 2663: out->use += written; 2664: out->content[out->use] = 0; 2665: } 2666: } 2667: #ifdef LIBXML_ICONV_ENABLED 2668: else if (handler->iconv_out != NULL) { 2669: ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2670: &written, NULL, &toconv); 2671: out->use += written; 2672: out->content[out->use] = 0; 2673: } 2674: #endif /* LIBXML_ICONV_ENABLED */ 2675: #ifdef LIBXML_ICU_ENABLED 2676: else if (handler->uconv_out != NULL) { 2677: ret = xmlUconvWrapper(handler->uconv_out, 0, 2678: &out->content[out->use], 2679: &written, NULL, &toconv); 2680: out->use += written; 2681: out->content[out->use] = 0; 2682: } 2683: #endif /* LIBXML_ICU_ENABLED */ 2684: #ifdef DEBUG_ENCODING 2685: xmlGenericError(xmlGenericErrorContext, 2686: "initialized encoder\n"); 2687: #endif 2688: return(0); 2689: } 2690: 2691: /* 2692: * Conversion itself. 
2693: */ 2694: toconv = in->use; 2695: if (toconv == 0) 2696: return(0); 2697: if (toconv * 4 >= written) { 2698: xmlBufferGrow(out, toconv * 4); 2699: written = out->size - out->use - 1; 2700: } 2701: if (handler->output != NULL) { 2702: ret = handler->output(&out->content[out->use], &written, 2703: in->content, &toconv); 2704: if (written > 0) { 2705: xmlBufferShrink(in, toconv); 2706: out->use += written; 2707: writtentot += written; 2708: } 2709: out->content[out->use] = 0; 2710: } 2711: #ifdef LIBXML_ICONV_ENABLED 2712: else if (handler->iconv_out != NULL) { 2713: ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2714: &written, in->content, &toconv); 2715: xmlBufferShrink(in, toconv); 2716: out->use += written; 2717: writtentot += written; 2718: out->content[out->use] = 0; 2719: if (ret == -1) { 2720: if (written > 0) { 2721: /* 2722: * Can be a limitation of iconv 2723: */ 2724: charref_len = 0; 2725: goto retry; 2726: } 2727: ret = -3; 2728: } 2729: } 2730: #endif /* LIBXML_ICONV_ENABLED */ 2731: #ifdef LIBXML_ICU_ENABLED 2732: else if (handler->uconv_out != NULL) { 2733: ret = xmlUconvWrapper(handler->uconv_out, 0, 2734: &out->content[out->use], 2735: &written, in->content, &toconv); 2736: xmlBufferShrink(in, toconv); 2737: out->use += written; 2738: writtentot += written; 2739: out->content[out->use] = 0; 2740: if (ret == -1) { 2741: if (written > 0) { 2742: /* 2743: * Can be a limitation of iconv 2744: */ 2745: charref_len = 0; 2746: goto retry; 2747: } 2748: ret = -3; 2749: } 2750: } 2751: #endif /* LIBXML_ICU_ENABLED */ 2752: else { 2753: xmlEncodingErr(XML_I18N_NO_OUTPUT, 2754: "xmlCharEncOutFunc: no output function !\n", NULL); 2755: return(-1); 2756: } 2757: 2758: if (ret >= 0) output += ret; 2759: 2760: /* 2761: * Attempt to handle error cases 2762: */ 2763: switch (ret) { 2764: case 0: 2765: #ifdef DEBUG_ENCODING 2766: xmlGenericError(xmlGenericErrorContext, 2767: "converted %d bytes to %d bytes of output\n", 2768: toconv, 
written); 2769: #endif 2770: break; 2771: case -1: 2772: #ifdef DEBUG_ENCODING 2773: xmlGenericError(xmlGenericErrorContext, 2774: "output conversion failed by lack of space\n"); 2775: #endif 2776: break; 2777: case -3: 2778: #ifdef DEBUG_ENCODING 2779: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2780: toconv, written, in->use); 2781: #endif 2782: break; 2783: case -2: { 2784: int len = in->use; 2785: const xmlChar *utf = (const xmlChar *) in->content; 2786: int cur; 2787: 2788: cur = xmlGetUTF8Char(utf, &len); 2789: if ((charref_len != 0) && (written < charref_len)) { 2790: /* 2791: * We attempted to insert a character reference and failed. 2792: * Undo what was written and skip the remaining charref. 2793: */ 2794: out->use -= written; 2795: writtentot -= written; 2796: xmlBufferShrink(in, charref_len - written); 2797: charref_len = 0; 2798: 2799: ret = -1; 2800: break; 2801: } else if (cur > 0) { 2802: xmlChar charref[20]; 2803: 2804: #ifdef DEBUG_ENCODING 2805: xmlGenericError(xmlGenericErrorContext, 2806: "handling output conversion error\n"); 2807: xmlGenericError(xmlGenericErrorContext, 2808: "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2809: in->content[0], in->content[1], 2810: in->content[2], in->content[3]); 2811: #endif 2812: /* 2813: * Removes the UTF8 sequence, and replace it by a charref 2814: * and continue the transcoding phase, hoping the error 2815: * did not mangle the encoder state. 
2816: */ 2817: charref_len = snprintf((char *) &charref[0], sizeof(charref), 2818: "&#%d;", cur); 2819: xmlBufferShrink(in, len); 2820: xmlBufferAddHead(in, charref, -1); 2821: 2822: goto retry; 2823: } else { 2824: char buf[50]; 2825: 2826: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2827: in->content[0], in->content[1], 2828: in->content[2], in->content[3]); 2829: buf[49] = 0; 2830: xmlEncodingErr(XML_I18N_CONV_FAILED, 2831: "output conversion failed due to conv error, bytes %s\n", 2832: buf); 2833: if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2834: in->content[0] = ' '; 2835: } 2836: break; 2837: } 2838: } 2839: return(ret); 2840: } 2841: 2842: /** 2843: * xmlCharEncCloseFunc: 2844: * @handler: char enconding transformation data structure 2845: * 2846: * Generic front-end for encoding handler close function 2847: * 2848: * Returns 0 if success, or -1 in case of error 2849: */ 2850: int 2851: xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2852: int ret = 0; 2853: int tofree = 0; 2854: if (handler == NULL) return(-1); 2855: if (handler->name == NULL) return(-1); 2856: #ifdef LIBXML_ICONV_ENABLED 2857: /* 2858: * Iconv handlers can be used only once, free the whole block. 2859: * and the associated icon resources. 
2860: */ 2861: if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2862: tofree = 1; 2863: if (handler->iconv_out != NULL) { 2864: if (iconv_close(handler->iconv_out)) 2865: ret = -1; 2866: handler->iconv_out = NULL; 2867: } 2868: if (handler->iconv_in != NULL) { 2869: if (iconv_close(handler->iconv_in)) 2870: ret = -1; 2871: handler->iconv_in = NULL; 2872: } 2873: } 2874: #endif /* LIBXML_ICONV_ENABLED */ 2875: #ifdef LIBXML_ICU_ENABLED 2876: if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { 2877: tofree = 1; 2878: if (handler->uconv_out != NULL) { 2879: closeIcuConverter(handler->uconv_out); 2880: handler->uconv_out = NULL; 2881: } 2882: if (handler->uconv_in != NULL) { 2883: closeIcuConverter(handler->uconv_in); 2884: handler->uconv_in = NULL; 2885: } 2886: } 2887: #endif 2888: if (tofree) { 2889: /* free up only dynamic handlers iconv/uconv */ 2890: if (handler->name != NULL) 2891: xmlFree(handler->name); 2892: handler->name = NULL; 2893: xmlFree(handler); 2894: } 2895: #ifdef DEBUG_ENCODING 2896: if (ret) 2897: xmlGenericError(xmlGenericErrorContext, 2898: "failed to close the encoding handler\n"); 2899: else 2900: xmlGenericError(xmlGenericErrorContext, 2901: "closed the encoding handler\n"); 2902: #endif 2903: 2904: return(ret); 2905: } 2906: 2907: /** 2908: * xmlByteConsumed: 2909: * @ctxt: an XML parser context 2910: * 2911: * This function provides the current index of the parser relative 2912: * to the start of the current entity. This function is computed in 2913: * bytes from the beginning starting at zero and finishing at the 2914: * size in byte of the file if parsing a file. The function is 2915: * of constant cost if the input is UTF-8 but can be costly if run 2916: * on non-UTF-8 input. 2917: * 2918: * Returns the index in bytes from the beginning of the entity or -1 2919: * in case the index could not be computed. 
2920: */ 2921: long 2922: xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2923: xmlParserInputPtr in; 2924: 2925: if (ctxt == NULL) return(-1); 2926: in = ctxt->input; 2927: if (in == NULL) return(-1); 2928: if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2929: unsigned int unused = 0; 2930: xmlCharEncodingHandler * handler = in->buf->encoder; 2931: /* 2932: * Encoding conversion, compute the number of unused original 2933: * bytes from the input not consumed and substract that from 2934: * the raw consumed value, this is not a cheap operation 2935: */ 2936: if (in->end - in->cur > 0) { 2937: unsigned char convbuf[32000]; 2938: const unsigned char *cur = (const unsigned char *)in->cur; 2939: int toconv = in->end - in->cur, written = 32000; 2940: 2941: int ret; 2942: 2943: if (handler->output != NULL) { 2944: do { 2945: toconv = in->end - cur; 2946: written = 32000; 2947: ret = handler->output(&convbuf[0], &written, 2948: cur, &toconv); 2949: if (ret == -1) return(-1); 2950: unused += written; 2951: cur += toconv; 2952: } while (ret == -2); 2953: #ifdef LIBXML_ICONV_ENABLED 2954: } else if (handler->iconv_out != NULL) { 2955: do { 2956: toconv = in->end - cur; 2957: written = 32000; 2958: ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2959: &written, cur, &toconv); 2960: if (ret < 0) { 2961: if (written > 0) 2962: ret = -2; 2963: else 2964: return(-1); 2965: } 2966: unused += written; 2967: cur += toconv; 2968: } while (ret == -2); 2969: #endif 2970: #ifdef LIBXML_ICU_ENABLED 2971: } else if (handler->uconv_out != NULL) { 2972: do { 2973: toconv = in->end - cur; 2974: written = 32000; 2975: ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], 2976: &written, cur, &toconv); 2977: if (ret < 0) { 2978: if (written > 0) 2979: ret = -2; 2980: else 2981: return(-1); 2982: } 2983: unused += written; 2984: cur += toconv; 2985: } while (ret == -2); 2986: #endif 2987: } else { 2988: /* could not find a converter */ 2989: return(-1); 2990: } 2991: } 2992: if 
(in->buf->rawconsumed < unused) 2993: return(-1); 2994: return(in->buf->rawconsumed - unused); 2995: } 2996: return(in->consumed + (in->cur - in->base)); 2997: } 2998: 2999: #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 3000: #ifdef LIBXML_ISO8859X_ENABLED 3001: 3002: /** 3003: * UTF8ToISO8859x: 3004: * @out: a pointer to an array of bytes to store the result 3005: * @outlen: the length of @out 3006: * @in: a pointer to an array of UTF-8 chars 3007: * @inlen: the length of @in 3008: * @xlattable: the 2-level transcoding table 3009: * 3010: * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 3011: * block of chars out. 3012: * 3013: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 3014: * The value of @inlen after return is the number of octets consumed 3015: * as the return value is positive, else unpredictable. 3016: * The value of @outlen after return is the number of ocetes consumed. 3017: */ 3018: static int 3019: UTF8ToISO8859x(unsigned char* out, int *outlen, 3020: const unsigned char* in, int *inlen, 3021: unsigned char const *xlattable) { 3022: const unsigned char* outstart = out; 3023: const unsigned char* inend; 3024: const unsigned char* instart = in; 3025: const unsigned char* processed = in; 3026: 3027: if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 3028: (xlattable == NULL)) 3029: return(-1); 3030: if (in == NULL) { 3031: /* 3032: * initialization nothing to do 3033: */ 3034: *outlen = 0; 3035: *inlen = 0; 3036: return(0); 3037: } 3038: inend = in + (*inlen); 3039: while (in < inend) { 3040: unsigned char d = *in++; 3041: if (d < 0x80) { 3042: *out++ = d; 3043: } else if (d < 0xC0) { 3044: /* trailing byte in leading position */ 3045: *outlen = out - outstart; 3046: *inlen = processed - instart; 3047: return(-2); 3048: } else if (d < 0xE0) { 3049: unsigned char c; 3050: if (!(in < inend)) { 3051: /* trailing byte not in input buffer */ 3052: *outlen = out - outstart; 3053: 
*inlen = processed - instart; 3054: return(-3); 3055: } 3056: c = *in++; 3057: if ((c & 0xC0) != 0x80) { 3058: /* not a trailing byte */ 3059: *outlen = out - outstart; 3060: *inlen = processed - instart; 3061: return(-2); 3062: } 3063: c = c & 0x3F; 3064: d = d & 0x1F; 3065: d = xlattable [48 + c + xlattable [d] * 64]; 3066: if (d == 0) { 3067: /* not in character set */ 3068: *outlen = out - outstart; 3069: *inlen = processed - instart; 3070: return(-2); 3071: } 3072: *out++ = d; 3073: } else if (d < 0xF0) { 3074: unsigned char c1; 3075: unsigned char c2; 3076: if (!(in < inend - 1)) { 3077: /* trailing bytes not in input buffer */ 3078: *outlen = out - outstart; 3079: *inlen = processed - instart; 3080: return(-3); 3081: } 3082: c1 = *in++; 3083: if ((c1 & 0xC0) != 0x80) { 3084: /* not a trailing byte (c1) */ 3085: *outlen = out - outstart; 3086: *inlen = processed - instart; 3087: return(-2); 3088: } 3089: c2 = *in++; 3090: if ((c2 & 0xC0) != 0x80) { 3091: /* not a trailing byte (c2) */ 3092: *outlen = out - outstart; 3093: *inlen = processed - instart; 3094: return(-2); 3095: } 3096: c1 = c1 & 0x3F; 3097: c2 = c2 & 0x3F; 3098: d = d & 0x0F; 3099: d = xlattable [48 + c2 + xlattable [48 + c1 + 3100: xlattable [32 + d] * 64] * 64]; 3101: if (d == 0) { 3102: /* not in character set */ 3103: *outlen = out - outstart; 3104: *inlen = processed - instart; 3105: return(-2); 3106: } 3107: *out++ = d; 3108: } else { 3109: /* cannot transcode >= U+010000 */ 3110: *outlen = out - outstart; 3111: *inlen = processed - instart; 3112: return(-2); 3113: } 3114: processed = in; 3115: } 3116: *outlen = out - outstart; 3117: *inlen = processed - instart; 3118: return(*outlen); 3119: } 3120: 3121: /** 3122: * ISO8859xToUTF8 3123: * @out: a pointer to an array of bytes to store the result 3124: * @outlen: the length of @out 3125: * @in: a pointer to an array of ISO Latin 1 chars 3126: * @inlen: the length of @in 3127: * 3128: * Take a block of ISO 8859-* chars in and try to convert 
/* it to a UTF-8 block of chars out.
 *
 * Bytes below 0x80 are copied unchanged; every other byte is looked up
 * in @unicodetable (indexed by byte - 0x80) and written as a two or
 * three byte UTF-8 sequence. Conversion stops early, without error,
 * when @out has no room for the next character.
 *
 * Returns the number of bytes written to @out on success, or -1 if an
 *     argument is NULL or an input byte has no mapping (table entry 0).
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: needs room for a full three byte sequence. The space
     * test is written as a difference so that no pointer before the
     * start of @out is ever formed when @outlen is below 2.
     */
    while ((in < inend) && (outend - out >= 3)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than @out can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds are tested before the byte is read */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two trailing ASCII bytes still fit when less than 3 are free */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}


/************************************************************************
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
 ************************************************************************/
3191: 3192: static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 3193: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3194: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3195: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3196: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3197: 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 3198: 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 3199: 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 3200: 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 3201: 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 3202: 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 3203: 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 3204: 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 3205: 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 3206: 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 3207: 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 3208: 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 3209: }; 3210: 3211: static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 3212: "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3213: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3214: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3215: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3216: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3217: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3218: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3219: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3220: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3221: 
"\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3222: "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3223: "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 3224: "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 3225: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3226: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 3227: "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3228: "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 3229: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3230: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3231: "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 3232: "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 3233: "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 3234: "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 3235: "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3236: "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 3237: "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 3238: "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 3239: }; 3240: 3241: static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 3242: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3243: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3244: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3245: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3246: 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 3247: 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 3248: 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 3249: 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 3250: 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 
0x010a, 0x0108, 0x00c7, 3251: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3252: 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 3253: 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 3254: 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 3255: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3256: 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 3257: 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 3258: }; 3259: 3260: static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 3261: "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3262: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3263: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3264: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3265: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3266: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3267: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3268: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3269: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3270: "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3271: "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 3272: "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 3273: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 3274: "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 3275: "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3276: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3277: "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 3278: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3279: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 
3280: "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3281: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3282: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3283: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3284: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3285: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 3286: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 3287: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 3288: "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3289: "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3290: "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3291: "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 3292: }; 3293: 3294: static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 3295: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3296: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3297: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3298: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3299: 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 3300: 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 3301: 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 3302: 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 3303: 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3304: 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 3305: 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3306: 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 3307: 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3308: 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 3309: 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 
0x00f6, 0x00f7, 3310: 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 3311: }; 3312: 3313: static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 3314: "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 3315: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3316: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3317: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3318: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3319: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3320: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3321: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3322: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3323: "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 3324: "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3325: "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3326: "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3327: "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 3328: "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 3329: "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 3330: "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 3331: "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 3332: "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 3333: "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3334: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 3335: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3336: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3337: "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3338: 
"\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 3339: "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 3340: "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 3341: }; 3342: 3343: static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 3344: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3345: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3346: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3347: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3348: 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 3349: 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 3350: 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 3351: 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 3352: 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 3353: 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 3354: 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 3355: 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 3356: 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 3357: 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 3358: 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 3359: 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 3360: }; 3361: 3362: static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 3363: "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3364: "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3365: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3366: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3367: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3368: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3369: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3370: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3371: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3372: "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 3373: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3374: "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 3375: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3376: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3377: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3378: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3379: "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 3380: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3381: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3382: "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3383: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3384: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3385: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3386: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3387: "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3388: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3389: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3390: }; 3391: 3392: static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 3393: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3394: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3395: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3396: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3397: 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 3398: 0x0000, 0x0000, 0x0000, 0x0000, 
0x060c, 0x00ad, 0x0000, 0x0000, 3399: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3400: 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 3401: 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 3402: 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 3403: 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 3404: 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3405: 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 3406: 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 3407: 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3408: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3409: }; 3410: 3411: static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 3412: "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3413: "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 3414: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3415: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3416: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3417: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3418: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3419: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3420: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3421: "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 3422: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3423: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3424: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3425: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3426: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3427: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 3428: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 3429: "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3430: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 3431: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3432: "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3433: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3434: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3435: }; 3436: 3437: static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 3438: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3439: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3440: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3441: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3442: 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 3443: 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 3444: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 3445: 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 3446: 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 3447: 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 3448: 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 3449: 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 3450: 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 3451: 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 3452: 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 3453: 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 3454: }; 3455: 3456: static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 3457: "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 3458: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3459: 
"\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3460: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3461: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3462: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3463: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3464: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3465: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3466: "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 3467: "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 3468: "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3469: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3470: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3471: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3472: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3473: "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 3474: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3475: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3476: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3477: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3478: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3479: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3480: "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3481: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3482: "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3483: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3484: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3485: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3486: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3487: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3488: }; 3489: 3490: static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3491: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3492: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3493: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3494: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3495: 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3496: 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3497: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3498: 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3499: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3500: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3501: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3502: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3503: 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3504: 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3505: 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3506: 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3507: }; 3508: 3509: static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3510: "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3511: "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3512: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3513: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3514: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3515: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3516: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3517: 
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3518: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3519: "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3520: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3521: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3522: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3523: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3524: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3525: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3526: "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3527: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3528: "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3529: "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3530: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3531: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3532: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3533: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3534: "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3535: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3536: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3537: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3538: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3539: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3540: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3541: }; 3542: 3543: static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3544: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3545: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3546: 0x0090, 0x0091, 
0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3547: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3548: 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3549: 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3550: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3551: 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3552: 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3553: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3554: 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3555: 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3556: 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3557: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3558: 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3559: 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3560: }; 3561: 3562: static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3563: "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3564: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3565: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3566: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3567: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3568: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3569: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3570: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3571: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3572: "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3573: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3574: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3575: 
"\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3576: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3577: "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3578: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3579: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3580: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3581: "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3582: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3583: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3584: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3585: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3586: }; 3587: 3588: static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3589: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3590: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3591: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3592: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3593: 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3594: 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3595: 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3596: 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3597: 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3598: 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3599: 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3600: 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3601: 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3602: 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3603: 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3604: 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 
0x0138, 3605: }; 3606: 3607: static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3608: "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3609: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3610: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3611: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3612: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3613: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3614: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3615: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3616: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3617: "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3618: "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3619: "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3620: "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3621: "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3622: "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3623: "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3624: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3625: "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3626: "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3627: "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3628: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3629: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3630: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3631: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3632: "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3633: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3634: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3635: "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3636: "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3637: "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3638: "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3639: }; 3640: 3641: static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3642: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3643: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3644: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3645: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3646: 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 3647: 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3648: 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3649: 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3650: 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3651: 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3652: 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3653: 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3654: 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3655: 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3656: 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3657: 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3658: }; 3659: 3660: static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3661: "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3662: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3663: "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3664: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3665: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3666: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3667: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3668: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3669: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3670: "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3671: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3672: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3673: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3674: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3675: "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3676: "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3677: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3678: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3679: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3680: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3681: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3682: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3683: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3684: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3685: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3686: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3687: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3688: }; 3689: 3690: static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3691: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3692: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3693: 0x0090, 0x0091, 
0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3694: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3695: 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3696: 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3697: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3698: 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3699: 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3700: 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3701: 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3702: 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3703: 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3704: 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3705: 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3706: 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3707: }; 3708: 3709: static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3710: "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3711: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3712: "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3713: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3714: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3715: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3716: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3717: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3718: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3719: "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3720: "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3721: "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3722: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3723: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3724: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3725: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3726: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3727: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3728: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3729: "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3730: "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3731: "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3732: "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3733: "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3734: "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3735: "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3736: "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3737: "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3738: "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3739: "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3740: "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3741: }; 3742: 3743: static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3744: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3745: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3746: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3747: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3748: 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3749: 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3750: 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3751: 0x1e81, 0x1e57, 0x1e83, 0x1e60, 
0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3752: 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3753: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3754: 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3755: 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3756: 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3757: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3758: 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3759: 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3760: }; 3761: 3762: static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3763: "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3764: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3765: "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3766: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3767: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3768: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3769: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3770: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3771: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3772: "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3773: "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3774: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3775: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3776: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3777: "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3778: "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3779: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3780: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3781: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3782: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3783: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3784: "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3785: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3786: "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3787: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3788: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3789: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3790: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3791: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3792: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3793: "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3794: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3795: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3796: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3797: "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3798: "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3799: "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3800: "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3801: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3802: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3803: "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3804: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3805: "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3806: }; 3807: 3808: static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3809: 0x0080, 0x0081, 
0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3810: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3811: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3812: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3813: 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3814: 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3815: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3816: 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3817: 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3818: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3819: 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3820: 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3821: 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3822: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3823: 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3824: 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3825: }; 3826: 3827: static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3828: "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3829: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3830: "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3831: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3832: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3833: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3834: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3835: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3836: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3837: "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3838: "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 
3839: "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3840: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3841: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3842: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3843: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3844: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3845: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3846: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3847: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3848: "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3849: "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3850: "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3851: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3852: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3853: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3854: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3855: }; 3856: 3857: static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3858: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3859: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3860: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3861: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3862: 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3863: 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3864: 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3865: 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3866: 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3867: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3868: 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 
0x0150, 0x00d6, 0x015a, 3869: 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3870: 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3871: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3872: 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3873: 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3874: }; 3875: 3876: static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3877: "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3878: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3879: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3880: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3881: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3882: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3883: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3884: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3885: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3886: "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3887: "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3888: "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3889: "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3890: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3891: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3892: "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3893: "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3894: "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3895: "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3896: "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3897: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3898: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3899: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3900: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3901: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3902: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3903: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3904: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3905: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3906: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3907: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3908: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3909: "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3910: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3911: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3912: "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3913: "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3914: "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3915: "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3916: }; 3917: 3918: 3919: /* 3920: * auto-generated functions for ISO-8859-2 .. 
ISO-8859-16 3921: */ 3922: 3923: static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3924: const unsigned char* in, int *inlen) { 3925: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3926: } 3927: static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3928: const unsigned char* in, int *inlen) { 3929: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3930: } 3931: 3932: static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3933: const unsigned char* in, int *inlen) { 3934: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3935: } 3936: static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3937: const unsigned char* in, int *inlen) { 3938: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3939: } 3940: 3941: static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3942: const unsigned char* in, int *inlen) { 3943: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3944: } 3945: static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3946: const unsigned char* in, int *inlen) { 3947: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3948: } 3949: 3950: static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3951: const unsigned char* in, int *inlen) { 3952: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3953: } 3954: static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3955: const unsigned char* in, int *inlen) { 3956: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3957: } 3958: 3959: static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3960: const unsigned char* in, int *inlen) { 3961: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3962: } 3963: static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3964: const unsigned char* in, int *inlen) { 3965: return 
UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3966: } 3967: 3968: static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3969: const unsigned char* in, int *inlen) { 3970: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3971: } 3972: static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3973: const unsigned char* in, int *inlen) { 3974: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3975: } 3976: 3977: static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3978: const unsigned char* in, int *inlen) { 3979: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3980: } 3981: static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3982: const unsigned char* in, int *inlen) { 3983: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3984: } 3985: 3986: static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3987: const unsigned char* in, int *inlen) { 3988: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3989: } 3990: static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3991: const unsigned char* in, int *inlen) { 3992: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3993: } 3994: 3995: static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3996: const unsigned char* in, int *inlen) { 3997: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3998: } 3999: static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 4000: const unsigned char* in, int *inlen) { 4001: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 4002: } 4003: 4004: static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 4005: const unsigned char* in, int *inlen) { 4006: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 4007: } 4008: static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 
4009: const unsigned char* in, int *inlen) { 4010: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 4011: } 4012: 4013: static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 4014: const unsigned char* in, int *inlen) { 4015: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 4016: } 4017: static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 4018: const unsigned char* in, int *inlen) { 4019: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 4020: } 4021: 4022: static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 4023: const unsigned char* in, int *inlen) { 4024: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 4025: } 4026: static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 4027: const unsigned char* in, int *inlen) { 4028: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 4029: } 4030: 4031: static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 4032: const unsigned char* in, int *inlen) { 4033: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 4034: } 4035: static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 4036: const unsigned char* in, int *inlen) { 4037: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 4038: } 4039: 4040: static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 4041: const unsigned char* in, int *inlen) { 4042: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 4043: } 4044: static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 4045: const unsigned char* in, int *inlen) { 4046: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 4047: } 4048: 4049: static void 4050: xmlRegisterCharEncodingHandlersISO8859x (void) { 4051: xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 4052: xmlNewCharEncodingHandler 
("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 4053: xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 4054: xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 4055: xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 4056: xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 4057: xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 4058: xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 4059: xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 4060: xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 4061: xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 4062: xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 4063: xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 4064: xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 4065: } 4066: 4067: #endif 4068: #endif 4069: 4070: #define bottom_encoding 4071: #include "elfgcchack.h"