embedaddon/libxml2/encoding.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / encoding.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:22:22 2013 UTC (10 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD

2.8.0

1: /* 2: * encoding.c : implements the encoding conversion functions needed for XML 3: * 4: * Related specs: 5: * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6: * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7: * [ISO-10646] UTF-8 and UTF-16 in Annexes 8: * [ISO-8859-1] ISO Latin-1 characters codes. 9: * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10: * Worldwide Character Encoding -- Version 1.0", Addison- 11: * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12: * described in Unicode Technical Report #4. 13: * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14: * Information Interchange, ANSI X3.4-1986. 15: * 16: * See Copyright for the status of this software. 17: * 18: * daniel@veillard.com 19: * 20: * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org> 21: */ 22: 23: #define IN_LIBXML 24: #include "libxml.h" 25: 26: #include <string.h> 27: 28: #ifdef HAVE_CTYPE_H 29: #include <ctype.h> 30: #endif 31: #ifdef HAVE_STDLIB_H 32: #include <stdlib.h> 33: #endif 34: #ifdef LIBXML_ICONV_ENABLED 35: #ifdef HAVE_ERRNO_H 36: #include <errno.h> 37: #endif 38: #endif 39: #include <libxml/encoding.h> 40: #include <libxml/xmlmemory.h> 41: #ifdef LIBXML_HTML_ENABLED 42: #include <libxml/HTMLparser.h> 43: #endif 44: #include <libxml/globals.h> 45: #include <libxml/xmlerror.h> 46: 47: static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 48: static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 49: 50: typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 51: typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 52: struct _xmlCharEncodingAlias { 53: const char *name; 54: const char *alias; 55: }; 56: 57: static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 58: static int xmlCharEncodingAliasesNb = 0; 59: static int xmlCharEncodingAliasesMax = 0; 60: 61: #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) 62: #if 0 63: #define DEBUG_ENCODING /* Define 
this to get encoding traces */ 64: #endif 65: #else 66: #ifdef LIBXML_ISO8859X_ENABLED 67: static void xmlRegisterCharEncodingHandlersISO8859x (void); 68: #endif 69: #endif 70: 71: static int xmlLittleEndian = 1; 72: 73: /** 74: * xmlEncodingErrMemory: 75: * @extra: extra informations 76: * 77: * Handle an out of memory condition 78: */ 79: static void 80: xmlEncodingErrMemory(const char *extra) 81: { 82: __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); 83: } 84: 85: /** 86: * xmlErrEncoding: 87: * @error: the error number 88: * @msg: the error message 89: * 90: * n encoding error 91: */ 92: static void 93: xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) 94: { 95: __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, 96: XML_FROM_I18N, error, XML_ERR_FATAL, 97: NULL, 0, val, NULL, NULL, 0, 0, msg, val); 98: } 99: 100: #ifdef LIBXML_ICU_ENABLED 101: static uconv_t* 102: openIcuConverter(const char* name, int toUnicode) 103: { 104: UErrorCode status = U_ZERO_ERROR; 105: uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); 106: if (conv == NULL) 107: return NULL; 108: 109: conv->uconv = ucnv_open(name, &status); 110: if (U_FAILURE(status)) 111: goto error; 112: 113: status = U_ZERO_ERROR; 114: if (toUnicode) { 115: ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 116: NULL, NULL, NULL, &status); 117: } 118: else { 119: ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 120: NULL, NULL, NULL, &status); 121: } 122: if (U_FAILURE(status)) 123: goto error; 124: 125: status = U_ZERO_ERROR; 126: conv->utf8 = ucnv_open("UTF-8", &status); 127: if (U_SUCCESS(status)) 128: return conv; 129: 130: error: 131: if (conv->uconv) 132: ucnv_close(conv->uconv); 133: xmlFree(conv); 134: return NULL; 135: } 136: 137: static void 138: closeIcuConverter(uconv_t *conv) 139: { 140: if (conv != NULL) { 141: ucnv_close(conv->uconv); 142: ucnv_close(conv->utf8); 143: xmlFree(conv); 144: } 145: } 146: #endif /* LIBXML_ICU_ENABLED */ 

/************************************************************************
 *									*
 *		Conversions To/From UTF8 encoding			*
 *									*
 ************************************************************************/

/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII maps 1:1 onto UTF-8, so the whole output
 * buffer can be used; conversion stops when either buffer is exhausted.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     >= 0x80 is met or if @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed
 *     (up to, not including, the offending byte on error).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL. A NULL @in is an initialization
 *     call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     a multi-byte sequence truncated at the end of @in is not consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
	 * initialization nothing to do
	 */
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
	d = *in++;
	if      (d < 0x80)  { c= d; trailing= 0; }
	else if (d < 0xC0) {
	    /* trailing byte in leading position */
	    goto failed;
	} else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
	else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
	else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
	else {
	    /* no chance for this in Ascii */
	    goto failed;
	}

	/* sequence cut by the end of the input: wait for the rest */
	if (inend - in < trailing) {
	    break;
	}

	for ( ; trailing; trailing--) {
	    d = *in++;
	    /* a lead byte must be followed by 10xxxxxx bytes only */
	    if ((d & 0xC0) != 0x80)
		goto failed;
	    c <<= 6;
	    c |= d & 0x3F;
	}

	/* assertion: c is a single UTF-4 value */
	if (c >= 0x80) {
	    /* no chance for this in Ascii */
	    goto failed;
	}
	if (out >= outend)
	    break;
	*out++ = c;
	processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, bytes from 0x80 up
 * become a two byte sequence; conversion stops at the first character
 * that does not fit in the remaining output space.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
	return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
	if (*in < 0x80) {
	    if (out >= outend)
		break;
	    *out++ = *in++;
	} else {
	    /* compare sizes rather than forming outend - 1, which is not
	     * a valid pointer when the output buffer is empty */
	    if (outend - out < 2)
		break;
	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
	    *out++ = ((*in) & 0x3F) | 0x80;
	    ++in;
	}
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Identity "conversion" used for UTF-8 handling: copies as many bytes
 * as fit in the smaller of the two buffers.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the computed length is negative.
 * On success both *outlen and *inlenb are set to the number of bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
	return(-1);

    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
	return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}


#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL. A NULL @in is an initialization call:
 *     both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     a multi-byte sequence truncated at the end of @in is not consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* const instart = in;
    const unsigned char* const outstart = out;
    const unsigned char* consumed = in;
    const unsigned char* inend;
    const unsigned char* outend;
    unsigned int lead, value;
    int pending;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
	return(-1);
    if (in == NULL) {
	/* initialization call, nothing to emit */
	*outlen = 0;
	*inlen = 0;
	return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);

    while (in < inend) {
	lead = *in++;
	if (lead < 0x80) {
	    value = lead;
	    pending = 0;
	} else if ((lead < 0xC0) || (lead >= 0xF8)) {
	    /* continuation byte in lead position, or a lead byte that
	     * can never encode a Latin-1 character */
	    goto reject;
	} else if (lead < 0xE0) {
	    value = lead & 0x1F;
	    pending = 1;
	} else if (lead < 0xF0) {
	    value = lead & 0x0F;
	    pending = 2;
	} else {
	    value = lead & 0x07;
	    pending = 3;
	}

	/* sequence cut by the end of the input: leave it unconsumed */
	if (inend - in < pending)
	    break;

	while (pending > 0) {
	    unsigned int cont = *in++;

	    if ((cont & 0xC0) != 0x80)
		goto reject;
	    value = (value << 6) | (cont & 0x3F);
	    pending--;
	}

	if (value > 0xFF)
	    goto reject;		/* no chance for this in IsoLat1 */
	if (out >= outend)
	    break;
	*out++ = value;
	consumed = in;
    }
    *outlen = out - outstart;
    *inlen = consumed - instart;
    return(*outlen);

reject:
    *outlen = out - outstart;
    *inlen = consumed - instart;
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the endianness of the host nor on the alignment of @inb.
 * A trailing odd byte is ignored. A character is only emitted if its whole
 * UTF-8 sequence fits in the remaining output space.
 *
 * Returns the number of bytes written, -1 if @outlen or @inlenb is NULL,
 *     or -2 if the transcoding fails (a high surrogate not followed by a
 *     low surrogate).
 * The value of *inlenb after return is the number of octets consumed; a
 *     high surrogate whose partner is not yet available is not consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int len, need, bits;

    if ((outlen == NULL) || (inlenb == NULL))
	return(-1);

    len = *inlenb;
    if ((len % 2) == 1)
	len--;
    outend = out + *outlen;
    inend = in + len;

    while (in < inend) {
	c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
	next = in + 2;
	if ((c & 0xFC00) == 0xD800) {    /* surrogates */
	    if (next >= inend) {
		/* pair split across input chunks: wait for more data */
		break;
	    }
	    d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
	    next += 2;
	    if ((d & 0xFC00) != 0xDC00) {
		*outlen = out - outstart;
		*inlenb = in - inb;
		return(-2);
	    }
	    c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
	}

	/* c is now a single code point; size its UTF-8 form */
	if      (c <    0x80) need = 1;
	else if (c <   0x800) need = 2;
	else if (c < 0x10000) need = 3;
	else                  need = 4;
	if (outend - out < need)
	    break;

	if (need == 1) {
	    *out++ = (unsigned char) c;
	} else {
	    /* lead byte: 'need' high bits set, then the top payload bits */
	    bits = (need - 1) * 6;
	    *out++ = (unsigned char) ((c >> bits) | (0xF00 >> need));
	    while (bits > 0) {
		bits -= 6;
		*out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
	    }
	}
	in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so the result does not depend on the endianness
 * of the host nor on the alignment of @outb.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     above 0x10FFFF). A NULL @in is an initialization call: both lengths
 *     are set to 0 and 0 is returned.
 * The value of *inlen after return is the number of octets consumed and
 *     the value of *outlen the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
	d = *in++;
	if      (d < 0x80)  { c= d; trailing= 0; }
	else if (d < 0xC0) {
	    /* trailing byte in leading position */
	    goto failed;
	} else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
	else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
	else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
	else {
	    /* no chance for this in UTF-16 */
	    goto failed;
	}

	/* sequence cut by the end of the input: wait for the rest */
	if (inend - in < trailing) {
	    break;
	}

	for ( ; trailing; trailing--) {
	    d = *in++;
	    if ((d & 0xC0) != 0x80)
		goto failed;
	    c <<= 6;
	    c |= d & 0x3F;
	}

	if (c < 0x10000) {
	    if (outend - out < 2)
		break;
	    out[0] = (unsigned char) (c & 0xFF);
	    out[1] = (unsigned char) (c >> 8);
	    out += 2;
	}
	else if (c < 0x110000) {
	    if (outend - out < 4)
		break;
	    c -= 0x10000;
	    hi = 0xD800 | (c >> 10);
	    lo = 0xDC00 | (c & 0x03FF);
	    out[0] = (unsigned char) (hi & 0xFF);
	    out[1] = (unsigned char) (hi >> 8);
	    out[2] = (unsigned char) (lo & 0xFF);
	    out[3] = (unsigned char) (lo >> 8);
	    out += 4;
	}
	else {
	    /* beyond the range UTF-16 can represent */
	    goto failed;
	}
	processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}

/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. When @in is NULL (initialization call) the bytes
 * 0xFF 0xFE are written if at least two bytes of output are available;
 * otherwise the data is converted by UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
	return(-1);
    if (in == NULL) {
	/*
	 * initialization, add the Byte Order Mark for UTF-16LE
	 */
        if (*outlen >= 2) {
	    outb[0] = 0xFF;
	    outb[1] = 0xFE;
	    *outlen = 2;
	    *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
		    "Added FFFE Byte Order Mark\n");
#endif
	    return(2);
	}
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
#endif /* LIBXML_OUTPUT_ENABLED */
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so the result does
 * not depend on the endianness of the host nor on the alignment of @inb.
 * A trailing odd byte is ignored. A character is only emitted if its whole
 * UTF-8 sequence fits in the remaining output space, so the output never
 * ends in a truncated sequence.
 *
 * Returns the number of bytes written, -1 if @outlen or @inlenb is NULL,
 *     or -2 if the transcoding fails (a high surrogate not followed by a
 *     low surrogate).
 * The value of *inlenb after return is the number of octets consumed; a
 *     high surrogate whose partner is not yet available is not consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int len, need, bits;

    if ((outlen == NULL) || (inlenb == NULL))
	return(-1);

    len = *inlenb;
    if ((len % 2) == 1)
	len--;
    outend = out + *outlen;
    inend = in + len;

    while (in < inend) {
	c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
	next = in + 2;
	if ((c & 0xFC00) == 0xD800) {    /* surrogates */
	    if (next >= inend) {
		/* pair split across input chunks: wait for more data */
		break;
	    }
	    d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
	    next += 2;
	    if ((d & 0xFC00) != 0xDC00) {
		*outlen = out - outstart;
		*inlenb = in - inb;
		return(-2);
	    }
	    c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
	}

	/* c is now a single code point; size its UTF-8 form */
	if      (c <    0x80) need = 1;
	else if (c <   0x800) need = 2;
	else if (c < 0x10000) need = 3;
	else                  need = 4;
	if (outend - out < need)
	    break;

	if (need == 1) {
	    *out++ = (unsigned char) c;
	} else {
	    /* lead byte: 'need' high bits set, then the top payload bits */
	    bits = (need - 1) * 6;
	    *out++ = (unsigned char) ((c >> bits) | (0xF00 >> need));
	    while (bits > 0) {
		bits -= 6;
		*out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);
	    }
	}
	in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so the result does not depend on the endianness
 * of the host nor on the alignment of @outb.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     above 0x10FFFF). A NULL @in is an initialization call: both lengths
 *     are set to 0 and 0 is returned.
 * The value of *inlen after return is the number of octets consumed and
 *     the value of *outlen the number of octets produced, on every path.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
	d = *in++;
	if      (d < 0x80)  { c= d; trailing= 0; }
	else if (d < 0xC0)  {
	    /* trailing byte in leading position */
	    goto failed;
	} else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
	else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
	else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
	else {
	    /* no chance for this in UTF-16 */
	    goto failed;
	}

	/* sequence cut by the end of the input: wait for the rest */
	if (inend - in < trailing) {
	    break;
	}

	for ( ; trailing; trailing--) {
	    d = *in++;
	    if ((d & 0xC0) != 0x80)
		goto failed;
	    c <<= 6;
	    c |= d & 0x3F;
	}

	if (c < 0x10000) {
	    if (outend - out < 2)
		break;
	    out[0] = (unsigned char) (c >> 8);
	    out[1] = (unsigned char) (c & 0xFF);
	    out += 2;
	}
	else if (c < 0x110000) {
	    if (outend - out < 4)
		break;
	    c -= 0x10000;
	    hi = 0xD800 | (c >> 10);
	    lo = 0xDC00 | (c & 0x03FF);
	    out[0] = (unsigned char) (hi >> 8);
	    out[1] = (unsigned char) (hi & 0xFF);
	    out[2] = (unsigned char) (lo >> 8);
	    out[3] = (unsigned char) (lo & 0xFF);
	    out += 4;
	}
	else {
	    /* beyond the range UTF-16 can represent */
	    goto failed;
	}
	processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* byte count, like the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/************************************************************************
 *									*
 *		Generic encoding handling routines			*
 *									*
 ************************************************************************/
917: */ 918: xmlCharEncoding 919: xmlDetectCharEncoding(const unsigned char* in, int len) 920: { 921: if (in == NULL) 922: return(XML_CHAR_ENCODING_NONE); 923: if (len >= 4) { 924: if ((in[0] == 0x00) && (in[1] == 0x00) && 925: (in[2] == 0x00) && (in[3] == 0x3C)) 926: return(XML_CHAR_ENCODING_UCS4BE); 927: if ((in[0] == 0x3C) && (in[1] == 0x00) && 928: (in[2] == 0x00) && (in[3] == 0x00)) 929: return(XML_CHAR_ENCODING_UCS4LE); 930: if ((in[0] == 0x00) && (in[1] == 0x00) && 931: (in[2] == 0x3C) && (in[3] == 0x00)) 932: return(XML_CHAR_ENCODING_UCS4_2143); 933: if ((in[0] == 0x00) && (in[1] == 0x3C) && 934: (in[2] == 0x00) && (in[3] == 0x00)) 935: return(XML_CHAR_ENCODING_UCS4_3412); 936: if ((in[0] == 0x4C) && (in[1] == 0x6F) && 937: (in[2] == 0xA7) && (in[3] == 0x94)) 938: return(XML_CHAR_ENCODING_EBCDIC); 939: if ((in[0] == 0x3C) && (in[1] == 0x3F) && 940: (in[2] == 0x78) && (in[3] == 0x6D)) 941: return(XML_CHAR_ENCODING_UTF8); 942: /* 943: * Although not part of the recommendation, we also 944: * attempt an "auto-recognition" of UTF-16LE and 945: * UTF-16BE encodings. 
946: */ 947: if ((in[0] == 0x3C) && (in[1] == 0x00) && 948: (in[2] == 0x3F) && (in[3] == 0x00)) 949: return(XML_CHAR_ENCODING_UTF16LE); 950: if ((in[0] == 0x00) && (in[1] == 0x3C) && 951: (in[2] == 0x00) && (in[3] == 0x3F)) 952: return(XML_CHAR_ENCODING_UTF16BE); 953: } 954: if (len >= 3) { 955: /* 956: * Errata on XML-1.0 June 20 2001 957: * We now allow an UTF8 encoded BOM 958: */ 959: if ((in[0] == 0xEF) && (in[1] == 0xBB) && 960: (in[2] == 0xBF)) 961: return(XML_CHAR_ENCODING_UTF8); 962: } 963: /* For UTF-16 we can recognize by the BOM */ 964: if (len >= 2) { 965: if ((in[0] == 0xFE) && (in[1] == 0xFF)) 966: return(XML_CHAR_ENCODING_UTF16BE); 967: if ((in[0] == 0xFF) && (in[1] == 0xFE)) 968: return(XML_CHAR_ENCODING_UTF16LE); 969: } 970: return(XML_CHAR_ENCODING_NONE); 971: } 972: 973: /** 974: * xmlCleanupEncodingAliases: 975: * 976: * Unregisters all aliases 977: */ 978: void 979: xmlCleanupEncodingAliases(void) { 980: int i; 981: 982: if (xmlCharEncodingAliases == NULL) 983: return; 984: 985: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 986: if (xmlCharEncodingAliases[i].name != NULL) 987: xmlFree((char *) xmlCharEncodingAliases[i].name); 988: if (xmlCharEncodingAliases[i].alias != NULL) 989: xmlFree((char *) xmlCharEncodingAliases[i].alias); 990: } 991: xmlCharEncodingAliasesNb = 0; 992: xmlCharEncodingAliasesMax = 0; 993: xmlFree(xmlCharEncodingAliases); 994: xmlCharEncodingAliases = NULL; 995: } 996: 997: /** 998: * xmlGetEncodingAlias: 999: * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1000: * 1001: * Lookup an encoding name for the given alias. 
1002: * 1003: * Returns NULL if not found, otherwise the original name 1004: */ 1005: const char * 1006: xmlGetEncodingAlias(const char *alias) { 1007: int i; 1008: char upper[100]; 1009: 1010: if (alias == NULL) 1011: return(NULL); 1012: 1013: if (xmlCharEncodingAliases == NULL) 1014: return(NULL); 1015: 1016: for (i = 0;i < 99;i++) { 1017: upper[i] = toupper(alias[i]); 1018: if (upper[i] == 0) break; 1019: } 1020: upper[i] = 0; 1021: 1022: /* 1023: * Walk down the list looking for a definition of the alias 1024: */ 1025: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1026: if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1027: return(xmlCharEncodingAliases[i].name); 1028: } 1029: } 1030: return(NULL); 1031: } 1032: 1033: /** 1034: * xmlAddEncodingAlias: 1035: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1036: * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1037: * 1038: * Registers an alias @alias for an encoding named @name. Existing alias 1039: * will be overwritten. 
1040: * 1041: * Returns 0 in case of success, -1 in case of error 1042: */ 1043: int 1044: xmlAddEncodingAlias(const char *name, const char *alias) { 1045: int i; 1046: char upper[100]; 1047: 1048: if ((name == NULL) || (alias == NULL)) 1049: return(-1); 1050: 1051: for (i = 0;i < 99;i++) { 1052: upper[i] = toupper(alias[i]); 1053: if (upper[i] == 0) break; 1054: } 1055: upper[i] = 0; 1056: 1057: if (xmlCharEncodingAliases == NULL) { 1058: xmlCharEncodingAliasesNb = 0; 1059: xmlCharEncodingAliasesMax = 20; 1060: xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1061: xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1062: if (xmlCharEncodingAliases == NULL) 1063: return(-1); 1064: } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1065: xmlCharEncodingAliasesMax *= 2; 1066: xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1067: xmlRealloc(xmlCharEncodingAliases, 1068: xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1069: } 1070: /* 1071: * Walk down the list looking for a definition of the alias 1072: */ 1073: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1074: if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1075: /* 1076: * Replace the definition. 
1077: */ 1078: xmlFree((char *) xmlCharEncodingAliases[i].name); 1079: xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1080: return(0); 1081: } 1082: } 1083: /* 1084: * Add the definition 1085: */ 1086: xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1087: xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1088: xmlCharEncodingAliasesNb++; 1089: return(0); 1090: } 1091: 1092: /** 1093: * xmlDelEncodingAlias: 1094: * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1095: * 1096: * Unregisters an encoding alias @alias 1097: * 1098: * Returns 0 in case of success, -1 in case of error 1099: */ 1100: int 1101: xmlDelEncodingAlias(const char *alias) { 1102: int i; 1103: 1104: if (alias == NULL) 1105: return(-1); 1106: 1107: if (xmlCharEncodingAliases == NULL) 1108: return(-1); 1109: /* 1110: * Walk down the list looking for a definition of the alias 1111: */ 1112: for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1113: if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1114: xmlFree((char *) xmlCharEncodingAliases[i].name); 1115: xmlFree((char *) xmlCharEncodingAliases[i].alias); 1116: xmlCharEncodingAliasesNb--; 1117: memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1118: sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1119: return(0); 1120: } 1121: } 1122: return(-1); 1123: } 1124: 1125: /** 1126: * xmlParseCharEncoding: 1127: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1128: * 1129: * Compare the string to the encoding schemes already known. Note 1130: * that the comparison is case insensitive accordingly to the section 1131: * [XML] 4.3.3 Character Encoding in Entities. 1132: * 1133: * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1134: * if not recognized. 
1135: */ 1136: xmlCharEncoding 1137: xmlParseCharEncoding(const char* name) 1138: { 1139: const char *alias; 1140: char upper[500]; 1141: int i; 1142: 1143: if (name == NULL) 1144: return(XML_CHAR_ENCODING_NONE); 1145: 1146: /* 1147: * Do the alias resolution 1148: */ 1149: alias = xmlGetEncodingAlias(name); 1150: if (alias != NULL) 1151: name = alias; 1152: 1153: for (i = 0;i < 499;i++) { 1154: upper[i] = toupper(name[i]); 1155: if (upper[i] == 0) break; 1156: } 1157: upper[i] = 0; 1158: 1159: if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1160: if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1161: if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1162: 1163: /* 1164: * NOTE: if we were able to parse this, the endianness of UTF16 is 1165: * already found and in use 1166: */ 1167: if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1168: if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1169: 1170: if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1171: if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1172: if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1173: 1174: /* 1175: * NOTE: if we were able to parse this, the endianness of UCS4 is 1176: * already found and in use 1177: */ 1178: if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1179: if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1180: if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1181: 1182: 1183: if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1184: if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1185: if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1186: 1187: if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1188: if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1189: if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1190: 1191: if 
(!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3); 1192: if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1193: if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1194: if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1195: if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1196: if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1197: if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1198: 1199: if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1200: if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1201: if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1202: 1203: #ifdef DEBUG_ENCODING 1204: xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1205: #endif 1206: return(XML_CHAR_ENCODING_ERROR); 1207: } 1208: 1209: /** 1210: * xmlGetCharEncodingName: 1211: * @enc: the encoding 1212: * 1213: * The "canonical" name for XML encoding. 1214: * C.f. 
http://www.w3.org/TR/REC-xml#charencoding 1215: * Section 4.3.3 Character Encoding in Entities 1216: * 1217: * Returns the canonical name for the given encoding 1218: */ 1219: 1220: const char* 1221: xmlGetCharEncodingName(xmlCharEncoding enc) { 1222: switch (enc) { 1223: case XML_CHAR_ENCODING_ERROR: 1224: return(NULL); 1225: case XML_CHAR_ENCODING_NONE: 1226: return(NULL); 1227: case XML_CHAR_ENCODING_UTF8: 1228: return("UTF-8"); 1229: case XML_CHAR_ENCODING_UTF16LE: 1230: return("UTF-16"); 1231: case XML_CHAR_ENCODING_UTF16BE: 1232: return("UTF-16"); 1233: case XML_CHAR_ENCODING_EBCDIC: 1234: return("EBCDIC"); 1235: case XML_CHAR_ENCODING_UCS4LE: 1236: return("ISO-10646-UCS-4"); 1237: case XML_CHAR_ENCODING_UCS4BE: 1238: return("ISO-10646-UCS-4"); 1239: case XML_CHAR_ENCODING_UCS4_2143: 1240: return("ISO-10646-UCS-4"); 1241: case XML_CHAR_ENCODING_UCS4_3412: 1242: return("ISO-10646-UCS-4"); 1243: case XML_CHAR_ENCODING_UCS2: 1244: return("ISO-10646-UCS-2"); 1245: case XML_CHAR_ENCODING_8859_1: 1246: return("ISO-8859-1"); 1247: case XML_CHAR_ENCODING_8859_2: 1248: return("ISO-8859-2"); 1249: case XML_CHAR_ENCODING_8859_3: 1250: return("ISO-8859-3"); 1251: case XML_CHAR_ENCODING_8859_4: 1252: return("ISO-8859-4"); 1253: case XML_CHAR_ENCODING_8859_5: 1254: return("ISO-8859-5"); 1255: case XML_CHAR_ENCODING_8859_6: 1256: return("ISO-8859-6"); 1257: case XML_CHAR_ENCODING_8859_7: 1258: return("ISO-8859-7"); 1259: case XML_CHAR_ENCODING_8859_8: 1260: return("ISO-8859-8"); 1261: case XML_CHAR_ENCODING_8859_9: 1262: return("ISO-8859-9"); 1263: case XML_CHAR_ENCODING_2022_JP: 1264: return("ISO-2022-JP"); 1265: case XML_CHAR_ENCODING_SHIFT_JIS: 1266: return("Shift-JIS"); 1267: case XML_CHAR_ENCODING_EUC_JP: 1268: return("EUC-JP"); 1269: case XML_CHAR_ENCODING_ASCII: 1270: return(NULL); 1271: } 1272: return(NULL); 1273: } 1274: 1275: /************************************************************************ 1276: * * 1277: * Char encoding handlers * 1278: * * 1279: 
************************************************************************/ 1280: 1281: 1282: /* the size should be growable, but it's not a big deal ... */ 1283: #define MAX_ENCODING_HANDLERS 50 1284: static xmlCharEncodingHandlerPtr *handlers = NULL; 1285: static int nbCharEncodingHandler = 0; 1286: 1287: /* 1288: * The default is UTF-8 for XML, that's also the default used for the 1289: * parser internals, so the default encoding handler is NULL 1290: */ 1291: 1292: static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1293: 1294: /** 1295: * xmlNewCharEncodingHandler: 1296: * @name: the encoding name, in UTF-8 format (ASCII actually) 1297: * @input: the xmlCharEncodingInputFunc to read that encoding 1298: * @output: the xmlCharEncodingOutputFunc to write that encoding 1299: * 1300: * Create and registers an xmlCharEncodingHandler. 1301: * 1302: * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 1303: */ 1304: xmlCharEncodingHandlerPtr 1305: xmlNewCharEncodingHandler(const char *name, 1306: xmlCharEncodingInputFunc input, 1307: xmlCharEncodingOutputFunc output) { 1308: xmlCharEncodingHandlerPtr handler; 1309: const char *alias; 1310: char upper[500]; 1311: int i; 1312: char *up = NULL; 1313: 1314: /* 1315: * Do the alias resolution 1316: */ 1317: alias = xmlGetEncodingAlias(name); 1318: if (alias != NULL) 1319: name = alias; 1320: 1321: /* 1322: * Keep only the uppercase version of the encoding. 1323: */ 1324: if (name == NULL) { 1325: xmlEncodingErr(XML_I18N_NO_NAME, 1326: "xmlNewCharEncodingHandler : no name !\n", NULL); 1327: return(NULL); 1328: } 1329: for (i = 0;i < 499;i++) { 1330: upper[i] = toupper(name[i]); 1331: if (upper[i] == 0) break; 1332: } 1333: upper[i] = 0; 1334: up = xmlMemStrdup(upper); 1335: if (up == NULL) { 1336: xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1337: return(NULL); 1338: } 1339: 1340: /* 1341: * allocate and fill-up an handler block. 
1342: */ 1343: handler = (xmlCharEncodingHandlerPtr) 1344: xmlMalloc(sizeof(xmlCharEncodingHandler)); 1345: if (handler == NULL) { 1346: xmlFree(up); 1347: xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1348: return(NULL); 1349: } 1350: memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1351: handler->input = input; 1352: handler->output = output; 1353: handler->name = up; 1354: 1355: #ifdef LIBXML_ICONV_ENABLED 1356: handler->iconv_in = NULL; 1357: handler->iconv_out = NULL; 1358: #endif 1359: #ifdef LIBXML_ICU_ENABLED 1360: handler->uconv_in = NULL; 1361: handler->uconv_out = NULL; 1362: #endif 1363: 1364: /* 1365: * registers and returns the handler. 1366: */ 1367: xmlRegisterCharEncodingHandler(handler); 1368: #ifdef DEBUG_ENCODING 1369: xmlGenericError(xmlGenericErrorContext, 1370: "Registered encoding handler for %s\n", name); 1371: #endif 1372: return(handler); 1373: } 1374: 1375: /** 1376: * xmlInitCharEncodingHandlers: 1377: * 1378: * Initialize the char encoding support, it registers the default 1379: * encoding supported. 1380: * NOTE: while public, this function usually doesn't need to be called 1381: * in normal processing. 
1382: */ 1383: void 1384: xmlInitCharEncodingHandlers(void) { 1385: unsigned short int tst = 0x1234; 1386: unsigned char *ptr = (unsigned char *) &tst; 1387: 1388: if (handlers != NULL) return; 1389: 1390: handlers = (xmlCharEncodingHandlerPtr *) 1391: xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1392: 1393: if (*ptr == 0x12) xmlLittleEndian = 0; 1394: else if (*ptr == 0x34) xmlLittleEndian = 1; 1395: else { 1396: xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1397: "Odd problem at endianness detection\n", NULL); 1398: } 1399: 1400: if (handlers == NULL) { 1401: xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1402: return; 1403: } 1404: xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1405: #ifdef LIBXML_OUTPUT_ENABLED 1406: xmlUTF16LEHandler = 1407: xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1408: xmlUTF16BEHandler = 1409: xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1410: xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1411: xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1412: xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1413: xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1414: #ifdef LIBXML_HTML_ENABLED 1415: xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1416: #endif 1417: #else 1418: xmlUTF16LEHandler = 1419: xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1420: xmlUTF16BEHandler = 1421: xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 1422: xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1423: xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1424: xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1425: xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1426: #endif /* LIBXML_OUTPUT_ENABLED */ 1427: #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1428: #ifdef LIBXML_ISO8859X_ENABLED 1429: 
xmlRegisterCharEncodingHandlersISO8859x (); 1430: #endif 1431: #endif 1432: 1433: } 1434: 1435: /** 1436: * xmlCleanupCharEncodingHandlers: 1437: * 1438: * Cleanup the memory allocated for the char encoding support, it 1439: * unregisters all the encoding handlers and the aliases. 1440: */ 1441: void 1442: xmlCleanupCharEncodingHandlers(void) { 1443: xmlCleanupEncodingAliases(); 1444: 1445: if (handlers == NULL) return; 1446: 1447: for (;nbCharEncodingHandler > 0;) { 1448: nbCharEncodingHandler--; 1449: if (handlers[nbCharEncodingHandler] != NULL) { 1450: if (handlers[nbCharEncodingHandler]->name != NULL) 1451: xmlFree(handlers[nbCharEncodingHandler]->name); 1452: xmlFree(handlers[nbCharEncodingHandler]); 1453: } 1454: } 1455: xmlFree(handlers); 1456: handlers = NULL; 1457: nbCharEncodingHandler = 0; 1458: xmlDefaultCharEncodingHandler = NULL; 1459: } 1460: 1461: /** 1462: * xmlRegisterCharEncodingHandler: 1463: * @handler: the xmlCharEncodingHandlerPtr handler block 1464: * 1465: * Register the char encoding handler, surprising, isn't it ? 1466: */ 1467: void 1468: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1469: if (handlers == NULL) xmlInitCharEncodingHandlers(); 1470: if ((handler == NULL) || (handlers == NULL)) { 1471: xmlEncodingErr(XML_I18N_NO_HANDLER, 1472: "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1473: return; 1474: } 1475: 1476: if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1477: xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1478: "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1479: "MAX_ENCODING_HANDLERS"); 1480: return; 1481: } 1482: handlers[nbCharEncodingHandler++] = handler; 1483: } 1484: 1485: /** 1486: * xmlGetCharEncodingHandler: 1487: * @enc: an xmlCharEncoding value. 1488: * 1489: * Search in the registered set the handler able to read/write that encoding. 
1490: * 1491: * Returns the handler or NULL if not found 1492: */ 1493: xmlCharEncodingHandlerPtr 1494: xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1495: xmlCharEncodingHandlerPtr handler; 1496: 1497: if (handlers == NULL) xmlInitCharEncodingHandlers(); 1498: switch (enc) { 1499: case XML_CHAR_ENCODING_ERROR: 1500: return(NULL); 1501: case XML_CHAR_ENCODING_NONE: 1502: return(NULL); 1503: case XML_CHAR_ENCODING_UTF8: 1504: return(NULL); 1505: case XML_CHAR_ENCODING_UTF16LE: 1506: return(xmlUTF16LEHandler); 1507: case XML_CHAR_ENCODING_UTF16BE: 1508: return(xmlUTF16BEHandler); 1509: case XML_CHAR_ENCODING_EBCDIC: 1510: handler = xmlFindCharEncodingHandler("EBCDIC"); 1511: if (handler != NULL) return(handler); 1512: handler = xmlFindCharEncodingHandler("ebcdic"); 1513: if (handler != NULL) return(handler); 1514: handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1515: if (handler != NULL) return(handler); 1516: break; 1517: case XML_CHAR_ENCODING_UCS4BE: 1518: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1519: if (handler != NULL) return(handler); 1520: handler = xmlFindCharEncodingHandler("UCS-4"); 1521: if (handler != NULL) return(handler); 1522: handler = xmlFindCharEncodingHandler("UCS4"); 1523: if (handler != NULL) return(handler); 1524: break; 1525: case XML_CHAR_ENCODING_UCS4LE: 1526: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1527: if (handler != NULL) return(handler); 1528: handler = xmlFindCharEncodingHandler("UCS-4"); 1529: if (handler != NULL) return(handler); 1530: handler = xmlFindCharEncodingHandler("UCS4"); 1531: if (handler != NULL) return(handler); 1532: break; 1533: case XML_CHAR_ENCODING_UCS4_2143: 1534: break; 1535: case XML_CHAR_ENCODING_UCS4_3412: 1536: break; 1537: case XML_CHAR_ENCODING_UCS2: 1538: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1539: if (handler != NULL) return(handler); 1540: handler = xmlFindCharEncodingHandler("UCS-2"); 1541: if (handler != NULL) return(handler); 1542: handler = 
xmlFindCharEncodingHandler("UCS2"); 1543: if (handler != NULL) return(handler); 1544: break; 1545: 1546: /* 1547: * We used to keep ISO Latin encodings native in the 1548: * generated data. This led to so many problems that 1549: * this has been removed. One can still change this 1550: * back by registering no-ops encoders for those 1551: */ 1552: case XML_CHAR_ENCODING_8859_1: 1553: handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1554: if (handler != NULL) return(handler); 1555: break; 1556: case XML_CHAR_ENCODING_8859_2: 1557: handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1558: if (handler != NULL) return(handler); 1559: break; 1560: case XML_CHAR_ENCODING_8859_3: 1561: handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1562: if (handler != NULL) return(handler); 1563: break; 1564: case XML_CHAR_ENCODING_8859_4: 1565: handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1566: if (handler != NULL) return(handler); 1567: break; 1568: case XML_CHAR_ENCODING_8859_5: 1569: handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1570: if (handler != NULL) return(handler); 1571: break; 1572: case XML_CHAR_ENCODING_8859_6: 1573: handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1574: if (handler != NULL) return(handler); 1575: break; 1576: case XML_CHAR_ENCODING_8859_7: 1577: handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1578: if (handler != NULL) return(handler); 1579: break; 1580: case XML_CHAR_ENCODING_8859_8: 1581: handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1582: if (handler != NULL) return(handler); 1583: break; 1584: case XML_CHAR_ENCODING_8859_9: 1585: handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1586: if (handler != NULL) return(handler); 1587: break; 1588: 1589: 1590: case XML_CHAR_ENCODING_2022_JP: 1591: handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1592: if (handler != NULL) return(handler); 1593: break; 1594: case XML_CHAR_ENCODING_SHIFT_JIS: 1595: handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1596: if (handler != 
NULL) return(handler); 1597: handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1598: if (handler != NULL) return(handler); 1599: handler = xmlFindCharEncodingHandler("Shift_JIS"); 1600: if (handler != NULL) return(handler); 1601: break; 1602: case XML_CHAR_ENCODING_EUC_JP: 1603: handler = xmlFindCharEncodingHandler("EUC-JP"); 1604: if (handler != NULL) return(handler); 1605: break; 1606: default: 1607: break; 1608: } 1609: 1610: #ifdef DEBUG_ENCODING 1611: xmlGenericError(xmlGenericErrorContext, 1612: "No handler found for encoding %d\n", enc); 1613: #endif 1614: return(NULL); 1615: } 1616: 1617: /** 1618: * xmlFindCharEncodingHandler: 1619: * @name: a string describing the char encoding. 1620: * 1621: * Search in the registered set the handler able to read/write that encoding. 1622: * 1623: * Returns the handler or NULL if not found 1624: */ 1625: xmlCharEncodingHandlerPtr 1626: xmlFindCharEncodingHandler(const char *name) { 1627: const char *nalias; 1628: const char *norig; 1629: xmlCharEncoding alias; 1630: #ifdef LIBXML_ICONV_ENABLED 1631: xmlCharEncodingHandlerPtr enc; 1632: iconv_t icv_in, icv_out; 1633: #endif /* LIBXML_ICONV_ENABLED */ 1634: #ifdef LIBXML_ICU_ENABLED 1635: xmlCharEncodingHandlerPtr encu; 1636: uconv_t *ucv_in, *ucv_out; 1637: #endif /* LIBXML_ICU_ENABLED */ 1638: char upper[100]; 1639: int i; 1640: 1641: if (handlers == NULL) xmlInitCharEncodingHandlers(); 1642: if (name == NULL) return(xmlDefaultCharEncodingHandler); 1643: if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1644: 1645: /* 1646: * Do the alias resolution 1647: */ 1648: norig = name; 1649: nalias = xmlGetEncodingAlias(name); 1650: if (nalias != NULL) 1651: name = nalias; 1652: 1653: /* 1654: * Check first for directly registered encoding names 1655: */ 1656: for (i = 0;i < 99;i++) { 1657: upper[i] = toupper(name[i]); 1658: if (upper[i] == 0) break; 1659: } 1660: upper[i] = 0; 1661: 1662: if (handlers != NULL) { 1663: for (i = 0;i < nbCharEncodingHandler; i++) { 1664: 
if (!strcmp(upper, handlers[i]->name)) { 1665: #ifdef DEBUG_ENCODING 1666: xmlGenericError(xmlGenericErrorContext, 1667: "Found registered handler for encoding %s\n", name); 1668: #endif 1669: return(handlers[i]); 1670: } 1671: } 1672: } 1673: 1674: #ifdef LIBXML_ICONV_ENABLED 1675: /* check whether iconv can handle this */ 1676: icv_in = iconv_open("UTF-8", name); 1677: icv_out = iconv_open(name, "UTF-8"); 1678: if (icv_in == (iconv_t) -1) { 1679: icv_in = iconv_open("UTF-8", upper); 1680: } 1681: if (icv_out == (iconv_t) -1) { 1682: icv_out = iconv_open(upper, "UTF-8"); 1683: } 1684: if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1685: enc = (xmlCharEncodingHandlerPtr) 1686: xmlMalloc(sizeof(xmlCharEncodingHandler)); 1687: if (enc == NULL) { 1688: iconv_close(icv_in); 1689: iconv_close(icv_out); 1690: return(NULL); 1691: } 1692: memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1693: enc->name = xmlMemStrdup(name); 1694: enc->input = NULL; 1695: enc->output = NULL; 1696: enc->iconv_in = icv_in; 1697: enc->iconv_out = icv_out; 1698: #ifdef DEBUG_ENCODING 1699: xmlGenericError(xmlGenericErrorContext, 1700: "Found iconv handler for encoding %s\n", name); 1701: #endif 1702: return enc; 1703: } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1704: xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1705: "iconv : problems with filters for '%s'\n", name); 1706: } 1707: #endif /* LIBXML_ICONV_ENABLED */ 1708: #ifdef LIBXML_ICU_ENABLED 1709: /* check whether icu can handle this */ 1710: ucv_in = openIcuConverter(name, 1); 1711: ucv_out = openIcuConverter(name, 0); 1712: if (ucv_in != NULL && ucv_out != NULL) { 1713: encu = (xmlCharEncodingHandlerPtr) 1714: xmlMalloc(sizeof(xmlCharEncodingHandler)); 1715: if (encu == NULL) { 1716: closeIcuConverter(ucv_in); 1717: closeIcuConverter(ucv_out); 1718: return(NULL); 1719: } 1720: memset(encu, 0, sizeof(xmlCharEncodingHandler)); 1721: encu->name = xmlMemStrdup(name); 1722: encu->input = NULL; 1723: encu->output = 
NULL; 1724: encu->uconv_in = ucv_in; 1725: encu->uconv_out = ucv_out; 1726: #ifdef DEBUG_ENCODING 1727: xmlGenericError(xmlGenericErrorContext, 1728: "Found ICU converter handler for encoding %s\n", name); 1729: #endif 1730: return encu; 1731: } else if (ucv_in != NULL || ucv_out != NULL) { 1732: closeIcuConverter(ucv_in); 1733: closeIcuConverter(ucv_out); 1734: xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1735: "ICU converter : problems with filters for '%s'\n", name); 1736: } 1737: #endif /* LIBXML_ICU_ENABLED */ 1738: 1739: #ifdef DEBUG_ENCODING 1740: xmlGenericError(xmlGenericErrorContext, 1741: "No handler found for encoding %s\n", name); 1742: #endif 1743: 1744: /* 1745: * Fallback using the canonical names 1746: */ 1747: alias = xmlParseCharEncoding(norig); 1748: if (alias != XML_CHAR_ENCODING_ERROR) { 1749: const char* canon; 1750: canon = xmlGetCharEncodingName(alias); 1751: if ((canon != NULL) && (strcmp(name, canon))) { 1752: return(xmlFindCharEncodingHandler(canon)); 1753: } 1754: } 1755: 1756: /* If "none of the above", give up */ 1757: return(NULL); 1758: } 1759: 1760: /************************************************************************ 1761: * * 1762: * ICONV based generic conversion functions * 1763: * * 1764: ************************************************************************/ 1765: 1766: #ifdef LIBXML_ICONV_ENABLED 1767: /** 1768: * xmlIconvWrapper: 1769: * @cd: iconv converter data structure 1770: * @out: a pointer to an array of bytes to store the result 1771: * @outlen: the length of @out 1772: * @in: a pointer to an array of ISO Latin 1 chars 1773: * @inlen: the length of @in 1774: * 1775: * Returns 0 if success, or 1776: * -1 by lack of space, or 1777: * -2 if the transcoding fails (for *in is not valid utf8 string or 1778: * the result of transformation can't fit into the encoding we want), or 1779: * -3 if there the last byte can't form a single output char. 
1780: * 1781: * The value of @inlen after return is the number of octets consumed 1782: * as the return value is positive, else unpredictable. 1783: * The value of @outlen after return is the number of ocetes consumed. 1784: */ 1785: static int 1786: xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1787: const unsigned char *in, int *inlen) { 1788: size_t icv_inlen, icv_outlen; 1789: const char *icv_in = (const char *) in; 1790: char *icv_out = (char *) out; 1791: int ret; 1792: 1793: if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1794: if (outlen != NULL) *outlen = 0; 1795: return(-1); 1796: } 1797: icv_inlen = *inlen; 1798: icv_outlen = *outlen; 1799: ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1800: *inlen -= icv_inlen; 1801: *outlen -= icv_outlen; 1802: if ((icv_inlen != 0) || (ret == -1)) { 1803: #ifdef EILSEQ 1804: if (errno == EILSEQ) { 1805: return -2; 1806: } else 1807: #endif 1808: #ifdef E2BIG 1809: if (errno == E2BIG) { 1810: return -1; 1811: } else 1812: #endif 1813: #ifdef EINVAL 1814: if (errno == EINVAL) { 1815: return -3; 1816: } else 1817: #endif 1818: { 1819: return -3; 1820: } 1821: } 1822: return 0; 1823: } 1824: #endif /* LIBXML_ICONV_ENABLED */ 1825: 1826: /************************************************************************ 1827: * * 1828: * ICU based generic conversion functions * 1829: * * 1830: ************************************************************************/ 1831: 1832: #ifdef LIBXML_ICU_ENABLED 1833: /** 1834: * xmlUconvWrapper: 1835: * @cd: ICU uconverter data structure 1836: * @toUnicode : non-zero if toUnicode. 0 otherwise. 
1837: * @out: a pointer to an array of bytes to store the result 1838: * @outlen: the length of @out 1839: * @in: a pointer to an array of ISO Latin 1 chars 1840: * @inlen: the length of @in 1841: * 1842: * Returns 0 if success, or 1843: * -1 by lack of space, or 1844: * -2 if the transcoding fails (for *in is not valid utf8 string or 1845: * the result of transformation can't fit into the encoding we want), or 1846: * -3 if there the last byte can't form a single output char. 1847: * 1848: * The value of @inlen after return is the number of octets consumed 1849: * as the return value is positive, else unpredictable. 1850: * The value of @outlen after return is the number of ocetes consumed. 1851: */ 1852: static int 1853: xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1854: const unsigned char *in, int *inlen) { 1855: const char *ucv_in = (const char *) in; 1856: char *ucv_out = (char *) out; 1857: UErrorCode err = U_ZERO_ERROR; 1858: 1859: if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1860: if (outlen != NULL) *outlen = 0; 1861: return(-1); 1862: } 1863: 1864: /* 1865: * TODO(jungshik) 1866: * 1. is ucnv_convert(To|From)Algorithmic better? 1867: * 2. had we better use an explicit pivot buffer? 1868: * 3. error returned comes from 'fromUnicode' only even 1869: * when toUnicode is true ! 
1870: */ 1871: if (toUnicode) { 1872: /* encoding => UTF-16 => UTF-8 */ 1873: ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1874: &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1875: 0, TRUE, &err); 1876: } else { 1877: /* UTF-8 => UTF-16 => encoding */ 1878: ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1879: &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1880: 0, TRUE, &err); 1881: } 1882: *inlen = ucv_in - (const char*) in; 1883: *outlen = ucv_out - (char *) out; 1884: if (U_SUCCESS(err)) 1885: return 0; 1886: if (err == U_BUFFER_OVERFLOW_ERROR) 1887: return -1; 1888: if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1889: return -2; 1890: /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1891: return -3; 1892: } 1893: #endif /* LIBXML_ICU_ENABLED */ 1894: 1895: /************************************************************************ 1896: * * 1897: * The real API used by libxml for on-the-fly conversion * 1898: * * 1899: ************************************************************************/ 1900: int 1901: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1902: xmlBufferPtr in, int len); 1903: 1904: /** 1905: * xmlCharEncFirstLineInt: 1906: * @handler: char enconding transformation data structure 1907: * @out: an xmlBuffer for the output. 1908: * @in: an xmlBuffer for the input 1909: * @len: number of bytes to convert for the first line, or -1 1910: * 1911: * Front-end for the encoding handler input function, but handle only 1912: * the very first line, i.e. limit itself to 45 chars. 
1913: * 1914: * Returns the number of byte written if success, or 1915: * -1 general error 1916: * -2 if the transcoding fails (for *in is not valid utf8 string or 1917: * the result of transformation can't fit into the encoding we want), or 1918: */ 1919: int 1920: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1921: xmlBufferPtr in, int len) { 1922: int ret = -2; 1923: int written; 1924: int toconv; 1925: 1926: if (handler == NULL) return(-1); 1927: if (out == NULL) return(-1); 1928: if (in == NULL) return(-1); 1929: 1930: /* calculate space available */ 1931: written = out->size - out->use - 1; /* count '\0' */ 1932: toconv = in->use; 1933: /* 1934: * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1935: * 45 chars should be sufficient to reach the end of the encoding 1936: * declaration without going too far inside the document content. 1937: * on UTF-16 this means 90bytes, on UCS4 this means 180 1938: * The actual value depending on guessed encoding is passed as @len 1939: * if provided 1940: */ 1941: if (len >= 0) { 1942: if (toconv > len) 1943: toconv = len; 1944: } else { 1945: if (toconv > 180) 1946: toconv = 180; 1947: } 1948: if (toconv * 2 >= written) { 1949: xmlBufferGrow(out, toconv); 1950: written = out->size - out->use - 1; 1951: } 1952: 1953: if (handler->input != NULL) { 1954: ret = handler->input(&out->content[out->use], &written, 1955: in->content, &toconv); 1956: xmlBufferShrink(in, toconv); 1957: out->use += written; 1958: out->content[out->use] = 0; 1959: } 1960: #ifdef LIBXML_ICONV_ENABLED 1961: else if (handler->iconv_in != NULL) { 1962: ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1963: &written, in->content, &toconv); 1964: xmlBufferShrink(in, toconv); 1965: out->use += written; 1966: out->content[out->use] = 0; 1967: if (ret == -1) ret = -3; 1968: } 1969: #endif /* LIBXML_ICONV_ENABLED */ 1970: #ifdef LIBXML_ICU_ENABLED 1971: else if (handler->uconv_in != NULL) { 1972: ret = 
xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 1973: &written, in->content, &toconv); 1974: xmlBufferShrink(in, toconv); 1975: out->use += written; 1976: out->content[out->use] = 0; 1977: if (ret == -1) ret = -3; 1978: } 1979: #endif /* LIBXML_ICU_ENABLED */ 1980: #ifdef DEBUG_ENCODING 1981: switch (ret) { 1982: case 0: 1983: xmlGenericError(xmlGenericErrorContext, 1984: "converted %d bytes to %d bytes of input\n", 1985: toconv, written); 1986: break; 1987: case -1: 1988: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1989: toconv, written, in->use); 1990: break; 1991: case -2: 1992: xmlGenericError(xmlGenericErrorContext, 1993: "input conversion failed due to input error\n"); 1994: break; 1995: case -3: 1996: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1997: toconv, written, in->use); 1998: break; 1999: default: 2000: xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2001: } 2002: #endif /* DEBUG_ENCODING */ 2003: /* 2004: * Ignore when input buffer is not on a boundary 2005: */ 2006: if (ret == -3) ret = 0; 2007: if (ret == -1) ret = 0; 2008: return(ret); 2009: } 2010: 2011: /** 2012: * xmlCharEncFirstLine: 2013: * @handler: char enconding transformation data structure 2014: * @out: an xmlBuffer for the output. 2015: * @in: an xmlBuffer for the input 2016: * 2017: * Front-end for the encoding handler input function, but handle only 2018: * the very first line, i.e. limit itself to 45 chars. 
2019: * 2020: * Returns the number of byte written if success, or 2021: * -1 general error 2022: * -2 if the transcoding fails (for *in is not valid utf8 string or 2023: * the result of transformation can't fit into the encoding we want), or 2024: */ 2025: int 2026: xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2027: xmlBufferPtr in) { 2028: return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2029: } 2030: 2031: /** 2032: * xmlCharEncInFunc: 2033: * @handler: char encoding transformation data structure 2034: * @out: an xmlBuffer for the output. 2035: * @in: an xmlBuffer for the input 2036: * 2037: * Generic front-end for the encoding handler input function 2038: * 2039: * Returns the number of byte written if success, or 2040: * -1 general error 2041: * -2 if the transcoding fails (for *in is not valid utf8 string or 2042: * the result of transformation can't fit into the encoding we want), or 2043: */ 2044: int 2045: xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2046: xmlBufferPtr in) 2047: { 2048: int ret = -2; 2049: int written; 2050: int toconv; 2051: 2052: if (handler == NULL) 2053: return (-1); 2054: if (out == NULL) 2055: return (-1); 2056: if (in == NULL) 2057: return (-1); 2058: 2059: toconv = in->use; 2060: if (toconv == 0) 2061: return (0); 2062: written = out->size - out->use -1; /* count '\0' */ 2063: if (toconv * 2 >= written) { 2064: xmlBufferGrow(out, out->size + toconv * 2); 2065: written = out->size - out->use - 1; 2066: } 2067: if (handler->input != NULL) { 2068: ret = handler->input(&out->content[out->use], &written, 2069: in->content, &toconv); 2070: xmlBufferShrink(in, toconv); 2071: out->use += written; 2072: out->content[out->use] = 0; 2073: } 2074: #ifdef LIBXML_ICONV_ENABLED 2075: else if (handler->iconv_in != NULL) { 2076: ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 2077: &written, in->content, &toconv); 2078: xmlBufferShrink(in, toconv); 2079: out->use += written; 
2080: out->content[out->use] = 0; 2081: if (ret == -1) 2082: ret = -3; 2083: } 2084: #endif /* LIBXML_ICONV_ENABLED */ 2085: #ifdef LIBXML_ICU_ENABLED 2086: else if (handler->uconv_in != NULL) { 2087: ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 2088: &written, in->content, &toconv); 2089: xmlBufferShrink(in, toconv); 2090: out->use += written; 2091: out->content[out->use] = 0; 2092: if (ret == -1) 2093: ret = -3; 2094: } 2095: #endif /* LIBXML_ICU_ENABLED */ 2096: switch (ret) { 2097: case 0: 2098: #ifdef DEBUG_ENCODING 2099: xmlGenericError(xmlGenericErrorContext, 2100: "converted %d bytes to %d bytes of input\n", 2101: toconv, written); 2102: #endif 2103: break; 2104: case -1: 2105: #ifdef DEBUG_ENCODING 2106: xmlGenericError(xmlGenericErrorContext, 2107: "converted %d bytes to %d bytes of input, %d left\n", 2108: toconv, written, in->use); 2109: #endif 2110: break; 2111: case -3: 2112: #ifdef DEBUG_ENCODING 2113: xmlGenericError(xmlGenericErrorContext, 2114: "converted %d bytes to %d bytes of input, %d left\n", 2115: toconv, written, in->use); 2116: #endif 2117: break; 2118: case -2: { 2119: char buf[50]; 2120: 2121: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2122: in->content[0], in->content[1], 2123: in->content[2], in->content[3]); 2124: buf[49] = 0; 2125: xmlEncodingErr(XML_I18N_CONV_FAILED, 2126: "input conversion failed due to input error, bytes %s\n", 2127: buf); 2128: } 2129: } 2130: /* 2131: * Ignore when input buffer is not on a boundary 2132: */ 2133: if (ret == -3) 2134: ret = 0; 2135: return (written? written : ret); 2136: } 2137: 2138: /** 2139: * xmlCharEncOutFunc: 2140: * @handler: char enconding transformation data structure 2141: * @out: an xmlBuffer for the output. 
2142: * @in: an xmlBuffer for the input 2143: * 2144: * Generic front-end for the encoding handler output function 2145: * a first call with @in == NULL has to be made firs to initiate the 2146: * output in case of non-stateless encoding needing to initiate their 2147: * state or the output (like the BOM in UTF16). 2148: * In case of UTF8 sequence conversion errors for the given encoder, 2149: * the content will be automatically remapped to a CharRef sequence. 2150: * 2151: * Returns the number of byte written if success, or 2152: * -1 general error 2153: * -2 if the transcoding fails (for *in is not valid utf8 string or 2154: * the result of transformation can't fit into the encoding we want), or 2155: */ 2156: int 2157: xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2158: xmlBufferPtr in) { 2159: int ret = -2; 2160: int written; 2161: int writtentot = 0; 2162: int toconv; 2163: int output = 0; 2164: int charref_len = 0; 2165: 2166: if (handler == NULL) return(-1); 2167: if (out == NULL) return(-1); 2168: 2169: retry: 2170: 2171: written = out->size - out->use; 2172: 2173: if (written > 0) 2174: written--; /* Gennady: count '/0' */ 2175: 2176: /* 2177: * First specific handling of in = NULL, i.e. 
the initialization call 2178: */ 2179: if (in == NULL) { 2180: toconv = 0; 2181: if (handler->output != NULL) { 2182: ret = handler->output(&out->content[out->use], &written, 2183: NULL, &toconv); 2184: if (ret >= 0) { /* Gennady: check return value */ 2185: out->use += written; 2186: out->content[out->use] = 0; 2187: } 2188: } 2189: #ifdef LIBXML_ICONV_ENABLED 2190: else if (handler->iconv_out != NULL) { 2191: ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2192: &written, NULL, &toconv); 2193: out->use += written; 2194: out->content[out->use] = 0; 2195: } 2196: #endif /* LIBXML_ICONV_ENABLED */ 2197: #ifdef LIBXML_ICU_ENABLED 2198: else if (handler->uconv_out != NULL) { 2199: ret = xmlUconvWrapper(handler->uconv_out, 0, 2200: &out->content[out->use], 2201: &written, NULL, &toconv); 2202: out->use += written; 2203: out->content[out->use] = 0; 2204: } 2205: #endif /* LIBXML_ICU_ENABLED */ 2206: #ifdef DEBUG_ENCODING 2207: xmlGenericError(xmlGenericErrorContext, 2208: "initialized encoder\n"); 2209: #endif 2210: return(0); 2211: } 2212: 2213: /* 2214: * Conversion itself. 
2215: */ 2216: toconv = in->use; 2217: if (toconv == 0) 2218: return(0); 2219: if (toconv * 4 >= written) { 2220: xmlBufferGrow(out, toconv * 4); 2221: written = out->size - out->use - 1; 2222: } 2223: if (handler->output != NULL) { 2224: ret = handler->output(&out->content[out->use], &written, 2225: in->content, &toconv); 2226: if (written > 0) { 2227: xmlBufferShrink(in, toconv); 2228: out->use += written; 2229: writtentot += written; 2230: } 2231: out->content[out->use] = 0; 2232: } 2233: #ifdef LIBXML_ICONV_ENABLED 2234: else if (handler->iconv_out != NULL) { 2235: ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2236: &written, in->content, &toconv); 2237: xmlBufferShrink(in, toconv); 2238: out->use += written; 2239: writtentot += written; 2240: out->content[out->use] = 0; 2241: if (ret == -1) { 2242: if (written > 0) { 2243: /* 2244: * Can be a limitation of iconv 2245: */ 2246: charref_len = 0; 2247: goto retry; 2248: } 2249: ret = -3; 2250: } 2251: } 2252: #endif /* LIBXML_ICONV_ENABLED */ 2253: #ifdef LIBXML_ICU_ENABLED 2254: else if (handler->uconv_out != NULL) { 2255: ret = xmlUconvWrapper(handler->uconv_out, 0, 2256: &out->content[out->use], 2257: &written, in->content, &toconv); 2258: xmlBufferShrink(in, toconv); 2259: out->use += written; 2260: writtentot += written; 2261: out->content[out->use] = 0; 2262: if (ret == -1) { 2263: if (written > 0) { 2264: /* 2265: * Can be a limitation of iconv 2266: */ 2267: charref_len = 0; 2268: goto retry; 2269: } 2270: ret = -3; 2271: } 2272: } 2273: #endif /* LIBXML_ICU_ENABLED */ 2274: else { 2275: xmlEncodingErr(XML_I18N_NO_OUTPUT, 2276: "xmlCharEncOutFunc: no output function !\n", NULL); 2277: return(-1); 2278: } 2279: 2280: if (ret >= 0) output += ret; 2281: 2282: /* 2283: * Attempt to handle error cases 2284: */ 2285: switch (ret) { 2286: case 0: 2287: #ifdef DEBUG_ENCODING 2288: xmlGenericError(xmlGenericErrorContext, 2289: "converted %d bytes to %d bytes of output\n", 2290: toconv, 
written); 2291: #endif 2292: break; 2293: case -1: 2294: #ifdef DEBUG_ENCODING 2295: xmlGenericError(xmlGenericErrorContext, 2296: "output conversion failed by lack of space\n"); 2297: #endif 2298: break; 2299: case -3: 2300: #ifdef DEBUG_ENCODING 2301: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2302: toconv, written, in->use); 2303: #endif 2304: break; 2305: case -2: { 2306: int len = in->use; 2307: const xmlChar *utf = (const xmlChar *) in->content; 2308: int cur; 2309: 2310: cur = xmlGetUTF8Char(utf, &len); 2311: if ((charref_len != 0) && (written < charref_len)) { 2312: /* 2313: * We attempted to insert a character reference and failed. 2314: * Undo what was written and skip the remaining charref. 2315: */ 2316: out->use -= written; 2317: writtentot -= written; 2318: xmlBufferShrink(in, charref_len - written); 2319: charref_len = 0; 2320: 2321: ret = -1; 2322: break; 2323: } else if (cur > 0) { 2324: xmlChar charref[20]; 2325: 2326: #ifdef DEBUG_ENCODING 2327: xmlGenericError(xmlGenericErrorContext, 2328: "handling output conversion error\n"); 2329: xmlGenericError(xmlGenericErrorContext, 2330: "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2331: in->content[0], in->content[1], 2332: in->content[2], in->content[3]); 2333: #endif 2334: /* 2335: * Removes the UTF8 sequence, and replace it by a charref 2336: * and continue the transcoding phase, hoping the error 2337: * did not mangle the encoder state. 
2338: */ 2339: charref_len = snprintf((char *) &charref[0], sizeof(charref), 2340: "&#%d;", cur); 2341: xmlBufferShrink(in, len); 2342: xmlBufferAddHead(in, charref, -1); 2343: 2344: goto retry; 2345: } else { 2346: char buf[50]; 2347: 2348: snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2349: in->content[0], in->content[1], 2350: in->content[2], in->content[3]); 2351: buf[49] = 0; 2352: xmlEncodingErr(XML_I18N_CONV_FAILED, 2353: "output conversion failed due to conv error, bytes %s\n", 2354: buf); 2355: if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2356: in->content[0] = ' '; 2357: } 2358: break; 2359: } 2360: } 2361: return(ret); 2362: } 2363: 2364: /** 2365: * xmlCharEncCloseFunc: 2366: * @handler: char enconding transformation data structure 2367: * 2368: * Generic front-end for encoding handler close function 2369: * 2370: * Returns 0 if success, or -1 in case of error 2371: */ 2372: int 2373: xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2374: int ret = 0; 2375: int tofree = 0; 2376: if (handler == NULL) return(-1); 2377: if (handler->name == NULL) return(-1); 2378: #ifdef LIBXML_ICONV_ENABLED 2379: /* 2380: * Iconv handlers can be used only once, free the whole block. 2381: * and the associated icon resources. 
2382: */ 2383: if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2384: tofree = 1; 2385: if (handler->iconv_out != NULL) { 2386: if (iconv_close(handler->iconv_out)) 2387: ret = -1; 2388: handler->iconv_out = NULL; 2389: } 2390: if (handler->iconv_in != NULL) { 2391: if (iconv_close(handler->iconv_in)) 2392: ret = -1; 2393: handler->iconv_in = NULL; 2394: } 2395: } 2396: #endif /* LIBXML_ICONV_ENABLED */ 2397: #ifdef LIBXML_ICU_ENABLED 2398: if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { 2399: tofree = 1; 2400: if (handler->uconv_out != NULL) { 2401: closeIcuConverter(handler->uconv_out); 2402: handler->uconv_out = NULL; 2403: } 2404: if (handler->uconv_in != NULL) { 2405: closeIcuConverter(handler->uconv_in); 2406: handler->uconv_in = NULL; 2407: } 2408: } 2409: #endif 2410: if (tofree) { 2411: /* free up only dynamic handlers iconv/uconv */ 2412: if (handler->name != NULL) 2413: xmlFree(handler->name); 2414: handler->name = NULL; 2415: xmlFree(handler); 2416: } 2417: #ifdef DEBUG_ENCODING 2418: if (ret) 2419: xmlGenericError(xmlGenericErrorContext, 2420: "failed to close the encoding handler\n"); 2421: else 2422: xmlGenericError(xmlGenericErrorContext, 2423: "closed the encoding handler\n"); 2424: #endif 2425: 2426: return(ret); 2427: } 2428: 2429: /** 2430: * xmlByteConsumed: 2431: * @ctxt: an XML parser context 2432: * 2433: * This function provides the current index of the parser relative 2434: * to the start of the current entity. This function is computed in 2435: * bytes from the beginning starting at zero and finishing at the 2436: * size in byte of the file if parsing a file. The function is 2437: * of constant cost if the input is UTF-8 but can be costly if run 2438: * on non-UTF-8 input. 2439: * 2440: * Returns the index in bytes from the beginning of the entity or -1 2441: * in case the index could not be computed. 
2442: */ 2443: long 2444: xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2445: xmlParserInputPtr in; 2446: 2447: if (ctxt == NULL) return(-1); 2448: in = ctxt->input; 2449: if (in == NULL) return(-1); 2450: if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2451: unsigned int unused = 0; 2452: xmlCharEncodingHandler * handler = in->buf->encoder; 2453: /* 2454: * Encoding conversion, compute the number of unused original 2455: * bytes from the input not consumed and substract that from 2456: * the raw consumed value, this is not a cheap operation 2457: */ 2458: if (in->end - in->cur > 0) { 2459: unsigned char convbuf[32000]; 2460: const unsigned char *cur = (const unsigned char *)in->cur; 2461: int toconv = in->end - in->cur, written = 32000; 2462: 2463: int ret; 2464: 2465: if (handler->output != NULL) { 2466: do { 2467: toconv = in->end - cur; 2468: written = 32000; 2469: ret = handler->output(&convbuf[0], &written, 2470: cur, &toconv); 2471: if (ret == -1) return(-1); 2472: unused += written; 2473: cur += toconv; 2474: } while (ret == -2); 2475: #ifdef LIBXML_ICONV_ENABLED 2476: } else if (handler->iconv_out != NULL) { 2477: do { 2478: toconv = in->end - cur; 2479: written = 32000; 2480: ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2481: &written, cur, &toconv); 2482: if (ret < 0) { 2483: if (written > 0) 2484: ret = -2; 2485: else 2486: return(-1); 2487: } 2488: unused += written; 2489: cur += toconv; 2490: } while (ret == -2); 2491: #endif 2492: #ifdef LIBXML_ICU_ENABLED 2493: } else if (handler->uconv_out != NULL) { 2494: do { 2495: toconv = in->end - cur; 2496: written = 32000; 2497: ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], 2498: &written, cur, &toconv); 2499: if (ret < 0) { 2500: if (written > 0) 2501: ret = -2; 2502: else 2503: return(-1); 2504: } 2505: unused += written; 2506: cur += toconv; 2507: } while (ret == -2); 2508: #endif 2509: } else { 2510: /* could not find a converter */ 2511: return(-1); 2512: } 2513: } 2514: if 
(in->buf->rawconsumed < unused) 2515: return(-1); 2516: return(in->buf->rawconsumed - unused); 2517: } 2518: return(in->consumed + (in->cur - in->base)); 2519: } 2520: 2521: #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 2522: #ifdef LIBXML_ISO8859X_ENABLED 2523: 2524: /** 2525: * UTF8ToISO8859x: 2526: * @out: a pointer to an array of bytes to store the result 2527: * @outlen: the length of @out 2528: * @in: a pointer to an array of UTF-8 chars 2529: * @inlen: the length of @in 2530: * @xlattable: the 2-level transcoding table 2531: * 2532: * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 2533: * block of chars out. 2534: * 2535: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 2536: * The value of @inlen after return is the number of octets consumed 2537: * as the return value is positive, else unpredictable. 2538: * The value of @outlen after return is the number of ocetes consumed. 2539: */ 2540: static int 2541: UTF8ToISO8859x(unsigned char* out, int *outlen, 2542: const unsigned char* in, int *inlen, 2543: unsigned char const *xlattable) { 2544: const unsigned char* outstart = out; 2545: const unsigned char* inend; 2546: const unsigned char* instart = in; 2547: const unsigned char* processed = in; 2548: 2549: if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2550: (xlattable == NULL)) 2551: return(-1); 2552: if (in == NULL) { 2553: /* 2554: * initialization nothing to do 2555: */ 2556: *outlen = 0; 2557: *inlen = 0; 2558: return(0); 2559: } 2560: inend = in + (*inlen); 2561: while (in < inend) { 2562: unsigned char d = *in++; 2563: if (d < 0x80) { 2564: *out++ = d; 2565: } else if (d < 0xC0) { 2566: /* trailing byte in leading position */ 2567: *outlen = out - outstart; 2568: *inlen = processed - instart; 2569: return(-2); 2570: } else if (d < 0xE0) { 2571: unsigned char c; 2572: if (!(in < inend)) { 2573: /* trailing byte not in input buffer */ 2574: *outlen = out - outstart; 2575: 
*inlen = processed - instart; 2576: return(-3); 2577: } 2578: c = *in++; 2579: if ((c & 0xC0) != 0x80) { 2580: /* not a trailing byte */ 2581: *outlen = out - outstart; 2582: *inlen = processed - instart; 2583: return(-2); 2584: } 2585: c = c & 0x3F; 2586: d = d & 0x1F; 2587: d = xlattable [48 + c + xlattable [d] * 64]; 2588: if (d == 0) { 2589: /* not in character set */ 2590: *outlen = out - outstart; 2591: *inlen = processed - instart; 2592: return(-2); 2593: } 2594: *out++ = d; 2595: } else if (d < 0xF0) { 2596: unsigned char c1; 2597: unsigned char c2; 2598: if (!(in < inend - 1)) { 2599: /* trailing bytes not in input buffer */ 2600: *outlen = out - outstart; 2601: *inlen = processed - instart; 2602: return(-3); 2603: } 2604: c1 = *in++; 2605: if ((c1 & 0xC0) != 0x80) { 2606: /* not a trailing byte (c1) */ 2607: *outlen = out - outstart; 2608: *inlen = processed - instart; 2609: return(-2); 2610: } 2611: c2 = *in++; 2612: if ((c2 & 0xC0) != 0x80) { 2613: /* not a trailing byte (c2) */ 2614: *outlen = out - outstart; 2615: *inlen = processed - instart; 2616: return(-2); 2617: } 2618: c1 = c1 & 0x3F; 2619: c2 = c2 & 0x3F; 2620: d = d & 0x0F; 2621: d = xlattable [48 + c2 + xlattable [48 + c1 + 2622: xlattable [32 + d] * 64] * 64]; 2623: if (d == 0) { 2624: /* not in character set */ 2625: *outlen = out - outstart; 2626: *inlen = processed - instart; 2627: return(-2); 2628: } 2629: *out++ = d; 2630: } else { 2631: /* cannot transcode >= U+010000 */ 2632: *outlen = out - outstart; 2633: *inlen = processed - instart; 2634: return(-2); 2635: } 2636: processed = in; 2637: } 2638: *outlen = out - outstart; 2639: *inlen = processed - instart; 2640: return(*outlen); 2641: } 2642: 2643: /** 2644: * ISO8859xToUTF8 2645: * @out: a pointer to an array of bytes to store the result 2646: * @outlen: the length of @out 2647: * @in: a pointer to an array of ISO Latin 1 chars 2648: * @inlen: the length of @in 2649: * 2650: * Take a block of ISO 8859-* chars in and try to convert 
/* it to an UTF-8
 * block of chars out.
 *
 * Each input octet below 0x80 is copied as is; the others are looked up in
 * @unicodetable (indexed by the octet minus 0x80) and written as a two or
 * three octet UTF-8 sequence. Conversion stops early, without error, when
 * @out cannot hold more.
 *
 * Returns the number of octets written if success, or -1 otherwise
 *     (NULL argument, or an input octet whose table entry is 0)
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 * Both are left untouched when an argument is NULL.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: needs room for a full three octet sequence. Written as a
     * distance test so that no pointer before @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more plain octets than @out can still take */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* check the bound first: in may already be at the end of input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * Up to two octets of room may be left when the main loop stops;
     * they can still take plain ASCII.
     */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}


/************************************************************************
 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
 ************************************************************************/
2713: 2714: static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 2715: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2716: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2717: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2718: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2719: 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 2720: 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 2721: 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 2722: 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 2723: 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 2724: 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 2725: 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 2726: 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 2727: 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 2728: 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 2729: 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 2730: 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 2731: }; 2732: 2733: static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 2734: "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2735: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2736: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2737: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2738: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2739: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2740: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2741: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2742: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2743: 
"\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2744: "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2745: "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 2746: "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 2747: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2748: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 2749: "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2750: "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 2751: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2752: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2753: "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 2754: "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 2755: "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 2756: "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 2757: "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2758: "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 2759: "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 2760: "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 2761: }; 2762: 2763: static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 2764: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2765: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2766: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2767: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2768: 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 2769: 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 2770: 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 2771: 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 2772: 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 
0x010a, 0x0108, 0x00c7, 2773: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2774: 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 2775: 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 2776: 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 2777: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2778: 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 2779: 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 2780: }; 2781: 2782: static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 2783: "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2784: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2785: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2786: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2787: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2788: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2789: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2790: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2791: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2792: "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2793: "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 2794: "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 2795: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 2796: "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 2797: "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2798: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2799: "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 2800: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2801: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 
2802: "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2803: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2804: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2805: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2806: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2807: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 2808: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 2809: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 2810: "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2811: "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 2812: "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2813: "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 2814: }; 2815: 2816: static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 2817: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2818: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2819: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2820: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2821: 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 2822: 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 2823: 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 2824: 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 2825: 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 2826: 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 2827: 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 2828: 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 2829: 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 2830: 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 2831: 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 
0x00f6, 0x00f7, 2832: 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 2833: }; 2834: 2835: static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 2836: "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 2837: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2838: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2839: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2840: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2841: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2842: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2843: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2844: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2845: "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 2846: "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2847: "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 2848: "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 2849: "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 2850: "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 2851: "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 2852: "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 2853: "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 2854: "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 2855: "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2856: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 2857: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2858: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2859: "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2860: 
"\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 2861: "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 2862: "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 2863: }; 2864: 2865: static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 2866: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2867: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2868: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2869: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2870: 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 2871: 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 2872: 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 2873: 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 2874: 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 2875: 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 2876: 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 2877: 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 2878: 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 2879: 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 2880: 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 2881: 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 2882: }; 2883: 2884: static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 2885: "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2886: "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2887: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2888: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2889: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2890: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2891: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2892: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2893: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2894: "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 2895: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2896: "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 2897: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2898: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2899: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2900: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2901: "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 2902: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2903: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2904: "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2905: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2906: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2907: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2908: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2909: "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2910: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2911: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2912: }; 2913: 2914: static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 2915: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2916: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2917: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2918: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2919: 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 2920: 0x0000, 0x0000, 0x0000, 0x0000, 
0x060c, 0x00ad, 0x0000, 0x0000, 2921: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2922: 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 2923: 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 2924: 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 2925: 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 2926: 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2927: 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 2928: 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 2929: 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2930: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2931: }; 2932: 2933: static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 2934: "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2935: "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 2936: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2937: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2938: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2939: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2940: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2941: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2942: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2943: "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 2944: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2945: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2946: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2947: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2948: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2949: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 2950: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 2951: "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2952: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 2953: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2954: "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2955: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2956: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2957: }; 2958: 2959: static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 2960: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2961: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2962: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2963: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2964: 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 2965: 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 2966: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 2967: 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 2968: 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 2969: 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 2970: 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 2971: 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 2972: 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 2973: 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 2974: 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 2975: 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 2976: }; 2977: 2978: static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 2979: "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 2980: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2981: 
"\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2982: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2983: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2984: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2985: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2986: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2987: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2988: "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 2989: "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 2990: "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2991: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2992: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2993: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2994: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2995: "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 2996: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2997: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2998: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2999: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3000: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3001: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3002: "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3003: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3004: "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3005: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3006: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3007: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3008: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3009: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3010: }; 3011: 3012: static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3013: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3014: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3015: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3016: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3017: 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3018: 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3019: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3020: 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3021: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3022: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3023: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3024: 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3025: 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3026: 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3027: 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3028: 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3029: }; 3030: 3031: static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3032: "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3033: "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3034: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3035: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3036: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3037: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3038: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3039: 
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3040: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3041: "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3042: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3043: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3044: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3045: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3046: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3047: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3048: "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3049: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3050: "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3051: "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3052: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3053: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3054: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3055: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3056: "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3057: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3058: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3059: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3060: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3061: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3062: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3063: }; 3064: 3065: static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3066: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3067: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3068: 0x0090, 0x0091, 
0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3069: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3070: 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3071: 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3072: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3073: 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3074: 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3075: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3076: 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3077: 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3078: 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3079: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3080: 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3081: 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3082: }; 3083: 3084: static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3085: "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3086: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3087: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3088: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3089: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3090: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3091: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3092: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3093: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3094: "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3095: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3096: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3097: 
"\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3098: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3099: "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3100: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3101: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3102: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3103: "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3104: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3105: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3106: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3107: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3108: }; 3109: 3110: static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3111: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3112: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3113: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3114: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3115: 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3116: 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3117: 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3118: 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3119: 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3120: 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3121: 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3122: 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3123: 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3124: 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3125: 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3126: 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 
0x0138, 3127: }; 3128: 3129: static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3130: "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3131: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3132: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3133: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3134: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3135: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3136: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3137: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3138: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3139: "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3140: "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3141: "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3142: "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3143: "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3144: "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3145: "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3146: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3147: "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3148: "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3149: "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3150: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3151: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3152: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3153: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3154: "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3155: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3156: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3157: "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3158: "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3159: "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3160: "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3161: }; 3162: 3163: static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3164: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3165: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3166: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3167: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3168: 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 3169: 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3170: 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3171: 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3172: 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3173: 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3174: 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3175: 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3176: 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3177: 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3178: 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3179: 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3180: }; 3181: 3182: static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3183: "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3184: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3185: "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3186: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3187: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3188: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3189: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3190: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3191: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3192: "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3193: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3194: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3195: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3196: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3197: "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3198: "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3199: "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3200: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3201: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3202: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3203: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3204: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3205: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3206: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3207: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3208: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3209: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3210: }; 3211: 3212: static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3213: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3214: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3215: 0x0090, 0x0091, 
0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3216: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3217: 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3218: 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3219: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3220: 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3221: 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3222: 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3223: 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3224: 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3225: 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3226: 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3227: 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3228: 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3229: }; 3230: 3231: static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3232: "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3233: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3234: "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3235: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3236: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3237: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3238: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3239: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3240: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3241: "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3242: "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3243: "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3244: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3245: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3246: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3247: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3248: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3249: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3250: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3251: "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3252: "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3253: "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3254: "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3255: "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3256: "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3257: "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3258: "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3259: "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3260: "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3261: "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3262: "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3263: }; 3264: 3265: static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3266: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3267: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3268: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3269: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3270: 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3271: 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3272: 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3273: 0x1e81, 0x1e57, 0x1e83, 0x1e60, 
0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3274: 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3275: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3276: 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3277: 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3278: 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3279: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3280: 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3281: 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3282: }; 3283: 3284: static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3285: "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3286: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3287: "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3288: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3289: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3290: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3291: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3292: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3293: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3294: "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3295: "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3296: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3297: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3298: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3299: "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3300: "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3301: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3302: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3303: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3304: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3305: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3306: "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3307: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3308: "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3309: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3310: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3311: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3312: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3313: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3314: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3315: "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3316: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3317: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3318: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3319: "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3320: "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3321: "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3322: "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3323: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3324: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3325: "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3326: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3327: "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3328: }; 3329: 3330: static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3331: 0x0080, 0x0081, 
0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3332: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3333: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3334: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3335: 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3336: 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3337: 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3338: 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3339: 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3340: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3341: 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3342: 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3343: 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3344: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3345: 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3346: 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3347: }; 3348: 3349: static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3350: "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3351: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3352: "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3353: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3354: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3355: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3356: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3357: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3358: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3359: "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3360: "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 
3361: "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3362: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3363: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3364: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3365: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3366: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3367: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3368: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3369: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3370: "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3371: "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3372: "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3373: "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3374: "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3375: "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3376: "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3377: }; 3378: 3379: static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3380: 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3381: 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3382: 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3383: 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3384: 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3385: 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3386: 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3387: 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3388: 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3389: 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3390: 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 
0x0150, 0x00d6, 0x015a, 3391: 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3392: 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3393: 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3394: 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3395: 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3396: }; 3397: 3398: static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3399: "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3400: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3401: "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3402: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3403: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3404: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3405: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3406: "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3407: "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3408: "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3409: "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3410: "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3411: "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3412: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3413: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3414: "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3415: "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3416: "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3417: "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3418: "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3419: 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3420: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3421: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3422: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3423: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3424: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3425: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3426: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3427: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3428: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3429: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3430: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3431: "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3432: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3433: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3434: "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3435: "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3436: "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3437: "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3438: }; 3439: 3440: 3441: /* 3442: * auto-generated functions for ISO-8859-2 .. 
ISO-8859-16 3443: */ 3444: 3445: static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3446: const unsigned char* in, int *inlen) { 3447: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3448: } 3449: static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3450: const unsigned char* in, int *inlen) { 3451: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3452: } 3453: 3454: static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3455: const unsigned char* in, int *inlen) { 3456: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3457: } 3458: static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3459: const unsigned char* in, int *inlen) { 3460: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3461: } 3462: 3463: static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3464: const unsigned char* in, int *inlen) { 3465: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3466: } 3467: static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3468: const unsigned char* in, int *inlen) { 3469: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3470: } 3471: 3472: static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3473: const unsigned char* in, int *inlen) { 3474: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3475: } 3476: static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3477: const unsigned char* in, int *inlen) { 3478: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3479: } 3480: 3481: static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3482: const unsigned char* in, int *inlen) { 3483: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3484: } 3485: static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3486: const unsigned char* in, int *inlen) { 3487: return 
UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3488: } 3489: 3490: static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3491: const unsigned char* in, int *inlen) { 3492: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3493: } 3494: static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3495: const unsigned char* in, int *inlen) { 3496: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3497: } 3498: 3499: static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3500: const unsigned char* in, int *inlen) { 3501: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3502: } 3503: static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3504: const unsigned char* in, int *inlen) { 3505: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3506: } 3507: 3508: static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3509: const unsigned char* in, int *inlen) { 3510: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3511: } 3512: static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3513: const unsigned char* in, int *inlen) { 3514: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3515: } 3516: 3517: static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3518: const unsigned char* in, int *inlen) { 3519: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3520: } 3521: static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 3522: const unsigned char* in, int *inlen) { 3523: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 3524: } 3525: 3526: static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 3527: const unsigned char* in, int *inlen) { 3528: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 3529: } 3530: static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 
3531: const unsigned char* in, int *inlen) { 3532: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 3533: } 3534: 3535: static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 3536: const unsigned char* in, int *inlen) { 3537: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 3538: } 3539: static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 3540: const unsigned char* in, int *inlen) { 3541: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 3542: } 3543: 3544: static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 3545: const unsigned char* in, int *inlen) { 3546: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 3547: } 3548: static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 3549: const unsigned char* in, int *inlen) { 3550: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 3551: } 3552: 3553: static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 3554: const unsigned char* in, int *inlen) { 3555: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 3556: } 3557: static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 3558: const unsigned char* in, int *inlen) { 3559: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 3560: } 3561: 3562: static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 3563: const unsigned char* in, int *inlen) { 3564: return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 3565: } 3566: static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 3567: const unsigned char* in, int *inlen) { 3568: return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 3569: } 3570: 3571: static void 3572: xmlRegisterCharEncodingHandlersISO8859x (void) { 3573: xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 3574: xmlNewCharEncodingHandler 
("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 3575: xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 3576: xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 3577: xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 3578: xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 3579: xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 3580: xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 3581: xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 3582: xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 3583: xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 3584: xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 3585: xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 3586: xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 3587: } 3588: 3589: #endif 3590: #endif 3591: 3592: #define bottom_encoding 3593: #include "elfgcchack.h"