embedaddon/libxml2/parserInternals.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parserInternals.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:37:57 2012 UTC (12 years, 4 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_7_8, HEAD

libxml2

1: /* 2: * parserInternals.c : Internal routines (and obsolete ones) needed for the 3: * XML and HTML parsers. 4: * 5: * See Copyright for the status of this software. 6: * 7: * daniel@veillard.com 8: */ 9: 10: #define IN_LIBXML 11: #include "libxml.h" 12: 13: #if defined(WIN32) && !defined (__CYGWIN__) 14: #define XML_DIR_SEP '\\' 15: #else 16: #define XML_DIR_SEP '/' 17: #endif 18: 19: #include <string.h> 20: #ifdef HAVE_CTYPE_H 21: #include <ctype.h> 22: #endif 23: #ifdef HAVE_STDLIB_H 24: #include <stdlib.h> 25: #endif 26: #ifdef HAVE_SYS_STAT_H 27: #include <sys/stat.h> 28: #endif 29: #ifdef HAVE_FCNTL_H 30: #include <fcntl.h> 31: #endif 32: #ifdef HAVE_UNISTD_H 33: #include <unistd.h> 34: #endif 35: #ifdef HAVE_ZLIB_H 36: #include <zlib.h> 37: #endif 38: 39: #include <libxml/xmlmemory.h> 40: #include <libxml/tree.h> 41: #include <libxml/parser.h> 42: #include <libxml/parserInternals.h> 43: #include <libxml/valid.h> 44: #include <libxml/entities.h> 45: #include <libxml/xmlerror.h> 46: #include <libxml/encoding.h> 47: #include <libxml/valid.h> 48: #include <libxml/xmlIO.h> 49: #include <libxml/uri.h> 50: #include <libxml/dict.h> 51: #include <libxml/SAX.h> 52: #ifdef LIBXML_CATALOG_ENABLED 53: #include <libxml/catalog.h> 54: #endif 55: #include <libxml/globals.h> 56: #include <libxml/chvalid.h> 57: 58: /* 59: * Various global defaults for parsing 60: */ 61: 62: /** 63: * xmlCheckVersion: 64: * @version: the include version number 65: * 66: * check the compiled lib version against the include one. 67: * This can warn or immediately kill the application 68: */ 69: void 70: xmlCheckVersion(int version) { 71: int myversion = (int) LIBXML_VERSION; 72: 73: xmlInitParser(); 74: 75: if ((myversion / 10000) != (version / 10000)) { 76: xmlGenericError(xmlGenericErrorContext, 77: "Fatal: program compiled against libxml %d using libxml %d\n", 78: (version / 10000), (myversion / 10000)); 79: fprintf(stderr, 80: "Fatal: program compiled against libxml %d using libxml %d\n", 81: (version / 10000), (myversion / 10000)); 82: } 83: if ((myversion / 100) < (version / 100)) { 84: xmlGenericError(xmlGenericErrorContext, 85: "Warning: program compiled against libxml %d using older %d\n", 86: (version / 100), (myversion / 100)); 87: } 88: } 89: 90: 91: /************************************************************************ 92: * * 93: * Some factorized error routines * 94: * * 95: ************************************************************************/ 96: 97: 98: /** 99: * xmlErrMemory: 100: * @ctxt: an XML parser context 101: * @extra: extra informations 102: * 103: * Handle a redefinition of attribute error 104: */ 105: void 106: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 107: { 108: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 109: (ctxt->instate == XML_PARSER_EOF)) 110: return; 111: if (ctxt != NULL) { 112: ctxt->errNo = XML_ERR_NO_MEMORY; 113: ctxt->instate = XML_PARSER_EOF; 114: ctxt->disableSAX = 1; 115: } 116: if (extra) 117: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 118: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 119: NULL, NULL, 0, 0, 120: "Memory allocation failed : %s\n", extra); 121: else 122: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 123: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 124: NULL, NULL, 0, 0, "Memory allocation failed\n"); 125: } 126: 127: /** 128: * __xmlErrEncoding: 129: * @ctxt: an XML parser context 130: * @xmlerr: the error number 131: * @msg: the error message 132: * @str1: an string info 133: * @str2: an string info 134: * 135: * Handle an encoding error 136: */ 137: void 138: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 139: const char *msg, const xmlChar * str1, const xmlChar * str2) 140: { 141: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 142: (ctxt->instate == XML_PARSER_EOF)) 143: return; 144: if (ctxt != NULL) 145: ctxt->errNo = xmlerr; 146: __xmlRaiseError(NULL, NULL, NULL, 147: ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 148: NULL, 0, (const char *) str1, (const char *) str2, 149: NULL, 0, 0, msg, str1, str2); 150: if (ctxt != NULL) { 151: ctxt->wellFormed = 0; 152: if (ctxt->recovery == 0) 153: ctxt->disableSAX = 1; 154: } 155: } 156: 157: /** 158: * xmlErrInternal: 159: * @ctxt: an XML parser context 160: * @msg: the error message 161: * @str: error informations 162: * 163: * Handle an internal error 164: */ 165: static void 166: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 167: { 168: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 169: (ctxt->instate == XML_PARSER_EOF)) 170: return; 171: if (ctxt != NULL) 172: ctxt->errNo = XML_ERR_INTERNAL_ERROR; 173: __xmlRaiseError(NULL, NULL, NULL, 174: ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 175: XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 176: 0, 0, msg, str); 177: if (ctxt != NULL) { 178: ctxt->wellFormed = 0; 179: if (ctxt->recovery == 0) 180: ctxt->disableSAX = 1; 181: } 182: } 183: 184: /** 185: * xmlErrEncodingInt: 186: * @ctxt: an XML parser context 187: * @error: the error number 188: * @msg: the error message 189: * @val: an integer value 190: * 191: * n encoding error 192: */ 193: static void 194: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 195: const char *msg, int val) 196: { 197: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 198: (ctxt->instate == XML_PARSER_EOF)) 199: return; 200: if (ctxt != NULL) 201: ctxt->errNo = error; 202: __xmlRaiseError(NULL, NULL, NULL, 203: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 204: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 205: if (ctxt != NULL) { 206: ctxt->wellFormed = 0; 207: if (ctxt->recovery == 0) 208: ctxt->disableSAX = 1; 209: } 210: } 211: 212: /** 213: * xmlIsLetter: 214: * @c: an unicode character (int) 215: * 216: * Check whether the character is allowed by the production 217: * [84] Letter ::= BaseChar | Ideographic 218: * 219: * Returns 0 if not, non-zero otherwise 220: */ 221: int 222: xmlIsLetter(int c) { 223: return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 224: } 225: 226: /************************************************************************ 227: * * 228: * Input handling functions for progressive parsing * 229: * * 230: ************************************************************************/ 231: 232: /* #define DEBUG_INPUT */ 233: /* #define DEBUG_STACK */ 234: /* #define DEBUG_PUSH */ 235: 236: 237: /* we need to keep enough input to show errors in context */ 238: #define LINE_LEN 80 239: 240: #ifdef DEBUG_INPUT 241: #define CHECK_BUFFER(in) check_buffer(in) 242: 243: static 244: void check_buffer(xmlParserInputPtr in) { 245: if (in->base != in->buf->buffer->content) { 246: xmlGenericError(xmlGenericErrorContext, 247: "xmlParserInput: base mismatch problem\n"); 248: } 249: if (in->cur < in->base) { 250: xmlGenericError(xmlGenericErrorContext, 251: "xmlParserInput: cur < base problem\n"); 252: } 253: if (in->cur > in->base + in->buf->buffer->use) { 254: xmlGenericError(xmlGenericErrorContext, 255: "xmlParserInput: cur > base + use problem\n"); 256: } 257: xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 258: (int) in, (int) in->buf->buffer->content, in->cur - in->base, 259: in->buf->buffer->use, in->buf->buffer->size); 260: } 261: 262: #else 263: #define CHECK_BUFFER(in) 264: #endif 265: 266: 267: /** 268: * xmlParserInputRead: 269: * @in: an XML parser input 270: * @len: an indicative size for the lookahead 271: * 272: * This function refresh the input for the parser. It doesn't try to 273: * preserve pointers to the input buffer, and discard already read data 274: * 275: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 276: * end of this entity 277: */ 278: int 279: xmlParserInputRead(xmlParserInputPtr in, int len) { 280: int ret; 281: int used; 282: int indx; 283: 284: if (in == NULL) return(-1); 285: #ifdef DEBUG_INPUT 286: xmlGenericError(xmlGenericErrorContext, "Read\n"); 287: #endif 288: if (in->buf == NULL) return(-1); 289: if (in->base == NULL) return(-1); 290: if (in->cur == NULL) return(-1); 291: if (in->buf->buffer == NULL) return(-1); 292: if (in->buf->readcallback == NULL) return(-1); 293: 294: CHECK_BUFFER(in); 295: 296: used = in->cur - in->buf->buffer->content; 297: ret = xmlBufferShrink(in->buf->buffer, used); 298: if (ret > 0) { 299: in->cur -= ret; 300: in->consumed += ret; 301: } 302: ret = xmlParserInputBufferRead(in->buf, len); 303: if (in->base != in->buf->buffer->content) { 304: /* 305: * the buffer has been reallocated 306: */ 307: indx = in->cur - in->base; 308: in->base = in->buf->buffer->content; 309: in->cur = &in->buf->buffer->content[indx]; 310: } 311: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 312: 313: CHECK_BUFFER(in); 314: 315: return(ret); 316: } 317: 318: /** 319: * xmlParserInputGrow: 320: * @in: an XML parser input 321: * @len: an indicative size for the lookahead 322: * 323: * This function increase the input for the parser. It tries to 324: * preserve pointers to the input buffer, and keep already read data 325: * 326: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 327: * end of this entity 328: */ 329: int 330: xmlParserInputGrow(xmlParserInputPtr in, int len) { 331: int ret; 332: int indx; 333: 334: if (in == NULL) return(-1); 335: #ifdef DEBUG_INPUT 336: xmlGenericError(xmlGenericErrorContext, "Grow\n"); 337: #endif 338: if (in->buf == NULL) return(-1); 339: if (in->base == NULL) return(-1); 340: if (in->cur == NULL) return(-1); 341: if (in->buf->buffer == NULL) return(-1); 342: 343: CHECK_BUFFER(in); 344: 345: indx = in->cur - in->base; 346: if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 347: 348: CHECK_BUFFER(in); 349: 350: return(0); 351: } 352: if (in->buf->readcallback != NULL) 353: ret = xmlParserInputBufferGrow(in->buf, len); 354: else 355: return(0); 356: 357: /* 358: * NOTE : in->base may be a "dangling" i.e. freed pointer in this 359: * block, but we use it really as an integer to do some 360: * pointer arithmetic. Insure will raise it as a bug but in 361: * that specific case, that's not ! 362: */ 363: if (in->base != in->buf->buffer->content) { 364: /* 365: * the buffer has been reallocated 366: */ 367: indx = in->cur - in->base; 368: in->base = in->buf->buffer->content; 369: in->cur = &in->buf->buffer->content[indx]; 370: } 371: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 372: 373: CHECK_BUFFER(in); 374: 375: return(ret); 376: } 377: 378: /** 379: * xmlParserInputShrink: 380: * @in: an XML parser input 381: * 382: * This function removes used input for the parser. 383: */ 384: void 385: xmlParserInputShrink(xmlParserInputPtr in) { 386: int used; 387: int ret; 388: int indx; 389: 390: #ifdef DEBUG_INPUT 391: xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 392: #endif 393: if (in == NULL) return; 394: if (in->buf == NULL) return; 395: if (in->base == NULL) return; 396: if (in->cur == NULL) return; 397: if (in->buf->buffer == NULL) return; 398: 399: CHECK_BUFFER(in); 400: 401: used = in->cur - in->buf->buffer->content; 402: /* 403: * Do not shrink on large buffers whose only a tiny fraction 404: * was consumed 405: */ 406: if (used > INPUT_CHUNK) { 407: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 408: if (ret > 0) { 409: in->cur -= ret; 410: in->consumed += ret; 411: } 412: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 413: } 414: 415: CHECK_BUFFER(in); 416: 417: if (in->buf->buffer->use > INPUT_CHUNK) { 418: return; 419: } 420: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 421: if (in->base != in->buf->buffer->content) { 422: /* 423: * the buffer has been reallocated 424: */ 425: indx = in->cur - in->base; 426: in->base = in->buf->buffer->content; 427: in->cur = &in->buf->buffer->content[indx]; 428: } 429: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 430: 431: CHECK_BUFFER(in); 432: } 433: 434: /************************************************************************ 435: * * 436: * UTF8 character input and related functions * 437: * * 438: ************************************************************************/ 439: 440: /** 441: * xmlNextChar: 442: * @ctxt: the XML parser context 443: * 444: * Skip to the next char input char. 445: */ 446: 447: void 448: xmlNextChar(xmlParserCtxtPtr ctxt) 449: { 450: if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 451: (ctxt->input == NULL)) 452: return; 453: 454: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 455: if ((*ctxt->input->cur == 0) && 456: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 457: (ctxt->instate != XML_PARSER_COMMENT)) { 458: /* 459: * If we are at the end of the current entity and 460: * the context allows it, we pop consumed entities 461: * automatically. 462: * the auto closing should be blocked in other cases 463: */ 464: xmlPopInput(ctxt); 465: } else { 466: const unsigned char *cur; 467: unsigned char c; 468: 469: /* 470: * 2.11 End-of-Line Handling 471: * the literal two-character sequence "#xD#xA" or a standalone 472: * literal #xD, an XML processor must pass to the application 473: * the single character #xA. 474: */ 475: if (*(ctxt->input->cur) == '\n') { 476: ctxt->input->line++; ctxt->input->col = 1; 477: } else 478: ctxt->input->col++; 479: 480: /* 481: * We are supposed to handle UTF8, check it's valid 482: * From rfc2044: encoding of the Unicode values on UTF-8: 483: * 484: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 485: * 0000 0000-0000 007F 0xxxxxxx 486: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 487: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 488: * 489: * Check for the 0x110000 limit too 490: */ 491: cur = ctxt->input->cur; 492: 493: c = *cur; 494: if (c & 0x80) { 495: if (c == 0xC0) 496: goto encoding_error; 497: if (cur[1] == 0) { 498: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 499: cur = ctxt->input->cur; 500: } 501: if ((cur[1] & 0xc0) != 0x80) 502: goto encoding_error; 503: if ((c & 0xe0) == 0xe0) { 504: unsigned int val; 505: 506: if (cur[2] == 0) { 507: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 508: cur = ctxt->input->cur; 509: } 510: if ((cur[2] & 0xc0) != 0x80) 511: goto encoding_error; 512: if ((c & 0xf0) == 0xf0) { 513: if (cur[3] == 0) { 514: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 515: cur = ctxt->input->cur; 516: } 517: if (((c & 0xf8) != 0xf0) || 518: ((cur[3] & 0xc0) != 0x80)) 519: goto encoding_error; 520: /* 4-byte code */ 521: ctxt->input->cur += 4; 522: val = (cur[0] & 0x7) << 18; 523: val |= (cur[1] & 0x3f) << 12; 524: val |= (cur[2] & 0x3f) << 6; 525: val |= cur[3] & 0x3f; 526: } else { 527: /* 3-byte code */ 528: ctxt->input->cur += 3; 529: val = (cur[0] & 0xf) << 12; 530: val |= (cur[1] & 0x3f) << 6; 531: val |= cur[2] & 0x3f; 532: } 533: if (((val > 0xd7ff) && (val < 0xe000)) || 534: ((val > 0xfffd) && (val < 0x10000)) || 535: (val >= 0x110000)) { 536: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 537: "Char 0x%X out of allowed range\n", 538: val); 539: } 540: } else 541: /* 2-byte code */ 542: ctxt->input->cur += 2; 543: } else 544: /* 1-byte code */ 545: ctxt->input->cur++; 546: 547: ctxt->nbChars++; 548: if (*ctxt->input->cur == 0) 549: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 550: } 551: } else { 552: /* 553: * Assume it's a fixed length encoding (1) with 554: * a compatible encoding for the ASCII set, since 555: * XML constructs only use < 128 chars 556: */ 557: 558: if (*(ctxt->input->cur) == '\n') { 559: ctxt->input->line++; ctxt->input->col = 1; 560: } else 561: ctxt->input->col++; 562: ctxt->input->cur++; 563: ctxt->nbChars++; 564: if (*ctxt->input->cur == 0) 565: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 566: } 567: if ((*ctxt->input->cur == '%') && (!ctxt->html)) 568: xmlParserHandlePEReference(ctxt); 569: if ((*ctxt->input->cur == 0) && 570: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 571: xmlPopInput(ctxt); 572: return; 573: encoding_error: 574: /* 575: * If we detect an UTF8 error that probably mean that the 576: * input encoding didn't get properly advertised in the 577: * declaration header. Report the error and switch the encoding 578: * to ISO-Latin-1 (if you don't like this policy, just declare the 579: * encoding !) 580: */ 581: if ((ctxt == NULL) || (ctxt->input == NULL) || 582: (ctxt->input->end - ctxt->input->cur < 4)) { 583: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 584: "Input is not proper UTF-8, indicate encoding !\n", 585: NULL, NULL); 586: } else { 587: char buffer[150]; 588: 589: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 590: ctxt->input->cur[0], ctxt->input->cur[1], 591: ctxt->input->cur[2], ctxt->input->cur[3]); 592: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 593: "Input is not proper UTF-8, indicate encoding !\n%s", 594: BAD_CAST buffer, NULL); 595: } 596: ctxt->charset = XML_CHAR_ENCODING_8859_1; 597: ctxt->input->cur++; 598: return; 599: } 600: 601: /** 602: * xmlCurrentChar: 603: * @ctxt: the XML parser context 604: * @len: pointer to the length of the char read 605: * 606: * The current char value, if using UTF-8 this may actually span multiple 607: * bytes in the input buffer. Implement the end of line normalization: 608: * 2.11 End-of-Line Handling 609: * Wherever an external parsed entity or the literal entity value 610: * of an internal parsed entity contains either the literal two-character 611: * sequence "#xD#xA" or a standalone literal #xD, an XML processor 612: * must pass to the application the single character #xA. 613: * This behavior can conveniently be produced by normalizing all 614: * line breaks to #xA on input, before parsing.) 615: * 616: * Returns the current char value and its length 617: */ 618: 619: int 620: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 621: if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 622: if (ctxt->instate == XML_PARSER_EOF) 623: return(0); 624: 625: if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 626: *len = 1; 627: return((int) *ctxt->input->cur); 628: } 629: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 630: /* 631: * We are supposed to handle UTF8, check it's valid 632: * From rfc2044: encoding of the Unicode values on UTF-8: 633: * 634: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 635: * 0000 0000-0000 007F 0xxxxxxx 636: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 637: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 638: * 639: * Check for the 0x110000 limit too 640: */ 641: const unsigned char *cur = ctxt->input->cur; 642: unsigned char c; 643: unsigned int val; 644: 645: c = *cur; 646: if (c & 0x80) { 647: if (((c & 0x40) == 0) || (c == 0xC0)) 648: goto encoding_error; 649: if (cur[1] == 0) { 650: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 651: cur = ctxt->input->cur; 652: } 653: if ((cur[1] & 0xc0) != 0x80) 654: goto encoding_error; 655: if ((c & 0xe0) == 0xe0) { 656: if (cur[2] == 0) { 657: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 658: cur = ctxt->input->cur; 659: } 660: if ((cur[2] & 0xc0) != 0x80) 661: goto encoding_error; 662: if ((c & 0xf0) == 0xf0) { 663: if (cur[3] == 0) { 664: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 665: cur = ctxt->input->cur; 666: } 667: if (((c & 0xf8) != 0xf0) || 668: ((cur[3] & 0xc0) != 0x80)) 669: goto encoding_error; 670: /* 4-byte code */ 671: *len = 4; 672: val = (cur[0] & 0x7) << 18; 673: val |= (cur[1] & 0x3f) << 12; 674: val |= (cur[2] & 0x3f) << 6; 675: val |= cur[3] & 0x3f; 676: if (val < 0x10000) 677: goto encoding_error; 678: } else { 679: /* 3-byte code */ 680: *len = 3; 681: val = (cur[0] & 0xf) << 12; 682: val |= (cur[1] & 0x3f) << 6; 683: val |= cur[2] & 0x3f; 684: if (val < 0x800) 685: goto encoding_error; 686: } 687: } else { 688: /* 2-byte code */ 689: *len = 2; 690: val = (cur[0] & 0x1f) << 6; 691: val |= cur[1] & 0x3f; 692: if (val < 0x80) 693: goto encoding_error; 694: } 695: if (!IS_CHAR(val)) { 696: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 697: "Char 0x%X out of allowed range\n", val); 698: } 699: return(val); 700: } else { 701: /* 1-byte code */ 702: *len = 1; 703: if (*ctxt->input->cur == 0) 704: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 705: if ((*ctxt->input->cur == 0) && 706: (ctxt->input->end > ctxt->input->cur)) { 707: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 708: "Char 0x0 out of allowed range\n", 0); 709: } 710: if (*ctxt->input->cur == 0xD) { 711: if (ctxt->input->cur[1] == 0xA) { 712: ctxt->nbChars++; 713: ctxt->input->cur++; 714: } 715: return(0xA); 716: } 717: return((int) *ctxt->input->cur); 718: } 719: } 720: /* 721: * Assume it's a fixed length encoding (1) with 722: * a compatible encoding for the ASCII set, since 723: * XML constructs only use < 128 chars 724: */ 725: *len = 1; 726: if (*ctxt->input->cur == 0xD) { 727: if (ctxt->input->cur[1] == 0xA) { 728: ctxt->nbChars++; 729: ctxt->input->cur++; 730: } 731: return(0xA); 732: } 733: return((int) *ctxt->input->cur); 734: encoding_error: 735: /* 736: * An encoding problem may arise from a truncated input buffer 737: * splitting a character in the middle. In that case do not raise 738: * an error but return 0 to endicate an end of stream problem 739: */ 740: if (ctxt->input->end - ctxt->input->cur < 4) { 741: *len = 0; 742: return(0); 743: } 744: 745: /* 746: * If we detect an UTF8 error that probably mean that the 747: * input encoding didn't get properly advertised in the 748: * declaration header. Report the error and switch the encoding 749: * to ISO-Latin-1 (if you don't like this policy, just declare the 750: * encoding !) 751: */ 752: { 753: char buffer[150]; 754: 755: snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 756: ctxt->input->cur[0], ctxt->input->cur[1], 757: ctxt->input->cur[2], ctxt->input->cur[3]); 758: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 759: "Input is not proper UTF-8, indicate encoding !\n%s", 760: BAD_CAST buffer, NULL); 761: } 762: ctxt->charset = XML_CHAR_ENCODING_8859_1; 763: *len = 1; 764: return((int) *ctxt->input->cur); 765: } 766: 767: /** 768: * xmlStringCurrentChar: 769: * @ctxt: the XML parser context 770: * @cur: pointer to the beginning of the char 771: * @len: pointer to the length of the char read 772: * 773: * The current char value, if using UTF-8 this may actually span multiple 774: * bytes in the input buffer. 775: * 776: * Returns the current char value and its length 777: */ 778: 779: int 780: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 781: { 782: if ((len == NULL) || (cur == NULL)) return(0); 783: if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 784: /* 785: * We are supposed to handle UTF8, check it's valid 786: * From rfc2044: encoding of the Unicode values on UTF-8: 787: * 788: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 789: * 0000 0000-0000 007F 0xxxxxxx 790: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 791: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 792: * 793: * Check for the 0x110000 limit too 794: */ 795: unsigned char c; 796: unsigned int val; 797: 798: c = *cur; 799: if (c & 0x80) { 800: if ((cur[1] & 0xc0) != 0x80) 801: goto encoding_error; 802: if ((c & 0xe0) == 0xe0) { 803: 804: if ((cur[2] & 0xc0) != 0x80) 805: goto encoding_error; 806: if ((c & 0xf0) == 0xf0) { 807: if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 808: goto encoding_error; 809: /* 4-byte code */ 810: *len = 4; 811: val = (cur[0] & 0x7) << 18; 812: val |= (cur[1] & 0x3f) << 12; 813: val |= (cur[2] & 0x3f) << 6; 814: val |= cur[3] & 0x3f; 815: } else { 816: /* 3-byte code */ 817: *len = 3; 818: val = (cur[0] & 0xf) << 12; 819: val |= (cur[1] & 0x3f) << 6; 820: val |= cur[2] & 0x3f; 821: } 822: } else { 823: /* 2-byte code */ 824: *len = 2; 825: val = (cur[0] & 0x1f) << 6; 826: val |= cur[1] & 0x3f; 827: } 828: if (!IS_CHAR(val)) { 829: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 830: "Char 0x%X out of allowed range\n", val); 831: } 832: return (val); 833: } else { 834: /* 1-byte code */ 835: *len = 1; 836: return ((int) *cur); 837: } 838: } 839: /* 840: * Assume it's a fixed length encoding (1) with 841: * a compatible encoding for the ASCII set, since 842: * XML constructs only use < 128 chars 843: */ 844: *len = 1; 845: return ((int) *cur); 846: encoding_error: 847: 848: /* 849: * An encoding problem may arise from a truncated input buffer 850: * splitting a character in the middle. In that case do not raise 851: * an error but return 0 to endicate an end of stream problem 852: */ 853: if ((ctxt == NULL) || (ctxt->input == NULL) || 854: (ctxt->input->end - ctxt->input->cur < 4)) { 855: *len = 0; 856: return(0); 857: } 858: /* 859: * If we detect an UTF8 error that probably mean that the 860: * input encoding didn't get properly advertised in the 861: * declaration header. Report the error and switch the encoding 862: * to ISO-Latin-1 (if you don't like this policy, just declare the 863: * encoding !) 864: */ 865: { 866: char buffer[150]; 867: 868: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 869: ctxt->input->cur[0], ctxt->input->cur[1], 870: ctxt->input->cur[2], ctxt->input->cur[3]); 871: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 872: "Input is not proper UTF-8, indicate encoding !\n%s", 873: BAD_CAST buffer, NULL); 874: } 875: *len = 1; 876: return ((int) *cur); 877: } 878: 879: /** 880: * xmlCopyCharMultiByte: 881: * @out: pointer to an array of xmlChar 882: * @val: the char value 883: * 884: * append the char value in the array 885: * 886: * Returns the number of xmlChar written 887: */ 888: int 889: xmlCopyCharMultiByte(xmlChar *out, int val) { 890: if (out == NULL) return(0); 891: /* 892: * We are supposed to handle UTF8, check it's valid 893: * From rfc2044: encoding of the Unicode values on UTF-8: 894: * 895: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 896: * 0000 0000-0000 007F 0xxxxxxx 897: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 898: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 899: */ 900: if (val >= 0x80) { 901: xmlChar *savedout = out; 902: int bits; 903: if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 904: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 905: else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 906: else { 907: xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 908: "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 909: val); 910: return(0); 911: } 912: for ( ; bits >= 0; bits-= 6) 913: *out++= ((val >> bits) & 0x3F) | 0x80 ; 914: return (out - savedout); 915: } 916: *out = (xmlChar) val; 917: return 1; 918: } 919: 920: /** 921: * xmlCopyChar: 922: * @len: Ignored, compatibility 923: * @out: pointer to an array of xmlChar 924: * @val: the char value 925: * 926: * append the char value in the array 927: * 928: * Returns the number of xmlChar written 929: */ 930: 931: int 932: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 933: if (out == NULL) return(0); 934: /* the len parameter is ignored */ 935: if (val >= 0x80) { 936: return(xmlCopyCharMultiByte (out, val)); 937: } 938: *out = (xmlChar) val; 939: return 1; 940: } 941: 942: /************************************************************************ 943: * * 944: * Commodity functions to switch encodings * 945: * * 946: ************************************************************************/ 947: 948: /* defined in encoding.c, not public */ 949: int 950: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 951: xmlBufferPtr in, int len); 952: 953: static int 954: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 955: xmlCharEncodingHandlerPtr handler, int len); 956: static int 957: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 958: xmlCharEncodingHandlerPtr handler, int len); 959: /** 960: * xmlSwitchEncoding: 961: * @ctxt: the parser context 962: * @enc: the encoding value (number) 963: * 964: * change the input functions when discovering the character encoding 965: * of a given entity. 966: * 967: * Returns 0 in case of success, -1 otherwise 968: */ 969: int 970: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 971: { 972: xmlCharEncodingHandlerPtr handler; 973: int len = -1; 974: 975: if (ctxt == NULL) return(-1); 976: switch (enc) { 977: case XML_CHAR_ENCODING_ERROR: 978: __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 979: "encoding unknown\n", NULL, NULL); 980: return(-1); 981: case XML_CHAR_ENCODING_NONE: 982: /* let's assume it's UTF-8 without the XML decl */ 983: ctxt->charset = XML_CHAR_ENCODING_UTF8; 984: return(0); 985: case XML_CHAR_ENCODING_UTF8: 986: /* default encoding, no conversion should be needed */ 987: ctxt->charset = XML_CHAR_ENCODING_UTF8; 988: 989: /* 990: * Errata on XML-1.0 June 20 2001 991: * Specific handling of the Byte Order Mark for 992: * UTF-8 993: */ 994: if ((ctxt->input != NULL) && 995: (ctxt->input->cur[0] == 0xEF) && 996: (ctxt->input->cur[1] == 0xBB) && 997: (ctxt->input->cur[2] == 0xBF)) { 998: ctxt->input->cur += 3; 999: } 1000: return(0); 1001: case XML_CHAR_ENCODING_UTF16LE: 1002: case XML_CHAR_ENCODING_UTF16BE: 1003: /*The raw input characters are encoded 1004: *in UTF-16. As we expect this function 1005: *to be called after xmlCharEncInFunc, we expect 1006: *ctxt->input->cur to contain UTF-8 encoded characters. 1007: *So the raw UTF16 Byte Order Mark 1008: *has also been converted into 1009: *an UTF-8 BOM. Let's skip that BOM. 1010: */ 1011: if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 1012: (ctxt->input->cur[0] == 0xEF) && 1013: (ctxt->input->cur[1] == 0xBB) && 1014: (ctxt->input->cur[2] == 0xBF)) { 1015: ctxt->input->cur += 3; 1016: } 1017: len = 90; 1018: break; 1019: case XML_CHAR_ENCODING_UCS2: 1020: len = 90; 1021: break; 1022: case XML_CHAR_ENCODING_UCS4BE: 1023: case XML_CHAR_ENCODING_UCS4LE: 1024: case XML_CHAR_ENCODING_UCS4_2143: 1025: case XML_CHAR_ENCODING_UCS4_3412: 1026: len = 180; 1027: break; 1028: case XML_CHAR_ENCODING_EBCDIC: 1029: case XML_CHAR_ENCODING_8859_1: 1030: case XML_CHAR_ENCODING_8859_2: 1031: case XML_CHAR_ENCODING_8859_3: 1032: case XML_CHAR_ENCODING_8859_4: 1033: case XML_CHAR_ENCODING_8859_5: 1034: case XML_CHAR_ENCODING_8859_6: 1035: case XML_CHAR_ENCODING_8859_7: 1036: case XML_CHAR_ENCODING_8859_8: 1037: case XML_CHAR_ENCODING_8859_9: 1038: case XML_CHAR_ENCODING_ASCII: 1039: case XML_CHAR_ENCODING_2022_JP: 1040: case XML_CHAR_ENCODING_SHIFT_JIS: 1041: case XML_CHAR_ENCODING_EUC_JP: 1042: len = 45; 1043: break; 1044: } 1045: handler = xmlGetCharEncodingHandler(enc); 1046: if (handler == NULL) { 1047: /* 1048: * Default handlers. 1049: */ 1050: switch (enc) { 1051: case XML_CHAR_ENCODING_ASCII: 1052: /* default encoding, no conversion should be needed */ 1053: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1054: return(0); 1055: case XML_CHAR_ENCODING_UTF16LE: 1056: break; 1057: case XML_CHAR_ENCODING_UTF16BE: 1058: break; 1059: case XML_CHAR_ENCODING_UCS4LE: 1060: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1061: "encoding not supported %s\n", 1062: BAD_CAST "USC4 little endian", NULL); 1063: break; 1064: case XML_CHAR_ENCODING_UCS4BE: 1065: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1066: "encoding not supported %s\n", 1067: BAD_CAST "USC4 big endian", NULL); 1068: break; 1069: case XML_CHAR_ENCODING_EBCDIC: 1070: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1071: "encoding not supported %s\n", 1072: BAD_CAST "EBCDIC", NULL); 1073: break; 1074: case XML_CHAR_ENCODING_UCS4_2143: 1075: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1076: "encoding not supported %s\n", 1077: BAD_CAST "UCS4 2143", NULL); 1078: break; 1079: case XML_CHAR_ENCODING_UCS4_3412: 1080: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1081: "encoding not supported %s\n", 1082: BAD_CAST "UCS4 3412", NULL); 1083: break; 1084: case XML_CHAR_ENCODING_UCS2: 1085: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1086: "encoding not supported %s\n", 1087: BAD_CAST "UCS2", NULL); 1088: break; 1089: case XML_CHAR_ENCODING_8859_1: 1090: case XML_CHAR_ENCODING_8859_2: 1091: case XML_CHAR_ENCODING_8859_3: 1092: case XML_CHAR_ENCODING_8859_4: 1093: case XML_CHAR_ENCODING_8859_5: 1094: case XML_CHAR_ENCODING_8859_6: 1095: case XML_CHAR_ENCODING_8859_7: 1096: case XML_CHAR_ENCODING_8859_8: 1097: case XML_CHAR_ENCODING_8859_9: 1098: /* 1099: * We used to keep the internal content in the 1100: * document encoding however this turns being unmaintainable 1101: * So xmlGetCharEncodingHandler() will return non-null 1102: * values for this now. 1103: */ 1104: if ((ctxt->inputNr == 1) && 1105: (ctxt->encoding == NULL) && 1106: (ctxt->input != NULL) && 1107: (ctxt->input->encoding != NULL)) { 1108: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1109: } 1110: ctxt->charset = enc; 1111: return(0); 1112: case XML_CHAR_ENCODING_2022_JP: 1113: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1114: "encoding not supported %s\n", 1115: BAD_CAST "ISO-2022-JP", NULL); 1116: break; 1117: case XML_CHAR_ENCODING_SHIFT_JIS: 1118: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1119: "encoding not supported %s\n", 1120: BAD_CAST "Shift_JIS", NULL); 1121: break; 1122: case XML_CHAR_ENCODING_EUC_JP: 1123: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1124: "encoding not supported %s\n", 1125: BAD_CAST "EUC-JP", NULL); 1126: break; 1127: default: 1128: break; 1129: } 1130: } 1131: if (handler == NULL) 1132: return(-1); 1133: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1134: return(xmlSwitchToEncodingInt(ctxt, handler, len)); 1135: } 1136: 1137: /** 1138: * xmlSwitchInputEncoding: 1139: * @ctxt: the parser context 1140: * @input: the input stream 1141: * @handler: the encoding handler 1142: * @len: the number of bytes to convert for the first line or -1 1143: * 1144: * change the input functions when discovering the character encoding 1145: * of a given entity. 1146: * 1147: * Returns 0 in case of success, -1 otherwise 1148: */ 1149: static int 1150: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1151: xmlCharEncodingHandlerPtr handler, int len) 1152: { 1153: int nbchars; 1154: 1155: if (handler == NULL) 1156: return (-1); 1157: if (input == NULL) 1158: return (-1); 1159: if (input->buf != NULL) { 1160: if (input->buf->encoder != NULL) { 1161: /* 1162: * Check in case the auto encoding detetection triggered 1163: * in already. 1164: */ 1165: if (input->buf->encoder == handler) 1166: return (0); 1167: 1168: /* 1169: * "UTF-16" can be used for both LE and BE 1170: if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1171: BAD_CAST "UTF-16", 6)) && 1172: (!xmlStrncmp(BAD_CAST handler->name, 1173: BAD_CAST "UTF-16", 6))) { 1174: return(0); 1175: } 1176: */ 1177: 1178: /* 1179: * Note: this is a bit dangerous, but that's what it 1180: * takes to use nearly compatible signature for different 1181: * encodings. 1182: */ 1183: xmlCharEncCloseFunc(input->buf->encoder); 1184: input->buf->encoder = handler; 1185: return (0); 1186: } 1187: input->buf->encoder = handler; 1188: 1189: /* 1190: * Is there already some content down the pipe to convert ? 1191: */ 1192: if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 1193: int processed; 1194: unsigned int use; 1195: 1196: /* 1197: * Specific handling of the Byte Order Mark for 1198: * UTF-16 1199: */ 1200: if ((handler->name != NULL) && 1201: (!strcmp(handler->name, "UTF-16LE") || 1202: !strcmp(handler->name, "UTF-16")) && 1203: (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1204: input->cur += 2; 1205: } 1206: if ((handler->name != NULL) && 1207: (!strcmp(handler->name, "UTF-16BE")) && 1208: (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1209: input->cur += 2; 1210: } 1211: /* 1212: * Errata on XML-1.0 June 20 2001 1213: * Specific handling of the Byte Order Mark for 1214: * UTF-8 1215: */ 1216: if ((handler->name != NULL) && 1217: (!strcmp(handler->name, "UTF-8")) && 1218: (input->cur[0] == 0xEF) && 1219: (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1220: input->cur += 3; 1221: } 1222: 1223: /* 1224: * Shrink the current input buffer. 1225: * Move it as the raw buffer and create a new input buffer 1226: */ 1227: processed = input->cur - input->base; 1228: xmlBufferShrink(input->buf->buffer, processed); 1229: input->buf->raw = input->buf->buffer; 1230: input->buf->buffer = xmlBufferCreate(); 1231: input->buf->rawconsumed = processed; 1232: use = input->buf->raw->use; 1233: 1234: if (ctxt->html) { 1235: /* 1236: * convert as much as possible of the buffer 1237: */ 1238: nbchars = xmlCharEncInFunc(input->buf->encoder, 1239: input->buf->buffer, 1240: input->buf->raw); 1241: } else { 1242: /* 1243: * convert just enough to get 1244: * '<?xml version="1.0" encoding="xxx"?>' 1245: * parsed with the autodetected encoding 1246: * into the parser reading buffer. 1247: */ 1248: nbchars = xmlCharEncFirstLineInt(input->buf->encoder, 1249: input->buf->buffer, 1250: input->buf->raw, 1251: len); 1252: } 1253: if (nbchars < 0) { 1254: xmlErrInternal(ctxt, 1255: "switching encoding: encoder error\n", 1256: NULL); 1257: return (-1); 1258: } 1259: input->buf->rawconsumed += use - input->buf->raw->use; 1260: input->base = input->cur = input->buf->buffer->content; 1261: input->end = &input->base[input->buf->buffer->use]; 1262: 1263: } 1264: return (0); 1265: } else if (input->length == 0) { 1266: /* 1267: * When parsing a static memory array one must know the 1268: * size to be able to convert the buffer. 1269: */ 1270: xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1271: return (-1); 1272: } 1273: return (0); 1274: } 1275: 1276: /** 1277: * xmlSwitchInputEncoding: 1278: * @ctxt: the parser context 1279: * @input: the input stream 1280: * @handler: the encoding handler 1281: * 1282: * change the input functions when discovering the character encoding 1283: * of a given entity. 1284: * 1285: * Returns 0 in case of success, -1 otherwise 1286: */ 1287: int 1288: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1289: xmlCharEncodingHandlerPtr handler) { 1290: return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1291: } 1292: 1293: /** 1294: * xmlSwitchToEncodingInt: 1295: * @ctxt: the parser context 1296: * @handler: the encoding handler 1297: * @len: the lenght to convert or -1 1298: * 1299: * change the input functions when discovering the character encoding 1300: * of a given entity, and convert only @len bytes of the output, this 1301: * is needed on auto detect to allows any declared encoding later to 1302: * convert the actual content after the xmlDecl 1303: * 1304: * Returns 0 in case of success, -1 otherwise 1305: */ 1306: static int 1307: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1308: xmlCharEncodingHandlerPtr handler, int len) { 1309: int ret = 0; 1310: 1311: if (handler != NULL) { 1312: if (ctxt->input != NULL) { 1313: ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1314: } else { 1315: xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1316: NULL); 1317: return(-1); 1318: } 1319: /* 1320: * The parsing is now done in UTF8 natively 1321: */ 1322: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1323: } else 1324: return(-1); 1325: return(ret); 1326: } 1327: 1328: /** 1329: * xmlSwitchToEncoding: 1330: * @ctxt: the parser context 1331: * @handler: the encoding handler 1332: * 1333: * change the input functions when discovering the character encoding 1334: * of a given entity. 1335: * 1336: * Returns 0 in case of success, -1 otherwise 1337: */ 1338: int 1339: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1340: { 1341: return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1342: } 1343: 1344: /************************************************************************ 1345: * * 1346: * Commodity functions to handle entities processing * 1347: * * 1348: ************************************************************************/ 1349: 1350: /** 1351: * xmlFreeInputStream: 1352: * @input: an xmlParserInputPtr 1353: * 1354: * Free up an input stream. 1355: */ 1356: void 1357: xmlFreeInputStream(xmlParserInputPtr input) { 1358: if (input == NULL) return; 1359: 1360: if (input->filename != NULL) xmlFree((char *) input->filename); 1361: if (input->directory != NULL) xmlFree((char *) input->directory); 1362: if (input->encoding != NULL) xmlFree((char *) input->encoding); 1363: if (input->version != NULL) xmlFree((char *) input->version); 1364: if ((input->free != NULL) && (input->base != NULL)) 1365: input->free((xmlChar *) input->base); 1366: if (input->buf != NULL) 1367: xmlFreeParserInputBuffer(input->buf); 1368: xmlFree(input); 1369: } 1370: 1371: /** 1372: * xmlNewInputStream: 1373: * @ctxt: an XML parser context 1374: * 1375: * Create a new input stream structure 1376: * Returns the new input stream or NULL 1377: */ 1378: xmlParserInputPtr 1379: xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1380: xmlParserInputPtr input; 1381: static int id = 0; 1382: 1383: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1384: if (input == NULL) { 1385: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1386: return(NULL); 1387: } 1388: memset(input, 0, sizeof(xmlParserInput)); 1389: input->line = 1; 1390: input->col = 1; 1391: input->standalone = -1; 1392: /* 1393: * we don't care about thread reentrancy unicity for a single 1394: * parser context (and hence thread) is sufficient. 1395: */ 1396: input->id = id++; 1397: return(input); 1398: } 1399: 1400: /** 1401: * xmlNewIOInputStream: 1402: * @ctxt: an XML parser context 1403: * @input: an I/O Input 1404: * @enc: the charset encoding if known 1405: * 1406: * Create a new input stream structure encapsulating the @input into 1407: * a stream suitable for the parser. 1408: * 1409: * Returns the new input stream or NULL 1410: */ 1411: xmlParserInputPtr 1412: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1413: xmlCharEncoding enc) { 1414: xmlParserInputPtr inputStream; 1415: 1416: if (input == NULL) return(NULL); 1417: if (xmlParserDebugEntities) 1418: xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1419: inputStream = xmlNewInputStream(ctxt); 1420: if (inputStream == NULL) { 1421: return(NULL); 1422: } 1423: inputStream->filename = NULL; 1424: inputStream->buf = input; 1425: inputStream->base = inputStream->buf->buffer->content; 1426: inputStream->cur = inputStream->buf->buffer->content; 1427: inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1428: if (enc != XML_CHAR_ENCODING_NONE) { 1429: xmlSwitchEncoding(ctxt, enc); 1430: } 1431: 1432: return(inputStream); 1433: } 1434: 1435: /** 1436: * xmlNewEntityInputStream: 1437: * @ctxt: an XML parser context 1438: * @entity: an Entity pointer 1439: * 1440: * Create a new input stream based on an xmlEntityPtr 1441: * 1442: * Returns the new input stream or NULL 1443: */ 1444: xmlParserInputPtr 1445: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1446: xmlParserInputPtr input; 1447: 1448: if (entity == NULL) { 1449: xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1450: NULL); 1451: return(NULL); 1452: } 1453: if (xmlParserDebugEntities) 1454: xmlGenericError(xmlGenericErrorContext, 1455: "new input from entity: %s\n", entity->name); 1456: if (entity->content == NULL) { 1457: switch (entity->etype) { 1458: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1459: xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1460: entity->name); 1461: break; 1462: case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1463: case XML_EXTERNAL_PARAMETER_ENTITY: 1464: return(xmlLoadExternalEntity((char *) entity->URI, 1465: (char *) entity->ExternalID, ctxt)); 1466: case XML_INTERNAL_GENERAL_ENTITY: 1467: xmlErrInternal(ctxt, 1468: "Internal entity %s without content !\n", 1469: entity->name); 1470: break; 1471: case XML_INTERNAL_PARAMETER_ENTITY: 1472: xmlErrInternal(ctxt, 1473: "Internal parameter entity %s without content !\n", 1474: entity->name); 1475: break; 1476: case XML_INTERNAL_PREDEFINED_ENTITY: 1477: xmlErrInternal(ctxt, 1478: "Predefined entity %s without content !\n", 1479: entity->name); 1480: break; 1481: } 1482: return(NULL); 1483: } 1484: input = xmlNewInputStream(ctxt); 1485: if (input == NULL) { 1486: return(NULL); 1487: } 1488: if (entity->URI != NULL) 1489: input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1490: input->base = entity->content; 1491: input->cur = entity->content; 1492: input->length = entity->length; 1493: input->end = &entity->content[input->length]; 1494: return(input); 1495: } 1496: 1497: /** 1498: * xmlNewStringInputStream: 1499: * @ctxt: an XML parser context 1500: * @buffer: an memory buffer 1501: * 1502: * Create a new input stream based on a memory buffer. 1503: * Returns the new input stream 1504: */ 1505: xmlParserInputPtr 1506: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1507: xmlParserInputPtr input; 1508: 1509: if (buffer == NULL) { 1510: xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1511: NULL); 1512: return(NULL); 1513: } 1514: if (xmlParserDebugEntities) 1515: xmlGenericError(xmlGenericErrorContext, 1516: "new fixed input: %.30s\n", buffer); 1517: input = xmlNewInputStream(ctxt); 1518: if (input == NULL) { 1519: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1520: return(NULL); 1521: } 1522: input->base = buffer; 1523: input->cur = buffer; 1524: input->length = xmlStrlen(buffer); 1525: input->end = &buffer[input->length]; 1526: return(input); 1527: } 1528: 1529: /** 1530: * xmlNewInputFromFile: 1531: * @ctxt: an XML parser context 1532: * @filename: the filename to use as entity 1533: * 1534: * Create a new input stream based on a file or an URL. 1535: * 1536: * Returns the new input stream or NULL in case of error 1537: */ 1538: xmlParserInputPtr 1539: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1540: xmlParserInputBufferPtr buf; 1541: xmlParserInputPtr inputStream; 1542: char *directory = NULL; 1543: xmlChar *URI = NULL; 1544: 1545: if (xmlParserDebugEntities) 1546: xmlGenericError(xmlGenericErrorContext, 1547: "new input from file: %s\n", filename); 1548: if (ctxt == NULL) return(NULL); 1549: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1550: if (buf == NULL) { 1551: if (filename == NULL) 1552: __xmlLoaderErr(ctxt, 1553: "failed to load external entity: NULL filename \n", 1554: NULL); 1555: else 1556: __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1557: (const char *) filename); 1558: return(NULL); 1559: } 1560: 1561: inputStream = xmlNewInputStream(ctxt); 1562: if (inputStream == NULL) 1563: return(NULL); 1564: 1565: inputStream->buf = buf; 1566: inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1567: if (inputStream == NULL) 1568: return(NULL); 1569: 1570: if (inputStream->filename == NULL) 1571: URI = xmlStrdup((xmlChar *) filename); 1572: else 1573: URI = xmlStrdup((xmlChar *) inputStream->filename); 1574: directory = xmlParserGetDirectory((const char *) URI); 1575: if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1576: inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1577: if (URI != NULL) xmlFree((char *) URI); 1578: inputStream->directory = directory; 1579: 1580: inputStream->base = inputStream->buf->buffer->content; 1581: inputStream->cur = inputStream->buf->buffer->content; 1582: inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1583: if ((ctxt->directory == NULL) && (directory != NULL)) 1584: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1585: return(inputStream); 1586: } 1587: 1588: /************************************************************************ 1589: * * 1590: * Commodity functions to handle parser contexts * 1591: * * 1592: ************************************************************************/ 1593: 1594: /** 1595: * xmlInitParserCtxt: 1596: * @ctxt: an XML parser context 1597: * 1598: * Initialize a parser context 1599: * 1600: * Returns 0 in case of success and -1 in case of error 1601: */ 1602: 1603: int 1604: xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1605: { 1606: xmlParserInputPtr input; 1607: 1608: if(ctxt==NULL) { 1609: xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1610: return(-1); 1611: } 1612: 1613: xmlDefaultSAXHandlerInit(); 1614: 1615: if (ctxt->dict == NULL) 1616: ctxt->dict = xmlDictCreate(); 1617: if (ctxt->dict == NULL) { 1618: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1619: return(-1); 1620: } 1621: if (ctxt->sax == NULL) 1622: ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1623: if (ctxt->sax == NULL) { 1624: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1625: return(-1); 1626: } 1627: else 1628: xmlSAXVersion(ctxt->sax, 2); 1629: 1630: ctxt->maxatts = 0; 1631: ctxt->atts = NULL; 1632: /* Allocate the Input stack */ 1633: if (ctxt->inputTab == NULL) { 1634: ctxt->inputTab = (xmlParserInputPtr *) 1635: xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1636: ctxt->inputMax = 5; 1637: } 1638: if (ctxt->inputTab == NULL) { 1639: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1640: ctxt->inputNr = 0; 1641: ctxt->inputMax = 0; 1642: ctxt->input = NULL; 1643: return(-1); 1644: } 1645: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1646: xmlFreeInputStream(input); 1647: } 1648: ctxt->inputNr = 0; 1649: ctxt->input = NULL; 1650: 1651: ctxt->version = NULL; 1652: ctxt->encoding = NULL; 1653: ctxt->standalone = -1; 1654: ctxt->hasExternalSubset = 0; 1655: ctxt->hasPErefs = 0; 1656: ctxt->html = 0; 1657: ctxt->external = 0; 1658: ctxt->instate = XML_PARSER_START; 1659: ctxt->token = 0; 1660: ctxt->directory = NULL; 1661: 1662: /* Allocate the Node stack */ 1663: if (ctxt->nodeTab == NULL) { 1664: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1665: ctxt->nodeMax = 10; 1666: } 1667: if (ctxt->nodeTab == NULL) { 1668: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1669: ctxt->nodeNr = 0; 1670: ctxt->nodeMax = 0; 1671: ctxt->node = NULL; 1672: ctxt->inputNr = 0; 1673: ctxt->inputMax = 0; 1674: ctxt->input = NULL; 1675: return(-1); 1676: } 1677: ctxt->nodeNr = 0; 1678: ctxt->node = NULL; 1679: 1680: /* Allocate the Name stack */ 1681: if (ctxt->nameTab == NULL) { 1682: ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1683: ctxt->nameMax = 10; 1684: } 1685: if (ctxt->nameTab == NULL) { 1686: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1687: ctxt->nodeNr = 0; 1688: ctxt->nodeMax = 0; 1689: ctxt->node = NULL; 1690: ctxt->inputNr = 0; 1691: ctxt->inputMax = 0; 1692: ctxt->input = NULL; 1693: ctxt->nameNr = 0; 1694: ctxt->nameMax = 0; 1695: ctxt->name = NULL; 1696: return(-1); 1697: } 1698: ctxt->nameNr = 0; 1699: ctxt->name = NULL; 1700: 1701: /* Allocate the space stack */ 1702: if (ctxt->spaceTab == NULL) { 1703: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1704: ctxt->spaceMax = 10; 1705: } 1706: if (ctxt->spaceTab == NULL) { 1707: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1708: ctxt->nodeNr = 0; 1709: ctxt->nodeMax = 0; 1710: ctxt->node = NULL; 1711: ctxt->inputNr = 0; 1712: ctxt->inputMax = 0; 1713: ctxt->input = NULL; 1714: ctxt->nameNr = 0; 1715: ctxt->nameMax = 0; 1716: ctxt->name = NULL; 1717: ctxt->spaceNr = 0; 1718: ctxt->spaceMax = 0; 1719: ctxt->space = NULL; 1720: return(-1); 1721: } 1722: ctxt->spaceNr = 1; 1723: ctxt->spaceMax = 10; 1724: ctxt->spaceTab[0] = -1; 1725: ctxt->space = &ctxt->spaceTab[0]; 1726: ctxt->userData = ctxt; 1727: ctxt->myDoc = NULL; 1728: ctxt->wellFormed = 1; 1729: ctxt->nsWellFormed = 1; 1730: ctxt->valid = 1; 1731: ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1732: ctxt->validate = xmlDoValidityCheckingDefaultValue; 1733: ctxt->pedantic = xmlPedanticParserDefaultValue; 1734: ctxt->linenumbers = xmlLineNumbersDefaultValue; 1735: ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1736: if (ctxt->keepBlanks == 0) 1737: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1738: 1739: ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1740: ctxt->vctxt.userData = ctxt; 1741: ctxt->vctxt.error = xmlParserValidityError; 1742: ctxt->vctxt.warning = xmlParserValidityWarning; 1743: if (ctxt->validate) { 1744: if (xmlGetWarningsDefaultValue == 0) 1745: ctxt->vctxt.warning = NULL; 1746: else 1747: ctxt->vctxt.warning = xmlParserValidityWarning; 1748: ctxt->vctxt.nodeMax = 0; 1749: } 1750: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1751: ctxt->record_info = 0; 1752: ctxt->nbChars = 0; 1753: ctxt->checkIndex = 0; 1754: ctxt->inSubset = 0; 1755: ctxt->errNo = XML_ERR_OK; 1756: ctxt->depth = 0; 1757: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1758: ctxt->catalogs = NULL; 1759: ctxt->nbentities = 0; 1760: xmlInitNodeInfoSeq(&ctxt->node_seq); 1761: return(0); 1762: } 1763: 1764: /** 1765: * xmlFreeParserCtxt: 1766: * @ctxt: an XML parser context 1767: * 1768: * Free all the memory used by a parser context. However the parsed 1769: * document in ctxt->myDoc is not freed. 1770: */ 1771: 1772: void 1773: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1774: { 1775: xmlParserInputPtr input; 1776: 1777: if (ctxt == NULL) return; 1778: 1779: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1780: xmlFreeInputStream(input); 1781: } 1782: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1783: if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1784: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1785: if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1786: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1787: if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1788: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1789: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1790: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1791: #ifdef LIBXML_SAX1_ENABLED 1792: if ((ctxt->sax != NULL) && 1793: (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1794: #else 1795: if (ctxt->sax != NULL) 1796: #endif /* LIBXML_SAX1_ENABLED */ 1797: xmlFree(ctxt->sax); 1798: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1799: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1800: if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1801: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1802: if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1803: if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1804: if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1805: if (ctxt->attsDefault != NULL) 1806: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1807: if (ctxt->attsSpecial != NULL) 1808: xmlHashFree(ctxt->attsSpecial, NULL); 1809: if (ctxt->freeElems != NULL) { 1810: xmlNodePtr cur, next; 1811: 1812: cur = ctxt->freeElems; 1813: while (cur != NULL) { 1814: next = cur->next; 1815: xmlFree(cur); 1816: cur = next; 1817: } 1818: } 1819: if (ctxt->freeAttrs != NULL) { 1820: xmlAttrPtr cur, next; 1821: 1822: cur = ctxt->freeAttrs; 1823: while (cur != NULL) { 1824: next = cur->next; 1825: xmlFree(cur); 1826: cur = next; 1827: } 1828: } 1829: /* 1830: * cleanup the error strings 1831: */ 1832: if (ctxt->lastError.message != NULL) 1833: xmlFree(ctxt->lastError.message); 1834: if (ctxt->lastError.file != NULL) 1835: xmlFree(ctxt->lastError.file); 1836: if (ctxt->lastError.str1 != NULL) 1837: xmlFree(ctxt->lastError.str1); 1838: if (ctxt->lastError.str2 != NULL) 1839: xmlFree(ctxt->lastError.str2); 1840: if (ctxt->lastError.str3 != NULL) 1841: xmlFree(ctxt->lastError.str3); 1842: 1843: #ifdef LIBXML_CATALOG_ENABLED 1844: if (ctxt->catalogs != NULL) 1845: xmlCatalogFreeLocal(ctxt->catalogs); 1846: #endif 1847: xmlFree(ctxt); 1848: } 1849: 1850: /** 1851: * xmlNewParserCtxt: 1852: * 1853: * Allocate and initialize a new parser context. 1854: * 1855: * Returns the xmlParserCtxtPtr or NULL 1856: */ 1857: 1858: xmlParserCtxtPtr 1859: xmlNewParserCtxt(void) 1860: { 1861: xmlParserCtxtPtr ctxt; 1862: 1863: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1864: if (ctxt == NULL) { 1865: xmlErrMemory(NULL, "cannot allocate parser context\n"); 1866: return(NULL); 1867: } 1868: memset(ctxt, 0, sizeof(xmlParserCtxt)); 1869: if (xmlInitParserCtxt(ctxt) < 0) { 1870: xmlFreeParserCtxt(ctxt); 1871: return(NULL); 1872: } 1873: return(ctxt); 1874: } 1875: 1876: /************************************************************************ 1877: * * 1878: * Handling of node informations * 1879: * * 1880: ************************************************************************/ 1881: 1882: /** 1883: * xmlClearParserCtxt: 1884: * @ctxt: an XML parser context 1885: * 1886: * Clear (release owned resources) and reinitialize a parser context 1887: */ 1888: 1889: void 1890: xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1891: { 1892: if (ctxt==NULL) 1893: return; 1894: xmlClearNodeInfoSeq(&ctxt->node_seq); 1895: xmlCtxtReset(ctxt); 1896: } 1897: 1898: 1899: /** 1900: * xmlParserFindNodeInfo: 1901: * @ctx: an XML parser context 1902: * @node: an XML node within the tree 1903: * 1904: * Find the parser node info struct for a given node 1905: * 1906: * Returns an xmlParserNodeInfo block pointer or NULL 1907: */ 1908: const xmlParserNodeInfo * 1909: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1910: { 1911: unsigned long pos; 1912: 1913: if ((ctx == NULL) || (node == NULL)) 1914: return (NULL); 1915: /* Find position where node should be at */ 1916: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1917: if (pos < ctx->node_seq.length 1918: && ctx->node_seq.buffer[pos].node == node) 1919: return &ctx->node_seq.buffer[pos]; 1920: else 1921: return NULL; 1922: } 1923: 1924: 1925: /** 1926: * xmlInitNodeInfoSeq: 1927: * @seq: a node info sequence pointer 1928: * 1929: * -- Initialize (set to initial state) node info sequence 1930: */ 1931: void 1932: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1933: { 1934: if (seq == NULL) 1935: return; 1936: seq->length = 0; 1937: seq->maximum = 0; 1938: seq->buffer = NULL; 1939: } 1940: 1941: /** 1942: * xmlClearNodeInfoSeq: 1943: * @seq: a node info sequence pointer 1944: * 1945: * -- Clear (release memory and reinitialize) node 1946: * info sequence 1947: */ 1948: void 1949: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1950: { 1951: if (seq == NULL) 1952: return; 1953: if (seq->buffer != NULL) 1954: xmlFree(seq->buffer); 1955: xmlInitNodeInfoSeq(seq); 1956: } 1957: 1958: /** 1959: * xmlParserFindNodeInfoIndex: 1960: * @seq: a node info sequence pointer 1961: * @node: an XML node pointer 1962: * 1963: * 1964: * xmlParserFindNodeInfoIndex : Find the index that the info record for 1965: * the given node is or should be at in a sorted sequence 1966: * 1967: * Returns a long indicating the position of the record 1968: */ 1969: unsigned long 1970: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1971: const xmlNodePtr node) 1972: { 1973: unsigned long upper, lower, middle; 1974: int found = 0; 1975: 1976: if ((seq == NULL) || (node == NULL)) 1977: return ((unsigned long) -1); 1978: 1979: /* Do a binary search for the key */ 1980: lower = 1; 1981: upper = seq->length; 1982: middle = 0; 1983: while (lower <= upper && !found) { 1984: middle = lower + (upper - lower) / 2; 1985: if (node == seq->buffer[middle - 1].node) 1986: found = 1; 1987: else if (node < seq->buffer[middle - 1].node) 1988: upper = middle - 1; 1989: else 1990: lower = middle + 1; 1991: } 1992: 1993: /* Return position */ 1994: if (middle == 0 || seq->buffer[middle - 1].node < node) 1995: return middle; 1996: else 1997: return middle - 1; 1998: } 1999: 2000: 2001: /** 2002: * xmlParserAddNodeInfo: 2003: * @ctxt: an XML parser context 2004: * @info: a node info sequence pointer 2005: * 2006: * Insert node info record into the sorted sequence 2007: */ 2008: void 2009: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2010: const xmlParserNodeInfoPtr info) 2011: { 2012: unsigned long pos; 2013: 2014: if ((ctxt == NULL) || (info == NULL)) return; 2015: 2016: /* Find pos and check to see if node is already in the sequence */ 2017: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2018: info->node); 2019: 2020: if ((pos < ctxt->node_seq.length) && 2021: (ctxt->node_seq.buffer != NULL) && 2022: (ctxt->node_seq.buffer[pos].node == info->node)) { 2023: ctxt->node_seq.buffer[pos] = *info; 2024: } 2025: 2026: /* Otherwise, we need to add new node to buffer */ 2027: else { 2028: if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 2029: xmlParserNodeInfo *tmp_buffer; 2030: unsigned int byte_size; 2031: 2032: if (ctxt->node_seq.maximum == 0) 2033: ctxt->node_seq.maximum = 2; 2034: byte_size = (sizeof(*ctxt->node_seq.buffer) * 2035: (2 * ctxt->node_seq.maximum)); 2036: 2037: if (ctxt->node_seq.buffer == NULL) 2038: tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2039: else 2040: tmp_buffer = 2041: (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2042: byte_size); 2043: 2044: if (tmp_buffer == NULL) { 2045: xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2046: return; 2047: } 2048: ctxt->node_seq.buffer = tmp_buffer; 2049: ctxt->node_seq.maximum *= 2; 2050: } 2051: 2052: /* If position is not at end, move elements out of the way */ 2053: if (pos != ctxt->node_seq.length) { 2054: unsigned long i; 2055: 2056: for (i = ctxt->node_seq.length; i > pos; i--) 2057: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2058: } 2059: 2060: /* Copy element and increase length */ 2061: ctxt->node_seq.buffer[pos] = *info; 2062: ctxt->node_seq.length++; 2063: } 2064: } 2065: 2066: /************************************************************************ 2067: * * 2068: * Defaults settings * 2069: * * 2070: ************************************************************************/ 2071: /** 2072: * xmlPedanticParserDefault: 2073: * @val: int 0 or 1 2074: * 2075: * Set and return the previous value for enabling pedantic warnings. 2076: * 2077: * Returns the last value for 0 for no substitution, 1 for substitution. 2078: */ 2079: 2080: int 2081: xmlPedanticParserDefault(int val) { 2082: int old = xmlPedanticParserDefaultValue; 2083: 2084: xmlPedanticParserDefaultValue = val; 2085: return(old); 2086: } 2087: 2088: /** 2089: * xmlLineNumbersDefault: 2090: * @val: int 0 or 1 2091: * 2092: * Set and return the previous value for enabling line numbers in elements 2093: * contents. This may break on old application and is turned off by default. 2094: * 2095: * Returns the last value for 0 for no substitution, 1 for substitution. 2096: */ 2097: 2098: int 2099: xmlLineNumbersDefault(int val) { 2100: int old = xmlLineNumbersDefaultValue; 2101: 2102: xmlLineNumbersDefaultValue = val; 2103: return(old); 2104: } 2105: 2106: /** 2107: * xmlSubstituteEntitiesDefault: 2108: * @val: int 0 or 1 2109: * 2110: * Set and return the previous value for default entity support. 2111: * Initially the parser always keep entity references instead of substituting 2112: * entity values in the output. This function has to be used to change the 2113: * default parser behavior 2114: * SAX::substituteEntities() has to be used for changing that on a file by 2115: * file basis. 2116: * 2117: * Returns the last value for 0 for no substitution, 1 for substitution. 2118: */ 2119: 2120: int 2121: xmlSubstituteEntitiesDefault(int val) { 2122: int old = xmlSubstituteEntitiesDefaultValue; 2123: 2124: xmlSubstituteEntitiesDefaultValue = val; 2125: return(old); 2126: } 2127: 2128: /** 2129: * xmlKeepBlanksDefault: 2130: * @val: int 0 or 1 2131: * 2132: * Set and return the previous value for default blanks text nodes support. 2133: * The 1.x version of the parser used an heuristic to try to detect 2134: * ignorable white spaces. As a result the SAX callback was generating 2135: * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2136: * using the DOM output text nodes containing those blanks were not generated. 2137: * The 2.x and later version will switch to the XML standard way and 2138: * ignorableWhitespace() are only generated when running the parser in 2139: * validating mode and when the current element doesn't allow CDATA or 2140: * mixed content. 2141: * This function is provided as a way to force the standard behavior 2142: * on 1.X libs and to switch back to the old mode for compatibility when 2143: * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2144: * by using xmlIsBlankNode() commodity function to detect the "empty" 2145: * nodes generated. 2146: * This value also affect autogeneration of indentation when saving code 2147: * if blanks sections are kept, indentation is not generated. 2148: * 2149: * Returns the last value for 0 for no substitution, 1 for substitution. 2150: */ 2151: 2152: int 2153: xmlKeepBlanksDefault(int val) { 2154: int old = xmlKeepBlanksDefaultValue; 2155: 2156: xmlKeepBlanksDefaultValue = val; 2157: if (!val) xmlIndentTreeOutput = 1; 2158: return(old); 2159: } 2160: 2161: #define bottom_parserInternals 2162: #include "elfgcchack.h"