embedaddon/libxml2/parserInternals.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parserInternals.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:22:19 2013 UTC (10 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD

2.8.0

1: /* 2: * parserInternals.c : Internal routines (and obsolete ones) needed for the 3: * XML and HTML parsers. 4: * 5: * See Copyright for the status of this software. 6: * 7: * daniel@veillard.com 8: */ 9: 10: #define IN_LIBXML 11: #include "libxml.h" 12: 13: #if defined(WIN32) && !defined (__CYGWIN__) 14: #define XML_DIR_SEP '\\' 15: #else 16: #define XML_DIR_SEP '/' 17: #endif 18: 19: #include <string.h> 20: #ifdef HAVE_CTYPE_H 21: #include <ctype.h> 22: #endif 23: #ifdef HAVE_STDLIB_H 24: #include <stdlib.h> 25: #endif 26: #ifdef HAVE_SYS_STAT_H 27: #include <sys/stat.h> 28: #endif 29: #ifdef HAVE_FCNTL_H 30: #include <fcntl.h> 31: #endif 32: #ifdef HAVE_UNISTD_H 33: #include <unistd.h> 34: #endif 35: #ifdef HAVE_ZLIB_H 36: #include <zlib.h> 37: #endif 38: 39: #include <libxml/xmlmemory.h> 40: #include <libxml/tree.h> 41: #include <libxml/parser.h> 42: #include <libxml/parserInternals.h> 43: #include <libxml/valid.h> 44: #include <libxml/entities.h> 45: #include <libxml/xmlerror.h> 46: #include <libxml/encoding.h> 47: #include <libxml/valid.h> 48: #include <libxml/xmlIO.h> 49: #include <libxml/uri.h> 50: #include <libxml/dict.h> 51: #include <libxml/SAX.h> 52: #ifdef LIBXML_CATALOG_ENABLED 53: #include <libxml/catalog.h> 54: #endif 55: #include <libxml/globals.h> 56: #include <libxml/chvalid.h> 57: 58: /* 59: * Various global defaults for parsing 60: */ 61: 62: /** 63: * xmlCheckVersion: 64: * @version: the include version number 65: * 66: * check the compiled lib version against the include one. 67: * This can warn or immediately kill the application 68: */ 69: void 70: xmlCheckVersion(int version) { 71: int myversion = (int) LIBXML_VERSION; 72: 73: xmlInitParser(); 74: 75: if ((myversion / 10000) != (version / 10000)) { 76: xmlGenericError(xmlGenericErrorContext, 77: "Fatal: program compiled against libxml %d using libxml %d\n", 78: (version / 10000), (myversion / 10000)); 79: fprintf(stderr, 80: "Fatal: program compiled against libxml %d using libxml %d\n", 81: (version / 10000), (myversion / 10000)); 82: } 83: if ((myversion / 100) < (version / 100)) { 84: xmlGenericError(xmlGenericErrorContext, 85: "Warning: program compiled against libxml %d using older %d\n", 86: (version / 100), (myversion / 100)); 87: } 88: } 89: 90: 91: /************************************************************************ 92: * * 93: * Some factorized error routines * 94: * * 95: ************************************************************************/ 96: 97: 98: /** 99: * xmlErrMemory: 100: * @ctxt: an XML parser context 101: * @extra: extra informations 102: * 103: * Handle a redefinition of attribute error 104: */ 105: void 106: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 107: { 108: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 109: (ctxt->instate == XML_PARSER_EOF)) 110: return; 111: if (ctxt != NULL) { 112: ctxt->errNo = XML_ERR_NO_MEMORY; 113: ctxt->instate = XML_PARSER_EOF; 114: ctxt->disableSAX = 1; 115: } 116: if (extra) 117: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 118: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 119: NULL, NULL, 0, 0, 120: "Memory allocation failed : %s\n", extra); 121: else 122: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 123: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 124: NULL, NULL, 0, 0, "Memory allocation failed\n"); 125: } 126: 127: /** 128: * __xmlErrEncoding: 129: * @ctxt: an XML parser context 130: * @xmlerr: the error number 131: * @msg: the error message 132: * @str1: an string info 133: * @str2: an string info 134: * 135: * Handle an encoding error 136: */ 137: void 138: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 139: const char *msg, const xmlChar * str1, const xmlChar * str2) 140: { 141: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 142: (ctxt->instate == XML_PARSER_EOF)) 143: return; 144: if (ctxt != NULL) 145: ctxt->errNo = xmlerr; 146: __xmlRaiseError(NULL, NULL, NULL, 147: ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 148: NULL, 0, (const char *) str1, (const char *) str2, 149: NULL, 0, 0, msg, str1, str2); 150: if (ctxt != NULL) { 151: ctxt->wellFormed = 0; 152: if (ctxt->recovery == 0) 153: ctxt->disableSAX = 1; 154: } 155: } 156: 157: /** 158: * xmlErrInternal: 159: * @ctxt: an XML parser context 160: * @msg: the error message 161: * @str: error informations 162: * 163: * Handle an internal error 164: */ 165: static void 166: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 167: { 168: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 169: (ctxt->instate == XML_PARSER_EOF)) 170: return; 171: if (ctxt != NULL) 172: ctxt->errNo = XML_ERR_INTERNAL_ERROR; 173: __xmlRaiseError(NULL, NULL, NULL, 174: ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 175: XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 176: 0, 0, msg, str); 177: if (ctxt != NULL) { 178: ctxt->wellFormed = 0; 179: if (ctxt->recovery == 0) 180: ctxt->disableSAX = 1; 181: } 182: } 183: 184: /** 185: * xmlErrEncodingInt: 186: * @ctxt: an XML parser context 187: * @error: the error number 188: * @msg: the error message 189: * @val: an integer value 190: * 191: * n encoding error 192: */ 193: static void 194: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 195: const char *msg, int val) 196: { 197: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 198: (ctxt->instate == XML_PARSER_EOF)) 199: return; 200: if (ctxt != NULL) 201: ctxt->errNo = error; 202: __xmlRaiseError(NULL, NULL, NULL, 203: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 204: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 205: if (ctxt != NULL) { 206: ctxt->wellFormed = 0; 207: if (ctxt->recovery == 0) 208: ctxt->disableSAX = 1; 209: } 210: } 211: 212: /** 213: * xmlIsLetter: 214: * @c: an unicode character (int) 215: * 216: * Check whether the character is allowed by the production 217: * [84] Letter ::= BaseChar | Ideographic 218: * 219: * Returns 0 if not, non-zero otherwise 220: */ 221: int 222: xmlIsLetter(int c) { 223: return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 224: } 225: 226: /************************************************************************ 227: * * 228: * Input handling functions for progressive parsing * 229: * * 230: ************************************************************************/ 231: 232: /* #define DEBUG_INPUT */ 233: /* #define DEBUG_STACK */ 234: /* #define DEBUG_PUSH */ 235: 236: 237: /* we need to keep enough input to show errors in context */ 238: #define LINE_LEN 80 239: 240: #ifdef DEBUG_INPUT 241: #define CHECK_BUFFER(in) check_buffer(in) 242: 243: static 244: void check_buffer(xmlParserInputPtr in) { 245: if (in->base != in->buf->buffer->content) { 246: xmlGenericError(xmlGenericErrorContext, 247: "xmlParserInput: base mismatch problem\n"); 248: } 249: if (in->cur < in->base) { 250: xmlGenericError(xmlGenericErrorContext, 251: "xmlParserInput: cur < base problem\n"); 252: } 253: if (in->cur > in->base + in->buf->buffer->use) { 254: xmlGenericError(xmlGenericErrorContext, 255: "xmlParserInput: cur > base + use problem\n"); 256: } 257: xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 258: (int) in, (int) in->buf->buffer->content, in->cur - in->base, 259: in->buf->buffer->use, in->buf->buffer->size); 260: } 261: 262: #else 263: #define CHECK_BUFFER(in) 264: #endif 265: 266: 267: /** 268: * xmlParserInputRead: 269: * @in: an XML parser input 270: * @len: an indicative size for the lookahead 271: * 272: * This function refresh the input for the parser. It doesn't try to 273: * preserve pointers to the input buffer, and discard already read data 274: * 275: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 276: * end of this entity 277: */ 278: int 279: xmlParserInputRead(xmlParserInputPtr in, int len) { 280: int ret; 281: int used; 282: int indx; 283: 284: if (in == NULL) return(-1); 285: #ifdef DEBUG_INPUT 286: xmlGenericError(xmlGenericErrorContext, "Read\n"); 287: #endif 288: if (in->buf == NULL) return(-1); 289: if (in->base == NULL) return(-1); 290: if (in->cur == NULL) return(-1); 291: if (in->buf->buffer == NULL) return(-1); 292: if (in->buf->readcallback == NULL) return(-1); 293: 294: CHECK_BUFFER(in); 295: 296: used = in->cur - in->buf->buffer->content; 297: ret = xmlBufferShrink(in->buf->buffer, used); 298: if (ret > 0) { 299: in->cur -= ret; 300: in->consumed += ret; 301: } 302: ret = xmlParserInputBufferRead(in->buf, len); 303: if (in->base != in->buf->buffer->content) { 304: /* 305: * the buffer has been reallocated 306: */ 307: indx = in->cur - in->base; 308: in->base = in->buf->buffer->content; 309: in->cur = &in->buf->buffer->content[indx]; 310: } 311: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 312: 313: CHECK_BUFFER(in); 314: 315: return(ret); 316: } 317: 318: /** 319: * xmlParserInputGrow: 320: * @in: an XML parser input 321: * @len: an indicative size for the lookahead 322: * 323: * This function increase the input for the parser. It tries to 324: * preserve pointers to the input buffer, and keep already read data 325: * 326: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 327: * end of this entity 328: */ 329: int 330: xmlParserInputGrow(xmlParserInputPtr in, int len) { 331: int ret; 332: int indx; 333: 334: if (in == NULL) return(-1); 335: #ifdef DEBUG_INPUT 336: xmlGenericError(xmlGenericErrorContext, "Grow\n"); 337: #endif 338: if (in->buf == NULL) return(-1); 339: if (in->base == NULL) return(-1); 340: if (in->cur == NULL) return(-1); 341: if (in->buf->buffer == NULL) return(-1); 342: 343: CHECK_BUFFER(in); 344: 345: indx = in->cur - in->base; 346: if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 347: 348: CHECK_BUFFER(in); 349: 350: return(0); 351: } 352: if (in->buf->readcallback != NULL) 353: ret = xmlParserInputBufferGrow(in->buf, len); 354: else 355: return(0); 356: 357: /* 358: * NOTE : in->base may be a "dangling" i.e. freed pointer in this 359: * block, but we use it really as an integer to do some 360: * pointer arithmetic. Insure will raise it as a bug but in 361: * that specific case, that's not ! 362: */ 363: if (in->base != in->buf->buffer->content) { 364: /* 365: * the buffer has been reallocated 366: */ 367: indx = in->cur - in->base; 368: in->base = in->buf->buffer->content; 369: in->cur = &in->buf->buffer->content[indx]; 370: } 371: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 372: 373: CHECK_BUFFER(in); 374: 375: return(ret); 376: } 377: 378: /** 379: * xmlParserInputShrink: 380: * @in: an XML parser input 381: * 382: * This function removes used input for the parser. 383: */ 384: void 385: xmlParserInputShrink(xmlParserInputPtr in) { 386: int used; 387: int ret; 388: int indx; 389: 390: #ifdef DEBUG_INPUT 391: xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 392: #endif 393: if (in == NULL) return; 394: if (in->buf == NULL) return; 395: if (in->base == NULL) return; 396: if (in->cur == NULL) return; 397: if (in->buf->buffer == NULL) return; 398: 399: CHECK_BUFFER(in); 400: 401: used = in->cur - in->buf->buffer->content; 402: /* 403: * Do not shrink on large buffers whose only a tiny fraction 404: * was consumed 405: */ 406: if (used > INPUT_CHUNK) { 407: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 408: if (ret > 0) { 409: in->cur -= ret; 410: in->consumed += ret; 411: } 412: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 413: } 414: 415: CHECK_BUFFER(in); 416: 417: if (in->buf->buffer->use > INPUT_CHUNK) { 418: return; 419: } 420: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 421: if (in->base != in->buf->buffer->content) { 422: /* 423: * the buffer has been reallocated 424: */ 425: indx = in->cur - in->base; 426: in->base = in->buf->buffer->content; 427: in->cur = &in->buf->buffer->content[indx]; 428: } 429: in->end = &in->buf->buffer->content[in->buf->buffer->use]; 430: 431: CHECK_BUFFER(in); 432: } 433: 434: /************************************************************************ 435: * * 436: * UTF8 character input and related functions * 437: * * 438: ************************************************************************/ 439: 440: /** 441: * xmlNextChar: 442: * @ctxt: the XML parser context 443: * 444: * Skip to the next char input char. 445: */ 446: 447: void 448: xmlNextChar(xmlParserCtxtPtr ctxt) 449: { 450: if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 451: (ctxt->input == NULL)) 452: return; 453: 454: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 455: if ((*ctxt->input->cur == 0) && 456: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 457: (ctxt->instate != XML_PARSER_COMMENT)) { 458: /* 459: * If we are at the end of the current entity and 460: * the context allows it, we pop consumed entities 461: * automatically. 462: * the auto closing should be blocked in other cases 463: */ 464: xmlPopInput(ctxt); 465: } else { 466: const unsigned char *cur; 467: unsigned char c; 468: 469: /* 470: * 2.11 End-of-Line Handling 471: * the literal two-character sequence "#xD#xA" or a standalone 472: * literal #xD, an XML processor must pass to the application 473: * the single character #xA. 474: */ 475: if (*(ctxt->input->cur) == '\n') { 476: ctxt->input->line++; ctxt->input->col = 1; 477: } else 478: ctxt->input->col++; 479: 480: /* 481: * We are supposed to handle UTF8, check it's valid 482: * From rfc2044: encoding of the Unicode values on UTF-8: 483: * 484: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 485: * 0000 0000-0000 007F 0xxxxxxx 486: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 487: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 488: * 489: * Check for the 0x110000 limit too 490: */ 491: cur = ctxt->input->cur; 492: 493: c = *cur; 494: if (c & 0x80) { 495: if (c == 0xC0) 496: goto encoding_error; 497: if (cur[1] == 0) { 498: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 499: cur = ctxt->input->cur; 500: } 501: if ((cur[1] & 0xc0) != 0x80) 502: goto encoding_error; 503: if ((c & 0xe0) == 0xe0) { 504: unsigned int val; 505: 506: if (cur[2] == 0) { 507: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 508: cur = ctxt->input->cur; 509: } 510: if ((cur[2] & 0xc0) != 0x80) 511: goto encoding_error; 512: if ((c & 0xf0) == 0xf0) { 513: if (cur[3] == 0) { 514: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 515: cur = ctxt->input->cur; 516: } 517: if (((c & 0xf8) != 0xf0) || 518: ((cur[3] & 0xc0) != 0x80)) 519: goto encoding_error; 520: /* 4-byte code */ 521: ctxt->input->cur += 4; 522: val = (cur[0] & 0x7) << 18; 523: val |= (cur[1] & 0x3f) << 12; 524: val |= (cur[2] & 0x3f) << 6; 525: val |= cur[3] & 0x3f; 526: } else { 527: /* 3-byte code */ 528: ctxt->input->cur += 3; 529: val = (cur[0] & 0xf) << 12; 530: val |= (cur[1] & 0x3f) << 6; 531: val |= cur[2] & 0x3f; 532: } 533: if (((val > 0xd7ff) && (val < 0xe000)) || 534: ((val > 0xfffd) && (val < 0x10000)) || 535: (val >= 0x110000)) { 536: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 537: "Char 0x%X out of allowed range\n", 538: val); 539: } 540: } else 541: /* 2-byte code */ 542: ctxt->input->cur += 2; 543: } else 544: /* 1-byte code */ 545: ctxt->input->cur++; 546: 547: ctxt->nbChars++; 548: if (*ctxt->input->cur == 0) 549: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 550: } 551: } else { 552: /* 553: * Assume it's a fixed length encoding (1) with 554: * a compatible encoding for the ASCII set, since 555: * XML constructs only use < 128 chars 556: */ 557: 558: if (*(ctxt->input->cur) == '\n') { 559: ctxt->input->line++; ctxt->input->col = 1; 560: } else 561: ctxt->input->col++; 562: ctxt->input->cur++; 563: ctxt->nbChars++; 564: if (*ctxt->input->cur == 0) 565: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 566: } 567: if ((*ctxt->input->cur == '%') && (!ctxt->html)) 568: xmlParserHandlePEReference(ctxt); 569: if ((*ctxt->input->cur == 0) && 570: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 571: xmlPopInput(ctxt); 572: return; 573: encoding_error: 574: /* 575: * If we detect an UTF8 error that probably mean that the 576: * input encoding didn't get properly advertised in the 577: * declaration header. Report the error and switch the encoding 578: * to ISO-Latin-1 (if you don't like this policy, just declare the 579: * encoding !) 580: */ 581: if ((ctxt == NULL) || (ctxt->input == NULL) || 582: (ctxt->input->end - ctxt->input->cur < 4)) { 583: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 584: "Input is not proper UTF-8, indicate encoding !\n", 585: NULL, NULL); 586: } else { 587: char buffer[150]; 588: 589: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 590: ctxt->input->cur[0], ctxt->input->cur[1], 591: ctxt->input->cur[2], ctxt->input->cur[3]); 592: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 593: "Input is not proper UTF-8, indicate encoding !\n%s", 594: BAD_CAST buffer, NULL); 595: } 596: ctxt->charset = XML_CHAR_ENCODING_8859_1; 597: ctxt->input->cur++; 598: return; 599: } 600: 601: /** 602: * xmlCurrentChar: 603: * @ctxt: the XML parser context 604: * @len: pointer to the length of the char read 605: * 606: * The current char value, if using UTF-8 this may actually span multiple 607: * bytes in the input buffer. Implement the end of line normalization: 608: * 2.11 End-of-Line Handling 609: * Wherever an external parsed entity or the literal entity value 610: * of an internal parsed entity contains either the literal two-character 611: * sequence "#xD#xA" or a standalone literal #xD, an XML processor 612: * must pass to the application the single character #xA. 613: * This behavior can conveniently be produced by normalizing all 614: * line breaks to #xA on input, before parsing.) 615: * 616: * Returns the current char value and its length 617: */ 618: 619: int 620: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 621: if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 622: if (ctxt->instate == XML_PARSER_EOF) 623: return(0); 624: 625: if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 626: *len = 1; 627: return((int) *ctxt->input->cur); 628: } 629: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 630: /* 631: * We are supposed to handle UTF8, check it's valid 632: * From rfc2044: encoding of the Unicode values on UTF-8: 633: * 634: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 635: * 0000 0000-0000 007F 0xxxxxxx 636: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 637: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 638: * 639: * Check for the 0x110000 limit too 640: */ 641: const unsigned char *cur = ctxt->input->cur; 642: unsigned char c; 643: unsigned int val; 644: 645: c = *cur; 646: if (c & 0x80) { 647: if (((c & 0x40) == 0) || (c == 0xC0)) 648: goto encoding_error; 649: if (cur[1] == 0) { 650: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 651: cur = ctxt->input->cur; 652: } 653: if ((cur[1] & 0xc0) != 0x80) 654: goto encoding_error; 655: if ((c & 0xe0) == 0xe0) { 656: if (cur[2] == 0) { 657: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 658: cur = ctxt->input->cur; 659: } 660: if ((cur[2] & 0xc0) != 0x80) 661: goto encoding_error; 662: if ((c & 0xf0) == 0xf0) { 663: if (cur[3] == 0) { 664: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 665: cur = ctxt->input->cur; 666: } 667: if (((c & 0xf8) != 0xf0) || 668: ((cur[3] & 0xc0) != 0x80)) 669: goto encoding_error; 670: /* 4-byte code */ 671: *len = 4; 672: val = (cur[0] & 0x7) << 18; 673: val |= (cur[1] & 0x3f) << 12; 674: val |= (cur[2] & 0x3f) << 6; 675: val |= cur[3] & 0x3f; 676: if (val < 0x10000) 677: goto encoding_error; 678: } else { 679: /* 3-byte code */ 680: *len = 3; 681: val = (cur[0] & 0xf) << 12; 682: val |= (cur[1] & 0x3f) << 6; 683: val |= cur[2] & 0x3f; 684: if (val < 0x800) 685: goto encoding_error; 686: } 687: } else { 688: /* 2-byte code */ 689: *len = 2; 690: val = (cur[0] & 0x1f) << 6; 691: val |= cur[1] & 0x3f; 692: if (val < 0x80) 693: goto encoding_error; 694: } 695: if (!IS_CHAR(val)) { 696: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 697: "Char 0x%X out of allowed range\n", val); 698: } 699: return(val); 700: } else { 701: /* 1-byte code */ 702: *len = 1; 703: if (*ctxt->input->cur == 0) 704: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 705: if ((*ctxt->input->cur == 0) && 706: (ctxt->input->end > ctxt->input->cur)) { 707: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 708: "Char 0x0 out of allowed range\n", 0); 709: } 710: if (*ctxt->input->cur == 0xD) { 711: if (ctxt->input->cur[1] == 0xA) { 712: ctxt->nbChars++; 713: ctxt->input->cur++; 714: } 715: return(0xA); 716: } 717: return((int) *ctxt->input->cur); 718: } 719: } 720: /* 721: * Assume it's a fixed length encoding (1) with 722: * a compatible encoding for the ASCII set, since 723: * XML constructs only use < 128 chars 724: */ 725: *len = 1; 726: if (*ctxt->input->cur == 0xD) { 727: if (ctxt->input->cur[1] == 0xA) { 728: ctxt->nbChars++; 729: ctxt->input->cur++; 730: } 731: return(0xA); 732: } 733: return((int) *ctxt->input->cur); 734: encoding_error: 735: /* 736: * An encoding problem may arise from a truncated input buffer 737: * splitting a character in the middle. In that case do not raise 738: * an error but return 0 to endicate an end of stream problem 739: */ 740: if (ctxt->input->end - ctxt->input->cur < 4) { 741: *len = 0; 742: return(0); 743: } 744: 745: /* 746: * If we detect an UTF8 error that probably mean that the 747: * input encoding didn't get properly advertised in the 748: * declaration header. Report the error and switch the encoding 749: * to ISO-Latin-1 (if you don't like this policy, just declare the 750: * encoding !) 751: */ 752: { 753: char buffer[150]; 754: 755: snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 756: ctxt->input->cur[0], ctxt->input->cur[1], 757: ctxt->input->cur[2], ctxt->input->cur[3]); 758: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 759: "Input is not proper UTF-8, indicate encoding !\n%s", 760: BAD_CAST buffer, NULL); 761: } 762: ctxt->charset = XML_CHAR_ENCODING_8859_1; 763: *len = 1; 764: return((int) *ctxt->input->cur); 765: } 766: 767: /** 768: * xmlStringCurrentChar: 769: * @ctxt: the XML parser context 770: * @cur: pointer to the beginning of the char 771: * @len: pointer to the length of the char read 772: * 773: * The current char value, if using UTF-8 this may actually span multiple 774: * bytes in the input buffer. 775: * 776: * Returns the current char value and its length 777: */ 778: 779: int 780: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 781: { 782: if ((len == NULL) || (cur == NULL)) return(0); 783: if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 784: /* 785: * We are supposed to handle UTF8, check it's valid 786: * From rfc2044: encoding of the Unicode values on UTF-8: 787: * 788: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 789: * 0000 0000-0000 007F 0xxxxxxx 790: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 791: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 792: * 793: * Check for the 0x110000 limit too 794: */ 795: unsigned char c; 796: unsigned int val; 797: 798: c = *cur; 799: if (c & 0x80) { 800: if ((cur[1] & 0xc0) != 0x80) 801: goto encoding_error; 802: if ((c & 0xe0) == 0xe0) { 803: 804: if ((cur[2] & 0xc0) != 0x80) 805: goto encoding_error; 806: if ((c & 0xf0) == 0xf0) { 807: if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 808: goto encoding_error; 809: /* 4-byte code */ 810: *len = 4; 811: val = (cur[0] & 0x7) << 18; 812: val |= (cur[1] & 0x3f) << 12; 813: val |= (cur[2] & 0x3f) << 6; 814: val |= cur[3] & 0x3f; 815: } else { 816: /* 3-byte code */ 817: *len = 3; 818: val = (cur[0] & 0xf) << 12; 819: val |= (cur[1] & 0x3f) << 6; 820: val |= cur[2] & 0x3f; 821: } 822: } else { 823: /* 2-byte code */ 824: *len = 2; 825: val = (cur[0] & 0x1f) << 6; 826: val |= cur[1] & 0x3f; 827: } 828: if (!IS_CHAR(val)) { 829: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 830: "Char 0x%X out of allowed range\n", val); 831: } 832: return (val); 833: } else { 834: /* 1-byte code */ 835: *len = 1; 836: return ((int) *cur); 837: } 838: } 839: /* 840: * Assume it's a fixed length encoding (1) with 841: * a compatible encoding for the ASCII set, since 842: * XML constructs only use < 128 chars 843: */ 844: *len = 1; 845: return ((int) *cur); 846: encoding_error: 847: 848: /* 849: * An encoding problem may arise from a truncated input buffer 850: * splitting a character in the middle. In that case do not raise 851: * an error but return 0 to endicate an end of stream problem 852: */ 853: if ((ctxt == NULL) || (ctxt->input == NULL) || 854: (ctxt->input->end - ctxt->input->cur < 4)) { 855: *len = 0; 856: return(0); 857: } 858: /* 859: * If we detect an UTF8 error that probably mean that the 860: * input encoding didn't get properly advertised in the 861: * declaration header. Report the error and switch the encoding 862: * to ISO-Latin-1 (if you don't like this policy, just declare the 863: * encoding !) 864: */ 865: { 866: char buffer[150]; 867: 868: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 869: ctxt->input->cur[0], ctxt->input->cur[1], 870: ctxt->input->cur[2], ctxt->input->cur[3]); 871: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 872: "Input is not proper UTF-8, indicate encoding !\n%s", 873: BAD_CAST buffer, NULL); 874: } 875: *len = 1; 876: return ((int) *cur); 877: } 878: 879: /** 880: * xmlCopyCharMultiByte: 881: * @out: pointer to an array of xmlChar 882: * @val: the char value 883: * 884: * append the char value in the array 885: * 886: * Returns the number of xmlChar written 887: */ 888: int 889: xmlCopyCharMultiByte(xmlChar *out, int val) { 890: if (out == NULL) return(0); 891: /* 892: * We are supposed to handle UTF8, check it's valid 893: * From rfc2044: encoding of the Unicode values on UTF-8: 894: * 895: * UCS-4 range (hex.) UTF-8 octet sequence (binary) 896: * 0000 0000-0000 007F 0xxxxxxx 897: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 898: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 899: */ 900: if (val >= 0x80) { 901: xmlChar *savedout = out; 902: int bits; 903: if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 904: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 905: else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 906: else { 907: xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 908: "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 909: val); 910: return(0); 911: } 912: for ( ; bits >= 0; bits-= 6) 913: *out++= ((val >> bits) & 0x3F) | 0x80 ; 914: return (out - savedout); 915: } 916: *out = (xmlChar) val; 917: return 1; 918: } 919: 920: /** 921: * xmlCopyChar: 922: * @len: Ignored, compatibility 923: * @out: pointer to an array of xmlChar 924: * @val: the char value 925: * 926: * append the char value in the array 927: * 928: * Returns the number of xmlChar written 929: */ 930: 931: int 932: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 933: if (out == NULL) return(0); 934: /* the len parameter is ignored */ 935: if (val >= 0x80) { 936: return(xmlCopyCharMultiByte (out, val)); 937: } 938: *out = (xmlChar) val; 939: return 1; 940: } 941: 942: /************************************************************************ 943: * * 944: * Commodity functions to switch encodings * 945: * * 946: ************************************************************************/ 947: 948: /* defined in encoding.c, not public */ 949: int 950: xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 951: xmlBufferPtr in, int len); 952: 953: static int 954: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 955: xmlCharEncodingHandlerPtr handler, int len); 956: static int 957: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 958: xmlCharEncodingHandlerPtr handler, int len); 959: /** 960: * xmlSwitchEncoding: 961: * @ctxt: the parser context 962: * @enc: the encoding value (number) 963: * 964: * change the input functions when discovering the character encoding 965: * of a given entity. 966: * 967: * Returns 0 in case of success, -1 otherwise 968: */ 969: int 970: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 971: { 972: xmlCharEncodingHandlerPtr handler; 973: int len = -1; 974: 975: if (ctxt == NULL) return(-1); 976: switch (enc) { 977: case XML_CHAR_ENCODING_ERROR: 978: __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 979: "encoding unknown\n", NULL, NULL); 980: return(-1); 981: case XML_CHAR_ENCODING_NONE: 982: /* let's assume it's UTF-8 without the XML decl */ 983: ctxt->charset = XML_CHAR_ENCODING_UTF8; 984: return(0); 985: case XML_CHAR_ENCODING_UTF8: 986: /* default encoding, no conversion should be needed */ 987: ctxt->charset = XML_CHAR_ENCODING_UTF8; 988: 989: /* 990: * Errata on XML-1.0 June 20 2001 991: * Specific handling of the Byte Order Mark for 992: * UTF-8 993: */ 994: if ((ctxt->input != NULL) && 995: (ctxt->input->cur[0] == 0xEF) && 996: (ctxt->input->cur[1] == 0xBB) && 997: (ctxt->input->cur[2] == 0xBF)) { 998: ctxt->input->cur += 3; 999: } 1000: return(0); 1001: case XML_CHAR_ENCODING_UTF16LE: 1002: case XML_CHAR_ENCODING_UTF16BE: 1003: /*The raw input characters are encoded 1004: *in UTF-16. As we expect this function 1005: *to be called after xmlCharEncInFunc, we expect 1006: *ctxt->input->cur to contain UTF-8 encoded characters. 1007: *So the raw UTF16 Byte Order Mark 1008: *has also been converted into 1009: *an UTF-8 BOM. Let's skip that BOM. 1010: */ 1011: if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 1012: (ctxt->input->cur[0] == 0xEF) && 1013: (ctxt->input->cur[1] == 0xBB) && 1014: (ctxt->input->cur[2] == 0xBF)) { 1015: ctxt->input->cur += 3; 1016: } 1017: len = 90; 1018: break; 1019: case XML_CHAR_ENCODING_UCS2: 1020: len = 90; 1021: break; 1022: case XML_CHAR_ENCODING_UCS4BE: 1023: case XML_CHAR_ENCODING_UCS4LE: 1024: case XML_CHAR_ENCODING_UCS4_2143: 1025: case XML_CHAR_ENCODING_UCS4_3412: 1026: len = 180; 1027: break; 1028: case XML_CHAR_ENCODING_EBCDIC: 1029: case XML_CHAR_ENCODING_8859_1: 1030: case XML_CHAR_ENCODING_8859_2: 1031: case XML_CHAR_ENCODING_8859_3: 1032: case XML_CHAR_ENCODING_8859_4: 1033: case XML_CHAR_ENCODING_8859_5: 1034: case XML_CHAR_ENCODING_8859_6: 1035: case XML_CHAR_ENCODING_8859_7: 1036: case XML_CHAR_ENCODING_8859_8: 1037: case XML_CHAR_ENCODING_8859_9: 1038: case XML_CHAR_ENCODING_ASCII: 1039: case XML_CHAR_ENCODING_2022_JP: 1040: case XML_CHAR_ENCODING_SHIFT_JIS: 1041: case XML_CHAR_ENCODING_EUC_JP: 1042: len = 45; 1043: break; 1044: } 1045: handler = xmlGetCharEncodingHandler(enc); 1046: if (handler == NULL) { 1047: /* 1048: * Default handlers. 1049: */ 1050: switch (enc) { 1051: case XML_CHAR_ENCODING_ASCII: 1052: /* default encoding, no conversion should be needed */ 1053: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1054: return(0); 1055: case XML_CHAR_ENCODING_UTF16LE: 1056: break; 1057: case XML_CHAR_ENCODING_UTF16BE: 1058: break; 1059: case XML_CHAR_ENCODING_UCS4LE: 1060: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1061: "encoding not supported %s\n", 1062: BAD_CAST "USC4 little endian", NULL); 1063: break; 1064: case XML_CHAR_ENCODING_UCS4BE: 1065: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1066: "encoding not supported %s\n", 1067: BAD_CAST "USC4 big endian", NULL); 1068: break; 1069: case XML_CHAR_ENCODING_EBCDIC: 1070: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1071: "encoding not supported %s\n", 1072: BAD_CAST "EBCDIC", NULL); 1073: break; 1074: case XML_CHAR_ENCODING_UCS4_2143: 1075: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1076: "encoding not supported %s\n", 1077: BAD_CAST "UCS4 2143", NULL); 1078: break; 1079: case XML_CHAR_ENCODING_UCS4_3412: 1080: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1081: "encoding not supported %s\n", 1082: BAD_CAST "UCS4 3412", NULL); 1083: break; 1084: case XML_CHAR_ENCODING_UCS2: 1085: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1086: "encoding not supported %s\n", 1087: BAD_CAST "UCS2", NULL); 1088: break; 1089: case XML_CHAR_ENCODING_8859_1: 1090: case XML_CHAR_ENCODING_8859_2: 1091: case XML_CHAR_ENCODING_8859_3: 1092: case XML_CHAR_ENCODING_8859_4: 1093: case XML_CHAR_ENCODING_8859_5: 1094: case XML_CHAR_ENCODING_8859_6: 1095: case XML_CHAR_ENCODING_8859_7: 1096: case XML_CHAR_ENCODING_8859_8: 1097: case XML_CHAR_ENCODING_8859_9: 1098: /* 1099: * We used to keep the internal content in the 1100: * document encoding however this turns being unmaintainable 1101: * So xmlGetCharEncodingHandler() will return non-null 1102: * values for this now. 1103: */ 1104: if ((ctxt->inputNr == 1) && 1105: (ctxt->encoding == NULL) && 1106: (ctxt->input != NULL) && 1107: (ctxt->input->encoding != NULL)) { 1108: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1109: } 1110: ctxt->charset = enc; 1111: return(0); 1112: case XML_CHAR_ENCODING_2022_JP: 1113: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1114: "encoding not supported %s\n", 1115: BAD_CAST "ISO-2022-JP", NULL); 1116: break; 1117: case XML_CHAR_ENCODING_SHIFT_JIS: 1118: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1119: "encoding not supported %s\n", 1120: BAD_CAST "Shift_JIS", NULL); 1121: break; 1122: case XML_CHAR_ENCODING_EUC_JP: 1123: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1124: "encoding not supported %s\n", 1125: BAD_CAST "EUC-JP", NULL); 1126: break; 1127: default: 1128: break; 1129: } 1130: } 1131: if (handler == NULL) 1132: return(-1); 1133: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1134: return(xmlSwitchToEncodingInt(ctxt, handler, len)); 1135: } 1136: 1137: /** 1138: * xmlSwitchInputEncoding: 1139: * @ctxt: the parser context 1140: * @input: the input stream 1141: * @handler: the encoding handler 1142: * @len: the number of bytes to convert for the first line or -1 1143: * 1144: * change the input functions when discovering the character encoding 1145: * of a given entity. 1146: * 1147: * Returns 0 in case of success, -1 otherwise 1148: */ 1149: static int 1150: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1151: xmlCharEncodingHandlerPtr handler, int len) 1152: { 1153: int nbchars; 1154: 1155: if (handler == NULL) 1156: return (-1); 1157: if (input == NULL) 1158: return (-1); 1159: if (input->buf != NULL) { 1160: if (input->buf->encoder != NULL) { 1161: /* 1162: * Check in case the auto encoding detetection triggered 1163: * in already. 1164: */ 1165: if (input->buf->encoder == handler) 1166: return (0); 1167: 1168: /* 1169: * "UTF-16" can be used for both LE and BE 1170: if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1171: BAD_CAST "UTF-16", 6)) && 1172: (!xmlStrncmp(BAD_CAST handler->name, 1173: BAD_CAST "UTF-16", 6))) { 1174: return(0); 1175: } 1176: */ 1177: 1178: /* 1179: * Note: this is a bit dangerous, but that's what it 1180: * takes to use nearly compatible signature for different 1181: * encodings. 1182: */ 1183: xmlCharEncCloseFunc(input->buf->encoder); 1184: input->buf->encoder = handler; 1185: return (0); 1186: } 1187: input->buf->encoder = handler; 1188: 1189: /* 1190: * Is there already some content down the pipe to convert ? 1191: */ 1192: if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 1193: int processed; 1194: unsigned int use; 1195: 1196: /* 1197: * Specific handling of the Byte Order Mark for 1198: * UTF-16 1199: */ 1200: if ((handler->name != NULL) && 1201: (!strcmp(handler->name, "UTF-16LE") || 1202: !strcmp(handler->name, "UTF-16")) && 1203: (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1204: input->cur += 2; 1205: } 1206: if ((handler->name != NULL) && 1207: (!strcmp(handler->name, "UTF-16BE")) && 1208: (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1209: input->cur += 2; 1210: } 1211: /* 1212: * Errata on XML-1.0 June 20 2001 1213: * Specific handling of the Byte Order Mark for 1214: * UTF-8 1215: */ 1216: if ((handler->name != NULL) && 1217: (!strcmp(handler->name, "UTF-8")) && 1218: (input->cur[0] == 0xEF) && 1219: (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1220: input->cur += 3; 1221: } 1222: 1223: /* 1224: * Shrink the current input buffer. 1225: * Move it as the raw buffer and create a new input buffer 1226: */ 1227: processed = input->cur - input->base; 1228: xmlBufferShrink(input->buf->buffer, processed); 1229: input->buf->raw = input->buf->buffer; 1230: input->buf->buffer = xmlBufferCreate(); 1231: input->buf->rawconsumed = processed; 1232: use = input->buf->raw->use; 1233: 1234: if (ctxt->html) { 1235: /* 1236: * convert as much as possible of the buffer 1237: */ 1238: nbchars = xmlCharEncInFunc(input->buf->encoder, 1239: input->buf->buffer, 1240: input->buf->raw); 1241: } else { 1242: /* 1243: * convert just enough to get 1244: * '<?xml version="1.0" encoding="xxx"?>' 1245: * parsed with the autodetected encoding 1246: * into the parser reading buffer. 1247: */ 1248: nbchars = xmlCharEncFirstLineInt(input->buf->encoder, 1249: input->buf->buffer, 1250: input->buf->raw, 1251: len); 1252: } 1253: if (nbchars < 0) { 1254: xmlErrInternal(ctxt, 1255: "switching encoding: encoder error\n", 1256: NULL); 1257: return (-1); 1258: } 1259: input->buf->rawconsumed += use - input->buf->raw->use; 1260: input->base = input->cur = input->buf->buffer->content; 1261: input->end = &input->base[input->buf->buffer->use]; 1262: 1263: } 1264: return (0); 1265: } else if (input->length == 0) { 1266: /* 1267: * When parsing a static memory array one must know the 1268: * size to be able to convert the buffer. 1269: */ 1270: xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1271: return (-1); 1272: } 1273: return (0); 1274: } 1275: 1276: /** 1277: * xmlSwitchInputEncoding: 1278: * @ctxt: the parser context 1279: * @input: the input stream 1280: * @handler: the encoding handler 1281: * 1282: * change the input functions when discovering the character encoding 1283: * of a given entity. 1284: * 1285: * Returns 0 in case of success, -1 otherwise 1286: */ 1287: int 1288: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1289: xmlCharEncodingHandlerPtr handler) { 1290: return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1291: } 1292: 1293: /** 1294: * xmlSwitchToEncodingInt: 1295: * @ctxt: the parser context 1296: * @handler: the encoding handler 1297: * @len: the lenght to convert or -1 1298: * 1299: * change the input functions when discovering the character encoding 1300: * of a given entity, and convert only @len bytes of the output, this 1301: * is needed on auto detect to allows any declared encoding later to 1302: * convert the actual content after the xmlDecl 1303: * 1304: * Returns 0 in case of success, -1 otherwise 1305: */ 1306: static int 1307: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1308: xmlCharEncodingHandlerPtr handler, int len) { 1309: int ret = 0; 1310: 1311: if (handler != NULL) { 1312: if (ctxt->input != NULL) { 1313: ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1314: } else { 1315: xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1316: NULL); 1317: return(-1); 1318: } 1319: /* 1320: * The parsing is now done in UTF8 natively 1321: */ 1322: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1323: } else 1324: return(-1); 1325: return(ret); 1326: } 1327: 1328: /** 1329: * xmlSwitchToEncoding: 1330: * @ctxt: the parser context 1331: * @handler: the encoding handler 1332: * 1333: * change the input functions when discovering the character encoding 1334: * of a given entity. 1335: * 1336: * Returns 0 in case of success, -1 otherwise 1337: */ 1338: int 1339: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1340: { 1341: return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1342: } 1343: 1344: /************************************************************************ 1345: * * 1346: * Commodity functions to handle entities processing * 1347: * * 1348: ************************************************************************/ 1349: 1350: /** 1351: * xmlFreeInputStream: 1352: * @input: an xmlParserInputPtr 1353: * 1354: * Free up an input stream. 1355: */ 1356: void 1357: xmlFreeInputStream(xmlParserInputPtr input) { 1358: if (input == NULL) return; 1359: 1360: if (input->filename != NULL) xmlFree((char *) input->filename); 1361: if (input->directory != NULL) xmlFree((char *) input->directory); 1362: if (input->encoding != NULL) xmlFree((char *) input->encoding); 1363: if (input->version != NULL) xmlFree((char *) input->version); 1364: if ((input->free != NULL) && (input->base != NULL)) 1365: input->free((xmlChar *) input->base); 1366: if (input->buf != NULL) 1367: xmlFreeParserInputBuffer(input->buf); 1368: xmlFree(input); 1369: } 1370: 1371: /** 1372: * xmlNewInputStream: 1373: * @ctxt: an XML parser context 1374: * 1375: * Create a new input stream structure. 1376: * 1377: * Returns the new input stream or NULL 1378: */ 1379: xmlParserInputPtr 1380: xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1381: xmlParserInputPtr input; 1382: 1383: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1384: if (input == NULL) { 1385: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1386: return(NULL); 1387: } 1388: memset(input, 0, sizeof(xmlParserInput)); 1389: input->line = 1; 1390: input->col = 1; 1391: input->standalone = -1; 1392: 1393: /* 1394: * If the context is NULL the id cannot be initialized, but that 1395: * should not happen while parsing which is the situation where 1396: * the id is actually needed. 1397: */ 1398: if (ctxt != NULL) 1399: input->id = ctxt->input_id++; 1400: 1401: return(input); 1402: } 1403: 1404: /** 1405: * xmlNewIOInputStream: 1406: * @ctxt: an XML parser context 1407: * @input: an I/O Input 1408: * @enc: the charset encoding if known 1409: * 1410: * Create a new input stream structure encapsulating the @input into 1411: * a stream suitable for the parser. 1412: * 1413: * Returns the new input stream or NULL 1414: */ 1415: xmlParserInputPtr 1416: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1417: xmlCharEncoding enc) { 1418: xmlParserInputPtr inputStream; 1419: 1420: if (input == NULL) return(NULL); 1421: if (xmlParserDebugEntities) 1422: xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1423: inputStream = xmlNewInputStream(ctxt); 1424: if (inputStream == NULL) { 1425: return(NULL); 1426: } 1427: inputStream->filename = NULL; 1428: inputStream->buf = input; 1429: inputStream->base = inputStream->buf->buffer->content; 1430: inputStream->cur = inputStream->buf->buffer->content; 1431: inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1432: if (enc != XML_CHAR_ENCODING_NONE) { 1433: xmlSwitchEncoding(ctxt, enc); 1434: } 1435: 1436: return(inputStream); 1437: } 1438: 1439: /** 1440: * xmlNewEntityInputStream: 1441: * @ctxt: an XML parser context 1442: * @entity: an Entity pointer 1443: * 1444: * Create a new input stream based on an xmlEntityPtr 1445: * 1446: * Returns the new input stream or NULL 1447: */ 1448: xmlParserInputPtr 1449: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1450: xmlParserInputPtr input; 1451: 1452: if (entity == NULL) { 1453: xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1454: NULL); 1455: return(NULL); 1456: } 1457: if (xmlParserDebugEntities) 1458: xmlGenericError(xmlGenericErrorContext, 1459: "new input from entity: %s\n", entity->name); 1460: if (entity->content == NULL) { 1461: switch (entity->etype) { 1462: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1463: xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1464: entity->name); 1465: break; 1466: case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1467: case XML_EXTERNAL_PARAMETER_ENTITY: 1468: return(xmlLoadExternalEntity((char *) entity->URI, 1469: (char *) entity->ExternalID, ctxt)); 1470: case XML_INTERNAL_GENERAL_ENTITY: 1471: xmlErrInternal(ctxt, 1472: "Internal entity %s without content !\n", 1473: entity->name); 1474: break; 1475: case XML_INTERNAL_PARAMETER_ENTITY: 1476: xmlErrInternal(ctxt, 1477: "Internal parameter entity %s without content !\n", 1478: entity->name); 1479: break; 1480: case XML_INTERNAL_PREDEFINED_ENTITY: 1481: xmlErrInternal(ctxt, 1482: "Predefined entity %s without content !\n", 1483: entity->name); 1484: break; 1485: } 1486: return(NULL); 1487: } 1488: input = xmlNewInputStream(ctxt); 1489: if (input == NULL) { 1490: return(NULL); 1491: } 1492: if (entity->URI != NULL) 1493: input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1494: input->base = entity->content; 1495: input->cur = entity->content; 1496: input->length = entity->length; 1497: input->end = &entity->content[input->length]; 1498: return(input); 1499: } 1500: 1501: /** 1502: * xmlNewStringInputStream: 1503: * @ctxt: an XML parser context 1504: * @buffer: an memory buffer 1505: * 1506: * Create a new input stream based on a memory buffer. 1507: * Returns the new input stream 1508: */ 1509: xmlParserInputPtr 1510: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1511: xmlParserInputPtr input; 1512: 1513: if (buffer == NULL) { 1514: xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1515: NULL); 1516: return(NULL); 1517: } 1518: if (xmlParserDebugEntities) 1519: xmlGenericError(xmlGenericErrorContext, 1520: "new fixed input: %.30s\n", buffer); 1521: input = xmlNewInputStream(ctxt); 1522: if (input == NULL) { 1523: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1524: return(NULL); 1525: } 1526: input->base = buffer; 1527: input->cur = buffer; 1528: input->length = xmlStrlen(buffer); 1529: input->end = &buffer[input->length]; 1530: return(input); 1531: } 1532: 1533: /** 1534: * xmlNewInputFromFile: 1535: * @ctxt: an XML parser context 1536: * @filename: the filename to use as entity 1537: * 1538: * Create a new input stream based on a file or an URL. 1539: * 1540: * Returns the new input stream or NULL in case of error 1541: */ 1542: xmlParserInputPtr 1543: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1544: xmlParserInputBufferPtr buf; 1545: xmlParserInputPtr inputStream; 1546: char *directory = NULL; 1547: xmlChar *URI = NULL; 1548: 1549: if (xmlParserDebugEntities) 1550: xmlGenericError(xmlGenericErrorContext, 1551: "new input from file: %s\n", filename); 1552: if (ctxt == NULL) return(NULL); 1553: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1554: if (buf == NULL) { 1555: if (filename == NULL) 1556: __xmlLoaderErr(ctxt, 1557: "failed to load external entity: NULL filename \n", 1558: NULL); 1559: else 1560: __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1561: (const char *) filename); 1562: return(NULL); 1563: } 1564: 1565: inputStream = xmlNewInputStream(ctxt); 1566: if (inputStream == NULL) 1567: return(NULL); 1568: 1569: inputStream->buf = buf; 1570: inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1571: if (inputStream == NULL) 1572: return(NULL); 1573: 1574: if (inputStream->filename == NULL) 1575: URI = xmlStrdup((xmlChar *) filename); 1576: else 1577: URI = xmlStrdup((xmlChar *) inputStream->filename); 1578: directory = xmlParserGetDirectory((const char *) URI); 1579: if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1580: inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1581: if (URI != NULL) xmlFree((char *) URI); 1582: inputStream->directory = directory; 1583: 1584: inputStream->base = inputStream->buf->buffer->content; 1585: inputStream->cur = inputStream->buf->buffer->content; 1586: inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1587: if ((ctxt->directory == NULL) && (directory != NULL)) 1588: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1589: return(inputStream); 1590: } 1591: 1592: /************************************************************************ 1593: * * 1594: * Commodity functions to handle parser contexts * 1595: * * 1596: ************************************************************************/ 1597: 1598: /** 1599: * xmlInitParserCtxt: 1600: * @ctxt: an XML parser context 1601: * 1602: * Initialize a parser context 1603: * 1604: * Returns 0 in case of success and -1 in case of error 1605: */ 1606: 1607: int 1608: xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1609: { 1610: xmlParserInputPtr input; 1611: 1612: if(ctxt==NULL) { 1613: xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1614: return(-1); 1615: } 1616: 1617: xmlDefaultSAXHandlerInit(); 1618: 1619: if (ctxt->dict == NULL) 1620: ctxt->dict = xmlDictCreate(); 1621: if (ctxt->dict == NULL) { 1622: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1623: return(-1); 1624: } 1625: if (ctxt->sax == NULL) 1626: ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1627: if (ctxt->sax == NULL) { 1628: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1629: return(-1); 1630: } 1631: else 1632: xmlSAXVersion(ctxt->sax, 2); 1633: 1634: ctxt->maxatts = 0; 1635: ctxt->atts = NULL; 1636: /* Allocate the Input stack */ 1637: if (ctxt->inputTab == NULL) { 1638: ctxt->inputTab = (xmlParserInputPtr *) 1639: xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1640: ctxt->inputMax = 5; 1641: } 1642: if (ctxt->inputTab == NULL) { 1643: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1644: ctxt->inputNr = 0; 1645: ctxt->inputMax = 0; 1646: ctxt->input = NULL; 1647: return(-1); 1648: } 1649: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1650: xmlFreeInputStream(input); 1651: } 1652: ctxt->inputNr = 0; 1653: ctxt->input = NULL; 1654: 1655: ctxt->version = NULL; 1656: ctxt->encoding = NULL; 1657: ctxt->standalone = -1; 1658: ctxt->hasExternalSubset = 0; 1659: ctxt->hasPErefs = 0; 1660: ctxt->html = 0; 1661: ctxt->external = 0; 1662: ctxt->instate = XML_PARSER_START; 1663: ctxt->token = 0; 1664: ctxt->directory = NULL; 1665: 1666: /* Allocate the Node stack */ 1667: if (ctxt->nodeTab == NULL) { 1668: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1669: ctxt->nodeMax = 10; 1670: } 1671: if (ctxt->nodeTab == NULL) { 1672: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1673: ctxt->nodeNr = 0; 1674: ctxt->nodeMax = 0; 1675: ctxt->node = NULL; 1676: ctxt->inputNr = 0; 1677: ctxt->inputMax = 0; 1678: ctxt->input = NULL; 1679: return(-1); 1680: } 1681: ctxt->nodeNr = 0; 1682: ctxt->node = NULL; 1683: 1684: /* Allocate the Name stack */ 1685: if (ctxt->nameTab == NULL) { 1686: ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1687: ctxt->nameMax = 10; 1688: } 1689: if (ctxt->nameTab == NULL) { 1690: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1691: ctxt->nodeNr = 0; 1692: ctxt->nodeMax = 0; 1693: ctxt->node = NULL; 1694: ctxt->inputNr = 0; 1695: ctxt->inputMax = 0; 1696: ctxt->input = NULL; 1697: ctxt->nameNr = 0; 1698: ctxt->nameMax = 0; 1699: ctxt->name = NULL; 1700: return(-1); 1701: } 1702: ctxt->nameNr = 0; 1703: ctxt->name = NULL; 1704: 1705: /* Allocate the space stack */ 1706: if (ctxt->spaceTab == NULL) { 1707: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1708: ctxt->spaceMax = 10; 1709: } 1710: if (ctxt->spaceTab == NULL) { 1711: xmlErrMemory(NULL, "cannot initialize parser context\n"); 1712: ctxt->nodeNr = 0; 1713: ctxt->nodeMax = 0; 1714: ctxt->node = NULL; 1715: ctxt->inputNr = 0; 1716: ctxt->inputMax = 0; 1717: ctxt->input = NULL; 1718: ctxt->nameNr = 0; 1719: ctxt->nameMax = 0; 1720: ctxt->name = NULL; 1721: ctxt->spaceNr = 0; 1722: ctxt->spaceMax = 0; 1723: ctxt->space = NULL; 1724: return(-1); 1725: } 1726: ctxt->spaceNr = 1; 1727: ctxt->spaceMax = 10; 1728: ctxt->spaceTab[0] = -1; 1729: ctxt->space = &ctxt->spaceTab[0]; 1730: ctxt->userData = ctxt; 1731: ctxt->myDoc = NULL; 1732: ctxt->wellFormed = 1; 1733: ctxt->nsWellFormed = 1; 1734: ctxt->valid = 1; 1735: ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1736: ctxt->validate = xmlDoValidityCheckingDefaultValue; 1737: ctxt->pedantic = xmlPedanticParserDefaultValue; 1738: ctxt->linenumbers = xmlLineNumbersDefaultValue; 1739: ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1740: if (ctxt->keepBlanks == 0) 1741: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1742: 1743: ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1744: ctxt->vctxt.userData = ctxt; 1745: ctxt->vctxt.error = xmlParserValidityError; 1746: ctxt->vctxt.warning = xmlParserValidityWarning; 1747: if (ctxt->validate) { 1748: if (xmlGetWarningsDefaultValue == 0) 1749: ctxt->vctxt.warning = NULL; 1750: else 1751: ctxt->vctxt.warning = xmlParserValidityWarning; 1752: ctxt->vctxt.nodeMax = 0; 1753: } 1754: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1755: ctxt->record_info = 0; 1756: ctxt->nbChars = 0; 1757: ctxt->checkIndex = 0; 1758: ctxt->inSubset = 0; 1759: ctxt->errNo = XML_ERR_OK; 1760: ctxt->depth = 0; 1761: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1762: ctxt->catalogs = NULL; 1763: ctxt->nbentities = 0; 1764: ctxt->input_id = 1; 1765: xmlInitNodeInfoSeq(&ctxt->node_seq); 1766: return(0); 1767: } 1768: 1769: /** 1770: * xmlFreeParserCtxt: 1771: * @ctxt: an XML parser context 1772: * 1773: * Free all the memory used by a parser context. However the parsed 1774: * document in ctxt->myDoc is not freed. 1775: */ 1776: 1777: void 1778: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1779: { 1780: xmlParserInputPtr input; 1781: 1782: if (ctxt == NULL) return; 1783: 1784: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1785: xmlFreeInputStream(input); 1786: } 1787: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1788: if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1789: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1790: if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1791: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1792: if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1793: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1794: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1795: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1796: #ifdef LIBXML_SAX1_ENABLED 1797: if ((ctxt->sax != NULL) && 1798: (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1799: #else 1800: if (ctxt->sax != NULL) 1801: #endif /* LIBXML_SAX1_ENABLED */ 1802: xmlFree(ctxt->sax); 1803: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1804: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1805: if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1806: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1807: if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1808: if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1809: if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1810: if (ctxt->attsDefault != NULL) 1811: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1812: if (ctxt->attsSpecial != NULL) 1813: xmlHashFree(ctxt->attsSpecial, NULL); 1814: if (ctxt->freeElems != NULL) { 1815: xmlNodePtr cur, next; 1816: 1817: cur = ctxt->freeElems; 1818: while (cur != NULL) { 1819: next = cur->next; 1820: xmlFree(cur); 1821: cur = next; 1822: } 1823: } 1824: if (ctxt->freeAttrs != NULL) { 1825: xmlAttrPtr cur, next; 1826: 1827: cur = ctxt->freeAttrs; 1828: while (cur != NULL) { 1829: next = cur->next; 1830: xmlFree(cur); 1831: cur = next; 1832: } 1833: } 1834: /* 1835: * cleanup the error strings 1836: */ 1837: if (ctxt->lastError.message != NULL) 1838: xmlFree(ctxt->lastError.message); 1839: if (ctxt->lastError.file != NULL) 1840: xmlFree(ctxt->lastError.file); 1841: if (ctxt->lastError.str1 != NULL) 1842: xmlFree(ctxt->lastError.str1); 1843: if (ctxt->lastError.str2 != NULL) 1844: xmlFree(ctxt->lastError.str2); 1845: if (ctxt->lastError.str3 != NULL) 1846: xmlFree(ctxt->lastError.str3); 1847: 1848: #ifdef LIBXML_CATALOG_ENABLED 1849: if (ctxt->catalogs != NULL) 1850: xmlCatalogFreeLocal(ctxt->catalogs); 1851: #endif 1852: xmlFree(ctxt); 1853: } 1854: 1855: /** 1856: * xmlNewParserCtxt: 1857: * 1858: * Allocate and initialize a new parser context. 1859: * 1860: * Returns the xmlParserCtxtPtr or NULL 1861: */ 1862: 1863: xmlParserCtxtPtr 1864: xmlNewParserCtxt(void) 1865: { 1866: xmlParserCtxtPtr ctxt; 1867: 1868: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1869: if (ctxt == NULL) { 1870: xmlErrMemory(NULL, "cannot allocate parser context\n"); 1871: return(NULL); 1872: } 1873: memset(ctxt, 0, sizeof(xmlParserCtxt)); 1874: if (xmlInitParserCtxt(ctxt) < 0) { 1875: xmlFreeParserCtxt(ctxt); 1876: return(NULL); 1877: } 1878: return(ctxt); 1879: } 1880: 1881: /************************************************************************ 1882: * * 1883: * Handling of node informations * 1884: * * 1885: ************************************************************************/ 1886: 1887: /** 1888: * xmlClearParserCtxt: 1889: * @ctxt: an XML parser context 1890: * 1891: * Clear (release owned resources) and reinitialize a parser context 1892: */ 1893: 1894: void 1895: xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1896: { 1897: if (ctxt==NULL) 1898: return; 1899: xmlClearNodeInfoSeq(&ctxt->node_seq); 1900: xmlCtxtReset(ctxt); 1901: } 1902: 1903: 1904: /** 1905: * xmlParserFindNodeInfo: 1906: * @ctx: an XML parser context 1907: * @node: an XML node within the tree 1908: * 1909: * Find the parser node info struct for a given node 1910: * 1911: * Returns an xmlParserNodeInfo block pointer or NULL 1912: */ 1913: const xmlParserNodeInfo * 1914: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1915: { 1916: unsigned long pos; 1917: 1918: if ((ctx == NULL) || (node == NULL)) 1919: return (NULL); 1920: /* Find position where node should be at */ 1921: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1922: if (pos < ctx->node_seq.length 1923: && ctx->node_seq.buffer[pos].node == node) 1924: return &ctx->node_seq.buffer[pos]; 1925: else 1926: return NULL; 1927: } 1928: 1929: 1930: /** 1931: * xmlInitNodeInfoSeq: 1932: * @seq: a node info sequence pointer 1933: * 1934: * -- Initialize (set to initial state) node info sequence 1935: */ 1936: void 1937: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1938: { 1939: if (seq == NULL) 1940: return; 1941: seq->length = 0; 1942: seq->maximum = 0; 1943: seq->buffer = NULL; 1944: } 1945: 1946: /** 1947: * xmlClearNodeInfoSeq: 1948: * @seq: a node info sequence pointer 1949: * 1950: * -- Clear (release memory and reinitialize) node 1951: * info sequence 1952: */ 1953: void 1954: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1955: { 1956: if (seq == NULL) 1957: return; 1958: if (seq->buffer != NULL) 1959: xmlFree(seq->buffer); 1960: xmlInitNodeInfoSeq(seq); 1961: } 1962: 1963: /** 1964: * xmlParserFindNodeInfoIndex: 1965: * @seq: a node info sequence pointer 1966: * @node: an XML node pointer 1967: * 1968: * 1969: * xmlParserFindNodeInfoIndex : Find the index that the info record for 1970: * the given node is or should be at in a sorted sequence 1971: * 1972: * Returns a long indicating the position of the record 1973: */ 1974: unsigned long 1975: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1976: const xmlNodePtr node) 1977: { 1978: unsigned long upper, lower, middle; 1979: int found = 0; 1980: 1981: if ((seq == NULL) || (node == NULL)) 1982: return ((unsigned long) -1); 1983: 1984: /* Do a binary search for the key */ 1985: lower = 1; 1986: upper = seq->length; 1987: middle = 0; 1988: while (lower <= upper && !found) { 1989: middle = lower + (upper - lower) / 2; 1990: if (node == seq->buffer[middle - 1].node) 1991: found = 1; 1992: else if (node < seq->buffer[middle - 1].node) 1993: upper = middle - 1; 1994: else 1995: lower = middle + 1; 1996: } 1997: 1998: /* Return position */ 1999: if (middle == 0 || seq->buffer[middle - 1].node < node) 2000: return middle; 2001: else 2002: return middle - 1; 2003: } 2004: 2005: 2006: /** 2007: * xmlParserAddNodeInfo: 2008: * @ctxt: an XML parser context 2009: * @info: a node info sequence pointer 2010: * 2011: * Insert node info record into the sorted sequence 2012: */ 2013: void 2014: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2015: const xmlParserNodeInfoPtr info) 2016: { 2017: unsigned long pos; 2018: 2019: if ((ctxt == NULL) || (info == NULL)) return; 2020: 2021: /* Find pos and check to see if node is already in the sequence */ 2022: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2023: info->node); 2024: 2025: if ((pos < ctxt->node_seq.length) && 2026: (ctxt->node_seq.buffer != NULL) && 2027: (ctxt->node_seq.buffer[pos].node == info->node)) { 2028: ctxt->node_seq.buffer[pos] = *info; 2029: } 2030: 2031: /* Otherwise, we need to add new node to buffer */ 2032: else { 2033: if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 2034: xmlParserNodeInfo *tmp_buffer; 2035: unsigned int byte_size; 2036: 2037: if (ctxt->node_seq.maximum == 0) 2038: ctxt->node_seq.maximum = 2; 2039: byte_size = (sizeof(*ctxt->node_seq.buffer) * 2040: (2 * ctxt->node_seq.maximum)); 2041: 2042: if (ctxt->node_seq.buffer == NULL) 2043: tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2044: else 2045: tmp_buffer = 2046: (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2047: byte_size); 2048: 2049: if (tmp_buffer == NULL) { 2050: xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2051: return; 2052: } 2053: ctxt->node_seq.buffer = tmp_buffer; 2054: ctxt->node_seq.maximum *= 2; 2055: } 2056: 2057: /* If position is not at end, move elements out of the way */ 2058: if (pos != ctxt->node_seq.length) { 2059: unsigned long i; 2060: 2061: for (i = ctxt->node_seq.length; i > pos; i--) 2062: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2063: } 2064: 2065: /* Copy element and increase length */ 2066: ctxt->node_seq.buffer[pos] = *info; 2067: ctxt->node_seq.length++; 2068: } 2069: } 2070: 2071: /************************************************************************ 2072: * * 2073: * Defaults settings * 2074: * * 2075: ************************************************************************/ 2076: /** 2077: * xmlPedanticParserDefault: 2078: * @val: int 0 or 1 2079: * 2080: * Set and return the previous value for enabling pedantic warnings. 2081: * 2082: * Returns the last value for 0 for no substitution, 1 for substitution. 2083: */ 2084: 2085: int 2086: xmlPedanticParserDefault(int val) { 2087: int old = xmlPedanticParserDefaultValue; 2088: 2089: xmlPedanticParserDefaultValue = val; 2090: return(old); 2091: } 2092: 2093: /** 2094: * xmlLineNumbersDefault: 2095: * @val: int 0 or 1 2096: * 2097: * Set and return the previous value for enabling line numbers in elements 2098: * contents. This may break on old application and is turned off by default. 2099: * 2100: * Returns the last value for 0 for no substitution, 1 for substitution. 2101: */ 2102: 2103: int 2104: xmlLineNumbersDefault(int val) { 2105: int old = xmlLineNumbersDefaultValue; 2106: 2107: xmlLineNumbersDefaultValue = val; 2108: return(old); 2109: } 2110: 2111: /** 2112: * xmlSubstituteEntitiesDefault: 2113: * @val: int 0 or 1 2114: * 2115: * Set and return the previous value for default entity support. 2116: * Initially the parser always keep entity references instead of substituting 2117: * entity values in the output. This function has to be used to change the 2118: * default parser behavior 2119: * SAX::substituteEntities() has to be used for changing that on a file by 2120: * file basis. 2121: * 2122: * Returns the last value for 0 for no substitution, 1 for substitution. 2123: */ 2124: 2125: int 2126: xmlSubstituteEntitiesDefault(int val) { 2127: int old = xmlSubstituteEntitiesDefaultValue; 2128: 2129: xmlSubstituteEntitiesDefaultValue = val; 2130: return(old); 2131: } 2132: 2133: /** 2134: * xmlKeepBlanksDefault: 2135: * @val: int 0 or 1 2136: * 2137: * Set and return the previous value for default blanks text nodes support. 2138: * The 1.x version of the parser used an heuristic to try to detect 2139: * ignorable white spaces. As a result the SAX callback was generating 2140: * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2141: * using the DOM output text nodes containing those blanks were not generated. 2142: * The 2.x and later version will switch to the XML standard way and 2143: * ignorableWhitespace() are only generated when running the parser in 2144: * validating mode and when the current element doesn't allow CDATA or 2145: * mixed content. 2146: * This function is provided as a way to force the standard behavior 2147: * on 1.X libs and to switch back to the old mode for compatibility when 2148: * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2149: * by using xmlIsBlankNode() commodity function to detect the "empty" 2150: * nodes generated. 2151: * This value also affect autogeneration of indentation when saving code 2152: * if blanks sections are kept, indentation is not generated. 2153: * 2154: * Returns the last value for 0 for no substitution, 1 for substitution. 2155: */ 2156: 2157: int 2158: xmlKeepBlanksDefault(int val) { 2159: int old = xmlKeepBlanksDefaultValue; 2160: 2161: xmlKeepBlanksDefaultValue = val; 2162: if (!val) xmlIndentTreeOutput = 1; 2163: return(old); 2164: } 2165: 2166: #define bottom_parserInternals 2167: #include "elfgcchack.h"