embedaddon/libxml2/parserInternals.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parserInternals.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:29 2014 UTC (9 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD

libxml2 2.9.1

1: /* 2: * parserInternals.c : Internal routines (and obsolete ones) needed for the 3: * XML and HTML parsers. 4: * 5: * See Copyright for the status of this software. 6: * 7: * daniel@veillard.com 8: */ 9: 10: #define IN_LIBXML 11: #include "libxml.h" 12: 13: #if defined(WIN32) && !defined (__CYGWIN__) 14: #define XML_DIR_SEP '\\' 15: #else 16: #define XML_DIR_SEP '/' 17: #endif 18: 19: #include <string.h> 20: #ifdef HAVE_CTYPE_H 21: #include <ctype.h> 22: #endif 23: #ifdef HAVE_STDLIB_H 24: #include <stdlib.h> 25: #endif 26: #ifdef HAVE_SYS_STAT_H 27: #include <sys/stat.h> 28: #endif 29: #ifdef HAVE_FCNTL_H 30: #include <fcntl.h> 31: #endif 32: #ifdef HAVE_UNISTD_H 33: #include <unistd.h> 34: #endif 35: #ifdef HAVE_ZLIB_H 36: #include <zlib.h> 37: #endif 38: 39: #include <libxml/xmlmemory.h> 40: #include <libxml/tree.h> 41: #include <libxml/parser.h> 42: #include <libxml/parserInternals.h> 43: #include <libxml/valid.h> 44: #include <libxml/entities.h> 45: #include <libxml/xmlerror.h> 46: #include <libxml/encoding.h> 47: #include <libxml/valid.h> 48: #include <libxml/xmlIO.h> 49: #include <libxml/uri.h> 50: #include <libxml/dict.h> 51: #include <libxml/SAX.h> 52: #ifdef LIBXML_CATALOG_ENABLED 53: #include <libxml/catalog.h> 54: #endif 55: #include <libxml/globals.h> 56: #include <libxml/chvalid.h> 57: 58: #include "buf.h" 59: #include "enc.h" 60: 61: /* 62: * Various global defaults for parsing 63: */ 64: 65: /** 66: * xmlCheckVersion: 67: * @version: the include version number 68: * 69: * check the compiled lib version against the include one. 
70: * This can warn or immediately kill the application 71: */ 72: void 73: xmlCheckVersion(int version) { 74: int myversion = (int) LIBXML_VERSION; 75: 76: xmlInitParser(); 77: 78: if ((myversion / 10000) != (version / 10000)) { 79: xmlGenericError(xmlGenericErrorContext, 80: "Fatal: program compiled against libxml %d using libxml %d\n", 81: (version / 10000), (myversion / 10000)); 82: fprintf(stderr, 83: "Fatal: program compiled against libxml %d using libxml %d\n", 84: (version / 10000), (myversion / 10000)); 85: } 86: if ((myversion / 100) < (version / 100)) { 87: xmlGenericError(xmlGenericErrorContext, 88: "Warning: program compiled against libxml %d using older %d\n", 89: (version / 100), (myversion / 100)); 90: } 91: } 92: 93: 94: /************************************************************************ 95: * * 96: * Some factorized error routines * 97: * * 98: ************************************************************************/ 99: 100: 101: /** 102: * xmlErrMemory: 103: * @ctxt: an XML parser context 104: * @extra: extra informations 105: * 106: * Handle a redefinition of attribute error 107: */ 108: void 109: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 110: { 111: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 112: (ctxt->instate == XML_PARSER_EOF)) 113: return; 114: if (ctxt != NULL) { 115: ctxt->errNo = XML_ERR_NO_MEMORY; 116: ctxt->instate = XML_PARSER_EOF; 117: ctxt->disableSAX = 1; 118: } 119: if (extra) 120: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 121: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 122: NULL, NULL, 0, 0, 123: "Memory allocation failed : %s\n", extra); 124: else 125: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 126: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 127: NULL, NULL, 0, 0, "Memory allocation failed\n"); 128: } 129: 130: /** 131: * __xmlErrEncoding: 132: * @ctxt: an XML parser context 133: * @xmlerr: the error number 134: * @msg: the error message 135: * 
@str1: an string info 136: * @str2: an string info 137: * 138: * Handle an encoding error 139: */ 140: void 141: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 142: const char *msg, const xmlChar * str1, const xmlChar * str2) 143: { 144: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 145: (ctxt->instate == XML_PARSER_EOF)) 146: return; 147: if (ctxt != NULL) 148: ctxt->errNo = xmlerr; 149: __xmlRaiseError(NULL, NULL, NULL, 150: ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 151: NULL, 0, (const char *) str1, (const char *) str2, 152: NULL, 0, 0, msg, str1, str2); 153: if (ctxt != NULL) { 154: ctxt->wellFormed = 0; 155: if (ctxt->recovery == 0) 156: ctxt->disableSAX = 1; 157: } 158: } 159: 160: /** 161: * xmlErrInternal: 162: * @ctxt: an XML parser context 163: * @msg: the error message 164: * @str: error informations 165: * 166: * Handle an internal error 167: */ 168: static void 169: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 170: { 171: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 172: (ctxt->instate == XML_PARSER_EOF)) 173: return; 174: if (ctxt != NULL) 175: ctxt->errNo = XML_ERR_INTERNAL_ERROR; 176: __xmlRaiseError(NULL, NULL, NULL, 177: ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 178: XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 179: 0, 0, msg, str); 180: if (ctxt != NULL) { 181: ctxt->wellFormed = 0; 182: if (ctxt->recovery == 0) 183: ctxt->disableSAX = 1; 184: } 185: } 186: 187: /** 188: * xmlErrEncodingInt: 189: * @ctxt: an XML parser context 190: * @error: the error number 191: * @msg: the error message 192: * @val: an integer value 193: * 194: * n encoding error 195: */ 196: static void 197: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 198: const char *msg, int val) 199: { 200: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 201: (ctxt->instate == XML_PARSER_EOF)) 202: return; 203: if (ctxt != NULL) 204: ctxt->errNo = error; 205: __xmlRaiseError(NULL, 
NULL, NULL, 206: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 207: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 208: if (ctxt != NULL) { 209: ctxt->wellFormed = 0; 210: if (ctxt->recovery == 0) 211: ctxt->disableSAX = 1; 212: } 213: } 214: 215: /** 216: * xmlIsLetter: 217: * @c: an unicode character (int) 218: * 219: * Check whether the character is allowed by the production 220: * [84] Letter ::= BaseChar | Ideographic 221: * 222: * Returns 0 if not, non-zero otherwise 223: */ 224: int 225: xmlIsLetter(int c) { 226: return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 227: } 228: 229: /************************************************************************ 230: * * 231: * Input handling functions for progressive parsing * 232: * * 233: ************************************************************************/ 234: 235: /* #define DEBUG_INPUT */ 236: /* #define DEBUG_STACK */ 237: /* #define DEBUG_PUSH */ 238: 239: 240: /* we need to keep enough input to show errors in context */ 241: #define LINE_LEN 80 242: 243: #ifdef DEBUG_INPUT 244: #define CHECK_BUFFER(in) check_buffer(in) 245: 246: static 247: void check_buffer(xmlParserInputPtr in) { 248: if (in->base != xmlBufContent(in->buf->buffer)) { 249: xmlGenericError(xmlGenericErrorContext, 250: "xmlParserInput: base mismatch problem\n"); 251: } 252: if (in->cur < in->base) { 253: xmlGenericError(xmlGenericErrorContext, 254: "xmlParserInput: cur < base problem\n"); 255: } 256: if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 257: xmlGenericError(xmlGenericErrorContext, 258: "xmlParserInput: cur > base + use problem\n"); 259: } 260: xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 261: (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 262: xmlBufUse(in->buf->buffer)); 263: } 264: 265: #else 266: #define CHECK_BUFFER(in) 267: #endif 268: 269: 270: /** 271: * xmlParserInputRead: 272: * @in: an XML parser input 273: * @len: an indicative size for the lookahead 
274: * 275: * This function was internal and is deprecated. 276: * 277: * Returns -1 as this is an error to use it. 278: */ 279: int 280: xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 281: return(-1); 282: } 283: 284: /** 285: * xmlParserInputGrow: 286: * @in: an XML parser input 287: * @len: an indicative size for the lookahead 288: * 289: * This function increase the input for the parser. It tries to 290: * preserve pointers to the input buffer, and keep already read data 291: * 292: * Returns the amount of char read, or -1 in case of error, 0 indicate the 293: * end of this entity 294: */ 295: int 296: xmlParserInputGrow(xmlParserInputPtr in, int len) { 297: size_t ret; 298: size_t indx; 299: const xmlChar *content; 300: 301: if ((in == NULL) || (len < 0)) return(-1); 302: #ifdef DEBUG_INPUT 303: xmlGenericError(xmlGenericErrorContext, "Grow\n"); 304: #endif 305: if (in->buf == NULL) return(-1); 306: if (in->base == NULL) return(-1); 307: if (in->cur == NULL) return(-1); 308: if (in->buf->buffer == NULL) return(-1); 309: 310: CHECK_BUFFER(in); 311: 312: indx = in->cur - in->base; 313: if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 314: 315: CHECK_BUFFER(in); 316: 317: return(0); 318: } 319: if (in->buf->readcallback != NULL) { 320: ret = xmlParserInputBufferGrow(in->buf, len); 321: } else 322: return(0); 323: 324: /* 325: * NOTE : in->base may be a "dangling" i.e. freed pointer in this 326: * block, but we use it really as an integer to do some 327: * pointer arithmetic. Insure will raise it as a bug but in 328: * that specific case, that's not ! 
329: */ 330: 331: content = xmlBufContent(in->buf->buffer); 332: if (in->base != content) { 333: /* 334: * the buffer has been reallocated 335: */ 336: indx = in->cur - in->base; 337: in->base = content; 338: in->cur = &content[indx]; 339: } 340: in->end = xmlBufEnd(in->buf->buffer); 341: 342: CHECK_BUFFER(in); 343: 344: return(ret); 345: } 346: 347: /** 348: * xmlParserInputShrink: 349: * @in: an XML parser input 350: * 351: * This function removes used input for the parser. 352: */ 353: void 354: xmlParserInputShrink(xmlParserInputPtr in) { 355: size_t used; 356: size_t ret; 357: size_t indx; 358: const xmlChar *content; 359: 360: #ifdef DEBUG_INPUT 361: xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 362: #endif 363: if (in == NULL) return; 364: if (in->buf == NULL) return; 365: if (in->base == NULL) return; 366: if (in->cur == NULL) return; 367: if (in->buf->buffer == NULL) return; 368: 369: CHECK_BUFFER(in); 370: 371: used = in->cur - xmlBufContent(in->buf->buffer); 372: /* 373: * Do not shrink on large buffers whose only a tiny fraction 374: * was consumed 375: */ 376: if (used > INPUT_CHUNK) { 377: ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 378: if (ret > 0) { 379: in->cur -= ret; 380: in->consumed += ret; 381: } 382: in->end = xmlBufEnd(in->buf->buffer); 383: } 384: 385: CHECK_BUFFER(in); 386: 387: if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 388: return; 389: } 390: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 391: content = xmlBufContent(in->buf->buffer); 392: if (in->base != content) { 393: /* 394: * the buffer has been reallocated 395: */ 396: indx = in->cur - in->base; 397: in->base = content; 398: in->cur = &content[indx]; 399: } 400: in->end = xmlBufEnd(in->buf->buffer); 401: 402: CHECK_BUFFER(in); 403: } 404: 405: /************************************************************************ 406: * * 407: * UTF8 character input and related functions * 408: * * 409: 
************************************************************************/

/**
 * xmlNextChar:
 * @ctxt:  the XML parser context
 *
 * Skip to the next char of the input, updating the line/column
 * counters and nbChars. Does nothing when @ctxt is NULL, has no input
 * or is in state XML_PARSER_EOF.
 * When the context charset is UTF-8 the multi-byte sequence is checked;
 * on an invalid sequence an XML_ERR_INVALID_CHAR error is raised, the
 * charset is switched to ISO-8859-1 and a single byte is skipped.
 */

void
xmlNextChar(xmlParserCtxtPtr ctxt)
{
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
        (ctxt->input == NULL))
        return;

    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
            (ctxt->instate != XML_PARSER_COMMENT)) {
            /*
             * If we are at the end of the current entity and
             * the context allows it, we pop consumed entities
             * automatically.
             * the auto closing should be blocked in other cases
             */
            xmlPopInput(ctxt);
        } else {
            const unsigned char *cur;
            unsigned char c;

            /*
             *   2.11 End-of-Line Handling
             *   the literal two-character sequence "#xD#xA" or a standalone
             *   literal #xD, an XML processor must pass to the application
             *   the single character #xA.
             */
            if (*(ctxt->input->cur) == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else
                ctxt->input->col++;

            /*
             * We are supposed to handle UTF8, check it's valid
             * From rfc2044: encoding of the Unicode values on UTF-8:
             *
             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
             * 0000 0000-0000 007F   0xxxxxxx
             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
             *
             * Check for the 0x110000 limit too
             */
            cur = ctxt->input->cur;

            c = *cur;
            if (c & 0x80) {
                /*
                 * A lead byte must have its two top bits set: 10xxxxxx is
                 * a continuation byte and cannot start a character. This
                 * is the same test as the one done in xmlCurrentChar().
                 */
                if (((c & 0x40) == 0) || (c == 0xC0))
                    goto encoding_error;
                if (cur[1] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[1] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xe0) == 0xe0) {
                    unsigned int val;

                    if (cur[2] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if ((cur[2] & 0xc0) != 0x80)
                        goto encoding_error;
                    if ((c & 0xf0) == 0xf0) {
                        if (cur[3] == 0) {
                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                            cur = ctxt->input->cur;
                        }
                        if (((c & 0xf8) != 0xf0) ||
                            ((cur[3] & 0xc0) != 0x80))
                            goto encoding_error;
                        /* 4-byte code */
                        ctxt->input->cur += 4;
                        val = (cur[0] & 0x7) << 18;
                        val |= (cur[1] & 0x3f) << 12;
                        val |= (cur[2] & 0x3f) << 6;
                        val |= cur[3] & 0x3f;
                    } else {
                        /* 3-byte code */
                        ctxt->input->cur += 3;
                        val = (cur[0] & 0xf) << 12;
                        val |= (cur[1] & 0x3f) << 6;
                        val |= cur[2] & 0x3f;
                    }
                    /* surrogates, 0xFFFE/0xFFFF and values >= 0x110000 */
                    if (((val > 0xd7ff) && (val < 0xe000)) ||
                        ((val > 0xfffd) && (val < 0x10000)) ||
                        (val >= 0x110000)) {
                        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                          "Char 0x%X out of allowed range\n",
                                          val);
                    }
                } else
                    /* 2-byte code */
                    ctxt->input->cur += 2;
            } else
                /* 1-byte code */
                ctxt->input->cur++;

            ctxt->nbChars++;
            if (*ctxt->input->cur == 0)
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
        }
    } else {
        /*
         * Assume it's a fixed length encoding (1) with
         * a compatible encoding for the ASCII set, since
         * XML constructs only use < 128 chars
         */

        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++; ctxt->input->col = 1;
        } else
            ctxt->input->col++;
        ctxt->input->cur++;
        ctxt->nbChars++;
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
    if ((*ctxt->input->cur == '%') && (!ctxt->html))
        xmlParserHandlePEReference(ctxt);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
        xmlPopInput(ctxt);
    return;
encoding_error:
    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
        (ctxt->input->end - ctxt->input->cur < 4)) {
        /* fewer than 4 bytes left: do not dump bytes past the end */
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n",
                     NULL, NULL);
    } else {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    ctxt->input->cur++;
    return;
}

/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer.
Implement the end of line normalization: 579: * 2.11 End-of-Line Handling 580: * Wherever an external parsed entity or the literal entity value 581: * of an internal parsed entity contains either the literal two-character 582: * sequence "#xD#xA" or a standalone literal #xD, an XML processor 583: * must pass to the application the single character #xA. 584: * This behavior can conveniently be produced by normalizing all 585: * line breaks to #xA on input, before parsing.) 586: * 587: * Returns the current char value and its length 588: */ 589: 590: int 591: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 592: if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 593: if (ctxt->instate == XML_PARSER_EOF) 594: return(0); 595: 596: if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 597: *len = 1; 598: return((int) *ctxt->input->cur); 599: } 600: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 601: /* 602: * We are supposed to handle UTF8, check it's valid 603: * From rfc2044: encoding of the Unicode values on UTF-8: 604: * 605: * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) 606: * 0000 0000-0000 007F 0xxxxxxx 607: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 608: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 609: * 610: * Check for the 0x110000 limit too 611: */ 612: const unsigned char *cur = ctxt->input->cur; 613: unsigned char c; 614: unsigned int val; 615: 616: c = *cur; 617: if (c & 0x80) { 618: if (((c & 0x40) == 0) || (c == 0xC0)) 619: goto encoding_error; 620: if (cur[1] == 0) { 621: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 622: cur = ctxt->input->cur; 623: } 624: if ((cur[1] & 0xc0) != 0x80) 625: goto encoding_error; 626: if ((c & 0xe0) == 0xe0) { 627: if (cur[2] == 0) { 628: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 629: cur = ctxt->input->cur; 630: } 631: if ((cur[2] & 0xc0) != 0x80) 632: goto encoding_error; 633: if ((c & 0xf0) == 0xf0) { 634: if (cur[3] == 0) { 635: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 636: cur = ctxt->input->cur; 637: } 638: if (((c & 0xf8) != 0xf0) || 639: ((cur[3] & 0xc0) != 0x80)) 640: goto encoding_error; 641: /* 4-byte code */ 642: *len = 4; 643: val = (cur[0] & 0x7) << 18; 644: val |= (cur[1] & 0x3f) << 12; 645: val |= (cur[2] & 0x3f) << 6; 646: val |= cur[3] & 0x3f; 647: if (val < 0x10000) 648: goto encoding_error; 649: } else { 650: /* 3-byte code */ 651: *len = 3; 652: val = (cur[0] & 0xf) << 12; 653: val |= (cur[1] & 0x3f) << 6; 654: val |= cur[2] & 0x3f; 655: if (val < 0x800) 656: goto encoding_error; 657: } 658: } else { 659: /* 2-byte code */ 660: *len = 2; 661: val = (cur[0] & 0x1f) << 6; 662: val |= cur[1] & 0x3f; 663: if (val < 0x80) 664: goto encoding_error; 665: } 666: if (!IS_CHAR(val)) { 667: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 668: "Char 0x%X out of allowed range\n", val); 669: } 670: return(val); 671: } else { 672: /* 1-byte code */ 673: *len = 1; 674: if (*ctxt->input->cur == 0) 675: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 676: if ((*ctxt->input->cur == 0) && 677: (ctxt->input->end > ctxt->input->cur)) { 678: 
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 679: "Char 0x0 out of allowed range\n", 0); 680: } 681: if (*ctxt->input->cur == 0xD) { 682: if (ctxt->input->cur[1] == 0xA) { 683: ctxt->nbChars++; 684: ctxt->input->cur++; 685: } 686: return(0xA); 687: } 688: return((int) *ctxt->input->cur); 689: } 690: } 691: /* 692: * Assume it's a fixed length encoding (1) with 693: * a compatible encoding for the ASCII set, since 694: * XML constructs only use < 128 chars 695: */ 696: *len = 1; 697: if (*ctxt->input->cur == 0xD) { 698: if (ctxt->input->cur[1] == 0xA) { 699: ctxt->nbChars++; 700: ctxt->input->cur++; 701: } 702: return(0xA); 703: } 704: return((int) *ctxt->input->cur); 705: encoding_error: 706: /* 707: * An encoding problem may arise from a truncated input buffer 708: * splitting a character in the middle. In that case do not raise 709: * an error but return 0 to endicate an end of stream problem 710: */ 711: if (ctxt->input->end - ctxt->input->cur < 4) { 712: *len = 0; 713: return(0); 714: } 715: 716: /* 717: * If we detect an UTF8 error that probably mean that the 718: * input encoding didn't get properly advertised in the 719: * declaration header. Report the error and switch the encoding 720: * to ISO-Latin-1 (if you don't like this policy, just declare the 721: * encoding !) 
722: */ 723: { 724: char buffer[150]; 725: 726: snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 727: ctxt->input->cur[0], ctxt->input->cur[1], 728: ctxt->input->cur[2], ctxt->input->cur[3]); 729: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 730: "Input is not proper UTF-8, indicate encoding !\n%s", 731: BAD_CAST buffer, NULL); 732: } 733: ctxt->charset = XML_CHAR_ENCODING_8859_1; 734: *len = 1; 735: return((int) *ctxt->input->cur); 736: } 737: 738: /** 739: * xmlStringCurrentChar: 740: * @ctxt: the XML parser context 741: * @cur: pointer to the beginning of the char 742: * @len: pointer to the length of the char read 743: * 744: * The current char value, if using UTF-8 this may actually span multiple 745: * bytes in the input buffer. 746: * 747: * Returns the current char value and its length 748: */ 749: 750: int 751: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 752: { 753: if ((len == NULL) || (cur == NULL)) return(0); 754: if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 755: /* 756: * We are supposed to handle UTF8, check it's valid 757: * From rfc2044: encoding of the Unicode values on UTF-8: 758: * 759: * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) 760: * 0000 0000-0000 007F 0xxxxxxx 761: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 762: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 763: * 764: * Check for the 0x110000 limit too 765: */ 766: unsigned char c; 767: unsigned int val; 768: 769: c = *cur; 770: if (c & 0x80) { 771: if ((cur[1] & 0xc0) != 0x80) 772: goto encoding_error; 773: if ((c & 0xe0) == 0xe0) { 774: 775: if ((cur[2] & 0xc0) != 0x80) 776: goto encoding_error; 777: if ((c & 0xf0) == 0xf0) { 778: if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 779: goto encoding_error; 780: /* 4-byte code */ 781: *len = 4; 782: val = (cur[0] & 0x7) << 18; 783: val |= (cur[1] & 0x3f) << 12; 784: val |= (cur[2] & 0x3f) << 6; 785: val |= cur[3] & 0x3f; 786: } else { 787: /* 3-byte code */ 788: *len = 3; 789: val = (cur[0] & 0xf) << 12; 790: val |= (cur[1] & 0x3f) << 6; 791: val |= cur[2] & 0x3f; 792: } 793: } else { 794: /* 2-byte code */ 795: *len = 2; 796: val = (cur[0] & 0x1f) << 6; 797: val |= cur[1] & 0x3f; 798: } 799: if (!IS_CHAR(val)) { 800: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 801: "Char 0x%X out of allowed range\n", val); 802: } 803: return (val); 804: } else { 805: /* 1-byte code */ 806: *len = 1; 807: return ((int) *cur); 808: } 809: } 810: /* 811: * Assume it's a fixed length encoding (1) with 812: * a compatible encoding for the ASCII set, since 813: * XML constructs only use < 128 chars 814: */ 815: *len = 1; 816: return ((int) *cur); 817: encoding_error: 818: 819: /* 820: * An encoding problem may arise from a truncated input buffer 821: * splitting a character in the middle. 
In that case do not raise 822: * an error but return 0 to endicate an end of stream problem 823: */ 824: if ((ctxt == NULL) || (ctxt->input == NULL) || 825: (ctxt->input->end - ctxt->input->cur < 4)) { 826: *len = 0; 827: return(0); 828: } 829: /* 830: * If we detect an UTF8 error that probably mean that the 831: * input encoding didn't get properly advertised in the 832: * declaration header. Report the error and switch the encoding 833: * to ISO-Latin-1 (if you don't like this policy, just declare the 834: * encoding !) 835: */ 836: { 837: char buffer[150]; 838: 839: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 840: ctxt->input->cur[0], ctxt->input->cur[1], 841: ctxt->input->cur[2], ctxt->input->cur[3]); 842: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 843: "Input is not proper UTF-8, indicate encoding !\n%s", 844: BAD_CAST buffer, NULL); 845: } 846: *len = 1; 847: return ((int) *cur); 848: } 849: 850: /** 851: * xmlCopyCharMultiByte: 852: * @out: pointer to an array of xmlChar 853: * @val: the char value 854: * 855: * append the char value in the array 856: * 857: * Returns the number of xmlChar written 858: */ 859: int 860: xmlCopyCharMultiByte(xmlChar *out, int val) { 861: if (out == NULL) return(0); 862: /* 863: * We are supposed to handle UTF8, check it's valid 864: * From rfc2044: encoding of the Unicode values on UTF-8: 865: * 866: * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) 867: * 0000 0000-0000 007F 0xxxxxxx 868: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 869: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 870: */ 871: if (val >= 0x80) { 872: xmlChar *savedout = out; 873: int bits; 874: if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 875: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 876: else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 877: else { 878: xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 879: "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 880: val); 881: return(0); 882: } 883: for ( ; bits >= 0; bits-= 6) 884: *out++= ((val >> bits) & 0x3F) | 0x80 ; 885: return (out - savedout); 886: } 887: *out = (xmlChar) val; 888: return 1; 889: } 890: 891: /** 892: * xmlCopyChar: 893: * @len: Ignored, compatibility 894: * @out: pointer to an array of xmlChar 895: * @val: the char value 896: * 897: * append the char value in the array 898: * 899: * Returns the number of xmlChar written 900: */ 901: 902: int 903: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 904: if (out == NULL) return(0); 905: /* the len parameter is ignored */ 906: if (val >= 0x80) { 907: return(xmlCopyCharMultiByte (out, val)); 908: } 909: *out = (xmlChar) val; 910: return 1; 911: } 912: 913: /************************************************************************ 914: * * 915: * Commodity functions to switch encodings * 916: * * 917: ************************************************************************/ 918: 919: static int 920: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 921: xmlCharEncodingHandlerPtr handler, int len); 922: static int 923: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 924: xmlCharEncodingHandlerPtr handler, int len); 925: /** 926: * xmlSwitchEncoding: 927: * @ctxt: the parser context 928: * @enc: the encoding value (number) 929: * 930: * change the input functions when discovering the character encoding 931: * 
of a given entity. 932: * 933: * Returns 0 in case of success, -1 otherwise 934: */ 935: int 936: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 937: { 938: xmlCharEncodingHandlerPtr handler; 939: int len = -1; 940: 941: if (ctxt == NULL) return(-1); 942: switch (enc) { 943: case XML_CHAR_ENCODING_ERROR: 944: __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 945: "encoding unknown\n", NULL, NULL); 946: return(-1); 947: case XML_CHAR_ENCODING_NONE: 948: /* let's assume it's UTF-8 without the XML decl */ 949: ctxt->charset = XML_CHAR_ENCODING_UTF8; 950: return(0); 951: case XML_CHAR_ENCODING_UTF8: 952: /* default encoding, no conversion should be needed */ 953: ctxt->charset = XML_CHAR_ENCODING_UTF8; 954: 955: /* 956: * Errata on XML-1.0 June 20 2001 957: * Specific handling of the Byte Order Mark for 958: * UTF-8 959: */ 960: if ((ctxt->input != NULL) && 961: (ctxt->input->cur[0] == 0xEF) && 962: (ctxt->input->cur[1] == 0xBB) && 963: (ctxt->input->cur[2] == 0xBF)) { 964: ctxt->input->cur += 3; 965: } 966: return(0); 967: case XML_CHAR_ENCODING_UTF16LE: 968: case XML_CHAR_ENCODING_UTF16BE: 969: /*The raw input characters are encoded 970: *in UTF-16. As we expect this function 971: *to be called after xmlCharEncInFunc, we expect 972: *ctxt->input->cur to contain UTF-8 encoded characters. 973: *So the raw UTF16 Byte Order Mark 974: *has also been converted into 975: *an UTF-8 BOM. Let's skip that BOM. 
976: */ 977: if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 978: (ctxt->input->cur[0] == 0xEF) && 979: (ctxt->input->cur[1] == 0xBB) && 980: (ctxt->input->cur[2] == 0xBF)) { 981: ctxt->input->cur += 3; 982: } 983: len = 90; 984: break; 985: case XML_CHAR_ENCODING_UCS2: 986: len = 90; 987: break; 988: case XML_CHAR_ENCODING_UCS4BE: 989: case XML_CHAR_ENCODING_UCS4LE: 990: case XML_CHAR_ENCODING_UCS4_2143: 991: case XML_CHAR_ENCODING_UCS4_3412: 992: len = 180; 993: break; 994: case XML_CHAR_ENCODING_EBCDIC: 995: case XML_CHAR_ENCODING_8859_1: 996: case XML_CHAR_ENCODING_8859_2: 997: case XML_CHAR_ENCODING_8859_3: 998: case XML_CHAR_ENCODING_8859_4: 999: case XML_CHAR_ENCODING_8859_5: 1000: case XML_CHAR_ENCODING_8859_6: 1001: case XML_CHAR_ENCODING_8859_7: 1002: case XML_CHAR_ENCODING_8859_8: 1003: case XML_CHAR_ENCODING_8859_9: 1004: case XML_CHAR_ENCODING_ASCII: 1005: case XML_CHAR_ENCODING_2022_JP: 1006: case XML_CHAR_ENCODING_SHIFT_JIS: 1007: case XML_CHAR_ENCODING_EUC_JP: 1008: len = 45; 1009: break; 1010: } 1011: handler = xmlGetCharEncodingHandler(enc); 1012: if (handler == NULL) { 1013: /* 1014: * Default handlers. 
1015: */ 1016: switch (enc) { 1017: case XML_CHAR_ENCODING_ASCII: 1018: /* default encoding, no conversion should be needed */ 1019: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1020: return(0); 1021: case XML_CHAR_ENCODING_UTF16LE: 1022: break; 1023: case XML_CHAR_ENCODING_UTF16BE: 1024: break; 1025: case XML_CHAR_ENCODING_UCS4LE: 1026: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1027: "encoding not supported %s\n", 1028: BAD_CAST "USC4 little endian", NULL); 1029: break; 1030: case XML_CHAR_ENCODING_UCS4BE: 1031: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1032: "encoding not supported %s\n", 1033: BAD_CAST "USC4 big endian", NULL); 1034: break; 1035: case XML_CHAR_ENCODING_EBCDIC: 1036: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1037: "encoding not supported %s\n", 1038: BAD_CAST "EBCDIC", NULL); 1039: break; 1040: case XML_CHAR_ENCODING_UCS4_2143: 1041: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1042: "encoding not supported %s\n", 1043: BAD_CAST "UCS4 2143", NULL); 1044: break; 1045: case XML_CHAR_ENCODING_UCS4_3412: 1046: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1047: "encoding not supported %s\n", 1048: BAD_CAST "UCS4 3412", NULL); 1049: break; 1050: case XML_CHAR_ENCODING_UCS2: 1051: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1052: "encoding not supported %s\n", 1053: BAD_CAST "UCS2", NULL); 1054: break; 1055: case XML_CHAR_ENCODING_8859_1: 1056: case XML_CHAR_ENCODING_8859_2: 1057: case XML_CHAR_ENCODING_8859_3: 1058: case XML_CHAR_ENCODING_8859_4: 1059: case XML_CHAR_ENCODING_8859_5: 1060: case XML_CHAR_ENCODING_8859_6: 1061: case XML_CHAR_ENCODING_8859_7: 1062: case XML_CHAR_ENCODING_8859_8: 1063: case XML_CHAR_ENCODING_8859_9: 1064: /* 1065: * We used to keep the internal content in the 1066: * document encoding however this turns being unmaintainable 1067: * So xmlGetCharEncodingHandler() will return non-null 1068: * values for this now. 
1069: */ 1070: if ((ctxt->inputNr == 1) && 1071: (ctxt->encoding == NULL) && 1072: (ctxt->input != NULL) && 1073: (ctxt->input->encoding != NULL)) { 1074: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1075: } 1076: ctxt->charset = enc; 1077: return(0); 1078: case XML_CHAR_ENCODING_2022_JP: 1079: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1080: "encoding not supported %s\n", 1081: BAD_CAST "ISO-2022-JP", NULL); 1082: break; 1083: case XML_CHAR_ENCODING_SHIFT_JIS: 1084: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1085: "encoding not supported %s\n", 1086: BAD_CAST "Shift_JIS", NULL); 1087: break; 1088: case XML_CHAR_ENCODING_EUC_JP: 1089: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1090: "encoding not supported %s\n", 1091: BAD_CAST "EUC-JP", NULL); 1092: break; 1093: default: 1094: break; 1095: } 1096: } 1097: if (handler == NULL) 1098: return(-1); 1099: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1100: return(xmlSwitchToEncodingInt(ctxt, handler, len)); 1101: } 1102: 1103: /** 1104: * xmlSwitchInputEncoding: 1105: * @ctxt: the parser context 1106: * @input: the input stream 1107: * @handler: the encoding handler 1108: * @len: the number of bytes to convert for the first line or -1 1109: * 1110: * change the input functions when discovering the character encoding 1111: * of a given entity. 1112: * 1113: * Returns 0 in case of success, -1 otherwise 1114: */ 1115: static int 1116: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1117: xmlCharEncodingHandlerPtr handler, int len) 1118: { 1119: int nbchars; 1120: 1121: if (handler == NULL) 1122: return (-1); 1123: if (input == NULL) 1124: return (-1); 1125: if (input->buf != NULL) { 1126: if (input->buf->encoder != NULL) { 1127: /* 1128: * Check in case the auto encoding detetection triggered 1129: * in already. 
1130: */ 1131: if (input->buf->encoder == handler) 1132: return (0); 1133: 1134: /* 1135: * "UTF-16" can be used for both LE and BE 1136: if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1137: BAD_CAST "UTF-16", 6)) && 1138: (!xmlStrncmp(BAD_CAST handler->name, 1139: BAD_CAST "UTF-16", 6))) { 1140: return(0); 1141: } 1142: */ 1143: 1144: /* 1145: * Note: this is a bit dangerous, but that's what it 1146: * takes to use nearly compatible signature for different 1147: * encodings. 1148: */ 1149: xmlCharEncCloseFunc(input->buf->encoder); 1150: input->buf->encoder = handler; 1151: return (0); 1152: } 1153: input->buf->encoder = handler; 1154: 1155: /* 1156: * Is there already some content down the pipe to convert ? 1157: */ 1158: if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1159: int processed; 1160: unsigned int use; 1161: 1162: /* 1163: * Specific handling of the Byte Order Mark for 1164: * UTF-16 1165: */ 1166: if ((handler->name != NULL) && 1167: (!strcmp(handler->name, "UTF-16LE") || 1168: !strcmp(handler->name, "UTF-16")) && 1169: (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1170: input->cur += 2; 1171: } 1172: if ((handler->name != NULL) && 1173: (!strcmp(handler->name, "UTF-16BE")) && 1174: (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1175: input->cur += 2; 1176: } 1177: /* 1178: * Errata on XML-1.0 June 20 2001 1179: * Specific handling of the Byte Order Mark for 1180: * UTF-8 1181: */ 1182: if ((handler->name != NULL) && 1183: (!strcmp(handler->name, "UTF-8")) && 1184: (input->cur[0] == 0xEF) && 1185: (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1186: input->cur += 3; 1187: } 1188: 1189: /* 1190: * Shrink the current input buffer. 
1191: * Move it as the raw buffer and create a new input buffer 1192: */ 1193: processed = input->cur - input->base; 1194: xmlBufShrink(input->buf->buffer, processed); 1195: input->buf->raw = input->buf->buffer; 1196: input->buf->buffer = xmlBufCreate(); 1197: input->buf->rawconsumed = processed; 1198: use = xmlBufUse(input->buf->raw); 1199: 1200: if (ctxt->html) { 1201: /* 1202: * convert as much as possible of the buffer 1203: */ 1204: nbchars = xmlCharEncInput(input->buf, 1); 1205: } else { 1206: /* 1207: * convert just enough to get 1208: * '<?xml version="1.0" encoding="xxx"?>' 1209: * parsed with the autodetected encoding 1210: * into the parser reading buffer. 1211: */ 1212: nbchars = xmlCharEncFirstLineInput(input->buf, len); 1213: } 1214: if (nbchars < 0) { 1215: xmlErrInternal(ctxt, 1216: "switching encoding: encoder error\n", 1217: NULL); 1218: return (-1); 1219: } 1220: input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1221: xmlBufResetInput(input->buf->buffer, input); 1222: } 1223: return (0); 1224: } else if (input->length == 0) { 1225: /* 1226: * When parsing a static memory array one must know the 1227: * size to be able to convert the buffer. 1228: */ 1229: xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1230: return (-1); 1231: } 1232: return (0); 1233: } 1234: 1235: /** 1236: * xmlSwitchInputEncoding: 1237: * @ctxt: the parser context 1238: * @input: the input stream 1239: * @handler: the encoding handler 1240: * 1241: * change the input functions when discovering the character encoding 1242: * of a given entity. 
1243: * 1244: * Returns 0 in case of success, -1 otherwise 1245: */ 1246: int 1247: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1248: xmlCharEncodingHandlerPtr handler) { 1249: return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1250: } 1251: 1252: /** 1253: * xmlSwitchToEncodingInt: 1254: * @ctxt: the parser context 1255: * @handler: the encoding handler 1256: * @len: the length to convert or -1 1257: * 1258: * change the input functions when discovering the character encoding 1259: * of a given entity, and convert only @len bytes of the output, this 1260: * is needed on auto detect to allows any declared encoding later to 1261: * convert the actual content after the xmlDecl 1262: * 1263: * Returns 0 in case of success, -1 otherwise 1264: */ 1265: static int 1266: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1267: xmlCharEncodingHandlerPtr handler, int len) { 1268: int ret = 0; 1269: 1270: if (handler != NULL) { 1271: if (ctxt->input != NULL) { 1272: ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1273: } else { 1274: xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1275: NULL); 1276: return(-1); 1277: } 1278: /* 1279: * The parsing is now done in UTF8 natively 1280: */ 1281: ctxt->charset = XML_CHAR_ENCODING_UTF8; 1282: } else 1283: return(-1); 1284: return(ret); 1285: } 1286: 1287: /** 1288: * xmlSwitchToEncoding: 1289: * @ctxt: the parser context 1290: * @handler: the encoding handler 1291: * 1292: * change the input functions when discovering the character encoding 1293: * of a given entity. 
1294: * 1295: * Returns 0 in case of success, -1 otherwise 1296: */ 1297: int 1298: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1299: { 1300: return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1301: } 1302: 1303: /************************************************************************ 1304: * * 1305: * Commodity functions to handle entities processing * 1306: * * 1307: ************************************************************************/ 1308: 1309: /** 1310: * xmlFreeInputStream: 1311: * @input: an xmlParserInputPtr 1312: * 1313: * Free up an input stream. 1314: */ 1315: void 1316: xmlFreeInputStream(xmlParserInputPtr input) { 1317: if (input == NULL) return; 1318: 1319: if (input->filename != NULL) xmlFree((char *) input->filename); 1320: if (input->directory != NULL) xmlFree((char *) input->directory); 1321: if (input->encoding != NULL) xmlFree((char *) input->encoding); 1322: if (input->version != NULL) xmlFree((char *) input->version); 1323: if ((input->free != NULL) && (input->base != NULL)) 1324: input->free((xmlChar *) input->base); 1325: if (input->buf != NULL) 1326: xmlFreeParserInputBuffer(input->buf); 1327: xmlFree(input); 1328: } 1329: 1330: /** 1331: * xmlNewInputStream: 1332: * @ctxt: an XML parser context 1333: * 1334: * Create a new input stream structure. 
1335: * 1336: * Returns the new input stream or NULL 1337: */ 1338: xmlParserInputPtr 1339: xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1340: xmlParserInputPtr input; 1341: 1342: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1343: if (input == NULL) { 1344: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1345: return(NULL); 1346: } 1347: memset(input, 0, sizeof(xmlParserInput)); 1348: input->line = 1; 1349: input->col = 1; 1350: input->standalone = -1; 1351: 1352: /* 1353: * If the context is NULL the id cannot be initialized, but that 1354: * should not happen while parsing which is the situation where 1355: * the id is actually needed. 1356: */ 1357: if (ctxt != NULL) 1358: input->id = ctxt->input_id++; 1359: 1360: return(input); 1361: } 1362: 1363: /** 1364: * xmlNewIOInputStream: 1365: * @ctxt: an XML parser context 1366: * @input: an I/O Input 1367: * @enc: the charset encoding if known 1368: * 1369: * Create a new input stream structure encapsulating the @input into 1370: * a stream suitable for the parser. 
1371: * 1372: * Returns the new input stream or NULL 1373: */ 1374: xmlParserInputPtr 1375: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1376: xmlCharEncoding enc) { 1377: xmlParserInputPtr inputStream; 1378: 1379: if (input == NULL) return(NULL); 1380: if (xmlParserDebugEntities) 1381: xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1382: inputStream = xmlNewInputStream(ctxt); 1383: if (inputStream == NULL) { 1384: return(NULL); 1385: } 1386: inputStream->filename = NULL; 1387: inputStream->buf = input; 1388: xmlBufResetInput(inputStream->buf->buffer, inputStream); 1389: 1390: if (enc != XML_CHAR_ENCODING_NONE) { 1391: xmlSwitchEncoding(ctxt, enc); 1392: } 1393: 1394: return(inputStream); 1395: } 1396: 1397: /** 1398: * xmlNewEntityInputStream: 1399: * @ctxt: an XML parser context 1400: * @entity: an Entity pointer 1401: * 1402: * Create a new input stream based on an xmlEntityPtr 1403: * 1404: * Returns the new input stream or NULL 1405: */ 1406: xmlParserInputPtr 1407: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1408: xmlParserInputPtr input; 1409: 1410: if (entity == NULL) { 1411: xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1412: NULL); 1413: return(NULL); 1414: } 1415: if (xmlParserDebugEntities) 1416: xmlGenericError(xmlGenericErrorContext, 1417: "new input from entity: %s\n", entity->name); 1418: if (entity->content == NULL) { 1419: switch (entity->etype) { 1420: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1421: xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1422: entity->name); 1423: break; 1424: case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1425: case XML_EXTERNAL_PARAMETER_ENTITY: 1426: return(xmlLoadExternalEntity((char *) entity->URI, 1427: (char *) entity->ExternalID, ctxt)); 1428: case XML_INTERNAL_GENERAL_ENTITY: 1429: xmlErrInternal(ctxt, 1430: "Internal entity %s without content !\n", 1431: entity->name); 1432: break; 1433: case XML_INTERNAL_PARAMETER_ENTITY: 
1434: xmlErrInternal(ctxt, 1435: "Internal parameter entity %s without content !\n", 1436: entity->name); 1437: break; 1438: case XML_INTERNAL_PREDEFINED_ENTITY: 1439: xmlErrInternal(ctxt, 1440: "Predefined entity %s without content !\n", 1441: entity->name); 1442: break; 1443: } 1444: return(NULL); 1445: } 1446: input = xmlNewInputStream(ctxt); 1447: if (input == NULL) { 1448: return(NULL); 1449: } 1450: if (entity->URI != NULL) 1451: input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1452: input->base = entity->content; 1453: input->cur = entity->content; 1454: input->length = entity->length; 1455: input->end = &entity->content[input->length]; 1456: return(input); 1457: } 1458: 1459: /** 1460: * xmlNewStringInputStream: 1461: * @ctxt: an XML parser context 1462: * @buffer: an memory buffer 1463: * 1464: * Create a new input stream based on a memory buffer. 1465: * Returns the new input stream 1466: */ 1467: xmlParserInputPtr 1468: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1469: xmlParserInputPtr input; 1470: 1471: if (buffer == NULL) { 1472: xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1473: NULL); 1474: return(NULL); 1475: } 1476: if (xmlParserDebugEntities) 1477: xmlGenericError(xmlGenericErrorContext, 1478: "new fixed input: %.30s\n", buffer); 1479: input = xmlNewInputStream(ctxt); 1480: if (input == NULL) { 1481: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1482: return(NULL); 1483: } 1484: input->base = buffer; 1485: input->cur = buffer; 1486: input->length = xmlStrlen(buffer); 1487: input->end = &buffer[input->length]; 1488: return(input); 1489: } 1490: 1491: /** 1492: * xmlNewInputFromFile: 1493: * @ctxt: an XML parser context 1494: * @filename: the filename to use as entity 1495: * 1496: * Create a new input stream based on a file or an URL. 
1497: * 1498: * Returns the new input stream or NULL in case of error 1499: */ 1500: xmlParserInputPtr 1501: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1502: xmlParserInputBufferPtr buf; 1503: xmlParserInputPtr inputStream; 1504: char *directory = NULL; 1505: xmlChar *URI = NULL; 1506: 1507: if (xmlParserDebugEntities) 1508: xmlGenericError(xmlGenericErrorContext, 1509: "new input from file: %s\n", filename); 1510: if (ctxt == NULL) return(NULL); 1511: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1512: if (buf == NULL) { 1513: if (filename == NULL) 1514: __xmlLoaderErr(ctxt, 1515: "failed to load external entity: NULL filename \n", 1516: NULL); 1517: else 1518: __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1519: (const char *) filename); 1520: return(NULL); 1521: } 1522: 1523: inputStream = xmlNewInputStream(ctxt); 1524: if (inputStream == NULL) 1525: return(NULL); 1526: 1527: inputStream->buf = buf; 1528: inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1529: if (inputStream == NULL) 1530: return(NULL); 1531: 1532: if (inputStream->filename == NULL) 1533: URI = xmlStrdup((xmlChar *) filename); 1534: else 1535: URI = xmlStrdup((xmlChar *) inputStream->filename); 1536: directory = xmlParserGetDirectory((const char *) URI); 1537: if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1538: inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1539: if (URI != NULL) xmlFree((char *) URI); 1540: inputStream->directory = directory; 1541: 1542: xmlBufResetInput(inputStream->buf->buffer, inputStream); 1543: if ((ctxt->directory == NULL) && (directory != NULL)) 1544: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1545: return(inputStream); 1546: } 1547: 1548: /************************************************************************ 1549: * * 1550: * Commodity functions to handle parser contexts * 1551: * * 1552: 
 ************************************************************************/

/**
 * xmlInitParserCtxt:
 * @ctxt:  an XML parser context
 *
 * Initialize a parser context: create the dictionary and the SAX handler
 * block when they are missing, allocate the input, node, name and space
 * stacks when they are missing, free any input still on the input stack,
 * and reset the parsing state and options to the library-wide defaults.
 *
 * String fields such as version, encoding and directory are simply set
 * to NULL, they are not freed here.
 *
 * Returns 0 in case of success and -1 in case of error
 */

int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr input;

    if(ctxt==NULL) {
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
        return(-1);
    }

    xmlDefaultSAXHandlerInit();

    /* reuse an existing dictionary, otherwise create one */
    if (ctxt->dict == NULL)
        ctxt->dict = xmlDictCreate();
    if (ctxt->dict == NULL) {
        xmlErrMemory(NULL, "cannot initialize parser context\n");
        return(-1);
    }
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);

    /*
     * reuse an existing SAX block, otherwise allocate one; in both cases
     * it is (re)initialized as a version 2 handler
     */
    if (ctxt->sax == NULL)
        ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
    if (ctxt->sax == NULL) {
        xmlErrMemory(NULL, "cannot initialize parser context\n");
        return(-1);
    }
    else
        xmlSAXVersion(ctxt->sax, 2);

    ctxt->maxatts = 0;
    ctxt->atts = NULL;
    /* Allocate the Input stack (5 slots) unless one is already there */
    if (ctxt->inputTab == NULL) {
        ctxt->inputTab = (xmlParserInputPtr *)
                    xmlMalloc(5 * sizeof(xmlParserInputPtr));
        ctxt->inputMax = 5;
    }
    if (ctxt->inputTab == NULL) {
        xmlErrMemory(NULL, "cannot initialize parser context\n");
        ctxt->inputNr = 0;
        ctxt->inputMax = 0;
        ctxt->input = NULL;
        return(-1);
    }
    /* free whatever inputs are still on the stack */
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
        xmlFreeInputStream(input);
    }
    ctxt->inputNr = 0;
    ctxt->input = NULL;

    ctxt->version = NULL;
    ctxt->encoding = NULL;
    ctxt->standalone = -1;
    ctxt->hasExternalSubset = 0;
    ctxt->hasPErefs = 0;
    ctxt->html = 0;
    ctxt->external = 0;
    ctxt->instate = XML_PARSER_START;
    ctxt->token = 0;
    ctxt->directory = NULL;

    /* Allocate the Node stack (10 slots) unless one is already there */
    if (ctxt->nodeTab == NULL) {
        ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
        ctxt->nodeMax = 10;
    }
    if (ctxt->nodeTab == NULL) {
        xmlErrMemory(NULL, "cannot initialize parser context\n");
        /* on failure the counters of the stacks set up so far are zeroed */
        ctxt->nodeNr = 0;
        ctxt->nodeMax = 0;
        ctxt->node = NULL;
        ctxt->inputNr = 0;
        ctxt->inputMax = 0;
        ctxt->input = NULL;
        return(-1);
    }
    ctxt->nodeNr = 0;
    ctxt->node = NULL;

    /* Allocate the Name stack (10 slots) unless one is already there */
    if (ctxt->nameTab == NULL) {
        ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
        ctxt->nameMax = 10;
    }
    if (ctxt->nameTab == NULL) {
        xmlErrMemory(NULL, "cannot initialize parser context\n");
        ctxt->nodeNr = 0;
        ctxt->nodeMax = 0;
        ctxt->node = NULL;
        ctxt->inputNr = 0;
        ctxt->inputMax = 0;
        ctxt->input = NULL;
        ctxt->nameNr = 0;
        ctxt->nameMax = 0;
        ctxt->name = NULL;
        return(-1);
    }
    ctxt->nameNr = 0;
    ctxt->name = NULL;

    /* Allocate the space stack (10 slots) unless one is already there */
    if (ctxt->spaceTab == NULL) {
        ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
        ctxt->spaceMax = 10;
    }
    if (ctxt->spaceTab == NULL) {
        xmlErrMemory(NULL, "cannot initialize parser context\n");
        ctxt->nodeNr = 0;
        ctxt->nodeMax = 0;
        ctxt->node = NULL;
        ctxt->inputNr = 0;
        ctxt->inputMax = 0;
        ctxt->input = NULL;
        ctxt->nameNr = 0;
        ctxt->nameMax = 0;
        ctxt->name = NULL;
        ctxt->spaceNr = 0;
        ctxt->spaceMax = 0;
        ctxt->space = NULL;
        return(-1);
    }
    /* the space stack starts with one entry holding -1 */
    ctxt->spaceNr = 1;
    ctxt->spaceMax = 10;
    ctxt->spaceTab[0] = -1;
    ctxt->space = &ctxt->spaceTab[0];
    ctxt->userData = ctxt;
    ctxt->myDoc = NULL;
    ctxt->wellFormed = 1;
    ctxt->nsWellFormed = 1;
    ctxt->valid = 1;
    /* parsing options are seeded from the library-wide default values */
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
    ctxt->pedantic = xmlPedanticParserDefaultValue;
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
    if (ctxt->keepBlanks == 0)
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;

    /* validation context: errors and warnings go to the parser callbacks */
    ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
    ctxt->vctxt.userData = ctxt;
    ctxt->vctxt.error = xmlParserValidityError;
    ctxt->vctxt.warning = xmlParserValidityWarning;
    if (ctxt->validate) {
        /* validity warnings are dropped when warnings are off by default */
        if (xmlGetWarningsDefaultValue == 0)
            ctxt->vctxt.warning = NULL;
        else
            ctxt->vctxt.warning = xmlParserValidityWarning;
        ctxt->vctxt.nodeMax = 0;
    }
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
    ctxt->record_info = 0;
    ctxt->nbChars = 0;
    ctxt->checkIndex = 0;
    ctxt->inSubset = 0;
    ctxt->errNo = XML_ERR_OK;
    ctxt->depth = 0;
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    ctxt->catalogs = NULL;
    ctxt->nbentities = 0;
    ctxt->sizeentities = 0;
    ctxt->sizeentcopy = 0;
    /* ids handed out by xmlNewInputStream() start at 1 */
    ctxt->input_id = 1;
    xmlInitNodeInfoSeq(&ctxt->node_seq);
    return(0);
}

/**
 * xmlFreeParserCtxt:
 * @ctxt:  an XML parser context
 *
 * Free all the memory used by a parser context. However the parsed
 * document in ctxt->myDoc is not freed.
1735: */ 1736: 1737: void 1738: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1739: { 1740: xmlParserInputPtr input; 1741: 1742: if (ctxt == NULL) return; 1743: 1744: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1745: xmlFreeInputStream(input); 1746: } 1747: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1748: if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1749: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1750: if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1751: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1752: if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1753: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1754: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1755: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1756: #ifdef LIBXML_SAX1_ENABLED 1757: if ((ctxt->sax != NULL) && 1758: (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1759: #else 1760: if (ctxt->sax != NULL) 1761: #endif /* LIBXML_SAX1_ENABLED */ 1762: xmlFree(ctxt->sax); 1763: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1764: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1765: if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1766: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1767: if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1768: if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1769: if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1770: if (ctxt->attsDefault != NULL) 1771: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1772: if (ctxt->attsSpecial != NULL) 1773: xmlHashFree(ctxt->attsSpecial, NULL); 1774: if (ctxt->freeElems != NULL) { 1775: xmlNodePtr cur, next; 1776: 1777: cur = ctxt->freeElems; 1778: while (cur != NULL) { 1779: next = cur->next; 1780: xmlFree(cur); 1781: cur = next; 1782: } 1783: } 1784: if (ctxt->freeAttrs != NULL) { 1785: xmlAttrPtr cur, next; 1786: 1787: 
cur = ctxt->freeAttrs; 1788: while (cur != NULL) { 1789: next = cur->next; 1790: xmlFree(cur); 1791: cur = next; 1792: } 1793: } 1794: /* 1795: * cleanup the error strings 1796: */ 1797: if (ctxt->lastError.message != NULL) 1798: xmlFree(ctxt->lastError.message); 1799: if (ctxt->lastError.file != NULL) 1800: xmlFree(ctxt->lastError.file); 1801: if (ctxt->lastError.str1 != NULL) 1802: xmlFree(ctxt->lastError.str1); 1803: if (ctxt->lastError.str2 != NULL) 1804: xmlFree(ctxt->lastError.str2); 1805: if (ctxt->lastError.str3 != NULL) 1806: xmlFree(ctxt->lastError.str3); 1807: 1808: #ifdef LIBXML_CATALOG_ENABLED 1809: if (ctxt->catalogs != NULL) 1810: xmlCatalogFreeLocal(ctxt->catalogs); 1811: #endif 1812: xmlFree(ctxt); 1813: } 1814: 1815: /** 1816: * xmlNewParserCtxt: 1817: * 1818: * Allocate and initialize a new parser context. 1819: * 1820: * Returns the xmlParserCtxtPtr or NULL 1821: */ 1822: 1823: xmlParserCtxtPtr 1824: xmlNewParserCtxt(void) 1825: { 1826: xmlParserCtxtPtr ctxt; 1827: 1828: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1829: if (ctxt == NULL) { 1830: xmlErrMemory(NULL, "cannot allocate parser context\n"); 1831: return(NULL); 1832: } 1833: memset(ctxt, 0, sizeof(xmlParserCtxt)); 1834: if (xmlInitParserCtxt(ctxt) < 0) { 1835: xmlFreeParserCtxt(ctxt); 1836: return(NULL); 1837: } 1838: return(ctxt); 1839: } 1840: 1841: /************************************************************************ 1842: * * 1843: * Handling of node informations * 1844: * * 1845: ************************************************************************/ 1846: 1847: /** 1848: * xmlClearParserCtxt: 1849: * @ctxt: an XML parser context 1850: * 1851: * Clear (release owned resources) and reinitialize a parser context 1852: */ 1853: 1854: void 1855: xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1856: { 1857: if (ctxt==NULL) 1858: return; 1859: xmlClearNodeInfoSeq(&ctxt->node_seq); 1860: xmlCtxtReset(ctxt); 1861: } 1862: 1863: 1864: /** 1865: * xmlParserFindNodeInfo: 1866: 
* @ctx: an XML parser context 1867: * @node: an XML node within the tree 1868: * 1869: * Find the parser node info struct for a given node 1870: * 1871: * Returns an xmlParserNodeInfo block pointer or NULL 1872: */ 1873: const xmlParserNodeInfo * 1874: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1875: { 1876: unsigned long pos; 1877: 1878: if ((ctx == NULL) || (node == NULL)) 1879: return (NULL); 1880: /* Find position where node should be at */ 1881: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1882: if (pos < ctx->node_seq.length 1883: && ctx->node_seq.buffer[pos].node == node) 1884: return &ctx->node_seq.buffer[pos]; 1885: else 1886: return NULL; 1887: } 1888: 1889: 1890: /** 1891: * xmlInitNodeInfoSeq: 1892: * @seq: a node info sequence pointer 1893: * 1894: * -- Initialize (set to initial state) node info sequence 1895: */ 1896: void 1897: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1898: { 1899: if (seq == NULL) 1900: return; 1901: seq->length = 0; 1902: seq->maximum = 0; 1903: seq->buffer = NULL; 1904: } 1905: 1906: /** 1907: * xmlClearNodeInfoSeq: 1908: * @seq: a node info sequence pointer 1909: * 1910: * -- Clear (release memory and reinitialize) node 1911: * info sequence 1912: */ 1913: void 1914: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1915: { 1916: if (seq == NULL) 1917: return; 1918: if (seq->buffer != NULL) 1919: xmlFree(seq->buffer); 1920: xmlInitNodeInfoSeq(seq); 1921: } 1922: 1923: /** 1924: * xmlParserFindNodeInfoIndex: 1925: * @seq: a node info sequence pointer 1926: * @node: an XML node pointer 1927: * 1928: * 1929: * xmlParserFindNodeInfoIndex : Find the index that the info record for 1930: * the given node is or should be at in a sorted sequence 1931: * 1932: * Returns a long indicating the position of the record 1933: */ 1934: unsigned long 1935: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1936: const xmlNodePtr node) 1937: { 1938: unsigned long upper, lower, middle; 1939: 
int found = 0; 1940: 1941: if ((seq == NULL) || (node == NULL)) 1942: return ((unsigned long) -1); 1943: 1944: /* Do a binary search for the key */ 1945: lower = 1; 1946: upper = seq->length; 1947: middle = 0; 1948: while (lower <= upper && !found) { 1949: middle = lower + (upper - lower) / 2; 1950: if (node == seq->buffer[middle - 1].node) 1951: found = 1; 1952: else if (node < seq->buffer[middle - 1].node) 1953: upper = middle - 1; 1954: else 1955: lower = middle + 1; 1956: } 1957: 1958: /* Return position */ 1959: if (middle == 0 || seq->buffer[middle - 1].node < node) 1960: return middle; 1961: else 1962: return middle - 1; 1963: } 1964: 1965: 1966: /** 1967: * xmlParserAddNodeInfo: 1968: * @ctxt: an XML parser context 1969: * @info: a node info sequence pointer 1970: * 1971: * Insert node info record into the sorted sequence 1972: */ 1973: void 1974: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1975: const xmlParserNodeInfoPtr info) 1976: { 1977: unsigned long pos; 1978: 1979: if ((ctxt == NULL) || (info == NULL)) return; 1980: 1981: /* Find pos and check to see if node is already in the sequence */ 1982: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1983: info->node); 1984: 1985: if ((pos < ctxt->node_seq.length) && 1986: (ctxt->node_seq.buffer != NULL) && 1987: (ctxt->node_seq.buffer[pos].node == info->node)) { 1988: ctxt->node_seq.buffer[pos] = *info; 1989: } 1990: 1991: /* Otherwise, we need to add new node to buffer */ 1992: else { 1993: if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 1994: xmlParserNodeInfo *tmp_buffer; 1995: unsigned int byte_size; 1996: 1997: if (ctxt->node_seq.maximum == 0) 1998: ctxt->node_seq.maximum = 2; 1999: byte_size = (sizeof(*ctxt->node_seq.buffer) * 2000: (2 * ctxt->node_seq.maximum)); 2001: 2002: if (ctxt->node_seq.buffer == NULL) 2003: tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2004: else 2005: tmp_buffer = 2006: (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2007: 
byte_size); 2008: 2009: if (tmp_buffer == NULL) { 2010: xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2011: return; 2012: } 2013: ctxt->node_seq.buffer = tmp_buffer; 2014: ctxt->node_seq.maximum *= 2; 2015: } 2016: 2017: /* If position is not at end, move elements out of the way */ 2018: if (pos != ctxt->node_seq.length) { 2019: unsigned long i; 2020: 2021: for (i = ctxt->node_seq.length; i > pos; i--) 2022: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2023: } 2024: 2025: /* Copy element and increase length */ 2026: ctxt->node_seq.buffer[pos] = *info; 2027: ctxt->node_seq.length++; 2028: } 2029: } 2030: 2031: /************************************************************************ 2032: * * 2033: * Defaults settings * 2034: * * 2035: ************************************************************************/ 2036: /** 2037: * xmlPedanticParserDefault: 2038: * @val: int 0 or 1 2039: * 2040: * Set and return the previous value for enabling pedantic warnings. 2041: * 2042: * Returns the last value for 0 for no substitution, 1 for substitution. 2043: */ 2044: 2045: int 2046: xmlPedanticParserDefault(int val) { 2047: int old = xmlPedanticParserDefaultValue; 2048: 2049: xmlPedanticParserDefaultValue = val; 2050: return(old); 2051: } 2052: 2053: /** 2054: * xmlLineNumbersDefault: 2055: * @val: int 0 or 1 2056: * 2057: * Set and return the previous value for enabling line numbers in elements 2058: * contents. This may break on old application and is turned off by default. 2059: * 2060: * Returns the last value for 0 for no substitution, 1 for substitution. 2061: */ 2062: 2063: int 2064: xmlLineNumbersDefault(int val) { 2065: int old = xmlLineNumbersDefaultValue; 2066: 2067: xmlLineNumbersDefaultValue = val; 2068: return(old); 2069: } 2070: 2071: /** 2072: * xmlSubstituteEntitiesDefault: 2073: * @val: int 0 or 1 2074: * 2075: * Set and return the previous value for default entity support. 
2076: * Initially the parser always keep entity references instead of substituting 2077: * entity values in the output. This function has to be used to change the 2078: * default parser behavior 2079: * SAX::substituteEntities() has to be used for changing that on a file by 2080: * file basis. 2081: * 2082: * Returns the last value for 0 for no substitution, 1 for substitution. 2083: */ 2084: 2085: int 2086: xmlSubstituteEntitiesDefault(int val) { 2087: int old = xmlSubstituteEntitiesDefaultValue; 2088: 2089: xmlSubstituteEntitiesDefaultValue = val; 2090: return(old); 2091: } 2092: 2093: /** 2094: * xmlKeepBlanksDefault: 2095: * @val: int 0 or 1 2096: * 2097: * Set and return the previous value for default blanks text nodes support. 2098: * The 1.x version of the parser used an heuristic to try to detect 2099: * ignorable white spaces. As a result the SAX callback was generating 2100: * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2101: * using the DOM output text nodes containing those blanks were not generated. 2102: * The 2.x and later version will switch to the XML standard way and 2103: * ignorableWhitespace() are only generated when running the parser in 2104: * validating mode and when the current element doesn't allow CDATA or 2105: * mixed content. 2106: * This function is provided as a way to force the standard behavior 2107: * on 1.X libs and to switch back to the old mode for compatibility when 2108: * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2109: * by using xmlIsBlankNode() commodity function to detect the "empty" 2110: * nodes generated. 2111: * This value also affect autogeneration of indentation when saving code 2112: * if blanks sections are kept, indentation is not generated. 2113: * 2114: * Returns the last value for 0 for no substitution, 1 for substitution. 
2115: */ 2116: 2117: int 2118: xmlKeepBlanksDefault(int val) { 2119: int old = xmlKeepBlanksDefaultValue; 2120: 2121: xmlKeepBlanksDefaultValue = val; 2122: if (!val) xmlIndentTreeOutput = 1; 2123: return(old); 2124: } 2125: 2126: #define bottom_parserInternals 2127: #include "elfgcchack.h"