embedaddon/libxml2/parser.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parser.c
Revision 1.1.1.3.2.1: download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:01:53 2014 UTC (11 years ago) by misho
Branches: v2_9_1p0
Diff to: branchpoint 1.1.1.3: preferred, unified

patch

1: /* 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3: * implemented on top of the SAX interfaces 4: * 5: * References: 6: * The XML specification: 7: * http://www.w3.org/TR/REC-xml 8: * Original 1.0 version: 9: * http://www.w3.org/TR/1998/REC-xml-19980210 10: * XML second edition working draft 11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12: * 13: * Okay this is a big file, the parser core is around 7000 lines, then it 14: * is followed by the progressive parser top routines, then the various 15: * high level APIs to call the parser and a few miscellaneous functions. 16: * A number of helper functions and deprecated ones have been moved to 17: * parserInternals.c to reduce this file size. 18: * As much as possible the functions are associated with their relative 19: * production in the XML specification. A few productions defining the 20: * different ranges of character are actually implanted either in 21: * parserInternals.h or parserInternals.c 22: * The DOM tree build is realized from the default SAX callbacks in 23: * the module SAX.c. 24: * The routines doing the validation checks are in valid.c and called either 25: * from the SAX callbacks or as standalone functions using a preparsed 26: * document. 27: * 28: * See Copyright for the status of this software. 
29: * 30: * daniel@veillard.com 31: */ 32: 33: #define IN_LIBXML 34: #include "libxml.h" 35: 36: #if defined(WIN32) && !defined (__CYGWIN__) 37: #define XML_DIR_SEP '\\' 38: #else 39: #define XML_DIR_SEP '/' 40: #endif 41: 42: #include <stdlib.h> 43: #include <limits.h> 44: #include <string.h> 45: #include <stdarg.h> 46: #include <libxml/xmlmemory.h> 47: #include <libxml/threads.h> 48: #include <libxml/globals.h> 49: #include <libxml/tree.h> 50: #include <libxml/parser.h> 51: #include <libxml/parserInternals.h> 52: #include <libxml/valid.h> 53: #include <libxml/entities.h> 54: #include <libxml/xmlerror.h> 55: #include <libxml/encoding.h> 56: #include <libxml/xmlIO.h> 57: #include <libxml/uri.h> 58: #ifdef LIBXML_CATALOG_ENABLED 59: #include <libxml/catalog.h> 60: #endif 61: #ifdef LIBXML_SCHEMAS_ENABLED 62: #include <libxml/xmlschemastypes.h> 63: #include <libxml/relaxng.h> 64: #endif 65: #ifdef HAVE_CTYPE_H 66: #include <ctype.h> 67: #endif 68: #ifdef HAVE_STDLIB_H 69: #include <stdlib.h> 70: #endif 71: #ifdef HAVE_SYS_STAT_H 72: #include <sys/stat.h> 73: #endif 74: #ifdef HAVE_FCNTL_H 75: #include <fcntl.h> 76: #endif 77: #ifdef HAVE_UNISTD_H 78: #include <unistd.h> 79: #endif 80: #ifdef HAVE_ZLIB_H 81: #include <zlib.h> 82: #endif 83: #ifdef HAVE_LZMA_H 84: #include <lzma.h> 85: #endif 86: 87: #include "buf.h" 88: #include "enc.h" 89: 90: static void 91: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92: 93: static xmlParserCtxtPtr 94: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95: const xmlChar *base, xmlParserCtxtPtr pctx); 96: 97: /************************************************************************ 98: * * 99: * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 100: * * 101: ************************************************************************/ 102: 103: #define XML_PARSER_BIG_ENTITY 1000 104: #define XML_PARSER_LOT_ENTITY 5000 105: 106: /* 107: * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 108: * replacement over the size in byte of the input indicates that you have 109: * and eponential behaviour. A value of 10 correspond to at least 3 entity 110: * replacement per byte of input. 111: */ 112: #define XML_PARSER_NON_LINEAR 10 113: 114: /* 115: * xmlParserEntityCheck 116: * 117: * Function to check non-linear entity expansion behaviour 118: * This is here to detect and stop exponential linear entity expansion 119: * This is not a limitation of the parser but a safety 120: * boundary feature. It can be disabled with the XML_PARSE_HUGE 121: * parser option. 122: */ 123: static int 124: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 125: xmlEntityPtr ent, size_t replacement) 126: { 127: size_t consumed = 0; 128: 129: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 130: return (0); 131: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 132: return (1); 133: if (replacement != 0) { 134: if (replacement < XML_MAX_TEXT_LENGTH) 135: return(0); 136: 137: /* 138: * If the volume of entity copy reaches 10 times the 139: * amount of parsed data and over the large text threshold 140: * then that's very likely to be an abuse. 
141: */ 142: if (ctxt->input != NULL) { 143: consumed = ctxt->input->consumed + 144: (ctxt->input->cur - ctxt->input->base); 145: } 146: consumed += ctxt->sizeentities; 147: 148: if (replacement < XML_PARSER_NON_LINEAR * consumed) 149: return(0); 150: } else if (size != 0) { 151: /* 152: * Do the check based on the replacement size of the entity 153: */ 154: if (size < XML_PARSER_BIG_ENTITY) 155: return(0); 156: 157: /* 158: * A limit on the amount of text data reasonably used 159: */ 160: if (ctxt->input != NULL) { 161: consumed = ctxt->input->consumed + 162: (ctxt->input->cur - ctxt->input->base); 163: } 164: consumed += ctxt->sizeentities; 165: 166: if ((size < XML_PARSER_NON_LINEAR * consumed) && 167: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 168: return (0); 169: } else if (ent != NULL) { 170: /* 171: * use the number of parsed entities in the replacement 172: */ 173: size = ent->checked / 2; 174: 175: /* 176: * The amount of data parsed counting entities size only once 177: */ 178: if (ctxt->input != NULL) { 179: consumed = ctxt->input->consumed + 180: (ctxt->input->cur - ctxt->input->base); 181: } 182: consumed += ctxt->sizeentities; 183: 184: /* 185: * Check the density of entities for the amount of data 186: * knowing an entity reference will take at least 3 bytes 187: */ 188: if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 189: return (0); 190: } else { 191: /* 192: * strange we got no data for checking just return 193: */ 194: return (0); 195: } 196: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 197: return (1); 198: } 199: 200: /** 201: * xmlParserMaxDepth: 202: * 203: * arbitrary depth limit for the XML documents that we allow to 204: * process. This is not a limitation of the parser but a safety 205: * boundary feature. It can be disabled with the XML_PARSE_HUGE 206: * parser option. 
207: */ 208: unsigned int xmlParserMaxDepth = 256; 209: 210: 211: 212: #define SAX2 1 213: #define XML_PARSER_BIG_BUFFER_SIZE 300 214: #define XML_PARSER_BUFFER_SIZE 100 215: #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 216: 217: /** 218: * XML_PARSER_CHUNK_SIZE 219: * 220: * When calling GROW that's the minimal amount of data 221: * the parser expected to have received. It is not a hard 222: * limit but an optimization when reading strings like Names 223: * It is not strictly needed as long as inputs available characters 224: * are followed by 0, which should be provided by the I/O level 225: */ 226: #define XML_PARSER_CHUNK_SIZE 100 227: 228: /* 229: * List of XML prefixed PI allowed by W3C specs 230: */ 231: 232: static const char *xmlW3CPIs[] = { 233: "xml-stylesheet", 234: "xml-model", 235: NULL 236: }; 237: 238: 239: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 240: static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 241: const xmlChar **str); 242: 243: static xmlParserErrors 244: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 245: xmlSAXHandlerPtr sax, 246: void *user_data, int depth, const xmlChar *URL, 247: const xmlChar *ID, xmlNodePtr *list); 248: 249: static int 250: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 251: const char *encoding); 252: #ifdef LIBXML_LEGACY_ENABLED 253: static void 254: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 255: xmlNodePtr lastNode); 256: #endif /* LIBXML_LEGACY_ENABLED */ 257: 258: static xmlParserErrors 259: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 260: const xmlChar *string, void *user_data, xmlNodePtr *lst); 261: 262: static int 263: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 264: 265: /************************************************************************ 266: * * 267: * Some factorized error routines * 268: * * 269: 
************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context (may be NULL)
 * @prefix:  the attribute prefix, NULL for an unprefixed attribute
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error. The error is raised as
 * XML_ERR_ATTRIBUTE_REDEFINED with level XML_ERR_FATAL; when a context is
 * given it is flagged as not well formed and, unless in recovery mode,
 * SAX callbacks are disabled.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    if (ctxt != NULL) {
        /* Stay silent when SAX is disabled and the state is XML_PARSER_EOF */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
    }

    if (prefix != NULL) {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    } else {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    }

    if (ctxt == NULL)
        return;
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @info:  extra information string
 *
 * Handle a fatal parser error, i.e.
violating Well-Formedness constraints 314: */ 315: static void 316: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 317: { 318: const char *errmsg; 319: char errstr[129] = ""; 320: 321: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 322: (ctxt->instate == XML_PARSER_EOF)) 323: return; 324: switch (error) { 325: case XML_ERR_INVALID_HEX_CHARREF: 326: errmsg = "CharRef: invalid hexadecimal value"; 327: break; 328: case XML_ERR_INVALID_DEC_CHARREF: 329: errmsg = "CharRef: invalid decimal value"; 330: break; 331: case XML_ERR_INVALID_CHARREF: 332: errmsg = "CharRef: invalid value"; 333: break; 334: case XML_ERR_INTERNAL_ERROR: 335: errmsg = "internal error"; 336: break; 337: case XML_ERR_PEREF_AT_EOF: 338: errmsg = "PEReference at end of document"; 339: break; 340: case XML_ERR_PEREF_IN_PROLOG: 341: errmsg = "PEReference in prolog"; 342: break; 343: case XML_ERR_PEREF_IN_EPILOG: 344: errmsg = "PEReference in epilog"; 345: break; 346: case XML_ERR_PEREF_NO_NAME: 347: errmsg = "PEReference: no name"; 348: break; 349: case XML_ERR_PEREF_SEMICOL_MISSING: 350: errmsg = "PEReference: expecting ';'"; 351: break; 352: case XML_ERR_ENTITY_LOOP: 353: errmsg = "Detected an entity reference loop"; 354: break; 355: case XML_ERR_ENTITY_NOT_STARTED: 356: errmsg = "EntityValue: \" or ' expected"; 357: break; 358: case XML_ERR_ENTITY_PE_INTERNAL: 359: errmsg = "PEReferences forbidden in internal subset"; 360: break; 361: case XML_ERR_ENTITY_NOT_FINISHED: 362: errmsg = "EntityValue: \" or ' expected"; 363: break; 364: case XML_ERR_ATTRIBUTE_NOT_STARTED: 365: errmsg = "AttValue: \" or ' expected"; 366: break; 367: case XML_ERR_LT_IN_ATTRIBUTE: 368: errmsg = "Unescaped '<' not allowed in attributes values"; 369: break; 370: case XML_ERR_LITERAL_NOT_STARTED: 371: errmsg = "SystemLiteral \" or ' expected"; 372: break; 373: case XML_ERR_LITERAL_NOT_FINISHED: 374: errmsg = "Unfinished System or Public ID \" or ' expected"; 375: break; 376: case 
XML_ERR_MISPLACED_CDATA_END: 377: errmsg = "Sequence ']]>' not allowed in content"; 378: break; 379: case XML_ERR_URI_REQUIRED: 380: errmsg = "SYSTEM or PUBLIC, the URI is missing"; 381: break; 382: case XML_ERR_PUBID_REQUIRED: 383: errmsg = "PUBLIC, the Public Identifier is missing"; 384: break; 385: case XML_ERR_HYPHEN_IN_COMMENT: 386: errmsg = "Comment must not contain '--' (double-hyphen)"; 387: break; 388: case XML_ERR_PI_NOT_STARTED: 389: errmsg = "xmlParsePI : no target name"; 390: break; 391: case XML_ERR_RESERVED_XML_NAME: 392: errmsg = "Invalid PI name"; 393: break; 394: case XML_ERR_NOTATION_NOT_STARTED: 395: errmsg = "NOTATION: Name expected here"; 396: break; 397: case XML_ERR_NOTATION_NOT_FINISHED: 398: errmsg = "'>' required to close NOTATION declaration"; 399: break; 400: case XML_ERR_VALUE_REQUIRED: 401: errmsg = "Entity value required"; 402: break; 403: case XML_ERR_URI_FRAGMENT: 404: errmsg = "Fragment not allowed"; 405: break; 406: case XML_ERR_ATTLIST_NOT_STARTED: 407: errmsg = "'(' required to start ATTLIST enumeration"; 408: break; 409: case XML_ERR_NMTOKEN_REQUIRED: 410: errmsg = "NmToken expected in ATTLIST enumeration"; 411: break; 412: case XML_ERR_ATTLIST_NOT_FINISHED: 413: errmsg = "')' required to finish ATTLIST enumeration"; 414: break; 415: case XML_ERR_MIXED_NOT_STARTED: 416: errmsg = "MixedContentDecl : '|' or ')*' expected"; 417: break; 418: case XML_ERR_PCDATA_REQUIRED: 419: errmsg = "MixedContentDecl : '#PCDATA' expected"; 420: break; 421: case XML_ERR_ELEMCONTENT_NOT_STARTED: 422: errmsg = "ContentDecl : Name or '(' expected"; 423: break; 424: case XML_ERR_ELEMCONTENT_NOT_FINISHED: 425: errmsg = "ContentDecl : ',' '|' or ')' expected"; 426: break; 427: case XML_ERR_PEREF_IN_INT_SUBSET: 428: errmsg = 429: "PEReference: forbidden within markup decl in internal subset"; 430: break; 431: case XML_ERR_GT_REQUIRED: 432: errmsg = "expected '>'"; 433: break; 434: case XML_ERR_CONDSEC_INVALID: 435: errmsg = "XML conditional section '[' 
expected"; 436: break; 437: case XML_ERR_EXT_SUBSET_NOT_FINISHED: 438: errmsg = "Content error in the external subset"; 439: break; 440: case XML_ERR_CONDSEC_INVALID_KEYWORD: 441: errmsg = 442: "conditional section INCLUDE or IGNORE keyword expected"; 443: break; 444: case XML_ERR_CONDSEC_NOT_FINISHED: 445: errmsg = "XML conditional section not closed"; 446: break; 447: case XML_ERR_XMLDECL_NOT_STARTED: 448: errmsg = "Text declaration '<?xml' required"; 449: break; 450: case XML_ERR_XMLDECL_NOT_FINISHED: 451: errmsg = "parsing XML declaration: '?>' expected"; 452: break; 453: case XML_ERR_EXT_ENTITY_STANDALONE: 454: errmsg = "external parsed entities cannot be standalone"; 455: break; 456: case XML_ERR_ENTITYREF_SEMICOL_MISSING: 457: errmsg = "EntityRef: expecting ';'"; 458: break; 459: case XML_ERR_DOCTYPE_NOT_FINISHED: 460: errmsg = "DOCTYPE improperly terminated"; 461: break; 462: case XML_ERR_LTSLASH_REQUIRED: 463: errmsg = "EndTag: '</' not found"; 464: break; 465: case XML_ERR_EQUAL_REQUIRED: 466: errmsg = "expected '='"; 467: break; 468: case XML_ERR_STRING_NOT_CLOSED: 469: errmsg = "String not closed expecting \" or '"; 470: break; 471: case XML_ERR_STRING_NOT_STARTED: 472: errmsg = "String not started expecting ' or \""; 473: break; 474: case XML_ERR_ENCODING_NAME: 475: errmsg = "Invalid XML encoding name"; 476: break; 477: case XML_ERR_STANDALONE_VALUE: 478: errmsg = "standalone accepts only 'yes' or 'no'"; 479: break; 480: case XML_ERR_DOCUMENT_EMPTY: 481: errmsg = "Document is empty"; 482: break; 483: case XML_ERR_DOCUMENT_END: 484: errmsg = "Extra content at the end of the document"; 485: break; 486: case XML_ERR_NOT_WELL_BALANCED: 487: errmsg = "chunk is not well balanced"; 488: break; 489: case XML_ERR_EXTRA_CONTENT: 490: errmsg = "extra content at the end of well balanced chunk"; 491: break; 492: case XML_ERR_VERSION_MISSING: 493: errmsg = "Malformed declaration expecting version"; 494: break; 495: case XML_ERR_NAME_TOO_LONG: 496: errmsg = "Name too 
long use XML_PARSE_HUGE option"; 497: break; 498: #if 0 499: case: 500: errmsg = ""; 501: break; 502: #endif 503: default: 504: errmsg = "Unregistered error message"; 505: } 506: if (info == NULL) 507: snprintf(errstr, 128, "%s\n", errmsg); 508: else 509: snprintf(errstr, 128, "%s: %%s\n", errmsg); 510: if (ctxt != NULL) 511: ctxt->errNo = error; 512: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 513: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], 514: info); 515: if (ctxt != NULL) { 516: ctxt->wellFormed = 0; 517: if (ctxt->recovery == 0) 518: ctxt->disableSAX = 1; 519: } 520: } 521: 522: /** 523: * xmlFatalErrMsg: 524: * @ctxt: an XML parser context 525: * @error: the error number 526: * @msg: the error message 527: * 528: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 529: */ 530: static void 531: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 532: const char *msg) 533: { 534: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 535: (ctxt->instate == XML_PARSER_EOF)) 536: return; 537: if (ctxt != NULL) 538: ctxt->errNo = error; 539: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 540: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 541: if (ctxt != NULL) { 542: ctxt->wellFormed = 0; 543: if (ctxt->recovery == 0) 544: ctxt->disableSAX = 1; 545: } 546: } 547: 548: /** 549: * xmlWarningMsg: 550: * @ctxt: an XML parser context 551: * @error: the error number 552: * @msg: the error message 553: * @str1: extra data 554: * @str2: extra data 555: * 556: * Handle a warning. 
557: */ 558: static void 559: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 560: const char *msg, const xmlChar *str1, const xmlChar *str2) 561: { 562: xmlStructuredErrorFunc schannel = NULL; 563: 564: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 565: (ctxt->instate == XML_PARSER_EOF)) 566: return; 567: if ((ctxt != NULL) && (ctxt->sax != NULL) && 568: (ctxt->sax->initialized == XML_SAX2_MAGIC)) 569: schannel = ctxt->sax->serror; 570: if (ctxt != NULL) { 571: __xmlRaiseError(schannel, 572: (ctxt->sax) ? ctxt->sax->warning : NULL, 573: ctxt->userData, 574: ctxt, NULL, XML_FROM_PARSER, error, 575: XML_ERR_WARNING, NULL, 0, 576: (const char *) str1, (const char *) str2, NULL, 0, 0, 577: msg, (const char *) str1, (const char *) str2); 578: } else { 579: __xmlRaiseError(schannel, NULL, NULL, 580: ctxt, NULL, XML_FROM_PARSER, error, 581: XML_ERR_WARNING, NULL, 0, 582: (const char *) str1, (const char *) str2, NULL, 0, 0, 583: msg, (const char *) str1, (const char *) str2); 584: } 585: } 586: 587: /** 588: * xmlValidityError: 589: * @ctxt: an XML parser context 590: * @error: the error number 591: * @msg: the error message 592: * @str1: extra data 593: * 594: * Handle a validity error. 
595: */ 596: static void 597: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 598: const char *msg, const xmlChar *str1, const xmlChar *str2) 599: { 600: xmlStructuredErrorFunc schannel = NULL; 601: 602: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 603: (ctxt->instate == XML_PARSER_EOF)) 604: return; 605: if (ctxt != NULL) { 606: ctxt->errNo = error; 607: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 608: schannel = ctxt->sax->serror; 609: } 610: if (ctxt != NULL) { 611: __xmlRaiseError(schannel, 612: ctxt->vctxt.error, ctxt->vctxt.userData, 613: ctxt, NULL, XML_FROM_DTD, error, 614: XML_ERR_ERROR, NULL, 0, (const char *) str1, 615: (const char *) str2, NULL, 0, 0, 616: msg, (const char *) str1, (const char *) str2); 617: ctxt->valid = 0; 618: } else { 619: __xmlRaiseError(schannel, NULL, NULL, 620: ctxt, NULL, XML_FROM_DTD, error, 621: XML_ERR_ERROR, NULL, 0, (const char *) str1, 622: (const char *) str2, NULL, 0, 0, 623: msg, (const char *) str1, (const char *) str2); 624: } 625: } 626: 627: /** 628: * xmlFatalErrMsgInt: 629: * @ctxt: an XML parser context 630: * @error: the error number 631: * @msg: the error message 632: * @val: an integer value 633: * 634: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 635: */ 636: static void 637: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 638: const char *msg, int val) 639: { 640: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 641: (ctxt->instate == XML_PARSER_EOF)) 642: return; 643: if (ctxt != NULL) 644: ctxt->errNo = error; 645: __xmlRaiseError(NULL, NULL, NULL, 646: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 647: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 648: if (ctxt != NULL) { 649: ctxt->wellFormed = 0; 650: if (ctxt->recovery == 0) 651: ctxt->disableSAX = 1; 652: } 653: } 654: 655: /** 656: * xmlFatalErrMsgStrIntStr: 657: * @ctxt: an XML parser context 658: * @error: the error number 659: * @msg: the error message 660: * @str1: an string info 661: * @val: an integer value 662: * @str2: an string info 663: * 664: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 665: */ 666: static void 667: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 668: const char *msg, const xmlChar *str1, int val, 669: const xmlChar *str2) 670: { 671: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 672: (ctxt->instate == XML_PARSER_EOF)) 673: return; 674: if (ctxt != NULL) 675: ctxt->errNo = error; 676: __xmlRaiseError(NULL, NULL, NULL, 677: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 678: NULL, 0, (const char *) str1, (const char *) str2, 679: NULL, val, 0, msg, str1, val, str2); 680: if (ctxt != NULL) { 681: ctxt->wellFormed = 0; 682: if (ctxt->recovery == 0) 683: ctxt->disableSAX = 1; 684: } 685: } 686: 687: /** 688: * xmlFatalErrMsgStr: 689: * @ctxt: an XML parser context 690: * @error: the error number 691: * @msg: the error message 692: * @val: a string value 693: * 694: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 695: */ 696: static void 697: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 698: const char *msg, const xmlChar * val) 699: { 700: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 701: (ctxt->instate == XML_PARSER_EOF)) 702: return; 703: if (ctxt != NULL) 704: ctxt->errNo = error; 705: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 706: XML_FROM_PARSER, error, XML_ERR_FATAL, 707: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 708: val); 709: if (ctxt != NULL) { 710: ctxt->wellFormed = 0; 711: if (ctxt->recovery == 0) 712: ctxt->disableSAX = 1; 713: } 714: } 715: 716: /** 717: * xmlErrMsgStr: 718: * @ctxt: an XML parser context 719: * @error: the error number 720: * @msg: the error message 721: * @val: a string value 722: * 723: * Handle a non fatal parser error 724: */ 725: static void 726: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 727: const char *msg, const xmlChar * val) 728: { 729: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 730: (ctxt->instate == XML_PARSER_EOF)) 731: return; 732: if (ctxt != NULL) 733: ctxt->errNo = error; 734: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 735: XML_FROM_PARSER, error, XML_ERR_ERROR, 736: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 737: val); 738: } 739: 740: /** 741: * xmlNsErr: 742: * @ctxt: an XML parser context 743: * @error: the error number 744: * @msg: the message 745: * @info1: extra information string 746: * @info2: extra information string 747: * 748: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 749: */ 750: static void 751: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 752: const char *msg, 753: const xmlChar * info1, const xmlChar * info2, 754: const xmlChar * info3) 755: { 756: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 757: (ctxt->instate == XML_PARSER_EOF)) 758: return; 759: if (ctxt != NULL) 760: ctxt->errNo = error; 761: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 762: XML_ERR_ERROR, NULL, 0, (const char *) info1, 763: (const char *) info2, (const char *) info3, 0, 0, msg, 764: info1, info2, info3); 765: if (ctxt != NULL) 766: ctxt->nsWellFormed = 0; 767: } 768: 769: /** 770: * xmlNsWarn 771: * @ctxt: an XML parser context 772: * @error: the error number 773: * @msg: the message 774: * @info1: extra information string 775: * @info2: extra information string 776: * 777: * Handle a namespace warning error 778: */ 779: static void 780: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 781: const char *msg, 782: const xmlChar * info1, const xmlChar * info2, 783: const xmlChar * info3) 784: { 785: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 786: (ctxt->instate == XML_PARSER_EOF)) 787: return; 788: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 789: XML_ERR_WARNING, NULL, 0, (const char *) info1, 790: (const char *) info2, (const char *) info3, 0, 0, msg, 791: info1, info2, info3); 792: } 793: 794: /************************************************************************ 795: * * 796: * Library wide options * 797: * * 798: ************************************************************************/ 799: 800: /** 801: * xmlHasFeature: 802: * @feature: the feature to be examined 803: * 804: * Examines if the library has been compiled with a given feature. 805: * 806: * Returns a non-zero value if the feature exist, otherwise zero. 
807: * Returns zero (0) if the feature does not exist or an unknown 808: * unknown feature is requested, non-zero otherwise. 809: */ 810: int 811: xmlHasFeature(xmlFeature feature) 812: { 813: switch (feature) { 814: case XML_WITH_THREAD: 815: #ifdef LIBXML_THREAD_ENABLED 816: return(1); 817: #else 818: return(0); 819: #endif 820: case XML_WITH_TREE: 821: #ifdef LIBXML_TREE_ENABLED 822: return(1); 823: #else 824: return(0); 825: #endif 826: case XML_WITH_OUTPUT: 827: #ifdef LIBXML_OUTPUT_ENABLED 828: return(1); 829: #else 830: return(0); 831: #endif 832: case XML_WITH_PUSH: 833: #ifdef LIBXML_PUSH_ENABLED 834: return(1); 835: #else 836: return(0); 837: #endif 838: case XML_WITH_READER: 839: #ifdef LIBXML_READER_ENABLED 840: return(1); 841: #else 842: return(0); 843: #endif 844: case XML_WITH_PATTERN: 845: #ifdef LIBXML_PATTERN_ENABLED 846: return(1); 847: #else 848: return(0); 849: #endif 850: case XML_WITH_WRITER: 851: #ifdef LIBXML_WRITER_ENABLED 852: return(1); 853: #else 854: return(0); 855: #endif 856: case XML_WITH_SAX1: 857: #ifdef LIBXML_SAX1_ENABLED 858: return(1); 859: #else 860: return(0); 861: #endif 862: case XML_WITH_FTP: 863: #ifdef LIBXML_FTP_ENABLED 864: return(1); 865: #else 866: return(0); 867: #endif 868: case XML_WITH_HTTP: 869: #ifdef LIBXML_HTTP_ENABLED 870: return(1); 871: #else 872: return(0); 873: #endif 874: case XML_WITH_VALID: 875: #ifdef LIBXML_VALID_ENABLED 876: return(1); 877: #else 878: return(0); 879: #endif 880: case XML_WITH_HTML: 881: #ifdef LIBXML_HTML_ENABLED 882: return(1); 883: #else 884: return(0); 885: #endif 886: case XML_WITH_LEGACY: 887: #ifdef LIBXML_LEGACY_ENABLED 888: return(1); 889: #else 890: return(0); 891: #endif 892: case XML_WITH_C14N: 893: #ifdef LIBXML_C14N_ENABLED 894: return(1); 895: #else 896: return(0); 897: #endif 898: case XML_WITH_CATALOG: 899: #ifdef LIBXML_CATALOG_ENABLED 900: return(1); 901: #else 902: return(0); 903: #endif 904: case XML_WITH_XPATH: 905: #ifdef LIBXML_XPATH_ENABLED 906: return(1); 
907: #else 908: return(0); 909: #endif 910: case XML_WITH_XPTR: 911: #ifdef LIBXML_XPTR_ENABLED 912: return(1); 913: #else 914: return(0); 915: #endif 916: case XML_WITH_XINCLUDE: 917: #ifdef LIBXML_XINCLUDE_ENABLED 918: return(1); 919: #else 920: return(0); 921: #endif 922: case XML_WITH_ICONV: 923: #ifdef LIBXML_ICONV_ENABLED 924: return(1); 925: #else 926: return(0); 927: #endif 928: case XML_WITH_ISO8859X: 929: #ifdef LIBXML_ISO8859X_ENABLED 930: return(1); 931: #else 932: return(0); 933: #endif 934: case XML_WITH_UNICODE: 935: #ifdef LIBXML_UNICODE_ENABLED 936: return(1); 937: #else 938: return(0); 939: #endif 940: case XML_WITH_REGEXP: 941: #ifdef LIBXML_REGEXP_ENABLED 942: return(1); 943: #else 944: return(0); 945: #endif 946: case XML_WITH_AUTOMATA: 947: #ifdef LIBXML_AUTOMATA_ENABLED 948: return(1); 949: #else 950: return(0); 951: #endif 952: case XML_WITH_EXPR: 953: #ifdef LIBXML_EXPR_ENABLED 954: return(1); 955: #else 956: return(0); 957: #endif 958: case XML_WITH_SCHEMAS: 959: #ifdef LIBXML_SCHEMAS_ENABLED 960: return(1); 961: #else 962: return(0); 963: #endif 964: case XML_WITH_SCHEMATRON: 965: #ifdef LIBXML_SCHEMATRON_ENABLED 966: return(1); 967: #else 968: return(0); 969: #endif 970: case XML_WITH_MODULES: 971: #ifdef LIBXML_MODULES_ENABLED 972: return(1); 973: #else 974: return(0); 975: #endif 976: case XML_WITH_DEBUG: 977: #ifdef LIBXML_DEBUG_ENABLED 978: return(1); 979: #else 980: return(0); 981: #endif 982: case XML_WITH_DEBUG_MEM: 983: #ifdef DEBUG_MEMORY_LOCATION 984: return(1); 985: #else 986: return(0); 987: #endif 988: case XML_WITH_DEBUG_RUN: 989: #ifdef LIBXML_DEBUG_RUNTIME 990: return(1); 991: #else 992: return(0); 993: #endif 994: case XML_WITH_ZLIB: 995: #ifdef LIBXML_ZLIB_ENABLED 996: return(1); 997: #else 998: return(0); 999: #endif 1000: case XML_WITH_LZMA: 1001: #ifdef LIBXML_LZMA_ENABLED 1002: return(1); 1003: #else 1004: return(0); 1005: #endif 1006: case XML_WITH_ICU: 1007: #ifdef LIBXML_ICU_ENABLED 1008: return(1); 1009: #else 
1010: return(0); 1011: #endif 1012: default: 1013: break; 1014: } 1015: return(0); 1016: } 1017: 1018: /************************************************************************ 1019: * * 1020: * SAX2 defaulted attributes handling * 1021: * * 1022: ************************************************************************/ 1023: 1024: /** 1025: * xmlDetectSAX2: 1026: * @ctxt: an XML parser context 1027: * 1028: * Do the SAX2 detection and specific intialization 1029: */ 1030: static void 1031: xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1032: if (ctxt == NULL) return; 1033: #ifdef LIBXML_SAX1_ENABLED 1034: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1035: ((ctxt->sax->startElementNs != NULL) || 1036: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1037: #else 1038: ctxt->sax2 = 1; 1039: #endif /* LIBXML_SAX1_ENABLED */ 1040: 1041: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1042: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1043: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1044: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1045: (ctxt->str_xml_ns == NULL)) { 1046: xmlErrMemory(ctxt, NULL); 1047: } 1048: } 1049: 1050: typedef struct _xmlDefAttrs xmlDefAttrs; 1051: typedef xmlDefAttrs *xmlDefAttrsPtr; 1052: struct _xmlDefAttrs { 1053: int nbAttrs; /* number of defaulted attributes on that element */ 1054: int maxAttrs; /* the size of the array */ 1055: const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1056: }; 1057: 1058: /** 1059: * xmlAttrNormalizeSpace: 1060: * @src: the source string 1061: * @dst: the target string 1062: * 1063: * Normalize the space in non CDATA attribute values: 1064: * If the attribute type is not CDATA, then the XML processor MUST further 1065: * process the normalized attribute value by discarding any leading and 1066: * trailing space (#x20) characters, and by replacing sequences of space 1067: * (#x20) characters by a single 
space (#x20) character. 1068: * Note that the size of dst need to be at least src, and if one doesn't need 1069: * to preserve dst (and it doesn't come from a dictionary or read-only) then 1070: * passing src as dst is just fine. 1071: * 1072: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1073: * is needed. 1074: */ 1075: static xmlChar * 1076: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1077: { 1078: if ((src == NULL) || (dst == NULL)) 1079: return(NULL); 1080: 1081: while (*src == 0x20) src++; 1082: while (*src != 0) { 1083: if (*src == 0x20) { 1084: while (*src == 0x20) src++; 1085: if (*src != 0) 1086: *dst++ = 0x20; 1087: } else { 1088: *dst++ = *src++; 1089: } 1090: } 1091: *dst = 0; 1092: if (dst == src) 1093: return(NULL); 1094: return(dst); 1095: } 1096: 1097: /** 1098: * xmlAttrNormalizeSpace2: 1099: * @src: the source string 1100: * 1101: * Normalize the space in non CDATA attribute values, a slightly more complex 1102: * front end to avoid allocation problems when running on attribute values 1103: * coming from the input. 1104: * 1105: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1106: * is needed. 
1107: */ 1108: static const xmlChar * 1109: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1110: { 1111: int i; 1112: int remove_head = 0; 1113: int need_realloc = 0; 1114: const xmlChar *cur; 1115: 1116: if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1117: return(NULL); 1118: i = *len; 1119: if (i <= 0) 1120: return(NULL); 1121: 1122: cur = src; 1123: while (*cur == 0x20) { 1124: cur++; 1125: remove_head++; 1126: } 1127: while (*cur != 0) { 1128: if (*cur == 0x20) { 1129: cur++; 1130: if ((*cur == 0x20) || (*cur == 0)) { 1131: need_realloc = 1; 1132: break; 1133: } 1134: } else 1135: cur++; 1136: } 1137: if (need_realloc) { 1138: xmlChar *ret; 1139: 1140: ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1141: if (ret == NULL) { 1142: xmlErrMemory(ctxt, NULL); 1143: return(NULL); 1144: } 1145: xmlAttrNormalizeSpace(ret, ret); 1146: *len = (int) strlen((const char *)ret); 1147: return(ret); 1148: } else if (remove_head) { 1149: *len -= remove_head; 1150: memmove(src, src + remove_head, 1 + *len); 1151: return(src); 1152: } 1153: return(NULL); 1154: } 1155: 1156: /** 1157: * xmlAddDefAttrs: 1158: * @ctxt: an XML parser context 1159: * @fullname: the element fullname 1160: * @fullattr: the attribute fullname 1161: * @value: the attribute value 1162: * 1163: * Add a defaulted attribute for an element 1164: */ 1165: static void 1166: xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1167: const xmlChar *fullname, 1168: const xmlChar *fullattr, 1169: const xmlChar *value) { 1170: xmlDefAttrsPtr defaults; 1171: int len; 1172: const xmlChar *name; 1173: const xmlChar *prefix; 1174: 1175: /* 1176: * Allows to detect attribute redefinitions 1177: */ 1178: if (ctxt->attsSpecial != NULL) { 1179: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1180: return; 1181: } 1182: 1183: if (ctxt->attsDefault == NULL) { 1184: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1185: if (ctxt->attsDefault == NULL) 1186: goto mem_error; 1187: 
} 1188: 1189: /* 1190: * split the element name into prefix:localname , the string found 1191: * are within the DTD and then not associated to namespace names. 1192: */ 1193: name = xmlSplitQName3(fullname, &len); 1194: if (name == NULL) { 1195: name = xmlDictLookup(ctxt->dict, fullname, -1); 1196: prefix = NULL; 1197: } else { 1198: name = xmlDictLookup(ctxt->dict, name, -1); 1199: prefix = xmlDictLookup(ctxt->dict, fullname, len); 1200: } 1201: 1202: /* 1203: * make sure there is some storage 1204: */ 1205: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1206: if (defaults == NULL) { 1207: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1208: (4 * 5) * sizeof(const xmlChar *)); 1209: if (defaults == NULL) 1210: goto mem_error; 1211: defaults->nbAttrs = 0; 1212: defaults->maxAttrs = 4; 1213: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1214: defaults, NULL) < 0) { 1215: xmlFree(defaults); 1216: goto mem_error; 1217: } 1218: } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1219: xmlDefAttrsPtr temp; 1220: 1221: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1222: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1223: if (temp == NULL) 1224: goto mem_error; 1225: defaults = temp; 1226: defaults->maxAttrs *= 2; 1227: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1228: defaults, NULL) < 0) { 1229: xmlFree(defaults); 1230: goto mem_error; 1231: } 1232: } 1233: 1234: /* 1235: * Split the element name into prefix:localname , the string found 1236: * are within the DTD and hen not associated to namespace names. 
1237: */ 1238: name = xmlSplitQName3(fullattr, &len); 1239: if (name == NULL) { 1240: name = xmlDictLookup(ctxt->dict, fullattr, -1); 1241: prefix = NULL; 1242: } else { 1243: name = xmlDictLookup(ctxt->dict, name, -1); 1244: prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1245: } 1246: 1247: defaults->values[5 * defaults->nbAttrs] = name; 1248: defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1249: /* intern the string and precompute the end */ 1250: len = xmlStrlen(value); 1251: value = xmlDictLookup(ctxt->dict, value, len); 1252: defaults->values[5 * defaults->nbAttrs + 2] = value; 1253: defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1254: if (ctxt->external) 1255: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1256: else 1257: defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1258: defaults->nbAttrs++; 1259: 1260: return; 1261: 1262: mem_error: 1263: xmlErrMemory(ctxt, NULL); 1264: return; 1265: } 1266: 1267: /** 1268: * xmlAddSpecialAttr: 1269: * @ctxt: an XML parser context 1270: * @fullname: the element fullname 1271: * @fullattr: the attribute fullname 1272: * @type: the attribute type 1273: * 1274: * Register this attribute type 1275: */ 1276: static void 1277: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1278: const xmlChar *fullname, 1279: const xmlChar *fullattr, 1280: int type) 1281: { 1282: if (ctxt->attsSpecial == NULL) { 1283: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1284: if (ctxt->attsSpecial == NULL) 1285: goto mem_error; 1286: } 1287: 1288: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1289: return; 1290: 1291: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1292: (void *) (long) type); 1293: return; 1294: 1295: mem_error: 1296: xmlErrMemory(ctxt, NULL); 1297: return; 1298: } 1299: 1300: /** 1301: * xmlCleanSpecialAttrCallback: 1302: * 1303: * Removes CDATA attributes from the special attribute table 1304: */ 1305: static void 1306: xmlCleanSpecialAttrCallback(void 
*payload, void *data, 1307: const xmlChar *fullname, const xmlChar *fullattr, 1308: const xmlChar *unused ATTRIBUTE_UNUSED) { 1309: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1310: 1311: if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1312: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1313: } 1314: } 1315: 1316: /** 1317: * xmlCleanSpecialAttr: 1318: * @ctxt: an XML parser context 1319: * 1320: * Trim the list of attributes defined to remove all those of type 1321: * CDATA as they are not special. This call should be done when finishing 1322: * to parse the DTD and before starting to parse the document root. 1323: */ 1324: static void 1325: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1326: { 1327: if (ctxt->attsSpecial == NULL) 1328: return; 1329: 1330: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1331: 1332: if (xmlHashSize(ctxt->attsSpecial) == 0) { 1333: xmlHashFree(ctxt->attsSpecial, NULL); 1334: ctxt->attsSpecial = NULL; 1335: } 1336: return; 1337: } 1338: 1339: /** 1340: * xmlCheckLanguageID: 1341: * @lang: pointer to the string value 1342: * 1343: * Checks that the value conforms to the LanguageID production: 1344: * 1345: * NOTE: this is somewhat deprecated, those productions were removed from 1346: * the XML Second edition. 
1347: * 1348: * [33] LanguageID ::= Langcode ('-' Subcode)* 1349: * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1350: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1351: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1352: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1353: * [38] Subcode ::= ([a-z] | [A-Z])+ 1354: * 1355: * The current REC reference the sucessors of RFC 1766, currently 5646 1356: * 1357: * http://www.rfc-editor.org/rfc/rfc5646.txt 1358: * langtag = language 1359: * ["-" script] 1360: * ["-" region] 1361: * *("-" variant) 1362: * *("-" extension) 1363: * ["-" privateuse] 1364: * language = 2*3ALPHA ; shortest ISO 639 code 1365: * ["-" extlang] ; sometimes followed by 1366: * ; extended language subtags 1367: * / 4ALPHA ; or reserved for future use 1368: * / 5*8ALPHA ; or registered language subtag 1369: * 1370: * extlang = 3ALPHA ; selected ISO 639 codes 1371: * *2("-" 3ALPHA) ; permanently reserved 1372: * 1373: * script = 4ALPHA ; ISO 15924 code 1374: * 1375: * region = 2ALPHA ; ISO 3166-1 code 1376: * / 3DIGIT ; UN M.49 code 1377: * 1378: * variant = 5*8alphanum ; registered variants 1379: * / (DIGIT 3alphanum) 1380: * 1381: * extension = singleton 1*("-" (2*8alphanum)) 1382: * 1383: * ; Single alphanumerics 1384: * ; "x" reserved for private use 1385: * singleton = DIGIT ; 0 - 9 1386: * / %x41-57 ; A - W 1387: * / %x59-5A ; Y - Z 1388: * / %x61-77 ; a - w 1389: * / %x79-7A ; y - z 1390: * 1391: * it sounds right to still allow Irregular i-xxx IANA and user codes too 1392: * The parser below doesn't try to cope with extension or privateuse 1393: * that could be added but that's not interoperable anyway 1394: * 1395: * Returns 1 if correct 0 otherwise 1396: **/ 1397: int 1398: xmlCheckLanguageID(const xmlChar * lang) 1399: { 1400: const xmlChar *cur = lang, *nxt; 1401: 1402: if (cur == NULL) 1403: return (0); 1404: if (((cur[0] == 'i') && (cur[1] == '-')) || 1405: ((cur[0] == 'I') && (cur[1] == '-')) || 1406: ((cur[0] 
== 'x') && (cur[1] == '-')) || 1407: ((cur[0] == 'X') && (cur[1] == '-'))) { 1408: /* 1409: * Still allow IANA code and user code which were coming 1410: * from the previous version of the XML-1.0 specification 1411: * it's deprecated but we should not fail 1412: */ 1413: cur += 2; 1414: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1415: ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1416: cur++; 1417: return(cur[0] == 0); 1418: } 1419: nxt = cur; 1420: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1421: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1422: nxt++; 1423: if (nxt - cur >= 4) { 1424: /* 1425: * Reserved 1426: */ 1427: if ((nxt - cur > 8) || (nxt[0] != 0)) 1428: return(0); 1429: return(1); 1430: } 1431: if (nxt - cur < 2) 1432: return(0); 1433: /* we got an ISO 639 code */ 1434: if (nxt[0] == 0) 1435: return(1); 1436: if (nxt[0] != '-') 1437: return(0); 1438: 1439: nxt++; 1440: cur = nxt; 1441: /* now we can have extlang or script or region or variant */ 1442: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1443: goto region_m49; 1444: 1445: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1446: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1447: nxt++; 1448: if (nxt - cur == 4) 1449: goto script; 1450: if (nxt - cur == 2) 1451: goto region; 1452: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1453: goto variant; 1454: if (nxt - cur != 3) 1455: return(0); 1456: /* we parsed an extlang */ 1457: if (nxt[0] == 0) 1458: return(1); 1459: if (nxt[0] != '-') 1460: return(0); 1461: 1462: nxt++; 1463: cur = nxt; 1464: /* now we can have script or region or variant */ 1465: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1466: goto region_m49; 1467: 1468: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1469: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1470: nxt++; 1471: if (nxt - cur == 2) 1472: goto region; 1473: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1474: goto variant; 1475: if (nxt - cur != 4) 1476: return(0); 1477: /* we parsed a script */ 1478: script: 1479: if (nxt[0] == 0) 1480: return(1); 1481: if 
(nxt[0] != '-') 1482: return(0); 1483: 1484: nxt++; 1485: cur = nxt; 1486: /* now we can have region or variant */ 1487: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1488: goto region_m49; 1489: 1490: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1491: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1492: nxt++; 1493: 1494: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1495: goto variant; 1496: if (nxt - cur != 2) 1497: return(0); 1498: /* we parsed a region */ 1499: region: 1500: if (nxt[0] == 0) 1501: return(1); 1502: if (nxt[0] != '-') 1503: return(0); 1504: 1505: nxt++; 1506: cur = nxt; 1507: /* now we can just have a variant */ 1508: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1509: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1510: nxt++; 1511: 1512: if ((nxt - cur < 5) || (nxt - cur > 8)) 1513: return(0); 1514: 1515: /* we parsed a variant */ 1516: variant: 1517: if (nxt[0] == 0) 1518: return(1); 1519: if (nxt[0] != '-') 1520: return(0); 1521: /* extensions and private use subtags not checked */ 1522: return (1); 1523: 1524: region_m49: 1525: if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1526: ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1527: nxt += 3; 1528: goto region; 1529: } 1530: return(0); 1531: } 1532: 1533: /************************************************************************ 1534: * * 1535: * Parser stacks related functions and macros * 1536: * * 1537: ************************************************************************/ 1538: 1539: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1540: const xmlChar ** str); 1541: 1542: #ifdef SAX2 1543: /** 1544: * nsPush: 1545: * @ctxt: an XML parser context 1546: * @prefix: the namespace prefix or NULL 1547: * @URL: the namespace name 1548: * 1549: * Pushes a new parser namespace on top of the ns stack 1550: * 1551: * Returns -1 in case of error, -2 if the namespace should be discarded 1552: * and the index in the stack otherwise. 
1553: */ 1554: static int 1555: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1556: { 1557: if (ctxt->options & XML_PARSE_NSCLEAN) { 1558: int i; 1559: for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1560: if (ctxt->nsTab[i] == prefix) { 1561: /* in scope */ 1562: if (ctxt->nsTab[i + 1] == URL) 1563: return(-2); 1564: /* out of scope keep it */ 1565: break; 1566: } 1567: } 1568: } 1569: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1570: ctxt->nsMax = 10; 1571: ctxt->nsNr = 0; 1572: ctxt->nsTab = (const xmlChar **) 1573: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1574: if (ctxt->nsTab == NULL) { 1575: xmlErrMemory(ctxt, NULL); 1576: ctxt->nsMax = 0; 1577: return (-1); 1578: } 1579: } else if (ctxt->nsNr >= ctxt->nsMax) { 1580: const xmlChar ** tmp; 1581: ctxt->nsMax *= 2; 1582: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1583: ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1584: if (tmp == NULL) { 1585: xmlErrMemory(ctxt, NULL); 1586: ctxt->nsMax /= 2; 1587: return (-1); 1588: } 1589: ctxt->nsTab = tmp; 1590: } 1591: ctxt->nsTab[ctxt->nsNr++] = prefix; 1592: ctxt->nsTab[ctxt->nsNr++] = URL; 1593: return (ctxt->nsNr); 1594: } 1595: /** 1596: * nsPop: 1597: * @ctxt: an XML parser context 1598: * @nr: the number to pop 1599: * 1600: * Pops the top @nr parser prefix/namespace from the ns stack 1601: * 1602: * Returns the number of namespaces removed 1603: */ 1604: static int 1605: nsPop(xmlParserCtxtPtr ctxt, int nr) 1606: { 1607: int i; 1608: 1609: if (ctxt->nsTab == NULL) return(0); 1610: if (ctxt->nsNr < nr) { 1611: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1612: nr = ctxt->nsNr; 1613: } 1614: if (ctxt->nsNr <= 0) 1615: return (0); 1616: 1617: for (i = 0;i < nr;i++) { 1618: ctxt->nsNr--; 1619: ctxt->nsTab[ctxt->nsNr] = NULL; 1620: } 1621: return(nr); 1622: } 1623: #endif 1624: 1625: static int 1626: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1627: const xmlChar **atts; 1628: int *attallocs; 1629: int 
maxatts; 1630: 1631: if (ctxt->atts == NULL) { 1632: maxatts = 55; /* allow for 10 attrs by default */ 1633: atts = (const xmlChar **) 1634: xmlMalloc(maxatts * sizeof(xmlChar *)); 1635: if (atts == NULL) goto mem_error; 1636: ctxt->atts = atts; 1637: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1638: if (attallocs == NULL) goto mem_error; 1639: ctxt->attallocs = attallocs; 1640: ctxt->maxatts = maxatts; 1641: } else if (nr + 5 > ctxt->maxatts) { 1642: maxatts = (nr + 5) * 2; 1643: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1644: maxatts * sizeof(const xmlChar *)); 1645: if (atts == NULL) goto mem_error; 1646: ctxt->atts = atts; 1647: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1648: (maxatts / 5) * sizeof(int)); 1649: if (attallocs == NULL) goto mem_error; 1650: ctxt->attallocs = attallocs; 1651: ctxt->maxatts = maxatts; 1652: } 1653: return(ctxt->maxatts); 1654: mem_error: 1655: xmlErrMemory(ctxt, NULL); 1656: return(-1); 1657: } 1658: 1659: /** 1660: * inputPush: 1661: * @ctxt: an XML parser context 1662: * @value: the parser input 1663: * 1664: * Pushes a new parser input on top of the input stack 1665: * 1666: * Returns -1 in case of error, the index in the stack otherwise 1667: */ 1668: int 1669: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1670: { 1671: if ((ctxt == NULL) || (value == NULL)) 1672: return(-1); 1673: if (ctxt->inputNr >= ctxt->inputMax) { 1674: ctxt->inputMax *= 2; 1675: ctxt->inputTab = 1676: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1677: ctxt->inputMax * 1678: sizeof(ctxt->inputTab[0])); 1679: if (ctxt->inputTab == NULL) { 1680: xmlErrMemory(ctxt, NULL); 1681: xmlFreeInputStream(value); 1682: ctxt->inputMax /= 2; 1683: value = NULL; 1684: return (-1); 1685: } 1686: } 1687: ctxt->inputTab[ctxt->inputNr] = value; 1688: ctxt->input = value; 1689: return (ctxt->inputNr++); 1690: } 1691: /** 1692: * inputPop: 1693: * @ctxt: an XML parser context 1694: * 1695: * Pops the top parser 
input from the input stack 1696: * 1697: * Returns the input just removed 1698: */ 1699: xmlParserInputPtr 1700: inputPop(xmlParserCtxtPtr ctxt) 1701: { 1702: xmlParserInputPtr ret; 1703: 1704: if (ctxt == NULL) 1705: return(NULL); 1706: if (ctxt->inputNr <= 0) 1707: return (NULL); 1708: ctxt->inputNr--; 1709: if (ctxt->inputNr > 0) 1710: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1711: else 1712: ctxt->input = NULL; 1713: ret = ctxt->inputTab[ctxt->inputNr]; 1714: ctxt->inputTab[ctxt->inputNr] = NULL; 1715: return (ret); 1716: } 1717: /** 1718: * nodePush: 1719: * @ctxt: an XML parser context 1720: * @value: the element node 1721: * 1722: * Pushes a new element node on top of the node stack 1723: * 1724: * Returns -1 in case of error, the index in the stack otherwise 1725: */ 1726: int 1727: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1728: { 1729: if (ctxt == NULL) return(0); 1730: if (ctxt->nodeNr >= ctxt->nodeMax) { 1731: xmlNodePtr *tmp; 1732: 1733: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1734: ctxt->nodeMax * 2 * 1735: sizeof(ctxt->nodeTab[0])); 1736: if (tmp == NULL) { 1737: xmlErrMemory(ctxt, NULL); 1738: return (-1); 1739: } 1740: ctxt->nodeTab = tmp; 1741: ctxt->nodeMax *= 2; 1742: } 1743: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1744: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1745: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1746: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1747: xmlParserMaxDepth); 1748: ctxt->instate = XML_PARSER_EOF; 1749: return(-1); 1750: } 1751: ctxt->nodeTab[ctxt->nodeNr] = value; 1752: ctxt->node = value; 1753: return (ctxt->nodeNr++); 1754: } 1755: 1756: /** 1757: * nodePop: 1758: * @ctxt: an XML parser context 1759: * 1760: * Pops the top element node from the node stack 1761: * 1762: * Returns the node just removed 1763: */ 1764: xmlNodePtr 1765: nodePop(xmlParserCtxtPtr ctxt) 1766: { 1767: xmlNodePtr ret; 1768: 1769: if (ctxt == NULL) return(NULL); 1770: if 
(ctxt->nodeNr <= 0) 1771: return (NULL); 1772: ctxt->nodeNr--; 1773: if (ctxt->nodeNr > 0) 1774: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1775: else 1776: ctxt->node = NULL; 1777: ret = ctxt->nodeTab[ctxt->nodeNr]; 1778: ctxt->nodeTab[ctxt->nodeNr] = NULL; 1779: return (ret); 1780: } 1781: 1782: #ifdef LIBXML_PUSH_ENABLED 1783: /** 1784: * nameNsPush: 1785: * @ctxt: an XML parser context 1786: * @value: the element name 1787: * @prefix: the element prefix 1788: * @URI: the element namespace name 1789: * 1790: * Pushes a new element name/prefix/URL on top of the name stack 1791: * 1792: * Returns -1 in case of error, the index in the stack otherwise 1793: */ 1794: static int 1795: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1796: const xmlChar *prefix, const xmlChar *URI, int nsNr) 1797: { 1798: if (ctxt->nameNr >= ctxt->nameMax) { 1799: const xmlChar * *tmp; 1800: void **tmp2; 1801: ctxt->nameMax *= 2; 1802: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1803: ctxt->nameMax * 1804: sizeof(ctxt->nameTab[0])); 1805: if (tmp == NULL) { 1806: ctxt->nameMax /= 2; 1807: goto mem_error; 1808: } 1809: ctxt->nameTab = tmp; 1810: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1811: ctxt->nameMax * 3 * 1812: sizeof(ctxt->pushTab[0])); 1813: if (tmp2 == NULL) { 1814: ctxt->nameMax /= 2; 1815: goto mem_error; 1816: } 1817: ctxt->pushTab = tmp2; 1818: } 1819: ctxt->nameTab[ctxt->nameNr] = value; 1820: ctxt->name = value; 1821: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1822: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1823: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1824: return (ctxt->nameNr++); 1825: mem_error: 1826: xmlErrMemory(ctxt, NULL); 1827: return (-1); 1828: } 1829: /** 1830: * nameNsPop: 1831: * @ctxt: an XML parser context 1832: * 1833: * Pops the top element/prefix/URI name from the name stack 1834: * 1835: * Returns the name just removed 1836: */ 1837: static const xmlChar * 1838: 
nameNsPop(xmlParserCtxtPtr ctxt) 1839: { 1840: const xmlChar *ret; 1841: 1842: if (ctxt->nameNr <= 0) 1843: return (NULL); 1844: ctxt->nameNr--; 1845: if (ctxt->nameNr > 0) 1846: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1847: else 1848: ctxt->name = NULL; 1849: ret = ctxt->nameTab[ctxt->nameNr]; 1850: ctxt->nameTab[ctxt->nameNr] = NULL; 1851: return (ret); 1852: } 1853: #endif /* LIBXML_PUSH_ENABLED */ 1854: 1855: /** 1856: * namePush: 1857: * @ctxt: an XML parser context 1858: * @value: the element name 1859: * 1860: * Pushes a new element name on top of the name stack 1861: * 1862: * Returns -1 in case of error, the index in the stack otherwise 1863: */ 1864: int 1865: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1866: { 1867: if (ctxt == NULL) return (-1); 1868: 1869: if (ctxt->nameNr >= ctxt->nameMax) { 1870: const xmlChar * *tmp; 1871: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1872: ctxt->nameMax * 2 * 1873: sizeof(ctxt->nameTab[0])); 1874: if (tmp == NULL) { 1875: goto mem_error; 1876: } 1877: ctxt->nameTab = tmp; 1878: ctxt->nameMax *= 2; 1879: } 1880: ctxt->nameTab[ctxt->nameNr] = value; 1881: ctxt->name = value; 1882: return (ctxt->nameNr++); 1883: mem_error: 1884: xmlErrMemory(ctxt, NULL); 1885: return (-1); 1886: } 1887: /** 1888: * namePop: 1889: * @ctxt: an XML parser context 1890: * 1891: * Pops the top element name from the name stack 1892: * 1893: * Returns the name just removed 1894: */ 1895: const xmlChar * 1896: namePop(xmlParserCtxtPtr ctxt) 1897: { 1898: const xmlChar *ret; 1899: 1900: if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1901: return (NULL); 1902: ctxt->nameNr--; 1903: if (ctxt->nameNr > 0) 1904: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1905: else 1906: ctxt->name = NULL; 1907: ret = ctxt->nameTab[ctxt->nameNr]; 1908: ctxt->nameTab[ctxt->nameNr] = NULL; 1909: return (ret); 1910: } 1911: 1912: static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1913: if (ctxt->spaceNr >= ctxt->spaceMax) { 
1914: int *tmp; 1915: 1916: ctxt->spaceMax *= 2; 1917: tmp = (int *) xmlRealloc(ctxt->spaceTab, 1918: ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1919: if (tmp == NULL) { 1920: xmlErrMemory(ctxt, NULL); 1921: ctxt->spaceMax /=2; 1922: return(-1); 1923: } 1924: ctxt->spaceTab = tmp; 1925: } 1926: ctxt->spaceTab[ctxt->spaceNr] = val; 1927: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1928: return(ctxt->spaceNr++); 1929: } 1930: 1931: static int spacePop(xmlParserCtxtPtr ctxt) { 1932: int ret; 1933: if (ctxt->spaceNr <= 0) return(0); 1934: ctxt->spaceNr--; 1935: if (ctxt->spaceNr > 0) 1936: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1937: else 1938: ctxt->space = &ctxt->spaceTab[0]; 1939: ret = ctxt->spaceTab[ctxt->spaceNr]; 1940: ctxt->spaceTab[ctxt->spaceNr] = -1; 1941: return(ret); 1942: } 1943: 1944: /* 1945: * Macros for accessing the content. Those should be used only by the parser, 1946: * and not exported. 1947: * 1948: * Dirty macros, i.e. one often need to make assumption on the context to 1949: * use them 1950: * 1951: * CUR_PTR return the current pointer to the xmlChar to be parsed. 1952: * To be used with extreme caution since operations consuming 1953: * characters may move the input buffer to a different location ! 1954: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1955: * This should be used internally by the parser 1956: * only to compare to ASCII values otherwise it would break when 1957: * running with UTF-8 encoding. 1958: * RAW same as CUR but in the input buffer, bypass any token 1959: * extraction that may have been done 1960: * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1961: * to compare on ASCII based substring. 1962: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1963: * strings without newlines within the parser. 1964: * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1965: * defined char within the parser. 
1966: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1967: * 1968: * NEXT Skip to the next character, this does the proper decoding 1969: * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1970: * NEXTL(l) Skip the current unicode character of l xmlChars long. 1971: * CUR_CHAR(l) returns the current unicode character (int), set l 1972: * to the number of xmlChars used for the encoding [0-5]. 1973: * CUR_SCHAR same but operate on a string instead of the context 1974: * COPY_BUF copy the current unicode char to the target buffer, increment 1975: * the index 1976: * GROW, SHRINK handling of input buffers 1977: */ 1978: 1979: #define RAW (*ctxt->input->cur) 1980: #define CUR (*ctxt->input->cur) 1981: #define NXT(val) ctxt->input->cur[(val)] 1982: #define CUR_PTR ctxt->input->cur 1983: 1984: #define CMP4( s, c1, c2, c3, c4 ) \ 1985: ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1986: ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1987: #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1988: ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1989: #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1990: ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1991: #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1992: ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1993: #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1994: ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1995: #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1996: ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1997: ((unsigned char *) s)[ 8 ] == c9 ) 1998: #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1999: ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2000: ((unsigned char *) s)[ 9 ] == c10 ) 2001: 2002: #define SKIP(val) do { \ 2003: ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 
2004: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2005: if ((*ctxt->input->cur == 0) && \ 2006: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2007: xmlPopInput(ctxt); \ 2008: } while (0) 2009: 2010: #define SKIPL(val) do { \ 2011: int skipl; \ 2012: for(skipl=0; skipl<val; skipl++) { \ 2013: if (*(ctxt->input->cur) == '\n') { \ 2014: ctxt->input->line++; ctxt->input->col = 1; \ 2015: } else ctxt->input->col++; \ 2016: ctxt->nbChars++; \ 2017: ctxt->input->cur++; \ 2018: } \ 2019: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2020: if ((*ctxt->input->cur == 0) && \ 2021: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2022: xmlPopInput(ctxt); \ 2023: } while (0) 2024: 2025: #define SHRINK if ((ctxt->progressive == 0) && \ 2026: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2027: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2028: xmlSHRINK (ctxt); 2029: 2030: static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2031: xmlParserInputShrink(ctxt->input); 2032: if ((*ctxt->input->cur == 0) && 2033: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2034: xmlPopInput(ctxt); 2035: } 2036: 2037: #define GROW if ((ctxt->progressive == 0) && \ 2038: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2039: xmlGROW (ctxt); 2040: 2041: static void xmlGROW (xmlParserCtxtPtr ctxt) { 2042: if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 2043: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 2044: ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && 2045: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2046: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2047: ctxt->instate = XML_PARSER_EOF; 2048: } 2049: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2050: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2051: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2052: xmlPopInput(ctxt); 
2053: } 2054: 2055: #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2056: 2057: #define NEXT xmlNextChar(ctxt) 2058: 2059: #define NEXT1 { \ 2060: ctxt->input->col++; \ 2061: ctxt->input->cur++; \ 2062: ctxt->nbChars++; \ 2063: if (*ctxt->input->cur == 0) \ 2064: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2065: } 2066: 2067: #define NEXTL(l) do { \ 2068: if (*(ctxt->input->cur) == '\n') { \ 2069: ctxt->input->line++; ctxt->input->col = 1; \ 2070: } else ctxt->input->col++; \ 2071: ctxt->input->cur += l; \ 2072: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2073: } while (0) 2074: 2075: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2076: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2077: 2078: #define COPY_BUF(l,b,i,v) \ 2079: if (l == 1) b[i++] = (xmlChar) v; \ 2080: else i += xmlCopyCharMultiByte(&b[i],v) 2081: 2082: /** 2083: * xmlSkipBlankChars: 2084: * @ctxt: the XML parser context 2085: * 2086: * skip all blanks character found at that point in the input streams. 2087: * It pops up finished entities in the process if allowable at that point. 2088: * 2089: * Returns the number of space chars skipped 2090: */ 2091: 2092: int 2093: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2094: int res = 0; 2095: 2096: /* 2097: * It's Okay to use CUR/NEXT here since all the blanks are on 2098: * the ASCII range. 
2099: */ 2100: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2101: const xmlChar *cur; 2102: /* 2103: * if we are in the document content, go really fast 2104: */ 2105: cur = ctxt->input->cur; 2106: while (IS_BLANK_CH(*cur)) { 2107: if (*cur == '\n') { 2108: ctxt->input->line++; ctxt->input->col = 1; 2109: } 2110: cur++; 2111: res++; 2112: if (*cur == 0) { 2113: ctxt->input->cur = cur; 2114: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2115: cur = ctxt->input->cur; 2116: } 2117: } 2118: ctxt->input->cur = cur; 2119: } else { 2120: int cur; 2121: do { 2122: cur = CUR; 2123: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2124: NEXT; 2125: cur = CUR; 2126: res++; 2127: } 2128: while ((cur == 0) && (ctxt->inputNr > 1) && 2129: (ctxt->instate != XML_PARSER_COMMENT)) { 2130: xmlPopInput(ctxt); 2131: cur = CUR; 2132: } 2133: /* 2134: * Need to handle support of entities branching here 2135: */ 2136: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2137: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2138: } 2139: return(res); 2140: } 2141: 2142: /************************************************************************ 2143: * * 2144: * Commodity functions to handle entities * 2145: * * 2146: ************************************************************************/ 2147: 2148: /** 2149: * xmlPopInput: 2150: * @ctxt: an XML parser context 2151: * 2152: * xmlPopInput: the current input pointed by ctxt->input came to an end 2153: * pop it and return the next char. 
2154: * 2155: * Returns the current xmlChar in the parser context 2156: */ 2157: xmlChar 2158: xmlPopInput(xmlParserCtxtPtr ctxt) { 2159: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2160: if (xmlParserDebugEntities) 2161: xmlGenericError(xmlGenericErrorContext, 2162: "Popping input %d\n", ctxt->inputNr); 2163: xmlFreeInputStream(inputPop(ctxt)); 2164: if ((*ctxt->input->cur == 0) && 2165: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2166: return(xmlPopInput(ctxt)); 2167: return(CUR); 2168: } 2169: 2170: /** 2171: * xmlPushInput: 2172: * @ctxt: an XML parser context 2173: * @input: an XML parser input fragment (entity, XML fragment ...). 2174: * 2175: * xmlPushInput: switch to a new input stream which is stacked on top 2176: * of the previous one(s). 2177: * Returns -1 in case of error or the index in the input stack 2178: */ 2179: int 2180: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2181: int ret; 2182: if (input == NULL) return(-1); 2183: 2184: if (xmlParserDebugEntities) { 2185: if ((ctxt->input != NULL) && (ctxt->input->filename)) 2186: xmlGenericError(xmlGenericErrorContext, 2187: "%s(%d): ", ctxt->input->filename, 2188: ctxt->input->line); 2189: xmlGenericError(xmlGenericErrorContext, 2190: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2191: } 2192: ret = inputPush(ctxt, input); 2193: if (ctxt->instate == XML_PARSER_EOF) 2194: return(-1); 2195: GROW; 2196: return(ret); 2197: } 2198: 2199: /** 2200: * xmlParseCharRef: 2201: * @ctxt: an XML parser context 2202: * 2203: * parse Reference declarations 2204: * 2205: * [66] CharRef ::= '&#' [0-9]+ ';' | 2206: * '&#x' [0-9a-fA-F]+ ';' 2207: * 2208: * [ WFC: Legal Character ] 2209: * Characters referred to using character references must match the 2210: * production for Char. 
2211: * 2212: * Returns the value parsed (as an int), 0 in case of error 2213: */ 2214: int 2215: xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2216: unsigned int val = 0; 2217: int count = 0; 2218: unsigned int outofrange = 0; 2219: 2220: /* 2221: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2222: */ 2223: if ((RAW == '&') && (NXT(1) == '#') && 2224: (NXT(2) == 'x')) { 2225: SKIP(3); 2226: GROW; 2227: while (RAW != ';') { /* loop blocked by count */ 2228: if (count++ > 20) { 2229: count = 0; 2230: GROW; 2231: if (ctxt->instate == XML_PARSER_EOF) 2232: return(0); 2233: } 2234: if ((RAW >= '0') && (RAW <= '9')) 2235: val = val * 16 + (CUR - '0'); 2236: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2237: val = val * 16 + (CUR - 'a') + 10; 2238: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2239: val = val * 16 + (CUR - 'A') + 10; 2240: else { 2241: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2242: val = 0; 2243: break; 2244: } 2245: if (val > 0x10FFFF) 2246: outofrange = val; 2247: 2248: NEXT; 2249: count++; 2250: } 2251: if (RAW == ';') { 2252: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2253: ctxt->input->col++; 2254: ctxt->nbChars ++; 2255: ctxt->input->cur++; 2256: } 2257: } else if ((RAW == '&') && (NXT(1) == '#')) { 2258: SKIP(2); 2259: GROW; 2260: while (RAW != ';') { /* loop blocked by count */ 2261: if (count++ > 20) { 2262: count = 0; 2263: GROW; 2264: if (ctxt->instate == XML_PARSER_EOF) 2265: return(0); 2266: } 2267: if ((RAW >= '0') && (RAW <= '9')) 2268: val = val * 10 + (CUR - '0'); 2269: else { 2270: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2271: val = 0; 2272: break; 2273: } 2274: if (val > 0x10FFFF) 2275: outofrange = val; 2276: 2277: NEXT; 2278: count++; 2279: } 2280: if (RAW == ';') { 2281: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2282: ctxt->input->col++; 2283: ctxt->nbChars ++; 2284: ctxt->input->cur++; 2285: } 2286: } else { 2287: 
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2288: } 2289: 2290: /* 2291: * [ WFC: Legal Character ] 2292: * Characters referred to using character references must match the 2293: * production for Char. 2294: */ 2295: if ((IS_CHAR(val) && (outofrange == 0))) { 2296: return(val); 2297: } else { 2298: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2299: "xmlParseCharRef: invalid xmlChar value %d\n", 2300: val); 2301: } 2302: return(0); 2303: } 2304: 2305: /** 2306: * xmlParseStringCharRef: 2307: * @ctxt: an XML parser context 2308: * @str: a pointer to an index in the string 2309: * 2310: * parse Reference declarations, variant parsing from a string rather 2311: * than an an input flow. 2312: * 2313: * [66] CharRef ::= '&#' [0-9]+ ';' | 2314: * '&#x' [0-9a-fA-F]+ ';' 2315: * 2316: * [ WFC: Legal Character ] 2317: * Characters referred to using character references must match the 2318: * production for Char. 2319: * 2320: * Returns the value parsed (as an int), 0 in case of error, str will be 2321: * updated to the current value of the index 2322: */ 2323: static int 2324: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2325: const xmlChar *ptr; 2326: xmlChar cur; 2327: unsigned int val = 0; 2328: unsigned int outofrange = 0; 2329: 2330: if ((str == NULL) || (*str == NULL)) return(0); 2331: ptr = *str; 2332: cur = *ptr; 2333: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2334: ptr += 3; 2335: cur = *ptr; 2336: while (cur != ';') { /* Non input consuming loop */ 2337: if ((cur >= '0') && (cur <= '9')) 2338: val = val * 16 + (cur - '0'); 2339: else if ((cur >= 'a') && (cur <= 'f')) 2340: val = val * 16 + (cur - 'a') + 10; 2341: else if ((cur >= 'A') && (cur <= 'F')) 2342: val = val * 16 + (cur - 'A') + 10; 2343: else { 2344: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2345: val = 0; 2346: break; 2347: } 2348: if (val > 0x10FFFF) 2349: outofrange = val; 2350: 2351: ptr++; 2352: cur = *ptr; 2353: } 2354: if (cur == ';') 2355: 
ptr++; 2356: } else if ((cur == '&') && (ptr[1] == '#')){ 2357: ptr += 2; 2358: cur = *ptr; 2359: while (cur != ';') { /* Non input consuming loops */ 2360: if ((cur >= '0') && (cur <= '9')) 2361: val = val * 10 + (cur - '0'); 2362: else { 2363: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2364: val = 0; 2365: break; 2366: } 2367: if (val > 0x10FFFF) 2368: outofrange = val; 2369: 2370: ptr++; 2371: cur = *ptr; 2372: } 2373: if (cur == ';') 2374: ptr++; 2375: } else { 2376: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2377: return(0); 2378: } 2379: *str = ptr; 2380: 2381: /* 2382: * [ WFC: Legal Character ] 2383: * Characters referred to using character references must match the 2384: * production for Char. 2385: */ 2386: if ((IS_CHAR(val) && (outofrange == 0))) { 2387: return(val); 2388: } else { 2389: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2390: "xmlParseStringCharRef: invalid xmlChar value %d\n", 2391: val); 2392: } 2393: return(0); 2394: } 2395: 2396: /** 2397: * xmlNewBlanksWrapperInputStream: 2398: * @ctxt: an XML parser context 2399: * @entity: an Entity pointer 2400: * 2401: * Create a new input stream for wrapping 2402: * blanks around a PEReference 2403: * 2404: * Returns the new input stream or NULL 2405: */ 2406: 2407: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2408: 2409: static xmlParserInputPtr 2410: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2411: xmlParserInputPtr input; 2412: xmlChar *buffer; 2413: size_t length; 2414: if (entity == NULL) { 2415: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2416: "xmlNewBlanksWrapperInputStream entity\n"); 2417: return(NULL); 2418: } 2419: if (xmlParserDebugEntities) 2420: xmlGenericError(xmlGenericErrorContext, 2421: "new blanks wrapper for entity: %s\n", entity->name); 2422: input = xmlNewInputStream(ctxt); 2423: if (input == NULL) { 2424: return(NULL); 2425: } 2426: length = xmlStrlen(entity->name) + 5; 2427: buffer = 
xmlMallocAtomic(length); 2428: if (buffer == NULL) { 2429: xmlErrMemory(ctxt, NULL); 2430: xmlFree(input); 2431: return(NULL); 2432: } 2433: buffer [0] = ' '; 2434: buffer [1] = '%'; 2435: buffer [length-3] = ';'; 2436: buffer [length-2] = ' '; 2437: buffer [length-1] = 0; 2438: memcpy(buffer + 2, entity->name, length - 5); 2439: input->free = deallocblankswrapper; 2440: input->base = buffer; 2441: input->cur = buffer; 2442: input->length = length; 2443: input->end = &buffer[length]; 2444: return(input); 2445: } 2446: 2447: /** 2448: * xmlParserHandlePEReference: 2449: * @ctxt: the parser context 2450: * 2451: * [69] PEReference ::= '%' Name ';' 2452: * 2453: * [ WFC: No Recursion ] 2454: * A parsed entity must not contain a recursive 2455: * reference to itself, either directly or indirectly. 2456: * 2457: * [ WFC: Entity Declared ] 2458: * In a document without any DTD, a document with only an internal DTD 2459: * subset which contains no parameter entity references, or a document 2460: * with "standalone='yes'", ... ... The declaration of a parameter 2461: * entity must precede any reference to it... 2462: * 2463: * [ VC: Entity Declared ] 2464: * In a document with an external subset or external parameter entities 2465: * with "standalone='no'", ... ... The declaration of a parameter entity 2466: * must precede any reference to it... 2467: * 2468: * [ WFC: In DTD ] 2469: * Parameter-entity references may only appear in the DTD. 2470: * NOTE: misleading but this is handled. 2471: * 2472: * A PEReference may have been detected in the current input stream 2473: * the handling is done accordingly to 2474: * http://www.w3.org/TR/REC-xml#entproc 2475: * i.e. 
2476: * - Included in literal in entity values 2477: * - Included as Parameter Entity reference within DTDs 2478: */ 2479: void 2480: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2481: const xmlChar *name; 2482: xmlEntityPtr entity = NULL; 2483: xmlParserInputPtr input; 2484: 2485: if (RAW != '%') return; 2486: switch(ctxt->instate) { 2487: case XML_PARSER_CDATA_SECTION: 2488: return; 2489: case XML_PARSER_COMMENT: 2490: return; 2491: case XML_PARSER_START_TAG: 2492: return; 2493: case XML_PARSER_END_TAG: 2494: return; 2495: case XML_PARSER_EOF: 2496: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2497: return; 2498: case XML_PARSER_PROLOG: 2499: case XML_PARSER_START: 2500: case XML_PARSER_MISC: 2501: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2502: return; 2503: case XML_PARSER_ENTITY_DECL: 2504: case XML_PARSER_CONTENT: 2505: case XML_PARSER_ATTRIBUTE_VALUE: 2506: case XML_PARSER_PI: 2507: case XML_PARSER_SYSTEM_LITERAL: 2508: case XML_PARSER_PUBLIC_LITERAL: 2509: /* we just ignore it there */ 2510: return; 2511: case XML_PARSER_EPILOG: 2512: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2513: return; 2514: case XML_PARSER_ENTITY_VALUE: 2515: /* 2516: * NOTE: in the case of entity values, we don't do the 2517: * substitution here since we need the literal 2518: * entity value to be able to save the internal 2519: * subset of the document. 2520: * This will be handled by xmlStringDecodeEntities 2521: */ 2522: return; 2523: case XML_PARSER_DTD: 2524: /* 2525: * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2526: * In the internal DTD subset, parameter-entity references 2527: * can occur only where markup declarations can occur, not 2528: * within markup declarations. 
2529: * In that case this is handled in xmlParseMarkupDecl 2530: */ 2531: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2532: return; 2533: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2534: return; 2535: break; 2536: case XML_PARSER_IGNORE: 2537: return; 2538: } 2539: 2540: NEXT; 2541: name = xmlParseName(ctxt); 2542: if (xmlParserDebugEntities) 2543: xmlGenericError(xmlGenericErrorContext, 2544: "PEReference: %s\n", name); 2545: if (name == NULL) { 2546: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2547: } else { 2548: if (RAW == ';') { 2549: NEXT; 2550: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2551: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2552: if (ctxt->instate == XML_PARSER_EOF) 2553: return; 2554: if (entity == NULL) { 2555: 2556: /* 2557: * [ WFC: Entity Declared ] 2558: * In a document without any DTD, a document with only an 2559: * internal DTD subset which contains no parameter entity 2560: * references, or a document with "standalone='yes'", ... 2561: * ... The declaration of a parameter entity must precede 2562: * any reference to it... 2563: */ 2564: if ((ctxt->standalone == 1) || 2565: ((ctxt->hasExternalSubset == 0) && 2566: (ctxt->hasPErefs == 0))) { 2567: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2568: "PEReference: %%%s; not found\n", name); 2569: } else { 2570: /* 2571: * [ VC: Entity Declared ] 2572: * In a document with an external subset or external 2573: * parameter entities with "standalone='no'", ... 2574: * ... The declaration of a parameter entity must precede 2575: * any reference to it... 
2576: */ 2577: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2578: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2579: "PEReference: %%%s; not found\n", 2580: name, NULL); 2581: } else 2582: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2583: "PEReference: %%%s; not found\n", 2584: name, NULL); 2585: ctxt->valid = 0; 2586: } 2587: } else if (ctxt->input->free != deallocblankswrapper) { 2588: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2589: if (xmlPushInput(ctxt, input) < 0) 2590: return; 2591: } else { 2592: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2593: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2594: xmlChar start[4]; 2595: xmlCharEncoding enc; 2596: 2597: /* 2598: * Note: external parsed entities will not be loaded, it is 2599: * not required for a non-validating parser, unless the 2600: * option of validating, or substituting entities were 2601: * given. Doing so is far more secure as the parser will 2602: * only process data coming from the document entity by 2603: * default. 2604: */ 2605: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2606: ((ctxt->options & XML_PARSE_NOENT) == 0) && 2607: ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 2608: (ctxt->validate == 0)) 2609: return; 2610: 2611: /* 2612: * handle the extra spaces added before and after 2613: * c.f. http://www.w3.org/TR/REC-xml#as-PE 2614: * this is done independently. 2615: */ 2616: input = xmlNewEntityInputStream(ctxt, entity); 2617: if (xmlPushInput(ctxt, input) < 0) 2618: return; 2619: 2620: /* 2621: * Get the 4 first bytes and decode the charset 2622: * if enc != XML_CHAR_ENCODING_NONE 2623: * plug some encoding conversion routines. 2624: * Note that, since we may have some non-UTF8 2625: * encoding (like UTF16, bug 135229), the 'length' 2626: * is not known, but we can calculate based upon 2627: * the amount of data in the buffer. 
2628: */ 2629: GROW 2630: if (ctxt->instate == XML_PARSER_EOF) 2631: return; 2632: if ((ctxt->input->end - ctxt->input->cur)>=4) { 2633: start[0] = RAW; 2634: start[1] = NXT(1); 2635: start[2] = NXT(2); 2636: start[3] = NXT(3); 2637: enc = xmlDetectCharEncoding(start, 4); 2638: if (enc != XML_CHAR_ENCODING_NONE) { 2639: xmlSwitchEncoding(ctxt, enc); 2640: } 2641: } 2642: 2643: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2644: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2645: (IS_BLANK_CH(NXT(5)))) { 2646: xmlParseTextDecl(ctxt); 2647: } 2648: } else { 2649: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2650: "PEReference: %s is not a parameter entity\n", 2651: name); 2652: } 2653: } 2654: } else { 2655: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2656: } 2657: } 2658: } 2659: 2660: /* 2661: * Macro used to grow the current buffer. 2662: * buffer##_size is expected to be a size_t 2663: * mem_error: is expected to handle memory allocation failures 2664: */ 2665: #define growBuffer(buffer, n) { \ 2666: xmlChar *tmp; \ 2667: size_t new_size = buffer##_size * 2 + n; \ 2668: if (new_size < buffer##_size) goto mem_error; \ 2669: tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2670: if (tmp == NULL) goto mem_error; \ 2671: buffer = tmp; \ 2672: buffer##_size = new_size; \ 2673: } 2674: 2675: /** 2676: * xmlStringLenDecodeEntities: 2677: * @ctxt: the parser context 2678: * @str: the input string 2679: * @len: the string length 2680: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2681: * @end: an end marker xmlChar, 0 if none 2682: * @end2: an end marker xmlChar, 0 if none 2683: * @end3: an end marker xmlChar, 0 if none 2684: * 2685: * Takes a entity string content and process to do the adequate substitutions. 2686: * 2687: * [67] Reference ::= EntityRef | CharRef 2688: * 2689: * [69] PEReference ::= '%' Name ';' 2690: * 2691: * Returns A newly allocated string with the substitution done. 
The caller 2692: * must deallocate it ! 2693: */ 2694: xmlChar * 2695: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2696: int what, xmlChar end, xmlChar end2, xmlChar end3) { 2697: xmlChar *buffer = NULL; 2698: size_t buffer_size = 0; 2699: size_t nbchars = 0; 2700: 2701: xmlChar *current = NULL; 2702: xmlChar *rep = NULL; 2703: const xmlChar *last; 2704: xmlEntityPtr ent; 2705: int c,l; 2706: 2707: if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2708: return(NULL); 2709: last = str + len; 2710: 2711: if (((ctxt->depth > 40) && 2712: ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2713: (ctxt->depth > 1024)) { 2714: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2715: return(NULL); 2716: } 2717: 2718: /* 2719: * allocate a translation buffer. 2720: */ 2721: buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2722: buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2723: if (buffer == NULL) goto mem_error; 2724: 2725: /* 2726: * OK loop until we reach one of the ending char or a size limit. 2727: * we are operating on already parsed values. 
2728: */ 2729: if (str < last) 2730: c = CUR_SCHAR(str, l); 2731: else 2732: c = 0; 2733: while ((c != 0) && (c != end) && /* non input consuming loop */ 2734: (c != end2) && (c != end3)) { 2735: 2736: if (c == 0) break; 2737: if ((c == '&') && (str[1] == '#')) { 2738: int val = xmlParseStringCharRef(ctxt, &str); 2739: if (val != 0) { 2740: COPY_BUF(0,buffer,nbchars,val); 2741: } 2742: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2743: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2744: } 2745: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2746: if (xmlParserDebugEntities) 2747: xmlGenericError(xmlGenericErrorContext, 2748: "String decoding Entity Reference: %.30s\n", 2749: str); 2750: ent = xmlParseStringEntityRef(ctxt, &str); 2751: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2752: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2753: goto int_error; 2754: if (ent != NULL) 2755: ctxt->nbentities += ent->checked / 2; 2756: if ((ent != NULL) && 2757: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2758: if (ent->content != NULL) { 2759: COPY_BUF(0,buffer,nbchars,ent->content[0]); 2760: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2761: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2762: } 2763: } else { 2764: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2765: "predefined entity has no content\n"); 2766: } 2767: } else if ((ent != NULL) && (ent->content != NULL)) { 2768: ctxt->depth++; 2769: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2770: 0, 0, 0); 2771: ctxt->depth--; 2772: 2773: if (rep != NULL) { 2774: current = rep; 2775: while (*current != 0) { /* non input consuming loop */ 2776: buffer[nbchars++] = *current++; 2777: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2778: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2779: goto int_error; 2780: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2781: } 2782: } 2783: xmlFree(rep); 2784: rep = NULL; 2785: } 2786: } else if (ent != NULL) { 2787: int i = 
xmlStrlen(ent->name); 2788: const xmlChar *cur = ent->name; 2789: 2790: buffer[nbchars++] = '&'; 2791: if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2792: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2793: } 2794: for (;i > 0;i--) 2795: buffer[nbchars++] = *cur++; 2796: buffer[nbchars++] = ';'; 2797: } 2798: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2799: if (xmlParserDebugEntities) 2800: xmlGenericError(xmlGenericErrorContext, 2801: "String decoding PE Reference: %.30s\n", str); 2802: ent = xmlParseStringPEReference(ctxt, &str); 2803: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2804: goto int_error; 2805: if (ent != NULL) 2806: ctxt->nbentities += ent->checked / 2; 2807: if (ent != NULL) { 2808: if (ent->content == NULL) { 2809: xmlLoadEntityContent(ctxt, ent); 2810: } 2811: ctxt->depth++; 2812: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2813: 0, 0, 0); 2814: ctxt->depth--; 2815: if (rep != NULL) { 2816: current = rep; 2817: while (*current != 0) { /* non input consuming loop */ 2818: buffer[nbchars++] = *current++; 2819: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2820: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2821: goto int_error; 2822: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2823: } 2824: } 2825: xmlFree(rep); 2826: rep = NULL; 2827: } 2828: } 2829: } else { 2830: COPY_BUF(l,buffer,nbchars,c); 2831: str += l; 2832: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2833: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2834: } 2835: } 2836: if (str < last) 2837: c = CUR_SCHAR(str, l); 2838: else 2839: c = 0; 2840: } 2841: buffer[nbchars] = 0; 2842: return(buffer); 2843: 2844: mem_error: 2845: xmlErrMemory(ctxt, NULL); 2846: int_error: 2847: if (rep != NULL) 2848: xmlFree(rep); 2849: if (buffer != NULL) 2850: xmlFree(buffer); 2851: return(NULL); 2852: } 2853: 2854: /** 2855: * xmlStringDecodeEntities: 2856: * @ctxt: the parser context 2857: * @str: the input string 2858: * @what: combination of 
XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2859: * @end: an end marker xmlChar, 0 if none 2860: * @end2: an end marker xmlChar, 0 if none 2861: * @end3: an end marker xmlChar, 0 if none 2862: * 2863: * Takes a entity string content and process to do the adequate substitutions. 2864: * 2865: * [67] Reference ::= EntityRef | CharRef 2866: * 2867: * [69] PEReference ::= '%' Name ';' 2868: * 2869: * Returns A newly allocated string with the substitution done. The caller 2870: * must deallocate it ! 2871: */ 2872: xmlChar * 2873: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2874: xmlChar end, xmlChar end2, xmlChar end3) { 2875: if ((ctxt == NULL) || (str == NULL)) return(NULL); 2876: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2877: end, end2, end3)); 2878: } 2879: 2880: /************************************************************************ 2881: * * 2882: * Commodity functions, cleanup needed ? * 2883: * * 2884: ************************************************************************/ 2885: 2886: /** 2887: * areBlanks: 2888: * @ctxt: an XML parser context 2889: * @str: a xmlChar * 2890: * @len: the size of @str 2891: * @blank_chars: we know the chars are blanks 2892: * 2893: * Is this a sequence of blank chars that one can ignore ? 2894: * 2895: * Returns 1 if ignorable 0 otherwise. 2896: */ 2897: 2898: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2899: int blank_chars) { 2900: int i, ret; 2901: xmlNodePtr lastChild; 2902: 2903: /* 2904: * Don't spend time trying to differentiate them, the same callback is 2905: * used ! 2906: */ 2907: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2908: return(0); 2909: 2910: /* 2911: * Check for xml:space value. 
2912: */ 2913: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2914: (*(ctxt->space) == -2)) 2915: return(0); 2916: 2917: /* 2918: * Check that the string is made of blanks 2919: */ 2920: if (blank_chars == 0) { 2921: for (i = 0;i < len;i++) 2922: if (!(IS_BLANK_CH(str[i]))) return(0); 2923: } 2924: 2925: /* 2926: * Look if the element is mixed content in the DTD if available 2927: */ 2928: if (ctxt->node == NULL) return(0); 2929: if (ctxt->myDoc != NULL) { 2930: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2931: if (ret == 0) return(1); 2932: if (ret == 1) return(0); 2933: } 2934: 2935: /* 2936: * Otherwise, heuristic :-\ 2937: */ 2938: if ((RAW != '<') && (RAW != 0xD)) return(0); 2939: if ((ctxt->node->children == NULL) && 2940: (RAW == '<') && (NXT(1) == '/')) return(0); 2941: 2942: lastChild = xmlGetLastChild(ctxt->node); 2943: if (lastChild == NULL) { 2944: if ((ctxt->node->type != XML_ELEMENT_NODE) && 2945: (ctxt->node->content != NULL)) return(0); 2946: } else if (xmlNodeIsText(lastChild)) 2947: return(0); 2948: else if ((ctxt->node->children != NULL) && 2949: (xmlNodeIsText(ctxt->node->children))) 2950: return(0); 2951: return(1); 2952: } 2953: 2954: /************************************************************************ 2955: * * 2956: * Extra stuff for namespace support * 2957: * Relates to http://www.w3.org/TR/WD-xml-names * 2958: * * 2959: ************************************************************************/ 2960: 2961: /** 2962: * xmlSplitQName: 2963: * @ctxt: an XML parser context 2964: * @name: an XML parser context 2965: * @prefix: a xmlChar ** 2966: * 2967: * parse an UTF8 encoded XML qualified name string 2968: * 2969: * [NS 5] QName ::= (Prefix ':')? LocalPart 2970: * 2971: * [NS 6] Prefix ::= NCName 2972: * 2973: * [NS 7] LocalPart ::= NCName 2974: * 2975: * Returns the local part, and prefix is updated 2976: * to get the Prefix if any. 
2977: */ 2978: 2979: xmlChar * 2980: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2981: xmlChar buf[XML_MAX_NAMELEN + 5]; 2982: xmlChar *buffer = NULL; 2983: int len = 0; 2984: int max = XML_MAX_NAMELEN; 2985: xmlChar *ret = NULL; 2986: const xmlChar *cur = name; 2987: int c; 2988: 2989: if (prefix == NULL) return(NULL); 2990: *prefix = NULL; 2991: 2992: if (cur == NULL) return(NULL); 2993: 2994: #ifndef XML_XML_NAMESPACE 2995: /* xml: prefix is not really a namespace */ 2996: if ((cur[0] == 'x') && (cur[1] == 'm') && 2997: (cur[2] == 'l') && (cur[3] == ':')) 2998: return(xmlStrdup(name)); 2999: #endif 3000: 3001: /* nasty but well=formed */ 3002: if (cur[0] == ':') 3003: return(xmlStrdup(name)); 3004: 3005: c = *cur++; 3006: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 3007: buf[len++] = c; 3008: c = *cur++; 3009: } 3010: if (len >= max) { 3011: /* 3012: * Okay someone managed to make a huge name, so he's ready to pay 3013: * for the processing speed. 
3014: */ 3015: max = len * 2; 3016: 3017: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3018: if (buffer == NULL) { 3019: xmlErrMemory(ctxt, NULL); 3020: return(NULL); 3021: } 3022: memcpy(buffer, buf, len); 3023: while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3024: if (len + 10 > max) { 3025: xmlChar *tmp; 3026: 3027: max *= 2; 3028: tmp = (xmlChar *) xmlRealloc(buffer, 3029: max * sizeof(xmlChar)); 3030: if (tmp == NULL) { 3031: xmlFree(buffer); 3032: xmlErrMemory(ctxt, NULL); 3033: return(NULL); 3034: } 3035: buffer = tmp; 3036: } 3037: buffer[len++] = c; 3038: c = *cur++; 3039: } 3040: buffer[len] = 0; 3041: } 3042: 3043: if ((c == ':') && (*cur == 0)) { 3044: if (buffer != NULL) 3045: xmlFree(buffer); 3046: *prefix = NULL; 3047: return(xmlStrdup(name)); 3048: } 3049: 3050: if (buffer == NULL) 3051: ret = xmlStrndup(buf, len); 3052: else { 3053: ret = buffer; 3054: buffer = NULL; 3055: max = XML_MAX_NAMELEN; 3056: } 3057: 3058: 3059: if (c == ':') { 3060: c = *cur; 3061: *prefix = ret; 3062: if (c == 0) { 3063: return(xmlStrndup(BAD_CAST "", 0)); 3064: } 3065: len = 0; 3066: 3067: /* 3068: * Check that the first character is proper to start 3069: * a new name 3070: */ 3071: if (!(((c >= 0x61) && (c <= 0x7A)) || 3072: ((c >= 0x41) && (c <= 0x5A)) || 3073: (c == '_') || (c == ':'))) { 3074: int l; 3075: int first = CUR_SCHAR(cur, l); 3076: 3077: if (!IS_LETTER(first) && (first != '_')) { 3078: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3079: "Name %s is not XML Namespace compliant\n", 3080: name); 3081: } 3082: } 3083: cur++; 3084: 3085: while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3086: buf[len++] = c; 3087: c = *cur++; 3088: } 3089: if (len >= max) { 3090: /* 3091: * Okay someone managed to make a huge name, so he's ready to pay 3092: * for the processing speed. 
3093: */ 3094: max = len * 2; 3095: 3096: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3097: if (buffer == NULL) { 3098: xmlErrMemory(ctxt, NULL); 3099: return(NULL); 3100: } 3101: memcpy(buffer, buf, len); 3102: while (c != 0) { /* tested bigname2.xml */ 3103: if (len + 10 > max) { 3104: xmlChar *tmp; 3105: 3106: max *= 2; 3107: tmp = (xmlChar *) xmlRealloc(buffer, 3108: max * sizeof(xmlChar)); 3109: if (tmp == NULL) { 3110: xmlErrMemory(ctxt, NULL); 3111: xmlFree(buffer); 3112: return(NULL); 3113: } 3114: buffer = tmp; 3115: } 3116: buffer[len++] = c; 3117: c = *cur++; 3118: } 3119: buffer[len] = 0; 3120: } 3121: 3122: if (buffer == NULL) 3123: ret = xmlStrndup(buf, len); 3124: else { 3125: ret = buffer; 3126: } 3127: } 3128: 3129: return(ret); 3130: } 3131: 3132: /************************************************************************ 3133: * * 3134: * The parser itself * 3135: * Relates to http://www.w3.org/TR/REC-xml * 3136: * * 3137: ************************************************************************/ 3138: 3139: /************************************************************************ 3140: * * 3141: * Routines to parse Name, NCName and NmToken * 3142: * * 3143: ************************************************************************/ 3144: #ifdef DEBUG 3145: static unsigned long nbParseName = 0; 3146: static unsigned long nbParseNmToken = 0; 3147: static unsigned long nbParseNCName = 0; 3148: static unsigned long nbParseNCNameComplex = 0; 3149: static unsigned long nbParseNameComplex = 0; 3150: static unsigned long nbParseStringName = 0; 3151: #endif 3152: 3153: /* 3154: * The two following functions are related to the change of accepted 3155: * characters for Name and NmToken in the Revision 5 of XML-1.0 3156: * They correspond to the modified production [4] and the new production [4a] 3157: * changes in that revision. 
Also note that the macros used for the 3158: * productions Letter, Digit, CombiningChar and Extender are not needed 3159: * anymore. 3160: * We still keep compatibility to pre-revision5 parsing semantic if the 3161: * new XML_PARSE_OLD10 option is given to the parser. 3162: */ 3163: static int 3164: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3165: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3166: /* 3167: * Use the new checks of production [4] [4a] amd [5] of the 3168: * Update 5 of XML-1.0 3169: */ 3170: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3171: (((c >= 'a') && (c <= 'z')) || 3172: ((c >= 'A') && (c <= 'Z')) || 3173: (c == '_') || (c == ':') || 3174: ((c >= 0xC0) && (c <= 0xD6)) || 3175: ((c >= 0xD8) && (c <= 0xF6)) || 3176: ((c >= 0xF8) && (c <= 0x2FF)) || 3177: ((c >= 0x370) && (c <= 0x37D)) || 3178: ((c >= 0x37F) && (c <= 0x1FFF)) || 3179: ((c >= 0x200C) && (c <= 0x200D)) || 3180: ((c >= 0x2070) && (c <= 0x218F)) || 3181: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3182: ((c >= 0x3001) && (c <= 0xD7FF)) || 3183: ((c >= 0xF900) && (c <= 0xFDCF)) || 3184: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3185: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3186: return(1); 3187: } else { 3188: if (IS_LETTER(c) || (c == '_') || (c == ':')) 3189: return(1); 3190: } 3191: return(0); 3192: } 3193: 3194: static int 3195: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3196: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3197: /* 3198: * Use the new checks of production [4] [4a] amd [5] of the 3199: * Update 5 of XML-1.0 3200: */ 3201: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3202: (((c >= 'a') && (c <= 'z')) || 3203: ((c >= 'A') && (c <= 'Z')) || 3204: ((c >= '0') && (c <= '9')) || /* !start */ 3205: (c == '_') || (c == ':') || 3206: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3207: ((c >= 0xC0) && (c <= 0xD6)) || 3208: ((c >= 0xD8) && (c <= 0xF6)) || 3209: ((c >= 0xF8) && (c <= 0x2FF)) || 3210: ((c >= 0x300) && (c <= 0x36F)) || 
/* !start */ 3211: ((c >= 0x370) && (c <= 0x37D)) || 3212: ((c >= 0x37F) && (c <= 0x1FFF)) || 3213: ((c >= 0x200C) && (c <= 0x200D)) || 3214: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3215: ((c >= 0x2070) && (c <= 0x218F)) || 3216: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3217: ((c >= 0x3001) && (c <= 0xD7FF)) || 3218: ((c >= 0xF900) && (c <= 0xFDCF)) || 3219: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3220: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3221: return(1); 3222: } else { 3223: if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3224: (c == '.') || (c == '-') || 3225: (c == '_') || (c == ':') || 3226: (IS_COMBINING(c)) || 3227: (IS_EXTENDER(c))) 3228: return(1); 3229: } 3230: return(0); 3231: } 3232: 3233: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3234: int *len, int *alloc, int normalize); 3235: 3236: static const xmlChar * 3237: xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3238: int len = 0, l; 3239: int c; 3240: int count = 0; 3241: 3242: #ifdef DEBUG 3243: nbParseNameComplex++; 3244: #endif 3245: 3246: /* 3247: * Handler for more complex cases 3248: */ 3249: GROW; 3250: if (ctxt->instate == XML_PARSER_EOF) 3251: return(NULL); 3252: c = CUR_CHAR(l); 3253: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3254: /* 3255: * Use the new checks of production [4] [4a] amd [5] of the 3256: * Update 5 of XML-1.0 3257: */ 3258: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3259: (!(((c >= 'a') && (c <= 'z')) || 3260: ((c >= 'A') && (c <= 'Z')) || 3261: (c == '_') || (c == ':') || 3262: ((c >= 0xC0) && (c <= 0xD6)) || 3263: ((c >= 0xD8) && (c <= 0xF6)) || 3264: ((c >= 0xF8) && (c <= 0x2FF)) || 3265: ((c >= 0x370) && (c <= 0x37D)) || 3266: ((c >= 0x37F) && (c <= 0x1FFF)) || 3267: ((c >= 0x200C) && (c <= 0x200D)) || 3268: ((c >= 0x2070) && (c <= 0x218F)) || 3269: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3270: ((c >= 0x3001) && (c <= 0xD7FF)) || 3271: ((c >= 0xF900) && (c <= 0xFDCF)) || 3272: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3273: ((c >= 0x10000) && (c 
<= 0xEFFFF))))) { 3274: return(NULL); 3275: } 3276: len += l; 3277: NEXTL(l); 3278: c = CUR_CHAR(l); 3279: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3280: (((c >= 'a') && (c <= 'z')) || 3281: ((c >= 'A') && (c <= 'Z')) || 3282: ((c >= '0') && (c <= '9')) || /* !start */ 3283: (c == '_') || (c == ':') || 3284: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3285: ((c >= 0xC0) && (c <= 0xD6)) || 3286: ((c >= 0xD8) && (c <= 0xF6)) || 3287: ((c >= 0xF8) && (c <= 0x2FF)) || 3288: ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3289: ((c >= 0x370) && (c <= 0x37D)) || 3290: ((c >= 0x37F) && (c <= 0x1FFF)) || 3291: ((c >= 0x200C) && (c <= 0x200D)) || 3292: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3293: ((c >= 0x2070) && (c <= 0x218F)) || 3294: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3295: ((c >= 0x3001) && (c <= 0xD7FF)) || 3296: ((c >= 0xF900) && (c <= 0xFDCF)) || 3297: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3298: ((c >= 0x10000) && (c <= 0xEFFFF)) 3299: )) { 3300: if (count++ > XML_PARSER_CHUNK_SIZE) { 3301: count = 0; 3302: GROW; 3303: if (ctxt->instate == XML_PARSER_EOF) 3304: return(NULL); 3305: } 3306: len += l; 3307: NEXTL(l); 3308: c = CUR_CHAR(l); 3309: } 3310: } else { 3311: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3312: (!IS_LETTER(c) && (c != '_') && 3313: (c != ':'))) { 3314: return(NULL); 3315: } 3316: len += l; 3317: NEXTL(l); 3318: c = CUR_CHAR(l); 3319: 3320: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3321: ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3322: (c == '.') || (c == '-') || 3323: (c == '_') || (c == ':') || 3324: (IS_COMBINING(c)) || 3325: (IS_EXTENDER(c)))) { 3326: if (count++ > XML_PARSER_CHUNK_SIZE) { 3327: count = 0; 3328: GROW; 3329: if (ctxt->instate == XML_PARSER_EOF) 3330: return(NULL); 3331: } 3332: len += l; 3333: NEXTL(l); 3334: c = CUR_CHAR(l); 3335: if (c == 0) { 3336: count = 0; 3337: GROW; 3338: if (ctxt->instate == XML_PARSER_EOF) 3339: return(NULL); 
3340: c = CUR_CHAR(l); 3341: } 3342: } 3343: } 3344: if ((len > XML_MAX_NAME_LENGTH) && 3345: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3346: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3347: return(NULL); 3348: } 3349: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3350: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3351: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3352: } 3353: 3354: /** 3355: * xmlParseName: 3356: * @ctxt: an XML parser context 3357: * 3358: * parse an XML name. 3359: * 3360: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3361: * CombiningChar | Extender 3362: * 3363: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3364: * 3365: * [6] Names ::= Name (#x20 Name)* 3366: * 3367: * Returns the Name parsed or NULL 3368: */ 3369: 3370: const xmlChar * 3371: xmlParseName(xmlParserCtxtPtr ctxt) { 3372: const xmlChar *in; 3373: const xmlChar *ret; 3374: int count = 0; 3375: 3376: GROW; 3377: 3378: #ifdef DEBUG 3379: nbParseName++; 3380: #endif 3381: 3382: /* 3383: * Accelerator for simple ASCII names 3384: */ 3385: in = ctxt->input->cur; 3386: if (((*in >= 0x61) && (*in <= 0x7A)) || 3387: ((*in >= 0x41) && (*in <= 0x5A)) || 3388: (*in == '_') || (*in == ':')) { 3389: in++; 3390: while (((*in >= 0x61) && (*in <= 0x7A)) || 3391: ((*in >= 0x41) && (*in <= 0x5A)) || 3392: ((*in >= 0x30) && (*in <= 0x39)) || 3393: (*in == '_') || (*in == '-') || 3394: (*in == ':') || (*in == '.')) 3395: in++; 3396: if ((*in > 0) && (*in < 0x80)) { 3397: count = in - ctxt->input->cur; 3398: if ((count > XML_MAX_NAME_LENGTH) && 3399: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3400: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3401: return(NULL); 3402: } 3403: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3404: ctxt->input->cur = in; 3405: ctxt->nbChars += count; 3406: ctxt->input->col += count; 3407: if (ret == NULL) 3408: xmlErrMemory(ctxt, NULL); 3409: return(ret); 3410: } 
3411: } 3412: /* accelerator for special cases */ 3413: return(xmlParseNameComplex(ctxt)); 3414: } 3415: 3416: static const xmlChar * 3417: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3418: int len = 0, l; 3419: int c; 3420: int count = 0; 3421: 3422: #ifdef DEBUG 3423: nbParseNCNameComplex++; 3424: #endif 3425: 3426: /* 3427: * Handler for more complex cases 3428: */ 3429: GROW; 3430: c = CUR_CHAR(l); 3431: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3432: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3433: return(NULL); 3434: } 3435: 3436: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3437: (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3438: if (count++ > XML_PARSER_CHUNK_SIZE) { 3439: if ((len > XML_MAX_NAME_LENGTH) && 3440: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3441: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3442: return(NULL); 3443: } 3444: count = 0; 3445: GROW; 3446: if (ctxt->instate == XML_PARSER_EOF) 3447: return(NULL); 3448: } 3449: len += l; 3450: NEXTL(l); 3451: c = CUR_CHAR(l); 3452: if (c == 0) { 3453: count = 0; 3454: GROW; 3455: if (ctxt->instate == XML_PARSER_EOF) 3456: return(NULL); 3457: c = CUR_CHAR(l); 3458: } 3459: } 3460: if ((len > XML_MAX_NAME_LENGTH) && 3461: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3462: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3463: return(NULL); 3464: } 3465: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3466: } 3467: 3468: /** 3469: * xmlParseNCName: 3470: * @ctxt: an XML parser context 3471: * @len: length of the string parsed 3472: * 3473: * parse an XML name. 3474: * 3475: * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3476: * CombiningChar | Extender 3477: * 3478: * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3479: * 3480: * Returns the Name parsed or NULL 3481: */ 3482: 3483: static const xmlChar * 3484: xmlParseNCName(xmlParserCtxtPtr ctxt) { 3485: const xmlChar *in; 3486: const xmlChar *ret; 3487: int count = 0; 3488: 3489: #ifdef DEBUG 3490: nbParseNCName++; 3491: #endif 3492: 3493: /* 3494: * Accelerator for simple ASCII names 3495: */ 3496: in = ctxt->input->cur; 3497: if (((*in >= 0x61) && (*in <= 0x7A)) || 3498: ((*in >= 0x41) && (*in <= 0x5A)) || 3499: (*in == '_')) { 3500: in++; 3501: while (((*in >= 0x61) && (*in <= 0x7A)) || 3502: ((*in >= 0x41) && (*in <= 0x5A)) || 3503: ((*in >= 0x30) && (*in <= 0x39)) || 3504: (*in == '_') || (*in == '-') || 3505: (*in == '.')) 3506: in++; 3507: if ((*in > 0) && (*in < 0x80)) { 3508: count = in - ctxt->input->cur; 3509: if ((count > XML_MAX_NAME_LENGTH) && 3510: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3511: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3512: return(NULL); 3513: } 3514: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3515: ctxt->input->cur = in; 3516: ctxt->nbChars += count; 3517: ctxt->input->col += count; 3518: if (ret == NULL) { 3519: xmlErrMemory(ctxt, NULL); 3520: } 3521: return(ret); 3522: } 3523: } 3524: return(xmlParseNCNameComplex(ctxt)); 3525: } 3526: 3527: /** 3528: * xmlParseNameAndCompare: 3529: * @ctxt: an XML parser context 3530: * 3531: * parse an XML name and compares for match 3532: * (specialized for endtag parsing) 3533: * 3534: * Returns NULL for an illegal name, (xmlChar*) 1 for success 3535: * and the name for mismatch 3536: */ 3537: 3538: static const xmlChar * 3539: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3540: register const xmlChar *cmp = other; 3541: register const xmlChar *in; 3542: const xmlChar *ret; 3543: 3544: GROW; 3545: if (ctxt->instate == XML_PARSER_EOF) 3546: return(NULL); 3547: 3548: in = ctxt->input->cur; 3549: 
while (*in != 0 && *in == *cmp) { 3550: ++in; 3551: ++cmp; 3552: ctxt->input->col++; 3553: } 3554: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3555: /* success */ 3556: ctxt->input->cur = in; 3557: return (const xmlChar*) 1; 3558: } 3559: /* failure (or end of input buffer), check with full function */ 3560: ret = xmlParseName (ctxt); 3561: /* strings coming from the dictionary, direct compare possible */ 3562: if (ret == other) { 3563: return (const xmlChar*) 1; 3564: } 3565: return ret; 3566: } 3567: 3568: /** 3569: * xmlParseStringName: 3570: * @ctxt: an XML parser context 3571: * @str: a pointer to the string pointer (IN/OUT) 3572: * 3573: * parse an XML name. 3574: * 3575: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3576: * CombiningChar | Extender 3577: * 3578: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3579: * 3580: * [6] Names ::= Name (#x20 Name)* 3581: * 3582: * Returns the Name parsed or NULL. The @str pointer 3583: * is updated to the current location in the string. 3584: */ 3585: 3586: static xmlChar * 3587: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3588: xmlChar buf[XML_MAX_NAMELEN + 5]; 3589: const xmlChar *cur = *str; 3590: int len = 0, l; 3591: int c; 3592: 3593: #ifdef DEBUG 3594: nbParseStringName++; 3595: #endif 3596: 3597: c = CUR_SCHAR(cur, l); 3598: if (!xmlIsNameStartChar(ctxt, c)) { 3599: return(NULL); 3600: } 3601: 3602: COPY_BUF(l,buf,len,c); 3603: cur += l; 3604: c = CUR_SCHAR(cur, l); 3605: while (xmlIsNameChar(ctxt, c)) { 3606: COPY_BUF(l,buf,len,c); 3607: cur += l; 3608: c = CUR_SCHAR(cur, l); 3609: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3610: /* 3611: * Okay someone managed to make a huge name, so he's ready to pay 3612: * for the processing speed. 
3613: */ 3614: xmlChar *buffer; 3615: int max = len * 2; 3616: 3617: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3618: if (buffer == NULL) { 3619: xmlErrMemory(ctxt, NULL); 3620: return(NULL); 3621: } 3622: memcpy(buffer, buf, len); 3623: while (xmlIsNameChar(ctxt, c)) { 3624: if (len + 10 > max) { 3625: xmlChar *tmp; 3626: 3627: if ((len > XML_MAX_NAME_LENGTH) && 3628: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3629: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3630: xmlFree(buffer); 3631: return(NULL); 3632: } 3633: max *= 2; 3634: tmp = (xmlChar *) xmlRealloc(buffer, 3635: max * sizeof(xmlChar)); 3636: if (tmp == NULL) { 3637: xmlErrMemory(ctxt, NULL); 3638: xmlFree(buffer); 3639: return(NULL); 3640: } 3641: buffer = tmp; 3642: } 3643: COPY_BUF(l,buffer,len,c); 3644: cur += l; 3645: c = CUR_SCHAR(cur, l); 3646: } 3647: buffer[len] = 0; 3648: *str = cur; 3649: return(buffer); 3650: } 3651: } 3652: if ((len > XML_MAX_NAME_LENGTH) && 3653: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3654: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3655: return(NULL); 3656: } 3657: *str = cur; 3658: return(xmlStrndup(buf, len)); 3659: } 3660: 3661: /** 3662: * xmlParseNmtoken: 3663: * @ctxt: an XML parser context 3664: * 3665: * parse an XML Nmtoken. 
3666: * 3667: * [7] Nmtoken ::= (NameChar)+ 3668: * 3669: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3670: * 3671: * Returns the Nmtoken parsed or NULL 3672: */ 3673: 3674: xmlChar * 3675: xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3676: xmlChar buf[XML_MAX_NAMELEN + 5]; 3677: int len = 0, l; 3678: int c; 3679: int count = 0; 3680: 3681: #ifdef DEBUG 3682: nbParseNmToken++; 3683: #endif 3684: 3685: GROW; 3686: if (ctxt->instate == XML_PARSER_EOF) 3687: return(NULL); 3688: c = CUR_CHAR(l); 3689: 3690: while (xmlIsNameChar(ctxt, c)) { 3691: if (count++ > XML_PARSER_CHUNK_SIZE) { 3692: count = 0; 3693: GROW; 3694: } 3695: COPY_BUF(l,buf,len,c); 3696: NEXTL(l); 3697: c = CUR_CHAR(l); 3698: if (c == 0) { 3699: count = 0; 3700: GROW; 3701: if (ctxt->instate == XML_PARSER_EOF) 3702: return(NULL); 3703: c = CUR_CHAR(l); 3704: } 3705: if (len >= XML_MAX_NAMELEN) { 3706: /* 3707: * Okay someone managed to make a huge token, so he's ready to pay 3708: * for the processing speed. 3709: */ 3710: xmlChar *buffer; 3711: int max = len * 2; 3712: 3713: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3714: if (buffer == NULL) { 3715: xmlErrMemory(ctxt, NULL); 3716: return(NULL); 3717: } 3718: memcpy(buffer, buf, len); 3719: while (xmlIsNameChar(ctxt, c)) { 3720: if (count++ > XML_PARSER_CHUNK_SIZE) { 3721: count = 0; 3722: GROW; 3723: if (ctxt->instate == XML_PARSER_EOF) { 3724: xmlFree(buffer); 3725: return(NULL); 3726: } 3727: } 3728: if (len + 10 > max) { 3729: xmlChar *tmp; 3730: 3731: if ((max > XML_MAX_NAME_LENGTH) && 3732: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3733: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3734: xmlFree(buffer); 3735: return(NULL); 3736: } 3737: max *= 2; 3738: tmp = (xmlChar *) xmlRealloc(buffer, 3739: max * sizeof(xmlChar)); 3740: if (tmp == NULL) { 3741: xmlErrMemory(ctxt, NULL); 3742: xmlFree(buffer); 3743: return(NULL); 3744: } 3745: buffer = tmp; 3746: } 3747: COPY_BUF(l,buffer,len,c); 3748: NEXTL(l); 3749: c = CUR_CHAR(l); 
3750: } 3751: buffer[len] = 0; 3752: return(buffer); 3753: } 3754: } 3755: if (len == 0) 3756: return(NULL); 3757: if ((len > XML_MAX_NAME_LENGTH) && 3758: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3759: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3760: return(NULL); 3761: } 3762: return(xmlStrndup(buf, len)); 3763: } 3764: 3765: /** 3766: * xmlParseEntityValue: 3767: * @ctxt: an XML parser context 3768: * @orig: if non-NULL store a copy of the original entity value 3769: * 3770: * parse a value for ENTITY declarations 3771: * 3772: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3773: * "'" ([^%&'] | PEReference | Reference)* "'" 3774: * 3775: * Returns the EntityValue parsed with reference substituted or NULL 3776: */ 3777: 3778: xmlChar * 3779: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3780: xmlChar *buf = NULL; 3781: int len = 0; 3782: int size = XML_PARSER_BUFFER_SIZE; 3783: int c, l; 3784: xmlChar stop; 3785: xmlChar *ret = NULL; 3786: const xmlChar *cur = NULL; 3787: xmlParserInputPtr input; 3788: 3789: if (RAW == '"') stop = '"'; 3790: else if (RAW == '\'') stop = '\''; 3791: else { 3792: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3793: return(NULL); 3794: } 3795: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3796: if (buf == NULL) { 3797: xmlErrMemory(ctxt, NULL); 3798: return(NULL); 3799: } 3800: 3801: /* 3802: * The content of the entity definition is copied in a buffer. 3803: */ 3804: 3805: ctxt->instate = XML_PARSER_ENTITY_VALUE; 3806: input = ctxt->input; 3807: GROW; 3808: if (ctxt->instate == XML_PARSER_EOF) { 3809: xmlFree(buf); 3810: return(NULL); 3811: } 3812: NEXT; 3813: c = CUR_CHAR(l); 3814: /* 3815: * NOTE: 4.4.5 Included in Literal 3816: * When a parameter entity reference appears in a literal entity 3817: * value, ... a single or double quote character in the replacement 3818: * text is always treated as a normal data character and will not 3819: * terminate the literal. 
3820: * In practice it means we stop the loop only when back at parsing 3821: * the initial entity and the quote is found 3822: */ 3823: while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3824: (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3825: if (len + 5 >= size) { 3826: xmlChar *tmp; 3827: 3828: size *= 2; 3829: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3830: if (tmp == NULL) { 3831: xmlErrMemory(ctxt, NULL); 3832: xmlFree(buf); 3833: return(NULL); 3834: } 3835: buf = tmp; 3836: } 3837: COPY_BUF(l,buf,len,c); 3838: NEXTL(l); 3839: /* 3840: * Pop-up of finished entities. 3841: */ 3842: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3843: xmlPopInput(ctxt); 3844: 3845: GROW; 3846: c = CUR_CHAR(l); 3847: if (c == 0) { 3848: GROW; 3849: c = CUR_CHAR(l); 3850: } 3851: } 3852: buf[len] = 0; 3853: if (ctxt->instate == XML_PARSER_EOF) { 3854: xmlFree(buf); 3855: return(NULL); 3856: } 3857: 3858: /* 3859: * Raise problem w.r.t. '&' and '%' being used in non-entities 3860: * reference constructs. Note Charref will be handled in 3861: * xmlStringDecodeEntities() 3862: */ 3863: cur = buf; 3864: while (*cur != 0) { /* non input consuming */ 3865: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3866: xmlChar *name; 3867: xmlChar tmp = *cur; 3868: 3869: cur++; 3870: name = xmlParseStringName(ctxt, &cur); 3871: if ((name == NULL) || (*cur != ';')) { 3872: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3873: "EntityValue: '%c' forbidden except for entities references\n", 3874: tmp); 3875: } 3876: if ((tmp == '%') && (ctxt->inSubset == 1) && 3877: (ctxt->inputNr == 1)) { 3878: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3879: } 3880: if (name != NULL) 3881: xmlFree(name); 3882: if (*cur == 0) 3883: break; 3884: } 3885: cur++; 3886: } 3887: 3888: /* 3889: * Then PEReference entities are substituted. 
3890: */ 3891: if (c != stop) { 3892: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3893: xmlFree(buf); 3894: } else { 3895: NEXT; 3896: /* 3897: * NOTE: 4.4.7 Bypassed 3898: * When a general entity reference appears in the EntityValue in 3899: * an entity declaration, it is bypassed and left as is. 3900: * so XML_SUBSTITUTE_REF is not set here. 3901: */ 3902: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3903: 0, 0, 0); 3904: if (orig != NULL) 3905: *orig = buf; 3906: else 3907: xmlFree(buf); 3908: } 3909: 3910: return(ret); 3911: } 3912: 3913: /** 3914: * xmlParseAttValueComplex: 3915: * @ctxt: an XML parser context 3916: * @attlen: the resulting attribute len 3917: * @normalize: whether to apply the inner normalization 3918: * 3919: * parse a value for an attribute, this is the fallback function 3920: * of xmlParseAttValue() when the attribute parsing requires handling 3921: * of non-ASCII characters, or normalization compaction. 3922: * 3923: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3924: */ 3925: static xmlChar * 3926: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3927: xmlChar limit = 0; 3928: xmlChar *buf = NULL; 3929: xmlChar *rep = NULL; 3930: size_t len = 0; 3931: size_t buf_size = 0; 3932: int c, l, in_space = 0; 3933: xmlChar *current = NULL; 3934: xmlEntityPtr ent; 3935: 3936: if (NXT(0) == '"') { 3937: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3938: limit = '"'; 3939: NEXT; 3940: } else if (NXT(0) == '\'') { 3941: limit = '\''; 3942: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3943: NEXT; 3944: } else { 3945: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3946: return(NULL); 3947: } 3948: 3949: /* 3950: * allocate a translation buffer. 
3951: */ 3952: buf_size = XML_PARSER_BUFFER_SIZE; 3953: buf = (xmlChar *) xmlMallocAtomic(buf_size); 3954: if (buf == NULL) goto mem_error; 3955: 3956: /* 3957: * OK loop until we reach one of the ending char or a size limit. 3958: */ 3959: c = CUR_CHAR(l); 3960: while (((NXT(0) != limit) && /* checked */ 3961: (IS_CHAR(c)) && (c != '<')) && 3962: (ctxt->instate != XML_PARSER_EOF)) { 3963: /* 3964: * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3965: * special option is given 3966: */ 3967: if ((len > XML_MAX_TEXT_LENGTH) && 3968: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3969: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3970: "AttValue length too long\n"); 3971: goto mem_error; 3972: } 3973: if (c == 0) break; 3974: if (c == '&') { 3975: in_space = 0; 3976: if (NXT(1) == '#') { 3977: int val = xmlParseCharRef(ctxt); 3978: 3979: if (val == '&') { 3980: if (ctxt->replaceEntities) { 3981: if (len + 10 > buf_size) { 3982: growBuffer(buf, 10); 3983: } 3984: buf[len++] = '&'; 3985: } else { 3986: /* 3987: * The reparsing will be done in xmlStringGetNodeList() 3988: * called by the attribute() function in SAX.c 3989: */ 3990: if (len + 10 > buf_size) { 3991: growBuffer(buf, 10); 3992: } 3993: buf[len++] = '&'; 3994: buf[len++] = '#'; 3995: buf[len++] = '3'; 3996: buf[len++] = '8'; 3997: buf[len++] = ';'; 3998: } 3999: } else if (val != 0) { 4000: if (len + 10 > buf_size) { 4001: growBuffer(buf, 10); 4002: } 4003: len += xmlCopyChar(0, &buf[len], val); 4004: } 4005: } else { 4006: ent = xmlParseEntityRef(ctxt); 4007: ctxt->nbentities++; 4008: if (ent != NULL) 4009: ctxt->nbentities += ent->owner; 4010: if ((ent != NULL) && 4011: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 4012: if (len + 10 > buf_size) { 4013: growBuffer(buf, 10); 4014: } 4015: if ((ctxt->replaceEntities == 0) && 4016: (ent->content[0] == '&')) { 4017: buf[len++] = '&'; 4018: buf[len++] = '#'; 4019: buf[len++] = '3'; 4020: buf[len++] = '8'; 4021: buf[len++] = ';'; 
4022: } else { 4023: buf[len++] = ent->content[0]; 4024: } 4025: } else if ((ent != NULL) && 4026: (ctxt->replaceEntities != 0)) { 4027: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4028: rep = xmlStringDecodeEntities(ctxt, ent->content, 4029: XML_SUBSTITUTE_REF, 4030: 0, 0, 0); 4031: if (rep != NULL) { 4032: current = rep; 4033: while (*current != 0) { /* non input consuming */ 4034: if ((*current == 0xD) || (*current == 0xA) || 4035: (*current == 0x9)) { 4036: buf[len++] = 0x20; 4037: current++; 4038: } else 4039: buf[len++] = *current++; 4040: if (len + 10 > buf_size) { 4041: growBuffer(buf, 10); 4042: } 4043: } 4044: xmlFree(rep); 4045: rep = NULL; 4046: } 4047: } else { 4048: if (len + 10 > buf_size) { 4049: growBuffer(buf, 10); 4050: } 4051: if (ent->content != NULL) 4052: buf[len++] = ent->content[0]; 4053: } 4054: } else if (ent != NULL) { 4055: int i = xmlStrlen(ent->name); 4056: const xmlChar *cur = ent->name; 4057: 4058: /* 4059: * This may look absurd but is needed to detect 4060: * entities problems 4061: */ 4062: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4063: (ent->content != NULL) && (ent->checked == 0)) { 4064: unsigned long oldnbent = ctxt->nbentities; 4065: 4066: rep = xmlStringDecodeEntities(ctxt, ent->content, 4067: XML_SUBSTITUTE_REF, 0, 0, 0); 4068: 4069: ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4070: if (rep != NULL) { 4071: if (xmlStrchr(rep, '<')) 4072: ent->checked |= 1; 4073: xmlFree(rep); 4074: rep = NULL; 4075: } 4076: } 4077: 4078: /* 4079: * Just output the reference 4080: */ 4081: buf[len++] = '&'; 4082: while (len + i + 10 > buf_size) { 4083: growBuffer(buf, i + 10); 4084: } 4085: for (;i > 0;i--) 4086: buf[len++] = *cur++; 4087: buf[len++] = ';'; 4088: } 4089: } 4090: } else { 4091: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4092: if ((len != 0) || (!normalize)) { 4093: if ((!normalize) || (!in_space)) { 4094: COPY_BUF(l,buf,len,0x20); 4095: while (len + 10 > buf_size) { 4096: 
growBuffer(buf, 10); 4097: } 4098: } 4099: in_space = 1; 4100: } 4101: } else { 4102: in_space = 0; 4103: COPY_BUF(l,buf,len,c); 4104: if (len + 10 > buf_size) { 4105: growBuffer(buf, 10); 4106: } 4107: } 4108: NEXTL(l); 4109: } 4110: GROW; 4111: c = CUR_CHAR(l); 4112: } 4113: if (ctxt->instate == XML_PARSER_EOF) 4114: goto error; 4115: 4116: if ((in_space) && (normalize)) { 4117: while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4118: } 4119: buf[len] = 0; 4120: if (RAW == '<') { 4121: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4122: } else if (RAW != limit) { 4123: if ((c != 0) && (!IS_CHAR(c))) { 4124: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4125: "invalid character in attribute value\n"); 4126: } else { 4127: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4128: "AttValue: ' expected\n"); 4129: } 4130: } else 4131: NEXT; 4132: 4133: /* 4134: * Here we potentially risk an overflow: don't allow attribute values of 4135: * length INT_MAX or more, which is a very reasonable assumption. 
4136: */ 4137: if (len >= INT_MAX) { 4138: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4139: "AttValue length too long\n"); 4140: goto mem_error; 4141: } 4142: 4143: if (attlen != NULL) *attlen = (int) len; 4144: return(buf); 4145: 4146: mem_error: 4147: xmlErrMemory(ctxt, NULL); 4148: error: 4149: if (buf != NULL) 4150: xmlFree(buf); 4151: if (rep != NULL) 4152: xmlFree(rep); 4153: return(NULL); 4154: } 4155: 4156: /** 4157: * xmlParseAttValue: 4158: * @ctxt: an XML parser context 4159: * 4160: * parse a value for an attribute 4161: * Note: the parser won't do substitution of entities here, this 4162: * will be handled later in xmlStringGetNodeList 4163: * 4164: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4165: * "'" ([^<&'] | Reference)* "'" 4166: * 4167: * 3.3.3 Attribute-Value Normalization: 4168: * Before the value of an attribute is passed to the application or 4169: * checked for validity, the XML processor must normalize it as follows: 4170: * - a character reference is processed by appending the referenced 4171: * character to the attribute value 4172: * - an entity reference is processed by recursively processing the 4173: * replacement text of the entity 4174: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4175: * appending #x20 to the normalized value, except that only a single 4176: * #x20 is appended for a "#xD#xA" sequence that is part of an external 4177: * parsed entity or the literal entity value of an internal parsed entity 4178: * - other characters are processed by appending them to the normalized value 4179: * If the declared value is not CDATA, then the XML processor must further 4180: * process the normalized attribute value by discarding any leading and 4181: * trailing space (#x20) characters, and by replacing sequences of space 4182: * (#x20) characters by a single space (#x20) character. 
4183: * All attributes for which no declaration has been read should be treated 4184: * by a non-validating parser as if declared CDATA. 4185: * 4186: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4187: */ 4188: 4189: 4190: xmlChar * 4191: xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4192: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4193: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4194: } 4195: 4196: /** 4197: * xmlParseSystemLiteral: 4198: * @ctxt: an XML parser context 4199: * 4200: * parse an XML Literal 4201: * 4202: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4203: * 4204: * Returns the SystemLiteral parsed or NULL 4205: */ 4206: 4207: xmlChar * 4208: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4209: xmlChar *buf = NULL; 4210: int len = 0; 4211: int size = XML_PARSER_BUFFER_SIZE; 4212: int cur, l; 4213: xmlChar stop; 4214: int state = ctxt->instate; 4215: int count = 0; 4216: 4217: SHRINK; 4218: if (RAW == '"') { 4219: NEXT; 4220: stop = '"'; 4221: } else if (RAW == '\'') { 4222: NEXT; 4223: stop = '\''; 4224: } else { 4225: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4226: return(NULL); 4227: } 4228: 4229: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4230: if (buf == NULL) { 4231: xmlErrMemory(ctxt, NULL); 4232: return(NULL); 4233: } 4234: ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4235: cur = CUR_CHAR(l); 4236: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4237: if (len + 5 >= size) { 4238: xmlChar *tmp; 4239: 4240: if ((size > XML_MAX_NAME_LENGTH) && 4241: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4242: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4243: xmlFree(buf); 4244: ctxt->instate = (xmlParserInputState) state; 4245: return(NULL); 4246: } 4247: size *= 2; 4248: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4249: if (tmp == NULL) { 4250: xmlFree(buf); 4251: xmlErrMemory(ctxt, NULL); 4252: ctxt->instate = 
(xmlParserInputState) state; 4253: return(NULL); 4254: } 4255: buf = tmp; 4256: } 4257: count++; 4258: if (count > 50) { 4259: GROW; 4260: count = 0; 4261: if (ctxt->instate == XML_PARSER_EOF) { 4262: xmlFree(buf); 4263: return(NULL); 4264: } 4265: } 4266: COPY_BUF(l,buf,len,cur); 4267: NEXTL(l); 4268: cur = CUR_CHAR(l); 4269: if (cur == 0) { 4270: GROW; 4271: SHRINK; 4272: cur = CUR_CHAR(l); 4273: } 4274: } 4275: buf[len] = 0; 4276: ctxt->instate = (xmlParserInputState) state; 4277: if (!IS_CHAR(cur)) { 4278: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4279: } else { 4280: NEXT; 4281: } 4282: return(buf); 4283: } 4284: 4285: /** 4286: * xmlParsePubidLiteral: 4287: * @ctxt: an XML parser context 4288: * 4289: * parse an XML public literal 4290: * 4291: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4292: * 4293: * Returns the PubidLiteral parsed or NULL. 4294: */ 4295: 4296: xmlChar * 4297: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4298: xmlChar *buf = NULL; 4299: int len = 0; 4300: int size = XML_PARSER_BUFFER_SIZE; 4301: xmlChar cur; 4302: xmlChar stop; 4303: int count = 0; 4304: xmlParserInputState oldstate = ctxt->instate; 4305: 4306: SHRINK; 4307: if (RAW == '"') { 4308: NEXT; 4309: stop = '"'; 4310: } else if (RAW == '\'') { 4311: NEXT; 4312: stop = '\''; 4313: } else { 4314: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4315: return(NULL); 4316: } 4317: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4318: if (buf == NULL) { 4319: xmlErrMemory(ctxt, NULL); 4320: return(NULL); 4321: } 4322: ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4323: cur = CUR; 4324: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4325: if (len + 1 >= size) { 4326: xmlChar *tmp; 4327: 4328: if ((size > XML_MAX_NAME_LENGTH) && 4329: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4330: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4331: xmlFree(buf); 4332: return(NULL); 4333: } 4334: size *= 2; 4335: tmp = 
(xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4336: if (tmp == NULL) { 4337: xmlErrMemory(ctxt, NULL); 4338: xmlFree(buf); 4339: return(NULL); 4340: } 4341: buf = tmp; 4342: } 4343: buf[len++] = cur; 4344: count++; 4345: if (count > 50) { 4346: GROW; 4347: count = 0; 4348: if (ctxt->instate == XML_PARSER_EOF) { 4349: xmlFree(buf); 4350: return(NULL); 4351: } 4352: } 4353: NEXT; 4354: cur = CUR; 4355: if (cur == 0) { 4356: GROW; 4357: SHRINK; 4358: cur = CUR; 4359: } 4360: } 4361: buf[len] = 0; 4362: if (cur != stop) { 4363: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4364: } else { 4365: NEXT; 4366: } 4367: ctxt->instate = oldstate; 4368: return(buf); 4369: } 4370: 4371: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4372: 4373: /* 4374: * used for the test in the inner loop of the char data testing 4375: */ 4376: static const unsigned char test_char_data[256] = { 4377: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4378: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4379: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4380: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4381: 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4382: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4383: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4384: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4385: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4386: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4387: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4388: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4389: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4390: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4391: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4392: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4393: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4394: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4395: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 4396: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4397: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4398: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4399: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4400: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4401: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4402: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4403: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4404: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4405: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4406: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4407: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4408: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4409: }; 4410: 4411: /** 4412: * xmlParseCharData: 4413: * @ctxt: an XML parser context 4414: * @cdata: int indicating whether we are within a CDATA section 4415: * 4416: * parse a CharData section. 4417: * if we are within a CDATA section ']]>' marks an end of section. 4418: * 4419: * The right angle bracket (>) may be represented using the string "&gt;", 4420: * and must, for compatibility, be escaped using "&gt;" or a character 4421: * reference when it appears in the string "]]>" in content, when that 4422: * string is not marking the end of a CDATA section. 4423: * 4424: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4425: */ 4426: 4427: void 4428: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4429: const xmlChar *in; 4430: int nbchar = 0; 4431: int line = ctxt->input->line; 4432: int col = ctxt->input->col; 4433: int ccol; 4434: 4435: SHRINK; 4436: GROW; 4437: /* 4438: * Accelerated common case where input doesn't need to be 4439: * modified before passing it to the handler. 
4440: */ 4441: if (!cdata) { 4442: in = ctxt->input->cur; 4443: do { 4444: get_more_space: 4445: while (*in == 0x20) { in++; ctxt->input->col++; } 4446: if (*in == 0xA) { 4447: do { 4448: ctxt->input->line++; ctxt->input->col = 1; 4449: in++; 4450: } while (*in == 0xA); 4451: goto get_more_space; 4452: } 4453: if (*in == '<') { 4454: nbchar = in - ctxt->input->cur; 4455: if (nbchar > 0) { 4456: const xmlChar *tmp = ctxt->input->cur; 4457: ctxt->input->cur = in; 4458: 4459: if ((ctxt->sax != NULL) && 4460: (ctxt->sax->ignorableWhitespace != 4461: ctxt->sax->characters)) { 4462: if (areBlanks(ctxt, tmp, nbchar, 1)) { 4463: if (ctxt->sax->ignorableWhitespace != NULL) 4464: ctxt->sax->ignorableWhitespace(ctxt->userData, 4465: tmp, nbchar); 4466: } else { 4467: if (ctxt->sax->characters != NULL) 4468: ctxt->sax->characters(ctxt->userData, 4469: tmp, nbchar); 4470: if (*ctxt->space == -1) 4471: *ctxt->space = -2; 4472: } 4473: } else if ((ctxt->sax != NULL) && 4474: (ctxt->sax->characters != NULL)) { 4475: ctxt->sax->characters(ctxt->userData, 4476: tmp, nbchar); 4477: } 4478: } 4479: return; 4480: } 4481: 4482: get_more: 4483: ccol = ctxt->input->col; 4484: while (test_char_data[*in]) { 4485: in++; 4486: ccol++; 4487: } 4488: ctxt->input->col = ccol; 4489: if (*in == 0xA) { 4490: do { 4491: ctxt->input->line++; ctxt->input->col = 1; 4492: in++; 4493: } while (*in == 0xA); 4494: goto get_more; 4495: } 4496: if (*in == ']') { 4497: if ((in[1] == ']') && (in[2] == '>')) { 4498: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4499: ctxt->input->cur = in; 4500: return; 4501: } 4502: in++; 4503: ctxt->input->col++; 4504: goto get_more; 4505: } 4506: nbchar = in - ctxt->input->cur; 4507: if (nbchar > 0) { 4508: if ((ctxt->sax != NULL) && 4509: (ctxt->sax->ignorableWhitespace != 4510: ctxt->sax->characters) && 4511: (IS_BLANK_CH(*ctxt->input->cur))) { 4512: const xmlChar *tmp = ctxt->input->cur; 4513: ctxt->input->cur = in; 4514: 4515: if (areBlanks(ctxt, tmp, nbchar, 0)) 
{ 4516: if (ctxt->sax->ignorableWhitespace != NULL) 4517: ctxt->sax->ignorableWhitespace(ctxt->userData, 4518: tmp, nbchar); 4519: } else { 4520: if (ctxt->sax->characters != NULL) 4521: ctxt->sax->characters(ctxt->userData, 4522: tmp, nbchar); 4523: if (*ctxt->space == -1) 4524: *ctxt->space = -2; 4525: } 4526: line = ctxt->input->line; 4527: col = ctxt->input->col; 4528: } else if (ctxt->sax != NULL) { 4529: if (ctxt->sax->characters != NULL) 4530: ctxt->sax->characters(ctxt->userData, 4531: ctxt->input->cur, nbchar); 4532: line = ctxt->input->line; 4533: col = ctxt->input->col; 4534: } 4535: /* something really bad happened in the SAX callback */ 4536: if (ctxt->instate != XML_PARSER_CONTENT) 4537: return; 4538: } 4539: ctxt->input->cur = in; 4540: if (*in == 0xD) { 4541: in++; 4542: if (*in == 0xA) { 4543: ctxt->input->cur = in; 4544: in++; 4545: ctxt->input->line++; ctxt->input->col = 1; 4546: continue; /* while */ 4547: } 4548: in--; 4549: } 4550: if (*in == '<') { 4551: return; 4552: } 4553: if (*in == '&') { 4554: return; 4555: } 4556: SHRINK; 4557: GROW; 4558: if (ctxt->instate == XML_PARSER_EOF) 4559: return; 4560: in = ctxt->input->cur; 4561: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4562: nbchar = 0; 4563: } 4564: ctxt->input->line = line; 4565: ctxt->input->col = col; 4566: xmlParseCharDataComplex(ctxt, cdata); 4567: } 4568: 4569: /** 4570: * xmlParseCharDataComplex: 4571: * @ctxt: an XML parser context 4572: * @cdata: int indicating whether we are within a CDATA section 4573: * 4574: * parse a CharData section.this is the fallback function 4575: * of xmlParseCharData() when the parsing requires handling 4576: * of non-ASCII characters. 
4577: */ 4578: static void 4579: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4580: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4581: int nbchar = 0; 4582: int cur, l; 4583: int count = 0; 4584: 4585: SHRINK; 4586: GROW; 4587: cur = CUR_CHAR(l); 4588: while ((cur != '<') && /* checked */ 4589: (cur != '&') && 4590: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4591: if ((cur == ']') && (NXT(1) == ']') && 4592: (NXT(2) == '>')) { 4593: if (cdata) break; 4594: else { 4595: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4596: } 4597: } 4598: COPY_BUF(l,buf,nbchar,cur); 4599: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4600: buf[nbchar] = 0; 4601: 4602: /* 4603: * OK the segment is to be consumed as chars. 4604: */ 4605: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4606: if (areBlanks(ctxt, buf, nbchar, 0)) { 4607: if (ctxt->sax->ignorableWhitespace != NULL) 4608: ctxt->sax->ignorableWhitespace(ctxt->userData, 4609: buf, nbchar); 4610: } else { 4611: if (ctxt->sax->characters != NULL) 4612: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4613: if ((ctxt->sax->characters != 4614: ctxt->sax->ignorableWhitespace) && 4615: (*ctxt->space == -1)) 4616: *ctxt->space = -2; 4617: } 4618: } 4619: nbchar = 0; 4620: /* something really bad happened in the SAX callback */ 4621: if (ctxt->instate != XML_PARSER_CONTENT) 4622: return; 4623: } 4624: count++; 4625: if (count > 50) { 4626: GROW; 4627: count = 0; 4628: if (ctxt->instate == XML_PARSER_EOF) 4629: return; 4630: } 4631: NEXTL(l); 4632: cur = CUR_CHAR(l); 4633: } 4634: if (nbchar != 0) { 4635: buf[nbchar] = 0; 4636: /* 4637: * OK the segment is to be consumed as chars. 
4638: */ 4639: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4640: if (areBlanks(ctxt, buf, nbchar, 0)) { 4641: if (ctxt->sax->ignorableWhitespace != NULL) 4642: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4643: } else { 4644: if (ctxt->sax->characters != NULL) 4645: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4646: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4647: (*ctxt->space == -1)) 4648: *ctxt->space = -2; 4649: } 4650: } 4651: } 4652: if ((cur != 0) && (!IS_CHAR(cur))) { 4653: /* Generate the error and skip the offending character */ 4654: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4655: "PCDATA invalid Char value %d\n", 4656: cur); 4657: NEXTL(l); 4658: } 4659: } 4660: 4661: /** 4662: * xmlParseExternalID: 4663: * @ctxt: an XML parser context 4664: * @publicID: a xmlChar** receiving PubidLiteral 4665: * @strict: indicate whether we should restrict parsing to only 4666: * production [75], see NOTE below 4667: * 4668: * Parse an External ID or a Public ID 4669: * 4670: * NOTE: Productions [75] and [83] interact badly since [75] can generate 4671: * 'PUBLIC' S PubidLiteral S SystemLiteral 4672: * 4673: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4674: * | 'PUBLIC' S PubidLiteral S SystemLiteral 4675: * 4676: * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4677: * 4678: * Returns the function returns SystemLiteral and in the second 4679: * case publicID receives PubidLiteral, is strict is off 4680: * it is possible to return NULL and have publicID set. 
4681: */ 4682: 4683: xmlChar * 4684: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4685: xmlChar *URI = NULL; 4686: 4687: SHRINK; 4688: 4689: *publicID = NULL; 4690: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4691: SKIP(6); 4692: if (!IS_BLANK_CH(CUR)) { 4693: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4694: "Space required after 'SYSTEM'\n"); 4695: } 4696: SKIP_BLANKS; 4697: URI = xmlParseSystemLiteral(ctxt); 4698: if (URI == NULL) { 4699: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4700: } 4701: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4702: SKIP(6); 4703: if (!IS_BLANK_CH(CUR)) { 4704: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4705: "Space required after 'PUBLIC'\n"); 4706: } 4707: SKIP_BLANKS; 4708: *publicID = xmlParsePubidLiteral(ctxt); 4709: if (*publicID == NULL) { 4710: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4711: } 4712: if (strict) { 4713: /* 4714: * We don't handle [83] so "S SystemLiteral" is required. 4715: */ 4716: if (!IS_BLANK_CH(CUR)) { 4717: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4718: "Space required after the Public Identifier\n"); 4719: } 4720: } else { 4721: /* 4722: * We handle [83] so we return immediately, if 4723: * "S SystemLiteral" is not detected. From a purely parsing 4724: * point of view that's a nice mess. 4725: */ 4726: const xmlChar *ptr; 4727: GROW; 4728: 4729: ptr = CUR_PTR; 4730: if (!IS_BLANK_CH(*ptr)) return(NULL); 4731: 4732: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! 
*/ 4733: if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4734: } 4735: SKIP_BLANKS; 4736: URI = xmlParseSystemLiteral(ctxt); 4737: if (URI == NULL) { 4738: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4739: } 4740: } 4741: return(URI); 4742: } 4743: 4744: /** 4745: * xmlParseCommentComplex: 4746: * @ctxt: an XML parser context 4747: * @buf: the already parsed part of the buffer 4748: * @len: number of bytes filles in the buffer 4749: * @size: allocated size of the buffer 4750: * 4751: * Skip an XML (SGML) comment  4752: * The spec says that "For compatibility, the string "--" (double-hyphen) 4753: * must not occur within comments. " 4754: * This is the slow routine in case the accelerator for ascii didn't work 4755: * 4756: * [15] Comment ::= '' 4757: */ 4758: static void 4759: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4760: size_t len, size_t size) { 4761: int q, ql; 4762: int r, rl; 4763: int cur, l; 4764: size_t count = 0; 4765: int inputid; 4766: 4767: inputid = ctxt->input->id; 4768: 4769: if (buf == NULL) { 4770: len = 0; 4771: size = XML_PARSER_BUFFER_SIZE; 4772: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4773: if (buf == NULL) { 4774: xmlErrMemory(ctxt, NULL); 4775: return; 4776: } 4777: } 4778: GROW; /* Assure there's enough input data */ 4779: q = CUR_CHAR(ql); 4780: if (q == 0) 4781: goto not_terminated; 4782: if (!IS_CHAR(q)) { 4783: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4784: "xmlParseComment: invalid xmlChar value %d\n", 4785: q); 4786: xmlFree (buf); 4787: return; 4788: } 4789: NEXTL(ql); 4790: r = CUR_CHAR(rl); 4791: if (r == 0) 4792: goto not_terminated; 4793: if (!IS_CHAR(r)) { 4794: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4795: "xmlParseComment: invalid xmlChar value %d\n", 4796: q); 4797: xmlFree (buf); 4798: return; 4799: } 4800: NEXTL(rl); 4801: cur = CUR_CHAR(l); 4802: if (cur == 0) 4803: goto not_terminated; 4804: while (IS_CHAR(cur) && /* checked */ 4805: ((cur != '>') || 4806: (r != '-') 
|| (q != '-'))) { 4807: if ((r == '-') && (q == '-')) { 4808: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4809: } 4810: if ((len > XML_MAX_TEXT_LENGTH) && 4811: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4812: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4813: "Comment too big found", NULL); 4814: xmlFree (buf); 4815: return; 4816: } 4817: if (len + 5 >= size) { 4818: xmlChar *new_buf; 4819: size_t new_size; 4820: 4821: new_size = size * 2; 4822: new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4823: if (new_buf == NULL) { 4824: xmlFree (buf); 4825: xmlErrMemory(ctxt, NULL); 4826: return; 4827: } 4828: buf = new_buf; 4829: size = new_size; 4830: } 4831: COPY_BUF(ql,buf,len,q); 4832: q = r; 4833: ql = rl; 4834: r = cur; 4835: rl = l; 4836: 4837: count++; 4838: if (count > 50) { 4839: GROW; 4840: count = 0; 4841: if (ctxt->instate == XML_PARSER_EOF) { 4842: xmlFree(buf); 4843: return; 4844: } 4845: } 4846: NEXTL(l); 4847: cur = CUR_CHAR(l); 4848: if (cur == 0) { 4849: SHRINK; 4850: GROW; 4851: cur = CUR_CHAR(l); 4852: } 4853: } 4854: buf[len] = 0; 4855: if (cur == 0) { 4856: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4857: "Comment not terminated \n 4886: * The spec says that "For compatibility, the string "--" (double-hyphen) 4887: * must not occur within comments. " 4888: * 4889: * [15] Comment ::= '' 4890: */ 4891: void 4892: xmlParseComment(xmlParserCtxtPtr ctxt) { 4893: xmlChar *buf = NULL; 4894: size_t size = XML_PARSER_BUFFER_SIZE; 4895: size_t len = 0; 4896: xmlParserInputState state; 4897: const xmlChar *in; 4898: size_t nbchar = 0; 4899: int ccol; 4900: int inputid; 4901: 4902: /* 4903: * Check that there is a comment right here. 
4904: */ 4905: if ((RAW != '<') || (NXT(1) != '!') || 4906: (NXT(2) != '-') || (NXT(3) != '-')) return; 4907: state = ctxt->instate; 4908: ctxt->instate = XML_PARSER_COMMENT; 4909: inputid = ctxt->input->id; 4910: SKIP(4); 4911: SHRINK; 4912: GROW; 4913: 4914: /* 4915: * Accelerated common case where input don't need to be 4916: * modified before passing it to the handler. 4917: */ 4918: in = ctxt->input->cur; 4919: do { 4920: if (*in == 0xA) { 4921: do { 4922: ctxt->input->line++; ctxt->input->col = 1; 4923: in++; 4924: } while (*in == 0xA); 4925: } 4926: get_more: 4927: ccol = ctxt->input->col; 4928: while (((*in > '-') && (*in <= 0x7F)) || 4929: ((*in >= 0x20) && (*in < '-')) || 4930: (*in == 0x09)) { 4931: in++; 4932: ccol++; 4933: } 4934: ctxt->input->col = ccol; 4935: if (*in == 0xA) { 4936: do { 4937: ctxt->input->line++; ctxt->input->col = 1; 4938: in++; 4939: } while (*in == 0xA); 4940: goto get_more; 4941: } 4942: nbchar = in - ctxt->input->cur; 4943: /* 4944: * save current set of data 4945: */ 4946: if (nbchar > 0) { 4947: if ((ctxt->sax != NULL) && 4948: (ctxt->sax->comment != NULL)) { 4949: if (buf == NULL) { 4950: if ((*in == '-') && (in[1] == '-')) 4951: size = nbchar + 1; 4952: else 4953: size = XML_PARSER_BUFFER_SIZE + nbchar; 4954: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4955: if (buf == NULL) { 4956: xmlErrMemory(ctxt, NULL); 4957: ctxt->instate = state; 4958: return; 4959: } 4960: len = 0; 4961: } else if (len + nbchar + 1 >= size) { 4962: xmlChar *new_buf; 4963: size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4964: new_buf = (xmlChar *) xmlRealloc(buf, 4965: size * sizeof(xmlChar)); 4966: if (new_buf == NULL) { 4967: xmlFree (buf); 4968: xmlErrMemory(ctxt, NULL); 4969: ctxt->instate = state; 4970: return; 4971: } 4972: buf = new_buf; 4973: } 4974: memcpy(&buf[len], ctxt->input->cur, nbchar); 4975: len += nbchar; 4976: buf[len] = 0; 4977: } 4978: } 4979: if ((len > XML_MAX_TEXT_LENGTH) && 4980: ((ctxt->options & 
XML_PARSE_HUGE) == 0)) { 4981: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4982: "Comment too big found", NULL); 4983: xmlFree (buf); 4984: return; 4985: } 4986: ctxt->input->cur = in; 4987: if (*in == 0xA) { 4988: in++; 4989: ctxt->input->line++; ctxt->input->col = 1; 4990: } 4991: if (*in == 0xD) { 4992: in++; 4993: if (*in == 0xA) { 4994: ctxt->input->cur = in; 4995: in++; 4996: ctxt->input->line++; ctxt->input->col = 1; 4997: continue; /* while */ 4998: } 4999: in--; 5000: } 5001: SHRINK; 5002: GROW; 5003: if (ctxt->instate == XML_PARSER_EOF) { 5004: xmlFree(buf); 5005: return; 5006: } 5007: in = ctxt->input->cur; 5008: if (*in == '-') { 5009: if (in[1] == '-') { 5010: if (in[2] == '>') { 5011: if (ctxt->input->id != inputid) { 5012: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5013: "comment doesn't start and stop in the same entity\n"); 5014: } 5015: SKIP(3); 5016: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5017: (!ctxt->disableSAX)) { 5018: if (buf != NULL) 5019: ctxt->sax->comment(ctxt->userData, buf); 5020: else 5021: ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5022: } 5023: if (buf != NULL) 5024: xmlFree(buf); 5025: if (ctxt->instate != XML_PARSER_EOF) 5026: ctxt->instate = state; 5027: return; 5028: } 5029: if (buf != NULL) { 5030: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5031: "Double hyphen within comment: " 5032: "<!--%.50s\n", 5033: buf); 5034: } else 5035: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5036: "Double hyphen within comment\n", NULL); 5037: in++; 5038: ctxt->input->col++; 5039: } 5040: in++; 5041: ctxt->input->col++; 5042: goto get_more; 5043: } 5044: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5045: xmlParseCommentComplex(ctxt, buf, len, size); 5046: ctxt->instate = state; 5047: return; 5048: } 5049: 5050: 5051: /** 5052: * xmlParsePITarget: 5053: * @ctxt: an XML parser context 5054: * 5055: * parse the name of a PI 5056: * 5057: * [17] PITarget ::= Name - (('X' | 'x') 
('M' | 'm') ('L' | 'l')) 5058: * 5059: * Returns the PITarget name or NULL 5060: */ 5061: 5062: const xmlChar * 5063: xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5064: const xmlChar *name; 5065: 5066: name = xmlParseName(ctxt); 5067: if ((name != NULL) && 5068: ((name[0] == 'x') || (name[0] == 'X')) && 5069: ((name[1] == 'm') || (name[1] == 'M')) && 5070: ((name[2] == 'l') || (name[2] == 'L'))) { 5071: int i; 5072: if ((name[0] == 'x') && (name[1] == 'm') && 5073: (name[2] == 'l') && (name[3] == 0)) { 5074: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5075: "XML declaration allowed only at the start of the document\n"); 5076: return(name); 5077: } else if (name[3] == 0) { 5078: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5079: return(name); 5080: } 5081: for (i = 0;;i++) { 5082: if (xmlW3CPIs[i] == NULL) break; 5083: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5084: return(name); 5085: } 5086: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5087: "xmlParsePITarget: invalid name prefix 'xml'\n", 5088: NULL, NULL); 5089: } 5090: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5091: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5092: "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 5093: } 5094: return(name); 5095: } 5096: 5097: #ifdef LIBXML_CATALOG_ENABLED 5098: /** 5099: * xmlParseCatalogPI: 5100: * @ctxt: an XML parser context 5101: * @catalog: the PI value string 5102: * 5103: * parse an XML Catalog Processing Instruction. 
5104: * 5105: * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5106: * 5107: * Occurs only if allowed by the user and if happening in the Misc 5108: * part of the document before any doctype informations 5109: * This will add the given catalog to the parsing context in order 5110: * to be used if there is a resolution need further down in the document 5111: */ 5112: 5113: static void 5114: xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5115: xmlChar *URL = NULL; 5116: const xmlChar *tmp, *base; 5117: xmlChar marker; 5118: 5119: tmp = catalog; 5120: while (IS_BLANK_CH(*tmp)) tmp++; 5121: if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5122: goto error; 5123: tmp += 7; 5124: while (IS_BLANK_CH(*tmp)) tmp++; 5125: if (*tmp != '=') { 5126: return; 5127: } 5128: tmp++; 5129: while (IS_BLANK_CH(*tmp)) tmp++; 5130: marker = *tmp; 5131: if ((marker != '\'') && (marker != '"')) 5132: goto error; 5133: tmp++; 5134: base = tmp; 5135: while ((*tmp != 0) && (*tmp != marker)) tmp++; 5136: if (*tmp == 0) 5137: goto error; 5138: URL = xmlStrndup(base, tmp - base); 5139: tmp++; 5140: while (IS_BLANK_CH(*tmp)) tmp++; 5141: if (*tmp != 0) 5142: goto error; 5143: 5144: if (URL != NULL) { 5145: ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5146: xmlFree(URL); 5147: } 5148: return; 5149: 5150: error: 5151: xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5152: "Catalog PI syntax error: %s\n", 5153: catalog, NULL); 5154: if (URL != NULL) 5155: xmlFree(URL); 5156: } 5157: #endif 5158: 5159: /** 5160: * xmlParsePI: 5161: * @ctxt: an XML parser context 5162: * 5163: * parse an XML Processing Instruction. 5164: * 5165: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5166: * 5167: * The processing is transfered to SAX once parsed. 
5168: */ 5169: 5170: void 5171: xmlParsePI(xmlParserCtxtPtr ctxt) { 5172: xmlChar *buf = NULL; 5173: size_t len = 0; 5174: size_t size = XML_PARSER_BUFFER_SIZE; 5175: int cur, l; 5176: const xmlChar *target; 5177: xmlParserInputState state; 5178: int count = 0; 5179: 5180: if ((RAW == '<') && (NXT(1) == '?')) { 5181: xmlParserInputPtr input = ctxt->input; 5182: state = ctxt->instate; 5183: ctxt->instate = XML_PARSER_PI; 5184: /* 5185: * this is a Processing Instruction. 5186: */ 5187: SKIP(2); 5188: SHRINK; 5189: 5190: /* 5191: * Parse the target name and check for special support like 5192: * namespace. 5193: */ 5194: target = xmlParsePITarget(ctxt); 5195: if (target != NULL) { 5196: if ((RAW == '?') && (NXT(1) == '>')) { 5197: if (input != ctxt->input) { 5198: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5199: "PI declaration doesn't start and stop in the same entity\n"); 5200: } 5201: SKIP(2); 5202: 5203: /* 5204: * SAX: PI detected. 5205: */ 5206: if ((ctxt->sax) && (!ctxt->disableSAX) && 5207: (ctxt->sax->processingInstruction != NULL)) 5208: ctxt->sax->processingInstruction(ctxt->userData, 5209: target, NULL); 5210: if (ctxt->instate != XML_PARSER_EOF) 5211: ctxt->instate = state; 5212: return; 5213: } 5214: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5215: if (buf == NULL) { 5216: xmlErrMemory(ctxt, NULL); 5217: ctxt->instate = state; 5218: return; 5219: } 5220: cur = CUR; 5221: if (!IS_BLANK(cur)) { 5222: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5223: "ParsePI: PI %s space expected\n", target); 5224: } 5225: SKIP_BLANKS; 5226: cur = CUR_CHAR(l); 5227: while (IS_CHAR(cur) && /* checked */ 5228: ((cur != '?') || (NXT(1) != '>'))) { 5229: if (len + 5 >= size) { 5230: xmlChar *tmp; 5231: size_t new_size = size * 2; 5232: tmp = (xmlChar *) xmlRealloc(buf, new_size); 5233: if (tmp == NULL) { 5234: xmlErrMemory(ctxt, NULL); 5235: xmlFree(buf); 5236: ctxt->instate = state; 5237: return; 5238: } 5239: buf = tmp; 5240: size = new_size; 5241: } 
5242: count++; 5243: if (count > 50) { 5244: GROW; 5245: if (ctxt->instate == XML_PARSER_EOF) { 5246: xmlFree(buf); 5247: return; 5248: } 5249: count = 0; 5250: if ((len > XML_MAX_TEXT_LENGTH) && 5251: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5252: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5253: "PI %s too big found", target); 5254: xmlFree(buf); 5255: ctxt->instate = state; 5256: return; 5257: } 5258: } 5259: COPY_BUF(l,buf,len,cur); 5260: NEXTL(l); 5261: cur = CUR_CHAR(l); 5262: if (cur == 0) { 5263: SHRINK; 5264: GROW; 5265: cur = CUR_CHAR(l); 5266: } 5267: } 5268: if ((len > XML_MAX_TEXT_LENGTH) && 5269: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5270: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5271: "PI %s too big found", target); 5272: xmlFree(buf); 5273: ctxt->instate = state; 5274: return; 5275: } 5276: buf[len] = 0; 5277: if (cur != '?') { 5278: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5279: "ParsePI: PI %s never end ...\n", target); 5280: } else { 5281: if (input != ctxt->input) { 5282: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5283: "PI declaration doesn't start and stop in the same entity\n"); 5284: } 5285: SKIP(2); 5286: 5287: #ifdef LIBXML_CATALOG_ENABLED 5288: if (((state == XML_PARSER_MISC) || 5289: (state == XML_PARSER_START)) && 5290: (xmlStrEqual(target, XML_CATALOG_PI))) { 5291: xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5292: if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5293: (allow == XML_CATA_ALLOW_ALL)) 5294: xmlParseCatalogPI(ctxt, buf); 5295: } 5296: #endif 5297: 5298: 5299: /* 5300: * SAX: PI detected. 
5301: */ 5302: if ((ctxt->sax) && (!ctxt->disableSAX) && 5303: (ctxt->sax->processingInstruction != NULL)) 5304: ctxt->sax->processingInstruction(ctxt->userData, 5305: target, buf); 5306: } 5307: xmlFree(buf); 5308: } else { 5309: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5310: } 5311: if (ctxt->instate != XML_PARSER_EOF) 5312: ctxt->instate = state; 5313: } 5314: } 5315: 5316: /** 5317: * xmlParseNotationDecl: 5318: * @ctxt: an XML parser context 5319: * 5320: * parse a notation declaration 5321: * 5322: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5323: * 5324: * Hence there is actually 3 choices: 5325: * 'PUBLIC' S PubidLiteral 5326: * 'PUBLIC' S PubidLiteral S SystemLiteral 5327: * and 'SYSTEM' S SystemLiteral 5328: * 5329: * See the NOTE on xmlParseExternalID(). 5330: */ 5331: 5332: void 5333: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5334: const xmlChar *name; 5335: xmlChar *Pubid; 5336: xmlChar *Systemid; 5337: 5338: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5339: xmlParserInputPtr input = ctxt->input; 5340: SHRINK; 5341: SKIP(10); 5342: if (!IS_BLANK_CH(CUR)) { 5343: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5344: "Space required after '<!NOTATION'\n"); 5345: return; 5346: } 5347: SKIP_BLANKS; 5348: 5349: name = xmlParseName(ctxt); 5350: if (name == NULL) { 5351: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5352: return; 5353: } 5354: if (!IS_BLANK_CH(CUR)) { 5355: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5356: "Space required after the NOTATION name'\n"); 5357: return; 5358: } 5359: if (xmlStrchr(name, ':') != NULL) { 5360: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5361: "colon are forbidden from notation names '%s'\n", 5362: name, NULL, NULL); 5363: } 5364: SKIP_BLANKS; 5365: 5366: /* 5367: * Parse the IDs. 
5368: */ 5369: Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5370: SKIP_BLANKS; 5371: 5372: if (RAW == '>') { 5373: if (input != ctxt->input) { 5374: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5375: "Notation declaration doesn't start and stop in the same entity\n"); 5376: } 5377: NEXT; 5378: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5379: (ctxt->sax->notationDecl != NULL)) 5380: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5381: } else { 5382: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5383: } 5384: if (Systemid != NULL) xmlFree(Systemid); 5385: if (Pubid != NULL) xmlFree(Pubid); 5386: } 5387: } 5388: 5389: /** 5390: * xmlParseEntityDecl: 5391: * @ctxt: an XML parser context 5392: * 5393: * parse <!ENTITY declarations 5394: * 5395: * [70] EntityDecl ::= GEDecl | PEDecl 5396: * 5397: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5398: * 5399: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5400: * 5401: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5402: * 5403: * [74] PEDef ::= EntityValue | ExternalID 5404: * 5405: * [76] NDataDecl ::= S 'NDATA' S Name 5406: * 5407: * [ VC: Notation Declared ] 5408: * The Name must match the declared name of a notation. 
5409: */ 5410: 5411: void 5412: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5413: const xmlChar *name = NULL; 5414: xmlChar *value = NULL; 5415: xmlChar *URI = NULL, *literal = NULL; 5416: const xmlChar *ndata = NULL; 5417: int isParameter = 0; 5418: xmlChar *orig = NULL; 5419: int skipped; 5420: 5421: /* GROW; done in the caller */ 5422: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5423: xmlParserInputPtr input = ctxt->input; 5424: SHRINK; 5425: SKIP(8); 5426: skipped = SKIP_BLANKS; 5427: if (skipped == 0) { 5428: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5429: "Space required after '<!ENTITY'\n"); 5430: } 5431: 5432: if (RAW == '%') { 5433: NEXT; 5434: skipped = SKIP_BLANKS; 5435: if (skipped == 0) { 5436: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5437: "Space required after '%'\n"); 5438: } 5439: isParameter = 1; 5440: } 5441: 5442: name = xmlParseName(ctxt); 5443: if (name == NULL) { 5444: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5445: "xmlParseEntityDecl: no name\n"); 5446: return; 5447: } 5448: if (xmlStrchr(name, ':') != NULL) { 5449: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5450: "colon are forbidden from entities names '%s'\n", 5451: name, NULL, NULL); 5452: } 5453: skipped = SKIP_BLANKS; 5454: if (skipped == 0) { 5455: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5456: "Space required after the entity name\n"); 5457: } 5458: 5459: ctxt->instate = XML_PARSER_ENTITY_DECL; 5460: /* 5461: * handle the various case of definitions... 
5462: */ 5463: if (isParameter) { 5464: if ((RAW == '"') || (RAW == '\'')) { 5465: value = xmlParseEntityValue(ctxt, &orig); 5466: if (value) { 5467: if ((ctxt->sax != NULL) && 5468: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5469: ctxt->sax->entityDecl(ctxt->userData, name, 5470: XML_INTERNAL_PARAMETER_ENTITY, 5471: NULL, NULL, value); 5472: } 5473: } else { 5474: URI = xmlParseExternalID(ctxt, &literal, 1); 5475: if ((URI == NULL) && (literal == NULL)) { 5476: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5477: } 5478: if (URI) { 5479: xmlURIPtr uri; 5480: 5481: uri = xmlParseURI((const char *) URI); 5482: if (uri == NULL) { 5483: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5484: "Invalid URI: %s\n", URI); 5485: /* 5486: * This really ought to be a well formedness error 5487: * but the XML Core WG decided otherwise c.f. issue 5488: * E26 of the XML erratas. 5489: */ 5490: } else { 5491: if (uri->fragment != NULL) { 5492: /* 5493: * Okay this is foolish to block those but not 5494: * invalid URIs. 5495: */ 5496: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5497: } else { 5498: if ((ctxt->sax != NULL) && 5499: (!ctxt->disableSAX) && 5500: (ctxt->sax->entityDecl != NULL)) 5501: ctxt->sax->entityDecl(ctxt->userData, name, 5502: XML_EXTERNAL_PARAMETER_ENTITY, 5503: literal, URI, NULL); 5504: } 5505: xmlFreeURI(uri); 5506: } 5507: } 5508: } 5509: } else { 5510: if ((RAW == '"') || (RAW == '\'')) { 5511: value = xmlParseEntityValue(ctxt, &orig); 5512: if ((ctxt->sax != NULL) && 5513: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5514: ctxt->sax->entityDecl(ctxt->userData, name, 5515: XML_INTERNAL_GENERAL_ENTITY, 5516: NULL, NULL, value); 5517: /* 5518: * For expat compatibility in SAX mode. 
5519: */ 5520: if ((ctxt->myDoc == NULL) || 5521: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5522: if (ctxt->myDoc == NULL) { 5523: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5524: if (ctxt->myDoc == NULL) { 5525: xmlErrMemory(ctxt, "New Doc failed"); 5526: return; 5527: } 5528: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5529: } 5530: if (ctxt->myDoc->intSubset == NULL) 5531: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5532: BAD_CAST "fake", NULL, NULL); 5533: 5534: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5535: NULL, NULL, value); 5536: } 5537: } else { 5538: URI = xmlParseExternalID(ctxt, &literal, 1); 5539: if ((URI == NULL) && (literal == NULL)) { 5540: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5541: } 5542: if (URI) { 5543: xmlURIPtr uri; 5544: 5545: uri = xmlParseURI((const char *)URI); 5546: if (uri == NULL) { 5547: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5548: "Invalid URI: %s\n", URI); 5549: /* 5550: * This really ought to be a well formedness error 5551: * but the XML Core WG decided otherwise c.f. issue 5552: * E26 of the XML erratas. 5553: */ 5554: } else { 5555: if (uri->fragment != NULL) { 5556: /* 5557: * Okay this is foolish to block those but not 5558: * invalid URIs. 
5559: */ 5560: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5561: } 5562: xmlFreeURI(uri); 5563: } 5564: } 5565: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5566: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5567: "Space required before 'NDATA'\n"); 5568: } 5569: SKIP_BLANKS; 5570: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5571: SKIP(5); 5572: if (!IS_BLANK_CH(CUR)) { 5573: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5574: "Space required after 'NDATA'\n"); 5575: } 5576: SKIP_BLANKS; 5577: ndata = xmlParseName(ctxt); 5578: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5579: (ctxt->sax->unparsedEntityDecl != NULL)) 5580: ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5581: literal, URI, ndata); 5582: } else { 5583: if ((ctxt->sax != NULL) && 5584: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5585: ctxt->sax->entityDecl(ctxt->userData, name, 5586: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5587: literal, URI, NULL); 5588: /* 5589: * For expat compatibility in SAX mode. 5590: * assuming the entity repalcement was asked for 5591: */ 5592: if ((ctxt->replaceEntities != 0) && 5593: ((ctxt->myDoc == NULL) || 5594: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5595: if (ctxt->myDoc == NULL) { 5596: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5597: if (ctxt->myDoc == NULL) { 5598: xmlErrMemory(ctxt, "New Doc failed"); 5599: return; 5600: } 5601: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5602: } 5603: 5604: if (ctxt->myDoc->intSubset == NULL) 5605: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5606: BAD_CAST "fake", NULL, NULL); 5607: xmlSAX2EntityDecl(ctxt, name, 5608: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5609: literal, URI, NULL); 5610: } 5611: } 5612: } 5613: } 5614: if (ctxt->instate == XML_PARSER_EOF) 5615: return; 5616: SKIP_BLANKS; 5617: if (RAW != '>') { 5618: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5619: "xmlParseEntityDecl: entity %s not terminated\n", name); 5620: } else { 5621: if (input != ctxt->input) { 5622: 
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5623: "Entity declaration doesn't start and stop in the same entity\n"); 5624: } 5625: NEXT; 5626: } 5627: if (orig != NULL) { 5628: /* 5629: * Ugly mechanism to save the raw entity value. 5630: */ 5631: xmlEntityPtr cur = NULL; 5632: 5633: if (isParameter) { 5634: if ((ctxt->sax != NULL) && 5635: (ctxt->sax->getParameterEntity != NULL)) 5636: cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5637: } else { 5638: if ((ctxt->sax != NULL) && 5639: (ctxt->sax->getEntity != NULL)) 5640: cur = ctxt->sax->getEntity(ctxt->userData, name); 5641: if ((cur == NULL) && (ctxt->userData==ctxt)) { 5642: cur = xmlSAX2GetEntity(ctxt, name); 5643: } 5644: } 5645: if (cur != NULL) { 5646: if (cur->orig != NULL) 5647: xmlFree(orig); 5648: else 5649: cur->orig = orig; 5650: } else 5651: xmlFree(orig); 5652: } 5653: if (value != NULL) xmlFree(value); 5654: if (URI != NULL) xmlFree(URI); 5655: if (literal != NULL) xmlFree(literal); 5656: } 5657: } 5658: 5659: /** 5660: * xmlParseDefaultDecl: 5661: * @ctxt: an XML parser context 5662: * @value: Receive a possible fixed default value for the attribute 5663: * 5664: * Parse an attribute default declaration 5665: * 5666: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5667: * 5668: * [ VC: Required Attribute ] 5669: * if the default declaration is the keyword #REQUIRED, then the 5670: * attribute must be specified for all elements of the type in the 5671: * attribute-list declaration. 5672: * 5673: * [ VC: Attribute Default Legal ] 5674: * The declared default value must meet the lexical constraints of 5675: * the declared attribute type c.f. xmlValidateAttributeDecl() 5676: * 5677: * [ VC: Fixed Attribute Default ] 5678: * if an attribute has a default value declared with the #FIXED 5679: * keyword, instances of that attribute must match the default value. 
5680: * 5681: * [ WFC: No < in Attribute Values ] 5682: * handled in xmlParseAttValue() 5683: * 5684: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5685: * or XML_ATTRIBUTE_FIXED. 5686: */ 5687: 5688: int 5689: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5690: int val; 5691: xmlChar *ret; 5692: 5693: *value = NULL; 5694: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5695: SKIP(9); 5696: return(XML_ATTRIBUTE_REQUIRED); 5697: } 5698: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5699: SKIP(8); 5700: return(XML_ATTRIBUTE_IMPLIED); 5701: } 5702: val = XML_ATTRIBUTE_NONE; 5703: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5704: SKIP(6); 5705: val = XML_ATTRIBUTE_FIXED; 5706: if (!IS_BLANK_CH(CUR)) { 5707: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5708: "Space required after '#FIXED'\n"); 5709: } 5710: SKIP_BLANKS; 5711: } 5712: ret = xmlParseAttValue(ctxt); 5713: ctxt->instate = XML_PARSER_DTD; 5714: if (ret == NULL) { 5715: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5716: "Attribute default value declaration error\n"); 5717: } else 5718: *value = ret; 5719: return(val); 5720: } 5721: 5722: /** 5723: * xmlParseNotationType: 5724: * @ctxt: an XML parser context 5725: * 5726: * parse an Notation attribute type. 5727: * 5728: * Note: the leading 'NOTATION' S part has already being parsed... 5729: * 5730: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5731: * 5732: * [ VC: Notation Attributes ] 5733: * Values of this type must match one of the notation names included 5734: * in the declaration; all notation names in the declaration must be declared. 
5735: * 5736: * Returns: the notation attribute tree built while parsing 5737: */ 5738: 5739: xmlEnumerationPtr 5740: xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5741: const xmlChar *name; 5742: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5743: 5744: if (RAW != '(') { 5745: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5746: return(NULL); 5747: } 5748: SHRINK; 5749: do { 5750: NEXT; 5751: SKIP_BLANKS; 5752: name = xmlParseName(ctxt); 5753: if (name == NULL) { 5754: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5755: "Name expected in NOTATION declaration\n"); 5756: xmlFreeEnumeration(ret); 5757: return(NULL); 5758: } 5759: tmp = ret; 5760: while (tmp != NULL) { 5761: if (xmlStrEqual(name, tmp->name)) { 5762: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5763: "standalone: attribute notation value token %s duplicated\n", 5764: name, NULL); 5765: if (!xmlDictOwns(ctxt->dict, name)) 5766: xmlFree((xmlChar *) name); 5767: break; 5768: } 5769: tmp = tmp->next; 5770: } 5771: if (tmp == NULL) { 5772: cur = xmlCreateEnumeration(name); 5773: if (cur == NULL) { 5774: xmlFreeEnumeration(ret); 5775: return(NULL); 5776: } 5777: if (last == NULL) ret = last = cur; 5778: else { 5779: last->next = cur; 5780: last = cur; 5781: } 5782: } 5783: SKIP_BLANKS; 5784: } while (RAW == '|'); 5785: if (RAW != ')') { 5786: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5787: xmlFreeEnumeration(ret); 5788: return(NULL); 5789: } 5790: NEXT; 5791: return(ret); 5792: } 5793: 5794: /** 5795: * xmlParseEnumerationType: 5796: * @ctxt: an XML parser context 5797: * 5798: * parse an Enumeration attribute type. 5799: * 5800: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5801: * 5802: * [ VC: Enumeration ] 5803: * Values of this type must match one of the Nmtoken tokens in 5804: * the declaration 5805: * 5806: * Returns: the enumeration attribute tree built while parsing 5807: */ 5808: 5809: xmlEnumerationPtr 5810: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5811: xmlChar *name; 5812: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5813: 5814: if (RAW != '(') { 5815: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5816: return(NULL); 5817: } 5818: SHRINK; 5819: do { 5820: NEXT; 5821: SKIP_BLANKS; 5822: name = xmlParseNmtoken(ctxt); 5823: if (name == NULL) { 5824: xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5825: return(ret); 5826: } 5827: tmp = ret; 5828: while (tmp != NULL) { 5829: if (xmlStrEqual(name, tmp->name)) { 5830: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5831: "standalone: attribute enumeration value token %s duplicated\n", 5832: name, NULL); 5833: if (!xmlDictOwns(ctxt->dict, name)) 5834: xmlFree(name); 5835: break; 5836: } 5837: tmp = tmp->next; 5838: } 5839: if (tmp == NULL) { 5840: cur = xmlCreateEnumeration(name); 5841: if (!xmlDictOwns(ctxt->dict, name)) 5842: xmlFree(name); 5843: if (cur == NULL) { 5844: xmlFreeEnumeration(ret); 5845: return(NULL); 5846: } 5847: if (last == NULL) ret = last = cur; 5848: else { 5849: last->next = cur; 5850: last = cur; 5851: } 5852: } 5853: SKIP_BLANKS; 5854: } while (RAW == '|'); 5855: if (RAW != ')') { 5856: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5857: return(ret); 5858: } 5859: NEXT; 5860: return(ret); 5861: } 5862: 5863: /** 5864: * xmlParseEnumeratedType: 5865: * @ctxt: an XML parser context 5866: * @tree: the enumeration tree built while parsing 5867: * 5868: * parse an Enumerated attribute type. 5869: * 5870: * [57] EnumeratedType ::= NotationType | Enumeration 5871: * 5872: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5873: * 5874: * 5875: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5876: */ 5877: 5878: int 5879: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5880: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5881: SKIP(8); 5882: if (!IS_BLANK_CH(CUR)) { 5883: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5884: "Space required after 'NOTATION'\n"); 5885: return(0); 5886: } 5887: SKIP_BLANKS; 5888: *tree = xmlParseNotationType(ctxt); 5889: if (*tree == NULL) return(0); 5890: return(XML_ATTRIBUTE_NOTATION); 5891: } 5892: *tree = xmlParseEnumerationType(ctxt); 5893: if (*tree == NULL) return(0); 5894: return(XML_ATTRIBUTE_ENUMERATION); 5895: } 5896: 5897: /** 5898: * xmlParseAttributeType: 5899: * @ctxt: an XML parser context 5900: * @tree: the enumeration tree built while parsing 5901: * 5902: * parse the Attribute list def for an element 5903: * 5904: * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5905: * 5906: * [55] StringType ::= 'CDATA' 5907: * 5908: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5909: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5910: * 5911: * Validity constraints for attribute values syntax are checked in 5912: * xmlValidateAttributeValue() 5913: * 5914: * [ VC: ID ] 5915: * Values of type ID must match the Name production. A name must not 5916: * appear more than once in an XML document as a value of this type; 5917: * i.e., ID values must uniquely identify the elements which bear them. 5918: * 5919: * [ VC: One ID per Element Type ] 5920: * No element type may have more than one ID attribute specified. 5921: * 5922: * [ VC: ID Attribute Default ] 5923: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 
5924: * 5925: * [ VC: IDREF ] 5926: * Values of type IDREF must match the Name production, and values 5927: * of type IDREFS must match Names; each IDREF Name must match the value 5928: * of an ID attribute on some element in the XML document; i.e. IDREF 5929: * values must match the value of some ID attribute. 5930: * 5931: * [ VC: Entity Name ] 5932: * Values of type ENTITY must match the Name production, values 5933: * of type ENTITIES must match Names; each Entity Name must match the 5934: * name of an unparsed entity declared in the DTD. 5935: * 5936: * [ VC: Name Token ] 5937: * Values of type NMTOKEN must match the Nmtoken production; values 5938: * of type NMTOKENS must match Nmtokens. 5939: * 5940: * Returns the attribute type 5941: */ 5942: int 5943: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5944: SHRINK; 5945: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5946: SKIP(5); 5947: return(XML_ATTRIBUTE_CDATA); 5948: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5949: SKIP(6); 5950: return(XML_ATTRIBUTE_IDREFS); 5951: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5952: SKIP(5); 5953: return(XML_ATTRIBUTE_IDREF); 5954: } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5955: SKIP(2); 5956: return(XML_ATTRIBUTE_ID); 5957: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5958: SKIP(6); 5959: return(XML_ATTRIBUTE_ENTITY); 5960: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5961: SKIP(8); 5962: return(XML_ATTRIBUTE_ENTITIES); 5963: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5964: SKIP(8); 5965: return(XML_ATTRIBUTE_NMTOKENS); 5966: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5967: SKIP(7); 5968: return(XML_ATTRIBUTE_NMTOKEN); 5969: } 5970: return(xmlParseEnumeratedType(ctxt, tree)); 5971: } 5972: 5973: /** 5974: * xmlParseAttributeListDecl: 5975: * @ctxt: an XML parser context 5976: * 5977: * : parse the Attribute list def for an element 5978: * 
5979: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 5980: * 5981: * [53] AttDef ::= S Name S AttType S DefaultDecl 5982: * 5983: */ 5984: void 5985: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5986: const xmlChar *elemName; 5987: const xmlChar *attrName; 5988: xmlEnumerationPtr tree; 5989: 5990: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5991: xmlParserInputPtr input = ctxt->input; 5992: 5993: SKIP(9); 5994: if (!IS_BLANK_CH(CUR)) { 5995: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5996: "Space required after '<!ATTLIST'\n"); 5997: } 5998: SKIP_BLANKS; 5999: elemName = xmlParseName(ctxt); 6000: if (elemName == NULL) { 6001: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6002: "ATTLIST: no name for Element\n"); 6003: return; 6004: } 6005: SKIP_BLANKS; 6006: GROW; 6007: while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 6008: const xmlChar *check = CUR_PTR; 6009: int type; 6010: int def; 6011: xmlChar *defaultValue = NULL; 6012: 6013: GROW; 6014: tree = NULL; 6015: attrName = xmlParseName(ctxt); 6016: if (attrName == NULL) { 6017: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6018: "ATTLIST: no name for Attribute\n"); 6019: break; 6020: } 6021: GROW; 6022: if (!IS_BLANK_CH(CUR)) { 6023: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6024: "Space required after the attribute name\n"); 6025: break; 6026: } 6027: SKIP_BLANKS; 6028: 6029: type = xmlParseAttributeType(ctxt, &tree); 6030: if (type <= 0) { 6031: break; 6032: } 6033: 6034: GROW; 6035: if (!IS_BLANK_CH(CUR)) { 6036: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6037: "Space required after the attribute type\n"); 6038: if (tree != NULL) 6039: xmlFreeEnumeration(tree); 6040: break; 6041: } 6042: SKIP_BLANKS; 6043: 6044: def = xmlParseDefaultDecl(ctxt, &defaultValue); 6045: if (def <= 0) { 6046: if (defaultValue != NULL) 6047: xmlFree(defaultValue); 6048: if (tree != NULL) 6049: xmlFreeEnumeration(tree); 6050: break; 6051: } 6052: if ((type != XML_ATTRIBUTE_CDATA) && 
(defaultValue != NULL)) 6053: xmlAttrNormalizeSpace(defaultValue, defaultValue); 6054: 6055: GROW; 6056: if (RAW != '>') { 6057: if (!IS_BLANK_CH(CUR)) { 6058: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6059: "Space required after the attribute default value\n"); 6060: if (defaultValue != NULL) 6061: xmlFree(defaultValue); 6062: if (tree != NULL) 6063: xmlFreeEnumeration(tree); 6064: break; 6065: } 6066: SKIP_BLANKS; 6067: } 6068: if (check == CUR_PTR) { 6069: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6070: "in xmlParseAttributeListDecl\n"); 6071: if (defaultValue != NULL) 6072: xmlFree(defaultValue); 6073: if (tree != NULL) 6074: xmlFreeEnumeration(tree); 6075: break; 6076: } 6077: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6078: (ctxt->sax->attributeDecl != NULL)) 6079: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6080: type, def, defaultValue, tree); 6081: else if (tree != NULL) 6082: xmlFreeEnumeration(tree); 6083: 6084: if ((ctxt->sax2) && (defaultValue != NULL) && 6085: (def != XML_ATTRIBUTE_IMPLIED) && 6086: (def != XML_ATTRIBUTE_REQUIRED)) { 6087: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6088: } 6089: if (ctxt->sax2) { 6090: xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6091: } 6092: if (defaultValue != NULL) 6093: xmlFree(defaultValue); 6094: GROW; 6095: } 6096: if (RAW == '>') { 6097: if (input != ctxt->input) { 6098: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6099: "Attribute list declaration doesn't start and stop in the same entity\n", 6100: NULL, NULL); 6101: } 6102: NEXT; 6103: } 6104: } 6105: } 6106: 6107: /** 6108: * xmlParseElementMixedContentDecl: 6109: * @ctxt: an XML parser context 6110: * @inputchk: the input used for the current entity, needed for boundary checks 6111: * 6112: * parse the declaration for a Mixed Element content 6113: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6114: * 6115: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? 
')*' | 6116: * '(' S? '#PCDATA' S? ')' 6117: * 6118: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 6119: * 6120: * [ VC: No Duplicate Types ] 6121: * The same name must not appear more than once in a single 6122: * mixed-content declaration. 6123: * 6124: * returns: the list of the xmlElementContentPtr describing the element choices 6125: */ 6126: xmlElementContentPtr 6127: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6128: xmlElementContentPtr ret = NULL, cur = NULL, n; 6129: const xmlChar *elem = NULL; 6130: 6131: GROW; 6132: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6133: SKIP(7); 6134: SKIP_BLANKS; 6135: SHRINK; 6136: if (RAW == ')') { 6137: if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6138: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6139: "Element content declaration doesn't start and stop in the same entity\n", 6140: NULL, NULL); 6141: } 6142: NEXT; 6143: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6144: if (ret == NULL) 6145: return(NULL); 6146: if (RAW == '*') { 6147: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6148: NEXT; 6149: } 6150: return(ret); 6151: } 6152: if ((RAW == '(') || (RAW == '|')) { 6153: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 6154: if (ret == NULL) return(NULL); 6155: } 6156: while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { 6157: NEXT; 6158: if (elem == NULL) { 6159: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6160: if (ret == NULL) return(NULL); 6161: ret->c1 = cur; 6162: if (cur != NULL) 6163: cur->parent = ret; 6164: cur = ret; 6165: } else { 6166: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6167: if (n == NULL) return(NULL); 6168: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6169: if (n->c1 != NULL) 6170: n->c1->parent = n; 6171: cur->c2 = n; 6172: if (n != NULL) 6173: n->parent = cur; 
6174: cur = n; 6175: } 6176: SKIP_BLANKS; 6177: elem = xmlParseName(ctxt); 6178: if (elem == NULL) { 6179: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6180: "xmlParseElementMixedContentDecl : Name expected\n"); 6181: xmlFreeDocElementContent(ctxt->myDoc, cur); 6182: return(NULL); 6183: } 6184: SKIP_BLANKS; 6185: GROW; 6186: } 6187: if ((RAW == ')') && (NXT(1) == '*')) { 6188: if (elem != NULL) { 6189: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 6190: XML_ELEMENT_CONTENT_ELEMENT); 6191: if (cur->c2 != NULL) 6192: cur->c2->parent = cur; 6193: } 6194: if (ret != NULL) 6195: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6196: if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6197: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6198: "Element content declaration doesn't start and stop in the same entity\n", 6199: NULL, NULL); 6200: } 6201: SKIP(2); 6202: } else { 6203: xmlFreeDocElementContent(ctxt->myDoc, ret); 6204: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 6205: return(NULL); 6206: } 6207: 6208: } else { 6209: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 6210: } 6211: return(ret); 6212: } 6213: 6214: /** 6215: * xmlParseElementChildrenContentDeclPriv: 6216: * @ctxt: an XML parser context 6217: * @inputchk: the input used for the current entity, needed for boundary checks 6218: * @depth: the level of recursion 6219: * 6220: * parse the declaration for a Mixed Element content 6221: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6222: * 6223: * 6224: * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6225: * 6226: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6227: * 6228: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6229: * 6230: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6231: * 6232: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6233: * TODO Parameter-entity replacement text must be properly nested 6234: * with parenthesized groups. 
That is to say, if either of the 6235: * opening or closing parentheses in a choice, seq, or Mixed 6236: * construct is contained in the replacement text for a parameter 6237: * entity, both must be contained in the same replacement text. For 6238: * interoperability, if a parameter-entity reference appears in a 6239: * choice, seq, or Mixed construct, its replacement text should not 6240: * be empty, and neither the first nor last non-blank character of 6241: * the replacement text should be a connector (| or ,). 6242: * 6243: * Returns the tree of xmlElementContentPtr describing the element 6244: * hierarchy. 6245: */ 6246: static xmlElementContentPtr 6247: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6248: int depth) { 6249: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6250: const xmlChar *elem; 6251: xmlChar type = 0; 6252: 6253: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6254: (depth > 2048)) { 6255: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6256: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6257: depth); 6258: return(NULL); 6259: } 6260: SKIP_BLANKS; 6261: GROW; 6262: if (RAW == '(') { 6263: int inputid = ctxt->input->id; 6264: 6265: /* Recurse on first child */ 6266: NEXT; 6267: SKIP_BLANKS; 6268: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6269: depth + 1); 6270: SKIP_BLANKS; 6271: GROW; 6272: } else { 6273: elem = xmlParseName(ctxt); 6274: if (elem == NULL) { 6275: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6276: return(NULL); 6277: } 6278: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6279: if (cur == NULL) { 6280: xmlErrMemory(ctxt, NULL); 6281: return(NULL); 6282: } 6283: GROW; 6284: if (RAW == '?') { 6285: cur->ocur = XML_ELEMENT_CONTENT_OPT; 6286: NEXT; 6287: } else if (RAW == '*') { 6288: cur->ocur = XML_ELEMENT_CONTENT_MULT; 6289: NEXT; 6290: } 
else if (RAW == '+') { 6291: cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6292: NEXT; 6293: } else { 6294: cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6295: } 6296: GROW; 6297: } 6298: SKIP_BLANKS; 6299: SHRINK; 6300: while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6301: /* 6302: * Each loop we parse one separator and one element. 6303: */ 6304: if (RAW == ',') { 6305: if (type == 0) type = CUR; 6306: 6307: /* 6308: * Detect "Name | Name , Name" error 6309: */ 6310: else if (type != CUR) { 6311: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6312: "xmlParseElementChildrenContentDecl : '%c' expected\n", 6313: type); 6314: if ((last != NULL) && (last != ret)) 6315: xmlFreeDocElementContent(ctxt->myDoc, last); 6316: if (ret != NULL) 6317: xmlFreeDocElementContent(ctxt->myDoc, ret); 6318: return(NULL); 6319: } 6320: NEXT; 6321: 6322: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6323: if (op == NULL) { 6324: if ((last != NULL) && (last != ret)) 6325: xmlFreeDocElementContent(ctxt->myDoc, last); 6326: xmlFreeDocElementContent(ctxt->myDoc, ret); 6327: return(NULL); 6328: } 6329: if (last == NULL) { 6330: op->c1 = ret; 6331: if (ret != NULL) 6332: ret->parent = op; 6333: ret = cur = op; 6334: } else { 6335: cur->c2 = op; 6336: if (op != NULL) 6337: op->parent = cur; 6338: op->c1 = last; 6339: if (last != NULL) 6340: last->parent = op; 6341: cur =op; 6342: last = NULL; 6343: } 6344: } else if (RAW == '|') { 6345: if (type == 0) type = CUR; 6346: 6347: /* 6348: * Detect "Name , Name | Name" error 6349: */ 6350: else if (type != CUR) { 6351: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6352: "xmlParseElementChildrenContentDecl : '%c' expected\n", 6353: type); 6354: if ((last != NULL) && (last != ret)) 6355: xmlFreeDocElementContent(ctxt->myDoc, last); 6356: if (ret != NULL) 6357: xmlFreeDocElementContent(ctxt->myDoc, ret); 6358: return(NULL); 6359: } 6360: NEXT; 6361: 6362: op = xmlNewDocElementContent(ctxt->myDoc, NULL, 
XML_ELEMENT_CONTENT_OR); 6363: if (op == NULL) { 6364: if ((last != NULL) && (last != ret)) 6365: xmlFreeDocElementContent(ctxt->myDoc, last); 6366: if (ret != NULL) 6367: xmlFreeDocElementContent(ctxt->myDoc, ret); 6368: return(NULL); 6369: } 6370: if (last == NULL) { 6371: op->c1 = ret; 6372: if (ret != NULL) 6373: ret->parent = op; 6374: ret = cur = op; 6375: } else { 6376: cur->c2 = op; 6377: if (op != NULL) 6378: op->parent = cur; 6379: op->c1 = last; 6380: if (last != NULL) 6381: last->parent = op; 6382: cur =op; 6383: last = NULL; 6384: } 6385: } else { 6386: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6387: if ((last != NULL) && (last != ret)) 6388: xmlFreeDocElementContent(ctxt->myDoc, last); 6389: if (ret != NULL) 6390: xmlFreeDocElementContent(ctxt->myDoc, ret); 6391: return(NULL); 6392: } 6393: GROW; 6394: SKIP_BLANKS; 6395: GROW; 6396: if (RAW == '(') { 6397: int inputid = ctxt->input->id; 6398: /* Recurse on second child */ 6399: NEXT; 6400: SKIP_BLANKS; 6401: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6402: depth + 1); 6403: SKIP_BLANKS; 6404: } else { 6405: elem = xmlParseName(ctxt); 6406: if (elem == NULL) { 6407: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6408: if (ret != NULL) 6409: xmlFreeDocElementContent(ctxt->myDoc, ret); 6410: return(NULL); 6411: } 6412: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6413: if (last == NULL) { 6414: if (ret != NULL) 6415: xmlFreeDocElementContent(ctxt->myDoc, ret); 6416: return(NULL); 6417: } 6418: if (RAW == '?') { 6419: last->ocur = XML_ELEMENT_CONTENT_OPT; 6420: NEXT; 6421: } else if (RAW == '*') { 6422: last->ocur = XML_ELEMENT_CONTENT_MULT; 6423: NEXT; 6424: } else if (RAW == '+') { 6425: last->ocur = XML_ELEMENT_CONTENT_PLUS; 6426: NEXT; 6427: } else { 6428: last->ocur = XML_ELEMENT_CONTENT_ONCE; 6429: } 6430: } 6431: SKIP_BLANKS; 6432: GROW; 6433: } 6434: if ((cur != NULL) && (last != NULL)) { 6435: cur->c2 = last; 6436: 
if (last != NULL) 6437: last->parent = cur; 6438: } 6439: if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6440: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6441: "Element content declaration doesn't start and stop in the same entity\n", 6442: NULL, NULL); 6443: } 6444: NEXT; 6445: if (RAW == '?') { 6446: if (ret != NULL) { 6447: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6448: (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6449: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6450: else 6451: ret->ocur = XML_ELEMENT_CONTENT_OPT; 6452: } 6453: NEXT; 6454: } else if (RAW == '*') { 6455: if (ret != NULL) { 6456: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6457: cur = ret; 6458: /* 6459: * Some normalization: 6460: * (a | b* | c?)* == (a | b | c)* 6461: */ 6462: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6463: if ((cur->c1 != NULL) && 6464: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6465: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6466: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6467: if ((cur->c2 != NULL) && 6468: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6469: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6470: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6471: cur = cur->c2; 6472: } 6473: } 6474: NEXT; 6475: } else if (RAW == '+') { 6476: if (ret != NULL) { 6477: int found = 0; 6478: 6479: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6480: (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6481: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6482: else 6483: ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6484: /* 6485: * Some normalization: 6486: * (a | b*)+ == (a | b)* 6487: * (a | b?)+ == (a | b)* 6488: */ 6489: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6490: if ((cur->c1 != NULL) && 6491: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6492: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6493: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6494: found = 1; 6495: } 6496: if ((cur->c2 != NULL) && 6497: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 
6498: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6499: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6500: found = 1; 6501: } 6502: cur = cur->c2; 6503: } 6504: if (found) 6505: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6506: } 6507: NEXT; 6508: } 6509: return(ret); 6510: } 6511: 6512: /** 6513: * xmlParseElementChildrenContentDecl: 6514: * @ctxt: an XML parser context 6515: * @inputchk: the input used for the current entity, needed for boundary checks 6516: * 6517: * parse the declaration for a Mixed Element content 6518: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6519: * 6520: * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6521: * 6522: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6523: * 6524: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6525: * 6526: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6527: * 6528: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6529: * TODO Parameter-entity replacement text must be properly nested 6530: * with parenthesized groups. That is to say, if either of the 6531: * opening or closing parentheses in a choice, seq, or Mixed 6532: * construct is contained in the replacement text for a parameter 6533: * entity, both must be contained in the same replacement text. For 6534: * interoperability, if a parameter-entity reference appears in a 6535: * choice, seq, or Mixed construct, its replacement text should not 6536: * be empty, and neither the first nor last non-blank character of 6537: * the replacement text should be a connector (| or ,). 6538: * 6539: * Returns the tree of xmlElementContentPtr describing the element 6540: * hierarchy. 
6541: */ 6542: xmlElementContentPtr 6543: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6544: /* stub left for API/ABI compat */ 6545: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6546: } 6547: 6548: /** 6549: * xmlParseElementContentDecl: 6550: * @ctxt: an XML parser context 6551: * @name: the name of the element being defined. 6552: * @result: the Element Content pointer will be stored here if any 6553: * 6554: * parse the declaration for an Element content either Mixed or Children, 6555: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6556: * 6557: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6558: * 6559: * returns: the type of element content XML_ELEMENT_TYPE_xxx 6560: */ 6561: 6562: int 6563: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6564: xmlElementContentPtr *result) { 6565: 6566: xmlElementContentPtr tree = NULL; 6567: int inputid = ctxt->input->id; 6568: int res; 6569: 6570: *result = NULL; 6571: 6572: if (RAW != '(') { 6573: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6574: "xmlParseElementContentDecl : %s '(' expected\n", name); 6575: return(-1); 6576: } 6577: NEXT; 6578: GROW; 6579: if (ctxt->instate == XML_PARSER_EOF) 6580: return(-1); 6581: SKIP_BLANKS; 6582: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6583: tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6584: res = XML_ELEMENT_TYPE_MIXED; 6585: } else { 6586: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6587: res = XML_ELEMENT_TYPE_ELEMENT; 6588: } 6589: SKIP_BLANKS; 6590: *result = tree; 6591: return(res); 6592: } 6593: 6594: /** 6595: * xmlParseElementDecl: 6596: * @ctxt: an XML parser context 6597: * 6598: * parse an Element declaration. 6599: * 6600: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * The declaration is reported through the SAX elementDecl callback when
 * one is set and SAX is not disabled. -1 is also returned when the
 * input does not start with '<!ELEMENT'.
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    int elemType = -1;
    xmlElementContentPtr model = NULL;
    xmlParserInputPtr startInput;

    /* GROW; done in the caller */
    if (!(CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')))
        return(elemType);

    /* Remembered to check that the declaration ends in the same input. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Leave inputs that are exhausted before looking at the next char. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /*
         * Element must always be empty.
         */
        SKIP(5);
        elemType = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /*
         * Element is a generic container.
         */
        SKIP(3);
        elemType = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        elemType = xmlParseElementContentDecl(ctxt, name, &model);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
          "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        /* Unterminated declaration: the content model is dropped. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (model != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, model);
        }
        return(elemType);
    }

    if (startInput != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
    }

    NEXT;
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        (ctxt->sax->elementDecl != NULL)) {
        if (model != NULL)
            model->parent = NULL;
        ctxt->sax->elementDecl(ctxt->userData, name, elemType, model);
        if ((model != NULL) && (model->parent == NULL)) {
            /*
             * this is a trick: if xmlAddElementDecl is called,
             * instead of copying the full tree it is plugged directly
             * if called from the parser. Avoid duplicating the
             * interfaces or change the API/ABI
             */
            xmlFreeDocElementContent(ctxt->myDoc, model);
        }
    } else if (model != NULL) {
        xmlFreeDocElementContent(ctxt->myDoc, model);
    }
    return(elemType);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6716: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6717: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6718: */ 6719: 6720: static void 6721: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6722: int id = ctxt->input->id; 6723: 6724: SKIP(3); 6725: SKIP_BLANKS; 6726: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6727: SKIP(7); 6728: SKIP_BLANKS; 6729: if (RAW != '[') { 6730: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6731: } else { 6732: if (ctxt->input->id != id) { 6733: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6734: "All markup of the conditional section is not in the same entity\n", 6735: NULL, NULL); 6736: } 6737: NEXT; 6738: } 6739: if (xmlParserDebugEntities) { 6740: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6741: xmlGenericError(xmlGenericErrorContext, 6742: "%s(%d): ", ctxt->input->filename, 6743: ctxt->input->line); 6744: xmlGenericError(xmlGenericErrorContext, 6745: "Entering INCLUDE Conditional Section\n"); 6746: } 6747: 6748: while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6749: (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6750: const xmlChar *check = CUR_PTR; 6751: unsigned int cons = ctxt->input->consumed; 6752: 6753: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6754: xmlParseConditionalSections(ctxt); 6755: } else if (IS_BLANK_CH(CUR)) { 6756: NEXT; 6757: } else if (RAW == '%') { 6758: xmlParsePEReference(ctxt); 6759: } else 6760: xmlParseMarkupDecl(ctxt); 6761: 6762: /* 6763: * Pop-up of finished entities. 
6764: */ 6765: while ((RAW == 0) && (ctxt->inputNr > 1)) 6766: xmlPopInput(ctxt); 6767: 6768: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6769: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6770: break; 6771: } 6772: } 6773: if (xmlParserDebugEntities) { 6774: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6775: xmlGenericError(xmlGenericErrorContext, 6776: "%s(%d): ", ctxt->input->filename, 6777: ctxt->input->line); 6778: xmlGenericError(xmlGenericErrorContext, 6779: "Leaving INCLUDE Conditional Section\n"); 6780: } 6781: 6782: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6783: int state; 6784: xmlParserInputState instate; 6785: int depth = 0; 6786: 6787: SKIP(6); 6788: SKIP_BLANKS; 6789: if (RAW != '[') { 6790: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6791: } else { 6792: if (ctxt->input->id != id) { 6793: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6794: "All markup of the conditional section is not in the same entity\n", 6795: NULL, NULL); 6796: } 6797: NEXT; 6798: } 6799: if (xmlParserDebugEntities) { 6800: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6801: xmlGenericError(xmlGenericErrorContext, 6802: "%s(%d): ", ctxt->input->filename, 6803: ctxt->input->line); 6804: xmlGenericError(xmlGenericErrorContext, 6805: "Entering IGNORE Conditional Section\n"); 6806: } 6807: 6808: /* 6809: * Parse up to the end of the conditional section 6810: * But disable SAX event generating DTD building in the meantime 6811: */ 6812: state = ctxt->disableSAX; 6813: instate = ctxt->instate; 6814: if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6815: ctxt->instate = XML_PARSER_IGNORE; 6816: 6817: while (((depth >= 0) && (RAW != 0)) && 6818: (ctxt->instate != XML_PARSER_EOF)) { 6819: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6820: depth++; 6821: SKIP(3); 6822: continue; 6823: } 6824: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6825: if (--depth >= 0) SKIP(3); 6826: continue; 6827: } 
6828: NEXT; 6829: continue; 6830: } 6831: 6832: ctxt->disableSAX = state; 6833: ctxt->instate = instate; 6834: 6835: if (xmlParserDebugEntities) { 6836: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6837: xmlGenericError(xmlGenericErrorContext, 6838: "%s(%d): ", ctxt->input->filename, 6839: ctxt->input->line); 6840: xmlGenericError(xmlGenericErrorContext, 6841: "Leaving IGNORE Conditional Section\n"); 6842: } 6843: 6844: } else { 6845: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6846: } 6847: 6848: if (RAW == 0) 6849: SHRINK; 6850: 6851: if (RAW == 0) { 6852: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6853: } else { 6854: if (ctxt->input->id != id) { 6855: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6856: "All markup of the conditional section is not in the same entity\n", 6857: NULL, NULL); 6858: } 6859: SKIP(3); 6860: } 6861: } 6862: 6863: /** 6864: * xmlParseMarkupDecl: 6865: * @ctxt: an XML parser context 6866: * 6867: * parse Markup declarations 6868: * 6869: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6870: * NotationDecl | PI | Comment 6871: * 6872: * [ VC: Proper Declaration/PE Nesting ] 6873: * Parameter-entity replacement text must be properly nested with 6874: * markup declarations. That is to say, if either the first character 6875: * or the last character of a markup declaration (markupdecl above) is 6876: * contained in the replacement text for a parameter-entity reference, 6877: * both must be contained in the same replacement text. 6878: * 6879: * [ WFC: PEs in Internal Subset ] 6880: * In the internal DTD subset, parameter-entity references can occur 6881: * only where markup declarations can occur, not within markup declarations. 6882: * (This does not apply to references that occur in external parameter 6883: * entities or to the external subset.) 
6884: */ 6885: void 6886: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6887: GROW; 6888: if (CUR == '<') { 6889: if (NXT(1) == '!') { 6890: switch (NXT(2)) { 6891: case 'E': 6892: if (NXT(3) == 'L') 6893: xmlParseElementDecl(ctxt); 6894: else if (NXT(3) == 'N') 6895: xmlParseEntityDecl(ctxt); 6896: break; 6897: case 'A': 6898: xmlParseAttributeListDecl(ctxt); 6899: break; 6900: case 'N': 6901: xmlParseNotationDecl(ctxt); 6902: break; 6903: case '-': 6904: xmlParseComment(ctxt); 6905: break; 6906: default: 6907: /* there is an error but it will be detected later */ 6908: break; 6909: } 6910: } else if (NXT(1) == '?') { 6911: xmlParsePI(ctxt); 6912: } 6913: } 6914: /* 6915: * This is only for internal subset. On external entities, 6916: * the replacement is done before parsing stage 6917: */ 6918: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6919: xmlParsePEReference(ctxt); 6920: 6921: /* 6922: * Conditional sections are allowed from entities included 6923: * by PE References in the internal subset. 6924: */ 6925: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6926: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6927: xmlParseConditionalSections(ctxt); 6928: } 6929: } 6930: 6931: ctxt->instate = XML_PARSER_DTD; 6932: } 6933: 6934: /** 6935: * xmlParseTextDecl: 6936: * @ctxt: an XML parser context 6937: * 6938: * parse an XML declaration header for external entities 6939: * 6940: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6941: */ 6942: 6943: void 6944: xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6945: xmlChar *version; 6946: const xmlChar *encoding; 6947: 6948: /* 6949: * We know that '<?xml' is here. 
6950: */ 6951: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6952: SKIP(5); 6953: } else { 6954: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6955: return; 6956: } 6957: 6958: if (!IS_BLANK_CH(CUR)) { 6959: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6960: "Space needed after '<?xml'\n"); 6961: } 6962: SKIP_BLANKS; 6963: 6964: /* 6965: * We may have the VersionInfo here. 6966: */ 6967: version = xmlParseVersionInfo(ctxt); 6968: if (version == NULL) 6969: version = xmlCharStrdup(XML_DEFAULT_VERSION); 6970: else { 6971: if (!IS_BLANK_CH(CUR)) { 6972: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6973: "Space needed here\n"); 6974: } 6975: } 6976: ctxt->input->version = version; 6977: 6978: /* 6979: * We must have the encoding declaration 6980: */ 6981: encoding = xmlParseEncodingDecl(ctxt); 6982: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6983: /* 6984: * The XML REC instructs us to stop parsing right here 6985: */ 6986: return; 6987: } 6988: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6989: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6990: "Missing encoding in text declaration\n"); 6991: } 6992: 6993: SKIP_BLANKS; 6994: if ((RAW == '?') && (NXT(1) == '>')) { 6995: SKIP(2); 6996: } else if (RAW == '>') { 6997: /* Deprecated old WD ... */ 6998: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6999: NEXT; 7000: } else { 7001: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7002: MOVETO_ENDTAG(CUR_PTR); 7003: NEXT; 7004: } 7005: } 7006: 7007: /** 7008: * xmlParseExternalSubset: 7009: * @ctxt: an XML parser context 7010: * @ExternalID: the external identifier 7011: * @SystemID: the system identifier (or URL) 7012: * 7013: * parse Markup declarations from an external subset 7014: * 7015: * [30] extSubset ::= textDecl? 
extSubsetDecl 7016: * 7017: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7018: */ 7019: void 7020: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7021: const xmlChar *SystemID) { 7022: xmlDetectSAX2(ctxt); 7023: GROW; 7024: 7025: if ((ctxt->encoding == NULL) && 7026: (ctxt->input->end - ctxt->input->cur >= 4)) { 7027: xmlChar start[4]; 7028: xmlCharEncoding enc; 7029: 7030: start[0] = RAW; 7031: start[1] = NXT(1); 7032: start[2] = NXT(2); 7033: start[3] = NXT(3); 7034: enc = xmlDetectCharEncoding(start, 4); 7035: if (enc != XML_CHAR_ENCODING_NONE) 7036: xmlSwitchEncoding(ctxt, enc); 7037: } 7038: 7039: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7040: xmlParseTextDecl(ctxt); 7041: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7042: /* 7043: * The XML REC instructs us to stop parsing right here 7044: */ 7045: ctxt->instate = XML_PARSER_EOF; 7046: return; 7047: } 7048: } 7049: if (ctxt->myDoc == NULL) { 7050: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7051: if (ctxt->myDoc == NULL) { 7052: xmlErrMemory(ctxt, "New Doc failed"); 7053: return; 7054: } 7055: ctxt->myDoc->properties = XML_DOC_INTERNAL; 7056: } 7057: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7058: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7059: 7060: ctxt->instate = XML_PARSER_DTD; 7061: ctxt->external = 1; 7062: while (((RAW == '<') && (NXT(1) == '?')) || 7063: ((RAW == '<') && (NXT(1) == '!')) || 7064: (RAW == '%') || IS_BLANK_CH(CUR)) { 7065: const xmlChar *check = CUR_PTR; 7066: unsigned int cons = ctxt->input->consumed; 7067: 7068: GROW; 7069: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7070: xmlParseConditionalSections(ctxt); 7071: } else if (IS_BLANK_CH(CUR)) { 7072: NEXT; 7073: } else if (RAW == '%') { 7074: xmlParsePEReference(ctxt); 7075: } else 7076: xmlParseMarkupDecl(ctxt); 7077: 7078: /* 7079: * Pop-up of finished entities. 
7080: */ 7081: while ((RAW == 0) && (ctxt->inputNr > 1)) 7082: xmlPopInput(ctxt); 7083: 7084: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7085: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7086: break; 7087: } 7088: } 7089: 7090: if (RAW != 0) { 7091: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7092: } 7093: 7094: } 7095: 7096: /** 7097: * xmlParseReference: 7098: * @ctxt: an XML parser context 7099: * 7100: * parse and handle entity references in content, depending on the SAX 7101: * interface, this may end-up in a call to character() if this is a 7102: * CharRef, a predefined entity, if there is no reference() callback. 7103: * or if the parser was asked to switch to that mode. 7104: * 7105: * [67] Reference ::= EntityRef | CharRef 7106: */ 7107: void 7108: xmlParseReference(xmlParserCtxtPtr ctxt) { 7109: xmlEntityPtr ent; 7110: xmlChar *val; 7111: int was_checked; 7112: xmlNodePtr list = NULL; 7113: xmlParserErrors ret = XML_ERR_OK; 7114: 7115: 7116: if (RAW != '&') 7117: return; 7118: 7119: /* 7120: * Simple case of a CharRef 7121: */ 7122: if (NXT(1) == '#') { 7123: int i = 0; 7124: xmlChar out[10]; 7125: int hex = NXT(2); 7126: int value = xmlParseCharRef(ctxt); 7127: 7128: if (value == 0) 7129: return; 7130: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7131: /* 7132: * So we are using non-UTF-8 buffers 7133: * Check that the char fit on 8bits, if not 7134: * generate a CharRef. 
7135: */ 7136: if (value <= 0xFF) { 7137: out[0] = value; 7138: out[1] = 0; 7139: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7140: (!ctxt->disableSAX)) 7141: ctxt->sax->characters(ctxt->userData, out, 1); 7142: } else { 7143: if ((hex == 'x') || (hex == 'X')) 7144: snprintf((char *)out, sizeof(out), "#x%X", value); 7145: else 7146: snprintf((char *)out, sizeof(out), "#%d", value); 7147: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7148: (!ctxt->disableSAX)) 7149: ctxt->sax->reference(ctxt->userData, out); 7150: } 7151: } else { 7152: /* 7153: * Just encode the value in UTF-8 7154: */ 7155: COPY_BUF(0 ,out, i, value); 7156: out[i] = 0; 7157: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7158: (!ctxt->disableSAX)) 7159: ctxt->sax->characters(ctxt->userData, out, i); 7160: } 7161: return; 7162: } 7163: 7164: /* 7165: * We are seeing an entity reference 7166: */ 7167: ent = xmlParseEntityRef(ctxt); 7168: if (ent == NULL) return; 7169: if (!ctxt->wellFormed) 7170: return; 7171: was_checked = ent->checked; 7172: 7173: /* special case of predefined entities */ 7174: if ((ent->name == NULL) || 7175: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7176: val = ent->content; 7177: if (val == NULL) return; 7178: /* 7179: * inline the entity. 7180: */ 7181: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7182: (!ctxt->disableSAX)) 7183: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7184: return; 7185: } 7186: 7187: /* 7188: * The first reference to the entity trigger a parsing phase 7189: * where the ent->children is filled with the result from 7190: * the parsing. 7191: * Note: external parsed entities will not be loaded, it is not 7192: * required for a non-validating parser, unless the parsing option 7193: * of validating, or substituting entities were given. Doing so is 7194: * far more secure as the parser will only process data coming from 7195: * the document entity by default. 
7196: */ 7197: if ((ent->checked == 0) && 7198: ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7199: (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7200: unsigned long oldnbent = ctxt->nbentities; 7201: 7202: /* 7203: * This is a bit hackish but this seems the best 7204: * way to make sure both SAX and DOM entity support 7205: * behaves okay. 7206: */ 7207: void *user_data; 7208: if (ctxt->userData == ctxt) 7209: user_data = NULL; 7210: else 7211: user_data = ctxt->userData; 7212: 7213: /* 7214: * Check that this entity is well formed 7215: * 4.3.2: An internal general parsed entity is well-formed 7216: * if its replacement text matches the production labeled 7217: * content. 7218: */ 7219: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7220: ctxt->depth++; 7221: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7222: user_data, &list); 7223: ctxt->depth--; 7224: 7225: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7226: ctxt->depth++; 7227: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7228: user_data, ctxt->depth, ent->URI, 7229: ent->ExternalID, &list); 7230: ctxt->depth--; 7231: } else { 7232: ret = XML_ERR_ENTITY_PE_INTERNAL; 7233: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7234: "invalid entity type found\n", NULL); 7235: } 7236: 7237: /* 7238: * Store the number of entities needing parsing for this entity 7239: * content and do checkings 7240: */ 7241: ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7242: if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7243: ent->checked |= 1; 7244: if (ret == XML_ERR_ENTITY_LOOP) { 7245: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7246: xmlFreeNodeList(list); 7247: return; 7248: } 7249: if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7250: xmlFreeNodeList(list); 7251: return; 7252: } 7253: 7254: if ((ret == XML_ERR_OK) && (list != NULL)) { 7255: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7256: (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7257: (ent->children == NULL)) { 7258: ent->children = list; 7259: if (ctxt->replaceEntities) { 7260: /* 7261: * Prune it directly in the generated document 7262: * except for single text nodes. 7263: */ 7264: if (((list->type == XML_TEXT_NODE) && 7265: (list->next == NULL)) || 7266: (ctxt->parseMode == XML_PARSE_READER)) { 7267: list->parent = (xmlNodePtr) ent; 7268: list = NULL; 7269: ent->owner = 1; 7270: } else { 7271: ent->owner = 0; 7272: while (list != NULL) { 7273: list->parent = (xmlNodePtr) ctxt->node; 7274: list->doc = ctxt->myDoc; 7275: if (list->next == NULL) 7276: ent->last = list; 7277: list = list->next; 7278: } 7279: list = ent->children; 7280: #ifdef LIBXML_LEGACY_ENABLED 7281: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7282: xmlAddEntityReference(ent, list, NULL); 7283: #endif /* LIBXML_LEGACY_ENABLED */ 7284: } 7285: } else { 7286: ent->owner = 1; 7287: while (list != NULL) { 7288: list->parent = (xmlNodePtr) ent; 7289: xmlSetTreeDoc(list, ent->doc); 7290: if (list->next == NULL) 7291: ent->last = list; 7292: list = list->next; 7293: } 7294: } 7295: } else { 7296: xmlFreeNodeList(list); 7297: list = NULL; 7298: } 7299: } else if ((ret != XML_ERR_OK) && 7300: (ret != XML_WAR_UNDECLARED_ENTITY)) { 7301: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7302: "Entity '%s' failed to parse\n", ent->name); 7303: } else if (list != NULL) { 7304: xmlFreeNodeList(list); 7305: list = NULL; 7306: } 7307: if (ent->checked == 0) 7308: ent->checked = 2; 7309: } else if (ent->checked != 1) { 7310: ctxt->nbentities += ent->checked / 2; 7311: } 7312: 7313: /* 7314: * Now that the entity content has been gathered 7315: * provide it to the application, this can take different forms based 7316: * on the parsing modes. 7317: */ 7318: if (ent->children == NULL) { 7319: /* 7320: * Probably running in SAX mode and the callbacks don't 7321: * build the entity content. 
So unless we already went 7322: * though parsing for first checking go though the entity 7323: * content to generate callbacks associated to the entity 7324: */ 7325: if (was_checked != 0) { 7326: void *user_data; 7327: /* 7328: * This is a bit hackish but this seems the best 7329: * way to make sure both SAX and DOM entity support 7330: * behaves okay. 7331: */ 7332: if (ctxt->userData == ctxt) 7333: user_data = NULL; 7334: else 7335: user_data = ctxt->userData; 7336: 7337: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7338: ctxt->depth++; 7339: ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7340: ent->content, user_data, NULL); 7341: ctxt->depth--; 7342: } else if (ent->etype == 7343: XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7344: ctxt->depth++; 7345: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7346: ctxt->sax, user_data, ctxt->depth, 7347: ent->URI, ent->ExternalID, NULL); 7348: ctxt->depth--; 7349: } else { 7350: ret = XML_ERR_ENTITY_PE_INTERNAL; 7351: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7352: "invalid entity type found\n", NULL); 7353: } 7354: if (ret == XML_ERR_ENTITY_LOOP) { 7355: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7356: return; 7357: } 7358: } 7359: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7360: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7361: /* 7362: * Entity reference callback comes second, it's somewhat 7363: * superfluous but a compatibility to historical behaviour 7364: */ 7365: ctxt->sax->reference(ctxt->userData, ent->name); 7366: } 7367: return; 7368: } 7369: 7370: /* 7371: * If we didn't get any children for the entity being built 7372: */ 7373: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7374: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7375: /* 7376: * Create a node. 
7377: */ 7378: ctxt->sax->reference(ctxt->userData, ent->name); 7379: return; 7380: } 7381: 7382: if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7383: /* 7384: * There is a problem on the handling of _private for entities 7385: * (bug 155816): Should we copy the content of the field from 7386: * the entity (possibly overwriting some value set by the user 7387: * when a copy is created), should we leave it alone, or should 7388: * we try to take care of different situations? The problem 7389: * is exacerbated by the usage of this field by the xmlReader. 7390: * To fix this bug, we look at _private on the created node 7391: * and, if it's NULL, we copy in whatever was in the entity. 7392: * If it's not NULL we leave it alone. This is somewhat of a 7393: * hack - maybe we should have further tests to determine 7394: * what to do. 7395: */ 7396: if ((ctxt->node != NULL) && (ent->children != NULL)) { 7397: /* 7398: * Seems we are generating the DOM content, do 7399: * a simple tree copy for all references except the first 7400: * In the first occurrence list contains the replacement. 7401: */ 7402: if (((list == NULL) && (ent->owner == 0)) || 7403: (ctxt->parseMode == XML_PARSE_READER)) { 7404: xmlNodePtr nw = NULL, cur, firstChild = NULL; 7405: 7406: /* 7407: * We are copying here, make sure there is no abuse 7408: */ 7409: ctxt->sizeentcopy += ent->length; 7410: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7411: return; 7412: 7413: /* 7414: * when operating on a reader, the entities definitions 7415: * are always owning the entities subtree. 
7416: if (ctxt->parseMode == XML_PARSE_READER) 7417: ent->owner = 1; 7418: */ 7419: 7420: cur = ent->children; 7421: while (cur != NULL) { 7422: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7423: if (nw != NULL) { 7424: if (nw->_private == NULL) 7425: nw->_private = cur->_private; 7426: if (firstChild == NULL){ 7427: firstChild = nw; 7428: } 7429: nw = xmlAddChild(ctxt->node, nw); 7430: } 7431: if (cur == ent->last) { 7432: /* 7433: * needed to detect some strange empty 7434: * node cases in the reader tests 7435: */ 7436: if ((ctxt->parseMode == XML_PARSE_READER) && 7437: (nw != NULL) && 7438: (nw->type == XML_ELEMENT_NODE) && 7439: (nw->children == NULL)) 7440: nw->extra = 1; 7441: 7442: break; 7443: } 7444: cur = cur->next; 7445: } 7446: #ifdef LIBXML_LEGACY_ENABLED 7447: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7448: xmlAddEntityReference(ent, firstChild, nw); 7449: #endif /* LIBXML_LEGACY_ENABLED */ 7450: } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7451: xmlNodePtr nw = NULL, cur, next, last, 7452: firstChild = NULL; 7453: 7454: /* 7455: * We are copying here, make sure there is no abuse 7456: */ 7457: ctxt->sizeentcopy += ent->length; 7458: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7459: return; 7460: 7461: /* 7462: * Copy the entity child list and make it the new 7463: * entity child list. The goal is to make sure any 7464: * ID or REF referenced will be the one from the 7465: * document content and not the entity copy. 
7466: */ 7467: cur = ent->children; 7468: ent->children = NULL; 7469: last = ent->last; 7470: ent->last = NULL; 7471: while (cur != NULL) { 7472: next = cur->next; 7473: cur->next = NULL; 7474: cur->parent = NULL; 7475: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7476: if (nw != NULL) { 7477: if (nw->_private == NULL) 7478: nw->_private = cur->_private; 7479: if (firstChild == NULL){ 7480: firstChild = cur; 7481: } 7482: xmlAddChild((xmlNodePtr) ent, nw); 7483: xmlAddChild(ctxt->node, cur); 7484: } 7485: if (cur == last) 7486: break; 7487: cur = next; 7488: } 7489: if (ent->owner == 0) 7490: ent->owner = 1; 7491: #ifdef LIBXML_LEGACY_ENABLED 7492: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7493: xmlAddEntityReference(ent, firstChild, nw); 7494: #endif /* LIBXML_LEGACY_ENABLED */ 7495: } else { 7496: const xmlChar *nbktext; 7497: 7498: /* 7499: * the name change is to avoid coalescing of the 7500: * node with a possible previous text one which 7501: * would make ent->children a dangling pointer 7502: */ 7503: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7504: -1); 7505: if (ent->children->type == XML_TEXT_NODE) 7506: ent->children->name = nbktext; 7507: if ((ent->last != ent->children) && 7508: (ent->last->type == XML_TEXT_NODE)) 7509: ent->last->name = nbktext; 7510: xmlAddChildList(ctxt->node, ent->children); 7511: } 7512: 7513: /* 7514: * This is to avoid a nasty side effect, see 7515: * characters() in SAX.c 7516: */ 7517: ctxt->nodemem = 0; 7518: ctxt->nodelen = 0; 7519: return; 7520: } 7521: } 7522: } 7523: 7524: /** 7525: * xmlParseEntityRef: 7526: * @ctxt: an XML parser context 7527: * 7528: * parse ENTITY references declarations 7529: * 7530: * [68] EntityRef ::= '&' Name ';' 7531: * 7532: * [ WFC: Entity Declared ] 7533: * In a document without any DTD, a document with only an internal DTD 7534: * subset which contains no parameter entity references, or a document 7535: * with "standalone='yes'", the Name given in the entity reference 
7536: * must match that in an entity declaration, except that well-formed 7537: * documents need not declare any of the following entities: amp, lt, 7538: * gt, apos, quot. The declaration of a parameter entity must precede 7539: * any reference to it. Similarly, the declaration of a general entity 7540: * must precede any reference to it which appears in a default value in an 7541: * attribute-list declaration. Note that if entities are declared in the 7542: * external subset or in external parameter entities, a non-validating 7543: * processor is not obligated to read and process their declarations; 7544: * for such documents, the rule that an entity must be declared is a 7545: * well-formedness constraint only if standalone='yes'. 7546: * 7547: * [ WFC: Parsed Entity ] 7548: * An entity reference must not contain the name of an unparsed entity 7549: * 7550: * Returns the xmlEntityPtr if found, or NULL otherwise. 7551: */ 7552: xmlEntityPtr 7553: xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7554: const xmlChar *name; 7555: xmlEntityPtr ent = NULL; 7556: 7557: GROW; 7558: if (ctxt->instate == XML_PARSER_EOF) 7559: return(NULL); 7560: 7561: if (RAW != '&') 7562: return(NULL); 7563: NEXT; 7564: name = xmlParseName(ctxt); 7565: if (name == NULL) { 7566: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7567: "xmlParseEntityRef: no name\n"); 7568: return(NULL); 7569: } 7570: if (RAW != ';') { 7571: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7572: return(NULL); 7573: } 7574: NEXT; 7575: 7576: /* 7577: * Predefined entities override any extra definition 7578: */ 7579: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7580: ent = xmlGetPredefinedEntity(name); 7581: if (ent != NULL) 7582: return(ent); 7583: } 7584: 7585: /* 7586: * Increase the number of entity references parsed 7587: */ 7588: ctxt->nbentities++; 7589: 7590: /* 7591: * Ask first SAX for entity resolution, otherwise try the 7592: * entities which may have stored in the parser context. 
7593: */ 7594: if (ctxt->sax != NULL) { 7595: if (ctxt->sax->getEntity != NULL) 7596: ent = ctxt->sax->getEntity(ctxt->userData, name); 7597: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7598: (ctxt->options & XML_PARSE_OLDSAX)) 7599: ent = xmlGetPredefinedEntity(name); 7600: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7601: (ctxt->userData==ctxt)) { 7602: ent = xmlSAX2GetEntity(ctxt, name); 7603: } 7604: } 7605: if (ctxt->instate == XML_PARSER_EOF) 7606: return(NULL); 7607: /* 7608: * [ WFC: Entity Declared ] 7609: * In a document without any DTD, a document with only an 7610: * internal DTD subset which contains no parameter entity 7611: * references, or a document with "standalone='yes'", the 7612: * Name given in the entity reference must match that in an 7613: * entity declaration, except that well-formed documents 7614: * need not declare any of the following entities: amp, lt, 7615: * gt, apos, quot. 7616: * The declaration of a parameter entity must precede any 7617: * reference to it. 7618: * Similarly, the declaration of a general entity must 7619: * precede any reference to it which appears in a default 7620: * value in an attribute-list declaration. Note that if 7621: * entities are declared in the external subset or in 7622: * external parameter entities, a non-validating processor 7623: * is not obligated to read and process their declarations; 7624: * for such documents, the rule that an entity must be 7625: * declared is a well-formedness constraint only if 7626: * standalone='yes'. 
7627: */ 7628: if (ent == NULL) { 7629: if ((ctxt->standalone == 1) || 7630: ((ctxt->hasExternalSubset == 0) && 7631: (ctxt->hasPErefs == 0))) { 7632: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7633: "Entity '%s' not defined\n", name); 7634: } else { 7635: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7636: "Entity '%s' not defined\n", name); 7637: if ((ctxt->inSubset == 0) && 7638: (ctxt->sax != NULL) && 7639: (ctxt->sax->reference != NULL)) { 7640: ctxt->sax->reference(ctxt->userData, name); 7641: } 7642: } 7643: ctxt->valid = 0; 7644: } 7645: 7646: /* 7647: * [ WFC: Parsed Entity ] 7648: * An entity reference must not contain the name of an 7649: * unparsed entity 7650: */ 7651: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7652: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7653: "Entity reference to unparsed entity %s\n", name); 7654: } 7655: 7656: /* 7657: * [ WFC: No External Entity References ] 7658: * Attribute values cannot contain direct or indirect 7659: * entity references to external entities. 7660: */ 7661: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7662: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7663: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7664: "Attribute references external entity '%s'\n", name); 7665: } 7666: /* 7667: * [ WFC: No < in Attribute Values ] 7668: * The replacement text of any entity referred to directly or 7669: * indirectly in an attribute value (other than "<") must 7670: * not contain a <. 
7671: */ 7672: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7673: (ent != NULL) && 7674: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7675: if ((ent->checked & 1) || ((ent->checked == 0) && 7676: (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) { 7677: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7678: "'<' in entity '%s' is not allowed in attributes values\n", name); 7679: } 7680: } 7681: 7682: /* 7683: * Internal check, no parameter entities here ... 7684: */ 7685: else { 7686: switch (ent->etype) { 7687: case XML_INTERNAL_PARAMETER_ENTITY: 7688: case XML_EXTERNAL_PARAMETER_ENTITY: 7689: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7690: "Attempt to reference the parameter entity '%s'\n", 7691: name); 7692: break; 7693: default: 7694: break; 7695: } 7696: } 7697: 7698: /* 7699: * [ WFC: No Recursion ] 7700: * A parsed entity must not contain a recursive reference 7701: * to itself, either directly or indirectly. 7702: * Done somewhere else 7703: */ 7704: return(ent); 7705: } 7706: 7707: /** 7708: * xmlParseStringEntityRef: 7709: * @ctxt: an XML parser context 7710: * @str: a pointer to an index in the string 7711: * 7712: * parse ENTITY references declarations, but this version parses it from 7713: * a string value. 7714: * 7715: * [68] EntityRef ::= '&' Name ';' 7716: * 7717: * [ WFC: Entity Declared ] 7718: * In a document without any DTD, a document with only an internal DTD 7719: * subset which contains no parameter entity references, or a document 7720: * with "standalone='yes'", the Name given in the entity reference 7721: * must match that in an entity declaration, except that well-formed 7722: * documents need not declare any of the following entities: amp, lt, 7723: * gt, apos, quot. The declaration of a parameter entity must precede 7724: * any reference to it. 
Similarly, the declaration of a general entity 7725: * must precede any reference to it which appears in a default value in an 7726: * attribute-list declaration. Note that if entities are declared in the 7727: * external subset or in external parameter entities, a non-validating 7728: * processor is not obligated to read and process their declarations; 7729: * for such documents, the rule that an entity must be declared is a 7730: * well-formedness constraint only if standalone='yes'. 7731: * 7732: * [ WFC: Parsed Entity ] 7733: * An entity reference must not contain the name of an unparsed entity 7734: * 7735: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 7736: * is updated to the current location in the string. 7737: */ 7738: static xmlEntityPtr 7739: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7740: xmlChar *name; 7741: const xmlChar *ptr; 7742: xmlChar cur; 7743: xmlEntityPtr ent = NULL; 7744: 7745: if ((str == NULL) || (*str == NULL)) 7746: return(NULL); 7747: ptr = *str; 7748: cur = *ptr; 7749: if (cur != '&') 7750: return(NULL); 7751: 7752: ptr++; 7753: name = xmlParseStringName(ctxt, &ptr); 7754: if (name == NULL) { 7755: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7756: "xmlParseStringEntityRef: no name\n"); 7757: *str = ptr; 7758: return(NULL); 7759: } 7760: if (*ptr != ';') { 7761: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7762: xmlFree(name); 7763: *str = ptr; 7764: return(NULL); 7765: } 7766: ptr++; 7767: 7768: 7769: /* 7770: * Predefined entites override any extra definition 7771: */ 7772: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7773: ent = xmlGetPredefinedEntity(name); 7774: if (ent != NULL) { 7775: xmlFree(name); 7776: *str = ptr; 7777: return(ent); 7778: } 7779: } 7780: 7781: /* 7782: * Increate the number of entity references parsed 7783: */ 7784: ctxt->nbentities++; 7785: 7786: /* 7787: * Ask first SAX for entity resolution, otherwise try the 7788: * entities which may 
have stored in the parser context. 7789: */ 7790: if (ctxt->sax != NULL) { 7791: if (ctxt->sax->getEntity != NULL) 7792: ent = ctxt->sax->getEntity(ctxt->userData, name); 7793: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7794: ent = xmlGetPredefinedEntity(name); 7795: if ((ent == NULL) && (ctxt->userData==ctxt)) { 7796: ent = xmlSAX2GetEntity(ctxt, name); 7797: } 7798: } 7799: if (ctxt->instate == XML_PARSER_EOF) { 7800: xmlFree(name); 7801: return(NULL); 7802: } 7803: 7804: /* 7805: * [ WFC: Entity Declared ] 7806: * In a document without any DTD, a document with only an 7807: * internal DTD subset which contains no parameter entity 7808: * references, or a document with "standalone='yes'", the 7809: * Name given in the entity reference must match that in an 7810: * entity declaration, except that well-formed documents 7811: * need not declare any of the following entities: amp, lt, 7812: * gt, apos, quot. 7813: * The declaration of a parameter entity must precede any 7814: * reference to it. 7815: * Similarly, the declaration of a general entity must 7816: * precede any reference to it which appears in a default 7817: * value in an attribute-list declaration. Note that if 7818: * entities are declared in the external subset or in 7819: * external parameter entities, a non-validating processor 7820: * is not obligated to read and process their declarations; 7821: * for such documents, the rule that an entity must be 7822: * declared is a well-formedness constraint only if 7823: * standalone='yes'. 7824: */ 7825: if (ent == NULL) { 7826: if ((ctxt->standalone == 1) || 7827: ((ctxt->hasExternalSubset == 0) && 7828: (ctxt->hasPErefs == 0))) { 7829: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7830: "Entity '%s' not defined\n", name); 7831: } else { 7832: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7833: "Entity '%s' not defined\n", 7834: name); 7835: } 7836: /* TODO ? 
check regressions ctxt->valid = 0; */ 7837: } 7838: 7839: /* 7840: * [ WFC: Parsed Entity ] 7841: * An entity reference must not contain the name of an 7842: * unparsed entity 7843: */ 7844: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7845: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7846: "Entity reference to unparsed entity %s\n", name); 7847: } 7848: 7849: /* 7850: * [ WFC: No External Entity References ] 7851: * Attribute values cannot contain direct or indirect 7852: * entity references to external entities. 7853: */ 7854: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7855: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7856: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7857: "Attribute references external entity '%s'\n", name); 7858: } 7859: /* 7860: * [ WFC: No < in Attribute Values ] 7861: * The replacement text of any entity referred to directly or 7862: * indirectly in an attribute value (other than "<") must 7863: * not contain a <. 7864: */ 7865: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7866: (ent != NULL) && (ent->content != NULL) && 7867: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7868: (xmlStrchr(ent->content, '<'))) { 7869: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7870: "'<' in entity '%s' is not allowed in attributes values\n", 7871: name); 7872: } 7873: 7874: /* 7875: * Internal check, no parameter entities here ... 7876: */ 7877: else { 7878: switch (ent->etype) { 7879: case XML_INTERNAL_PARAMETER_ENTITY: 7880: case XML_EXTERNAL_PARAMETER_ENTITY: 7881: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7882: "Attempt to reference the parameter entity '%s'\n", 7883: name); 7884: break; 7885: default: 7886: break; 7887: } 7888: } 7889: 7890: /* 7891: * [ WFC: No Recursion ] 7892: * A parsed entity must not contain a recursive reference 7893: * to itself, either directly or indirectly. 
7894: * Done somewhere else 7895: */ 7896: 7897: xmlFree(name); 7898: *str = ptr; 7899: return(ent); 7900: } 7901: 7902: /** 7903: * xmlParsePEReference: 7904: * @ctxt: an XML parser context 7905: * 7906: * parse PEReference declarations 7907: * The entity content is handled directly by pushing it's content as 7908: * a new input stream. 7909: * 7910: * [69] PEReference ::= '%' Name ';' 7911: * 7912: * [ WFC: No Recursion ] 7913: * A parsed entity must not contain a recursive 7914: * reference to itself, either directly or indirectly. 7915: * 7916: * [ WFC: Entity Declared ] 7917: * In a document without any DTD, a document with only an internal DTD 7918: * subset which contains no parameter entity references, or a document 7919: * with "standalone='yes'", ... ... The declaration of a parameter 7920: * entity must precede any reference to it... 7921: * 7922: * [ VC: Entity Declared ] 7923: * In a document with an external subset or external parameter entities 7924: * with "standalone='no'", ... ... The declaration of a parameter entity 7925: * must precede any reference to it... 7926: * 7927: * [ WFC: In DTD ] 7928: * Parameter-entity references may only appear in the DTD. 7929: * NOTE: misleading but this is handled. 
7930: */ 7931: void 7932: xmlParsePEReference(xmlParserCtxtPtr ctxt) 7933: { 7934: const xmlChar *name; 7935: xmlEntityPtr entity = NULL; 7936: xmlParserInputPtr input; 7937: 7938: if (RAW != '%') 7939: return; 7940: NEXT; 7941: name = xmlParseName(ctxt); 7942: if (name == NULL) { 7943: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7944: "xmlParsePEReference: no name\n"); 7945: return; 7946: } 7947: if (RAW != ';') { 7948: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7949: return; 7950: } 7951: 7952: NEXT; 7953: 7954: /* 7955: * Increate the number of entity references parsed 7956: */ 7957: ctxt->nbentities++; 7958: 7959: /* 7960: * Request the entity from SAX 7961: */ 7962: if ((ctxt->sax != NULL) && 7963: (ctxt->sax->getParameterEntity != NULL)) 7964: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7965: if (ctxt->instate == XML_PARSER_EOF) 7966: return; 7967: if (entity == NULL) { 7968: /* 7969: * [ WFC: Entity Declared ] 7970: * In a document without any DTD, a document with only an 7971: * internal DTD subset which contains no parameter entity 7972: * references, or a document with "standalone='yes'", ... 7973: * ... The declaration of a parameter entity must precede 7974: * any reference to it... 7975: */ 7976: if ((ctxt->standalone == 1) || 7977: ((ctxt->hasExternalSubset == 0) && 7978: (ctxt->hasPErefs == 0))) { 7979: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7980: "PEReference: %%%s; not found\n", 7981: name); 7982: } else { 7983: /* 7984: * [ VC: Entity Declared ] 7985: * In a document with an external subset or external 7986: * parameter entities with "standalone='no'", ... 7987: * ... The declaration of a parameter entity must 7988: * precede any reference to it... 
7989: */ 7990: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7991: "PEReference: %%%s; not found\n", 7992: name, NULL); 7993: ctxt->valid = 0; 7994: } 7995: } else { 7996: /* 7997: * Internal checking in case the entity quest barfed 7998: */ 7999: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8000: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8001: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8002: "Internal: %%%s; is not a parameter entity\n", 8003: name, NULL); 8004: } else if (ctxt->input->free != deallocblankswrapper) { 8005: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 8006: if (xmlPushInput(ctxt, input) < 0) 8007: return; 8008: } else { 8009: /* 8010: * TODO !!! 8011: * handle the extra spaces added before and after 8012: * c.f. http://www.w3.org/TR/REC-xml#as-PE 8013: */ 8014: input = xmlNewEntityInputStream(ctxt, entity); 8015: if (xmlPushInput(ctxt, input) < 0) 8016: return; 8017: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8018: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8019: (IS_BLANK_CH(NXT(5)))) { 8020: xmlParseTextDecl(ctxt); 8021: if (ctxt->errNo == 8022: XML_ERR_UNSUPPORTED_ENCODING) { 8023: /* 8024: * The XML REC instructs us to stop parsing 8025: * right here 8026: */ 8027: ctxt->instate = XML_PARSER_EOF; 8028: return; 8029: } 8030: } 8031: } 8032: } 8033: ctxt->hasPErefs = 1; 8034: } 8035: 8036: /** 8037: * xmlLoadEntityContent: 8038: * @ctxt: an XML parser context 8039: * @entity: an unloaded system entity 8040: * 8041: * Load the original content of the given system entity from the 8042: * ExternalID/SystemID given. 
This is to be used for Included in Literal 8043: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8044: * 8045: * Returns 0 in case of success and -1 in case of failure 8046: */ 8047: static int 8048: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8049: xmlParserInputPtr input; 8050: xmlBufferPtr buf; 8051: int l, c; 8052: int count = 0; 8053: 8054: if ((ctxt == NULL) || (entity == NULL) || 8055: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8056: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8057: (entity->content != NULL)) { 8058: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8059: "xmlLoadEntityContent parameter error"); 8060: return(-1); 8061: } 8062: 8063: if (xmlParserDebugEntities) 8064: xmlGenericError(xmlGenericErrorContext, 8065: "Reading %s entity content input\n", entity->name); 8066: 8067: buf = xmlBufferCreate(); 8068: if (buf == NULL) { 8069: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8070: "xmlLoadEntityContent parameter error"); 8071: return(-1); 8072: } 8073: 8074: input = xmlNewEntityInputStream(ctxt, entity); 8075: if (input == NULL) { 8076: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8077: "xmlLoadEntityContent input error"); 8078: xmlBufferFree(buf); 8079: return(-1); 8080: } 8081: 8082: /* 8083: * Push the entity as the current input, read char by char 8084: * saving to the buffer until the end of the entity or an error 8085: */ 8086: if (xmlPushInput(ctxt, input) < 0) { 8087: xmlBufferFree(buf); 8088: return(-1); 8089: } 8090: 8091: GROW; 8092: c = CUR_CHAR(l); 8093: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8094: (IS_CHAR(c))) { 8095: xmlBufferAdd(buf, ctxt->input->cur, l); 8096: if (count++ > XML_PARSER_CHUNK_SIZE) { 8097: count = 0; 8098: GROW; 8099: if (ctxt->instate == XML_PARSER_EOF) { 8100: xmlBufferFree(buf); 8101: return(-1); 8102: } 8103: } 8104: NEXTL(l); 8105: c = CUR_CHAR(l); 8106: if (c == 0) { 8107: count = 0; 8108: GROW; 8109: if 
(ctxt->instate == XML_PARSER_EOF) { 8110: xmlBufferFree(buf); 8111: return(-1); 8112: } 8113: c = CUR_CHAR(l); 8114: } 8115: } 8116: 8117: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8118: xmlPopInput(ctxt); 8119: } else if (!IS_CHAR(c)) { 8120: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8121: "xmlLoadEntityContent: invalid char value %d\n", 8122: c); 8123: xmlBufferFree(buf); 8124: return(-1); 8125: } 8126: entity->content = buf->content; 8127: buf->content = NULL; 8128: xmlBufferFree(buf); 8129: 8130: return(0); 8131: } 8132: 8133: /** 8134: * xmlParseStringPEReference: 8135: * @ctxt: an XML parser context 8136: * @str: a pointer to an index in the string 8137: * 8138: * parse PEReference declarations 8139: * 8140: * [69] PEReference ::= '%' Name ';' 8141: * 8142: * [ WFC: No Recursion ] 8143: * A parsed entity must not contain a recursive 8144: * reference to itself, either directly or indirectly. 8145: * 8146: * [ WFC: Entity Declared ] 8147: * In a document without any DTD, a document with only an internal DTD 8148: * subset which contains no parameter entity references, or a document 8149: * with "standalone='yes'", ... ... The declaration of a parameter 8150: * entity must precede any reference to it... 8151: * 8152: * [ VC: Entity Declared ] 8153: * In a document with an external subset or external parameter entities 8154: * with "standalone='no'", ... ... The declaration of a parameter entity 8155: * must precede any reference to it... 8156: * 8157: * [ WFC: In DTD ] 8158: * Parameter-entity references may only appear in the DTD. 8159: * NOTE: misleading but this is handled. 8160: * 8161: * Returns the string of the entity content. 
8162: * str is updated to the current value of the index 8163: */ 8164: static xmlEntityPtr 8165: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8166: const xmlChar *ptr; 8167: xmlChar cur; 8168: xmlChar *name; 8169: xmlEntityPtr entity = NULL; 8170: 8171: if ((str == NULL) || (*str == NULL)) return(NULL); 8172: ptr = *str; 8173: cur = *ptr; 8174: if (cur != '%') 8175: return(NULL); 8176: ptr++; 8177: name = xmlParseStringName(ctxt, &ptr); 8178: if (name == NULL) { 8179: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8180: "xmlParseStringPEReference: no name\n"); 8181: *str = ptr; 8182: return(NULL); 8183: } 8184: cur = *ptr; 8185: if (cur != ';') { 8186: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8187: xmlFree(name); 8188: *str = ptr; 8189: return(NULL); 8190: } 8191: ptr++; 8192: 8193: /* 8194: * Increate the number of entity references parsed 8195: */ 8196: ctxt->nbentities++; 8197: 8198: /* 8199: * Request the entity from SAX 8200: */ 8201: if ((ctxt->sax != NULL) && 8202: (ctxt->sax->getParameterEntity != NULL)) 8203: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8204: if (ctxt->instate == XML_PARSER_EOF) { 8205: xmlFree(name); 8206: return(NULL); 8207: } 8208: if (entity == NULL) { 8209: /* 8210: * [ WFC: Entity Declared ] 8211: * In a document without any DTD, a document with only an 8212: * internal DTD subset which contains no parameter entity 8213: * references, or a document with "standalone='yes'", ... 8214: * ... The declaration of a parameter entity must precede 8215: * any reference to it... 8216: */ 8217: if ((ctxt->standalone == 1) || 8218: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8219: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8220: "PEReference: %%%s; not found\n", name); 8221: } else { 8222: /* 8223: * [ VC: Entity Declared ] 8224: * In a document with an external subset or external 8225: * parameter entities with "standalone='no'", ... 8226: * ... 
The declaration of a parameter entity must 8227: * precede any reference to it... 8228: */ 8229: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8230: "PEReference: %%%s; not found\n", 8231: name, NULL); 8232: ctxt->valid = 0; 8233: } 8234: } else { 8235: /* 8236: * Internal checking in case the entity quest barfed 8237: */ 8238: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8239: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8240: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8241: "%%%s; is not a parameter entity\n", 8242: name, NULL); 8243: } 8244: } 8245: ctxt->hasPErefs = 1; 8246: xmlFree(name); 8247: *str = ptr; 8248: return(entity); 8249: } 8250: 8251: /** 8252: * xmlParseDocTypeDecl: 8253: * @ctxt: an XML parser context 8254: * 8255: * parse a DOCTYPE declaration 8256: * 8257: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8258: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8259: * 8260: * [ VC: Root Element Type ] 8261: * The Name in the document type declaration must match the element 8262: * type of the root element. 8263: */ 8264: 8265: void 8266: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8267: const xmlChar *name = NULL; 8268: xmlChar *ExternalID = NULL; 8269: xmlChar *URI = NULL; 8270: 8271: /* 8272: * We know that '<!DOCTYPE' has been detected. 8273: */ 8274: SKIP(9); 8275: 8276: SKIP_BLANKS; 8277: 8278: /* 8279: * Parse the DOCTYPE name. 
8280: */ 8281: name = xmlParseName(ctxt); 8282: if (name == NULL) { 8283: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8284: "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8285: } 8286: ctxt->intSubName = name; 8287: 8288: SKIP_BLANKS; 8289: 8290: /* 8291: * Check for SystemID and ExternalID 8292: */ 8293: URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8294: 8295: if ((URI != NULL) || (ExternalID != NULL)) { 8296: ctxt->hasExternalSubset = 1; 8297: } 8298: ctxt->extSubURI = URI; 8299: ctxt->extSubSystem = ExternalID; 8300: 8301: SKIP_BLANKS; 8302: 8303: /* 8304: * Create and update the internal subset. 8305: */ 8306: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8307: (!ctxt->disableSAX)) 8308: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8309: if (ctxt->instate == XML_PARSER_EOF) 8310: return; 8311: 8312: /* 8313: * Is there any internal subset declarations ? 8314: * they are handled separately in xmlParseInternalSubset() 8315: */ 8316: if (RAW == '[') 8317: return; 8318: 8319: /* 8320: * We should be at the end of the DOCTYPE declaration. 8321: */ 8322: if (RAW != '>') { 8323: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8324: } 8325: NEXT; 8326: } 8327: 8328: /** 8329: * xmlParseInternalSubset: 8330: * @ctxt: an XML parser context 8331: * 8332: * parse the internal subset declaration 8333: * 8334: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8335: */ 8336: 8337: static void 8338: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8339: /* 8340: * Is there any DTD definition ? 8341: */ 8342: if (RAW == '[') { 8343: ctxt->instate = XML_PARSER_DTD; 8344: NEXT; 8345: /* 8346: * Parse the succession of Markup declarations and 8347: * PEReferences. 
8348: * Subsequence (markupdecl | PEReference | S)* 8349: */ 8350: while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8351: const xmlChar *check = CUR_PTR; 8352: unsigned int cons = ctxt->input->consumed; 8353: 8354: SKIP_BLANKS; 8355: xmlParseMarkupDecl(ctxt); 8356: xmlParsePEReference(ctxt); 8357: 8358: /* 8359: * Pop-up of finished entities. 8360: */ 8361: while ((RAW == 0) && (ctxt->inputNr > 1)) 8362: xmlPopInput(ctxt); 8363: 8364: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8365: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8366: "xmlParseInternalSubset: error detected in Markup declaration\n"); 8367: break; 8368: } 8369: } 8370: if (RAW == ']') { 8371: NEXT; 8372: SKIP_BLANKS; 8373: } 8374: } 8375: 8376: /* 8377: * We should be at the end of the DOCTYPE declaration. 8378: */ 8379: if (RAW != '>') { 8380: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8381: } 8382: NEXT; 8383: } 8384: 8385: #ifdef LIBXML_SAX1_ENABLED 8386: /** 8387: * xmlParseAttribute: 8388: * @ctxt: an XML parser context 8389: * @value: a xmlChar ** used to store the value of the attribute 8390: * 8391: * parse an attribute 8392: * 8393: * [41] Attribute ::= Name Eq AttValue 8394: * 8395: * [ WFC: No External Entity References ] 8396: * Attribute values cannot contain direct or indirect entity references 8397: * to external entities. 8398: * 8399: * [ WFC: No < in Attribute Values ] 8400: * The replacement text of any entity referred to directly or indirectly in 8401: * an attribute value (other than "<") must not contain a <. 8402: * 8403: * [ VC: Attribute Value Type ] 8404: * The attribute must have been declared; the value must be of the type 8405: * declared for it. 8406: * 8407: * [25] Eq ::= S? '=' S? 8408: * 8409: * With namespace: 8410: * 8411: * [NS 11] Attribute ::= QName Eq AttValue 8412: * 8413: * Also the case QName == xmlns:??? is handled independently as a namespace 8414: * definition. 
8415: * 8416: * Returns the attribute name, and the value in *value. 8417: */ 8418: 8419: const xmlChar * 8420: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8421: const xmlChar *name; 8422: xmlChar *val; 8423: 8424: *value = NULL; 8425: GROW; 8426: name = xmlParseName(ctxt); 8427: if (name == NULL) { 8428: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8429: "error parsing attribute name\n"); 8430: return(NULL); 8431: } 8432: 8433: /* 8434: * read the value 8435: */ 8436: SKIP_BLANKS; 8437: if (RAW == '=') { 8438: NEXT; 8439: SKIP_BLANKS; 8440: val = xmlParseAttValue(ctxt); 8441: ctxt->instate = XML_PARSER_CONTENT; 8442: } else { 8443: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8444: "Specification mandate value for attribute %s\n", name); 8445: return(NULL); 8446: } 8447: 8448: /* 8449: * Check that xml:lang conforms to the specification 8450: * No more registered as an error, just generate a warning now 8451: * since this was deprecated in XML second edition 8452: */ 8453: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8454: if (!xmlCheckLanguageID(val)) { 8455: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8456: "Malformed value for xml:lang : %s\n", 8457: val, NULL); 8458: } 8459: } 8460: 8461: /* 8462: * Check that xml:space conforms to the specification 8463: */ 8464: if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8465: if (xmlStrEqual(val, BAD_CAST "default")) 8466: *(ctxt->space) = 0; 8467: else if (xmlStrEqual(val, BAD_CAST "preserve")) 8468: *(ctxt->space) = 1; 8469: else { 8470: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8471: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8472: val, NULL); 8473: } 8474: } 8475: 8476: *value = val; 8477: return(name); 8478: } 8479: 8480: /** 8481: * xmlParseStartTag: 8482: * @ctxt: an XML parser context 8483: * 8484: * parse a start of tag either for rule element or 8485: * EmptyElement. In both case we don't parse the tag closing chars. 
8486: * 8487: * [40] STag ::= '<' Name (S Attribute)* S? '>' 8488: * 8489: * [ WFC: Unique Att Spec ] 8490: * No attribute name may appear more than once in the same start-tag or 8491: * empty-element tag. 8492: * 8493: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8494: * 8495: * [ WFC: Unique Att Spec ] 8496: * No attribute name may appear more than once in the same start-tag or 8497: * empty-element tag. 8498: * 8499: * With namespace: 8500: * 8501: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8502: * 8503: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8504: * 8505: * Returns the element name parsed 8506: */ 8507: 8508: const xmlChar * 8509: xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8510: const xmlChar *name; 8511: const xmlChar *attname; 8512: xmlChar *attvalue; 8513: const xmlChar **atts = ctxt->atts; 8514: int nbatts = 0; 8515: int maxatts = ctxt->maxatts; 8516: int i; 8517: 8518: if (RAW != '<') return(NULL); 8519: NEXT1; 8520: 8521: name = xmlParseName(ctxt); 8522: if (name == NULL) { 8523: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8524: "xmlParseStartTag: invalid element name\n"); 8525: return(NULL); 8526: } 8527: 8528: /* 8529: * Now parse the attributes, it ends up with the ending 8530: * 8531: * (S Attribute)* S? 8532: */ 8533: SKIP_BLANKS; 8534: GROW; 8535: 8536: while (((RAW != '>') && 8537: ((RAW != '/') || (NXT(1) != '>')) && 8538: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8539: const xmlChar *q = CUR_PTR; 8540: unsigned int cons = ctxt->input->consumed; 8541: 8542: attname = xmlParseAttribute(ctxt, &attvalue); 8543: if ((attname != NULL) && (attvalue != NULL)) { 8544: /* 8545: * [ WFC: Unique Att Spec ] 8546: * No attribute name may appear more than once in the same 8547: * start-tag or empty-element tag. 
8548: */ 8549: for (i = 0; i < nbatts;i += 2) { 8550: if (xmlStrEqual(atts[i], attname)) { 8551: xmlErrAttributeDup(ctxt, NULL, attname); 8552: xmlFree(attvalue); 8553: goto failed; 8554: } 8555: } 8556: /* 8557: * Add the pair to atts 8558: */ 8559: if (atts == NULL) { 8560: maxatts = 22; /* allow for 10 attrs by default */ 8561: atts = (const xmlChar **) 8562: xmlMalloc(maxatts * sizeof(xmlChar *)); 8563: if (atts == NULL) { 8564: xmlErrMemory(ctxt, NULL); 8565: if (attvalue != NULL) 8566: xmlFree(attvalue); 8567: goto failed; 8568: } 8569: ctxt->atts = atts; 8570: ctxt->maxatts = maxatts; 8571: } else if (nbatts + 4 > maxatts) { 8572: const xmlChar **n; 8573: 8574: maxatts *= 2; 8575: n = (const xmlChar **) xmlRealloc((void *) atts, 8576: maxatts * sizeof(const xmlChar *)); 8577: if (n == NULL) { 8578: xmlErrMemory(ctxt, NULL); 8579: if (attvalue != NULL) 8580: xmlFree(attvalue); 8581: goto failed; 8582: } 8583: atts = n; 8584: ctxt->atts = atts; 8585: ctxt->maxatts = maxatts; 8586: } 8587: atts[nbatts++] = attname; 8588: atts[nbatts++] = attvalue; 8589: atts[nbatts] = NULL; 8590: atts[nbatts + 1] = NULL; 8591: } else { 8592: if (attvalue != NULL) 8593: xmlFree(attvalue); 8594: } 8595: 8596: failed: 8597: 8598: GROW 8599: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8600: break; 8601: if (!IS_BLANK_CH(RAW)) { 8602: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8603: "attributes construct error\n"); 8604: } 8605: SKIP_BLANKS; 8606: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8607: (attname == NULL) && (attvalue == NULL)) { 8608: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8609: "xmlParseStartTag: problem parsing attributes\n"); 8610: break; 8611: } 8612: SHRINK; 8613: GROW; 8614: } 8615: 8616: /* 8617: * SAX: Start of Element ! 
8618: */ 8619: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8620: (!ctxt->disableSAX)) { 8621: if (nbatts > 0) 8622: ctxt->sax->startElement(ctxt->userData, name, atts); 8623: else 8624: ctxt->sax->startElement(ctxt->userData, name, NULL); 8625: } 8626: 8627: if (atts != NULL) { 8628: /* Free only the content strings */ 8629: for (i = 1;i < nbatts;i+=2) 8630: if (atts[i] != NULL) 8631: xmlFree((xmlChar *) atts[i]); 8632: } 8633: return(name); 8634: } 8635: 8636: /** 8637: * xmlParseEndTag1: 8638: * @ctxt: an XML parser context 8639: * @line: line of the start tag 8640: * @nsNr: number of namespaces on the start tag 8641: * 8642: * parse an end of tag 8643: * 8644: * [42] ETag ::= '</' Name S? '>' 8645: * 8646: * With namespace 8647: * 8648: * [NS 9] ETag ::= '</' QName S? '>' 8649: */ 8650: 8651: static void 8652: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8653: const xmlChar *name; 8654: 8655: GROW; 8656: if ((RAW != '<') || (NXT(1) != '/')) { 8657: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8658: "xmlParseEndTag: '</' not found\n"); 8659: return; 8660: } 8661: SKIP(2); 8662: 8663: name = xmlParseNameAndCompare(ctxt,ctxt->name); 8664: 8665: /* 8666: * We should definitely be at the ending "S? '>'" part 8667: */ 8668: GROW; 8669: SKIP_BLANKS; 8670: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8671: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8672: } else 8673: NEXT1; 8674: 8675: /* 8676: * [ WFC: Element Type Match ] 8677: * The Name in an element's end-tag must match the element type in the 8678: * start-tag. 
8679: * 8680: */ 8681: if (name != (xmlChar*)1) { 8682: if (name == NULL) name = BAD_CAST "unparseable"; 8683: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8684: "Opening and ending tag mismatch: %s line %d and %s\n", 8685: ctxt->name, line, name); 8686: } 8687: 8688: /* 8689: * SAX: End of Tag 8690: */ 8691: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8692: (!ctxt->disableSAX)) 8693: ctxt->sax->endElement(ctxt->userData, ctxt->name); 8694: 8695: namePop(ctxt); 8696: spacePop(ctxt); 8697: return; 8698: } 8699: 8700: /** 8701: * xmlParseEndTag: 8702: * @ctxt: an XML parser context 8703: * 8704: * parse an end of tag 8705: * 8706: * [42] ETag ::= '</' Name S? '>' 8707: * 8708: * With namespace 8709: * 8710: * [NS 9] ETag ::= '</' QName S? '>' 8711: */ 8712: 8713: void 8714: xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8715: xmlParseEndTag1(ctxt, 0); 8716: } 8717: #endif /* LIBXML_SAX1_ENABLED */ 8718: 8719: /************************************************************************ 8720: * * 8721: * SAX 2 specific operations * 8722: * * 8723: ************************************************************************/ 8724: 8725: /* 8726: * xmlGetNamespace: 8727: * @ctxt: an XML parser context 8728: * @prefix: the prefix to lookup 8729: * 8730: * Lookup the namespace name for the @prefix (which ca be NULL) 8731: * The prefix must come from the @ctxt->dict dictionnary 8732: * 8733: * Returns the namespace name or NULL if not bound 8734: */ 8735: static const xmlChar * 8736: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8737: int i; 8738: 8739: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8740: for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8741: if (ctxt->nsTab[i] == prefix) { 8742: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8743: return(NULL); 8744: return(ctxt->nsTab[i + 1]); 8745: } 8746: return(NULL); 8747: } 8748: 8749: /** 8750: * xmlParseQName: 8751: * @ctxt: an XML parser context 8752: * @prefix: pointer to store 
the prefix part 8753: * 8754: * parse an XML Namespace QName 8755: * 8756: * [6] QName ::= (Prefix ':')? LocalPart 8757: * [7] Prefix ::= NCName 8758: * [8] LocalPart ::= NCName 8759: * 8760: * Returns the Name parsed or NULL 8761: */ 8762: 8763: static const xmlChar * 8764: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8765: const xmlChar *l, *p; 8766: 8767: GROW; 8768: 8769: l = xmlParseNCName(ctxt); 8770: if (l == NULL) { 8771: if (CUR == ':') { 8772: l = xmlParseName(ctxt); 8773: if (l != NULL) { 8774: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8775: "Failed to parse QName '%s'\n", l, NULL, NULL); 8776: *prefix = NULL; 8777: return(l); 8778: } 8779: } 8780: return(NULL); 8781: } 8782: if (CUR == ':') { 8783: NEXT; 8784: p = l; 8785: l = xmlParseNCName(ctxt); 8786: if (l == NULL) { 8787: xmlChar *tmp; 8788: 8789: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8790: "Failed to parse QName '%s:'\n", p, NULL, NULL); 8791: l = xmlParseNmtoken(ctxt); 8792: if (l == NULL) 8793: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8794: else { 8795: tmp = xmlBuildQName(l, p, NULL, 0); 8796: xmlFree((char *)l); 8797: } 8798: p = xmlDictLookup(ctxt->dict, tmp, -1); 8799: if (tmp != NULL) xmlFree(tmp); 8800: *prefix = NULL; 8801: return(p); 8802: } 8803: if (CUR == ':') { 8804: xmlChar *tmp; 8805: 8806: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8807: "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8808: NEXT; 8809: tmp = (xmlChar *) xmlParseName(ctxt); 8810: if (tmp != NULL) { 8811: tmp = xmlBuildQName(tmp, l, NULL, 0); 8812: l = xmlDictLookup(ctxt->dict, tmp, -1); 8813: if (tmp != NULL) xmlFree(tmp); 8814: *prefix = p; 8815: return(l); 8816: } 8817: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8818: l = xmlDictLookup(ctxt->dict, tmp, -1); 8819: if (tmp != NULL) xmlFree(tmp); 8820: *prefix = p; 8821: return(l); 8822: } 8823: *prefix = p; 8824: } else 8825: *prefix = NULL; 8826: return(l); 8827: } 8828: 8829: /** 8830: * xmlParseQNameAndCompare: 8831: * @ctxt: an XML parser context 8832: * 
@name: the localname 8833: * @prefix: the prefix, if any. 8834: * 8835: * parse an XML name and compares for match 8836: * (specialized for endtag parsing) 8837: * 8838: * Returns NULL for an illegal name, (xmlChar*) 1 for success 8839: * and the name for mismatch 8840: */ 8841: 8842: static const xmlChar * 8843: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8844: xmlChar const *prefix) { 8845: const xmlChar *cmp; 8846: const xmlChar *in; 8847: const xmlChar *ret; 8848: const xmlChar *prefix2; 8849: 8850: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8851: 8852: GROW; 8853: in = ctxt->input->cur; 8854: 8855: cmp = prefix; 8856: while (*in != 0 && *in == *cmp) { 8857: ++in; 8858: ++cmp; 8859: } 8860: if ((*cmp == 0) && (*in == ':')) { 8861: in++; 8862: cmp = name; 8863: while (*in != 0 && *in == *cmp) { 8864: ++in; 8865: ++cmp; 8866: } 8867: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8868: /* success */ 8869: ctxt->input->cur = in; 8870: return((const xmlChar*) 1); 8871: } 8872: } 8873: /* 8874: * all strings coms from the dictionary, equality can be done directly 8875: */ 8876: ret = xmlParseQName (ctxt, &prefix2); 8877: if ((ret == name) && (prefix == prefix2)) 8878: return((const xmlChar*) 1); 8879: return ret; 8880: } 8881: 8882: /** 8883: * xmlParseAttValueInternal: 8884: * @ctxt: an XML parser context 8885: * @len: attribute len result 8886: * @alloc: whether the attribute was reallocated as a new string 8887: * @normalize: if 1 then further non-CDATA normalization must be done 8888: * 8889: * parse a value for an attribute. 8890: * NOTE: if no normalization is needed, the routine will return pointers 8891: * directly from the data buffer. 
8892: * 8893: * 3.3.3 Attribute-Value Normalization: 8894: * Before the value of an attribute is passed to the application or 8895: * checked for validity, the XML processor must normalize it as follows: 8896: * - a character reference is processed by appending the referenced 8897: * character to the attribute value 8898: * - an entity reference is processed by recursively processing the 8899: * replacement text of the entity 8900: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8901: * appending #x20 to the normalized value, except that only a single 8902: * #x20 is appended for a "#xD#xA" sequence that is part of an external 8903: * parsed entity or the literal entity value of an internal parsed entity 8904: * - other characters are processed by appending them to the normalized value 8905: * If the declared value is not CDATA, then the XML processor must further 8906: * process the normalized attribute value by discarding any leading and 8907: * trailing space (#x20) characters, and by replacing sequences of space 8908: * (#x20) characters by a single space (#x20) character. 8909: * All attributes for which no declaration has been read should be treated 8910: * by a non-validating parser as if declared CDATA. 8911: * 8912: * Returns the AttValue parsed or NULL. The value has to be freed by the 8913: * caller if it was copied, this can be detected by val[*len] == 0. 
8914: */ 8915: 8916: static xmlChar * 8917: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8918: int normalize) 8919: { 8920: xmlChar limit = 0; 8921: const xmlChar *in = NULL, *start, *end, *last; 8922: xmlChar *ret = NULL; 8923: 8924: GROW; 8925: in = (xmlChar *) CUR_PTR; 8926: if (*in != '"' && *in != '\'') { 8927: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8928: return (NULL); 8929: } 8930: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8931: 8932: /* 8933: * try to handle in this routine the most common case where no 8934: * allocation of a new string is required and where content is 8935: * pure ASCII. 8936: */ 8937: limit = *in++; 8938: end = ctxt->input->end; 8939: start = in; 8940: if (in >= end) { 8941: const xmlChar *oldbase = ctxt->input->base; 8942: GROW; 8943: if (oldbase != ctxt->input->base) { 8944: long delta = ctxt->input->base - oldbase; 8945: start = start + delta; 8946: in = in + delta; 8947: } 8948: end = ctxt->input->end; 8949: } 8950: if (normalize) { 8951: /* 8952: * Skip any leading spaces 8953: */ 8954: while ((in < end) && (*in != limit) && 8955: ((*in == 0x20) || (*in == 0x9) || 8956: (*in == 0xA) || (*in == 0xD))) { 8957: in++; 8958: start = in; 8959: if (in >= end) { 8960: const xmlChar *oldbase = ctxt->input->base; 8961: GROW; 8962: if (ctxt->instate == XML_PARSER_EOF) 8963: return(NULL); 8964: if (oldbase != ctxt->input->base) { 8965: long delta = ctxt->input->base - oldbase; 8966: start = start + delta; 8967: in = in + delta; 8968: } 8969: end = ctxt->input->end; 8970: if (((in - start) > XML_MAX_TEXT_LENGTH) && 8971: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8972: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8973: "AttValue length too long\n"); 8974: return(NULL); 8975: } 8976: } 8977: } 8978: while ((in < end) && (*in != limit) && (*in >= 0x20) && 8979: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8980: if ((*in++ == 0x20) && (*in == 0x20)) break; 8981: if (in >= end) { 8982: const 
xmlChar *oldbase = ctxt->input->base; 8983: GROW; 8984: if (ctxt->instate == XML_PARSER_EOF) 8985: return(NULL); 8986: if (oldbase != ctxt->input->base) { 8987: long delta = ctxt->input->base - oldbase; 8988: start = start + delta; 8989: in = in + delta; 8990: } 8991: end = ctxt->input->end; 8992: if (((in - start) > XML_MAX_TEXT_LENGTH) && 8993: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8994: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8995: "AttValue length too long\n"); 8996: return(NULL); 8997: } 8998: } 8999: } 9000: last = in; 9001: /* 9002: * skip the trailing blanks 9003: */ 9004: while ((last[-1] == 0x20) && (last > start)) last--; 9005: while ((in < end) && (*in != limit) && 9006: ((*in == 0x20) || (*in == 0x9) || 9007: (*in == 0xA) || (*in == 0xD))) { 9008: in++; 9009: if (in >= end) { 9010: const xmlChar *oldbase = ctxt->input->base; 9011: GROW; 9012: if (ctxt->instate == XML_PARSER_EOF) 9013: return(NULL); 9014: if (oldbase != ctxt->input->base) { 9015: long delta = ctxt->input->base - oldbase; 9016: start = start + delta; 9017: in = in + delta; 9018: last = last + delta; 9019: } 9020: end = ctxt->input->end; 9021: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9022: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9023: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9024: "AttValue length too long\n"); 9025: return(NULL); 9026: } 9027: } 9028: } 9029: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9030: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9031: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9032: "AttValue length too long\n"); 9033: return(NULL); 9034: } 9035: if (*in != limit) goto need_complex; 9036: } else { 9037: while ((in < end) && (*in != limit) && (*in >= 0x20) && 9038: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9039: in++; 9040: if (in >= end) { 9041: const xmlChar *oldbase = ctxt->input->base; 9042: GROW; 9043: if (ctxt->instate == XML_PARSER_EOF) 9044: return(NULL); 9045: if (oldbase != ctxt->input->base) { 9046: long 
delta = ctxt->input->base - oldbase; 9047: start = start + delta; 9048: in = in + delta; 9049: } 9050: end = ctxt->input->end; 9051: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9052: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9053: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9054: "AttValue length too long\n"); 9055: return(NULL); 9056: } 9057: } 9058: } 9059: last = in; 9060: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9061: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9062: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9063: "AttValue length too long\n"); 9064: return(NULL); 9065: } 9066: if (*in != limit) goto need_complex; 9067: } 9068: in++; 9069: if (len != NULL) { 9070: *len = last - start; 9071: ret = (xmlChar *) start; 9072: } else { 9073: if (alloc) *alloc = 1; 9074: ret = xmlStrndup(start, last - start); 9075: } 9076: CUR_PTR = in; 9077: if (alloc) *alloc = 0; 9078: return ret; 9079: need_complex: 9080: if (alloc) *alloc = 1; 9081: return xmlParseAttValueComplex(ctxt, len, normalize); 9082: } 9083: 9084: /** 9085: * xmlParseAttribute2: 9086: * @ctxt: an XML parser context 9087: * @pref: the element prefix 9088: * @elem: the element name 9089: * @prefix: a xmlChar ** used to store the value of the attribute prefix 9090: * @value: a xmlChar ** used to store the value of the attribute 9091: * @len: an int * to save the length of the attribute 9092: * @alloc: an int * to indicate if the attribute was allocated 9093: * 9094: * parse an attribute in the new SAX2 framework. 9095: * 9096: * Returns the attribute name, and the value in *value, . 
9097: */ 9098: 9099: static const xmlChar * 9100: xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9101: const xmlChar * pref, const xmlChar * elem, 9102: const xmlChar ** prefix, xmlChar ** value, 9103: int *len, int *alloc) 9104: { 9105: const xmlChar *name; 9106: xmlChar *val, *internal_val = NULL; 9107: int normalize = 0; 9108: 9109: *value = NULL; 9110: GROW; 9111: name = xmlParseQName(ctxt, prefix); 9112: if (name == NULL) { 9113: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9114: "error parsing attribute name\n"); 9115: return (NULL); 9116: } 9117: 9118: /* 9119: * get the type if needed 9120: */ 9121: if (ctxt->attsSpecial != NULL) { 9122: int type; 9123: 9124: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9125: pref, elem, *prefix, name); 9126: if (type != 0) 9127: normalize = 1; 9128: } 9129: 9130: /* 9131: * read the value 9132: */ 9133: SKIP_BLANKS; 9134: if (RAW == '=') { 9135: NEXT; 9136: SKIP_BLANKS; 9137: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9138: if (normalize) { 9139: /* 9140: * Sometimes a second normalisation pass for spaces is needed 9141: * but that only happens if charrefs or entities refernces 9142: * have been used in the attribute value, i.e. the attribute 9143: * value have been extracted in an allocated string already. 
9144: */ 9145: if (*alloc) { 9146: const xmlChar *val2; 9147: 9148: val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9149: if ((val2 != NULL) && (val2 != val)) { 9150: xmlFree(val); 9151: val = (xmlChar *) val2; 9152: } 9153: } 9154: } 9155: ctxt->instate = XML_PARSER_CONTENT; 9156: } else { 9157: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9158: "Specification mandate value for attribute %s\n", 9159: name); 9160: return (NULL); 9161: } 9162: 9163: if (*prefix == ctxt->str_xml) { 9164: /* 9165: * Check that xml:lang conforms to the specification 9166: * No more registered as an error, just generate a warning now 9167: * since this was deprecated in XML second edition 9168: */ 9169: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9170: internal_val = xmlStrndup(val, *len); 9171: if (!xmlCheckLanguageID(internal_val)) { 9172: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9173: "Malformed value for xml:lang : %s\n", 9174: internal_val, NULL); 9175: } 9176: } 9177: 9178: /* 9179: * Check that xml:space conforms to the specification 9180: */ 9181: if (xmlStrEqual(name, BAD_CAST "space")) { 9182: internal_val = xmlStrndup(val, *len); 9183: if (xmlStrEqual(internal_val, BAD_CAST "default")) 9184: *(ctxt->space) = 0; 9185: else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9186: *(ctxt->space) = 1; 9187: else { 9188: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9189: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9190: internal_val, NULL); 9191: } 9192: } 9193: if (internal_val) { 9194: xmlFree(internal_val); 9195: } 9196: } 9197: 9198: *value = val; 9199: return (name); 9200: } 9201: /** 9202: * xmlParseStartTag2: 9203: * @ctxt: an XML parser context 9204: * 9205: * parse a start of tag either for rule element or 9206: * EmptyElement. In both case we don't parse the tag closing chars. 9207: * This routine is called when running SAX2 parsing 9208: * 9209: * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9210: * 9211: * [ WFC: Unique Att Spec ] 9212: * No attribute name may appear more than once in the same start-tag or 9213: * empty-element tag. 9214: * 9215: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9216: * 9217: * [ WFC: Unique Att Spec ] 9218: * No attribute name may appear more than once in the same start-tag or 9219: * empty-element tag. 9220: * 9221: * With namespace: 9222: * 9223: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9224: * 9225: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9226: * 9227: * Returns the element name parsed 9228: */ 9229: 9230: static const xmlChar * 9231: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9232: const xmlChar **URI, int *tlen) { 9233: const xmlChar *localname; 9234: const xmlChar *prefix; 9235: const xmlChar *attname; 9236: const xmlChar *aprefix; 9237: const xmlChar *nsname; 9238: xmlChar *attvalue; 9239: const xmlChar **atts = ctxt->atts; 9240: int maxatts = ctxt->maxatts; 9241: int nratts, nbatts, nbdef; 9242: int i, j, nbNs, attval, oldline, oldcol; 9243: const xmlChar *base; 9244: unsigned long cur; 9245: int nsNr = ctxt->nsNr; 9246: 9247: if (RAW != '<') return(NULL); 9248: NEXT1; 9249: 9250: /* 9251: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9252: * point since the attribute values may be stored as pointers to 9253: * the buffer and calling SHRINK would destroy them ! 9254: * The Shrinking is only possible once the full set of attribute 9255: * callbacks have been done. 9256: */ 9257: reparse: 9258: SHRINK; 9259: base = ctxt->input->base; 9260: cur = ctxt->input->cur - ctxt->input->base; 9261: oldline = ctxt->input->line; 9262: oldcol = ctxt->input->col; 9263: nbatts = 0; 9264: nratts = 0; 9265: nbdef = 0; 9266: nbNs = 0; 9267: attval = 0; 9268: /* Forget any namespaces added during an earlier parse of this element. 
*/ 9269: ctxt->nsNr = nsNr; 9270: 9271: localname = xmlParseQName(ctxt, &prefix); 9272: if (localname == NULL) { 9273: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9274: "StartTag: invalid element name\n"); 9275: return(NULL); 9276: } 9277: *tlen = ctxt->input->cur - ctxt->input->base - cur; 9278: 9279: /* 9280: * Now parse the attributes, it ends up with the ending 9281: * 9282: * (S Attribute)* S? 9283: */ 9284: SKIP_BLANKS; 9285: GROW; 9286: if (ctxt->input->base != base) goto base_changed; 9287: 9288: while (((RAW != '>') && 9289: ((RAW != '/') || (NXT(1) != '>')) && 9290: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9291: const xmlChar *q = CUR_PTR; 9292: unsigned int cons = ctxt->input->consumed; 9293: int len = -1, alloc = 0; 9294: 9295: attname = xmlParseAttribute2(ctxt, prefix, localname, 9296: &aprefix, &attvalue, &len, &alloc); 9297: if (ctxt->input->base != base) { 9298: if ((attvalue != NULL) && (alloc != 0)) 9299: xmlFree(attvalue); 9300: attvalue = NULL; 9301: goto base_changed; 9302: } 9303: if ((attname != NULL) && (attvalue != NULL)) { 9304: if (len < 0) len = xmlStrlen(attvalue); 9305: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9306: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9307: xmlURIPtr uri; 9308: 9309: if (*URL != 0) { 9310: uri = xmlParseURI((const char *) URL); 9311: if (uri == NULL) { 9312: xmlNsErr(ctxt, XML_WAR_NS_URI, 9313: "xmlns: '%s' is not a valid URI\n", 9314: URL, NULL, NULL); 9315: } else { 9316: if (uri->scheme == NULL) { 9317: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9318: "xmlns: URI %s is not absolute\n", 9319: URL, NULL, NULL); 9320: } 9321: xmlFreeURI(uri); 9322: } 9323: if (URL == ctxt->str_xml_ns) { 9324: if (attname != ctxt->str_xml) { 9325: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9326: "xml namespace URI cannot be the default namespace\n", 9327: NULL, NULL, NULL); 9328: } 9329: goto skip_default_ns; 9330: } 9331: if ((len == 29) && 9332: (xmlStrEqual(URL, 9333: 
BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9334: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9335: "reuse of the xmlns namespace name is forbidden\n", 9336: NULL, NULL, NULL); 9337: goto skip_default_ns; 9338: } 9339: } 9340: /* 9341: * check that it's not a defined namespace 9342: */ 9343: for (j = 1;j <= nbNs;j++) 9344: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9345: break; 9346: if (j <= nbNs) 9347: xmlErrAttributeDup(ctxt, NULL, attname); 9348: else 9349: if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9350: skip_default_ns: 9351: if (alloc != 0) xmlFree(attvalue); 9352: SKIP_BLANKS; 9353: continue; 9354: } 9355: if (aprefix == ctxt->str_xmlns) { 9356: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9357: xmlURIPtr uri; 9358: 9359: if (attname == ctxt->str_xml) { 9360: if (URL != ctxt->str_xml_ns) { 9361: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9362: "xml namespace prefix mapped to wrong URI\n", 9363: NULL, NULL, NULL); 9364: } 9365: /* 9366: * Do not keep a namespace definition node 9367: */ 9368: goto skip_ns; 9369: } 9370: if (URL == ctxt->str_xml_ns) { 9371: if (attname != ctxt->str_xml) { 9372: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9373: "xml namespace URI mapped to wrong prefix\n", 9374: NULL, NULL, NULL); 9375: } 9376: goto skip_ns; 9377: } 9378: if (attname == ctxt->str_xmlns) { 9379: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9380: "redefinition of the xmlns prefix is forbidden\n", 9381: NULL, NULL, NULL); 9382: goto skip_ns; 9383: } 9384: if ((len == 29) && 9385: (xmlStrEqual(URL, 9386: BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9387: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9388: "reuse of the xmlns namespace name is forbidden\n", 9389: NULL, NULL, NULL); 9390: goto skip_ns; 9391: } 9392: if ((URL == NULL) || (URL[0] == 0)) { 9393: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9394: "xmlns:%s: Empty XML namespace is not allowed\n", 9395: attname, NULL, NULL); 9396: goto skip_ns; 9397: } else { 9398: uri = xmlParseURI((const char *) URL); 
9399: if (uri == NULL) { 9400: xmlNsErr(ctxt, XML_WAR_NS_URI, 9401: "xmlns:%s: '%s' is not a valid URI\n", 9402: attname, URL, NULL); 9403: } else { 9404: if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9405: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9406: "xmlns:%s: URI %s is not absolute\n", 9407: attname, URL, NULL); 9408: } 9409: xmlFreeURI(uri); 9410: } 9411: } 9412: 9413: /* 9414: * check that it's not a defined namespace 9415: */ 9416: for (j = 1;j <= nbNs;j++) 9417: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9418: break; 9419: if (j <= nbNs) 9420: xmlErrAttributeDup(ctxt, aprefix, attname); 9421: else 9422: if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9423: skip_ns: 9424: if (alloc != 0) xmlFree(attvalue); 9425: SKIP_BLANKS; 9426: if (ctxt->input->base != base) goto base_changed; 9427: continue; 9428: } 9429: 9430: /* 9431: * Add the pair to atts 9432: */ 9433: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9434: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9435: if (attvalue[len] == 0) 9436: xmlFree(attvalue); 9437: goto failed; 9438: } 9439: maxatts = ctxt->maxatts; 9440: atts = ctxt->atts; 9441: } 9442: ctxt->attallocs[nratts++] = alloc; 9443: atts[nbatts++] = attname; 9444: atts[nbatts++] = aprefix; 9445: atts[nbatts++] = NULL; /* the URI will be fetched later */ 9446: atts[nbatts++] = attvalue; 9447: attvalue += len; 9448: atts[nbatts++] = attvalue; 9449: /* 9450: * tag if some deallocation is needed 9451: */ 9452: if (alloc != 0) attval = 1; 9453: } else { 9454: if ((attvalue != NULL) && (attvalue[len] == 0)) 9455: xmlFree(attvalue); 9456: } 9457: 9458: failed: 9459: 9460: GROW 9461: if (ctxt->instate == XML_PARSER_EOF) 9462: break; 9463: if (ctxt->input->base != base) goto base_changed; 9464: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9465: break; 9466: if (!IS_BLANK_CH(RAW)) { 9467: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9468: "attributes construct error\n"); 9469: break; 9470: } 9471: SKIP_BLANKS; 9472: if ((cons == 
ctxt->input->consumed) && (q == CUR_PTR) && 9473: (attname == NULL) && (attvalue == NULL)) { 9474: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9475: "xmlParseStartTag: problem parsing attributes\n"); 9476: break; 9477: } 9478: GROW; 9479: if (ctxt->input->base != base) goto base_changed; 9480: } 9481: 9482: /* 9483: * The attributes defaulting 9484: */ 9485: if (ctxt->attsDefault != NULL) { 9486: xmlDefAttrsPtr defaults; 9487: 9488: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9489: if (defaults != NULL) { 9490: for (i = 0;i < defaults->nbAttrs;i++) { 9491: attname = defaults->values[5 * i]; 9492: aprefix = defaults->values[5 * i + 1]; 9493: 9494: /* 9495: * special work for namespaces defaulted defs 9496: */ 9497: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9498: /* 9499: * check that it's not a defined namespace 9500: */ 9501: for (j = 1;j <= nbNs;j++) 9502: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9503: break; 9504: if (j <= nbNs) continue; 9505: 9506: nsname = xmlGetNamespace(ctxt, NULL); 9507: if (nsname != defaults->values[5 * i + 2]) { 9508: if (nsPush(ctxt, NULL, 9509: defaults->values[5 * i + 2]) > 0) 9510: nbNs++; 9511: } 9512: } else if (aprefix == ctxt->str_xmlns) { 9513: /* 9514: * check that it's not a defined namespace 9515: */ 9516: for (j = 1;j <= nbNs;j++) 9517: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9518: break; 9519: if (j <= nbNs) continue; 9520: 9521: nsname = xmlGetNamespace(ctxt, attname); 9522: if (nsname != defaults->values[2]) { 9523: if (nsPush(ctxt, attname, 9524: defaults->values[5 * i + 2]) > 0) 9525: nbNs++; 9526: } 9527: } else { 9528: /* 9529: * check that it's not a defined attribute 9530: */ 9531: for (j = 0;j < nbatts;j+=5) { 9532: if ((attname == atts[j]) && (aprefix == atts[j+1])) 9533: break; 9534: } 9535: if (j < nbatts) continue; 9536: 9537: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9538: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9539: return(NULL); 9540: } 9541: maxatts 
= ctxt->maxatts; 9542: atts = ctxt->atts; 9543: } 9544: atts[nbatts++] = attname; 9545: atts[nbatts++] = aprefix; 9546: if (aprefix == NULL) 9547: atts[nbatts++] = NULL; 9548: else 9549: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9550: atts[nbatts++] = defaults->values[5 * i + 2]; 9551: atts[nbatts++] = defaults->values[5 * i + 3]; 9552: if ((ctxt->standalone == 1) && 9553: (defaults->values[5 * i + 4] != NULL)) { 9554: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9555: "standalone: attribute %s on %s defaulted from external subset\n", 9556: attname, localname); 9557: } 9558: nbdef++; 9559: } 9560: } 9561: } 9562: } 9563: 9564: /* 9565: * The attributes checkings 9566: */ 9567: for (i = 0; i < nbatts;i += 5) { 9568: /* 9569: * The default namespace does not apply to attribute names. 9570: */ 9571: if (atts[i + 1] != NULL) { 9572: nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9573: if (nsname == NULL) { 9574: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9575: "Namespace prefix %s for %s on %s is not defined\n", 9576: atts[i + 1], atts[i], localname); 9577: } 9578: atts[i + 2] = nsname; 9579: } else 9580: nsname = NULL; 9581: /* 9582: * [ WFC: Unique Att Spec ] 9583: * No attribute name may appear more than once in the same 9584: * start-tag or empty-element tag. 9585: * As extended by the Namespace in XML REC. 
9586: */ 9587: for (j = 0; j < i;j += 5) { 9588: if (atts[i] == atts[j]) { 9589: if (atts[i+1] == atts[j+1]) { 9590: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9591: break; 9592: } 9593: if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9594: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9595: "Namespaced Attribute %s in '%s' redefined\n", 9596: atts[i], nsname, NULL); 9597: break; 9598: } 9599: } 9600: } 9601: } 9602: 9603: nsname = xmlGetNamespace(ctxt, prefix); 9604: if ((prefix != NULL) && (nsname == NULL)) { 9605: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9606: "Namespace prefix %s on %s is not defined\n", 9607: prefix, localname, NULL); 9608: } 9609: *pref = prefix; 9610: *URI = nsname; 9611: 9612: /* 9613: * SAX: Start of Element ! 9614: */ 9615: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9616: (!ctxt->disableSAX)) { 9617: if (nbNs > 0) 9618: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9619: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9620: nbatts / 5, nbdef, atts); 9621: else 9622: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9623: nsname, 0, NULL, nbatts / 5, nbdef, atts); 9624: } 9625: 9626: /* 9627: * Free up attribute allocated strings if needed 9628: */ 9629: if (attval != 0) { 9630: for (i = 3,j = 0; j < nratts;i += 5,j++) 9631: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9632: xmlFree((xmlChar *) atts[i]); 9633: } 9634: 9635: return(localname); 9636: 9637: base_changed: 9638: /* 9639: * the attribute strings are valid iif the base didn't changed 9640: */ 9641: if (attval != 0) { 9642: for (i = 3,j = 0; j < nratts;i += 5,j++) 9643: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9644: xmlFree((xmlChar *) atts[i]); 9645: } 9646: ctxt->input->cur = ctxt->input->base + cur; 9647: ctxt->input->line = oldline; 9648: ctxt->input->col = oldcol; 9649: if (ctxt->wellFormed == 1) { 9650: goto reparse; 9651: } 9652: return(NULL); 9653: } 9654: 9655: /** 9656: * xmlParseEndTag2: 
9657: * @ctxt: an XML parser context 9658: * @line: line of the start tag 9659: * @nsNr: number of namespaces on the start tag 9660: * 9661: * parse an end of tag 9662: * 9663: * [42] ETag ::= '</' Name S? '>' 9664: * 9665: * With namespace 9666: * 9667: * [NS 9] ETag ::= '</' QName S? '>' 9668: */ 9669: 9670: static void 9671: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9672: const xmlChar *URI, int line, int nsNr, int tlen) { 9673: const xmlChar *name; 9674: 9675: GROW; 9676: if ((RAW != '<') || (NXT(1) != '/')) { 9677: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9678: return; 9679: } 9680: SKIP(2); 9681: 9682: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9683: if (ctxt->input->cur[tlen] == '>') { 9684: ctxt->input->cur += tlen + 1; 9685: goto done; 9686: } 9687: ctxt->input->cur += tlen; 9688: name = (xmlChar*)1; 9689: } else { 9690: if (prefix == NULL) 9691: name = xmlParseNameAndCompare(ctxt, ctxt->name); 9692: else 9693: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9694: } 9695: 9696: /* 9697: * We should definitely be at the ending "S? '>'" part 9698: */ 9699: GROW; 9700: if (ctxt->instate == XML_PARSER_EOF) 9701: return; 9702: SKIP_BLANKS; 9703: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9704: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9705: } else 9706: NEXT1; 9707: 9708: /* 9709: * [ WFC: Element Type Match ] 9710: * The Name in an element's end-tag must match the element type in the 9711: * start-tag. 
9712: * 9713: */ 9714: if (name != (xmlChar*)1) { 9715: if (name == NULL) name = BAD_CAST "unparseable"; 9716: if ((line == 0) && (ctxt->node != NULL)) 9717: line = ctxt->node->line; 9718: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9719: "Opening and ending tag mismatch: %s line %d and %s\n", 9720: ctxt->name, line, name); 9721: } 9722: 9723: /* 9724: * SAX: End of Tag 9725: */ 9726: done: 9727: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9728: (!ctxt->disableSAX)) 9729: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9730: 9731: spacePop(ctxt); 9732: if (nsNr != 0) 9733: nsPop(ctxt, nsNr); 9734: return; 9735: } 9736: 9737: /** 9738: * xmlParseCDSect: 9739: * @ctxt: an XML parser context 9740: * 9741: * Parse escaped pure raw content. 9742: * 9743: * [18] CDSect ::= CDStart CData CDEnd 9744: * 9745: * [19] CDStart ::= '<![CDATA[' 9746: * 9747: * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9748: * 9749: * [21] CDEnd ::= ']]>' 9750: */ 9751: void 9752: xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9753: xmlChar *buf = NULL; 9754: int len = 0; 9755: int size = XML_PARSER_BUFFER_SIZE; 9756: int r, rl; 9757: int s, sl; 9758: int cur, l; 9759: int count = 0; 9760: 9761: /* Check 2.6.0 was NXT(0) not RAW */ 9762: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9763: SKIP(9); 9764: } else 9765: return; 9766: 9767: ctxt->instate = XML_PARSER_CDATA_SECTION; 9768: r = CUR_CHAR(rl); 9769: if (!IS_CHAR(r)) { 9770: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9771: ctxt->instate = XML_PARSER_CONTENT; 9772: return; 9773: } 9774: NEXTL(rl); 9775: s = CUR_CHAR(sl); 9776: if (!IS_CHAR(s)) { 9777: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9778: ctxt->instate = XML_PARSER_CONTENT; 9779: return; 9780: } 9781: NEXTL(sl); 9782: cur = CUR_CHAR(l); 9783: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9784: if (buf == NULL) { 9785: xmlErrMemory(ctxt, NULL); 9786: return; 9787: } 9788: while 
(IS_CHAR(cur) && 9789: ((r != ']') || (s != ']') || (cur != '>'))) { 9790: if (len + 5 >= size) { 9791: xmlChar *tmp; 9792: 9793: if ((size > XML_MAX_TEXT_LENGTH) && 9794: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9795: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9796: "CData section too big found", NULL); 9797: xmlFree (buf); 9798: return; 9799: } 9800: tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9801: if (tmp == NULL) { 9802: xmlFree(buf); 9803: xmlErrMemory(ctxt, NULL); 9804: return; 9805: } 9806: buf = tmp; 9807: size *= 2; 9808: } 9809: COPY_BUF(rl,buf,len,r); 9810: r = s; 9811: rl = sl; 9812: s = cur; 9813: sl = l; 9814: count++; 9815: if (count > 50) { 9816: GROW; 9817: if (ctxt->instate == XML_PARSER_EOF) { 9818: xmlFree(buf); 9819: return; 9820: } 9821: count = 0; 9822: } 9823: NEXTL(l); 9824: cur = CUR_CHAR(l); 9825: } 9826: buf[len] = 0; 9827: ctxt->instate = XML_PARSER_CONTENT; 9828: if (cur != '>') { 9829: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9830: "CData section not finished\n%.50s\n", buf); 9831: xmlFree(buf); 9832: return; 9833: } 9834: NEXTL(l); 9835: 9836: /* 9837: * OK the buffer is to be consumed as cdata. 
9838: */ 9839: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9840: if (ctxt->sax->cdataBlock != NULL) 9841: ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9842: else if (ctxt->sax->characters != NULL) 9843: ctxt->sax->characters(ctxt->userData, buf, len); 9844: } 9845: xmlFree(buf); 9846: } 9847: 9848: /** 9849: * xmlParseContent: 9850: * @ctxt: an XML parser context 9851: * 9852: * Parse a content: 9853: * 9854: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9855: */ 9856: 9857: void 9858: xmlParseContent(xmlParserCtxtPtr ctxt) { 9859: GROW; 9860: while ((RAW != 0) && 9861: ((RAW != '<') || (NXT(1) != '/')) && 9862: (ctxt->instate != XML_PARSER_EOF)) { 9863: const xmlChar *test = CUR_PTR; 9864: unsigned int cons = ctxt->input->consumed; 9865: const xmlChar *cur = ctxt->input->cur; 9866: 9867: /* 9868: * First case : a Processing Instruction. 9869: */ 9870: if ((*cur == '<') && (cur[1] == '?')) { 9871: xmlParsePI(ctxt); 9872: } 9873: 9874: /* 9875: * Second case : a CDSection 9876: */ 9877: /* 2.6.0 test was *cur not RAW */ 9878: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9879: xmlParseCDSect(ctxt); 9880: } 9881: 9882: /* 9883: * Third case : a comment 9884: */ 9885: else if ((*cur == '<') && (NXT(1) == '!') && 9886: (NXT(2) == '-') && (NXT(3) == '-')) { 9887: xmlParseComment(ctxt); 9888: ctxt->instate = XML_PARSER_CONTENT; 9889: } 9890: 9891: /* 9892: * Fourth case : a sub-element. 9893: */ 9894: else if (*cur == '<') { 9895: xmlParseElement(ctxt); 9896: } 9897: 9898: /* 9899: * Fifth case : a reference. If if has not been resolved, 9900: * parsing returns it's Name, create the node 9901: */ 9902: 9903: else if (*cur == '&') { 9904: xmlParseReference(ctxt); 9905: } 9906: 9907: /* 9908: * Last case, text. Note that References are handled directly. 9909: */ 9910: else { 9911: xmlParseCharData(ctxt, 0); 9912: } 9913: 9914: GROW; 9915: /* 9916: * Pop-up of finished entities. 
/**
 * xmlParseElement:
 * @ctxt:  an XML parser context
 *
 * parse an XML element, this is highly recursive
 *
 * [39] element ::= EmptyElemTag | STag content ETag
 *
 * [ WFC: Element Type Match ]
 * The Name in an element's end-tag must match the element type in the
 * start-tag.
 *
 * The routine pushes one entry on the space stack and, once the start tag
 * has been parsed, one on the name stack; every exit path taken while the
 * parser is not in the XML_PARSER_EOF state pops what was pushed, together
 * with the namespaces declared on the start tag.  When @ctxt->record_info
 * is set and a node was built, its position is registered through
 * xmlParserAddNodeInfo().
 */

void
xmlParseElement(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    const xmlChar *prefix = NULL;
    const xmlChar *URI = NULL;
    xmlParserNodeInfo node_info;
    int line, tlen = 0;
    xmlNodePtr ret;
    /* namespace stack level on entry, used to pop this element's bindings */
    int nsNr = ctxt->nsNr;

    /* nesting depth guard, lifted by XML_PARSE_HUGE */
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
                          xmlParserMaxDepth);
        ctxt->instate = XML_PARSER_EOF;
        return;
    }

    /* Capture start position */
    if (ctxt->record_info) {
        node_info.begin_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
        node_info.begin_line = ctxt->input->line;
    }

    /*
     * Push a space-handling value for this element: the current one is
     * inherited, except that -1 is used when the stack is empty or when
     * the current value is -2.
     */
    if (ctxt->spaceNr == 0)
        spacePush(ctxt, -1);
    else if (*ctxt->space == -2)
        spacePush(ctxt, -1);
    else
        spacePush(ctxt, *ctxt->space);

    line = ctxt->input->line;
    /*
     * Parse the start tag with the SAX2 or the SAX1 routine; without
     * SAX1 support compiled in, the SAX2 routine is called
     * unconditionally.
     */
#ifdef LIBXML_SAX1_ENABLED
    if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
    else
        name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    if (name == NULL) {
        /* no start tag: undo the space push done above */
        spacePop(ctxt);
        return;
    }
    namePush(ctxt, name);
    /* the current node at this point, recorded in node_info.node below */
    ret = ctxt->node;

#ifdef LIBXML_VALID_ENABLED
    /*
     * [ VC: Root Element Type ]
     * The Name in the document type declaration must match the element
     * type of the root element.
     */
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

    /*
     * Check for an Empty Element.
     */
    if ((RAW == '/') && (NXT(1) == '>')) {
        SKIP(2);
        /* no end tag will follow: emit the end-of-element event now */
        if (ctxt->sax2) {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
#ifdef LIBXML_SAX1_ENABLED
        } else {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
        }
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        if ( ret != NULL && ctxt->record_info ) {
           node_info.end_pos = ctxt->input->consumed +
                              (CUR_PTR - ctxt->input->base);
           node_info.end_line = ctxt->input->line;
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }
    if (RAW == '>') {
        NEXT1;
    } else {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
                     "Couldn't find end of Start Tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);

        /*
         * Capture end position and add node
         */
        if ( ret != NULL && ctxt->record_info ) {
           node_info.end_pos = ctxt->input->consumed +
                              (CUR_PTR - ctxt->input->base);
           node_info.end_line = ctxt->input->line;
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }

    /*
     * Parse the content of the element:
     */
    xmlParseContent(ctxt);
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    if (!IS_BYTE_CHAR(RAW)) {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
         "Premature end of data in tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         * (no node info is recorded on this path)
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        return;
    }

    /*
     * parse the end of tag: '</' should be here.
     * The name is popped here on the SAX2 path only; the SAX1 path
     * leaves all the cleanup to xmlParseEndTag1().
     */
    if (ctxt->sax2) {
        xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
        namePop(ctxt);
    }
#ifdef LIBXML_SAX1_ENABLED
      else
        xmlParseEndTag1(ctxt, line);
#endif /* LIBXML_SAX1_ENABLED */

    /*
     * Capture end position and add node
     */
    if ( ret != NULL && ctxt->record_info ) {
       node_info.end_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
       node_info.end_line = ctxt->input->line;
       node_info.node = ret;
       xmlParserAddNodeInfo(ctxt, &node_info);
    }
}
[0-9]+ 10118: * 10119: * In practice allow [0-9].[0-9]+ at that level 10120: * 10121: * Returns the string giving the XML version number, or NULL 10122: */ 10123: xmlChar * 10124: xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10125: xmlChar *buf = NULL; 10126: int len = 0; 10127: int size = 10; 10128: xmlChar cur; 10129: 10130: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10131: if (buf == NULL) { 10132: xmlErrMemory(ctxt, NULL); 10133: return(NULL); 10134: } 10135: cur = CUR; 10136: if (!((cur >= '0') && (cur <= '9'))) { 10137: xmlFree(buf); 10138: return(NULL); 10139: } 10140: buf[len++] = cur; 10141: NEXT; 10142: cur=CUR; 10143: if (cur != '.') { 10144: xmlFree(buf); 10145: return(NULL); 10146: } 10147: buf[len++] = cur; 10148: NEXT; 10149: cur=CUR; 10150: while ((cur >= '0') && (cur <= '9')) { 10151: if (len + 1 >= size) { 10152: xmlChar *tmp; 10153: 10154: size *= 2; 10155: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10156: if (tmp == NULL) { 10157: xmlFree(buf); 10158: xmlErrMemory(ctxt, NULL); 10159: return(NULL); 10160: } 10161: buf = tmp; 10162: } 10163: buf[len++] = cur; 10164: NEXT; 10165: cur=CUR; 10166: } 10167: buf[len] = 0; 10168: return(buf); 10169: } 10170: 10171: /** 10172: * xmlParseVersionInfo: 10173: * @ctxt: an XML parser context 10174: * 10175: * parse the XML version. 10176: * 10177: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10178: * 10179: * [25] Eq ::= S? '=' S? 10180: * 10181: * Returns the version string, e.g. 
"1.0" 10182: */ 10183: 10184: xmlChar * 10185: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10186: xmlChar *version = NULL; 10187: 10188: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10189: SKIP(7); 10190: SKIP_BLANKS; 10191: if (RAW != '=') { 10192: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10193: return(NULL); 10194: } 10195: NEXT; 10196: SKIP_BLANKS; 10197: if (RAW == '"') { 10198: NEXT; 10199: version = xmlParseVersionNum(ctxt); 10200: if (RAW != '"') { 10201: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10202: } else 10203: NEXT; 10204: } else if (RAW == '\''){ 10205: NEXT; 10206: version = xmlParseVersionNum(ctxt); 10207: if (RAW != '\'') { 10208: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10209: } else 10210: NEXT; 10211: } else { 10212: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10213: } 10214: } 10215: return(version); 10216: } 10217: 10218: /** 10219: * xmlParseEncName: 10220: * @ctxt: an XML parser context 10221: * 10222: * parse the XML encoding name 10223: * 10224: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10225: * 10226: * Returns the encoding name value or NULL 10227: */ 10228: xmlChar * 10229: xmlParseEncName(xmlParserCtxtPtr ctxt) { 10230: xmlChar *buf = NULL; 10231: int len = 0; 10232: int size = 10; 10233: xmlChar cur; 10234: 10235: cur = CUR; 10236: if (((cur >= 'a') && (cur <= 'z')) || 10237: ((cur >= 'A') && (cur <= 'Z'))) { 10238: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10239: if (buf == NULL) { 10240: xmlErrMemory(ctxt, NULL); 10241: return(NULL); 10242: } 10243: 10244: buf[len++] = cur; 10245: NEXT; 10246: cur = CUR; 10247: while (((cur >= 'a') && (cur <= 'z')) || 10248: ((cur >= 'A') && (cur <= 'Z')) || 10249: ((cur >= '0') && (cur <= '9')) || 10250: (cur == '.') || (cur == '_') || 10251: (cur == '-')) { 10252: if (len + 1 >= size) { 10253: xmlChar *tmp; 10254: 10255: size *= 2; 10256: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10257: if (tmp == NULL) { 
10258: xmlErrMemory(ctxt, NULL); 10259: xmlFree(buf); 10260: return(NULL); 10261: } 10262: buf = tmp; 10263: } 10264: buf[len++] = cur; 10265: NEXT; 10266: cur = CUR; 10267: if (cur == 0) { 10268: SHRINK; 10269: GROW; 10270: cur = CUR; 10271: } 10272: } 10273: buf[len] = 0; 10274: } else { 10275: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10276: } 10277: return(buf); 10278: } 10279: 10280: /** 10281: * xmlParseEncodingDecl: 10282: * @ctxt: an XML parser context 10283: * 10284: * parse the XML encoding declaration 10285: * 10286: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10287: * 10288: * this setups the conversion filters. 10289: * 10290: * Returns the encoding value or NULL 10291: */ 10292: 10293: const xmlChar * 10294: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10295: xmlChar *encoding = NULL; 10296: 10297: SKIP_BLANKS; 10298: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10299: SKIP(8); 10300: SKIP_BLANKS; 10301: if (RAW != '=') { 10302: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10303: return(NULL); 10304: } 10305: NEXT; 10306: SKIP_BLANKS; 10307: if (RAW == '"') { 10308: NEXT; 10309: encoding = xmlParseEncName(ctxt); 10310: if (RAW != '"') { 10311: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10312: } else 10313: NEXT; 10314: } else if (RAW == '\''){ 10315: NEXT; 10316: encoding = xmlParseEncName(ctxt); 10317: if (RAW != '\'') { 10318: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10319: } else 10320: NEXT; 10321: } else { 10322: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10323: } 10324: 10325: /* 10326: * Non standard parsing, allowing the user to ignore encoding 10327: */ 10328: if (ctxt->options & XML_PARSE_IGNORE_ENC) 10329: return(encoding); 10330: 10331: /* 10332: * UTF-16 encoding stwich has already taken place at this stage, 10333: * more over the little-endian/big-endian selection is already done 10334: */ 10335: if ((encoding != NULL) && 10336: 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10337: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10338: /* 10339: * If no encoding was passed to the parser, that we are 10340: * using UTF-16 and no decoder is present i.e. the 10341: * document is apparently UTF-8 compatible, then raise an 10342: * encoding mismatch fatal error 10343: */ 10344: if ((ctxt->encoding == NULL) && 10345: (ctxt->input->buf != NULL) && 10346: (ctxt->input->buf->encoder == NULL)) { 10347: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10348: "Document labelled UTF-16 but has UTF-8 content\n"); 10349: } 10350: if (ctxt->encoding != NULL) 10351: xmlFree((xmlChar *) ctxt->encoding); 10352: ctxt->encoding = encoding; 10353: } 10354: /* 10355: * UTF-8 encoding is handled natively 10356: */ 10357: else if ((encoding != NULL) && 10358: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10359: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10360: if (ctxt->encoding != NULL) 10361: xmlFree((xmlChar *) ctxt->encoding); 10362: ctxt->encoding = encoding; 10363: } 10364: else if (encoding != NULL) { 10365: xmlCharEncodingHandlerPtr handler; 10366: 10367: if (ctxt->input->encoding != NULL) 10368: xmlFree((xmlChar *) ctxt->input->encoding); 10369: ctxt->input->encoding = encoding; 10370: 10371: handler = xmlFindCharEncodingHandler((const char *) encoding); 10372: if (handler != NULL) { 10373: xmlSwitchToEncoding(ctxt, handler); 10374: } else { 10375: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10376: "Unsupported encoding %s\n", encoding); 10377: return(NULL); 10378: } 10379: } 10380: } 10381: return(encoding); 10382: } 10383: 10384: /** 10385: * xmlParseSDDecl: 10386: * @ctxt: an XML parser context 10387: * 10388: * parse the XML standalone declaration 10389: * 10390: * [32] SDDecl ::= S 'standalone' Eq 10391: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10392: * 10393: * [ VC: Standalone Document Declaration ] 10394: * TODO The standalone document declaration must have the 
value "no"
 * if any external markup declarations contain declarations of:
 *  - attributes with default values, if elements to which these
 *    attributes apply appear in the document without specifications
 *    of values for these attributes, or
 *  - entities (other than amp, lt, gt, apos, quot), if references
 *    to those entities appear in the document, or
 *  - attributes with values subject to normalization, where the
 *    attribute appears in the document with a value which will change
 *    as a result of normalization, or
 *  - element types with element content, if white space occurs directly
 *    within any instance of those types.
 *
 * Returns:
 *   1 if standalone="yes"
 *   0 if standalone="no"
 *  -2 if standalone attribute is missing or invalid
 *        (A standalone value of -2 means that the XML declaration was found,
 *         but no value was specified for the standalone attribute).
 */

int
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
    int standalone = -2;

    SKIP_BLANKS;
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
        SKIP(10);
        SKIP_BLANKS;
        if (RAW != '=') {
            xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
            return(standalone);
        }
        NEXT;
        SKIP_BLANKS;
        /* single-quoted value */
        if (RAW == '\''){
            NEXT;
            if ((RAW == 'n') && (NXT(1) == 'o')) {
                standalone = 0;
                SKIP(2);
            } else if ((RAW == 'y') && (NXT(1) == 'e') &&
                       (NXT(2) == 's')) {
                standalone = 1;
                SKIP(3);
            } else {
                xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
            }
            if (RAW != '\'') {
                xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
            } else
                NEXT;
        /* double-quoted value, same handling as above */
        } else if (RAW == '"'){
            NEXT;
            if ((RAW == 'n') && (NXT(1) == 'o')) {
                standalone = 0;
                SKIP(2);
            } else if ((RAW == 'y') && (NXT(1) == 'e') &&
                       (NXT(2) == 's')) {
                standalone = 1;
                SKIP(3);
            } else {
                xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
            }
            if (RAW != '"') {
                xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
            } else
                NEXT;
        } else {
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
        }
    }
    return(standalone);
}

/**
 * xmlParseXMLDecl:
 * @ctxt:  an XML parser context
 *
 * parse an XML declaration header
 *
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 *
 * The input must be positioned on "<?xml"; the first five bytes are
 * skipped without being checked. Results are stored in the context:
 * ctxt->version (ownership of the parsed string is transferred),
 * ctxt->input->standalone, and whatever xmlParseEncodingDecl() records.
 * Parsing of the declaration stops early when ctxt->errNo is
 * XML_ERR_UNSUPPORTED_ENCODING after the encoding declaration.
 */

void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
    xmlChar *version;

    /*
     * This value for standalone indicates that the document has an
     * XML declaration but it does not have a standalone attribute.
     * It will be overwritten later if a standalone attribute is found.
     */
    ctxt->input->standalone = -2;

    /*
     * We know that '<?xml' is here.
     */
    SKIP(5);

    if (!IS_BLANK_CH(RAW)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Blank needed after '<?xml'\n");
    }
    SKIP_BLANKS;

    /*
     * We must have the VersionInfo here.
     */
    version = xmlParseVersionInfo(ctxt);
    if (version == NULL) {
        xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
    } else {
        if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
            /*
             * Changed here for XML-1.0 5th edition
             * With XML_PARSE_OLD10 any other version is fatal; otherwise
             * a "1.x" version only produces a warning.
             */
            if (ctxt->options & XML_PARSE_OLD10) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
                                  "Unsupported version '%s'\n",
                                  version);
            } else {
                if ((version[0] == '1') && ((version[1] == '.'))) {
                    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
                                  "Unsupported version '%s'\n",
                                  version, NULL);
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
                                      "Unsupported version '%s'\n",
                                      version);
                }
            }
        }
        /* the context takes ownership of the version string */
        if (ctxt->version != NULL)
            xmlFree((void *) ctxt->version);
        ctxt->version = version;
    }

    /*
     * We may have the encoding declaration
     */
    if (!IS_BLANK_CH(RAW)) {
        if ((RAW == '?') && (NXT(1) == '>')) {
            SKIP(2);
            return;
        }
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
    }
    /* the return value is not used; the outcome is read from the context */
    xmlParseEncodingDecl(ctxt);
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
        /*
         * The XML REC instructs us to stop parsing right here
         */
        return;
    }

    /*
     * We may have the standalone status.
     * The blank check is only made when ctxt->input->encoding is set.
     */
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
        if ((RAW == '?') && (NXT(1) == '>')) {
            SKIP(2);
            return;
        }
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
    }

    /*
     * We can grow the input buffer freely at that point
     */
    GROW;

    SKIP_BLANKS;
    ctxt->input->standalone = xmlParseSDDecl(ctxt);

    SKIP_BLANKS;
    if ((RAW == '?') && (NXT(1) == '>')) {
        SKIP(2);
    } else if (RAW == '>') {
        /* Deprecated old WD ... */
        xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
        NEXT;
    } else {
        /* error recovery: move to the end of the tag and step over it */
        xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
        MOVETO_ENDTAG(CUR_PTR);
        NEXT;
    }
}

/**
 * xmlParseMisc:
 * @ctxt:  an XML parser context
 *
 * parse an XML Misc* optional field.
 *
 * [27] Misc ::= Comment | PI | S
 *
 * Consumes processing instructions, comments and blanks for as long as
 * one of them starts at the current position and the parser state is not
 * XML_PARSER_EOF.
 */

void
xmlParseMisc(xmlParserCtxtPtr ctxt) {
    while ((ctxt->instate != XML_PARSER_EOF) &&
           (((RAW == '<') && (NXT(1) == '?')) ||
            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
            IS_BLANK_CH(CUR))) {
        if ((RAW == '<') && (NXT(1) == '?')) {
            xmlParsePI(ctxt);
        } else if (IS_BLANK_CH(CUR)) {
            NEXT;
        } else
            xmlParseComment(ctxt);
    }
}

/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /* only auto-detect when no encoding is set on the context */
    if ((ctxt->encoding == NULL) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* an error is raised for empty input but processing continues */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     * do not GROW here to avoid the detected encoder to decode more
     * than just the first line, unless the amount of data is really
     * too small to hold "<?xml version="1.0" encoding="foo"
     */
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
       GROW;
    }
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* no XML declaration: assume the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset: 1 while in the internal subset, 2 for the external one */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
            if (ctxt->instate == XML_PARSER_EOF)
                return(-1);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        if (ctxt->instate == XML_PARSER_EOF)
            return(-1);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* anything left after the root element and Misc* is an error */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     * (called without checking ctxt->disableSAX)
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* record the outcome of the parse on the document */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}

/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and external subset loading are switched off on the context
 * (ctxt->validate and ctxt->loadsubset are set to 0) before the content
 * is parsed.
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDefaultSAXHandlerInit();

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /* xmlParseContent() stops on "</" or on a 0 byte; both are checked */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}

#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
 *                                                                      *
 *              Progressive parsing interfaces                          *
 *                                                                      *
 ************************************************************************/

/**
 * xmlParseLookupSequence:
 * @ctxt:  an XML parser context
 * @first:  the first char to lookup
 * @next:  the next char to lookup or zero
 * @third:  the next char to lookup or zero
 *
 * Try to find if a sequence (first, next, third) or  just (first next) or
 * (first) is available in the input stream.
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
 * to avoid rescanning sequences of bytes, it DOES change the state of the
 * parser, do not use liberally.
 *
 * On a match ctxt->checkIndex is reset to 0 and the value returned is the
 * offset of the match relative to the current input position (in->cur).
 * On failure ctxt->checkIndex is set to the position where the scan
 * stopped, so that the next call resumes from there.
 *
 * Returns the index to the current parsing point if the full sequence
 *      is available, -1 otherwise.
 */
static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
                       xmlChar next, xmlChar third) {
    int base, len;
    xmlParserInputPtr in;
    const xmlChar *buf;

    in = ctxt->input;
    if (in == NULL) return(-1);
    base = in->cur - in->base;
    if (base < 0) return(-1);
    /* resume from the previous scan position when it is further ahead */
    if (ctxt->checkIndex > base)
        base = ctxt->checkIndex;
    if (in->buf == NULL) {
        buf = in->base;
        len = in->length;
    } else {
        buf = xmlBufContent(in->buf->buffer);
        len = xmlBufUse(in->buf->buffer);
    }
    /* take into account the sequence length */
    if (third) len -= 2;
    else if (next) len --;
    for (;base < len;base++) {
        if (buf[base] == first) {
            if (third != 0) {
                if ((buf[base + 1] != next) ||
                    (buf[base + 2] != third)) continue;
            } else if (next != 0) {
                if (buf[base + 1] != next) continue;
            }
            ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
            if (next == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c' found at %d\n",
                        first, base);
            else if (third == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c' found at %d\n",
                        first, next, base);
            else
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c%c' found at %d\n",
                        first, next, third, base);
#endif
            /* convert the buffer index into an offset from in->cur */
            return(base - (in->cur - in->base));
        }
    }
    /* not found: remember how far the scan went */
    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
    if (next == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c' failed\n", first);
    else if (third == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c' failed\n", first, next);
    else
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
    return(-1);
}

/**
 * xmlParseGetLasts:
 * @ctxt:  an XML parser context
 * @lastlt:  pointer to store the last '<' from the input
 * @lastgt:  pointer to store the last '>' from the input
 *
 * Lookup the last < and > in the current chunk
 *
 * Both outputs are set to NULL unless the context is progressive and has
 * exactly one input (ctxt->inputNr == 1), and also when the buffered data
 * contains no '<'. *lastgt is the first '>' found after the last '<',
 * skipping over quoted strings; if there is none, it is the last '>'
 * before that '<', or NULL.
 */
static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
                 const xmlChar **lastgt) {
    const xmlChar *tmp;

    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
        xmlGenericError(xmlGenericErrorContext,
                    "Internal error: xmlParseGetLasts\n");
        return;
    }
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
        /* scan backward from the end of the data for the last '<' */
        tmp = ctxt->input->end;
        tmp--;
        while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
        if (tmp < ctxt->input->base) {
            *lastlt = NULL;
            *lastgt = NULL;
        } else {
            *lastlt = tmp;
            tmp++;
            /* scan forward for '>', stepping over quoted strings */
            while ((tmp < ctxt->input->end) && (*tmp != '>')) {
                if (*tmp == '\'') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else if (*tmp == '"') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else
                    tmp++;
            }
            if (tmp < ctxt->input->end)
                *lastgt = tmp;
            else {
                /* no '>' after the last '<': take the last one before it */
                tmp = *lastlt;
                tmp--;
                while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
                if (tmp >= ctxt->input->base)
                    *lastgt = tmp;
                else
                    *lastgt = NULL;
            }
        }
    } else {
        *lastlt = NULL;
        *lastgt = NULL;
    }
}

/**
 * xmlCheckCdataPush:
 * @utf:  pointer to the block of characters
 * @len:  length of the block in bytes
 *
 * Check that the block of characters is okay as CDATA content: the bytes
 * are decoded as UTF-8 and each code point is checked (ASCII control
 * characters other than 0x9, 0xA and 0xD are rejected, multi-byte code
 * points must satisfy xmlIsCharQ()).
 *
 * When a multi-byte sequence is cut by the end of the block, checking
 * stops before it and the number of bytes validated so far is returned,
 * which is then smaller than @len.
 *
 * NOTE(review): an error located at index 0 yields -0, i.e. 0, which is
 * the same value as returned for a NULL or empty block - confirm callers
 * cope with that.
 *
 * Returns the number of bytes to pass if okay, a negative index where an
 * UTF-8 error occurred otherwise
 */
static int
xmlCheckCdataPush(const xmlChar *utf, int len) {
    int ix;
    unsigned char c;
    int codepoint;

    if ((utf == NULL) || (len <= 0))
        return(0);

    for (ix = 0; ix < len;) {      /* bounded by len, no terminator needed */
        c = utf[ix];
        if ((c & 0x80) == 0x00) {       /* 1-byte code, starts with 0 */
            if (c >= 0x20)
                ix++;
            else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
                ix++;
            else
                return(-ix);
        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
            /* sequence truncated by the end of the block */
            if (ix + 2 > len) return(ix);
            if ((utf[ix+1] & 0xc0 ) != 0x80)
                return(-ix);
            codepoint = (utf[ix] & 0x1f) << 6;
            codepoint |= utf[ix+1] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
            if (ix + 3 > len) return(ix);
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80))
                    return(-ix);
            codepoint = (utf[ix] & 0xf) << 12;
            codepoint |= (utf[ix+1] & 0x3f) << 6;
            codepoint |= utf[ix+2] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
            if (ix + 4 > len) return(ix);
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80) ||
                ((utf[ix+3] & 0xc0) != 0x80))
                    return(-ix);
            codepoint = (utf[ix] & 0x7) << 18;
            codepoint |= (utf[ix+1] & 0x3f) << 12;
            codepoint |= (utf[ix+2] & 0x3f) << 6;
            codepoint |= utf[ix+3] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 4;
        } else                          /* unknown encoding */
            return(-ix);
      }
      return(ix);
}

/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing
 *
 * Returns zero if no parsing was possible
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail, tlen;
    xmlChar cur, next;
    const xmlChar *lastlt, *lastgt;

    if (ctxt->input == NULL)
        return(0);

#ifdef DEBUG_PUSH
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CDATA_SECTION\n");break;
        case XML_PARSER_END_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try END_TAG\n");break;
        case XML_PARSER_ENTITY_DECL:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_DECL\n");break;
        case XML_PARSER_ENTITY_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_VALUE\n");break;
        case
XML_PARSER_ATTRIBUTE_VALUE: 11168: xmlGenericError(xmlGenericErrorContext, 11169: "PP: try ATTRIBUTE_VALUE\n");break; 11170: case XML_PARSER_DTD: 11171: xmlGenericError(xmlGenericErrorContext, 11172: "PP: try DTD\n");break; 11173: case XML_PARSER_EPILOG: 11174: xmlGenericError(xmlGenericErrorContext, 11175: "PP: try EPILOG\n");break; 11176: case XML_PARSER_PI: 11177: xmlGenericError(xmlGenericErrorContext, 11178: "PP: try PI\n");break; 11179: case XML_PARSER_IGNORE: 11180: xmlGenericError(xmlGenericErrorContext, 11181: "PP: try IGNORE\n");break; 11182: } 11183: #endif 11184: 11185: if ((ctxt->input != NULL) && 11186: (ctxt->input->cur - ctxt->input->base > 4096)) { 11187: xmlSHRINK(ctxt); 11188: ctxt->checkIndex = 0; 11189: } 11190: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11191: 11192: while (ctxt->instate != XML_PARSER_EOF) { 11193: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11194: return(0); 11195: 11196: 11197: /* 11198: * Pop-up of finished entities. 11199: */ 11200: while ((RAW == 0) && (ctxt->inputNr > 1)) 11201: xmlPopInput(ctxt); 11202: 11203: if (ctxt->input == NULL) break; 11204: if (ctxt->input->buf == NULL) 11205: avail = ctxt->input->length - 11206: (ctxt->input->cur - ctxt->input->base); 11207: else { 11208: /* 11209: * If we are operating on converted input, try to flush 11210: * remainng chars to avoid them stalling in the non-converted 11211: * buffer. But do not do this in document start where 11212: * encoding="..." may not have been read and we work on a 11213: * guessed encoding. 
11214: */ 11215: if ((ctxt->instate != XML_PARSER_START) && 11216: (ctxt->input->buf->raw != NULL) && 11217: (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11218: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11219: ctxt->input); 11220: size_t current = ctxt->input->cur - ctxt->input->base; 11221: 11222: xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11223: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11224: base, current); 11225: } 11226: avail = xmlBufUse(ctxt->input->buf->buffer) - 11227: (ctxt->input->cur - ctxt->input->base); 11228: } 11229: if (avail < 1) 11230: goto done; 11231: switch (ctxt->instate) { 11232: case XML_PARSER_EOF: 11233: /* 11234: * Document parsing is done ! 11235: */ 11236: goto done; 11237: case XML_PARSER_START: 11238: if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11239: xmlChar start[4]; 11240: xmlCharEncoding enc; 11241: 11242: /* 11243: * Very first chars read from the document flow. 11244: */ 11245: if (avail < 4) 11246: goto done; 11247: 11248: /* 11249: * Get the 4 first bytes and decode the charset 11250: * if enc != XML_CHAR_ENCODING_NONE 11251: * plug some encoding conversion routines, 11252: * else xmlSwitchEncoding will set to (default) 11253: * UTF8. 
11254: */ 11255: start[0] = RAW; 11256: start[1] = NXT(1); 11257: start[2] = NXT(2); 11258: start[3] = NXT(3); 11259: enc = xmlDetectCharEncoding(start, 4); 11260: xmlSwitchEncoding(ctxt, enc); 11261: break; 11262: } 11263: 11264: if (avail < 2) 11265: goto done; 11266: cur = ctxt->input->cur[0]; 11267: next = ctxt->input->cur[1]; 11268: if (cur == 0) { 11269: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11270: ctxt->sax->setDocumentLocator(ctxt->userData, 11271: &xmlDefaultSAXLocator); 11272: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11273: ctxt->instate = XML_PARSER_EOF; 11274: #ifdef DEBUG_PUSH 11275: xmlGenericError(xmlGenericErrorContext, 11276: "PP: entering EOF\n"); 11277: #endif 11278: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11279: ctxt->sax->endDocument(ctxt->userData); 11280: goto done; 11281: } 11282: if ((cur == '<') && (next == '?')) { 11283: /* PI or XML decl */ 11284: if (avail < 5) return(ret); 11285: if ((!terminate) && 11286: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11287: return(ret); 11288: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11289: ctxt->sax->setDocumentLocator(ctxt->userData, 11290: &xmlDefaultSAXLocator); 11291: if ((ctxt->input->cur[2] == 'x') && 11292: (ctxt->input->cur[3] == 'm') && 11293: (ctxt->input->cur[4] == 'l') && 11294: (IS_BLANK_CH(ctxt->input->cur[5]))) { 11295: ret += 5; 11296: #ifdef DEBUG_PUSH 11297: xmlGenericError(xmlGenericErrorContext, 11298: "PP: Parsing XML Decl\n"); 11299: #endif 11300: xmlParseXMLDecl(ctxt); 11301: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11302: /* 11303: * The XML REC instructs us to stop parsing right 11304: * here 11305: */ 11306: ctxt->instate = XML_PARSER_EOF; 11307: return(0); 11308: } 11309: ctxt->standalone = ctxt->input->standalone; 11310: if ((ctxt->encoding == NULL) && 11311: (ctxt->input->encoding != NULL)) 11312: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11313: if ((ctxt->sax) && (ctxt->sax->startDocument) && 11314: 
(!ctxt->disableSAX)) 11315: ctxt->sax->startDocument(ctxt->userData); 11316: ctxt->instate = XML_PARSER_MISC; 11317: #ifdef DEBUG_PUSH 11318: xmlGenericError(xmlGenericErrorContext, 11319: "PP: entering MISC\n"); 11320: #endif 11321: } else { 11322: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11323: if ((ctxt->sax) && (ctxt->sax->startDocument) && 11324: (!ctxt->disableSAX)) 11325: ctxt->sax->startDocument(ctxt->userData); 11326: ctxt->instate = XML_PARSER_MISC; 11327: #ifdef DEBUG_PUSH 11328: xmlGenericError(xmlGenericErrorContext, 11329: "PP: entering MISC\n"); 11330: #endif 11331: } 11332: } else { 11333: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11334: ctxt->sax->setDocumentLocator(ctxt->userData, 11335: &xmlDefaultSAXLocator); 11336: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11337: if (ctxt->version == NULL) { 11338: xmlErrMemory(ctxt, NULL); 11339: break; 11340: } 11341: if ((ctxt->sax) && (ctxt->sax->startDocument) && 11342: (!ctxt->disableSAX)) 11343: ctxt->sax->startDocument(ctxt->userData); 11344: ctxt->instate = XML_PARSER_MISC; 11345: #ifdef DEBUG_PUSH 11346: xmlGenericError(xmlGenericErrorContext, 11347: "PP: entering MISC\n"); 11348: #endif 11349: } 11350: break; 11351: case XML_PARSER_START_TAG: { 11352: const xmlChar *name; 11353: const xmlChar *prefix = NULL; 11354: const xmlChar *URI = NULL; 11355: int nsNr = ctxt->nsNr; 11356: 11357: if ((avail < 2) && (ctxt->inputNr == 1)) 11358: goto done; 11359: cur = ctxt->input->cur[0]; 11360: if (cur != '<') { 11361: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11362: ctxt->instate = XML_PARSER_EOF; 11363: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11364: ctxt->sax->endDocument(ctxt->userData); 11365: goto done; 11366: } 11367: if (!terminate) { 11368: if (ctxt->progressive) { 11369: /* > can be found unescaped in attribute values */ 11370: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11371: goto done; 11372: } else if (xmlParseLookupSequence(ctxt, '>', 0, 
0) < 0) { 11373: goto done; 11374: } 11375: } 11376: if (ctxt->spaceNr == 0) 11377: spacePush(ctxt, -1); 11378: else if (*ctxt->space == -2) 11379: spacePush(ctxt, -1); 11380: else 11381: spacePush(ctxt, *ctxt->space); 11382: #ifdef LIBXML_SAX1_ENABLED 11383: if (ctxt->sax2) 11384: #endif /* LIBXML_SAX1_ENABLED */ 11385: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11386: #ifdef LIBXML_SAX1_ENABLED 11387: else 11388: name = xmlParseStartTag(ctxt); 11389: #endif /* LIBXML_SAX1_ENABLED */ 11390: if (ctxt->instate == XML_PARSER_EOF) 11391: goto done; 11392: if (name == NULL) { 11393: spacePop(ctxt); 11394: ctxt->instate = XML_PARSER_EOF; 11395: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11396: ctxt->sax->endDocument(ctxt->userData); 11397: goto done; 11398: } 11399: #ifdef LIBXML_VALID_ENABLED 11400: /* 11401: * [ VC: Root Element Type ] 11402: * The Name in the document type declaration must match 11403: * the element type of the root element. 11404: */ 11405: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11406: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11407: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11408: #endif /* LIBXML_VALID_ENABLED */ 11409: 11410: /* 11411: * Check for an Empty Element. 
11412: */ 11413: if ((RAW == '/') && (NXT(1) == '>')) { 11414: SKIP(2); 11415: 11416: if (ctxt->sax2) { 11417: if ((ctxt->sax != NULL) && 11418: (ctxt->sax->endElementNs != NULL) && 11419: (!ctxt->disableSAX)) 11420: ctxt->sax->endElementNs(ctxt->userData, name, 11421: prefix, URI); 11422: if (ctxt->nsNr - nsNr > 0) 11423: nsPop(ctxt, ctxt->nsNr - nsNr); 11424: #ifdef LIBXML_SAX1_ENABLED 11425: } else { 11426: if ((ctxt->sax != NULL) && 11427: (ctxt->sax->endElement != NULL) && 11428: (!ctxt->disableSAX)) 11429: ctxt->sax->endElement(ctxt->userData, name); 11430: #endif /* LIBXML_SAX1_ENABLED */ 11431: } 11432: if (ctxt->instate == XML_PARSER_EOF) 11433: goto done; 11434: spacePop(ctxt); 11435: if (ctxt->nameNr == 0) { 11436: ctxt->instate = XML_PARSER_EPILOG; 11437: } else { 11438: ctxt->instate = XML_PARSER_CONTENT; 11439: } 11440: ctxt->progressive = 1; 11441: break; 11442: } 11443: if (RAW == '>') { 11444: NEXT; 11445: } else { 11446: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11447: "Couldn't find end of Start Tag %s\n", 11448: name); 11449: nodePop(ctxt); 11450: spacePop(ctxt); 11451: } 11452: if (ctxt->sax2) 11453: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11454: #ifdef LIBXML_SAX1_ENABLED 11455: else 11456: namePush(ctxt, name); 11457: #endif /* LIBXML_SAX1_ENABLED */ 11458: 11459: ctxt->instate = XML_PARSER_CONTENT; 11460: ctxt->progressive = 1; 11461: break; 11462: } 11463: case XML_PARSER_CONTENT: { 11464: const xmlChar *test; 11465: unsigned int cons; 11466: if ((avail < 2) && (ctxt->inputNr == 1)) 11467: goto done; 11468: cur = ctxt->input->cur[0]; 11469: next = ctxt->input->cur[1]; 11470: 11471: test = CUR_PTR; 11472: cons = ctxt->input->consumed; 11473: if ((cur == '<') && (next == '/')) { 11474: ctxt->instate = XML_PARSER_END_TAG; 11475: break; 11476: } else if ((cur == '<') && (next == '?')) { 11477: if ((!terminate) && 11478: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11479: ctxt->progressive = XML_PARSER_PI; 11480: goto 
done; 11481: } 11482: xmlParsePI(ctxt); 11483: ctxt->instate = XML_PARSER_CONTENT; 11484: ctxt->progressive = 1; 11485: } else if ((cur == '<') && (next != '!')) { 11486: ctxt->instate = XML_PARSER_START_TAG; 11487: break; 11488: } else if ((cur == '<') && (next == '!') && 11489: (ctxt->input->cur[2] == '-') && 11490: (ctxt->input->cur[3] == '-')) { 11491: int term; 11492: 11493: if (avail < 4) 11494: goto done; 11495: ctxt->input->cur += 4; 11496: term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11497: ctxt->input->cur -= 4; 11498: if ((!terminate) && (term < 0)) { 11499: ctxt->progressive = XML_PARSER_COMMENT; 11500: goto done; 11501: } 11502: xmlParseComment(ctxt); 11503: ctxt->instate = XML_PARSER_CONTENT; 11504: ctxt->progressive = 1; 11505: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11506: (ctxt->input->cur[2] == '[') && 11507: (ctxt->input->cur[3] == 'C') && 11508: (ctxt->input->cur[4] == 'D') && 11509: (ctxt->input->cur[5] == 'A') && 11510: (ctxt->input->cur[6] == 'T') && 11511: (ctxt->input->cur[7] == 'A') && 11512: (ctxt->input->cur[8] == '[')) { 11513: SKIP(9); 11514: ctxt->instate = XML_PARSER_CDATA_SECTION; 11515: break; 11516: } else if ((cur == '<') && (next == '!') && 11517: (avail < 9)) { 11518: goto done; 11519: } else if (cur == '&') { 11520: if ((!terminate) && 11521: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11522: goto done; 11523: xmlParseReference(ctxt); 11524: } else { 11525: /* TODO Avoid the extra copy, handle directly !!! */ 11526: /* 11527: * Goal of the following test is: 11528: * - minimize calls to the SAX 'character' callback 11529: * when they are mergeable 11530: * - handle an problem for isBlank when we only parse 11531: * a sequence of blank chars and the next one is 11532: * not available to check against '<' presence. 11533: * - tries to homogenize the differences in SAX 11534: * callbacks between the push and pull versions 11535: * of the parser. 
11536: */ 11537: if ((ctxt->inputNr == 1) && 11538: (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11539: if (!terminate) { 11540: if (ctxt->progressive) { 11541: if ((lastlt == NULL) || 11542: (ctxt->input->cur > lastlt)) 11543: goto done; 11544: } else if (xmlParseLookupSequence(ctxt, 11545: '<', 0, 0) < 0) { 11546: goto done; 11547: } 11548: } 11549: } 11550: ctxt->checkIndex = 0; 11551: xmlParseCharData(ctxt, 0); 11552: } 11553: /* 11554: * Pop-up of finished entities. 11555: */ 11556: while ((RAW == 0) && (ctxt->inputNr > 1)) 11557: xmlPopInput(ctxt); 11558: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11559: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11560: "detected an error in element content\n"); 11561: ctxt->instate = XML_PARSER_EOF; 11562: break; 11563: } 11564: break; 11565: } 11566: case XML_PARSER_END_TAG: 11567: if (avail < 2) 11568: goto done; 11569: if (!terminate) { 11570: if (ctxt->progressive) { 11571: /* > can be found unescaped in attribute values */ 11572: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11573: goto done; 11574: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11575: goto done; 11576: } 11577: } 11578: if (ctxt->sax2) { 11579: xmlParseEndTag2(ctxt, 11580: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11581: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11582: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11583: nameNsPop(ctxt); 11584: } 11585: #ifdef LIBXML_SAX1_ENABLED 11586: else 11587: xmlParseEndTag1(ctxt, 0); 11588: #endif /* LIBXML_SAX1_ENABLED */ 11589: if (ctxt->instate == XML_PARSER_EOF) { 11590: /* Nothing */ 11591: } else if (ctxt->nameNr == 0) { 11592: ctxt->instate = XML_PARSER_EPILOG; 11593: } else { 11594: ctxt->instate = XML_PARSER_CONTENT; 11595: } 11596: break; 11597: case XML_PARSER_CDATA_SECTION: { 11598: /* 11599: * The Push mode need to have the SAX callback for 11600: * cdataBlock merge back contiguous callbacks. 
11601: */ 11602: int base; 11603: 11604: base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11605: if (base < 0) { 11606: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11607: int tmp; 11608: 11609: tmp = xmlCheckCdataPush(ctxt->input->cur, 11610: XML_PARSER_BIG_BUFFER_SIZE); 11611: if (tmp < 0) { 11612: tmp = -tmp; 11613: ctxt->input->cur += tmp; 11614: goto encoding_error; 11615: } 11616: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11617: if (ctxt->sax->cdataBlock != NULL) 11618: ctxt->sax->cdataBlock(ctxt->userData, 11619: ctxt->input->cur, tmp); 11620: else if (ctxt->sax->characters != NULL) 11621: ctxt->sax->characters(ctxt->userData, 11622: ctxt->input->cur, tmp); 11623: } 11624: if (ctxt->instate == XML_PARSER_EOF) 11625: goto done; 11626: SKIPL(tmp); 11627: ctxt->checkIndex = 0; 11628: } 11629: goto done; 11630: } else { 11631: int tmp; 11632: 11633: tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11634: if ((tmp < 0) || (tmp != base)) { 11635: tmp = -tmp; 11636: ctxt->input->cur += tmp; 11637: goto encoding_error; 11638: } 11639: if ((ctxt->sax != NULL) && (base == 0) && 11640: (ctxt->sax->cdataBlock != NULL) && 11641: (!ctxt->disableSAX)) { 11642: /* 11643: * Special case to provide identical behaviour 11644: * between pull and push parsers on enpty CDATA 11645: * sections 11646: */ 11647: if ((ctxt->input->cur - ctxt->input->base >= 9) && 11648: (!strncmp((const char *)&ctxt->input->cur[-9], 11649: "<![CDATA[", 9))) 11650: ctxt->sax->cdataBlock(ctxt->userData, 11651: BAD_CAST "", 0); 11652: } else if ((ctxt->sax != NULL) && (base > 0) && 11653: (!ctxt->disableSAX)) { 11654: if (ctxt->sax->cdataBlock != NULL) 11655: ctxt->sax->cdataBlock(ctxt->userData, 11656: ctxt->input->cur, base); 11657: else if (ctxt->sax->characters != NULL) 11658: ctxt->sax->characters(ctxt->userData, 11659: ctxt->input->cur, base); 11660: } 11661: if (ctxt->instate == XML_PARSER_EOF) 11662: goto done; 11663: SKIPL(base + 3); 11664: ctxt->checkIndex = 0; 11665: 
ctxt->instate = XML_PARSER_CONTENT; 11666: #ifdef DEBUG_PUSH 11667: xmlGenericError(xmlGenericErrorContext, 11668: "PP: entering CONTENT\n"); 11669: #endif 11670: } 11671: break; 11672: } 11673: case XML_PARSER_MISC: 11674: SKIP_BLANKS; 11675: if (ctxt->input->buf == NULL) 11676: avail = ctxt->input->length - 11677: (ctxt->input->cur - ctxt->input->base); 11678: else 11679: avail = xmlBufUse(ctxt->input->buf->buffer) - 11680: (ctxt->input->cur - ctxt->input->base); 11681: if (avail < 2) 11682: goto done; 11683: cur = ctxt->input->cur[0]; 11684: next = ctxt->input->cur[1]; 11685: if ((cur == '<') && (next == '?')) { 11686: if ((!terminate) && 11687: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11688: ctxt->progressive = XML_PARSER_PI; 11689: goto done; 11690: } 11691: #ifdef DEBUG_PUSH 11692: xmlGenericError(xmlGenericErrorContext, 11693: "PP: Parsing PI\n"); 11694: #endif 11695: xmlParsePI(ctxt); 11696: if (ctxt->instate == XML_PARSER_EOF) 11697: goto done; 11698: ctxt->instate = XML_PARSER_MISC; 11699: ctxt->progressive = 1; 11700: ctxt->checkIndex = 0; 11701: } else if ((cur == '<') && (next == '!') && 11702: (ctxt->input->cur[2] == '-') && 11703: (ctxt->input->cur[3] == '-')) { 11704: if ((!terminate) && 11705: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11706: ctxt->progressive = XML_PARSER_COMMENT; 11707: goto done; 11708: } 11709: #ifdef DEBUG_PUSH 11710: xmlGenericError(xmlGenericErrorContext, 11711: "PP: Parsing Comment\n"); 11712: #endif 11713: xmlParseComment(ctxt); 11714: if (ctxt->instate == XML_PARSER_EOF) 11715: goto done; 11716: ctxt->instate = XML_PARSER_MISC; 11717: ctxt->progressive = 1; 11718: ctxt->checkIndex = 0; 11719: } else if ((cur == '<') && (next == '!') && 11720: (ctxt->input->cur[2] == 'D') && 11721: (ctxt->input->cur[3] == 'O') && 11722: (ctxt->input->cur[4] == 'C') && 11723: (ctxt->input->cur[5] == 'T') && 11724: (ctxt->input->cur[6] == 'Y') && 11725: (ctxt->input->cur[7] == 'P') && 11726: (ctxt->input->cur[8] == 'E')) 
{ 11727: if ((!terminate) && 11728: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11729: ctxt->progressive = XML_PARSER_DTD; 11730: goto done; 11731: } 11732: #ifdef DEBUG_PUSH 11733: xmlGenericError(xmlGenericErrorContext, 11734: "PP: Parsing internal subset\n"); 11735: #endif 11736: ctxt->inSubset = 1; 11737: ctxt->progressive = 0; 11738: ctxt->checkIndex = 0; 11739: xmlParseDocTypeDecl(ctxt); 11740: if (ctxt->instate == XML_PARSER_EOF) 11741: goto done; 11742: if (RAW == '[') { 11743: ctxt->instate = XML_PARSER_DTD; 11744: #ifdef DEBUG_PUSH 11745: xmlGenericError(xmlGenericErrorContext, 11746: "PP: entering DTD\n"); 11747: #endif 11748: } else { 11749: /* 11750: * Create and update the external subset. 11751: */ 11752: ctxt->inSubset = 2; 11753: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11754: (ctxt->sax->externalSubset != NULL)) 11755: ctxt->sax->externalSubset(ctxt->userData, 11756: ctxt->intSubName, ctxt->extSubSystem, 11757: ctxt->extSubURI); 11758: ctxt->inSubset = 0; 11759: xmlCleanSpecialAttr(ctxt); 11760: ctxt->instate = XML_PARSER_PROLOG; 11761: #ifdef DEBUG_PUSH 11762: xmlGenericError(xmlGenericErrorContext, 11763: "PP: entering PROLOG\n"); 11764: #endif 11765: } 11766: } else if ((cur == '<') && (next == '!') && 11767: (avail < 9)) { 11768: goto done; 11769: } else { 11770: ctxt->instate = XML_PARSER_START_TAG; 11771: ctxt->progressive = XML_PARSER_START_TAG; 11772: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11773: #ifdef DEBUG_PUSH 11774: xmlGenericError(xmlGenericErrorContext, 11775: "PP: entering START_TAG\n"); 11776: #endif 11777: } 11778: break; 11779: case XML_PARSER_PROLOG: 11780: SKIP_BLANKS; 11781: if (ctxt->input->buf == NULL) 11782: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11783: else 11784: avail = xmlBufUse(ctxt->input->buf->buffer) - 11785: (ctxt->input->cur - ctxt->input->base); 11786: if (avail < 2) 11787: goto done; 11788: cur = ctxt->input->cur[0]; 11789: next = ctxt->input->cur[1]; 11790: if 
((cur == '<') && (next == '?')) { 11791: if ((!terminate) && 11792: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11793: ctxt->progressive = XML_PARSER_PI; 11794: goto done; 11795: } 11796: #ifdef DEBUG_PUSH 11797: xmlGenericError(xmlGenericErrorContext, 11798: "PP: Parsing PI\n"); 11799: #endif 11800: xmlParsePI(ctxt); 11801: if (ctxt->instate == XML_PARSER_EOF) 11802: goto done; 11803: ctxt->instate = XML_PARSER_PROLOG; 11804: ctxt->progressive = 1; 11805: } else if ((cur == '<') && (next == '!') && 11806: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11807: if ((!terminate) && 11808: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11809: ctxt->progressive = XML_PARSER_COMMENT; 11810: goto done; 11811: } 11812: #ifdef DEBUG_PUSH 11813: xmlGenericError(xmlGenericErrorContext, 11814: "PP: Parsing Comment\n"); 11815: #endif 11816: xmlParseComment(ctxt); 11817: if (ctxt->instate == XML_PARSER_EOF) 11818: goto done; 11819: ctxt->instate = XML_PARSER_PROLOG; 11820: ctxt->progressive = 1; 11821: } else if ((cur == '<') && (next == '!') && 11822: (avail < 4)) { 11823: goto done; 11824: } else { 11825: ctxt->instate = XML_PARSER_START_TAG; 11826: if (ctxt->progressive == 0) 11827: ctxt->progressive = XML_PARSER_START_TAG; 11828: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11829: #ifdef DEBUG_PUSH 11830: xmlGenericError(xmlGenericErrorContext, 11831: "PP: entering START_TAG\n"); 11832: #endif 11833: } 11834: break; 11835: case XML_PARSER_EPILOG: 11836: SKIP_BLANKS; 11837: if (ctxt->input->buf == NULL) 11838: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11839: else 11840: avail = xmlBufUse(ctxt->input->buf->buffer) - 11841: (ctxt->input->cur - ctxt->input->base); 11842: if (avail < 2) 11843: goto done; 11844: cur = ctxt->input->cur[0]; 11845: next = ctxt->input->cur[1]; 11846: if ((cur == '<') && (next == '?')) { 11847: if ((!terminate) && 11848: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11849: ctxt->progressive = 
XML_PARSER_PI; 11850: goto done; 11851: } 11852: #ifdef DEBUG_PUSH 11853: xmlGenericError(xmlGenericErrorContext, 11854: "PP: Parsing PI\n"); 11855: #endif 11856: xmlParsePI(ctxt); 11857: if (ctxt->instate == XML_PARSER_EOF) 11858: goto done; 11859: ctxt->instate = XML_PARSER_EPILOG; 11860: ctxt->progressive = 1; 11861: } else if ((cur == '<') && (next == '!') && 11862: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11863: if ((!terminate) && 11864: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11865: ctxt->progressive = XML_PARSER_COMMENT; 11866: goto done; 11867: } 11868: #ifdef DEBUG_PUSH 11869: xmlGenericError(xmlGenericErrorContext, 11870: "PP: Parsing Comment\n"); 11871: #endif 11872: xmlParseComment(ctxt); 11873: if (ctxt->instate == XML_PARSER_EOF) 11874: goto done; 11875: ctxt->instate = XML_PARSER_EPILOG; 11876: ctxt->progressive = 1; 11877: } else if ((cur == '<') && (next == '!') && 11878: (avail < 4)) { 11879: goto done; 11880: } else { 11881: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11882: ctxt->instate = XML_PARSER_EOF; 11883: #ifdef DEBUG_PUSH 11884: xmlGenericError(xmlGenericErrorContext, 11885: "PP: entering EOF\n"); 11886: #endif 11887: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11888: ctxt->sax->endDocument(ctxt->userData); 11889: goto done; 11890: } 11891: break; 11892: case XML_PARSER_DTD: { 11893: /* 11894: * Sorry but progressive parsing of the internal subset 11895: * is not expected to be supported. We first check that 11896: * the full content of the internal subset is available and 11897: * the parsing is launched only at that point. 11898: * Internal subset ends up with "']' S? '>'" in an unescaped 11899: * section and not in a ']]>' sequence which are conditional 11900: * sections (whoever argued to keep that crap in XML deserve 11901: * a place in hell !). 
11902: */ 11903: int base, i; 11904: xmlChar *buf; 11905: xmlChar quote = 0; 11906: size_t use; 11907: 11908: base = ctxt->input->cur - ctxt->input->base; 11909: if (base < 0) return(0); 11910: if (ctxt->checkIndex > base) 11911: base = ctxt->checkIndex; 11912: buf = xmlBufContent(ctxt->input->buf->buffer); 11913: use = xmlBufUse(ctxt->input->buf->buffer); 11914: for (;(unsigned int) base < use; base++) { 11915: if (quote != 0) { 11916: if (buf[base] == quote) 11917: quote = 0; 11918: continue; 11919: } 11920: if ((quote == 0) && (buf[base] == '<')) { 11921: int found = 0; 11922: /* special handling of comments */ 11923: if (((unsigned int) base + 4 < use) && 11924: (buf[base + 1] == '!') && 11925: (buf[base + 2] == '-') && 11926: (buf[base + 3] == '-')) { 11927: for (;(unsigned int) base + 3 < use; base++) { 11928: if ((buf[base] == '-') && 11929: (buf[base + 1] == '-') && 11930: (buf[base + 2] == '>')) { 11931: found = 1; 11932: base += 2; 11933: break; 11934: } 11935: } 11936: if (!found) { 11937: #if 0 11938: fprintf(stderr, "unfinished comment\n"); 11939: #endif 11940: break; /* for */ 11941: } 11942: continue; 11943: } 11944: } 11945: if (buf[base] == '"') { 11946: quote = '"'; 11947: continue; 11948: } 11949: if (buf[base] == '\'') { 11950: quote = '\''; 11951: continue; 11952: } 11953: if (buf[base] == ']') { 11954: #if 0 11955: fprintf(stderr, "%c%c%c%c: ", buf[base], 11956: buf[base + 1], buf[base + 2], buf[base + 3]); 11957: #endif 11958: if ((unsigned int) base +1 >= use) 11959: break; 11960: if (buf[base + 1] == ']') { 11961: /* conditional crap, skip both ']' ! 
*/ 11962: base++; 11963: continue; 11964: } 11965: for (i = 1; (unsigned int) base + i < use; i++) { 11966: if (buf[base + i] == '>') { 11967: #if 0 11968: fprintf(stderr, "found\n"); 11969: #endif 11970: goto found_end_int_subset; 11971: } 11972: if (!IS_BLANK_CH(buf[base + i])) { 11973: #if 0 11974: fprintf(stderr, "not found\n"); 11975: #endif 11976: goto not_end_of_int_subset; 11977: } 11978: } 11979: #if 0 11980: fprintf(stderr, "end of stream\n"); 11981: #endif 11982: break; 11983: 11984: } 11985: not_end_of_int_subset: 11986: continue; /* for */ 11987: } 11988: /* 11989: * We didn't found the end of the Internal subset 11990: */ 11991: if (quote == 0) 11992: ctxt->checkIndex = base; 11993: else 11994: ctxt->checkIndex = 0; 11995: #ifdef DEBUG_PUSH 11996: if (next == 0) 11997: xmlGenericError(xmlGenericErrorContext, 11998: "PP: lookup of int subset end filed\n"); 11999: #endif 12000: goto done; 12001: 12002: found_end_int_subset: 12003: ctxt->checkIndex = 0; 12004: xmlParseInternalSubset(ctxt); 12005: if (ctxt->instate == XML_PARSER_EOF) 12006: goto done; 12007: ctxt->inSubset = 2; 12008: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12009: (ctxt->sax->externalSubset != NULL)) 12010: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12011: ctxt->extSubSystem, ctxt->extSubURI); 12012: ctxt->inSubset = 0; 12013: xmlCleanSpecialAttr(ctxt); 12014: if (ctxt->instate == XML_PARSER_EOF) 12015: goto done; 12016: ctxt->instate = XML_PARSER_PROLOG; 12017: ctxt->checkIndex = 0; 12018: #ifdef DEBUG_PUSH 12019: xmlGenericError(xmlGenericErrorContext, 12020: "PP: entering PROLOG\n"); 12021: #endif 12022: break; 12023: } 12024: case XML_PARSER_COMMENT: 12025: xmlGenericError(xmlGenericErrorContext, 12026: "PP: internal error, state == COMMENT\n"); 12027: ctxt->instate = XML_PARSER_CONTENT; 12028: #ifdef DEBUG_PUSH 12029: xmlGenericError(xmlGenericErrorContext, 12030: "PP: entering CONTENT\n"); 12031: #endif 12032: break; 12033: case XML_PARSER_IGNORE: 12034: 
xmlGenericError(xmlGenericErrorContext, 12035: "PP: internal error, state == IGNORE"); 12036: ctxt->instate = XML_PARSER_DTD; 12037: #ifdef DEBUG_PUSH 12038: xmlGenericError(xmlGenericErrorContext, 12039: "PP: entering DTD\n"); 12040: #endif 12041: break; 12042: case XML_PARSER_PI: 12043: xmlGenericError(xmlGenericErrorContext, 12044: "PP: internal error, state == PI\n"); 12045: ctxt->instate = XML_PARSER_CONTENT; 12046: #ifdef DEBUG_PUSH 12047: xmlGenericError(xmlGenericErrorContext, 12048: "PP: entering CONTENT\n"); 12049: #endif 12050: break; 12051: case XML_PARSER_ENTITY_DECL: 12052: xmlGenericError(xmlGenericErrorContext, 12053: "PP: internal error, state == ENTITY_DECL\n"); 12054: ctxt->instate = XML_PARSER_DTD; 12055: #ifdef DEBUG_PUSH 12056: xmlGenericError(xmlGenericErrorContext, 12057: "PP: entering DTD\n"); 12058: #endif 12059: break; 12060: case XML_PARSER_ENTITY_VALUE: 12061: xmlGenericError(xmlGenericErrorContext, 12062: "PP: internal error, state == ENTITY_VALUE\n"); 12063: ctxt->instate = XML_PARSER_CONTENT; 12064: #ifdef DEBUG_PUSH 12065: xmlGenericError(xmlGenericErrorContext, 12066: "PP: entering DTD\n"); 12067: #endif 12068: break; 12069: case XML_PARSER_ATTRIBUTE_VALUE: 12070: xmlGenericError(xmlGenericErrorContext, 12071: "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12072: ctxt->instate = XML_PARSER_START_TAG; 12073: #ifdef DEBUG_PUSH 12074: xmlGenericError(xmlGenericErrorContext, 12075: "PP: entering START_TAG\n"); 12076: #endif 12077: break; 12078: case XML_PARSER_SYSTEM_LITERAL: 12079: xmlGenericError(xmlGenericErrorContext, 12080: "PP: internal error, state == SYSTEM_LITERAL\n"); 12081: ctxt->instate = XML_PARSER_START_TAG; 12082: #ifdef DEBUG_PUSH 12083: xmlGenericError(xmlGenericErrorContext, 12084: "PP: entering START_TAG\n"); 12085: #endif 12086: break; 12087: case XML_PARSER_PUBLIC_LITERAL: 12088: xmlGenericError(xmlGenericErrorContext, 12089: "PP: internal error, state == PUBLIC_LITERAL\n"); 12090: ctxt->instate = 
XML_PARSER_START_TAG; 12091: #ifdef DEBUG_PUSH 12092: xmlGenericError(xmlGenericErrorContext, 12093: "PP: entering START_TAG\n"); 12094: #endif 12095: break; 12096: } 12097: } 12098: done: 12099: #ifdef DEBUG_PUSH 12100: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12101: #endif 12102: return(ret); 12103: encoding_error: 12104: { 12105: char buffer[150]; 12106: 12107: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12108: ctxt->input->cur[0], ctxt->input->cur[1], 12109: ctxt->input->cur[2], ctxt->input->cur[3]); 12110: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12111: "Input is not proper UTF-8, indicate encoding !\n%s", 12112: BAD_CAST buffer, NULL); 12113: } 12114: return(0); 12115: } 12116: 12117: /** 12118: * xmlParseCheckTransition: 12119: * @ctxt: an XML parser context 12120: * @chunk: a char array 12121: * @size: the size in byte of the chunk 12122: * 12123: * Check depending on the current parser state if the chunk given must be 12124: * processed immediately or one need more data to advance on parsing. 
12125: * 12126: * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12127: */ 12128: static int 12129: xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12130: if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12131: return(-1); 12132: if (ctxt->instate == XML_PARSER_START_TAG) { 12133: if (memchr(chunk, '>', size) != NULL) 12134: return(1); 12135: return(0); 12136: } 12137: if (ctxt->progressive == XML_PARSER_COMMENT) { 12138: if (memchr(chunk, '>', size) != NULL) 12139: return(1); 12140: return(0); 12141: } 12142: if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12143: if (memchr(chunk, '>', size) != NULL) 12144: return(1); 12145: return(0); 12146: } 12147: if (ctxt->progressive == XML_PARSER_PI) { 12148: if (memchr(chunk, '>', size) != NULL) 12149: return(1); 12150: return(0); 12151: } 12152: if (ctxt->instate == XML_PARSER_END_TAG) { 12153: if (memchr(chunk, '>', size) != NULL) 12154: return(1); 12155: return(0); 12156: } 12157: if ((ctxt->progressive == XML_PARSER_DTD) || 12158: (ctxt->instate == XML_PARSER_DTD)) { 12159: if (memchr(chunk, '>', size) != NULL) 12160: return(1); 12161: return(0); 12162: } 12163: return(1); 12164: } 12165: 12166: /** 12167: * xmlParseChunk: 12168: * @ctxt: an XML parser context 12169: * @chunk: an char array 12170: * @size: the size in byte of the chunk 12171: * @terminate: last chunk indicator 12172: * 12173: * Parse a Chunk of memory 12174: * 12175: * Returns zero if no error, the xmlParserErrors otherwise. 
 */
int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
              int terminate) {
    /* Set when a trailing '\r' was cut off a non-final chunk; that byte is
     * pushed on its own near the end of the function. */
    int end_in_lf = 0;
    /* Bytes of @chunk held back while only the first part is pushed. */
    int remain = 0;
    /* Input buffer usage (xmlBufUse) before and after the push. */
    size_t old_avail = 0;
    size_t avail = 0;

    if (ctxt == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    /* A previous error with SAX disabled is simply reported again. */
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);
    /* Nothing can be pushed once the parser reached EOF; note this is -1,
     * not an xmlParserErrors value. */
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (ctxt->instate == XML_PARSER_START)
        xmlDetectSAX2(ctxt);
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
        (chunk[size - 1] == '\r')) {
        end_in_lf = 1;
        size--;
    }

/* Re-entered through the goto below to push the held back `remain` bytes. */
xmldecl_done:

    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
        /* Remember base/cur offsets so they can be re-applied after the
         * push (see xmlBufSetInputBaseCur below). */
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
        size_t cur = ctxt->input->cur - ctxt->input->base;
        int res;

        old_avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Specific handling if we autodetected an encoding, we should not
         * push more than the first line ... which depend on the encoding
         * And only push the rest once the final encoding was detected
         */
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
            /* Byte budget for the first push: 45 by default, 90 for
             * UTF-16 encoders, 180 for UCS-4 encoders. */
            unsigned int len = 45;

            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF-16")) ||
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF16")))
                len = 90;
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS-4")) ||
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS4")))
                len = 180;

            /* Discount what was already consumed from the raw input. */
            if (ctxt->input->buf->rawconsumed < len)
                len -= ctxt->input->buf->rawconsumed;

            /*
             * Change size for reading the initial declaration only
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
             * will blindly copy extra bytes from memory.
             */
            if ((unsigned int) size > len) {
                remain = size - len;
                size = len;
            } else {
                remain = 0;
            }
        }
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
        if (res < 0) {
            /* The push failed: the parser-state constant XML_PARSER_EOF is
             * stored in errNo and returned as the error value. */
            ctxt->errNo = XML_PARSER_EOF;
            ctxt->disableSAX = 1;
            return (XML_PARSER_EOF);
        }
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif

    } else if (ctxt->instate != XML_PARSER_EOF) {
        /* No new data to push: if an encoder and a raw buffer are present,
         * run the encoder over what is already there. */
        if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
            xmlParserInputBufferPtr in = ctxt->input->buf;
            if ((in->encoder != NULL) && (in->buffer != NULL) &&
                (in->raw != NULL)) {
                int nbchars;
                size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
                size_t current = ctxt->input->cur - ctxt->input->base;

                nbchars = xmlCharEncInput(in, terminate);
                if (nbchars < 0) {
                    /* TODO 2.6.0 */
                    xmlGenericError(xmlGenericErrorContext,
                                    "xmlParseChunk: encoder error\n");
                    return(XML_ERR_INVALID_ENCODING);
                }
                xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
            }
        }
    }
    if (remain != 0) {
        /* Part of the chunk is still held back: parse what was pushed,
         * never as the terminating call. */
        xmlParseTryOrFinish(ctxt, 0);
    } else {
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
            avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Depending on the current state it may not be such
         * a good idea to try parsing if there is nothing in the chunk
         * which would be worth doing a parser state transition and we
         * need to wait for more data
         */
        /* xmlParseCheckTransition() is only reached when old_avail and
         * avail are both non-zero, which implies ctxt->input != NULL. */
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
            (old_avail == 0) || (avail == 0) ||
            (xmlParseCheckTransition(ctxt,
                       (const char *)&ctxt->input->base[old_avail],
                                     avail - old_avail)))
            xmlParseTryOrFinish(ctxt, terminate);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(ctxt->errNo);

    /* Unless XML_PARSE_HUGE is set, refuse to keep more than
     * XML_MAX_LOOKUP_LIMIT bytes before or after the current position. */
    if ((ctxt->input != NULL) &&
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        ctxt->instate = XML_PARSER_EOF;
    }
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);

    if (remain != 0) {
        /* Advance past what was pushed and go around again with the rest. */
        chunk += size;
        size = remain;
        remain = 0;
        goto xmldecl_done;
    }
    /* Now push the '\r' that was cut off the end of the chunk. */
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL)) {
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
                                         ctxt->input);
        size_t current = ctxt->input->cur - ctxt->input->base;

        xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");

        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
                              base, current);
    }
    if (terminate) {
        /*
         * Check for termination
         */
        /* Number of bytes left after the current position. */
        int cur_avail = 0;

        if (ctxt->input != NULL) {
            if (ctxt->input->buf == NULL)
                cur_avail = ctxt->input->length -
                            (ctxt->input->cur - ctxt->input->base);
            else
                cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
                                      (ctxt->input->cur - ctxt->input->base);
        }

        /* Terminating anywhere but in EOF or EPILOG state is an error. */
        if ((ctxt->instate != XML_PARSER_EOF) &&
            (ctxt->instate != XML_PARSER_EPILOG)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        /* So is leftover input while in EPILOG state. */
        if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF) {
            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                ctxt->sax->endDocument(ctxt->userData);
        }
        ctxt->instate = XML_PARSER_EOF;
    }
    if (ctxt->wellFormed == 0)
        return((xmlParserErrors) ctxt->errNo);
    else
        return(0);
}

/************************************************************************
 *                                                                      *
 *              I/O front end functions to the parser                   *
 *                                                                      *
 ************************************************************************/

/**
 * xmlCreatePushParserCtxt:
 * @sax:  a SAX handler
 * @user_data:  The user data returned on SAX callbacks
 * @chunk:  a pointer to an array of chars
 * @size:  number of chars in the array
 * @filename:  an optional file name or URI
 *
 * Create a parser context for using the XML parser in push mode.
 * If @chunk and @size are non-NULL, the data is used to detect
 * the encoding.  The remaining characters will be parsed so they
 * don't need to be fed in again through xmlParseChunk.
 * To allow content encoding detection, @size should be >= 4
 * The value of @filename is used for fetching external entities
 * and error/warning reports.
12377: * 12378: * Returns the new parser context or NULL 12379: */ 12380: 12381: xmlParserCtxtPtr 12382: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12383: const char *chunk, int size, const char *filename) { 12384: xmlParserCtxtPtr ctxt; 12385: xmlParserInputPtr inputStream; 12386: xmlParserInputBufferPtr buf; 12387: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12388: 12389: /* 12390: * plug some encoding conversion routines 12391: */ 12392: if ((chunk != NULL) && (size >= 4)) 12393: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12394: 12395: buf = xmlAllocParserInputBuffer(enc); 12396: if (buf == NULL) return(NULL); 12397: 12398: ctxt = xmlNewParserCtxt(); 12399: if (ctxt == NULL) { 12400: xmlErrMemory(NULL, "creating parser: out of memory\n"); 12401: xmlFreeParserInputBuffer(buf); 12402: return(NULL); 12403: } 12404: ctxt->dictNames = 1; 12405: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12406: if (ctxt->pushTab == NULL) { 12407: xmlErrMemory(ctxt, NULL); 12408: xmlFreeParserInputBuffer(buf); 12409: xmlFreeParserCtxt(ctxt); 12410: return(NULL); 12411: } 12412: if (sax != NULL) { 12413: #ifdef LIBXML_SAX1_ENABLED 12414: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12415: #endif /* LIBXML_SAX1_ENABLED */ 12416: xmlFree(ctxt->sax); 12417: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12418: if (ctxt->sax == NULL) { 12419: xmlErrMemory(ctxt, NULL); 12420: xmlFreeParserInputBuffer(buf); 12421: xmlFreeParserCtxt(ctxt); 12422: return(NULL); 12423: } 12424: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12425: if (sax->initialized == XML_SAX2_MAGIC) 12426: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12427: else 12428: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12429: if (user_data != NULL) 12430: ctxt->userData = user_data; 12431: } 12432: if (filename == NULL) { 12433: ctxt->directory = NULL; 12434: } else { 12435: ctxt->directory = xmlParserGetDirectory(filename); 
12436: } 12437: 12438: inputStream = xmlNewInputStream(ctxt); 12439: if (inputStream == NULL) { 12440: xmlFreeParserCtxt(ctxt); 12441: xmlFreeParserInputBuffer(buf); 12442: return(NULL); 12443: } 12444: 12445: if (filename == NULL) 12446: inputStream->filename = NULL; 12447: else { 12448: inputStream->filename = (char *) 12449: xmlCanonicPath((const xmlChar *) filename); 12450: if (inputStream->filename == NULL) { 12451: xmlFreeParserCtxt(ctxt); 12452: xmlFreeParserInputBuffer(buf); 12453: return(NULL); 12454: } 12455: } 12456: inputStream->buf = buf; 12457: xmlBufResetInput(inputStream->buf->buffer, inputStream); 12458: inputPush(ctxt, inputStream); 12459: 12460: /* 12461: * If the caller didn't provide an initial 'chunk' for determining 12462: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12463: * that it can be automatically determined later 12464: */ 12465: if ((size == 0) || (chunk == NULL)) { 12466: ctxt->charset = XML_CHAR_ENCODING_NONE; 12467: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12468: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12469: size_t cur = ctxt->input->cur - ctxt->input->base; 12470: 12471: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12472: 12473: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12474: #ifdef DEBUG_PUSH 12475: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12476: #endif 12477: } 12478: 12479: if (enc != XML_CHAR_ENCODING_NONE) { 12480: xmlSwitchEncoding(ctxt, enc); 12481: } 12482: 12483: return(ctxt); 12484: } 12485: #endif /* LIBXML_PUSH_ENABLED */ 12486: 12487: /** 12488: * xmlStopParser: 12489: * @ctxt: an XML parser context 12490: * 12491: * Blocks further parser processing 12492: */ 12493: void 12494: xmlStopParser(xmlParserCtxtPtr ctxt) { 12495: if (ctxt == NULL) 12496: return; 12497: ctxt->instate = XML_PARSER_EOF; 12498: ctxt->errNo = XML_ERR_USER_STOP; 12499: ctxt->disableSAX = 1; 12500: if 
(ctxt->input != NULL) { 12501: ctxt->input->cur = BAD_CAST""; 12502: ctxt->input->base = ctxt->input->cur; 12503: } 12504: } 12505: 12506: /** 12507: * xmlCreateIOParserCtxt: 12508: * @sax: a SAX handler 12509: * @user_data: The user data returned on SAX callbacks 12510: * @ioread: an I/O read function 12511: * @ioclose: an I/O close function 12512: * @ioctx: an I/O handler 12513: * @enc: the charset encoding if known 12514: * 12515: * Create a parser context for using the XML parser with an existing 12516: * I/O stream 12517: * 12518: * Returns the new parser context or NULL 12519: */ 12520: xmlParserCtxtPtr 12521: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12522: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12523: void *ioctx, xmlCharEncoding enc) { 12524: xmlParserCtxtPtr ctxt; 12525: xmlParserInputPtr inputStream; 12526: xmlParserInputBufferPtr buf; 12527: 12528: if (ioread == NULL) return(NULL); 12529: 12530: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12531: if (buf == NULL) { 12532: if (ioclose != NULL) 12533: ioclose(ioctx); 12534: return (NULL); 12535: } 12536: 12537: ctxt = xmlNewParserCtxt(); 12538: if (ctxt == NULL) { 12539: xmlFreeParserInputBuffer(buf); 12540: return(NULL); 12541: } 12542: if (sax != NULL) { 12543: #ifdef LIBXML_SAX1_ENABLED 12544: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12545: #endif /* LIBXML_SAX1_ENABLED */ 12546: xmlFree(ctxt->sax); 12547: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12548: if (ctxt->sax == NULL) { 12549: xmlErrMemory(ctxt, NULL); 12550: xmlFreeParserCtxt(ctxt); 12551: return(NULL); 12552: } 12553: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12554: if (sax->initialized == XML_SAX2_MAGIC) 12555: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12556: else 12557: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12558: if (user_data != NULL) 12559: ctxt->userData = user_data; 12560: } 12561: 12562: inputStream = 
xmlNewIOInputStream(ctxt, buf, enc); 12563: if (inputStream == NULL) { 12564: xmlFreeParserCtxt(ctxt); 12565: return(NULL); 12566: } 12567: inputPush(ctxt, inputStream); 12568: 12569: return(ctxt); 12570: } 12571: 12572: #ifdef LIBXML_VALID_ENABLED 12573: /************************************************************************ 12574: * * 12575: * Front ends when parsing a DTD * 12576: * * 12577: ************************************************************************/ 12578: 12579: /** 12580: * xmlIOParseDTD: 12581: * @sax: the SAX handler block or NULL 12582: * @input: an Input Buffer 12583: * @enc: the charset encoding if known 12584: * 12585: * Load and parse a DTD 12586: * 12587: * Returns the resulting xmlDtdPtr or NULL in case of error. 12588: * @input will be freed by the function in any case. 12589: */ 12590: 12591: xmlDtdPtr 12592: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12593: xmlCharEncoding enc) { 12594: xmlDtdPtr ret = NULL; 12595: xmlParserCtxtPtr ctxt; 12596: xmlParserInputPtr pinput = NULL; 12597: xmlChar start[4]; 12598: 12599: if (input == NULL) 12600: return(NULL); 12601: 12602: ctxt = xmlNewParserCtxt(); 12603: if (ctxt == NULL) { 12604: xmlFreeParserInputBuffer(input); 12605: return(NULL); 12606: } 12607: 12608: /* 12609: * Set-up the SAX context 12610: */ 12611: if (sax != NULL) { 12612: if (ctxt->sax != NULL) 12613: xmlFree(ctxt->sax); 12614: ctxt->sax = sax; 12615: ctxt->userData = ctxt; 12616: } 12617: xmlDetectSAX2(ctxt); 12618: 12619: /* 12620: * generate a parser input from the I/O handler 12621: */ 12622: 12623: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12624: if (pinput == NULL) { 12625: if (sax != NULL) ctxt->sax = NULL; 12626: xmlFreeParserInputBuffer(input); 12627: xmlFreeParserCtxt(ctxt); 12628: return(NULL); 12629: } 12630: 12631: /* 12632: * plug some encoding conversion routines here. 
12633: */ 12634: if (xmlPushInput(ctxt, pinput) < 0) { 12635: if (sax != NULL) ctxt->sax = NULL; 12636: xmlFreeParserCtxt(ctxt); 12637: return(NULL); 12638: } 12639: if (enc != XML_CHAR_ENCODING_NONE) { 12640: xmlSwitchEncoding(ctxt, enc); 12641: } 12642: 12643: pinput->filename = NULL; 12644: pinput->line = 1; 12645: pinput->col = 1; 12646: pinput->base = ctxt->input->cur; 12647: pinput->cur = ctxt->input->cur; 12648: pinput->free = NULL; 12649: 12650: /* 12651: * let's parse that entity knowing it's an external subset. 12652: */ 12653: ctxt->inSubset = 2; 12654: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12655: if (ctxt->myDoc == NULL) { 12656: xmlErrMemory(ctxt, "New Doc failed"); 12657: return(NULL); 12658: } 12659: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12660: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12661: BAD_CAST "none", BAD_CAST "none"); 12662: 12663: if ((enc == XML_CHAR_ENCODING_NONE) && 12664: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12665: /* 12666: * Get the 4 first bytes and decode the charset 12667: * if enc != XML_CHAR_ENCODING_NONE 12668: * plug some encoding conversion routines. 
12669: */ 12670: start[0] = RAW; 12671: start[1] = NXT(1); 12672: start[2] = NXT(2); 12673: start[3] = NXT(3); 12674: enc = xmlDetectCharEncoding(start, 4); 12675: if (enc != XML_CHAR_ENCODING_NONE) { 12676: xmlSwitchEncoding(ctxt, enc); 12677: } 12678: } 12679: 12680: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12681: 12682: if (ctxt->myDoc != NULL) { 12683: if (ctxt->wellFormed) { 12684: ret = ctxt->myDoc->extSubset; 12685: ctxt->myDoc->extSubset = NULL; 12686: if (ret != NULL) { 12687: xmlNodePtr tmp; 12688: 12689: ret->doc = NULL; 12690: tmp = ret->children; 12691: while (tmp != NULL) { 12692: tmp->doc = NULL; 12693: tmp = tmp->next; 12694: } 12695: } 12696: } else { 12697: ret = NULL; 12698: } 12699: xmlFreeDoc(ctxt->myDoc); 12700: ctxt->myDoc = NULL; 12701: } 12702: if (sax != NULL) ctxt->sax = NULL; 12703: xmlFreeParserCtxt(ctxt); 12704: 12705: return(ret); 12706: } 12707: 12708: /** 12709: * xmlSAXParseDTD: 12710: * @sax: the SAX handler block 12711: * @ExternalID: a NAME* containing the External ID of the DTD 12712: * @SystemID: a NAME* containing the URL to the DTD 12713: * 12714: * Load and parse an external subset. 12715: * 12716: * Returns the resulting xmlDtdPtr or NULL in case of error. 
12717: */ 12718: 12719: xmlDtdPtr 12720: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12721: const xmlChar *SystemID) { 12722: xmlDtdPtr ret = NULL; 12723: xmlParserCtxtPtr ctxt; 12724: xmlParserInputPtr input = NULL; 12725: xmlCharEncoding enc; 12726: xmlChar* systemIdCanonic; 12727: 12728: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12729: 12730: ctxt = xmlNewParserCtxt(); 12731: if (ctxt == NULL) { 12732: return(NULL); 12733: } 12734: 12735: /* 12736: * Set-up the SAX context 12737: */ 12738: if (sax != NULL) { 12739: if (ctxt->sax != NULL) 12740: xmlFree(ctxt->sax); 12741: ctxt->sax = sax; 12742: ctxt->userData = ctxt; 12743: } 12744: 12745: /* 12746: * Canonicalise the system ID 12747: */ 12748: systemIdCanonic = xmlCanonicPath(SystemID); 12749: if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12750: xmlFreeParserCtxt(ctxt); 12751: return(NULL); 12752: } 12753: 12754: /* 12755: * Ask the Entity resolver to load the damn thing 12756: */ 12757: 12758: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12759: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12760: systemIdCanonic); 12761: if (input == NULL) { 12762: if (sax != NULL) ctxt->sax = NULL; 12763: xmlFreeParserCtxt(ctxt); 12764: if (systemIdCanonic != NULL) 12765: xmlFree(systemIdCanonic); 12766: return(NULL); 12767: } 12768: 12769: /* 12770: * plug some encoding conversion routines here. 
12771: */ 12772: if (xmlPushInput(ctxt, input) < 0) { 12773: if (sax != NULL) ctxt->sax = NULL; 12774: xmlFreeParserCtxt(ctxt); 12775: if (systemIdCanonic != NULL) 12776: xmlFree(systemIdCanonic); 12777: return(NULL); 12778: } 12779: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12780: enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12781: xmlSwitchEncoding(ctxt, enc); 12782: } 12783: 12784: if (input->filename == NULL) 12785: input->filename = (char *) systemIdCanonic; 12786: else 12787: xmlFree(systemIdCanonic); 12788: input->line = 1; 12789: input->col = 1; 12790: input->base = ctxt->input->cur; 12791: input->cur = ctxt->input->cur; 12792: input->free = NULL; 12793: 12794: /* 12795: * let's parse that entity knowing it's an external subset. 12796: */ 12797: ctxt->inSubset = 2; 12798: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12799: if (ctxt->myDoc == NULL) { 12800: xmlErrMemory(ctxt, "New Doc failed"); 12801: if (sax != NULL) ctxt->sax = NULL; 12802: xmlFreeParserCtxt(ctxt); 12803: return(NULL); 12804: } 12805: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12806: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12807: ExternalID, SystemID); 12808: xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12809: 12810: if (ctxt->myDoc != NULL) { 12811: if (ctxt->wellFormed) { 12812: ret = ctxt->myDoc->extSubset; 12813: ctxt->myDoc->extSubset = NULL; 12814: if (ret != NULL) { 12815: xmlNodePtr tmp; 12816: 12817: ret->doc = NULL; 12818: tmp = ret->children; 12819: while (tmp != NULL) { 12820: tmp->doc = NULL; 12821: tmp = tmp->next; 12822: } 12823: } 12824: } else { 12825: ret = NULL; 12826: } 12827: xmlFreeDoc(ctxt->myDoc); 12828: ctxt->myDoc = NULL; 12829: } 12830: if (sax != NULL) ctxt->sax = NULL; 12831: xmlFreeParserCtxt(ctxt); 12832: 12833: return(ret); 12834: } 12835: 12836: 12837: /** 12838: * xmlParseDTD: 12839: * @ExternalID: a NAME* containing the External ID of the DTD 12840: * @SystemID: a NAME* containing the URL to the DTD 12841: * 
12842: * Load and parse an external subset. 12843: * 12844: * Returns the resulting xmlDtdPtr or NULL in case of error. 12845: */ 12846: 12847: xmlDtdPtr 12848: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12849: return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12850: } 12851: #endif /* LIBXML_VALID_ENABLED */ 12852: 12853: /************************************************************************ 12854: * * 12855: * Front ends when parsing an Entity * 12856: * * 12857: ************************************************************************/ 12858: 12859: /** 12860: * xmlParseCtxtExternalEntity: 12861: * @ctx: the existing parsing context 12862: * @URL: the URL for the entity to load 12863: * @ID: the System ID for the entity to load 12864: * @lst: the return value for the set of parsed nodes 12865: * 12866: * Parse an external general entity within an existing parsing context 12867: * An external general parsed entity is well-formed if it matches the 12868: * production labeled extParsedEnt. 12869: * 12870: * [78] extParsedEnt ::= TextDecl? 
content 12871: * 12872: * Returns 0 if the entity is well formed, -1 in case of args problem and 12873: * the parser error code otherwise 12874: */ 12875: 12876: int 12877: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12878: const xmlChar *ID, xmlNodePtr *lst) { 12879: xmlParserCtxtPtr ctxt; 12880: xmlDocPtr newDoc; 12881: xmlNodePtr newRoot; 12882: xmlSAXHandlerPtr oldsax = NULL; 12883: int ret = 0; 12884: xmlChar start[4]; 12885: xmlCharEncoding enc; 12886: 12887: if (ctx == NULL) return(-1); 12888: 12889: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12890: (ctx->depth > 1024)) { 12891: return(XML_ERR_ENTITY_LOOP); 12892: } 12893: 12894: if (lst != NULL) 12895: *lst = NULL; 12896: if ((URL == NULL) && (ID == NULL)) 12897: return(-1); 12898: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12899: return(-1); 12900: 12901: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12902: if (ctxt == NULL) { 12903: return(-1); 12904: } 12905: 12906: oldsax = ctxt->sax; 12907: ctxt->sax = ctx->sax; 12908: xmlDetectSAX2(ctxt); 12909: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12910: if (newDoc == NULL) { 12911: xmlFreeParserCtxt(ctxt); 12912: return(-1); 12913: } 12914: newDoc->properties = XML_DOC_INTERNAL; 12915: if (ctx->myDoc->dict) { 12916: newDoc->dict = ctx->myDoc->dict; 12917: xmlDictReference(newDoc->dict); 12918: } 12919: if (ctx->myDoc != NULL) { 12920: newDoc->intSubset = ctx->myDoc->intSubset; 12921: newDoc->extSubset = ctx->myDoc->extSubset; 12922: } 12923: if (ctx->myDoc->URL != NULL) { 12924: newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12925: } 12926: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12927: if (newRoot == NULL) { 12928: ctxt->sax = oldsax; 12929: xmlFreeParserCtxt(ctxt); 12930: newDoc->intSubset = NULL; 12931: newDoc->extSubset = NULL; 12932: xmlFreeDoc(newDoc); 12933: return(-1); 12934: } 12935: xmlAddChild((xmlNodePtr) newDoc, newRoot); 12936: nodePush(ctxt, 
newDoc->children); 12937: if (ctx->myDoc == NULL) { 12938: ctxt->myDoc = newDoc; 12939: } else { 12940: ctxt->myDoc = ctx->myDoc; 12941: newDoc->children->doc = ctx->myDoc; 12942: } 12943: 12944: /* 12945: * Get the 4 first bytes and decode the charset 12946: * if enc != XML_CHAR_ENCODING_NONE 12947: * plug some encoding conversion routines. 12948: */ 12949: GROW 12950: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12951: start[0] = RAW; 12952: start[1] = NXT(1); 12953: start[2] = NXT(2); 12954: start[3] = NXT(3); 12955: enc = xmlDetectCharEncoding(start, 4); 12956: if (enc != XML_CHAR_ENCODING_NONE) { 12957: xmlSwitchEncoding(ctxt, enc); 12958: } 12959: } 12960: 12961: /* 12962: * Parse a possible text declaration first 12963: */ 12964: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12965: xmlParseTextDecl(ctxt); 12966: /* 12967: * An XML-1.0 document can't reference an entity not XML-1.0 12968: */ 12969: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12970: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12971: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12972: "Version mismatch between document and entity\n"); 12973: } 12974: } 12975: 12976: /* 12977: * If the user provided its own SAX callbacks then reuse the 12978: * useData callback field, otherwise the expected setup in a 12979: * DOM builder is to have userData == ctxt 12980: */ 12981: if (ctx->userData == ctx) 12982: ctxt->userData = ctxt; 12983: else 12984: ctxt->userData = ctx->userData; 12985: 12986: /* 12987: * Doing validity checking on chunk doesn't make sense 12988: */ 12989: ctxt->instate = XML_PARSER_CONTENT; 12990: ctxt->validate = ctx->validate; 12991: ctxt->valid = ctx->valid; 12992: ctxt->loadsubset = ctx->loadsubset; 12993: ctxt->depth = ctx->depth + 1; 12994: ctxt->replaceEntities = ctx->replaceEntities; 12995: if (ctxt->validate) { 12996: ctxt->vctxt.error = ctx->vctxt.error; 12997: ctxt->vctxt.warning = ctx->vctxt.warning; 12998: } else { 12999: 
ctxt->vctxt.error = NULL; 13000: ctxt->vctxt.warning = NULL; 13001: } 13002: ctxt->vctxt.nodeTab = NULL; 13003: ctxt->vctxt.nodeNr = 0; 13004: ctxt->vctxt.nodeMax = 0; 13005: ctxt->vctxt.node = NULL; 13006: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13007: ctxt->dict = ctx->dict; 13008: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13009: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13010: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13011: ctxt->dictNames = ctx->dictNames; 13012: ctxt->attsDefault = ctx->attsDefault; 13013: ctxt->attsSpecial = ctx->attsSpecial; 13014: ctxt->linenumbers = ctx->linenumbers; 13015: 13016: xmlParseContent(ctxt); 13017: 13018: ctx->validate = ctxt->validate; 13019: ctx->valid = ctxt->valid; 13020: if ((RAW == '<') && (NXT(1) == '/')) { 13021: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13022: } else if (RAW != 0) { 13023: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13024: } 13025: if (ctxt->node != newDoc->children) { 13026: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13027: } 13028: 13029: if (!ctxt->wellFormed) { 13030: if (ctxt->errNo == 0) 13031: ret = 1; 13032: else 13033: ret = ctxt->errNo; 13034: } else { 13035: if (lst != NULL) { 13036: xmlNodePtr cur; 13037: 13038: /* 13039: * Return the newly created nodeset after unlinking it from 13040: * they pseudo parent. 
13041: */ 13042: cur = newDoc->children->children; 13043: *lst = cur; 13044: while (cur != NULL) { 13045: cur->parent = NULL; 13046: cur = cur->next; 13047: } 13048: newDoc->children->children = NULL; 13049: } 13050: ret = 0; 13051: } 13052: ctxt->sax = oldsax; 13053: ctxt->dict = NULL; 13054: ctxt->attsDefault = NULL; 13055: ctxt->attsSpecial = NULL; 13056: xmlFreeParserCtxt(ctxt); 13057: newDoc->intSubset = NULL; 13058: newDoc->extSubset = NULL; 13059: xmlFreeDoc(newDoc); 13060: 13061: return(ret); 13062: } 13063: 13064: /** 13065: * xmlParseExternalEntityPrivate: 13066: * @doc: the document the chunk pertains to 13067: * @oldctxt: the previous parser context if available 13068: * @sax: the SAX handler bloc (possibly NULL) 13069: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13070: * @depth: Used for loop detection, use 0 13071: * @URL: the URL for the entity to load 13072: * @ID: the System ID for the entity to load 13073: * @list: the return value for the set of parsed nodes 13074: * 13075: * Private version of xmlParseExternalEntity() 13076: * 13077: * Returns 0 if the entity is well formed, -1 in case of args problem and 13078: * the parser error code otherwise 13079: */ 13080: 13081: static xmlParserErrors 13082: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13083: xmlSAXHandlerPtr sax, 13084: void *user_data, int depth, const xmlChar *URL, 13085: const xmlChar *ID, xmlNodePtr *list) { 13086: xmlParserCtxtPtr ctxt; 13087: xmlDocPtr newDoc; 13088: xmlNodePtr newRoot; 13089: xmlSAXHandlerPtr oldsax = NULL; 13090: xmlParserErrors ret = XML_ERR_OK; 13091: xmlChar start[4]; 13092: xmlCharEncoding enc; 13093: 13094: if (((depth > 40) && 13095: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13096: (depth > 1024)) { 13097: return(XML_ERR_ENTITY_LOOP); 13098: } 13099: 13100: if (list != NULL) 13101: *list = NULL; 13102: if ((URL == NULL) && (ID == NULL)) 13103: return(XML_ERR_INTERNAL_ERROR); 
13104: if (doc == NULL) 13105: return(XML_ERR_INTERNAL_ERROR); 13106: 13107: 13108: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13109: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13110: ctxt->userData = ctxt; 13111: if (oldctxt != NULL) { 13112: ctxt->_private = oldctxt->_private; 13113: ctxt->loadsubset = oldctxt->loadsubset; 13114: ctxt->validate = oldctxt->validate; 13115: ctxt->external = oldctxt->external; 13116: ctxt->record_info = oldctxt->record_info; 13117: ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13118: ctxt->node_seq.length = oldctxt->node_seq.length; 13119: ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13120: } else { 13121: /* 13122: * Doing validity checking on chunk without context 13123: * doesn't make sense 13124: */ 13125: ctxt->_private = NULL; 13126: ctxt->validate = 0; 13127: ctxt->external = 2; 13128: ctxt->loadsubset = 0; 13129: } 13130: if (sax != NULL) { 13131: oldsax = ctxt->sax; 13132: ctxt->sax = sax; 13133: if (user_data != NULL) 13134: ctxt->userData = user_data; 13135: } 13136: xmlDetectSAX2(ctxt); 13137: newDoc = xmlNewDoc(BAD_CAST "1.0"); 13138: if (newDoc == NULL) { 13139: ctxt->node_seq.maximum = 0; 13140: ctxt->node_seq.length = 0; 13141: ctxt->node_seq.buffer = NULL; 13142: xmlFreeParserCtxt(ctxt); 13143: return(XML_ERR_INTERNAL_ERROR); 13144: } 13145: newDoc->properties = XML_DOC_INTERNAL; 13146: newDoc->intSubset = doc->intSubset; 13147: newDoc->extSubset = doc->extSubset; 13148: newDoc->dict = doc->dict; 13149: xmlDictReference(newDoc->dict); 13150: 13151: if (doc->URL != NULL) { 13152: newDoc->URL = xmlStrdup(doc->URL); 13153: } 13154: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13155: if (newRoot == NULL) { 13156: if (sax != NULL) 13157: ctxt->sax = oldsax; 13158: ctxt->node_seq.maximum = 0; 13159: ctxt->node_seq.length = 0; 13160: ctxt->node_seq.buffer = NULL; 13161: xmlFreeParserCtxt(ctxt); 13162: newDoc->intSubset = NULL; 13163: newDoc->extSubset = 
NULL; 13164: xmlFreeDoc(newDoc); 13165: return(XML_ERR_INTERNAL_ERROR); 13166: } 13167: xmlAddChild((xmlNodePtr) newDoc, newRoot); 13168: nodePush(ctxt, newDoc->children); 13169: ctxt->myDoc = doc; 13170: newRoot->doc = doc; 13171: 13172: /* 13173: * Get the 4 first bytes and decode the charset 13174: * if enc != XML_CHAR_ENCODING_NONE 13175: * plug some encoding conversion routines. 13176: */ 13177: GROW; 13178: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13179: start[0] = RAW; 13180: start[1] = NXT(1); 13181: start[2] = NXT(2); 13182: start[3] = NXT(3); 13183: enc = xmlDetectCharEncoding(start, 4); 13184: if (enc != XML_CHAR_ENCODING_NONE) { 13185: xmlSwitchEncoding(ctxt, enc); 13186: } 13187: } 13188: 13189: /* 13190: * Parse a possible text declaration first 13191: */ 13192: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13193: xmlParseTextDecl(ctxt); 13194: } 13195: 13196: ctxt->instate = XML_PARSER_CONTENT; 13197: ctxt->depth = depth; 13198: 13199: xmlParseContent(ctxt); 13200: 13201: if ((RAW == '<') && (NXT(1) == '/')) { 13202: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13203: } else if (RAW != 0) { 13204: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13205: } 13206: if (ctxt->node != newDoc->children) { 13207: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13208: } 13209: 13210: if (!ctxt->wellFormed) { 13211: if (ctxt->errNo == 0) 13212: ret = XML_ERR_INTERNAL_ERROR; 13213: else 13214: ret = (xmlParserErrors)ctxt->errNo; 13215: } else { 13216: if (list != NULL) { 13217: xmlNodePtr cur; 13218: 13219: /* 13220: * Return the newly created nodeset after unlinking it from 13221: * they pseudo parent. 
13222: */ 13223: cur = newDoc->children->children; 13224: *list = cur; 13225: while (cur != NULL) { 13226: cur->parent = NULL; 13227: cur = cur->next; 13228: } 13229: newDoc->children->children = NULL; 13230: } 13231: ret = XML_ERR_OK; 13232: } 13233: 13234: /* 13235: * Record in the parent context the number of entities replacement 13236: * done when parsing that reference. 13237: */ 13238: if (oldctxt != NULL) 13239: oldctxt->nbentities += ctxt->nbentities; 13240: 13241: /* 13242: * Also record the size of the entity parsed 13243: */ 13244: if (ctxt->input != NULL) { 13245: oldctxt->sizeentities += ctxt->input->consumed; 13246: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13247: } 13248: /* 13249: * And record the last error if any 13250: */ 13251: if (ctxt->lastError.code != XML_ERR_OK) 13252: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13253: 13254: if (sax != NULL) 13255: ctxt->sax = oldsax; 13256: oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13257: oldctxt->node_seq.length = ctxt->node_seq.length; 13258: oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13259: ctxt->node_seq.maximum = 0; 13260: ctxt->node_seq.length = 0; 13261: ctxt->node_seq.buffer = NULL; 13262: xmlFreeParserCtxt(ctxt); 13263: newDoc->intSubset = NULL; 13264: newDoc->extSubset = NULL; 13265: xmlFreeDoc(newDoc); 13266: 13267: return(ret); 13268: } 13269: 13270: #ifdef LIBXML_SAX1_ENABLED 13271: /** 13272: * xmlParseExternalEntity: 13273: * @doc: the document the chunk pertains to 13274: * @sax: the SAX handler bloc (possibly NULL) 13275: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13276: * @depth: Used for loop detection, use 0 13277: * @URL: the URL for the entity to load 13278: * @ID: the System ID for the entity to load 13279: * @lst: the return value for the set of parsed nodes 13280: * 13281: * Parse an external general entity 13282: * An external general parsed entity is well-formed if it matches the 13283: * production 
labeled extParsedEnt. 13284: * 13285: * [78] extParsedEnt ::= TextDecl? content 13286: * 13287: * Returns 0 if the entity is well formed, -1 in case of args problem and 13288: * the parser error code otherwise 13289: */ 13290: 13291: int 13292: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13293: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13294: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13295: ID, lst)); 13296: } 13297: 13298: /** 13299: * xmlParseBalancedChunkMemory: 13300: * @doc: the document the chunk pertains to 13301: * @sax: the SAX handler bloc (possibly NULL) 13302: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13303: * @depth: Used for loop detection, use 0 13304: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13305: * @lst: the return value for the set of parsed nodes 13306: * 13307: * Parse a well-balanced chunk of an XML document 13308: * called by the parser 13309: * The allowed sequence for the Well Balanced Chunk is the one defined by 13310: * the content production in the XML grammar: 13311: * 13312: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13313: * 13314: * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13315: * the parser error code otherwise 13316: */ 13317: 13318: int 13319: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13320: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13321: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13322: depth, string, lst, 0 ); 13323: } 13324: #endif /* LIBXML_SAX1_ENABLED */ 13325: 13326: /** 13327: * xmlParseBalancedChunkMemoryInternal: 13328: * @oldctxt: the existing parsing context 13329: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13330: * @user_data: the user data field for the parser context 13331: * @lst: the return value for the set 
of parsed nodes 13332: * 13333: * 13334: * Parse a well-balanced chunk of an XML document 13335: * called by the parser 13336: * The allowed sequence for the Well Balanced Chunk is the one defined by 13337: * the content production in the XML grammar: 13338: * 13339: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13340: * 13341: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13342: * error code otherwise 13343: * 13344: * In case recover is set to 1, the nodelist will not be empty even if 13345: * the parsed chunk is not well balanced. 13346: */ 13347: static xmlParserErrors 13348: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13349: const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13350: xmlParserCtxtPtr ctxt; 13351: xmlDocPtr newDoc = NULL; 13352: xmlNodePtr newRoot; 13353: xmlSAXHandlerPtr oldsax = NULL; 13354: xmlNodePtr content = NULL; 13355: xmlNodePtr last = NULL; 13356: int size; 13357: xmlParserErrors ret = XML_ERR_OK; 13358: #ifdef SAX2 13359: int i; 13360: #endif 13361: 13362: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13363: (oldctxt->depth > 1024)) { 13364: return(XML_ERR_ENTITY_LOOP); 13365: } 13366: 13367: 13368: if (lst != NULL) 13369: *lst = NULL; 13370: if (string == NULL) 13371: return(XML_ERR_INTERNAL_ERROR); 13372: 13373: size = xmlStrlen(string); 13374: 13375: ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13376: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13377: if (user_data != NULL) 13378: ctxt->userData = user_data; 13379: else 13380: ctxt->userData = ctxt; 13381: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13382: ctxt->dict = oldctxt->dict; 13383: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13384: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13385: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13386: 13387: #ifdef SAX2 13388: /* propagate 
namespaces down the entity */ 13389: for (i = 0;i < oldctxt->nsNr;i += 2) { 13390: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13391: } 13392: #endif 13393: 13394: oldsax = ctxt->sax; 13395: ctxt->sax = oldctxt->sax; 13396: xmlDetectSAX2(ctxt); 13397: ctxt->replaceEntities = oldctxt->replaceEntities; 13398: ctxt->options = oldctxt->options; 13399: 13400: ctxt->_private = oldctxt->_private; 13401: if (oldctxt->myDoc == NULL) { 13402: newDoc = xmlNewDoc(BAD_CAST "1.0"); 13403: if (newDoc == NULL) { 13404: ctxt->sax = oldsax; 13405: ctxt->dict = NULL; 13406: xmlFreeParserCtxt(ctxt); 13407: return(XML_ERR_INTERNAL_ERROR); 13408: } 13409: newDoc->properties = XML_DOC_INTERNAL; 13410: newDoc->dict = ctxt->dict; 13411: xmlDictReference(newDoc->dict); 13412: ctxt->myDoc = newDoc; 13413: } else { 13414: ctxt->myDoc = oldctxt->myDoc; 13415: content = ctxt->myDoc->children; 13416: last = ctxt->myDoc->last; 13417: } 13418: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13419: if (newRoot == NULL) { 13420: ctxt->sax = oldsax; 13421: ctxt->dict = NULL; 13422: xmlFreeParserCtxt(ctxt); 13423: if (newDoc != NULL) { 13424: xmlFreeDoc(newDoc); 13425: } 13426: return(XML_ERR_INTERNAL_ERROR); 13427: } 13428: ctxt->myDoc->children = NULL; 13429: ctxt->myDoc->last = NULL; 13430: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13431: nodePush(ctxt, ctxt->myDoc->children); 13432: ctxt->instate = XML_PARSER_CONTENT; 13433: ctxt->depth = oldctxt->depth + 1; 13434: 13435: ctxt->validate = 0; 13436: ctxt->loadsubset = oldctxt->loadsubset; 13437: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13438: /* 13439: * ID/IDREF registration will be done in xmlValidateElement below 13440: */ 13441: ctxt->loadsubset |= XML_SKIP_IDS; 13442: } 13443: ctxt->dictNames = oldctxt->dictNames; 13444: ctxt->attsDefault = oldctxt->attsDefault; 13445: ctxt->attsSpecial = oldctxt->attsSpecial; 13446: 13447: xmlParseContent(ctxt); 13448: if ((RAW == '<') && (NXT(1) 
== '/')) { 13449: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13450: } else if (RAW != 0) { 13451: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13452: } 13453: if (ctxt->node != ctxt->myDoc->children) { 13454: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13455: } 13456: 13457: if (!ctxt->wellFormed) { 13458: if (ctxt->errNo == 0) 13459: ret = XML_ERR_INTERNAL_ERROR; 13460: else 13461: ret = (xmlParserErrors)ctxt->errNo; 13462: } else { 13463: ret = XML_ERR_OK; 13464: } 13465: 13466: if ((lst != NULL) && (ret == XML_ERR_OK)) { 13467: xmlNodePtr cur; 13468: 13469: /* 13470: * Return the newly created nodeset after unlinking it from 13471: * they pseudo parent. 13472: */ 13473: cur = ctxt->myDoc->children->children; 13474: *lst = cur; 13475: while (cur != NULL) { 13476: #ifdef LIBXML_VALID_ENABLED 13477: if ((oldctxt->validate) && (oldctxt->wellFormed) && 13478: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13479: (cur->type == XML_ELEMENT_NODE)) { 13480: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13481: oldctxt->myDoc, cur); 13482: } 13483: #endif /* LIBXML_VALID_ENABLED */ 13484: cur->parent = NULL; 13485: cur = cur->next; 13486: } 13487: ctxt->myDoc->children->children = NULL; 13488: } 13489: if (ctxt->myDoc != NULL) { 13490: xmlFreeNode(ctxt->myDoc->children); 13491: ctxt->myDoc->children = content; 13492: ctxt->myDoc->last = last; 13493: } 13494: 13495: /* 13496: * Record in the parent context the number of entities replacement 13497: * done when parsing that reference. 
13498: */ 13499: if (oldctxt != NULL) 13500: oldctxt->nbentities += ctxt->nbentities; 13501: 13502: /* 13503: * Also record the last error if any 13504: */ 13505: if (ctxt->lastError.code != XML_ERR_OK) 13506: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13507: 13508: ctxt->sax = oldsax; 13509: ctxt->dict = NULL; 13510: ctxt->attsDefault = NULL; 13511: ctxt->attsSpecial = NULL; 13512: xmlFreeParserCtxt(ctxt); 13513: if (newDoc != NULL) { 13514: xmlFreeDoc(newDoc); 13515: } 13516: 13517: return(ret); 13518: } 13519: 13520: /** 13521: * xmlParseInNodeContext: 13522: * @node: the context node 13523: * @data: the input string 13524: * @datalen: the input string length in bytes 13525: * @options: a combination of xmlParserOption 13526: * @lst: the return value for the set of parsed nodes 13527: * 13528: * Parse a well-balanced chunk of an XML document 13529: * within the context (DTD, namespaces, etc ...) of the given node. 13530: * 13531: * The allowed sequence for the data is a Well Balanced Chunk defined by 13532: * the content production in the XML grammar: 13533: * 13534: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13535: * 13536: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13537: * error code otherwise 13538: */ 13539: xmlParserErrors 13540: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13541: int options, xmlNodePtr *lst) { 13542: #ifdef SAX2 13543: xmlParserCtxtPtr ctxt; 13544: xmlDocPtr doc = NULL; 13545: xmlNodePtr fake, cur; 13546: int nsnr = 0; 13547: 13548: xmlParserErrors ret = XML_ERR_OK; 13549: 13550: /* 13551: * check all input parameters, grab the document 13552: */ 13553: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13554: return(XML_ERR_INTERNAL_ERROR); 13555: switch (node->type) { 13556: case XML_ELEMENT_NODE: 13557: case XML_ATTRIBUTE_NODE: 13558: case XML_TEXT_NODE: 13559: case XML_CDATA_SECTION_NODE: 13560: case XML_ENTITY_REF_NODE: 
13561: case XML_PI_NODE: 13562: case XML_COMMENT_NODE: 13563: case XML_DOCUMENT_NODE: 13564: case XML_HTML_DOCUMENT_NODE: 13565: break; 13566: default: 13567: return(XML_ERR_INTERNAL_ERROR); 13568: 13569: } 13570: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13571: (node->type != XML_DOCUMENT_NODE) && 13572: (node->type != XML_HTML_DOCUMENT_NODE)) 13573: node = node->parent; 13574: if (node == NULL) 13575: return(XML_ERR_INTERNAL_ERROR); 13576: if (node->type == XML_ELEMENT_NODE) 13577: doc = node->doc; 13578: else 13579: doc = (xmlDocPtr) node; 13580: if (doc == NULL) 13581: return(XML_ERR_INTERNAL_ERROR); 13582: 13583: /* 13584: * allocate a context and set-up everything not related to the 13585: * node position in the tree 13586: */ 13587: if (doc->type == XML_DOCUMENT_NODE) 13588: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13589: #ifdef LIBXML_HTML_ENABLED 13590: else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13591: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13592: /* 13593: * When parsing in context, it makes no sense to add implied 13594: * elements like html/body/etc... 13595: */ 13596: options |= HTML_PARSE_NOIMPLIED; 13597: } 13598: #endif 13599: else 13600: return(XML_ERR_INTERNAL_ERROR); 13601: 13602: if (ctxt == NULL) 13603: return(XML_ERR_NO_MEMORY); 13604: 13605: /* 13606: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13607: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13608: * we must wait until the last moment to free the original one. 
13609: */ 13610: if (doc->dict != NULL) { 13611: if (ctxt->dict != NULL) 13612: xmlDictFree(ctxt->dict); 13613: ctxt->dict = doc->dict; 13614: } else 13615: options |= XML_PARSE_NODICT; 13616: 13617: if (doc->encoding != NULL) { 13618: xmlCharEncodingHandlerPtr hdlr; 13619: 13620: if (ctxt->encoding != NULL) 13621: xmlFree((xmlChar *) ctxt->encoding); 13622: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13623: 13624: hdlr = xmlFindCharEncodingHandler(doc->encoding); 13625: if (hdlr != NULL) { 13626: xmlSwitchToEncoding(ctxt, hdlr); 13627: } else { 13628: return(XML_ERR_UNSUPPORTED_ENCODING); 13629: } 13630: } 13631: 13632: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13633: xmlDetectSAX2(ctxt); 13634: ctxt->myDoc = doc; 13635: 13636: fake = xmlNewComment(NULL); 13637: if (fake == NULL) { 13638: xmlFreeParserCtxt(ctxt); 13639: return(XML_ERR_NO_MEMORY); 13640: } 13641: xmlAddChild(node, fake); 13642: 13643: if (node->type == XML_ELEMENT_NODE) { 13644: nodePush(ctxt, node); 13645: /* 13646: * initialize the SAX2 namespaces stack 13647: */ 13648: cur = node; 13649: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13650: xmlNsPtr ns = cur->nsDef; 13651: const xmlChar *iprefix, *ihref; 13652: 13653: while (ns != NULL) { 13654: if (ctxt->dict) { 13655: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13656: ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13657: } else { 13658: iprefix = ns->prefix; 13659: ihref = ns->href; 13660: } 13661: 13662: if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13663: nsPush(ctxt, iprefix, ihref); 13664: nsnr++; 13665: } 13666: ns = ns->next; 13667: } 13668: cur = cur->parent; 13669: } 13670: ctxt->instate = XML_PARSER_CONTENT; 13671: } 13672: 13673: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13674: /* 13675: * ID/IDREF registration will be done in xmlValidateElement below 13676: */ 13677: ctxt->loadsubset |= XML_SKIP_IDS; 13678: } 13679: 13680: #ifdef LIBXML_HTML_ENABLED 13681: if (doc->type == 
XML_HTML_DOCUMENT_NODE) 13682: __htmlParseContent(ctxt); 13683: else 13684: #endif 13685: xmlParseContent(ctxt); 13686: 13687: nsPop(ctxt, nsnr); 13688: if ((RAW == '<') && (NXT(1) == '/')) { 13689: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13690: } else if (RAW != 0) { 13691: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13692: } 13693: if ((ctxt->node != NULL) && (ctxt->node != node)) { 13694: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13695: ctxt->wellFormed = 0; 13696: } 13697: 13698: if (!ctxt->wellFormed) { 13699: if (ctxt->errNo == 0) 13700: ret = XML_ERR_INTERNAL_ERROR; 13701: else 13702: ret = (xmlParserErrors)ctxt->errNo; 13703: } else { 13704: ret = XML_ERR_OK; 13705: } 13706: 13707: /* 13708: * Return the newly created nodeset after unlinking it from 13709: * the pseudo sibling. 13710: */ 13711: 13712: cur = fake->next; 13713: fake->next = NULL; 13714: node->last = fake; 13715: 13716: if (cur != NULL) { 13717: cur->prev = NULL; 13718: } 13719: 13720: *lst = cur; 13721: 13722: while (cur != NULL) { 13723: cur->parent = NULL; 13724: cur = cur->next; 13725: } 13726: 13727: xmlUnlinkNode(fake); 13728: xmlFreeNode(fake); 13729: 13730: 13731: if (ret != XML_ERR_OK) { 13732: xmlFreeNodeList(*lst); 13733: *lst = NULL; 13734: } 13735: 13736: if (doc->dict != NULL) 13737: ctxt->dict = NULL; 13738: xmlFreeParserCtxt(ctxt); 13739: 13740: return(ret); 13741: #else /* !SAX2 */ 13742: return(XML_ERR_INTERNAL_ERROR); 13743: #endif 13744: } 13745: 13746: #ifdef LIBXML_SAX1_ENABLED 13747: /** 13748: * xmlParseBalancedChunkMemoryRecover: 13749: * @doc: the document the chunk pertains to 13750: * @sax: the SAX handler bloc (possibly NULL) 13751: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13752: * @depth: Used for loop detection, use 0 13753: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13754: * @lst: the return value for the set of parsed nodes 13755: * @recover: return nodes even if the data is broken 
(use 0)
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 *
 * In case recover is set to 1, the nodelist will not be empty even if
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
 * some extent.
 */
int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
     int recover) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content, newRoot;
    int size;
    int ret = 0;

    if (depth > 40) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(-1);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(-1);
    ctxt->userData = ctxt;
    if (sax != NULL) {
        oldsax = ctxt->sax;
        ctxt->sax = sax;
        if (user_data != NULL)
            ctxt->userData = user_data;
    }
    newDoc = xmlNewDoc(BAD_CAST "1.0");
    if (newDoc == NULL) {
        /*
         * Put the context's own handler back before freeing it, as the
         * other exit paths do: the caller keeps ownership of @sax.
         */
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        return(-1);
    }
    newDoc->properties = XML_DOC_INTERNAL;
    if ((doc != NULL) && (doc->dict != NULL)) {
        /* share the document dictionary, holding a reference on it */
        xmlDictFree(ctxt->dict);
        ctxt->dict = doc->dict;
        xmlDictReference(ctxt->dict);
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
        ctxt->dictNames = 1;
    } else {
        xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
    }
    if (doc != NULL) {
        newDoc->intSubset = doc->intSubset;
        newDoc->extSubset = doc->extSubset;
    }
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        /* the subsets belong to @doc, detach them before freeing newDoc */
        newDoc->intSubset = NULL;
        newDoc->extSubset = NULL;
        xmlFreeDoc(newDoc);
        return(-1);
    }
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
    nodePush(ctxt, newRoot);
    /* parsing always happens inside the scratch document */
    ctxt->myDoc = newDoc;
    if (doc != NULL) {
        newDoc->children->doc = doc;
        /* Ensure that doc has XML spec namespace */
        xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
        newDoc->oldNs = doc->oldNs;
    }
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = depth;

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    xmlDetectSAX2(ctxt);

    if ( doc != NULL ){
        /* hide the document's children while the chunk is parsed */
        content = doc->children;
        doc->children = NULL;
        xmlParseContent(ctxt);
        doc->children = content;
    }
    else {
        xmlParseContent(ctxt);
    }
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != newDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = 1;
        else
            ret = ctxt->errNo;
    } else {
      ret = 0;
    }

    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * they pseudo parent.
         */
        cur = newDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
            xmlSetTreeDoc(cur, doc);
            cur->parent = NULL;
            cur = cur->next;
        }
        newDoc->children->children = NULL;
    }

    if (sax != NULL)
        ctxt->sax = oldsax;
    xmlFreeParserCtxt(ctxt);
    /* detach everything borrowed from @doc before freeing newDoc */
    newDoc->intSubset = NULL;
    newDoc->extSubset = NULL;
    newDoc->oldNs = NULL;
    xmlFreeDoc(newDoc);

    return(ret);
}

/**
 * xmlSAXParseEntity:
 * @sax:  the SAX handler block
 * @filename:  the filename
 *
 * parse an XML external entity out of context and build a tree.
 * It use the given SAX function block to handle the parsing callback.
 * If sax is NULL, fallback to the default DOM tree building routines.
 *
 * [78] extParsedEnt ::= TextDecl?
content 13921: * 13922: * This correspond to a "Well Balanced" chunk 13923: * 13924: * Returns the resulting document tree 13925: */ 13926: 13927: xmlDocPtr 13928: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13929: xmlDocPtr ret; 13930: xmlParserCtxtPtr ctxt; 13931: 13932: ctxt = xmlCreateFileParserCtxt(filename); 13933: if (ctxt == NULL) { 13934: return(NULL); 13935: } 13936: if (sax != NULL) { 13937: if (ctxt->sax != NULL) 13938: xmlFree(ctxt->sax); 13939: ctxt->sax = sax; 13940: ctxt->userData = NULL; 13941: } 13942: 13943: xmlParseExtParsedEnt(ctxt); 13944: 13945: if (ctxt->wellFormed) 13946: ret = ctxt->myDoc; 13947: else { 13948: ret = NULL; 13949: xmlFreeDoc(ctxt->myDoc); 13950: ctxt->myDoc = NULL; 13951: } 13952: if (sax != NULL) 13953: ctxt->sax = NULL; 13954: xmlFreeParserCtxt(ctxt); 13955: 13956: return(ret); 13957: } 13958: 13959: /** 13960: * xmlParseEntity: 13961: * @filename: the filename 13962: * 13963: * parse an XML external entity out of context and build a tree. 13964: * 13965: * [78] extParsedEnt ::= TextDecl? content 13966: * 13967: * This correspond to a "Well Balanced" chunk 13968: * 13969: * Returns the resulting document tree 13970: */ 13971: 13972: xmlDocPtr 13973: xmlParseEntity(const char *filename) { 13974: return(xmlSAXParseEntity(NULL, filename)); 13975: } 13976: #endif /* LIBXML_SAX1_ENABLED */ 13977: 13978: /** 13979: * xmlCreateEntityParserCtxtInternal: 13980: * @URL: the entity URL 13981: * @ID: the entity PUBLIC ID 13982: * @base: a possible base for the target URI 13983: * @pctx: parser context used to set options on new context 13984: * 13985: * Create a parser context for an external entity 13986: * Automatic support for ZLIB/Compress compressed document is provided 13987: * by default if found at compile-time. 
13988: * 13989: * Returns the new parser context or NULL 13990: */ 13991: static xmlParserCtxtPtr 13992: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13993: const xmlChar *base, xmlParserCtxtPtr pctx) { 13994: xmlParserCtxtPtr ctxt; 13995: xmlParserInputPtr inputStream; 13996: char *directory = NULL; 13997: xmlChar *uri; 13998: 13999: ctxt = xmlNewParserCtxt(); 14000: if (ctxt == NULL) { 14001: return(NULL); 14002: } 14003: 14004: if (pctx != NULL) { 14005: ctxt->options = pctx->options; 14006: ctxt->_private = pctx->_private; 14007: } 14008: 14009: uri = xmlBuildURI(URL, base); 14010: 14011: if (uri == NULL) { 14012: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14013: if (inputStream == NULL) { 14014: xmlFreeParserCtxt(ctxt); 14015: return(NULL); 14016: } 14017: 14018: inputPush(ctxt, inputStream); 14019: 14020: if ((ctxt->directory == NULL) && (directory == NULL)) 14021: directory = xmlParserGetDirectory((char *)URL); 14022: if ((ctxt->directory == NULL) && (directory != NULL)) 14023: ctxt->directory = directory; 14024: } else { 14025: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14026: if (inputStream == NULL) { 14027: xmlFree(uri); 14028: xmlFreeParserCtxt(ctxt); 14029: return(NULL); 14030: } 14031: 14032: inputPush(ctxt, inputStream); 14033: 14034: if ((ctxt->directory == NULL) && (directory == NULL)) 14035: directory = xmlParserGetDirectory((char *)uri); 14036: if ((ctxt->directory == NULL) && (directory != NULL)) 14037: ctxt->directory = directory; 14038: xmlFree(uri); 14039: } 14040: return(ctxt); 14041: } 14042: 14043: /** 14044: * xmlCreateEntityParserCtxt: 14045: * @URL: the entity URL 14046: * @ID: the entity PUBLIC ID 14047: * @base: a possible base for the target URI 14048: * 14049: * Create a parser context for an external entity 14050: * Automatic support for ZLIB/Compress compressed document is provided 14051: * by default if found at compile-time. 
14052: * 14053: * Returns the new parser context or NULL 14054: */ 14055: xmlParserCtxtPtr 14056: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14057: const xmlChar *base) { 14058: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14059: 14060: } 14061: 14062: /************************************************************************ 14063: * * 14064: * Front ends when parsing from a file * 14065: * * 14066: ************************************************************************/ 14067: 14068: /** 14069: * xmlCreateURLParserCtxt: 14070: * @filename: the filename or URL 14071: * @options: a combination of xmlParserOption 14072: * 14073: * Create a parser context for a file or URL content. 14074: * Automatic support for ZLIB/Compress compressed document is provided 14075: * by default if found at compile-time and for file accesses 14076: * 14077: * Returns the new parser context or NULL 14078: */ 14079: xmlParserCtxtPtr 14080: xmlCreateURLParserCtxt(const char *filename, int options) 14081: { 14082: xmlParserCtxtPtr ctxt; 14083: xmlParserInputPtr inputStream; 14084: char *directory = NULL; 14085: 14086: ctxt = xmlNewParserCtxt(); 14087: if (ctxt == NULL) { 14088: xmlErrMemory(NULL, "cannot allocate parser context"); 14089: return(NULL); 14090: } 14091: 14092: if (options) 14093: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14094: ctxt->linenumbers = 1; 14095: 14096: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14097: if (inputStream == NULL) { 14098: xmlFreeParserCtxt(ctxt); 14099: return(NULL); 14100: } 14101: 14102: inputPush(ctxt, inputStream); 14103: if ((ctxt->directory == NULL) && (directory == NULL)) 14104: directory = xmlParserGetDirectory(filename); 14105: if ((ctxt->directory == NULL) && (directory != NULL)) 14106: ctxt->directory = directory; 14107: 14108: return(ctxt); 14109: } 14110: 14111: /** 14112: * xmlCreateFileParserCtxt: 14113: * @filename: the filename 14114: * 14115: * Create a parser context for 
a file content. 14116: * Automatic support for ZLIB/Compress compressed document is provided 14117: * by default if found at compile-time. 14118: * 14119: * Returns the new parser context or NULL 14120: */ 14121: xmlParserCtxtPtr 14122: xmlCreateFileParserCtxt(const char *filename) 14123: { 14124: return(xmlCreateURLParserCtxt(filename, 0)); 14125: } 14126: 14127: #ifdef LIBXML_SAX1_ENABLED 14128: /** 14129: * xmlSAXParseFileWithData: 14130: * @sax: the SAX handler block 14131: * @filename: the filename 14132: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14133: * documents 14134: * @data: the userdata 14135: * 14136: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14137: * compressed document is provided by default if found at compile-time. 14138: * It use the given SAX function block to handle the parsing callback. 14139: * If sax is NULL, fallback to the default DOM tree building routines. 14140: * 14141: * User data (void *) is stored within the parser context in the 14142: * context's _private member, so it is available nearly everywhere in libxml 14143: * 14144: * Returns the resulting document tree 14145: */ 14146: 14147: xmlDocPtr 14148: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14149: int recovery, void *data) { 14150: xmlDocPtr ret; 14151: xmlParserCtxtPtr ctxt; 14152: 14153: xmlInitParser(); 14154: 14155: ctxt = xmlCreateFileParserCtxt(filename); 14156: if (ctxt == NULL) { 14157: return(NULL); 14158: } 14159: if (sax != NULL) { 14160: if (ctxt->sax != NULL) 14161: xmlFree(ctxt->sax); 14162: ctxt->sax = sax; 14163: } 14164: xmlDetectSAX2(ctxt); 14165: if (data!=NULL) { 14166: ctxt->_private = data; 14167: } 14168: 14169: if (ctxt->directory == NULL) 14170: ctxt->directory = xmlParserGetDirectory(filename); 14171: 14172: ctxt->recovery = recovery; 14173: 14174: xmlParseDocument(ctxt); 14175: 14176: if ((ctxt->wellFormed) || recovery) { 14177: ret = ctxt->myDoc; 14178: if (ret != 
NULL) { 14179: if (ctxt->input->buf->compressed > 0) 14180: ret->compression = 9; 14181: else 14182: ret->compression = ctxt->input->buf->compressed; 14183: } 14184: } 14185: else { 14186: ret = NULL; 14187: xmlFreeDoc(ctxt->myDoc); 14188: ctxt->myDoc = NULL; 14189: } 14190: if (sax != NULL) 14191: ctxt->sax = NULL; 14192: xmlFreeParserCtxt(ctxt); 14193: 14194: return(ret); 14195: } 14196: 14197: /** 14198: * xmlSAXParseFile: 14199: * @sax: the SAX handler block 14200: * @filename: the filename 14201: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14202: * documents 14203: * 14204: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14205: * compressed document is provided by default if found at compile-time. 14206: * It use the given SAX function block to handle the parsing callback. 14207: * If sax is NULL, fallback to the default DOM tree building routines. 14208: * 14209: * Returns the resulting document tree 14210: */ 14211: 14212: xmlDocPtr 14213: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14214: int recovery) { 14215: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14216: } 14217: 14218: /** 14219: * xmlRecoverDoc: 14220: * @cur: a pointer to an array of xmlChar 14221: * 14222: * parse an XML in-memory document and build a tree. 14223: * In the case the document is not Well Formed, a attempt to build a 14224: * tree is tried anyway 14225: * 14226: * Returns the resulting document tree or NULL in case of failure 14227: */ 14228: 14229: xmlDocPtr 14230: xmlRecoverDoc(const xmlChar *cur) { 14231: return(xmlSAXParseDoc(NULL, cur, 1)); 14232: } 14233: 14234: /** 14235: * xmlParseFile: 14236: * @filename: the filename 14237: * 14238: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14239: * compressed document is provided by default if found at compile-time. 14240: * 14241: * Returns the resulting document tree if the file was wellformed, 14242: * NULL otherwise. 
14243: */ 14244: 14245: xmlDocPtr 14246: xmlParseFile(const char *filename) { 14247: return(xmlSAXParseFile(NULL, filename, 0)); 14248: } 14249: 14250: /** 14251: * xmlRecoverFile: 14252: * @filename: the filename 14253: * 14254: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14255: * compressed document is provided by default if found at compile-time. 14256: * In the case the document is not Well Formed, it attempts to build 14257: * a tree anyway 14258: * 14259: * Returns the resulting document tree or NULL in case of failure 14260: */ 14261: 14262: xmlDocPtr 14263: xmlRecoverFile(const char *filename) { 14264: return(xmlSAXParseFile(NULL, filename, 1)); 14265: } 14266: 14267: 14268: /** 14269: * xmlSetupParserForBuffer: 14270: * @ctxt: an XML parser context 14271: * @buffer: a xmlChar * buffer 14272: * @filename: a file name 14273: * 14274: * Setup the parser context to parse a new buffer; Clears any prior 14275: * contents from the parser context. The buffer parameter must not be 14276: * NULL, but the filename parameter can be 14277: */ 14278: void 14279: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14280: const char* filename) 14281: { 14282: xmlParserInputPtr input; 14283: 14284: if ((ctxt == NULL) || (buffer == NULL)) 14285: return; 14286: 14287: input = xmlNewInputStream(ctxt); 14288: if (input == NULL) { 14289: xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14290: xmlClearParserCtxt(ctxt); 14291: return; 14292: } 14293: 14294: xmlClearParserCtxt(ctxt); 14295: if (filename != NULL) 14296: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14297: input->base = buffer; 14298: input->cur = buffer; 14299: input->end = &buffer[xmlStrlen(buffer)]; 14300: inputPush(ctxt, input); 14301: } 14302: 14303: /** 14304: * xmlSAXUserParseFile: 14305: * @sax: a SAX handler 14306: * @user_data: The user data returned on SAX callbacks 14307: * @filename: a file name 14308: * 14309: * parse an 
XML file and call the given SAX handler routines. 14310: * Automatic support for ZLIB/Compress compressed document is provided 14311: * 14312: * Returns 0 in case of success or a error number otherwise 14313: */ 14314: int 14315: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14316: const char *filename) { 14317: int ret = 0; 14318: xmlParserCtxtPtr ctxt; 14319: 14320: ctxt = xmlCreateFileParserCtxt(filename); 14321: if (ctxt == NULL) return -1; 14322: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14323: xmlFree(ctxt->sax); 14324: ctxt->sax = sax; 14325: xmlDetectSAX2(ctxt); 14326: 14327: if (user_data != NULL) 14328: ctxt->userData = user_data; 14329: 14330: xmlParseDocument(ctxt); 14331: 14332: if (ctxt->wellFormed) 14333: ret = 0; 14334: else { 14335: if (ctxt->errNo != 0) 14336: ret = ctxt->errNo; 14337: else 14338: ret = -1; 14339: } 14340: if (sax != NULL) 14341: ctxt->sax = NULL; 14342: if (ctxt->myDoc != NULL) { 14343: xmlFreeDoc(ctxt->myDoc); 14344: ctxt->myDoc = NULL; 14345: } 14346: xmlFreeParserCtxt(ctxt); 14347: 14348: return ret; 14349: } 14350: #endif /* LIBXML_SAX1_ENABLED */ 14351: 14352: /************************************************************************ 14353: * * 14354: * Front ends when parsing from memory * 14355: * * 14356: ************************************************************************/ 14357: 14358: /** 14359: * xmlCreateMemoryParserCtxt: 14360: * @buffer: a pointer to a char array 14361: * @size: the size of the array 14362: * 14363: * Create a parser context for an XML in-memory document. 
14364: * 14365: * Returns the new parser context or NULL 14366: */ 14367: xmlParserCtxtPtr 14368: xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14369: xmlParserCtxtPtr ctxt; 14370: xmlParserInputPtr input; 14371: xmlParserInputBufferPtr buf; 14372: 14373: if (buffer == NULL) 14374: return(NULL); 14375: if (size <= 0) 14376: return(NULL); 14377: 14378: ctxt = xmlNewParserCtxt(); 14379: if (ctxt == NULL) 14380: return(NULL); 14381: 14382: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14383: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14384: if (buf == NULL) { 14385: xmlFreeParserCtxt(ctxt); 14386: return(NULL); 14387: } 14388: 14389: input = xmlNewInputStream(ctxt); 14390: if (input == NULL) { 14391: xmlFreeParserInputBuffer(buf); 14392: xmlFreeParserCtxt(ctxt); 14393: return(NULL); 14394: } 14395: 14396: input->filename = NULL; 14397: input->buf = buf; 14398: xmlBufResetInput(input->buf->buffer, input); 14399: 14400: inputPush(ctxt, input); 14401: return(ctxt); 14402: } 14403: 14404: #ifdef LIBXML_SAX1_ENABLED 14405: /** 14406: * xmlSAXParseMemoryWithData: 14407: * @sax: the SAX handler block 14408: * @buffer: an pointer to a char array 14409: * @size: the size of the array 14410: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14411: * documents 14412: * @data: the userdata 14413: * 14414: * parse an XML in-memory block and use the given SAX function block 14415: * to handle the parsing callback. If sax is NULL, fallback to the default 14416: * DOM tree building routines. 
14417: * 14418: * User data (void *) is stored within the parser context in the 14419: * context's _private member, so it is available nearly everywhere in libxml 14420: * 14421: * Returns the resulting document tree 14422: */ 14423: 14424: xmlDocPtr 14425: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14426: int size, int recovery, void *data) { 14427: xmlDocPtr ret; 14428: xmlParserCtxtPtr ctxt; 14429: 14430: xmlInitParser(); 14431: 14432: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14433: if (ctxt == NULL) return(NULL); 14434: if (sax != NULL) { 14435: if (ctxt->sax != NULL) 14436: xmlFree(ctxt->sax); 14437: ctxt->sax = sax; 14438: } 14439: xmlDetectSAX2(ctxt); 14440: if (data!=NULL) { 14441: ctxt->_private=data; 14442: } 14443: 14444: ctxt->recovery = recovery; 14445: 14446: xmlParseDocument(ctxt); 14447: 14448: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14449: else { 14450: ret = NULL; 14451: xmlFreeDoc(ctxt->myDoc); 14452: ctxt->myDoc = NULL; 14453: } 14454: if (sax != NULL) 14455: ctxt->sax = NULL; 14456: xmlFreeParserCtxt(ctxt); 14457: 14458: return(ret); 14459: } 14460: 14461: /** 14462: * xmlSAXParseMemory: 14463: * @sax: the SAX handler block 14464: * @buffer: an pointer to a char array 14465: * @size: the size of the array 14466: * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14467: * documents 14468: * 14469: * parse an XML in-memory block and use the given SAX function block 14470: * to handle the parsing callback. If sax is NULL, fallback to the default 14471: * DOM tree building routines. 
14472: * 14473: * Returns the resulting document tree 14474: */ 14475: xmlDocPtr 14476: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14477: int size, int recovery) { 14478: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14479: } 14480: 14481: /** 14482: * xmlParseMemory: 14483: * @buffer: an pointer to a char array 14484: * @size: the size of the array 14485: * 14486: * parse an XML in-memory block and build a tree. 14487: * 14488: * Returns the resulting document tree 14489: */ 14490: 14491: xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14492: return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14493: } 14494: 14495: /** 14496: * xmlRecoverMemory: 14497: * @buffer: an pointer to a char array 14498: * @size: the size of the array 14499: * 14500: * parse an XML in-memory block and build a tree. 14501: * In the case the document is not Well Formed, an attempt to 14502: * build a tree is tried anyway 14503: * 14504: * Returns the resulting document tree or NULL in case of error 14505: */ 14506: 14507: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14508: return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14509: } 14510: 14511: /** 14512: * xmlSAXUserParseMemory: 14513: * @sax: a SAX handler 14514: * @user_data: The user data returned on SAX callbacks 14515: * @buffer: an in-memory XML document input 14516: * @size: the length of the XML document in bytes 14517: * 14518: * A better SAX parsing routine. 14519: * parse an XML in-memory buffer and call the given SAX handler routines. 
14520: * 14521: * Returns 0 in case of success or a error number otherwise 14522: */ 14523: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14524: const char *buffer, int size) { 14525: int ret = 0; 14526: xmlParserCtxtPtr ctxt; 14527: 14528: xmlInitParser(); 14529: 14530: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14531: if (ctxt == NULL) return -1; 14532: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14533: xmlFree(ctxt->sax); 14534: ctxt->sax = sax; 14535: xmlDetectSAX2(ctxt); 14536: 14537: if (user_data != NULL) 14538: ctxt->userData = user_data; 14539: 14540: xmlParseDocument(ctxt); 14541: 14542: if (ctxt->wellFormed) 14543: ret = 0; 14544: else { 14545: if (ctxt->errNo != 0) 14546: ret = ctxt->errNo; 14547: else 14548: ret = -1; 14549: } 14550: if (sax != NULL) 14551: ctxt->sax = NULL; 14552: if (ctxt->myDoc != NULL) { 14553: xmlFreeDoc(ctxt->myDoc); 14554: ctxt->myDoc = NULL; 14555: } 14556: xmlFreeParserCtxt(ctxt); 14557: 14558: return ret; 14559: } 14560: #endif /* LIBXML_SAX1_ENABLED */ 14561: 14562: /** 14563: * xmlCreateDocParserCtxt: 14564: * @cur: a pointer to an array of xmlChar 14565: * 14566: * Creates a parser context for an XML in-memory document. 14567: * 14568: * Returns the new parser context or NULL 14569: */ 14570: xmlParserCtxtPtr 14571: xmlCreateDocParserCtxt(const xmlChar *cur) { 14572: int len; 14573: 14574: if (cur == NULL) 14575: return(NULL); 14576: len = xmlStrlen(cur); 14577: return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14578: } 14579: 14580: #ifdef LIBXML_SAX1_ENABLED 14581: /** 14582: * xmlSAXParseDoc: 14583: * @sax: the SAX handler block 14584: * @cur: a pointer to an array of xmlChar 14585: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14586: * documents 14587: * 14588: * parse an XML in-memory document and build a tree. 14589: * It use the given SAX function block to handle the parsing callback. 
14590: * If sax is NULL, fallback to the default DOM tree building routines. 14591: * 14592: * Returns the resulting document tree 14593: */ 14594: 14595: xmlDocPtr 14596: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14597: xmlDocPtr ret; 14598: xmlParserCtxtPtr ctxt; 14599: xmlSAXHandlerPtr oldsax = NULL; 14600: 14601: if (cur == NULL) return(NULL); 14602: 14603: 14604: ctxt = xmlCreateDocParserCtxt(cur); 14605: if (ctxt == NULL) return(NULL); 14606: if (sax != NULL) { 14607: oldsax = ctxt->sax; 14608: ctxt->sax = sax; 14609: ctxt->userData = NULL; 14610: } 14611: xmlDetectSAX2(ctxt); 14612: 14613: xmlParseDocument(ctxt); 14614: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14615: else { 14616: ret = NULL; 14617: xmlFreeDoc(ctxt->myDoc); 14618: ctxt->myDoc = NULL; 14619: } 14620: if (sax != NULL) 14621: ctxt->sax = oldsax; 14622: xmlFreeParserCtxt(ctxt); 14623: 14624: return(ret); 14625: } 14626: 14627: /** 14628: * xmlParseDoc: 14629: * @cur: a pointer to an array of xmlChar 14630: * 14631: * parse an XML in-memory document and build a tree. 
14632: * 14633: * Returns the resulting document tree 14634: */ 14635: 14636: xmlDocPtr 14637: xmlParseDoc(const xmlChar *cur) { 14638: return(xmlSAXParseDoc(NULL, cur, 0)); 14639: } 14640: #endif /* LIBXML_SAX1_ENABLED */ 14641: 14642: #ifdef LIBXML_LEGACY_ENABLED 14643: /************************************************************************ 14644: * * 14645: * Specific function to keep track of entities references * 14646: * and used by the XSLT debugger * 14647: * * 14648: ************************************************************************/ 14649: 14650: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14651: 14652: /** 14653: * xmlAddEntityReference: 14654: * @ent : A valid entity 14655: * @firstNode : A valid first node for children of entity 14656: * @lastNode : A valid last node of children entity 14657: * 14658: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14659: */ 14660: static void 14661: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14662: xmlNodePtr lastNode) 14663: { 14664: if (xmlEntityRefFunc != NULL) { 14665: (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14666: } 14667: } 14668: 14669: 14670: /** 14671: * xmlSetEntityReferenceFunc: 14672: * @func: A valid function 14673: * 14674: * Set the function to call call back when a xml reference has been made 14675: */ 14676: void 14677: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14678: { 14679: xmlEntityRefFunc = func; 14680: } 14681: #endif /* LIBXML_LEGACY_ENABLED */ 14682: 14683: /************************************************************************ 14684: * * 14685: * Miscellaneous * 14686: * * 14687: ************************************************************************/ 14688: 14689: #ifdef LIBXML_XPATH_ENABLED 14690: #include <libxml/xpath.h> 14691: #endif 14692: 14693: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14694: static int xmlParserInitialized = 0; 14695: 14696: /** 14697: 
* xmlInitParser: 14698: * 14699: * Initialization function for the XML parser. 14700: * This is not reentrant. Call once before processing in case of 14701: * use in multithreaded programs. 14702: */ 14703: 14704: void 14705: xmlInitParser(void) { 14706: if (xmlParserInitialized != 0) 14707: return; 14708: 14709: #ifdef LIBXML_THREAD_ENABLED 14710: __xmlGlobalInitMutexLock(); 14711: if (xmlParserInitialized == 0) { 14712: #endif 14713: xmlInitThreads(); 14714: xmlInitGlobals(); 14715: if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14716: (xmlGenericError == NULL)) 14717: initGenericErrorDefaultFunc(NULL); 14718: xmlInitMemory(); 14719: xmlInitializeDict(); 14720: xmlInitCharEncodingHandlers(); 14721: xmlDefaultSAXHandlerInit(); 14722: xmlRegisterDefaultInputCallbacks(); 14723: #ifdef LIBXML_OUTPUT_ENABLED 14724: xmlRegisterDefaultOutputCallbacks(); 14725: #endif /* LIBXML_OUTPUT_ENABLED */ 14726: #ifdef LIBXML_HTML_ENABLED 14727: htmlInitAutoClose(); 14728: htmlDefaultSAXHandlerInit(); 14729: #endif 14730: #ifdef LIBXML_XPATH_ENABLED 14731: xmlXPathInit(); 14732: #endif 14733: xmlParserInitialized = 1; 14734: #ifdef LIBXML_THREAD_ENABLED 14735: } 14736: __xmlGlobalInitMutexUnlock(); 14737: #endif 14738: } 14739: 14740: /** 14741: * xmlCleanupParser: 14742: * 14743: * This function name is somewhat misleading. It does not clean up 14744: * parser state, it cleans up memory allocated by the library itself. 14745: * It is a cleanup function for the XML library. It tries to reclaim all 14746: * related global memory allocated for the library processing. 14747: * It doesn't deallocate any document related memory. One should 14748: * call xmlCleanupParser() only when the process has finished using 14749: * the library and all XML/HTML documents built with it. 14750: * See also xmlInitParser() which has the opposite function of preparing 14751: * the library for operations. 
14752: * 14753: * WARNING: if your application is multithreaded or has plugin support 14754: * calling this may crash the application if another thread or 14755: * a plugin is still using libxml2. It's sometimes very hard to 14756: * guess if libxml2 is in use in the application, some libraries 14757: * or plugins may use it without notice. In case of doubt abstain 14758: * from calling this function or do it just before calling exit() 14759: * to avoid leak reports from valgrind ! 14760: */ 14761: 14762: void 14763: xmlCleanupParser(void) { 14764: if (!xmlParserInitialized) 14765: return; 14766: 14767: xmlCleanupCharEncodingHandlers(); 14768: #ifdef LIBXML_CATALOG_ENABLED 14769: xmlCatalogCleanup(); 14770: #endif 14771: xmlDictCleanup(); 14772: xmlCleanupInputCallbacks(); 14773: #ifdef LIBXML_OUTPUT_ENABLED 14774: xmlCleanupOutputCallbacks(); 14775: #endif 14776: #ifdef LIBXML_SCHEMAS_ENABLED 14777: xmlSchemaCleanupTypes(); 14778: xmlRelaxNGCleanupTypes(); 14779: #endif 14780: xmlCleanupGlobals(); 14781: xmlResetLastError(); 14782: xmlCleanupThreads(); /* must be last if called not from the main thread */ 14783: xmlCleanupMemory(); 14784: xmlParserInitialized = 0; 14785: } 14786: 14787: /************************************************************************ 14788: * * 14789: * New set (2.6.0) of simpler and more flexible APIs * 14790: * * 14791: ************************************************************************/ 14792: 14793: /** 14794: * DICT_FREE: 14795: * @str: a string 14796: * 14797: * Free a string if it is not owned by the "dict" dictionnary in the 14798: * current scope 14799: */ 14800: #define DICT_FREE(str) \ 14801: if ((str) && ((!dict) || \ 14802: (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14803: xmlFree((char *)(str)); 14804: 14805: /** 14806: * xmlCtxtReset: 14807: * @ctxt: an XML parser context 14808: * 14809: * Reset a parser context 14810: */ 14811: void 14812: xmlCtxtReset(xmlParserCtxtPtr ctxt) 14813: { 14814: xmlParserInputPtr 
input; 14815: xmlDictPtr dict; 14816: 14817: if (ctxt == NULL) 14818: return; 14819: 14820: dict = ctxt->dict; 14821: 14822: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14823: xmlFreeInputStream(input); 14824: } 14825: ctxt->inputNr = 0; 14826: ctxt->input = NULL; 14827: 14828: ctxt->spaceNr = 0; 14829: if (ctxt->spaceTab != NULL) { 14830: ctxt->spaceTab[0] = -1; 14831: ctxt->space = &ctxt->spaceTab[0]; 14832: } else { 14833: ctxt->space = NULL; 14834: } 14835: 14836: 14837: ctxt->nodeNr = 0; 14838: ctxt->node = NULL; 14839: 14840: ctxt->nameNr = 0; 14841: ctxt->name = NULL; 14842: 14843: DICT_FREE(ctxt->version); 14844: ctxt->version = NULL; 14845: DICT_FREE(ctxt->encoding); 14846: ctxt->encoding = NULL; 14847: DICT_FREE(ctxt->directory); 14848: ctxt->directory = NULL; 14849: DICT_FREE(ctxt->extSubURI); 14850: ctxt->extSubURI = NULL; 14851: DICT_FREE(ctxt->extSubSystem); 14852: ctxt->extSubSystem = NULL; 14853: if (ctxt->myDoc != NULL) 14854: xmlFreeDoc(ctxt->myDoc); 14855: ctxt->myDoc = NULL; 14856: 14857: ctxt->standalone = -1; 14858: ctxt->hasExternalSubset = 0; 14859: ctxt->hasPErefs = 0; 14860: ctxt->html = 0; 14861: ctxt->external = 0; 14862: ctxt->instate = XML_PARSER_START; 14863: ctxt->token = 0; 14864: 14865: ctxt->wellFormed = 1; 14866: ctxt->nsWellFormed = 1; 14867: ctxt->disableSAX = 0; 14868: ctxt->valid = 1; 14869: #if 0 14870: ctxt->vctxt.userData = ctxt; 14871: ctxt->vctxt.error = xmlParserValidityError; 14872: ctxt->vctxt.warning = xmlParserValidityWarning; 14873: #endif 14874: ctxt->record_info = 0; 14875: ctxt->nbChars = 0; 14876: ctxt->checkIndex = 0; 14877: ctxt->inSubset = 0; 14878: ctxt->errNo = XML_ERR_OK; 14879: ctxt->depth = 0; 14880: ctxt->charset = XML_CHAR_ENCODING_UTF8; 14881: ctxt->catalogs = NULL; 14882: ctxt->nbentities = 0; 14883: ctxt->sizeentities = 0; 14884: ctxt->sizeentcopy = 0; 14885: xmlInitNodeInfoSeq(&ctxt->node_seq); 14886: 14887: if (ctxt->attsDefault != NULL) { 14888: xmlHashFree(ctxt->attsDefault, 
(xmlHashDeallocator) xmlFree); 14889: ctxt->attsDefault = NULL; 14890: } 14891: if (ctxt->attsSpecial != NULL) { 14892: xmlHashFree(ctxt->attsSpecial, NULL); 14893: ctxt->attsSpecial = NULL; 14894: } 14895: 14896: #ifdef LIBXML_CATALOG_ENABLED 14897: if (ctxt->catalogs != NULL) 14898: xmlCatalogFreeLocal(ctxt->catalogs); 14899: #endif 14900: if (ctxt->lastError.code != XML_ERR_OK) 14901: xmlResetError(&ctxt->lastError); 14902: } 14903: 14904: /** 14905: * xmlCtxtResetPush: 14906: * @ctxt: an XML parser context 14907: * @chunk: a pointer to an array of chars 14908: * @size: number of chars in the array 14909: * @filename: an optional file name or URI 14910: * @encoding: the document encoding, or NULL 14911: * 14912: * Reset a push parser context 14913: * 14914: * Returns 0 in case of success and 1 in case of error 14915: */ 14916: int 14917: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14918: int size, const char *filename, const char *encoding) 14919: { 14920: xmlParserInputPtr inputStream; 14921: xmlParserInputBufferPtr buf; 14922: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14923: 14924: if (ctxt == NULL) 14925: return(1); 14926: 14927: if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14928: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14929: 14930: buf = xmlAllocParserInputBuffer(enc); 14931: if (buf == NULL) 14932: return(1); 14933: 14934: if (ctxt == NULL) { 14935: xmlFreeParserInputBuffer(buf); 14936: return(1); 14937: } 14938: 14939: xmlCtxtReset(ctxt); 14940: 14941: if (ctxt->pushTab == NULL) { 14942: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14943: sizeof(xmlChar *)); 14944: if (ctxt->pushTab == NULL) { 14945: xmlErrMemory(ctxt, NULL); 14946: xmlFreeParserInputBuffer(buf); 14947: return(1); 14948: } 14949: } 14950: 14951: if (filename == NULL) { 14952: ctxt->directory = NULL; 14953: } else { 14954: ctxt->directory = xmlParserGetDirectory(filename); 14955: } 14956: 14957: inputStream = 
xmlNewInputStream(ctxt); 14958: if (inputStream == NULL) { 14959: xmlFreeParserInputBuffer(buf); 14960: return(1); 14961: } 14962: 14963: if (filename == NULL) 14964: inputStream->filename = NULL; 14965: else 14966: inputStream->filename = (char *) 14967: xmlCanonicPath((const xmlChar *) filename); 14968: inputStream->buf = buf; 14969: xmlBufResetInput(buf->buffer, inputStream); 14970: 14971: inputPush(ctxt, inputStream); 14972: 14973: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14974: (ctxt->input->buf != NULL)) { 14975: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14976: size_t cur = ctxt->input->cur - ctxt->input->base; 14977: 14978: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14979: 14980: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14981: #ifdef DEBUG_PUSH 14982: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14983: #endif 14984: } 14985: 14986: if (encoding != NULL) { 14987: xmlCharEncodingHandlerPtr hdlr; 14988: 14989: if (ctxt->encoding != NULL) 14990: xmlFree((xmlChar *) ctxt->encoding); 14991: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14992: 14993: hdlr = xmlFindCharEncodingHandler(encoding); 14994: if (hdlr != NULL) { 14995: xmlSwitchToEncoding(ctxt, hdlr); 14996: } else { 14997: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14998: "Unsupported encoding %s\n", BAD_CAST encoding); 14999: } 15000: } else if (enc != XML_CHAR_ENCODING_NONE) { 15001: xmlSwitchEncoding(ctxt, enc); 15002: } 15003: 15004: return(0); 15005: } 15006: 15007: 15008: /** 15009: * xmlCtxtUseOptionsInternal: 15010: * @ctxt: an XML parser context 15011: * @options: a combination of xmlParserOption 15012: * @encoding: the user provided encoding to use 15013: * 15014: * Applies the options to the parser context 15015: * 15016: * Returns 0 in case of success, the set of unknown or unimplemented options 15017: * in case of error. 
15018: */ 15019: static int 15020: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15021: { 15022: if (ctxt == NULL) 15023: return(-1); 15024: if (encoding != NULL) { 15025: if (ctxt->encoding != NULL) 15026: xmlFree((xmlChar *) ctxt->encoding); 15027: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15028: } 15029: if (options & XML_PARSE_RECOVER) { 15030: ctxt->recovery = 1; 15031: options -= XML_PARSE_RECOVER; 15032: ctxt->options |= XML_PARSE_RECOVER; 15033: } else 15034: ctxt->recovery = 0; 15035: if (options & XML_PARSE_DTDLOAD) { 15036: ctxt->loadsubset = XML_DETECT_IDS; 15037: options -= XML_PARSE_DTDLOAD; 15038: ctxt->options |= XML_PARSE_DTDLOAD; 15039: } else 15040: ctxt->loadsubset = 0; 15041: if (options & XML_PARSE_DTDATTR) { 15042: ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15043: options -= XML_PARSE_DTDATTR; 15044: ctxt->options |= XML_PARSE_DTDATTR; 15045: } 15046: if (options & XML_PARSE_NOENT) { 15047: ctxt->replaceEntities = 1; 15048: /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15049: options -= XML_PARSE_NOENT; 15050: ctxt->options |= XML_PARSE_NOENT; 15051: } else 15052: ctxt->replaceEntities = 0; 15053: if (options & XML_PARSE_PEDANTIC) { 15054: ctxt->pedantic = 1; 15055: options -= XML_PARSE_PEDANTIC; 15056: ctxt->options |= XML_PARSE_PEDANTIC; 15057: } else 15058: ctxt->pedantic = 0; 15059: if (options & XML_PARSE_NOBLANKS) { 15060: ctxt->keepBlanks = 0; 15061: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15062: options -= XML_PARSE_NOBLANKS; 15063: ctxt->options |= XML_PARSE_NOBLANKS; 15064: } else 15065: ctxt->keepBlanks = 1; 15066: if (options & XML_PARSE_DTDVALID) { 15067: ctxt->validate = 1; 15068: if (options & XML_PARSE_NOWARNING) 15069: ctxt->vctxt.warning = NULL; 15070: if (options & XML_PARSE_NOERROR) 15071: ctxt->vctxt.error = NULL; 15072: options -= XML_PARSE_DTDVALID; 15073: ctxt->options |= XML_PARSE_DTDVALID; 15074: } else 15075: ctxt->validate = 0; 15076: if (options & 
XML_PARSE_NOWARNING) { 15077: ctxt->sax->warning = NULL; 15078: options -= XML_PARSE_NOWARNING; 15079: } 15080: if (options & XML_PARSE_NOERROR) { 15081: ctxt->sax->error = NULL; 15082: ctxt->sax->fatalError = NULL; 15083: options -= XML_PARSE_NOERROR; 15084: } 15085: #ifdef LIBXML_SAX1_ENABLED 15086: if (options & XML_PARSE_SAX1) { 15087: ctxt->sax->startElement = xmlSAX2StartElement; 15088: ctxt->sax->endElement = xmlSAX2EndElement; 15089: ctxt->sax->startElementNs = NULL; 15090: ctxt->sax->endElementNs = NULL; 15091: ctxt->sax->initialized = 1; 15092: options -= XML_PARSE_SAX1; 15093: ctxt->options |= XML_PARSE_SAX1; 15094: } 15095: #endif /* LIBXML_SAX1_ENABLED */ 15096: if (options & XML_PARSE_NODICT) { 15097: ctxt->dictNames = 0; 15098: options -= XML_PARSE_NODICT; 15099: ctxt->options |= XML_PARSE_NODICT; 15100: } else { 15101: ctxt->dictNames = 1; 15102: } 15103: if (options & XML_PARSE_NOCDATA) { 15104: ctxt->sax->cdataBlock = NULL; 15105: options -= XML_PARSE_NOCDATA; 15106: ctxt->options |= XML_PARSE_NOCDATA; 15107: } 15108: if (options & XML_PARSE_NSCLEAN) { 15109: ctxt->options |= XML_PARSE_NSCLEAN; 15110: options -= XML_PARSE_NSCLEAN; 15111: } 15112: if (options & XML_PARSE_NONET) { 15113: ctxt->options |= XML_PARSE_NONET; 15114: options -= XML_PARSE_NONET; 15115: } 15116: if (options & XML_PARSE_COMPACT) { 15117: ctxt->options |= XML_PARSE_COMPACT; 15118: options -= XML_PARSE_COMPACT; 15119: } 15120: if (options & XML_PARSE_OLD10) { 15121: ctxt->options |= XML_PARSE_OLD10; 15122: options -= XML_PARSE_OLD10; 15123: } 15124: if (options & XML_PARSE_NOBASEFIX) { 15125: ctxt->options |= XML_PARSE_NOBASEFIX; 15126: options -= XML_PARSE_NOBASEFIX; 15127: } 15128: if (options & XML_PARSE_HUGE) { 15129: ctxt->options |= XML_PARSE_HUGE; 15130: options -= XML_PARSE_HUGE; 15131: if (ctxt->dict != NULL) 15132: xmlDictSetLimit(ctxt->dict, 0); 15133: } 15134: if (options & XML_PARSE_OLDSAX) { 15135: ctxt->options |= XML_PARSE_OLDSAX; 15136: options -= 
XML_PARSE_OLDSAX; 15137: } 15138: if (options & XML_PARSE_IGNORE_ENC) { 15139: ctxt->options |= XML_PARSE_IGNORE_ENC; 15140: options -= XML_PARSE_IGNORE_ENC; 15141: } 15142: if (options & XML_PARSE_BIG_LINES) { 15143: ctxt->options |= XML_PARSE_BIG_LINES; 15144: options -= XML_PARSE_BIG_LINES; 15145: } 15146: ctxt->linenumbers = 1; 15147: return (options); 15148: } 15149: 15150: /** 15151: * xmlCtxtUseOptions: 15152: * @ctxt: an XML parser context 15153: * @options: a combination of xmlParserOption 15154: * 15155: * Applies the options to the parser context 15156: * 15157: * Returns 0 in case of success, the set of unknown or unimplemented options 15158: * in case of error. 15159: */ 15160: int 15161: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15162: { 15163: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15164: } 15165: 15166: /** 15167: * xmlDoRead: 15168: * @ctxt: an XML parser context 15169: * @URL: the base URL to use for the document 15170: * @encoding: the document encoding, or NULL 15171: * @options: a combination of xmlParserOption 15172: * @reuse: keep the context for reuse 15173: * 15174: * Common front-end for the xmlRead functions 15175: * 15176: * Returns the resulting document tree or NULL 15177: */ 15178: static xmlDocPtr 15179: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15180: int options, int reuse) 15181: { 15182: xmlDocPtr ret; 15183: 15184: xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15185: if (encoding != NULL) { 15186: xmlCharEncodingHandlerPtr hdlr; 15187: 15188: hdlr = xmlFindCharEncodingHandler(encoding); 15189: if (hdlr != NULL) 15190: xmlSwitchToEncoding(ctxt, hdlr); 15191: } 15192: if ((URL != NULL) && (ctxt->input != NULL) && 15193: (ctxt->input->filename == NULL)) 15194: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15195: xmlParseDocument(ctxt); 15196: if ((ctxt->wellFormed) || ctxt->recovery) 15197: ret = ctxt->myDoc; 15198: else { 15199: ret = NULL; 
15200: if (ctxt->myDoc != NULL) { 15201: xmlFreeDoc(ctxt->myDoc); 15202: } 15203: } 15204: ctxt->myDoc = NULL; 15205: if (!reuse) { 15206: xmlFreeParserCtxt(ctxt); 15207: } 15208: 15209: return (ret); 15210: } 15211: 15212: /** 15213: * xmlReadDoc: 15214: * @cur: a pointer to a zero terminated string 15215: * @URL: the base URL to use for the document 15216: * @encoding: the document encoding, or NULL 15217: * @options: a combination of xmlParserOption 15218: * 15219: * parse an XML in-memory document and build a tree. 15220: * 15221: * Returns the resulting document tree 15222: */ 15223: xmlDocPtr 15224: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15225: { 15226: xmlParserCtxtPtr ctxt; 15227: 15228: if (cur == NULL) 15229: return (NULL); 15230: 15231: ctxt = xmlCreateDocParserCtxt(cur); 15232: if (ctxt == NULL) 15233: return (NULL); 15234: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15235: } 15236: 15237: /** 15238: * xmlReadFile: 15239: * @filename: a file or URL 15240: * @encoding: the document encoding, or NULL 15241: * @options: a combination of xmlParserOption 15242: * 15243: * parse an XML file from the filesystem or the network. 15244: * 15245: * Returns the resulting document tree 15246: */ 15247: xmlDocPtr 15248: xmlReadFile(const char *filename, const char *encoding, int options) 15249: { 15250: xmlParserCtxtPtr ctxt; 15251: 15252: ctxt = xmlCreateURLParserCtxt(filename, options); 15253: if (ctxt == NULL) 15254: return (NULL); 15255: return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15256: } 15257: 15258: /** 15259: * xmlReadMemory: 15260: * @buffer: a pointer to a char array 15261: * @size: the size of the array 15262: * @URL: the base URL to use for the document 15263: * @encoding: the document encoding, or NULL 15264: * @options: a combination of xmlParserOption 15265: * 15266: * parse an XML in-memory document and build a tree. 
15267: * 15268: * Returns the resulting document tree 15269: */ 15270: xmlDocPtr 15271: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15272: { 15273: xmlParserCtxtPtr ctxt; 15274: 15275: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15276: if (ctxt == NULL) 15277: return (NULL); 15278: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15279: } 15280: 15281: /** 15282: * xmlReadFd: 15283: * @fd: an open file descriptor 15284: * @URL: the base URL to use for the document 15285: * @encoding: the document encoding, or NULL 15286: * @options: a combination of xmlParserOption 15287: * 15288: * parse an XML from a file descriptor and build a tree. 15289: * NOTE that the file descriptor will not be closed when the 15290: * reader is closed or reset. 15291: * 15292: * Returns the resulting document tree 15293: */ 15294: xmlDocPtr 15295: xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15296: { 15297: xmlParserCtxtPtr ctxt; 15298: xmlParserInputBufferPtr input; 15299: xmlParserInputPtr stream; 15300: 15301: if (fd < 0) 15302: return (NULL); 15303: 15304: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15305: if (input == NULL) 15306: return (NULL); 15307: input->closecallback = NULL; 15308: ctxt = xmlNewParserCtxt(); 15309: if (ctxt == NULL) { 15310: xmlFreeParserInputBuffer(input); 15311: return (NULL); 15312: } 15313: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15314: if (stream == NULL) { 15315: xmlFreeParserInputBuffer(input); 15316: xmlFreeParserCtxt(ctxt); 15317: return (NULL); 15318: } 15319: inputPush(ctxt, stream); 15320: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15321: } 15322: 15323: /** 15324: * xmlReadIO: 15325: * @ioread: an I/O read function 15326: * @ioclose: an I/O close function 15327: * @ioctx: an I/O handler 15328: * @URL: the base URL to use for the document 15329: * @encoding: the document encoding, or NULL 15330: * @options: a 
combination of xmlParserOption 15331: * 15332: * parse an XML document from I/O functions and source and build a tree. 15333: * 15334: * Returns the resulting document tree 15335: */ 15336: xmlDocPtr 15337: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15338: void *ioctx, const char *URL, const char *encoding, int options) 15339: { 15340: xmlParserCtxtPtr ctxt; 15341: xmlParserInputBufferPtr input; 15342: xmlParserInputPtr stream; 15343: 15344: if (ioread == NULL) 15345: return (NULL); 15346: 15347: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15348: XML_CHAR_ENCODING_NONE); 15349: if (input == NULL) { 15350: if (ioclose != NULL) 15351: ioclose(ioctx); 15352: return (NULL); 15353: } 15354: ctxt = xmlNewParserCtxt(); 15355: if (ctxt == NULL) { 15356: xmlFreeParserInputBuffer(input); 15357: return (NULL); 15358: } 15359: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15360: if (stream == NULL) { 15361: xmlFreeParserInputBuffer(input); 15362: xmlFreeParserCtxt(ctxt); 15363: return (NULL); 15364: } 15365: inputPush(ctxt, stream); 15366: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15367: } 15368: 15369: /** 15370: * xmlCtxtReadDoc: 15371: * @ctxt: an XML parser context 15372: * @cur: a pointer to a zero terminated string 15373: * @URL: the base URL to use for the document 15374: * @encoding: the document encoding, or NULL 15375: * @options: a combination of xmlParserOption 15376: * 15377: * parse an XML in-memory document and build a tree. 
15378: * This reuses the existing @ctxt parser context 15379: * 15380: * Returns the resulting document tree 15381: */ 15382: xmlDocPtr 15383: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15384: const char *URL, const char *encoding, int options) 15385: { 15386: xmlParserInputPtr stream; 15387: 15388: if (cur == NULL) 15389: return (NULL); 15390: if (ctxt == NULL) 15391: return (NULL); 15392: 15393: xmlCtxtReset(ctxt); 15394: 15395: stream = xmlNewStringInputStream(ctxt, cur); 15396: if (stream == NULL) { 15397: return (NULL); 15398: } 15399: inputPush(ctxt, stream); 15400: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15401: } 15402: 15403: /** 15404: * xmlCtxtReadFile: 15405: * @ctxt: an XML parser context 15406: * @filename: a file or URL 15407: * @encoding: the document encoding, or NULL 15408: * @options: a combination of xmlParserOption 15409: * 15410: * parse an XML file from the filesystem or the network. 15411: * This reuses the existing @ctxt parser context 15412: * 15413: * Returns the resulting document tree 15414: */ 15415: xmlDocPtr 15416: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15417: const char *encoding, int options) 15418: { 15419: xmlParserInputPtr stream; 15420: 15421: if (filename == NULL) 15422: return (NULL); 15423: if (ctxt == NULL) 15424: return (NULL); 15425: 15426: xmlCtxtReset(ctxt); 15427: 15428: stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15429: if (stream == NULL) { 15430: return (NULL); 15431: } 15432: inputPush(ctxt, stream); 15433: return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15434: } 15435: 15436: /** 15437: * xmlCtxtReadMemory: 15438: * @ctxt: an XML parser context 15439: * @buffer: a pointer to a char array 15440: * @size: the size of the array 15441: * @URL: the base URL to use for the document 15442: * @encoding: the document encoding, or NULL 15443: * @options: a combination of xmlParserOption 15444: * 15445: * parse an XML in-memory document and build a tree. 
15446: * This reuses the existing @ctxt parser context 15447: * 15448: * Returns the resulting document tree 15449: */ 15450: xmlDocPtr 15451: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15452: const char *URL, const char *encoding, int options) 15453: { 15454: xmlParserInputBufferPtr input; 15455: xmlParserInputPtr stream; 15456: 15457: if (ctxt == NULL) 15458: return (NULL); 15459: if (buffer == NULL) 15460: return (NULL); 15461: 15462: xmlCtxtReset(ctxt); 15463: 15464: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15465: if (input == NULL) { 15466: return(NULL); 15467: } 15468: 15469: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15470: if (stream == NULL) { 15471: xmlFreeParserInputBuffer(input); 15472: return(NULL); 15473: } 15474: 15475: inputPush(ctxt, stream); 15476: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15477: } 15478: 15479: /** 15480: * xmlCtxtReadFd: 15481: * @ctxt: an XML parser context 15482: * @fd: an open file descriptor 15483: * @URL: the base URL to use for the document 15484: * @encoding: the document encoding, or NULL 15485: * @options: a combination of xmlParserOption 15486: * 15487: * parse an XML from a file descriptor and build a tree. 15488: * This reuses the existing @ctxt parser context 15489: * NOTE that the file descriptor will not be closed when the 15490: * reader is closed or reset. 
15491: * 15492: * Returns the resulting document tree 15493: */ 15494: xmlDocPtr 15495: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15496: const char *URL, const char *encoding, int options) 15497: { 15498: xmlParserInputBufferPtr input; 15499: xmlParserInputPtr stream; 15500: 15501: if (fd < 0) 15502: return (NULL); 15503: if (ctxt == NULL) 15504: return (NULL); 15505: 15506: xmlCtxtReset(ctxt); 15507: 15508: 15509: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15510: if (input == NULL) 15511: return (NULL); 15512: input->closecallback = NULL; 15513: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15514: if (stream == NULL) { 15515: xmlFreeParserInputBuffer(input); 15516: return (NULL); 15517: } 15518: inputPush(ctxt, stream); 15519: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15520: } 15521: 15522: /** 15523: * xmlCtxtReadIO: 15524: * @ctxt: an XML parser context 15525: * @ioread: an I/O read function 15526: * @ioclose: an I/O close function 15527: * @ioctx: an I/O handler 15528: * @URL: the base URL to use for the document 15529: * @encoding: the document encoding, or NULL 15530: * @options: a combination of xmlParserOption 15531: * 15532: * parse an XML document from I/O functions and source and build a tree. 
15533: * This reuses the existing @ctxt parser context 15534: * 15535: * Returns the resulting document tree 15536: */ 15537: xmlDocPtr 15538: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15539: xmlInputCloseCallback ioclose, void *ioctx, 15540: const char *URL, 15541: const char *encoding, int options) 15542: { 15543: xmlParserInputBufferPtr input; 15544: xmlParserInputPtr stream; 15545: 15546: if (ioread == NULL) 15547: return (NULL); 15548: if (ctxt == NULL) 15549: return (NULL); 15550: 15551: xmlCtxtReset(ctxt); 15552: 15553: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15554: XML_CHAR_ENCODING_NONE); 15555: if (input == NULL) { 15556: if (ioclose != NULL) 15557: ioclose(ioctx); 15558: return (NULL); 15559: } 15560: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15561: if (stream == NULL) { 15562: xmlFreeParserInputBuffer(input); 15563: return (NULL); 15564: } 15565: inputPush(ctxt, stream); 15566: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15567: } 15568: 15569: #define bottom_parser 15570: #include "elfgcchack.h"