embedaddon/libxml2/parser.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parser.c
Revision 1.1.1.2.2.1: download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:28:50 2013 UTC (11 years ago) by misho
Branches: v2_8_0p0
Diff to: branchpoint 1.1.1.2: preferred, unified

patch0

1: /* 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3: * implemented on top of the SAX interfaces 4: * 5: * References: 6: * The XML specification: 7: * http://www.w3.org/TR/REC-xml 8: * Original 1.0 version: 9: * http://www.w3.org/TR/1998/REC-xml-19980210 10: * XML second edition working draft 11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12: * 13: * Okay this is a big file, the parser core is around 7000 lines, then it 14: * is followed by the progressive parser top routines, then the various 15: * high level APIs to call the parser and a few miscellaneous functions. 16: * A number of helper functions and deprecated ones have been moved to 17: * parserInternals.c to reduce this file size. 18: * As much as possible the functions are associated with their relative 19: * production in the XML specification. A few productions defining the 20: * different ranges of character are actually implanted either in 21: * parserInternals.h or parserInternals.c 22: * The DOM tree build is realized from the default SAX callbacks in 23: * the module SAX.c. 24: * The routines doing the validation checks are in valid.c and called either 25: * from the SAX callbacks or as standalone functions using a preparsed 26: * document. 27: * 28: * See Copyright for the status of this software. 
29: * 30: * daniel@veillard.com 31: */ 32: 33: #define IN_LIBXML 34: #include "libxml.h" 35: 36: #if defined(WIN32) && !defined (__CYGWIN__) 37: #define XML_DIR_SEP '\\' 38: #else 39: #define XML_DIR_SEP '/' 40: #endif 41: 42: #include <stdlib.h> 43: #include <limits.h> 44: #include <string.h> 45: #include <stdarg.h> 46: #include <libxml/xmlmemory.h> 47: #include <libxml/threads.h> 48: #include <libxml/globals.h> 49: #include <libxml/tree.h> 50: #include <libxml/parser.h> 51: #include <libxml/parserInternals.h> 52: #include <libxml/valid.h> 53: #include <libxml/entities.h> 54: #include <libxml/xmlerror.h> 55: #include <libxml/encoding.h> 56: #include <libxml/xmlIO.h> 57: #include <libxml/uri.h> 58: #ifdef LIBXML_CATALOG_ENABLED 59: #include <libxml/catalog.h> 60: #endif 61: #ifdef LIBXML_SCHEMAS_ENABLED 62: #include <libxml/xmlschemastypes.h> 63: #include <libxml/relaxng.h> 64: #endif 65: #ifdef HAVE_CTYPE_H 66: #include <ctype.h> 67: #endif 68: #ifdef HAVE_STDLIB_H 69: #include <stdlib.h> 70: #endif 71: #ifdef HAVE_SYS_STAT_H 72: #include <sys/stat.h> 73: #endif 74: #ifdef HAVE_FCNTL_H 75: #include <fcntl.h> 76: #endif 77: #ifdef HAVE_UNISTD_H 78: #include <unistd.h> 79: #endif 80: #ifdef HAVE_ZLIB_H 81: #include <zlib.h> 82: #endif 83: #ifdef HAVE_LZMA_H 84: #include <lzma.h> 85: #endif 86: 87: static void 88: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 89: 90: static xmlParserCtxtPtr 91: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 92: const xmlChar *base, xmlParserCtxtPtr pctx); 93: 94: /************************************************************************ 95: * * 96: * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 97: * * 98: ************************************************************************/ 99: 100: #define XML_PARSER_BIG_ENTITY 1000 101: #define XML_PARSER_LOT_ENTITY 5000 102: 103: /* 104: * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 105: * replacement over the size in byte of the input indicates that you have 106: * and eponential behaviour. A value of 10 correspond to at least 3 entity 107: * replacement per byte of input. 108: */ 109: #define XML_PARSER_NON_LINEAR 10 110: 111: /* 112: * xmlParserEntityCheck 113: * 114: * Function to check non-linear entity expansion behaviour 115: * This is here to detect and stop exponential linear entity expansion 116: * This is not a limitation of the parser but a safety 117: * boundary feature. It can be disabled with the XML_PARSE_HUGE 118: * parser option. 119: */ 120: static int 121: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 122: xmlEntityPtr ent, size_t replacement) 123: { 124: size_t consumed = 0; 125: 126: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 127: return (0); 128: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 129: return (1); 130: if (replacement != 0) { 131: if (replacement < XML_MAX_TEXT_LENGTH) 132: return(0); 133: 134: /* 135: * If the volume of entity copy reaches 10 times the 136: * amount of parsed data and over the large text threshold 137: * then that's very likely to be an abuse. 
138: */ 139: if (ctxt->input != NULL) { 140: consumed = ctxt->input->consumed + 141: (ctxt->input->cur - ctxt->input->base); 142: } 143: consumed += ctxt->sizeentities; 144: 145: if (replacement < XML_PARSER_NON_LINEAR * consumed) 146: return(0); 147: } else if (size != 0) { 148: /* 149: * Do the check based on the replacement size of the entity 150: */ 151: if (size < XML_PARSER_BIG_ENTITY) 152: return(0); 153: 154: /* 155: * A limit on the amount of text data reasonably used 156: */ 157: if (ctxt->input != NULL) { 158: consumed = ctxt->input->consumed + 159: (ctxt->input->cur - ctxt->input->base); 160: } 161: consumed += ctxt->sizeentities; 162: 163: if ((size < XML_PARSER_NON_LINEAR * consumed) && 164: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 165: return (0); 166: } else if (ent != NULL) { 167: /* 168: * use the number of parsed entities in the replacement 169: */ 170: size = ent->checked; 171: 172: /* 173: * The amount of data parsed counting entities size only once 174: */ 175: if (ctxt->input != NULL) { 176: consumed = ctxt->input->consumed + 177: (ctxt->input->cur - ctxt->input->base); 178: } 179: consumed += ctxt->sizeentities; 180: 181: /* 182: * Check the density of entities for the amount of data 183: * knowing an entity reference will take at least 3 bytes 184: */ 185: if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 186: return (0); 187: } else { 188: /* 189: * strange we got no data for checking just return 190: */ 191: return (0); 192: } 193: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 194: return (1); 195: } 196: 197: /** 198: * xmlParserMaxDepth: 199: * 200: * arbitrary depth limit for the XML documents that we allow to 201: * process. This is not a limitation of the parser but a safety 202: * boundary feature. It can be disabled with the XML_PARSE_HUGE 203: * parser option. 
*/
unsigned int xmlParserMaxDepth = 256;



#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL-terminated array of target names)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};


/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                                    const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

/************************************************************************
 *                                                                      *
 *              Some factorized error routines                          *
 *                                                                      *
 ************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context (may be NULL)
 * @prefix:  the attribute prefix, or NULL when the attribute has none
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error.
 * Raises XML_ERR_ATTRIBUTE_REDEFINED at XML_ERR_FATAL level; with a
 * non-NULL @ctxt it also records the code in ctxt->errNo, clears
 * ctxt->wellFormed and, unless ctxt->recovery is set, sets
 * ctxt->disableSAX.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    /* nothing is reported when SAX is disabled and the state is EOF */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;

    /* the message format depends on whether a prefix is present */
    if (prefix == NULL)
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    else
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context (may be NULL)
 * @error:  the error number
 * @info:  extra information string (may be NULL)
 *
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * The message text is selected from @error by the switch below; codes
 * without an entry get "Unregistered error message\n".
 * With a non-NULL @ctxt the code is stored in ctxt->errNo,
 * ctxt->wellFormed is cleared and, unless ctxt->recovery is set,
 * ctxt->disableSAX is set.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
{
    const char *errmsg;

    /* nothing is reported when SAX is disabled and the state is EOF */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    switch (error) {
        case XML_ERR_INVALID_HEX_CHARREF:
            errmsg = "CharRef: invalid hexadecimal value\n";
            break;
        case XML_ERR_INVALID_DEC_CHARREF:
            errmsg = "CharRef: invalid decimal value\n";
            break;
        case XML_ERR_INVALID_CHARREF:
            errmsg = "CharRef: invalid value\n";
            break;
        case XML_ERR_INTERNAL_ERROR:
            /* NOTE(review): no trailing newline, unlike most entries */
            errmsg = "internal error";
            break;
        case XML_ERR_PEREF_AT_EOF:
            errmsg = "PEReference at end of document\n";
            break;
        case XML_ERR_PEREF_IN_PROLOG:
            errmsg = "PEReference in prolog\n";
            break;
        case XML_ERR_PEREF_IN_EPILOG:
            errmsg = "PEReference in epilog\n";
            break;
        case XML_ERR_PEREF_NO_NAME:
            errmsg = "PEReference: no name\n";
            break;
        case XML_ERR_PEREF_SEMICOL_MISSING:
            errmsg = "PEReference: expecting ';'\n";
            break;
        case XML_ERR_ENTITY_LOOP:
            errmsg = "Detected an entity reference loop\n";
            break;
        case XML_ERR_ENTITY_NOT_STARTED:
            errmsg = "EntityValue: \" or ' expected\n";
            break;
        case XML_ERR_ENTITY_PE_INTERNAL:
            errmsg = "PEReferences forbidden in internal subset\n";
            break;
        case XML_ERR_ENTITY_NOT_FINISHED:
            /* same text as XML_ERR_ENTITY_NOT_STARTED */
            errmsg = "EntityValue: \" or ' expected\n";
            break;
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
            errmsg = "AttValue: \" or ' expected\n";
            break;
        case XML_ERR_LT_IN_ATTRIBUTE:
            errmsg = "Unescaped '<' not allowed in attributes values\n";
            break;
        case XML_ERR_LITERAL_NOT_STARTED:
            errmsg = "SystemLiteral \" or ' expected\n";
            break;
        case XML_ERR_LITERAL_NOT_FINISHED:
            errmsg = "Unfinished System or Public ID \" or ' expected\n";
            break;
        case XML_ERR_MISPLACED_CDATA_END:
            errmsg = "Sequence ']]>' not allowed in content\n";
            break;
        case XML_ERR_URI_REQUIRED:
            errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
            break;
        case XML_ERR_PUBID_REQUIRED:
            errmsg = "PUBLIC, the Public Identifier is missing\n";
            break;
        case XML_ERR_HYPHEN_IN_COMMENT:
            errmsg = "Comment must not contain '--' (double-hyphen)\n";
            break;
        case XML_ERR_PI_NOT_STARTED:
            errmsg = "xmlParsePI : no target name\n";
            break;
        case XML_ERR_RESERVED_XML_NAME:
            errmsg = "Invalid PI name\n";
            break;
        case XML_ERR_NOTATION_NOT_STARTED:
            errmsg = "NOTATION: Name expected here\n";
            break;
        case XML_ERR_NOTATION_NOT_FINISHED:
            errmsg = "'>' required to close NOTATION declaration\n";
            break;
        case XML_ERR_VALUE_REQUIRED:
            errmsg = "Entity value required\n";
            break;
        case XML_ERR_URI_FRAGMENT:
            /* NOTE(review): no trailing newline, unlike most entries */
            errmsg = "Fragment not allowed";
            break;
        case XML_ERR_ATTLIST_NOT_STARTED:
            errmsg = "'(' required to start ATTLIST enumeration\n";
            break;
        case XML_ERR_NMTOKEN_REQUIRED:
            errmsg = "NmToken expected in ATTLIST enumeration\n";
            break;
        case XML_ERR_ATTLIST_NOT_FINISHED:
            errmsg = "')' required to finish ATTLIST enumeration\n";
            break;
        case XML_ERR_MIXED_NOT_STARTED:
            errmsg = "MixedContentDecl : '|' or ')*' expected\n";
            break;
        case XML_ERR_PCDATA_REQUIRED:
            errmsg = "MixedContentDecl : '#PCDATA' expected\n";
            break;
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
            errmsg = "ContentDecl : Name or '(' expected\n";
            break;
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
            errmsg = "ContentDecl : ',' '|' or ')' expected\n";
            break;
        case XML_ERR_PEREF_IN_INT_SUBSET:
            errmsg =
                "PEReference: forbidden within markup decl in internal subset\n";
            break;
        case XML_ERR_GT_REQUIRED:
            errmsg = "expected '>'\n";
            break;
        case XML_ERR_CONDSEC_INVALID:
            errmsg = "XML conditional section '[' expected\n";
            break;
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
            errmsg = "Content error in the external subset\n";
            break;
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
            errmsg =
                "conditional section INCLUDE or IGNORE keyword expected\n";
            break;
        case XML_ERR_CONDSEC_NOT_FINISHED:
            errmsg = "XML conditional section not closed\n";
            break;
        case XML_ERR_XMLDECL_NOT_STARTED:
            errmsg = "Text declaration '<?xml' required\n";
            break;
        case XML_ERR_XMLDECL_NOT_FINISHED:
            errmsg = "parsing XML declaration: '?>' expected\n";
            break;
        case XML_ERR_EXT_ENTITY_STANDALONE:
            errmsg = "external parsed entities cannot be standalone\n";
            break;
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
            errmsg = "EntityRef: expecting ';'\n";
            break;
        case XML_ERR_DOCTYPE_NOT_FINISHED:
            errmsg = "DOCTYPE improperly terminated\n";
            break;
        case XML_ERR_LTSLASH_REQUIRED:
            errmsg = "EndTag: '</' not found\n";
            break;
        case XML_ERR_EQUAL_REQUIRED:
            errmsg = "expected '='\n";
            break;
        case XML_ERR_STRING_NOT_CLOSED:
            errmsg = "String not closed expecting \" or '\n";
            break;
        case XML_ERR_STRING_NOT_STARTED:
            errmsg = "String not started expecting ' or \"\n";
            break;
        case XML_ERR_ENCODING_NAME:
            errmsg = "Invalid XML encoding name\n";
            break;
        case XML_ERR_STANDALONE_VALUE:
            errmsg = "standalone accepts only 'yes' or 'no'\n";
            break;
        case XML_ERR_DOCUMENT_EMPTY:
            errmsg = "Document is empty\n";
            break;
        case XML_ERR_DOCUMENT_END:
            errmsg = "Extra content at the end of the document\n";
            break;
        case XML_ERR_NOT_WELL_BALANCED:
            errmsg = "chunk is not well balanced\n";
            break;
        case XML_ERR_EXTRA_CONTENT:
            errmsg = "extra content at the end of well balanced chunk\n";
            break;
        case XML_ERR_VERSION_MISSING:
            errmsg = "Malformed declaration expecting version\n";
            break;
#if 0
        case:
            errmsg = "\n";
            break;
#endif
        default:
            errmsg = "Unregistered error message\n";
    }
    if (ctxt != NULL)
        ctxt->errNo = error;
    /*
     * errmsg is used as the format string and info is passed as its only
     * variadic argument; none of the messages selected above contains a
     * conversion specifier.
     */
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
                    info);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}

/**
 * xmlFatalErrMsg:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @msg:  the error message
 *
 * Handle a fatal parser error, i.e.
violating Well-Formedness constraints 507: */ 508: static void 509: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 510: const char *msg) 511: { 512: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 513: (ctxt->instate == XML_PARSER_EOF)) 514: return; 515: if (ctxt != NULL) 516: ctxt->errNo = error; 517: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 518: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 519: if (ctxt != NULL) { 520: ctxt->wellFormed = 0; 521: if (ctxt->recovery == 0) 522: ctxt->disableSAX = 1; 523: } 524: } 525: 526: /** 527: * xmlWarningMsg: 528: * @ctxt: an XML parser context 529: * @error: the error number 530: * @msg: the error message 531: * @str1: extra data 532: * @str2: extra data 533: * 534: * Handle a warning. 535: */ 536: static void 537: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 538: const char *msg, const xmlChar *str1, const xmlChar *str2) 539: { 540: xmlStructuredErrorFunc schannel = NULL; 541: 542: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 543: (ctxt->instate == XML_PARSER_EOF)) 544: return; 545: if ((ctxt != NULL) && (ctxt->sax != NULL) && 546: (ctxt->sax->initialized == XML_SAX2_MAGIC)) 547: schannel = ctxt->sax->serror; 548: if (ctxt != NULL) { 549: __xmlRaiseError(schannel, 550: (ctxt->sax) ? 
ctxt->sax->warning : NULL, 551: ctxt->userData, 552: ctxt, NULL, XML_FROM_PARSER, error, 553: XML_ERR_WARNING, NULL, 0, 554: (const char *) str1, (const char *) str2, NULL, 0, 0, 555: msg, (const char *) str1, (const char *) str2); 556: } else { 557: __xmlRaiseError(schannel, NULL, NULL, 558: ctxt, NULL, XML_FROM_PARSER, error, 559: XML_ERR_WARNING, NULL, 0, 560: (const char *) str1, (const char *) str2, NULL, 0, 0, 561: msg, (const char *) str1, (const char *) str2); 562: } 563: } 564: 565: /** 566: * xmlValidityError: 567: * @ctxt: an XML parser context 568: * @error: the error number 569: * @msg: the error message 570: * @str1: extra data 571: * 572: * Handle a validity error. 573: */ 574: static void 575: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 576: const char *msg, const xmlChar *str1, const xmlChar *str2) 577: { 578: xmlStructuredErrorFunc schannel = NULL; 579: 580: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 581: (ctxt->instate == XML_PARSER_EOF)) 582: return; 583: if (ctxt != NULL) { 584: ctxt->errNo = error; 585: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 586: schannel = ctxt->sax->serror; 587: } 588: if (ctxt != NULL) { 589: __xmlRaiseError(schannel, 590: ctxt->vctxt.error, ctxt->vctxt.userData, 591: ctxt, NULL, XML_FROM_DTD, error, 592: XML_ERR_ERROR, NULL, 0, (const char *) str1, 593: (const char *) str2, NULL, 0, 0, 594: msg, (const char *) str1, (const char *) str2); 595: ctxt->valid = 0; 596: } else { 597: __xmlRaiseError(schannel, NULL, NULL, 598: ctxt, NULL, XML_FROM_DTD, error, 599: XML_ERR_ERROR, NULL, 0, (const char *) str1, 600: (const char *) str2, NULL, 0, 0, 601: msg, (const char *) str1, (const char *) str2); 602: } 603: } 604: 605: /** 606: * xmlFatalErrMsgInt: 607: * @ctxt: an XML parser context 608: * @error: the error number 609: * @msg: the error message 610: * @val: an integer value 611: * 612: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 613: */ 614: static void 615: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 616: const char *msg, int val) 617: { 618: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 619: (ctxt->instate == XML_PARSER_EOF)) 620: return; 621: if (ctxt != NULL) 622: ctxt->errNo = error; 623: __xmlRaiseError(NULL, NULL, NULL, 624: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 625: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 626: if (ctxt != NULL) { 627: ctxt->wellFormed = 0; 628: if (ctxt->recovery == 0) 629: ctxt->disableSAX = 1; 630: } 631: } 632: 633: /** 634: * xmlFatalErrMsgStrIntStr: 635: * @ctxt: an XML parser context 636: * @error: the error number 637: * @msg: the error message 638: * @str1: an string info 639: * @val: an integer value 640: * @str2: an string info 641: * 642: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 643: */ 644: static void 645: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 646: const char *msg, const xmlChar *str1, int val, 647: const xmlChar *str2) 648: { 649: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 650: (ctxt->instate == XML_PARSER_EOF)) 651: return; 652: if (ctxt != NULL) 653: ctxt->errNo = error; 654: __xmlRaiseError(NULL, NULL, NULL, 655: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 656: NULL, 0, (const char *) str1, (const char *) str2, 657: NULL, val, 0, msg, str1, val, str2); 658: if (ctxt != NULL) { 659: ctxt->wellFormed = 0; 660: if (ctxt->recovery == 0) 661: ctxt->disableSAX = 1; 662: } 663: } 664: 665: /** 666: * xmlFatalErrMsgStr: 667: * @ctxt: an XML parser context 668: * @error: the error number 669: * @msg: the error message 670: * @val: a string value 671: * 672: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 673: */ 674: static void 675: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 676: const char *msg, const xmlChar * val) 677: { 678: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 679: (ctxt->instate == XML_PARSER_EOF)) 680: return; 681: if (ctxt != NULL) 682: ctxt->errNo = error; 683: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 684: XML_FROM_PARSER, error, XML_ERR_FATAL, 685: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 686: val); 687: if (ctxt != NULL) { 688: ctxt->wellFormed = 0; 689: if (ctxt->recovery == 0) 690: ctxt->disableSAX = 1; 691: } 692: } 693: 694: /** 695: * xmlErrMsgStr: 696: * @ctxt: an XML parser context 697: * @error: the error number 698: * @msg: the error message 699: * @val: a string value 700: * 701: * Handle a non fatal parser error 702: */ 703: static void 704: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 705: const char *msg, const xmlChar * val) 706: { 707: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 708: (ctxt->instate == XML_PARSER_EOF)) 709: return; 710: if (ctxt != NULL) 711: ctxt->errNo = error; 712: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 713: XML_FROM_PARSER, error, XML_ERR_ERROR, 714: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 715: val); 716: } 717: 718: /** 719: * xmlNsErr: 720: * @ctxt: an XML parser context 721: * @error: the error number 722: * @msg: the message 723: * @info1: extra information string 724: * @info2: extra information string 725: * 726: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 727: */ 728: static void 729: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 730: const char *msg, 731: const xmlChar * info1, const xmlChar * info2, 732: const xmlChar * info3) 733: { 734: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 735: (ctxt->instate == XML_PARSER_EOF)) 736: return; 737: if (ctxt != NULL) 738: ctxt->errNo = error; 739: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 740: XML_ERR_ERROR, NULL, 0, (const char *) info1, 741: (const char *) info2, (const char *) info3, 0, 0, msg, 742: info1, info2, info3); 743: if (ctxt != NULL) 744: ctxt->nsWellFormed = 0; 745: } 746: 747: /** 748: * xmlNsWarn 749: * @ctxt: an XML parser context 750: * @error: the error number 751: * @msg: the message 752: * @info1: extra information string 753: * @info2: extra information string 754: * 755: * Handle a namespace warning error 756: */ 757: static void 758: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 759: const char *msg, 760: const xmlChar * info1, const xmlChar * info2, 761: const xmlChar * info3) 762: { 763: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 764: (ctxt->instate == XML_PARSER_EOF)) 765: return; 766: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 767: XML_ERR_WARNING, NULL, 0, (const char *) info1, 768: (const char *) info2, (const char *) info3, 0, 0, msg, 769: info1, info2, info3); 770: } 771: 772: /************************************************************************ 773: * * 774: * Library wide options * 775: * * 776: ************************************************************************/ 777: 778: /** 779: * xmlHasFeature: 780: * @feature: the feature to be examined 781: * 782: * Examines if the library has been compiled with a given feature. 783: * 784: * Returns a non-zero value if the feature exist, otherwise zero. 
* Returns zero (0) if the feature does not exist or an unknown
 * feature is requested, non-zero otherwise.
 */
int
xmlHasFeature(xmlFeature feature)
{
    /*
     * Each case answers at compile time: 1 when the matching configuration
     * macro is defined for this build, 0 otherwise.
     */
    switch (feature) {
        case XML_WITH_THREAD:
#ifdef LIBXML_THREAD_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_TREE:
#ifdef LIBXML_TREE_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_OUTPUT:
#ifdef LIBXML_OUTPUT_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_PUSH:
#ifdef LIBXML_PUSH_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_READER:
#ifdef LIBXML_READER_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_PATTERN:
#ifdef LIBXML_PATTERN_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_WRITER:
#ifdef LIBXML_WRITER_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_SAX1:
#ifdef LIBXML_SAX1_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_FTP:
#ifdef LIBXML_FTP_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_HTTP:
#ifdef LIBXML_HTTP_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_VALID:
#ifdef LIBXML_VALID_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_HTML:
#ifdef LIBXML_HTML_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_LEGACY:
#ifdef LIBXML_LEGACY_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_C14N:
#ifdef LIBXML_C14N_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_CATALOG:
#ifdef LIBXML_CATALOG_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_XPATH:
#ifdef LIBXML_XPATH_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_XPTR:
#ifdef LIBXML_XPTR_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_XINCLUDE:
#ifdef LIBXML_XINCLUDE_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ICONV:
#ifdef LIBXML_ICONV_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ISO8859X:
#ifdef LIBXML_ISO8859X_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_UNICODE:
#ifdef LIBXML_UNICODE_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_REGEXP:
#ifdef LIBXML_REGEXP_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_AUTOMATA:
#ifdef LIBXML_AUTOMATA_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_EXPR:
#ifdef LIBXML_EXPR_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_SCHEMAS:
#ifdef LIBXML_SCHEMAS_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_SCHEMATRON:
#ifdef LIBXML_SCHEMATRON_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_MODULES:
#ifdef LIBXML_MODULES_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_DEBUG:
#ifdef LIBXML_DEBUG_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_DEBUG_MEM:
            /* keyed on DEBUG_MEMORY_LOCATION, not a LIBXML_*_ENABLED macro */
#ifdef DEBUG_MEMORY_LOCATION
            return(1);
#else
            return(0);
#endif
        case XML_WITH_DEBUG_RUN:
#ifdef LIBXML_DEBUG_RUNTIME
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_LZMA:
#ifdef LIBXML_LZMA_ENABLED
            return(1);
#else
            return(0);
#endif
        case XML_WITH_ICU:
#ifdef LIBXML_ICU_ENABLED
            return(1);
#else
            return(0);
#endif
        default:
            /* unknown feature value: reported as absent below */
            break;
     }
     return(0);
}

/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/

/**
 * xmlDetectSAX2:
 * @ctxt:  an XML parser context
 *
 * Do the SAX2 detection and specific initialization.
 * Sets ctxt->sax2 (always when SAX1 support is compiled out, otherwise
 * only when the SAX block is marked XML_SAX2_MAGIC and provides
 * startElementNs or endElementNs), then interns the "xml", "xmlns" and
 * XML_XML_NAMESPACE strings in ctxt->dict. A failed lookup is reported
 * through xmlErrMemory. Does nothing when @ctxt is NULL.
 */
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL) return;
#ifdef LIBXML_SAX1_ENABLED
    if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
        ((ctxt->sax->startElementNs != NULL) ||
         (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
#else
    ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */

    /* the explicit lengths 3, 5 and 36 are those of the looked-up strings */
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
        (ctxt->str_xml_ns == NULL)) {
        xmlErrMemory(ctxt, NULL);
    }
}

/*
 * Per-element table of defaulted attributes. xmlAddDefAttrs allocates it
 * with extra trailing room and stores 5 consecutive values[] slots per
 * attribute, so values[] is indexed well beyond its declared size of 5.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;        /* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};

/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20)
character. 1046: * Note that the size of dst need to be at least src, and if one doesn't need 1047: * to preserve dst (and it doesn't come from a dictionary or read-only) then 1048: * passing src as dst is just fine. 1049: * 1050: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1051: * is needed. 1052: */ 1053: static xmlChar * 1054: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1055: { 1056: if ((src == NULL) || (dst == NULL)) 1057: return(NULL); 1058: 1059: while (*src == 0x20) src++; 1060: while (*src != 0) { 1061: if (*src == 0x20) { 1062: while (*src == 0x20) src++; 1063: if (*src != 0) 1064: *dst++ = 0x20; 1065: } else { 1066: *dst++ = *src++; 1067: } 1068: } 1069: *dst = 0; 1070: if (dst == src) 1071: return(NULL); 1072: return(dst); 1073: } 1074: 1075: /** 1076: * xmlAttrNormalizeSpace2: 1077: * @src: the source string 1078: * 1079: * Normalize the space in non CDATA attribute values, a slightly more complex 1080: * front end to avoid allocation problems when running on attribute values 1081: * coming from the input. 1082: * 1083: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1084: * is needed. 
*/
static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
{
    int i;
    int remove_head = 0;    /* number of leading spaces */
    int need_realloc = 0;   /* set when inner/trailing spaces must go */
    const xmlChar *cur;

    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
        return(NULL);
    i = *len;
    if (i <= 0)
        return(NULL);

    /* count the leading spaces */
    cur = src;
    while (*cur == 0x20) {
        cur++;
        remove_head++;
    }
    /*
     * look for a space followed by another space or by the end of the
     * string; the scan stops on the terminating 0, not on *len
     */
    while (*cur != 0) {
        if (*cur == 0x20) {
            cur++;
            if ((*cur == 0x20) || (*cur == 0)) {
                need_realloc = 1;
                break;
            }
        } else
            cur++;
    }
    if (need_realloc) {
        xmlChar *ret;

        /* work on a fresh copy, normalized in place; *len is recomputed */
        ret = xmlStrndup(src + remove_head, i - remove_head + 1);
        if (ret == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        xmlAttrNormalizeSpace(ret, ret);
        *len = (int) strlen((const char *)ret);
        return(ret);
    } else if (remove_head) {
        /* only leading spaces: shift @src left, terminator included */
        *len -= remove_head;
        memmove(src, src + remove_head, 1 + *len);
        return(src);
    }
    /* nothing to normalize */
    return(NULL);
}

/**
 * xmlAddDefAttrs:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @value:  the attribute value
 *
 * Add a defaulted attribute for an element.
 * The entry is stored in ctxt->attsDefault (created on first use), keyed
 * by the element's local name and prefix; each attribute takes 5 slots of
 * values[]: name, prefix, value, end of value, and an "external" marker.
 * Nothing is added when (fullname, fullattr) is already present in
 * ctxt->attsSpecial. Allocation failures are reported via xmlErrMemory.
 */
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
               const xmlChar *fullname,
               const xmlChar *fullattr,
               const xmlChar *value) {
    xmlDefAttrsPtr defaults;
    int len;
    const xmlChar *name;
    const xmlChar *prefix;

    /*
     * Allows to detect attribute redefinitions
     */
    if (ctxt->attsSpecial != NULL) {
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
            return;
    }

    if (ctxt->attsDefault == NULL) {
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
        if (ctxt->attsDefault == NULL)
            goto mem_error;
    }

    /*
     * split the element name into prefix:localname , the string found
     * are within the DTD and then not associated to namespace names.
     */
    name = xmlSplitQName3(fullname, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullname, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullname, len);
    }

    /*
     * make sure there is some storage
     */
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
    if (defaults == NULL) {
        /* first attribute for this element: room for 4 groups of 5 slots */
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
                           (4 * 5) * sizeof(const xmlChar *));
        if (defaults == NULL)
            goto mem_error;
        defaults->nbAttrs = 0;
        defaults->maxAttrs = 4;
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            xmlFree(defaults);
            goto mem_error;
        }
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
        xmlDefAttrsPtr temp;

        /* table full: double the capacity and re-register the new pointer */
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
                       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
        if (temp == NULL)
            goto mem_error;
        defaults = temp;
        defaults->maxAttrs *= 2;
        /*
         * NOTE(review): if this update fails the block is freed here while
         * the table was last given the pre-realloc pointer -- confirm the
         * entry cannot be reached afterwards.
         */
        if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
                                defaults, NULL) < 0) {
            xmlFree(defaults);
            goto mem_error;
        }
    }

    /*
     * Split the attribute name into prefix:localname , the string found
     * are within the DTD and then not associated to namespace names.
     */
    name = xmlSplitQName3(fullattr, &len);
    if (name == NULL) {
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
        prefix = NULL;
    } else {
        name = xmlDictLookup(ctxt->dict, name, -1);
        prefix = xmlDictLookup(ctxt->dict, fullattr, len);
    }

    defaults->values[5 * defaults->nbAttrs] = name;
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
    /* intern the string and precompute the end */
    len = xmlStrlen(value);
    value = xmlDictLookup(ctxt->dict, value, len);
    defaults->values[5 * defaults->nbAttrs + 2] = value;
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
    /* fifth slot: non-NULL marker when ctxt->external is set */
    if (ctxt->external)
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
    else
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
    defaults->nbAttrs++;

    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

/**
 * xmlAddSpecialAttr:
 * @ctxt:  an XML parser context
 * @fullname:  the element fullname
 * @fullattr:  the attribute fullname
 * @type:  the attribute type
 *
 * Register this attribute type in ctxt->attsSpecial (created on first
 * use). An existing entry for (fullname, fullattr) is left unchanged.
 */
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
                  const xmlChar *fullname,
                  const xmlChar *fullattr,
                  int type)
{
    if (ctxt->attsSpecial == NULL) {
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
        if (ctxt->attsSpecial == NULL)
            goto mem_error;
    }

    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
        return;

    /*
     * the type is stored directly in the pointer-sized payload;
     * NOTE(review): the result of xmlHashAddEntry2 is not checked.
     */
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
                     (void *) (long) type);
    return;

mem_error:
    xmlErrMemory(ctxt, NULL);
    return;
}

/**
 * xmlCleanSpecialAttrCallback:
 *
 * Removes CDATA attributes from the special attribute table
 */
static void
xmlCleanSpecialAttrCallback(void
*payload, void *data, 1285: const xmlChar *fullname, const xmlChar *fullattr, 1286: const xmlChar *unused ATTRIBUTE_UNUSED) { 1287: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1288: 1289: if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1290: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1291: } 1292: } 1293: 1294: /** 1295: * xmlCleanSpecialAttr: 1296: * @ctxt: an XML parser context 1297: * 1298: * Trim the list of attributes defined to remove all those of type 1299: * CDATA as they are not special. This call should be done when finishing 1300: * to parse the DTD and before starting to parse the document root. 1301: */ 1302: static void 1303: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1304: { 1305: if (ctxt->attsSpecial == NULL) 1306: return; 1307: 1308: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1309: 1310: if (xmlHashSize(ctxt->attsSpecial) == 0) { 1311: xmlHashFree(ctxt->attsSpecial, NULL); 1312: ctxt->attsSpecial = NULL; 1313: } 1314: return; 1315: } 1316: 1317: /** 1318: * xmlCheckLanguageID: 1319: * @lang: pointer to the string value 1320: * 1321: * Checks that the value conforms to the LanguageID production: 1322: * 1323: * NOTE: this is somewhat deprecated, those productions were removed from 1324: * the XML Second edition. 
1325: * 1326: * [33] LanguageID ::= Langcode ('-' Subcode)* 1327: * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1328: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1329: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1330: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1331: * [38] Subcode ::= ([a-z] | [A-Z])+ 1332: * 1333: * The current REC reference the sucessors of RFC 1766, currently 5646 1334: * 1335: * http://www.rfc-editor.org/rfc/rfc5646.txt 1336: * langtag = language 1337: * ["-" script] 1338: * ["-" region] 1339: * *("-" variant) 1340: * *("-" extension) 1341: * ["-" privateuse] 1342: * language = 2*3ALPHA ; shortest ISO 639 code 1343: * ["-" extlang] ; sometimes followed by 1344: * ; extended language subtags 1345: * / 4ALPHA ; or reserved for future use 1346: * / 5*8ALPHA ; or registered language subtag 1347: * 1348: * extlang = 3ALPHA ; selected ISO 639 codes 1349: * *2("-" 3ALPHA) ; permanently reserved 1350: * 1351: * script = 4ALPHA ; ISO 15924 code 1352: * 1353: * region = 2ALPHA ; ISO 3166-1 code 1354: * / 3DIGIT ; UN M.49 code 1355: * 1356: * variant = 5*8alphanum ; registered variants 1357: * / (DIGIT 3alphanum) 1358: * 1359: * extension = singleton 1*("-" (2*8alphanum)) 1360: * 1361: * ; Single alphanumerics 1362: * ; "x" reserved for private use 1363: * singleton = DIGIT ; 0 - 9 1364: * / %x41-57 ; A - W 1365: * / %x59-5A ; Y - Z 1366: * / %x61-77 ; a - w 1367: * / %x79-7A ; y - z 1368: * 1369: * it sounds right to still allow Irregular i-xxx IANA and user codes too 1370: * The parser below doesn't try to cope with extension or privateuse 1371: * that could be added but that's not interoperable anyway 1372: * 1373: * Returns 1 if correct 0 otherwise 1374: **/ 1375: int 1376: xmlCheckLanguageID(const xmlChar * lang) 1377: { 1378: const xmlChar *cur = lang, *nxt; 1379: 1380: if (cur == NULL) 1381: return (0); 1382: if (((cur[0] == 'i') && (cur[1] == '-')) || 1383: ((cur[0] == 'I') && (cur[1] == '-')) || 1384: ((cur[0] 
== 'x') && (cur[1] == '-')) || 1385: ((cur[0] == 'X') && (cur[1] == '-'))) { 1386: /* 1387: * Still allow IANA code and user code which were coming 1388: * from the previous version of the XML-1.0 specification 1389: * it's deprecated but we should not fail 1390: */ 1391: cur += 2; 1392: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1393: ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1394: cur++; 1395: return(cur[0] == 0); 1396: } 1397: nxt = cur; 1398: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1399: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1400: nxt++; 1401: if (nxt - cur >= 4) { 1402: /* 1403: * Reserved 1404: */ 1405: if ((nxt - cur > 8) || (nxt[0] != 0)) 1406: return(0); 1407: return(1); 1408: } 1409: if (nxt - cur < 2) 1410: return(0); 1411: /* we got an ISO 639 code */ 1412: if (nxt[0] == 0) 1413: return(1); 1414: if (nxt[0] != '-') 1415: return(0); 1416: 1417: nxt++; 1418: cur = nxt; 1419: /* now we can have extlang or script or region or variant */ 1420: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1421: goto region_m49; 1422: 1423: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1424: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1425: nxt++; 1426: if (nxt - cur == 4) 1427: goto script; 1428: if (nxt - cur == 2) 1429: goto region; 1430: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1431: goto variant; 1432: if (nxt - cur != 3) 1433: return(0); 1434: /* we parsed an extlang */ 1435: if (nxt[0] == 0) 1436: return(1); 1437: if (nxt[0] != '-') 1438: return(0); 1439: 1440: nxt++; 1441: cur = nxt; 1442: /* now we can have script or region or variant */ 1443: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1444: goto region_m49; 1445: 1446: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1447: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1448: nxt++; 1449: if (nxt - cur == 2) 1450: goto region; 1451: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1452: goto variant; 1453: if (nxt - cur != 4) 1454: return(0); 1455: /* we parsed a script */ 1456: script: 1457: if (nxt[0] == 0) 1458: return(1); 1459: if 
(nxt[0] != '-') 1460: return(0); 1461: 1462: nxt++; 1463: cur = nxt; 1464: /* now we can have region or variant */ 1465: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1466: goto region_m49; 1467: 1468: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1469: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1470: nxt++; 1471: 1472: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1473: goto variant; 1474: if (nxt - cur != 2) 1475: return(0); 1476: /* we parsed a region */ 1477: region: 1478: if (nxt[0] == 0) 1479: return(1); 1480: if (nxt[0] != '-') 1481: return(0); 1482: 1483: nxt++; 1484: cur = nxt; 1485: /* now we can just have a variant */ 1486: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1487: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1488: nxt++; 1489: 1490: if ((nxt - cur < 5) || (nxt - cur > 8)) 1491: return(0); 1492: 1493: /* we parsed a variant */ 1494: variant: 1495: if (nxt[0] == 0) 1496: return(1); 1497: if (nxt[0] != '-') 1498: return(0); 1499: /* extensions and private use subtags not checked */ 1500: return (1); 1501: 1502: region_m49: 1503: if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1504: ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1505: nxt += 3; 1506: goto region; 1507: } 1508: return(0); 1509: } 1510: 1511: /************************************************************************ 1512: * * 1513: * Parser stacks related functions and macros * 1514: * * 1515: ************************************************************************/ 1516: 1517: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1518: const xmlChar ** str); 1519: 1520: #ifdef SAX2 1521: /** 1522: * nsPush: 1523: * @ctxt: an XML parser context 1524: * @prefix: the namespace prefix or NULL 1525: * @URL: the namespace name 1526: * 1527: * Pushes a new parser namespace on top of the ns stack 1528: * 1529: * Returns -1 in case of error, -2 if the namespace should be discarded 1530: * and the index in the stack otherwise. 
1531: */ 1532: static int 1533: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1534: { 1535: if (ctxt->options & XML_PARSE_NSCLEAN) { 1536: int i; 1537: for (i = 0;i < ctxt->nsNr;i += 2) { 1538: if (ctxt->nsTab[i] == prefix) { 1539: /* in scope */ 1540: if (ctxt->nsTab[i + 1] == URL) 1541: return(-2); 1542: /* out of scope keep it */ 1543: break; 1544: } 1545: } 1546: } 1547: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1548: ctxt->nsMax = 10; 1549: ctxt->nsNr = 0; 1550: ctxt->nsTab = (const xmlChar **) 1551: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1552: if (ctxt->nsTab == NULL) { 1553: xmlErrMemory(ctxt, NULL); 1554: ctxt->nsMax = 0; 1555: return (-1); 1556: } 1557: } else if (ctxt->nsNr >= ctxt->nsMax) { 1558: const xmlChar ** tmp; 1559: ctxt->nsMax *= 2; 1560: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1561: ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1562: if (tmp == NULL) { 1563: xmlErrMemory(ctxt, NULL); 1564: ctxt->nsMax /= 2; 1565: return (-1); 1566: } 1567: ctxt->nsTab = tmp; 1568: } 1569: ctxt->nsTab[ctxt->nsNr++] = prefix; 1570: ctxt->nsTab[ctxt->nsNr++] = URL; 1571: return (ctxt->nsNr); 1572: } 1573: /** 1574: * nsPop: 1575: * @ctxt: an XML parser context 1576: * @nr: the number to pop 1577: * 1578: * Pops the top @nr parser prefix/namespace from the ns stack 1579: * 1580: * Returns the number of namespaces removed 1581: */ 1582: static int 1583: nsPop(xmlParserCtxtPtr ctxt, int nr) 1584: { 1585: int i; 1586: 1587: if (ctxt->nsTab == NULL) return(0); 1588: if (ctxt->nsNr < nr) { 1589: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1590: nr = ctxt->nsNr; 1591: } 1592: if (ctxt->nsNr <= 0) 1593: return (0); 1594: 1595: for (i = 0;i < nr;i++) { 1596: ctxt->nsNr--; 1597: ctxt->nsTab[ctxt->nsNr] = NULL; 1598: } 1599: return(nr); 1600: } 1601: #endif 1602: 1603: static int 1604: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1605: const xmlChar **atts; 1606: int *attallocs; 1607: int 
maxatts; 1608: 1609: if (ctxt->atts == NULL) { 1610: maxatts = 55; /* allow for 10 attrs by default */ 1611: atts = (const xmlChar **) 1612: xmlMalloc(maxatts * sizeof(xmlChar *)); 1613: if (atts == NULL) goto mem_error; 1614: ctxt->atts = atts; 1615: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1616: if (attallocs == NULL) goto mem_error; 1617: ctxt->attallocs = attallocs; 1618: ctxt->maxatts = maxatts; 1619: } else if (nr + 5 > ctxt->maxatts) { 1620: maxatts = (nr + 5) * 2; 1621: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1622: maxatts * sizeof(const xmlChar *)); 1623: if (atts == NULL) goto mem_error; 1624: ctxt->atts = atts; 1625: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1626: (maxatts / 5) * sizeof(int)); 1627: if (attallocs == NULL) goto mem_error; 1628: ctxt->attallocs = attallocs; 1629: ctxt->maxatts = maxatts; 1630: } 1631: return(ctxt->maxatts); 1632: mem_error: 1633: xmlErrMemory(ctxt, NULL); 1634: return(-1); 1635: } 1636: 1637: /** 1638: * inputPush: 1639: * @ctxt: an XML parser context 1640: * @value: the parser input 1641: * 1642: * Pushes a new parser input on top of the input stack 1643: * 1644: * Returns -1 in case of error, the index in the stack otherwise 1645: */ 1646: int 1647: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1648: { 1649: if ((ctxt == NULL) || (value == NULL)) 1650: return(-1); 1651: if (ctxt->inputNr >= ctxt->inputMax) { 1652: ctxt->inputMax *= 2; 1653: ctxt->inputTab = 1654: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1655: ctxt->inputMax * 1656: sizeof(ctxt->inputTab[0])); 1657: if (ctxt->inputTab == NULL) { 1658: xmlErrMemory(ctxt, NULL); 1659: xmlFreeInputStream(value); 1660: ctxt->inputMax /= 2; 1661: value = NULL; 1662: return (-1); 1663: } 1664: } 1665: ctxt->inputTab[ctxt->inputNr] = value; 1666: ctxt->input = value; 1667: return (ctxt->inputNr++); 1668: } 1669: /** 1670: * inputPop: 1671: * @ctxt: an XML parser context 1672: * 1673: * Pops the top parser 
input from the input stack 1674: * 1675: * Returns the input just removed 1676: */ 1677: xmlParserInputPtr 1678: inputPop(xmlParserCtxtPtr ctxt) 1679: { 1680: xmlParserInputPtr ret; 1681: 1682: if (ctxt == NULL) 1683: return(NULL); 1684: if (ctxt->inputNr <= 0) 1685: return (NULL); 1686: ctxt->inputNr--; 1687: if (ctxt->inputNr > 0) 1688: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1689: else 1690: ctxt->input = NULL; 1691: ret = ctxt->inputTab[ctxt->inputNr]; 1692: ctxt->inputTab[ctxt->inputNr] = NULL; 1693: return (ret); 1694: } 1695: /** 1696: * nodePush: 1697: * @ctxt: an XML parser context 1698: * @value: the element node 1699: * 1700: * Pushes a new element node on top of the node stack 1701: * 1702: * Returns -1 in case of error, the index in the stack otherwise 1703: */ 1704: int 1705: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1706: { 1707: if (ctxt == NULL) return(0); 1708: if (ctxt->nodeNr >= ctxt->nodeMax) { 1709: xmlNodePtr *tmp; 1710: 1711: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1712: ctxt->nodeMax * 2 * 1713: sizeof(ctxt->nodeTab[0])); 1714: if (tmp == NULL) { 1715: xmlErrMemory(ctxt, NULL); 1716: return (-1); 1717: } 1718: ctxt->nodeTab = tmp; 1719: ctxt->nodeMax *= 2; 1720: } 1721: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1722: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1723: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1724: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1725: xmlParserMaxDepth); 1726: ctxt->instate = XML_PARSER_EOF; 1727: return(-1); 1728: } 1729: ctxt->nodeTab[ctxt->nodeNr] = value; 1730: ctxt->node = value; 1731: return (ctxt->nodeNr++); 1732: } 1733: 1734: /** 1735: * nodePop: 1736: * @ctxt: an XML parser context 1737: * 1738: * Pops the top element node from the node stack 1739: * 1740: * Returns the node just removed 1741: */ 1742: xmlNodePtr 1743: nodePop(xmlParserCtxtPtr ctxt) 1744: { 1745: xmlNodePtr ret; 1746: 1747: if (ctxt == NULL) return(NULL); 1748: if 
(ctxt->nodeNr <= 0) 1749: return (NULL); 1750: ctxt->nodeNr--; 1751: if (ctxt->nodeNr > 0) 1752: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1753: else 1754: ctxt->node = NULL; 1755: ret = ctxt->nodeTab[ctxt->nodeNr]; 1756: ctxt->nodeTab[ctxt->nodeNr] = NULL; 1757: return (ret); 1758: } 1759: 1760: #ifdef LIBXML_PUSH_ENABLED 1761: /** 1762: * nameNsPush: 1763: * @ctxt: an XML parser context 1764: * @value: the element name 1765: * @prefix: the element prefix 1766: * @URI: the element namespace name 1767: * 1768: * Pushes a new element name/prefix/URL on top of the name stack 1769: * 1770: * Returns -1 in case of error, the index in the stack otherwise 1771: */ 1772: static int 1773: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1774: const xmlChar *prefix, const xmlChar *URI, int nsNr) 1775: { 1776: if (ctxt->nameNr >= ctxt->nameMax) { 1777: const xmlChar * *tmp; 1778: void **tmp2; 1779: ctxt->nameMax *= 2; 1780: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1781: ctxt->nameMax * 1782: sizeof(ctxt->nameTab[0])); 1783: if (tmp == NULL) { 1784: ctxt->nameMax /= 2; 1785: goto mem_error; 1786: } 1787: ctxt->nameTab = tmp; 1788: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1789: ctxt->nameMax * 3 * 1790: sizeof(ctxt->pushTab[0])); 1791: if (tmp2 == NULL) { 1792: ctxt->nameMax /= 2; 1793: goto mem_error; 1794: } 1795: ctxt->pushTab = tmp2; 1796: } 1797: ctxt->nameTab[ctxt->nameNr] = value; 1798: ctxt->name = value; 1799: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1800: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1801: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1802: return (ctxt->nameNr++); 1803: mem_error: 1804: xmlErrMemory(ctxt, NULL); 1805: return (-1); 1806: } 1807: /** 1808: * nameNsPop: 1809: * @ctxt: an XML parser context 1810: * 1811: * Pops the top element/prefix/URI name from the name stack 1812: * 1813: * Returns the name just removed 1814: */ 1815: static const xmlChar * 1816: 
nameNsPop(xmlParserCtxtPtr ctxt) 1817: { 1818: const xmlChar *ret; 1819: 1820: if (ctxt->nameNr <= 0) 1821: return (NULL); 1822: ctxt->nameNr--; 1823: if (ctxt->nameNr > 0) 1824: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1825: else 1826: ctxt->name = NULL; 1827: ret = ctxt->nameTab[ctxt->nameNr]; 1828: ctxt->nameTab[ctxt->nameNr] = NULL; 1829: return (ret); 1830: } 1831: #endif /* LIBXML_PUSH_ENABLED */ 1832: 1833: /** 1834: * namePush: 1835: * @ctxt: an XML parser context 1836: * @value: the element name 1837: * 1838: * Pushes a new element name on top of the name stack 1839: * 1840: * Returns -1 in case of error, the index in the stack otherwise 1841: */ 1842: int 1843: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1844: { 1845: if (ctxt == NULL) return (-1); 1846: 1847: if (ctxt->nameNr >= ctxt->nameMax) { 1848: const xmlChar * *tmp; 1849: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1850: ctxt->nameMax * 2 * 1851: sizeof(ctxt->nameTab[0])); 1852: if (tmp == NULL) { 1853: goto mem_error; 1854: } 1855: ctxt->nameTab = tmp; 1856: ctxt->nameMax *= 2; 1857: } 1858: ctxt->nameTab[ctxt->nameNr] = value; 1859: ctxt->name = value; 1860: return (ctxt->nameNr++); 1861: mem_error: 1862: xmlErrMemory(ctxt, NULL); 1863: return (-1); 1864: } 1865: /** 1866: * namePop: 1867: * @ctxt: an XML parser context 1868: * 1869: * Pops the top element name from the name stack 1870: * 1871: * Returns the name just removed 1872: */ 1873: const xmlChar * 1874: namePop(xmlParserCtxtPtr ctxt) 1875: { 1876: const xmlChar *ret; 1877: 1878: if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1879: return (NULL); 1880: ctxt->nameNr--; 1881: if (ctxt->nameNr > 0) 1882: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1883: else 1884: ctxt->name = NULL; 1885: ret = ctxt->nameTab[ctxt->nameNr]; 1886: ctxt->nameTab[ctxt->nameNr] = NULL; 1887: return (ret); 1888: } 1889: 1890: static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1891: if (ctxt->spaceNr >= ctxt->spaceMax) { 
1892: int *tmp; 1893: 1894: ctxt->spaceMax *= 2; 1895: tmp = (int *) xmlRealloc(ctxt->spaceTab, 1896: ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1897: if (tmp == NULL) { 1898: xmlErrMemory(ctxt, NULL); 1899: ctxt->spaceMax /=2; 1900: return(-1); 1901: } 1902: ctxt->spaceTab = tmp; 1903: } 1904: ctxt->spaceTab[ctxt->spaceNr] = val; 1905: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1906: return(ctxt->spaceNr++); 1907: } 1908: 1909: static int spacePop(xmlParserCtxtPtr ctxt) { 1910: int ret; 1911: if (ctxt->spaceNr <= 0) return(0); 1912: ctxt->spaceNr--; 1913: if (ctxt->spaceNr > 0) 1914: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1915: else 1916: ctxt->space = &ctxt->spaceTab[0]; 1917: ret = ctxt->spaceTab[ctxt->spaceNr]; 1918: ctxt->spaceTab[ctxt->spaceNr] = -1; 1919: return(ret); 1920: } 1921: 1922: /* 1923: * Macros for accessing the content. Those should be used only by the parser, 1924: * and not exported. 1925: * 1926: * Dirty macros, i.e. one often need to make assumption on the context to 1927: * use them 1928: * 1929: * CUR_PTR return the current pointer to the xmlChar to be parsed. 1930: * To be used with extreme caution since operations consuming 1931: * characters may move the input buffer to a different location ! 1932: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1933: * This should be used internally by the parser 1934: * only to compare to ASCII values otherwise it would break when 1935: * running with UTF-8 encoding. 1936: * RAW same as CUR but in the input buffer, bypass any token 1937: * extraction that may have been done 1938: * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1939: * to compare on ASCII based substring. 1940: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1941: * strings without newlines within the parser. 1942: * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1943: * defined char within the parser. 
1944: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1945: * 1946: * NEXT Skip to the next character, this does the proper decoding 1947: * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1948: * NEXTL(l) Skip the current unicode character of l xmlChars long. 1949: * CUR_CHAR(l) returns the current unicode character (int), set l 1950: * to the number of xmlChars used for the encoding [0-5]. 1951: * CUR_SCHAR same but operate on a string instead of the context 1952: * COPY_BUF copy the current unicode char to the target buffer, increment 1953: * the index 1954: * GROW, SHRINK handling of input buffers 1955: */ 1956: 1957: #define RAW (*ctxt->input->cur) 1958: #define CUR (*ctxt->input->cur) 1959: #define NXT(val) ctxt->input->cur[(val)] 1960: #define CUR_PTR ctxt->input->cur 1961: 1962: #define CMP4( s, c1, c2, c3, c4 ) \ 1963: ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1964: ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1965: #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1966: ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1967: #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1968: ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1969: #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1970: ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1971: #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1972: ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1973: #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1974: ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1975: ((unsigned char *) s)[ 8 ] == c9 ) 1976: #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1977: ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1978: ((unsigned char *) s)[ 9 ] == c10 ) 1979: 1980: #define SKIP(val) do { \ 1981: ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 
1982: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1983: if ((*ctxt->input->cur == 0) && \ 1984: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1985: xmlPopInput(ctxt); \ 1986: } while (0) 1987: 1988: #define SKIPL(val) do { \ 1989: int skipl; \ 1990: for(skipl=0; skipl<val; skipl++) { \ 1991: if (*(ctxt->input->cur) == '\n') { \ 1992: ctxt->input->line++; ctxt->input->col = 1; \ 1993: } else ctxt->input->col++; \ 1994: ctxt->nbChars++; \ 1995: ctxt->input->cur++; \ 1996: } \ 1997: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1998: if ((*ctxt->input->cur == 0) && \ 1999: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2000: xmlPopInput(ctxt); \ 2001: } while (0) 2002: 2003: #define SHRINK if ((ctxt->progressive == 0) && \ 2004: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2005: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2006: xmlSHRINK (ctxt); 2007: 2008: static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2009: xmlParserInputShrink(ctxt->input); 2010: if ((*ctxt->input->cur == 0) && 2011: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2012: xmlPopInput(ctxt); 2013: } 2014: 2015: #define GROW if ((ctxt->progressive == 0) && \ 2016: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2017: xmlGROW (ctxt); 2018: 2019: static void xmlGROW (xmlParserCtxtPtr ctxt) { 2020: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2021: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2022: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2023: xmlPopInput(ctxt); 2024: } 2025: 2026: #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2027: 2028: #define NEXT xmlNextChar(ctxt) 2029: 2030: #define NEXT1 { \ 2031: ctxt->input->col++; \ 2032: ctxt->input->cur++; \ 2033: ctxt->nbChars++; \ 2034: if (*ctxt->input->cur == 0) \ 2035: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2036: } 2037: 2038: #define NEXTL(l) do { \ 2039: if (*(ctxt->input->cur) == '\n') { \ 2040: 
ctxt->input->line++; ctxt->input->col = 1; \ 2041: } else ctxt->input->col++; \ 2042: ctxt->input->cur += l; \ 2043: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2044: } while (0) 2045: 2046: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2047: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2048: 2049: #define COPY_BUF(l,b,i,v) \ 2050: if (l == 1) b[i++] = (xmlChar) v; \ 2051: else i += xmlCopyCharMultiByte(&b[i],v) 2052: 2053: /** 2054: * xmlSkipBlankChars: 2055: * @ctxt: the XML parser context 2056: * 2057: * skip all blanks character found at that point in the input streams. 2058: * It pops up finished entities in the process if allowable at that point. 2059: * 2060: * Returns the number of space chars skipped 2061: */ 2062: 2063: int 2064: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2065: int res = 0; 2066: 2067: /* 2068: * It's Okay to use CUR/NEXT here since all the blanks are on 2069: * the ASCII range. 2070: */ 2071: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2072: const xmlChar *cur; 2073: /* 2074: * if we are in the document content, go really fast 2075: */ 2076: cur = ctxt->input->cur; 2077: while (IS_BLANK_CH(*cur)) { 2078: if (*cur == '\n') { 2079: ctxt->input->line++; ctxt->input->col = 1; 2080: } 2081: cur++; 2082: res++; 2083: if (*cur == 0) { 2084: ctxt->input->cur = cur; 2085: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2086: cur = ctxt->input->cur; 2087: } 2088: } 2089: ctxt->input->cur = cur; 2090: } else { 2091: int cur; 2092: do { 2093: cur = CUR; 2094: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2095: NEXT; 2096: cur = CUR; 2097: res++; 2098: } 2099: while ((cur == 0) && (ctxt->inputNr > 1) && 2100: (ctxt->instate != XML_PARSER_COMMENT)) { 2101: xmlPopInput(ctxt); 2102: cur = CUR; 2103: } 2104: /* 2105: * Need to handle support of entities branching here 2106: */ 2107: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2108: } while (IS_BLANK(cur)); /* CHECKED 
tstblanks.xml */ 2109: } 2110: return(res); 2111: } 2112: 2113: /************************************************************************ 2114: * * 2115: * Commodity functions to handle entities * 2116: * * 2117: ************************************************************************/ 2118: 2119: /** 2120: * xmlPopInput: 2121: * @ctxt: an XML parser context 2122: * 2123: * xmlPopInput: the current input pointed by ctxt->input came to an end 2124: * pop it and return the next char. 2125: * 2126: * Returns the current xmlChar in the parser context 2127: */ 2128: xmlChar 2129: xmlPopInput(xmlParserCtxtPtr ctxt) { 2130: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2131: if (xmlParserDebugEntities) 2132: xmlGenericError(xmlGenericErrorContext, 2133: "Popping input %d\n", ctxt->inputNr); 2134: xmlFreeInputStream(inputPop(ctxt)); 2135: if ((*ctxt->input->cur == 0) && 2136: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2137: return(xmlPopInput(ctxt)); 2138: return(CUR); 2139: } 2140: 2141: /** 2142: * xmlPushInput: 2143: * @ctxt: an XML parser context 2144: * @input: an XML parser input fragment (entity, XML fragment ...). 2145: * 2146: * xmlPushInput: switch to a new input stream which is stacked on top 2147: * of the previous one(s). 
2148: * Returns -1 in case of error or the index in the input stack 2149: */ 2150: int 2151: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2152: int ret; 2153: if (input == NULL) return(-1); 2154: 2155: if (xmlParserDebugEntities) { 2156: if ((ctxt->input != NULL) && (ctxt->input->filename)) 2157: xmlGenericError(xmlGenericErrorContext, 2158: "%s(%d): ", ctxt->input->filename, 2159: ctxt->input->line); 2160: xmlGenericError(xmlGenericErrorContext, 2161: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2162: } 2163: ret = inputPush(ctxt, input); 2164: GROW; 2165: return(ret); 2166: } 2167: 2168: /** 2169: * xmlParseCharRef: 2170: * @ctxt: an XML parser context 2171: * 2172: * parse Reference declarations 2173: * 2174: * [66] CharRef ::= '&#' [0-9]+ ';' | 2175: * '&#x' [0-9a-fA-F]+ ';' 2176: * 2177: * [ WFC: Legal Character ] 2178: * Characters referred to using character references must match the 2179: * production for Char. 2180: * 2181: * Returns the value parsed (as an int), 0 in case of error 2182: */ 2183: int 2184: xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2185: unsigned int val = 0; 2186: int count = 0; 2187: unsigned int outofrange = 0; 2188: 2189: /* 2190: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2191: */ 2192: if ((RAW == '&') && (NXT(1) == '#') && 2193: (NXT(2) == 'x')) { 2194: SKIP(3); 2195: GROW; 2196: while (RAW != ';') { /* loop blocked by count */ 2197: if (count++ > 20) { 2198: count = 0; 2199: GROW; 2200: } 2201: if ((RAW >= '0') && (RAW <= '9')) 2202: val = val * 16 + (CUR - '0'); 2203: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2204: val = val * 16 + (CUR - 'a') + 10; 2205: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2206: val = val * 16 + (CUR - 'A') + 10; 2207: else { 2208: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2209: val = 0; 2210: break; 2211: } 2212: if (val > 0x10FFFF) 2213: outofrange = val; 2214: 2215: NEXT; 2216: count++; 2217: } 2218: if (RAW 
== ';') { 2219: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2220: ctxt->input->col++; 2221: ctxt->nbChars ++; 2222: ctxt->input->cur++; 2223: } 2224: } else if ((RAW == '&') && (NXT(1) == '#')) { 2225: SKIP(2); 2226: GROW; 2227: while (RAW != ';') { /* loop blocked by count */ 2228: if (count++ > 20) { 2229: count = 0; 2230: GROW; 2231: } 2232: if ((RAW >= '0') && (RAW <= '9')) 2233: val = val * 10 + (CUR - '0'); 2234: else { 2235: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2236: val = 0; 2237: break; 2238: } 2239: if (val > 0x10FFFF) 2240: outofrange = val; 2241: 2242: NEXT; 2243: count++; 2244: } 2245: if (RAW == ';') { 2246: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2247: ctxt->input->col++; 2248: ctxt->nbChars ++; 2249: ctxt->input->cur++; 2250: } 2251: } else { 2252: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2253: } 2254: 2255: /* 2256: * [ WFC: Legal Character ] 2257: * Characters referred to using character references must match the 2258: * production for Char. 2259: */ 2260: if ((IS_CHAR(val) && (outofrange == 0))) { 2261: return(val); 2262: } else { 2263: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2264: "xmlParseCharRef: invalid xmlChar value %d\n", 2265: val); 2266: } 2267: return(0); 2268: } 2269: 2270: /** 2271: * xmlParseStringCharRef: 2272: * @ctxt: an XML parser context 2273: * @str: a pointer to an index in the string 2274: * 2275: * parse Reference declarations, variant parsing from a string rather 2276: * than an an input flow. 2277: * 2278: * [66] CharRef ::= '&#' [0-9]+ ';' | 2279: * '&#x' [0-9a-fA-F]+ ';' 2280: * 2281: * [ WFC: Legal Character ] 2282: * Characters referred to using character references must match the 2283: * production for Char. 
2284: * 2285: * Returns the value parsed (as an int), 0 in case of error, str will be 2286: * updated to the current value of the index 2287: */ 2288: static int 2289: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2290: const xmlChar *ptr; 2291: xmlChar cur; 2292: unsigned int val = 0; 2293: unsigned int outofrange = 0; 2294: 2295: if ((str == NULL) || (*str == NULL)) return(0); 2296: ptr = *str; 2297: cur = *ptr; 2298: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2299: ptr += 3; 2300: cur = *ptr; 2301: while (cur != ';') { /* Non input consuming loop */ 2302: if ((cur >= '0') && (cur <= '9')) 2303: val = val * 16 + (cur - '0'); 2304: else if ((cur >= 'a') && (cur <= 'f')) 2305: val = val * 16 + (cur - 'a') + 10; 2306: else if ((cur >= 'A') && (cur <= 'F')) 2307: val = val * 16 + (cur - 'A') + 10; 2308: else { 2309: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2310: val = 0; 2311: break; 2312: } 2313: if (val > 0x10FFFF) 2314: outofrange = val; 2315: 2316: ptr++; 2317: cur = *ptr; 2318: } 2319: if (cur == ';') 2320: ptr++; 2321: } else if ((cur == '&') && (ptr[1] == '#')){ 2322: ptr += 2; 2323: cur = *ptr; 2324: while (cur != ';') { /* Non input consuming loops */ 2325: if ((cur >= '0') && (cur <= '9')) 2326: val = val * 10 + (cur - '0'); 2327: else { 2328: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2329: val = 0; 2330: break; 2331: } 2332: if (val > 0x10FFFF) 2333: outofrange = val; 2334: 2335: ptr++; 2336: cur = *ptr; 2337: } 2338: if (cur == ';') 2339: ptr++; 2340: } else { 2341: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2342: return(0); 2343: } 2344: *str = ptr; 2345: 2346: /* 2347: * [ WFC: Legal Character ] 2348: * Characters referred to using character references must match the 2349: * production for Char. 
2350: */ 2351: if ((IS_CHAR(val) && (outofrange == 0))) { 2352: return(val); 2353: } else { 2354: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2355: "xmlParseStringCharRef: invalid xmlChar value %d\n", 2356: val); 2357: } 2358: return(0); 2359: } 2360: 2361: /** 2362: * xmlNewBlanksWrapperInputStream: 2363: * @ctxt: an XML parser context 2364: * @entity: an Entity pointer 2365: * 2366: * Create a new input stream for wrapping 2367: * blanks around a PEReference 2368: * 2369: * Returns the new input stream or NULL 2370: */ 2371: 2372: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2373: 2374: static xmlParserInputPtr 2375: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2376: xmlParserInputPtr input; 2377: xmlChar *buffer; 2378: size_t length; 2379: if (entity == NULL) { 2380: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2381: "xmlNewBlanksWrapperInputStream entity\n"); 2382: return(NULL); 2383: } 2384: if (xmlParserDebugEntities) 2385: xmlGenericError(xmlGenericErrorContext, 2386: "new blanks wrapper for entity: %s\n", entity->name); 2387: input = xmlNewInputStream(ctxt); 2388: if (input == NULL) { 2389: return(NULL); 2390: } 2391: length = xmlStrlen(entity->name) + 5; 2392: buffer = xmlMallocAtomic(length); 2393: if (buffer == NULL) { 2394: xmlErrMemory(ctxt, NULL); 2395: xmlFree(input); 2396: return(NULL); 2397: } 2398: buffer [0] = ' '; 2399: buffer [1] = '%'; 2400: buffer [length-3] = ';'; 2401: buffer [length-2] = ' '; 2402: buffer [length-1] = 0; 2403: memcpy(buffer + 2, entity->name, length - 5); 2404: input->free = deallocblankswrapper; 2405: input->base = buffer; 2406: input->cur = buffer; 2407: input->length = length; 2408: input->end = &buffer[length]; 2409: return(input); 2410: } 2411: 2412: /** 2413: * xmlParserHandlePEReference: 2414: * @ctxt: the parser context 2415: * 2416: * [69] PEReference ::= '%' Name ';' 2417: * 2418: * [ WFC: No Recursion ] 2419: * A parsed entity must not contain a recursive 2420: * 
reference to itself, either directly or indirectly. 2421: * 2422: * [ WFC: Entity Declared ] 2423: * In a document without any DTD, a document with only an internal DTD 2424: * subset which contains no parameter entity references, or a document 2425: * with "standalone='yes'", ... ... The declaration of a parameter 2426: * entity must precede any reference to it... 2427: * 2428: * [ VC: Entity Declared ] 2429: * In a document with an external subset or external parameter entities 2430: * with "standalone='no'", ... ... The declaration of a parameter entity 2431: * must precede any reference to it... 2432: * 2433: * [ WFC: In DTD ] 2434: * Parameter-entity references may only appear in the DTD. 2435: * NOTE: misleading but this is handled. 2436: * 2437: * A PEReference may have been detected in the current input stream 2438: * the handling is done accordingly to 2439: * http://www.w3.org/TR/REC-xml#entproc 2440: * i.e. 2441: * - Included in literal in entity values 2442: * - Included as Parameter Entity reference within DTDs 2443: */ 2444: void 2445: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2446: const xmlChar *name; 2447: xmlEntityPtr entity = NULL; 2448: xmlParserInputPtr input; 2449: 2450: if (RAW != '%') return; 2451: switch(ctxt->instate) { 2452: case XML_PARSER_CDATA_SECTION: 2453: return; 2454: case XML_PARSER_COMMENT: 2455: return; 2456: case XML_PARSER_START_TAG: 2457: return; 2458: case XML_PARSER_END_TAG: 2459: return; 2460: case XML_PARSER_EOF: 2461: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2462: return; 2463: case XML_PARSER_PROLOG: 2464: case XML_PARSER_START: 2465: case XML_PARSER_MISC: 2466: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2467: return; 2468: case XML_PARSER_ENTITY_DECL: 2469: case XML_PARSER_CONTENT: 2470: case XML_PARSER_ATTRIBUTE_VALUE: 2471: case XML_PARSER_PI: 2472: case XML_PARSER_SYSTEM_LITERAL: 2473: case XML_PARSER_PUBLIC_LITERAL: 2474: /* we just ignore it there */ 2475: return; 2476: case 
XML_PARSER_EPILOG: 2477: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2478: return; 2479: case XML_PARSER_ENTITY_VALUE: 2480: /* 2481: * NOTE: in the case of entity values, we don't do the 2482: * substitution here since we need the literal 2483: * entity value to be able to save the internal 2484: * subset of the document. 2485: * This will be handled by xmlStringDecodeEntities 2486: */ 2487: return; 2488: case XML_PARSER_DTD: 2489: /* 2490: * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2491: * In the internal DTD subset, parameter-entity references 2492: * can occur only where markup declarations can occur, not 2493: * within markup declarations. 2494: * In that case this is handled in xmlParseMarkupDecl 2495: */ 2496: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2497: return; 2498: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2499: return; 2500: break; 2501: case XML_PARSER_IGNORE: 2502: return; 2503: } 2504: 2505: NEXT; 2506: name = xmlParseName(ctxt); 2507: if (xmlParserDebugEntities) 2508: xmlGenericError(xmlGenericErrorContext, 2509: "PEReference: %s\n", name); 2510: if (name == NULL) { 2511: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2512: } else { 2513: if (RAW == ';') { 2514: NEXT; 2515: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2516: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2517: if (entity == NULL) { 2518: 2519: /* 2520: * [ WFC: Entity Declared ] 2521: * In a document without any DTD, a document with only an 2522: * internal DTD subset which contains no parameter entity 2523: * references, or a document with "standalone='yes'", ... 2524: * ... The declaration of a parameter entity must precede 2525: * any reference to it... 
2526: */ 2527: if ((ctxt->standalone == 1) || 2528: ((ctxt->hasExternalSubset == 0) && 2529: (ctxt->hasPErefs == 0))) { 2530: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2531: "PEReference: %%%s; not found\n", name); 2532: } else { 2533: /* 2534: * [ VC: Entity Declared ] 2535: * In a document with an external subset or external 2536: * parameter entities with "standalone='no'", ... 2537: * ... The declaration of a parameter entity must precede 2538: * any reference to it... 2539: */ 2540: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2541: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2542: "PEReference: %%%s; not found\n", 2543: name, NULL); 2544: } else 2545: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2546: "PEReference: %%%s; not found\n", 2547: name, NULL); 2548: ctxt->valid = 0; 2549: } 2550: } else if (ctxt->input->free != deallocblankswrapper) { 2551: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2552: if (xmlPushInput(ctxt, input) < 0) 2553: return; 2554: } else { 2555: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2556: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2557: xmlChar start[4]; 2558: xmlCharEncoding enc; 2559: 2560: /* 2561: * handle the extra spaces added before and after 2562: * c.f. http://www.w3.org/TR/REC-xml#as-PE 2563: * this is done independently. 2564: */ 2565: input = xmlNewEntityInputStream(ctxt, entity); 2566: if (xmlPushInput(ctxt, input) < 0) 2567: return; 2568: 2569: /* 2570: * Get the 4 first bytes and decode the charset 2571: * if enc != XML_CHAR_ENCODING_NONE 2572: * plug some encoding conversion routines. 2573: * Note that, since we may have some non-UTF8 2574: * encoding (like UTF16, bug 135229), the 'length' 2575: * is not known, but we can calculate based upon 2576: * the amount of data in the buffer. 
2577: */ 2578: GROW 2579: if ((ctxt->input->end - ctxt->input->cur)>=4) { 2580: start[0] = RAW; 2581: start[1] = NXT(1); 2582: start[2] = NXT(2); 2583: start[3] = NXT(3); 2584: enc = xmlDetectCharEncoding(start, 4); 2585: if (enc != XML_CHAR_ENCODING_NONE) { 2586: xmlSwitchEncoding(ctxt, enc); 2587: } 2588: } 2589: 2590: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2591: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2592: (IS_BLANK_CH(NXT(5)))) { 2593: xmlParseTextDecl(ctxt); 2594: } 2595: } else { 2596: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2597: "PEReference: %s is not a parameter entity\n", 2598: name); 2599: } 2600: } 2601: } else { 2602: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2603: } 2604: } 2605: } 2606: 2607: /* 2608: * Macro used to grow the current buffer. 2609: * buffer##_size is expected to be a size_t 2610: * mem_error: is expected to handle memory allocation failures 2611: */ 2612: #define growBuffer(buffer, n) { \ 2613: xmlChar *tmp; \ 2614: size_t new_size = buffer##_size * 2 + n; \ 2615: if (new_size < buffer##_size) goto mem_error; \ 2616: tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2617: if (tmp == NULL) goto mem_error; \ 2618: buffer = tmp; \ 2619: buffer##_size = new_size; \ 2620: } 2621: 2622: /** 2623: * xmlStringLenDecodeEntities: 2624: * @ctxt: the parser context 2625: * @str: the input string 2626: * @len: the string length 2627: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2628: * @end: an end marker xmlChar, 0 if none 2629: * @end2: an end marker xmlChar, 0 if none 2630: * @end3: an end marker xmlChar, 0 if none 2631: * 2632: * Takes a entity string content and process to do the adequate substitutions. 2633: * 2634: * [67] Reference ::= EntityRef | CharRef 2635: * 2636: * [69] PEReference ::= '%' Name ';' 2637: * 2638: * Returns A newly allocated string with the substitution done. The caller 2639: * must deallocate it ! 
2640: */ 2641: xmlChar * 2642: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2643: int what, xmlChar end, xmlChar end2, xmlChar end3) { 2644: xmlChar *buffer = NULL; 2645: size_t buffer_size = 0; 2646: size_t nbchars = 0; 2647: 2648: xmlChar *current = NULL; 2649: xmlChar *rep = NULL; 2650: const xmlChar *last; 2651: xmlEntityPtr ent; 2652: int c,l; 2653: 2654: if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2655: return(NULL); 2656: last = str + len; 2657: 2658: if (((ctxt->depth > 40) && 2659: ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2660: (ctxt->depth > 1024)) { 2661: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2662: return(NULL); 2663: } 2664: 2665: /* 2666: * allocate a translation buffer. 2667: */ 2668: buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2669: buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2670: if (buffer == NULL) goto mem_error; 2671: 2672: /* 2673: * OK loop until we reach one of the ending char or a size limit. 2674: * we are operating on already parsed values. 
2675: */ 2676: if (str < last) 2677: c = CUR_SCHAR(str, l); 2678: else 2679: c = 0; 2680: while ((c != 0) && (c != end) && /* non input consuming loop */ 2681: (c != end2) && (c != end3)) { 2682: 2683: if (c == 0) break; 2684: if ((c == '&') && (str[1] == '#')) { 2685: int val = xmlParseStringCharRef(ctxt, &str); 2686: if (val != 0) { 2687: COPY_BUF(0,buffer,nbchars,val); 2688: } 2689: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2690: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2691: } 2692: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2693: if (xmlParserDebugEntities) 2694: xmlGenericError(xmlGenericErrorContext, 2695: "String decoding Entity Reference: %.30s\n", 2696: str); 2697: ent = xmlParseStringEntityRef(ctxt, &str); 2698: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2699: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2700: goto int_error; 2701: if (ent != NULL) 2702: ctxt->nbentities += ent->checked; 2703: if ((ent != NULL) && 2704: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2705: if (ent->content != NULL) { 2706: COPY_BUF(0,buffer,nbchars,ent->content[0]); 2707: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2708: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2709: } 2710: } else { 2711: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2712: "predefined entity has no content\n"); 2713: } 2714: } else if ((ent != NULL) && (ent->content != NULL)) { 2715: ctxt->depth++; 2716: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2717: 0, 0, 0); 2718: ctxt->depth--; 2719: 2720: if (rep != NULL) { 2721: current = rep; 2722: while (*current != 0) { /* non input consuming loop */ 2723: buffer[nbchars++] = *current++; 2724: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2725: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2726: goto int_error; 2727: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2728: } 2729: } 2730: xmlFree(rep); 2731: rep = NULL; 2732: } 2733: } else if (ent != NULL) { 2734: int i = 
xmlStrlen(ent->name); 2735: const xmlChar *cur = ent->name; 2736: 2737: buffer[nbchars++] = '&'; 2738: if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2739: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2740: } 2741: for (;i > 0;i--) 2742: buffer[nbchars++] = *cur++; 2743: buffer[nbchars++] = ';'; 2744: } 2745: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2746: if (xmlParserDebugEntities) 2747: xmlGenericError(xmlGenericErrorContext, 2748: "String decoding PE Reference: %.30s\n", str); 2749: ent = xmlParseStringPEReference(ctxt, &str); 2750: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2751: goto int_error; 2752: if (ent != NULL) 2753: ctxt->nbentities += ent->checked; 2754: if (ent != NULL) { 2755: if (ent->content == NULL) { 2756: xmlLoadEntityContent(ctxt, ent); 2757: } 2758: ctxt->depth++; 2759: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2760: 0, 0, 0); 2761: ctxt->depth--; 2762: if (rep != NULL) { 2763: current = rep; 2764: while (*current != 0) { /* non input consuming loop */ 2765: buffer[nbchars++] = *current++; 2766: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2767: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2768: goto int_error; 2769: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2770: } 2771: } 2772: xmlFree(rep); 2773: rep = NULL; 2774: } 2775: } 2776: } else { 2777: COPY_BUF(l,buffer,nbchars,c); 2778: str += l; 2779: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2780: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2781: } 2782: } 2783: if (str < last) 2784: c = CUR_SCHAR(str, l); 2785: else 2786: c = 0; 2787: } 2788: buffer[nbchars] = 0; 2789: return(buffer); 2790: 2791: mem_error: 2792: xmlErrMemory(ctxt, NULL); 2793: int_error: 2794: if (rep != NULL) 2795: xmlFree(rep); 2796: if (buffer != NULL) 2797: xmlFree(buffer); 2798: return(NULL); 2799: } 2800: 2801: /** 2802: * xmlStringDecodeEntities: 2803: * @ctxt: the parser context 2804: * @str: the input string 2805: * @what: combination of 
XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2806: * @end: an end marker xmlChar, 0 if none 2807: * @end2: an end marker xmlChar, 0 if none 2808: * @end3: an end marker xmlChar, 0 if none 2809: * 2810: * Takes a entity string content and process to do the adequate substitutions. 2811: * 2812: * [67] Reference ::= EntityRef | CharRef 2813: * 2814: * [69] PEReference ::= '%' Name ';' 2815: * 2816: * Returns A newly allocated string with the substitution done. The caller 2817: * must deallocate it ! 2818: */ 2819: xmlChar * 2820: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2821: xmlChar end, xmlChar end2, xmlChar end3) { 2822: if ((ctxt == NULL) || (str == NULL)) return(NULL); 2823: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2824: end, end2, end3)); 2825: } 2826: 2827: /************************************************************************ 2828: * * 2829: * Commodity functions, cleanup needed ? * 2830: * * 2831: ************************************************************************/ 2832: 2833: /** 2834: * areBlanks: 2835: * @ctxt: an XML parser context 2836: * @str: a xmlChar * 2837: * @len: the size of @str 2838: * @blank_chars: we know the chars are blanks 2839: * 2840: * Is this a sequence of blank chars that one can ignore ? 2841: * 2842: * Returns 1 if ignorable 0 otherwise. 2843: */ 2844: 2845: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2846: int blank_chars) { 2847: int i, ret; 2848: xmlNodePtr lastChild; 2849: 2850: /* 2851: * Don't spend time trying to differentiate them, the same callback is 2852: * used ! 2853: */ 2854: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2855: return(0); 2856: 2857: /* 2858: * Check for xml:space value. 
2859: */ 2860: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2861: (*(ctxt->space) == -2)) 2862: return(0); 2863: 2864: /* 2865: * Check that the string is made of blanks 2866: */ 2867: if (blank_chars == 0) { 2868: for (i = 0;i < len;i++) 2869: if (!(IS_BLANK_CH(str[i]))) return(0); 2870: } 2871: 2872: /* 2873: * Look if the element is mixed content in the DTD if available 2874: */ 2875: if (ctxt->node == NULL) return(0); 2876: if (ctxt->myDoc != NULL) { 2877: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2878: if (ret == 0) return(1); 2879: if (ret == 1) return(0); 2880: } 2881: 2882: /* 2883: * Otherwise, heuristic :-\ 2884: */ 2885: if ((RAW != '<') && (RAW != 0xD)) return(0); 2886: if ((ctxt->node->children == NULL) && 2887: (RAW == '<') && (NXT(1) == '/')) return(0); 2888: 2889: lastChild = xmlGetLastChild(ctxt->node); 2890: if (lastChild == NULL) { 2891: if ((ctxt->node->type != XML_ELEMENT_NODE) && 2892: (ctxt->node->content != NULL)) return(0); 2893: } else if (xmlNodeIsText(lastChild)) 2894: return(0); 2895: else if ((ctxt->node->children != NULL) && 2896: (xmlNodeIsText(ctxt->node->children))) 2897: return(0); 2898: return(1); 2899: } 2900: 2901: /************************************************************************ 2902: * * 2903: * Extra stuff for namespace support * 2904: * Relates to http://www.w3.org/TR/WD-xml-names * 2905: * * 2906: ************************************************************************/ 2907: 2908: /** 2909: * xmlSplitQName: 2910: * @ctxt: an XML parser context 2911: * @name: an XML parser context 2912: * @prefix: a xmlChar ** 2913: * 2914: * parse an UTF8 encoded XML qualified name string 2915: * 2916: * [NS 5] QName ::= (Prefix ':')? LocalPart 2917: * 2918: * [NS 6] Prefix ::= NCName 2919: * 2920: * [NS 7] LocalPart ::= NCName 2921: * 2922: * Returns the local part, and prefix is updated 2923: * to get the Prefix if any. 
2924: */ 2925: 2926: xmlChar * 2927: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2928: xmlChar buf[XML_MAX_NAMELEN + 5]; 2929: xmlChar *buffer = NULL; 2930: int len = 0; 2931: int max = XML_MAX_NAMELEN; 2932: xmlChar *ret = NULL; 2933: const xmlChar *cur = name; 2934: int c; 2935: 2936: if (prefix == NULL) return(NULL); 2937: *prefix = NULL; 2938: 2939: if (cur == NULL) return(NULL); 2940: 2941: #ifndef XML_XML_NAMESPACE 2942: /* xml: prefix is not really a namespace */ 2943: if ((cur[0] == 'x') && (cur[1] == 'm') && 2944: (cur[2] == 'l') && (cur[3] == ':')) 2945: return(xmlStrdup(name)); 2946: #endif 2947: 2948: /* nasty but well=formed */ 2949: if (cur[0] == ':') 2950: return(xmlStrdup(name)); 2951: 2952: c = *cur++; 2953: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2954: buf[len++] = c; 2955: c = *cur++; 2956: } 2957: if (len >= max) { 2958: /* 2959: * Okay someone managed to make a huge name, so he's ready to pay 2960: * for the processing speed. 
2961: */ 2962: max = len * 2; 2963: 2964: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2965: if (buffer == NULL) { 2966: xmlErrMemory(ctxt, NULL); 2967: return(NULL); 2968: } 2969: memcpy(buffer, buf, len); 2970: while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2971: if (len + 10 > max) { 2972: xmlChar *tmp; 2973: 2974: max *= 2; 2975: tmp = (xmlChar *) xmlRealloc(buffer, 2976: max * sizeof(xmlChar)); 2977: if (tmp == NULL) { 2978: xmlFree(buffer); 2979: xmlErrMemory(ctxt, NULL); 2980: return(NULL); 2981: } 2982: buffer = tmp; 2983: } 2984: buffer[len++] = c; 2985: c = *cur++; 2986: } 2987: buffer[len] = 0; 2988: } 2989: 2990: if ((c == ':') && (*cur == 0)) { 2991: if (buffer != NULL) 2992: xmlFree(buffer); 2993: *prefix = NULL; 2994: return(xmlStrdup(name)); 2995: } 2996: 2997: if (buffer == NULL) 2998: ret = xmlStrndup(buf, len); 2999: else { 3000: ret = buffer; 3001: buffer = NULL; 3002: max = XML_MAX_NAMELEN; 3003: } 3004: 3005: 3006: if (c == ':') { 3007: c = *cur; 3008: *prefix = ret; 3009: if (c == 0) { 3010: return(xmlStrndup(BAD_CAST "", 0)); 3011: } 3012: len = 0; 3013: 3014: /* 3015: * Check that the first character is proper to start 3016: * a new name 3017: */ 3018: if (!(((c >= 0x61) && (c <= 0x7A)) || 3019: ((c >= 0x41) && (c <= 0x5A)) || 3020: (c == '_') || (c == ':'))) { 3021: int l; 3022: int first = CUR_SCHAR(cur, l); 3023: 3024: if (!IS_LETTER(first) && (first != '_')) { 3025: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3026: "Name %s is not XML Namespace compliant\n", 3027: name); 3028: } 3029: } 3030: cur++; 3031: 3032: while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3033: buf[len++] = c; 3034: c = *cur++; 3035: } 3036: if (len >= max) { 3037: /* 3038: * Okay someone managed to make a huge name, so he's ready to pay 3039: * for the processing speed. 
3040: */ 3041: max = len * 2; 3042: 3043: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3044: if (buffer == NULL) { 3045: xmlErrMemory(ctxt, NULL); 3046: return(NULL); 3047: } 3048: memcpy(buffer, buf, len); 3049: while (c != 0) { /* tested bigname2.xml */ 3050: if (len + 10 > max) { 3051: xmlChar *tmp; 3052: 3053: max *= 2; 3054: tmp = (xmlChar *) xmlRealloc(buffer, 3055: max * sizeof(xmlChar)); 3056: if (tmp == NULL) { 3057: xmlErrMemory(ctxt, NULL); 3058: xmlFree(buffer); 3059: return(NULL); 3060: } 3061: buffer = tmp; 3062: } 3063: buffer[len++] = c; 3064: c = *cur++; 3065: } 3066: buffer[len] = 0; 3067: } 3068: 3069: if (buffer == NULL) 3070: ret = xmlStrndup(buf, len); 3071: else { 3072: ret = buffer; 3073: } 3074: } 3075: 3076: return(ret); 3077: } 3078: 3079: /************************************************************************ 3080: * * 3081: * The parser itself * 3082: * Relates to http://www.w3.org/TR/REC-xml * 3083: * * 3084: ************************************************************************/ 3085: 3086: /************************************************************************ 3087: * * 3088: * Routines to parse Name, NCName and NmToken * 3089: * * 3090: ************************************************************************/ 3091: #ifdef DEBUG 3092: static unsigned long nbParseName = 0; 3093: static unsigned long nbParseNmToken = 0; 3094: static unsigned long nbParseNCName = 0; 3095: static unsigned long nbParseNCNameComplex = 0; 3096: static unsigned long nbParseNameComplex = 0; 3097: static unsigned long nbParseStringName = 0; 3098: #endif 3099: 3100: /* 3101: * The two following functions are related to the change of accepted 3102: * characters for Name and NmToken in the Revision 5 of XML-1.0 3103: * They correspond to the modified production [4] and the new production [4a] 3104: * changes in that revision. 
Also note that the macros used for the 3105: * productions Letter, Digit, CombiningChar and Extender are not needed 3106: * anymore. 3107: * We still keep compatibility to pre-revision5 parsing semantic if the 3108: * new XML_PARSE_OLD10 option is given to the parser. 3109: */ 3110: static int 3111: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3112: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3113: /* 3114: * Use the new checks of production [4] [4a] amd [5] of the 3115: * Update 5 of XML-1.0 3116: */ 3117: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3118: (((c >= 'a') && (c <= 'z')) || 3119: ((c >= 'A') && (c <= 'Z')) || 3120: (c == '_') || (c == ':') || 3121: ((c >= 0xC0) && (c <= 0xD6)) || 3122: ((c >= 0xD8) && (c <= 0xF6)) || 3123: ((c >= 0xF8) && (c <= 0x2FF)) || 3124: ((c >= 0x370) && (c <= 0x37D)) || 3125: ((c >= 0x37F) && (c <= 0x1FFF)) || 3126: ((c >= 0x200C) && (c <= 0x200D)) || 3127: ((c >= 0x2070) && (c <= 0x218F)) || 3128: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3129: ((c >= 0x3001) && (c <= 0xD7FF)) || 3130: ((c >= 0xF900) && (c <= 0xFDCF)) || 3131: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3132: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3133: return(1); 3134: } else { 3135: if (IS_LETTER(c) || (c == '_') || (c == ':')) 3136: return(1); 3137: } 3138: return(0); 3139: } 3140: 3141: static int 3142: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3143: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3144: /* 3145: * Use the new checks of production [4] [4a] amd [5] of the 3146: * Update 5 of XML-1.0 3147: */ 3148: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3149: (((c >= 'a') && (c <= 'z')) || 3150: ((c >= 'A') && (c <= 'Z')) || 3151: ((c >= '0') && (c <= '9')) || /* !start */ 3152: (c == '_') || (c == ':') || 3153: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3154: ((c >= 0xC0) && (c <= 0xD6)) || 3155: ((c >= 0xD8) && (c <= 0xF6)) || 3156: ((c >= 0xF8) && (c <= 0x2FF)) || 3157: ((c >= 0x300) && (c <= 0x36F)) || 
/* !start */ 3158: ((c >= 0x370) && (c <= 0x37D)) || 3159: ((c >= 0x37F) && (c <= 0x1FFF)) || 3160: ((c >= 0x200C) && (c <= 0x200D)) || 3161: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3162: ((c >= 0x2070) && (c <= 0x218F)) || 3163: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3164: ((c >= 0x3001) && (c <= 0xD7FF)) || 3165: ((c >= 0xF900) && (c <= 0xFDCF)) || 3166: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3167: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3168: return(1); 3169: } else { 3170: if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3171: (c == '.') || (c == '-') || 3172: (c == '_') || (c == ':') || 3173: (IS_COMBINING(c)) || 3174: (IS_EXTENDER(c))) 3175: return(1); 3176: } 3177: return(0); 3178: } 3179: 3180: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3181: int *len, int *alloc, int normalize); 3182: 3183: static const xmlChar * 3184: xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3185: int len = 0, l; 3186: int c; 3187: int count = 0; 3188: 3189: #ifdef DEBUG 3190: nbParseNameComplex++; 3191: #endif 3192: 3193: /* 3194: * Handler for more complex cases 3195: */ 3196: GROW; 3197: c = CUR_CHAR(l); 3198: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3199: /* 3200: * Use the new checks of production [4] [4a] amd [5] of the 3201: * Update 5 of XML-1.0 3202: */ 3203: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3204: (!(((c >= 'a') && (c <= 'z')) || 3205: ((c >= 'A') && (c <= 'Z')) || 3206: (c == '_') || (c == ':') || 3207: ((c >= 0xC0) && (c <= 0xD6)) || 3208: ((c >= 0xD8) && (c <= 0xF6)) || 3209: ((c >= 0xF8) && (c <= 0x2FF)) || 3210: ((c >= 0x370) && (c <= 0x37D)) || 3211: ((c >= 0x37F) && (c <= 0x1FFF)) || 3212: ((c >= 0x200C) && (c <= 0x200D)) || 3213: ((c >= 0x2070) && (c <= 0x218F)) || 3214: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3215: ((c >= 0x3001) && (c <= 0xD7FF)) || 3216: ((c >= 0xF900) && (c <= 0xFDCF)) || 3217: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3218: ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3219: return(NULL); 3220: } 3221: len += l; 
3222: NEXTL(l); 3223: c = CUR_CHAR(l); 3224: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3225: (((c >= 'a') && (c <= 'z')) || 3226: ((c >= 'A') && (c <= 'Z')) || 3227: ((c >= '0') && (c <= '9')) || /* !start */ 3228: (c == '_') || (c == ':') || 3229: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3230: ((c >= 0xC0) && (c <= 0xD6)) || 3231: ((c >= 0xD8) && (c <= 0xF6)) || 3232: ((c >= 0xF8) && (c <= 0x2FF)) || 3233: ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3234: ((c >= 0x370) && (c <= 0x37D)) || 3235: ((c >= 0x37F) && (c <= 0x1FFF)) || 3236: ((c >= 0x200C) && (c <= 0x200D)) || 3237: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3238: ((c >= 0x2070) && (c <= 0x218F)) || 3239: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3240: ((c >= 0x3001) && (c <= 0xD7FF)) || 3241: ((c >= 0xF900) && (c <= 0xFDCF)) || 3242: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3243: ((c >= 0x10000) && (c <= 0xEFFFF)) 3244: )) { 3245: if (count++ > 100) { 3246: count = 0; 3247: GROW; 3248: } 3249: len += l; 3250: NEXTL(l); 3251: c = CUR_CHAR(l); 3252: } 3253: } else { 3254: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3255: (!IS_LETTER(c) && (c != '_') && 3256: (c != ':'))) { 3257: return(NULL); 3258: } 3259: len += l; 3260: NEXTL(l); 3261: c = CUR_CHAR(l); 3262: 3263: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3264: ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3265: (c == '.') || (c == '-') || 3266: (c == '_') || (c == ':') || 3267: (IS_COMBINING(c)) || 3268: (IS_EXTENDER(c)))) { 3269: if (count++ > 100) { 3270: count = 0; 3271: GROW; 3272: } 3273: len += l; 3274: NEXTL(l); 3275: c = CUR_CHAR(l); 3276: } 3277: } 3278: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3279: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3280: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3281: } 3282: 3283: /** 3284: * xmlParseName: 3285: * @ctxt: an XML parser context 3286: * 3287: * 
parse an XML name. 3288: * 3289: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3290: * CombiningChar | Extender 3291: * 3292: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3293: * 3294: * [6] Names ::= Name (#x20 Name)* 3295: * 3296: * Returns the Name parsed or NULL 3297: */ 3298: 3299: const xmlChar * 3300: xmlParseName(xmlParserCtxtPtr ctxt) { 3301: const xmlChar *in; 3302: const xmlChar *ret; 3303: int count = 0; 3304: 3305: GROW; 3306: 3307: #ifdef DEBUG 3308: nbParseName++; 3309: #endif 3310: 3311: /* 3312: * Accelerator for simple ASCII names 3313: */ 3314: in = ctxt->input->cur; 3315: if (((*in >= 0x61) && (*in <= 0x7A)) || 3316: ((*in >= 0x41) && (*in <= 0x5A)) || 3317: (*in == '_') || (*in == ':')) { 3318: in++; 3319: while (((*in >= 0x61) && (*in <= 0x7A)) || 3320: ((*in >= 0x41) && (*in <= 0x5A)) || 3321: ((*in >= 0x30) && (*in <= 0x39)) || 3322: (*in == '_') || (*in == '-') || 3323: (*in == ':') || (*in == '.')) 3324: in++; 3325: if ((*in > 0) && (*in < 0x80)) { 3326: count = in - ctxt->input->cur; 3327: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3328: ctxt->input->cur = in; 3329: ctxt->nbChars += count; 3330: ctxt->input->col += count; 3331: if (ret == NULL) 3332: xmlErrMemory(ctxt, NULL); 3333: return(ret); 3334: } 3335: } 3336: /* accelerator for special cases */ 3337: return(xmlParseNameComplex(ctxt)); 3338: } 3339: 3340: static const xmlChar * 3341: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3342: int len = 0, l; 3343: int c; 3344: int count = 0; 3345: 3346: #ifdef DEBUG 3347: nbParseNCNameComplex++; 3348: #endif 3349: 3350: /* 3351: * Handler for more complex cases 3352: */ 3353: GROW; 3354: c = CUR_CHAR(l); 3355: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3356: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3357: return(NULL); 3358: } 3359: 3360: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3361: (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3362: if (count++ > 100) 
{ 3363: count = 0; 3364: GROW; 3365: } 3366: len += l; 3367: NEXTL(l); 3368: c = CUR_CHAR(l); 3369: } 3370: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3371: } 3372: 3373: /** 3374: * xmlParseNCName: 3375: * @ctxt: an XML parser context 3376: * @len: lenght of the string parsed 3377: * 3378: * parse an XML name. 3379: * 3380: * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3381: * CombiningChar | Extender 3382: * 3383: * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3384: * 3385: * Returns the Name parsed or NULL 3386: */ 3387: 3388: static const xmlChar * 3389: xmlParseNCName(xmlParserCtxtPtr ctxt) { 3390: const xmlChar *in; 3391: const xmlChar *ret; 3392: int count = 0; 3393: 3394: #ifdef DEBUG 3395: nbParseNCName++; 3396: #endif 3397: 3398: /* 3399: * Accelerator for simple ASCII names 3400: */ 3401: in = ctxt->input->cur; 3402: if (((*in >= 0x61) && (*in <= 0x7A)) || 3403: ((*in >= 0x41) && (*in <= 0x5A)) || 3404: (*in == '_')) { 3405: in++; 3406: while (((*in >= 0x61) && (*in <= 0x7A)) || 3407: ((*in >= 0x41) && (*in <= 0x5A)) || 3408: ((*in >= 0x30) && (*in <= 0x39)) || 3409: (*in == '_') || (*in == '-') || 3410: (*in == '.')) 3411: in++; 3412: if ((*in > 0) && (*in < 0x80)) { 3413: count = in - ctxt->input->cur; 3414: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3415: ctxt->input->cur = in; 3416: ctxt->nbChars += count; 3417: ctxt->input->col += count; 3418: if (ret == NULL) { 3419: xmlErrMemory(ctxt, NULL); 3420: } 3421: return(ret); 3422: } 3423: } 3424: return(xmlParseNCNameComplex(ctxt)); 3425: } 3426: 3427: /** 3428: * xmlParseNameAndCompare: 3429: * @ctxt: an XML parser context 3430: * 3431: * parse an XML name and compares for match 3432: * (specialized for endtag parsing) 3433: * 3434: * Returns NULL for an illegal name, (xmlChar*) 1 for success 3435: * and the name for mismatch 3436: */ 3437: 3438: static const xmlChar * 3439: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3440: 
register const xmlChar *cmp = other; 3441: register const xmlChar *in; 3442: const xmlChar *ret; 3443: 3444: GROW; 3445: 3446: in = ctxt->input->cur; 3447: while (*in != 0 && *in == *cmp) { 3448: ++in; 3449: ++cmp; 3450: ctxt->input->col++; 3451: } 3452: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3453: /* success */ 3454: ctxt->input->cur = in; 3455: return (const xmlChar*) 1; 3456: } 3457: /* failure (or end of input buffer), check with full function */ 3458: ret = xmlParseName (ctxt); 3459: /* strings coming from the dictionnary direct compare possible */ 3460: if (ret == other) { 3461: return (const xmlChar*) 1; 3462: } 3463: return ret; 3464: } 3465: 3466: /** 3467: * xmlParseStringName: 3468: * @ctxt: an XML parser context 3469: * @str: a pointer to the string pointer (IN/OUT) 3470: * 3471: * parse an XML name. 3472: * 3473: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3474: * CombiningChar | Extender 3475: * 3476: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3477: * 3478: * [6] Names ::= Name (#x20 Name)* 3479: * 3480: * Returns the Name parsed or NULL. The @str pointer 3481: * is updated to the current location in the string. 3482: */ 3483: 3484: static xmlChar * 3485: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3486: xmlChar buf[XML_MAX_NAMELEN + 5]; 3487: const xmlChar *cur = *str; 3488: int len = 0, l; 3489: int c; 3490: 3491: #ifdef DEBUG 3492: nbParseStringName++; 3493: #endif 3494: 3495: c = CUR_SCHAR(cur, l); 3496: if (!xmlIsNameStartChar(ctxt, c)) { 3497: return(NULL); 3498: } 3499: 3500: COPY_BUF(l,buf,len,c); 3501: cur += l; 3502: c = CUR_SCHAR(cur, l); 3503: while (xmlIsNameChar(ctxt, c)) { 3504: COPY_BUF(l,buf,len,c); 3505: cur += l; 3506: c = CUR_SCHAR(cur, l); 3507: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3508: /* 3509: * Okay someone managed to make a huge name, so he's ready to pay 3510: * for the processing speed. 
3511: */ 3512: xmlChar *buffer; 3513: int max = len * 2; 3514: 3515: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3516: if (buffer == NULL) { 3517: xmlErrMemory(ctxt, NULL); 3518: return(NULL); 3519: } 3520: memcpy(buffer, buf, len); 3521: while (xmlIsNameChar(ctxt, c)) { 3522: if (len + 10 > max) { 3523: xmlChar *tmp; 3524: max *= 2; 3525: tmp = (xmlChar *) xmlRealloc(buffer, 3526: max * sizeof(xmlChar)); 3527: if (tmp == NULL) { 3528: xmlErrMemory(ctxt, NULL); 3529: xmlFree(buffer); 3530: return(NULL); 3531: } 3532: buffer = tmp; 3533: } 3534: COPY_BUF(l,buffer,len,c); 3535: cur += l; 3536: c = CUR_SCHAR(cur, l); 3537: } 3538: buffer[len] = 0; 3539: *str = cur; 3540: return(buffer); 3541: } 3542: } 3543: *str = cur; 3544: return(xmlStrndup(buf, len)); 3545: } 3546: 3547: /** 3548: * xmlParseNmtoken: 3549: * @ctxt: an XML parser context 3550: * 3551: * parse an XML Nmtoken. 3552: * 3553: * [7] Nmtoken ::= (NameChar)+ 3554: * 3555: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3556: * 3557: * Returns the Nmtoken parsed or NULL 3558: */ 3559: 3560: xmlChar * 3561: xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3562: xmlChar buf[XML_MAX_NAMELEN + 5]; 3563: int len = 0, l; 3564: int c; 3565: int count = 0; 3566: 3567: #ifdef DEBUG 3568: nbParseNmToken++; 3569: #endif 3570: 3571: GROW; 3572: c = CUR_CHAR(l); 3573: 3574: while (xmlIsNameChar(ctxt, c)) { 3575: if (count++ > 100) { 3576: count = 0; 3577: GROW; 3578: } 3579: COPY_BUF(l,buf,len,c); 3580: NEXTL(l); 3581: c = CUR_CHAR(l); 3582: if (len >= XML_MAX_NAMELEN) { 3583: /* 3584: * Okay someone managed to make a huge token, so he's ready to pay 3585: * for the processing speed. 
3586: */ 3587: xmlChar *buffer; 3588: int max = len * 2; 3589: 3590: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3591: if (buffer == NULL) { 3592: xmlErrMemory(ctxt, NULL); 3593: return(NULL); 3594: } 3595: memcpy(buffer, buf, len); 3596: while (xmlIsNameChar(ctxt, c)) { 3597: if (count++ > 100) { 3598: count = 0; 3599: GROW; 3600: } 3601: if (len + 10 > max) { 3602: xmlChar *tmp; 3603: 3604: max *= 2; 3605: tmp = (xmlChar *) xmlRealloc(buffer, 3606: max * sizeof(xmlChar)); 3607: if (tmp == NULL) { 3608: xmlErrMemory(ctxt, NULL); 3609: xmlFree(buffer); 3610: return(NULL); 3611: } 3612: buffer = tmp; 3613: } 3614: COPY_BUF(l,buffer,len,c); 3615: NEXTL(l); 3616: c = CUR_CHAR(l); 3617: } 3618: buffer[len] = 0; 3619: return(buffer); 3620: } 3621: } 3622: if (len == 0) 3623: return(NULL); 3624: return(xmlStrndup(buf, len)); 3625: } 3626: 3627: /** 3628: * xmlParseEntityValue: 3629: * @ctxt: an XML parser context 3630: * @orig: if non-NULL store a copy of the original entity value 3631: * 3632: * parse a value for ENTITY declarations 3633: * 3634: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3635: * "'" ([^%&'] | PEReference | Reference)* "'" 3636: * 3637: * Returns the EntityValue parsed with reference substituted or NULL 3638: */ 3639: 3640: xmlChar * 3641: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3642: xmlChar *buf = NULL; 3643: int len = 0; 3644: int size = XML_PARSER_BUFFER_SIZE; 3645: int c, l; 3646: xmlChar stop; 3647: xmlChar *ret = NULL; 3648: const xmlChar *cur = NULL; 3649: xmlParserInputPtr input; 3650: 3651: if (RAW == '"') stop = '"'; 3652: else if (RAW == '\'') stop = '\''; 3653: else { 3654: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3655: return(NULL); 3656: } 3657: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3658: if (buf == NULL) { 3659: xmlErrMemory(ctxt, NULL); 3660: return(NULL); 3661: } 3662: 3663: /* 3664: * The content of the entity definition is copied in a 
buffer. 3665: */ 3666: 3667: ctxt->instate = XML_PARSER_ENTITY_VALUE; 3668: input = ctxt->input; 3669: GROW; 3670: NEXT; 3671: c = CUR_CHAR(l); 3672: /* 3673: * NOTE: 4.4.5 Included in Literal 3674: * When a parameter entity reference appears in a literal entity 3675: * value, ... a single or double quote character in the replacement 3676: * text is always treated as a normal data character and will not 3677: * terminate the literal. 3678: * In practice it means we stop the loop only when back at parsing 3679: * the initial entity and the quote is found 3680: */ 3681: while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3682: (ctxt->input != input))) { 3683: if (len + 5 >= size) { 3684: xmlChar *tmp; 3685: 3686: size *= 2; 3687: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3688: if (tmp == NULL) { 3689: xmlErrMemory(ctxt, NULL); 3690: xmlFree(buf); 3691: return(NULL); 3692: } 3693: buf = tmp; 3694: } 3695: COPY_BUF(l,buf,len,c); 3696: NEXTL(l); 3697: /* 3698: * Pop-up of finished entities. 3699: */ 3700: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3701: xmlPopInput(ctxt); 3702: 3703: GROW; 3704: c = CUR_CHAR(l); 3705: if (c == 0) { 3706: GROW; 3707: c = CUR_CHAR(l); 3708: } 3709: } 3710: buf[len] = 0; 3711: 3712: /* 3713: * Raise problem w.r.t. '&' and '%' being used in non-entities 3714: * reference constructs. 
Note Charref will be handled in 3715: * xmlStringDecodeEntities() 3716: */ 3717: cur = buf; 3718: while (*cur != 0) { /* non input consuming */ 3719: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3720: xmlChar *name; 3721: xmlChar tmp = *cur; 3722: 3723: cur++; 3724: name = xmlParseStringName(ctxt, &cur); 3725: if ((name == NULL) || (*cur != ';')) { 3726: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3727: "EntityValue: '%c' forbidden except for entities references\n", 3728: tmp); 3729: } 3730: if ((tmp == '%') && (ctxt->inSubset == 1) && 3731: (ctxt->inputNr == 1)) { 3732: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3733: } 3734: if (name != NULL) 3735: xmlFree(name); 3736: if (*cur == 0) 3737: break; 3738: } 3739: cur++; 3740: } 3741: 3742: /* 3743: * Then PEReference entities are substituted. 3744: */ 3745: if (c != stop) { 3746: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3747: xmlFree(buf); 3748: } else { 3749: NEXT; 3750: /* 3751: * NOTE: 4.4.7 Bypassed 3752: * When a general entity reference appears in the EntityValue in 3753: * an entity declaration, it is bypassed and left as is. 3754: * so XML_SUBSTITUTE_REF is not set here. 3755: */ 3756: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3757: 0, 0, 0); 3758: if (orig != NULL) 3759: *orig = buf; 3760: else 3761: xmlFree(buf); 3762: } 3763: 3764: return(ret); 3765: } 3766: 3767: /** 3768: * xmlParseAttValueComplex: 3769: * @ctxt: an XML parser context 3770: * @len: the resulting attribute len 3771: * @normalize: wether to apply the inner normalization 3772: * 3773: * parse a value for an attribute, this is the fallback function 3774: * of xmlParseAttValue() when the attribute parsing requires handling 3775: * of non-ASCII characters, or normalization compaction. 3776: * 3777: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3778: */ 3779: static xmlChar * 3780: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { /* Fallback attribute-value parser. The value is accumulated in the heap buffer buf (capacity buf_size, len bytes used), which is what the function returns; attlen, when non-NULL, receives len. Checks of the form len + 10 > buf_size keep spare room ahead of the writes. NOTE(review): growBuffer, COPY_BUF, NEXT, NEXTL, NXT, RAW, GROW, CUR_CHAR and IS_CHAR are macros defined outside this chunk; growBuffer presumably enlarges buf and jumps to mem_error on failure - confirm against its definition. */ 3781: xmlChar limit = 0; /* quote character that opened the value and must close it */ 3782: xmlChar *buf = NULL; 3783: xmlChar *rep = NULL; /* decoded entity content, released right after use */ 3784: size_t len = 0; 3785: size_t buf_size = 0; 3786: int c, l, in_space = 0; /* in_space: set by a blank read from the input (except skipped leading blanks under normalize), cleared by a reference or a non-blank character */ 3787: xmlChar *current = NULL; 3788: xmlEntityPtr ent; 3789: 3790: /* the value must start with a double or a single quote, which becomes the terminator; anything else is XML_ERR_ATTRIBUTE_NOT_STARTED */ if (NXT(0) == '"') { 3791: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3792: limit = '"'; 3793: NEXT; 3794: } else if (NXT(0) == '\'') { 3795: limit = '\''; 3796: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3797: NEXT; 3798: } else { 3799: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3800: return(NULL); 3801: } 3802: 3803: /* 3804: * allocate a translation buffer. 3805: */ 3806: buf_size = XML_PARSER_BUFFER_SIZE; 3807: buf = (xmlChar *) xmlMallocAtomic(buf_size); 3808: if (buf == NULL) goto mem_error; 3809: 3810: /* 3811: * OK loop until we reach one of the ending char or a size limit. 3812: */ 3813: c = CUR_CHAR(l); 3814: while ((NXT(0) != limit) && /* checked */ 3815: (IS_CHAR(c)) && (c != '<')) { 3816: if (c == 0) break; 3817: if (c == '&') { /* a reference: either a character reference or a general entity reference */ 3818: in_space = 0; 3819: if (NXT(1) == '#') { 3820: int val = xmlParseCharRef(ctxt); 3821: 3822: /* a character reference to the ampersand stays a bare ampersand only when entities are being replaced, otherwise it is written back as the five characters &#38; */ if (val == '&') { 3823: if (ctxt->replaceEntities) { 3824: if (len + 10 > buf_size) { 3825: growBuffer(buf, 10); 3826: } 3827: buf[len++] = '&'; 3828: } else { 3829: /* 3830: * The reparsing will be done in xmlStringGetNodeList() 3831: * called by the attribute() function in SAX.c 3832: */ 3833: if (len + 10 > buf_size) { 3834: growBuffer(buf, 10); 3835: } 3836: buf[len++] = '&'; 3837: buf[len++] = '#'; 3838: buf[len++] = '3'; 3839: buf[len++] = '8'; 3840: buf[len++] = ';'; 3841: } 3842: } else if (val != 0) { /* any other non-zero value is encoded into buf; the return value of xmlCopyChar is added to len */ 3843: if (len + 10 > buf_size) { 3844: growBuffer(buf, 10); 3845: } 3846: len += xmlCopyChar(0, &buf[len], val); 3847: } 3848: } else { 3849: ent = xmlParseEntityRef(ctxt); 3850: ctxt->nbentities++; /* count this reference, plus ent->owner when the entity is known */ 3851: if (ent != NULL) 3852: ctxt->nbentities += ent->owner; 3853: /* case 1: predefined entity */ if ((ent !=
NULL) && 3854: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { /* emit its first content byte, except that an ampersand is written as &#38; when entities are not being replaced */ 3855: if (len + 10 > buf_size) { 3856: growBuffer(buf, 10); 3857: } 3858: if ((ctxt->replaceEntities == 0) && 3859: (ent->content[0] == '&')) { 3860: buf[len++] = '&'; 3861: buf[len++] = '#'; 3862: buf[len++] = '3'; 3863: buf[len++] = '8'; 3864: buf[len++] = ';'; 3865: } else { 3866: buf[len++] = ent->content[0]; 3867: } 3868: } else if ((ent != NULL) && 3869: (ctxt->replaceEntities != 0)) { /* case 2: entities are being replaced */ 3870: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { /* decode the entity content and copy it, turning CR, LF and TAB into a space */ 3871: rep = xmlStringDecodeEntities(ctxt, ent->content, 3872: XML_SUBSTITUTE_REF, 3873: 0, 0, 0); 3874: if (rep != NULL) { 3875: current = rep; 3876: while (*current != 0) { /* non input consuming */ 3877: if ((*current == 0xD) || (*current == 0xA) || 3878: (*current == 0x9)) { 3879: buf[len++] = 0x20; 3880: current++; 3881: } else 3882: buf[len++] = *current++; 3883: if (len + 10 > buf_size) { 3884: growBuffer(buf, 10); 3885: } 3886: } 3887: xmlFree(rep); 3888: rep = NULL; 3889: } 3890: } else { /* NOTE(review): this branch requires a predefined entity, which case 1 already handles, so it appears unreachable */ 3891: if (len + 10 > buf_size) { 3892: growBuffer(buf, 10); 3893: } 3894: if (ent->content != NULL) 3895: buf[len++] = ent->content[0]; 3896: } 3897: } else if (ent != NULL) { /* case 3: entities are not being replaced */ 3898: int i = xmlStrlen(ent->name); 3899: const xmlChar *cur = ent->name; 3900: 3901: /* 3902: * This may look absurd but is needed to detect 3903: * entities problems: the content is decoded only for that purpose and the result is freed at once 3904: */ 3905: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3906: (ent->content != NULL)) { 3907: rep = xmlStringDecodeEntities(ctxt, ent->content, 3908: XML_SUBSTITUTE_REF, 0, 0, 0); 3909: if (rep != NULL) { 3910: xmlFree(rep); 3911: rep = NULL; 3912: } 3913: } 3914: 3915: /* 3916: * Just output the reference as ampersand, entity name, semicolon 3917: */ 3918: buf[len++] = '&'; 3919: while (len + i + 10 > buf_size) { 3920: growBuffer(buf, i + 10); 3921: } 3922: for (;i > 0;i--) 3923: buf[len++] = *cur++; 3924: buf[len++] = ';'; 3925: } 3926: } 3927: } else { /* ordinary character taken directly from the input */ 3928: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3929: if ((len != 0) ||
(!normalize)) { /* under normalize, leading blanks are skipped and only the first blank of a run is stored; a stored blank is always passed to COPY_BUF as 0x20 */ 3930: if ((!normalize) || (!in_space)) { 3931: COPY_BUF(l,buf,len,0x20); 3932: while (len + 10 > buf_size) { 3933: growBuffer(buf, 10); 3934: } 3935: } 3936: in_space = 1; 3937: } 3938: } else { 3939: in_space = 0; 3940: COPY_BUF(l,buf,len,c); 3941: if (len + 10 > buf_size) { 3942: growBuffer(buf, 10); 3943: } 3944: } 3945: NEXTL(l); 3946: } 3947: GROW; 3948: c = CUR_CHAR(l); 3949: } 3950: /* with normalize, a value that ended inside a run of blanks loses its trailing 0x20 bytes */ if ((in_space) && (normalize)) { 3951: while ((len > 0) && (buf[len - 1] == 0x20)) len--; 3952: } 3953: buf[len] = 0; 3954: /* report why the loop stopped; NOTE(review): none of these error branches returns, so buf is still handed back after them */ if (RAW == '<') { 3955: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3956: } else if (RAW != limit) { 3957: if ((c != 0) && (!IS_CHAR(c))) { 3958: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3959: "invalid character in attribute value\n"); 3960: } else { 3961: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3962: "AttValue: ' expected\n"); 3963: } 3964: } else 3965: NEXT; 3966: 3967: /* 3968: * There we potentially risk an overflow, don't allow attribute value of 3969: * length more than INT_MAX it is a very reasonable assumption !
3970: */ 3971: if (len >= INT_MAX) { 3972: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3973: "AttValue lenght too long\n"); 3974: goto mem_error; /* NOTE(review): mem_error additionally reports a memory error through xmlErrMemory for this non-allocation failure */ 3975: } 3976: 3977: if (attlen != NULL) *attlen = (int) len; 3978: return(buf); 3979: 3980: mem_error: /* shared failure exit: report, free both buffers, return NULL */ 3981: xmlErrMemory(ctxt, NULL); 3982: if (buf != NULL) 3983: xmlFree(buf); 3984: if (rep != NULL) 3985: xmlFree(rep); 3986: return(NULL); 3987: } 3988: 3989: /** 3990: * xmlParseAttValue: 3991: * @ctxt: an XML parser context 3992: * 3993: * parse a value for an attribute 3994: * Note: the parser won't do substitution of entities here, this 3995: * will be handled later in xmlStringGetNodeList 3996: * 3997: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3998: * "'" ([^<&'] | Reference)* "'" 3999: * 4000: * 3.3.3 Attribute-Value Normalization: 4001: * Before the value of an attribute is passed to the application or 4002: * checked for validity, the XML processor must normalize it as follows: 4003: * - a character reference is processed by appending the referenced 4004: * character to the attribute value 4005: * - an entity reference is processed by recursively processing the 4006: * replacement text of the entity 4007: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4008: * appending #x20 to the normalized value, except that only a single 4009: * #x20 is appended for a "#xD#xA" sequence that is part of an external 4010: * parsed entity or the literal entity value of an internal parsed entity 4011: * - other characters are processed by appending them to the normalized value 4012: * If the declared value is not CDATA, then the XML processor must further 4013: * process the normalized attribute value by discarding any leading and 4014: * trailing space (#x20) characters, and by replacing sequences of space 4015: * (#x20) characters by a single space (#x20) character.
4016: * All attributes for which no declaration has been read should be treated 4017: * by a non-validating parser as if declared CDATA. 4018: * 4019: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4020: */ 4021: 4022: 4023: xmlChar * 4024: xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4025: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4026: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4027: } 4028: 4029: /** 4030: * xmlParseSystemLiteral: 4031: * @ctxt: an XML parser context 4032: * 4033: * parse an XML Literal 4034: * 4035: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4036: * 4037: * Returns the SystemLiteral parsed or NULL 4038: */ 4039: 4040: xmlChar * 4041: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4042: xmlChar *buf = NULL; 4043: int len = 0; 4044: int size = XML_PARSER_BUFFER_SIZE; 4045: int cur, l; 4046: xmlChar stop; 4047: int state = ctxt->instate; 4048: int count = 0; 4049: 4050: SHRINK; 4051: if (RAW == '"') { 4052: NEXT; 4053: stop = '"'; 4054: } else if (RAW == '\'') { 4055: NEXT; 4056: stop = '\''; 4057: } else { 4058: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4059: return(NULL); 4060: } 4061: 4062: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4063: if (buf == NULL) { 4064: xmlErrMemory(ctxt, NULL); 4065: return(NULL); 4066: } 4067: ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4068: cur = CUR_CHAR(l); 4069: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4070: if (len + 5 >= size) { 4071: xmlChar *tmp; 4072: 4073: size *= 2; 4074: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4075: if (tmp == NULL) { 4076: xmlFree(buf); 4077: xmlErrMemory(ctxt, NULL); 4078: ctxt->instate = (xmlParserInputState) state; 4079: return(NULL); 4080: } 4081: buf = tmp; 4082: } 4083: count++; 4084: if (count > 50) { 4085: GROW; 4086: count = 0; 4087: } 4088: COPY_BUF(l,buf,len,cur); 4089: NEXTL(l); 4090: cur = CUR_CHAR(l); 4091: if (cur == 0) { 4092: GROW; 
4093: SHRINK; 4094: cur = CUR_CHAR(l); 4095: } 4096: } 4097: buf[len] = 0; 4098: ctxt->instate = (xmlParserInputState) state; 4099: if (!IS_CHAR(cur)) { 4100: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4101: } else { 4102: NEXT; 4103: } 4104: return(buf); 4105: } 4106: 4107: /** 4108: * xmlParsePubidLiteral: 4109: * @ctxt: an XML parser context 4110: * 4111: * parse an XML public literal 4112: * 4113: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4114: * 4115: * Returns the PubidLiteral parsed or NULL. 4116: */ 4117: 4118: xmlChar * 4119: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4120: xmlChar *buf = NULL; 4121: int len = 0; 4122: int size = XML_PARSER_BUFFER_SIZE; 4123: xmlChar cur; 4124: xmlChar stop; 4125: int count = 0; 4126: xmlParserInputState oldstate = ctxt->instate; 4127: 4128: SHRINK; 4129: if (RAW == '"') { 4130: NEXT; 4131: stop = '"'; 4132: } else if (RAW == '\'') { 4133: NEXT; 4134: stop = '\''; 4135: } else { 4136: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4137: return(NULL); 4138: } 4139: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4140: if (buf == NULL) { 4141: xmlErrMemory(ctxt, NULL); 4142: return(NULL); 4143: } 4144: ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4145: cur = CUR; 4146: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4147: if (len + 1 >= size) { 4148: xmlChar *tmp; 4149: 4150: size *= 2; 4151: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4152: if (tmp == NULL) { 4153: xmlErrMemory(ctxt, NULL); 4154: xmlFree(buf); 4155: return(NULL); 4156: } 4157: buf = tmp; 4158: } 4159: buf[len++] = cur; 4160: count++; 4161: if (count > 50) { 4162: GROW; 4163: count = 0; 4164: } 4165: NEXT; 4166: cur = CUR; 4167: if (cur == 0) { 4168: GROW; 4169: SHRINK; 4170: cur = CUR; 4171: } 4172: } 4173: buf[len] = 0; 4174: if (cur != stop) { 4175: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4176: } else { 4177: NEXT; 4178: } 4179: ctxt->instate = 
oldstate; 4180: return(buf); 4181: } 4182: 4183: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4184: 4185: /* 4186: * used for the test in the inner loop of the char data testing 4187: */ 4188: static const unsigned char test_char_data[256] = { 4189: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4190: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4191: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4192: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4193: 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4194: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4195: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4196: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4197: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4198: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4199: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4200: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4201: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4202: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4203: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4204: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4205: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4206: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4207: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4208: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4209: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4210: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4211: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4212: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4213: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4214: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4215: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4216: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4217: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4218: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4219: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 4220: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4221: }; 4222: 4223: /** 4224: * xmlParseCharData: 4225: * @ctxt: an XML parser context 4226: * @cdata: int indicating whether we are within a CDATA section 4227: * 4228: * parse a CharData section. 4229: * if we are within a CDATA section ']]>' marks an end of section. 4230: * 4231: * The right angle bracket (>) may be represented using the string ">", 4232: * and must, for compatibility, be escaped using ">" or a character 4233: * reference when it appears in the string "]]>" in content, when that 4234: * string is not marking the end of a CDATA section. 4235: * 4236: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4237: */ 4238: 4239: void 4240: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4241: const xmlChar *in; 4242: int nbchar = 0; 4243: int line = ctxt->input->line; 4244: int col = ctxt->input->col; 4245: int ccol; 4246: 4247: SHRINK; 4248: GROW; 4249: /* 4250: * Accelerated common case where input don't need to be 4251: * modified before passing it to the handler. 
4252: */ 4253: if (!cdata) { 4254: in = ctxt->input->cur; 4255: do { 4256: get_more_space: 4257: while (*in == 0x20) { in++; ctxt->input->col++; } 4258: if (*in == 0xA) { 4259: do { 4260: ctxt->input->line++; ctxt->input->col = 1; 4261: in++; 4262: } while (*in == 0xA); 4263: goto get_more_space; 4264: } 4265: if (*in == '<') { 4266: nbchar = in - ctxt->input->cur; 4267: if (nbchar > 0) { 4268: const xmlChar *tmp = ctxt->input->cur; 4269: ctxt->input->cur = in; 4270: 4271: if ((ctxt->sax != NULL) && 4272: (ctxt->sax->ignorableWhitespace != 4273: ctxt->sax->characters)) { 4274: if (areBlanks(ctxt, tmp, nbchar, 1)) { 4275: if (ctxt->sax->ignorableWhitespace != NULL) 4276: ctxt->sax->ignorableWhitespace(ctxt->userData, 4277: tmp, nbchar); 4278: } else { 4279: if (ctxt->sax->characters != NULL) 4280: ctxt->sax->characters(ctxt->userData, 4281: tmp, nbchar); 4282: if (*ctxt->space == -1) 4283: *ctxt->space = -2; 4284: } 4285: } else if ((ctxt->sax != NULL) && 4286: (ctxt->sax->characters != NULL)) { 4287: ctxt->sax->characters(ctxt->userData, 4288: tmp, nbchar); 4289: } 4290: } 4291: return; 4292: } 4293: 4294: get_more: 4295: ccol = ctxt->input->col; 4296: while (test_char_data[*in]) { 4297: in++; 4298: ccol++; 4299: } 4300: ctxt->input->col = ccol; 4301: if (*in == 0xA) { 4302: do { 4303: ctxt->input->line++; ctxt->input->col = 1; 4304: in++; 4305: } while (*in == 0xA); 4306: goto get_more; 4307: } 4308: if (*in == ']') { 4309: if ((in[1] == ']') && (in[2] == '>')) { 4310: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4311: ctxt->input->cur = in; 4312: return; 4313: } 4314: in++; 4315: ctxt->input->col++; 4316: goto get_more; 4317: } 4318: nbchar = in - ctxt->input->cur; 4319: if (nbchar > 0) { 4320: if ((ctxt->sax != NULL) && 4321: (ctxt->sax->ignorableWhitespace != 4322: ctxt->sax->characters) && 4323: (IS_BLANK_CH(*ctxt->input->cur))) { 4324: const xmlChar *tmp = ctxt->input->cur; 4325: ctxt->input->cur = in; 4326: 4327: if (areBlanks(ctxt, tmp, nbchar, 0)) 
{ 4328: if (ctxt->sax->ignorableWhitespace != NULL) 4329: ctxt->sax->ignorableWhitespace(ctxt->userData, 4330: tmp, nbchar); 4331: } else { 4332: if (ctxt->sax->characters != NULL) 4333: ctxt->sax->characters(ctxt->userData, 4334: tmp, nbchar); 4335: if (*ctxt->space == -1) 4336: *ctxt->space = -2; 4337: } 4338: line = ctxt->input->line; 4339: col = ctxt->input->col; 4340: } else if (ctxt->sax != NULL) { 4341: if (ctxt->sax->characters != NULL) 4342: ctxt->sax->characters(ctxt->userData, 4343: ctxt->input->cur, nbchar); 4344: line = ctxt->input->line; 4345: col = ctxt->input->col; 4346: } 4347: /* something really bad happened in the SAX callback */ 4348: if (ctxt->instate != XML_PARSER_CONTENT) 4349: return; 4350: } 4351: ctxt->input->cur = in; 4352: if (*in == 0xD) { 4353: in++; 4354: if (*in == 0xA) { 4355: ctxt->input->cur = in; 4356: in++; 4357: ctxt->input->line++; ctxt->input->col = 1; 4358: continue; /* while */ 4359: } 4360: in--; 4361: } 4362: if (*in == '<') { 4363: return; 4364: } 4365: if (*in == '&') { 4366: return; 4367: } 4368: SHRINK; 4369: GROW; 4370: in = ctxt->input->cur; 4371: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4372: nbchar = 0; 4373: } 4374: ctxt->input->line = line; 4375: ctxt->input->col = col; 4376: xmlParseCharDataComplex(ctxt, cdata); 4377: } 4378: 4379: /** 4380: * xmlParseCharDataComplex: 4381: * @ctxt: an XML parser context 4382: * @cdata: int indicating whether we are within a CDATA section 4383: * 4384: * parse a CharData section.this is the fallback function 4385: * of xmlParseCharData() when the parsing requires handling 4386: * of non-ASCII characters. 
4387: */ 4388: static void 4389: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { /* Gathers character data into the stack buffer buf and delivers it to the SAX callbacks in segments: one each time nbchar reaches XML_PARSER_BIG_BUFFER_SIZE, and a final one for whatever is left after the loop. NOTE(review): SHRINK, GROW, CUR_CHAR, NXT, COPY_BUF, NEXTL and IS_CHAR are macros defined outside this chunk. */ 4390: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; /* five bytes beyond the flush threshold; presumably room for one more encoded character plus the terminating zero - confirm against COPY_BUF */ 4391: int nbchar = 0; 4392: int cur, l; 4393: int count = 0; /* iterations since the last periodic GROW */ 4394: 4395: SHRINK; 4396: GROW; 4397: cur = CUR_CHAR(l); 4398: /* stop at a less-than sign, an ampersand, or a value rejected by IS_CHAR */ while ((cur != '<') && /* checked */ 4399: (cur != '&') && 4400: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4401: /* the three characters ]]> end the loop when cdata is set; otherwise an error is raised and copying continues */ if ((cur == ']') && (NXT(1) == ']') && 4402: (NXT(2) == '>')) { 4403: if (cdata) break; 4404: else { 4405: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4406: } 4407: } 4408: COPY_BUF(l,buf,nbchar,cur); 4409: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { /* threshold reached: flush this segment */ 4410: buf[nbchar] = 0; 4411: 4412: /* 4413: * OK the segment is to be consumed as chars. 4414: */ 4415: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { /* areBlanks picks between the ignorableWhitespace and characters callbacks */ 4416: if (areBlanks(ctxt, buf, nbchar, 0)) { 4417: if (ctxt->sax->ignorableWhitespace != NULL) 4418: ctxt->sax->ignorableWhitespace(ctxt->userData, 4419: buf, nbchar); 4420: } else { 4421: if (ctxt->sax->characters != NULL) 4422: ctxt->sax->characters(ctxt->userData, buf, nbchar); /* NOTE(review): the meaning of the -1 and -2 values of *ctxt->space is defined outside this chunk */ 4423: if ((ctxt->sax->characters != 4424: ctxt->sax->ignorableWhitespace) && 4425: (*ctxt->space == -1)) 4426: *ctxt->space = -2; 4427: } 4428: } 4429: nbchar = 0; 4430: /* something really bad happened in the SAX callback */ 4431: if (ctxt->instate != XML_PARSER_CONTENT) 4432: return; 4433: } 4434: count++; 4435: if (count > 50) { /* periodic GROW, then restart the counter */ 4436: GROW; 4437: count = 0; 4438: } 4439: NEXTL(l); 4440: cur = CUR_CHAR(l); 4441: } 4442: if (nbchar != 0) { /* flush the remainder with the same callback dispatch as inside the loop */ 4443: buf[nbchar] = 0; 4444: /* 4445: * OK the segment is to be consumed as chars.
4446: */ 4447: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4448: if (areBlanks(ctxt, buf, nbchar, 0)) { 4449: if (ctxt->sax->ignorableWhitespace != NULL) 4450: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4451: } else { 4452: if (ctxt->sax->characters != NULL) 4453: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4454: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4455: (*ctxt->space == -1)) 4456: *ctxt->space = -2; 4457: } 4458: } 4459: } 4460: /* the loop ended on a non-zero value that IS_CHAR rejects */ if ((cur != 0) && (!IS_CHAR(cur))) { 4461: /* Generate the error and skip the offending character */ 4462: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4463: "PCDATA invalid Char value %d\n", 4464: cur); 4465: NEXTL(l); 4466: } 4467: } 4468: 4469: /** 4470: * xmlParseExternalID: 4471: * @ctxt: an XML parser context 4472: * @publicID: a xmlChar** receiving PubidLiteral 4473: * @strict: indicate whether we should restrict parsing to only 4474: * production [75], see NOTE below 4475: * 4476: * Parse an External ID or a Public ID 4477: * 4478: * NOTE: Productions [75] and [83] interact badly since [75] can generate 4479: * 'PUBLIC' S PubidLiteral S SystemLiteral 4480: * 4481: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4482: * | 'PUBLIC' S PubidLiteral S SystemLiteral 4483: * 4484: * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4485: * 4486: * Returns the function returns SystemLiteral and in the second 4487: * case publicID receives PubidLiteral, is strict is off 4488: * it is possible to return NULL and have publicID set.
4489: */ 4490: 4491: xmlChar * 4492: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4493: xmlChar *URI = NULL; 4494: 4495: SHRINK; 4496: 4497: *publicID = NULL; 4498: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4499: SKIP(6); 4500: if (!IS_BLANK_CH(CUR)) { 4501: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4502: "Space required after 'SYSTEM'\n"); 4503: } 4504: SKIP_BLANKS; 4505: URI = xmlParseSystemLiteral(ctxt); 4506: if (URI == NULL) { 4507: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4508: } 4509: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4510: SKIP(6); 4511: if (!IS_BLANK_CH(CUR)) { 4512: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4513: "Space required after 'PUBLIC'\n"); 4514: } 4515: SKIP_BLANKS; 4516: *publicID = xmlParsePubidLiteral(ctxt); 4517: if (*publicID == NULL) { 4518: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4519: } 4520: if (strict) { 4521: /* 4522: * We don't handle [83] so "S SystemLiteral" is required. 4523: */ 4524: if (!IS_BLANK_CH(CUR)) { 4525: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4526: "Space required after the Public Identifier\n"); 4527: } 4528: } else { 4529: /* 4530: * We handle [83] so we return immediately, if 4531: * "S SystemLiteral" is not detected. From a purely parsing 4532: * point of view that's a nice mess. 4533: */ 4534: const xmlChar *ptr; 4535: GROW; 4536: 4537: ptr = CUR_PTR; 4538: if (!IS_BLANK_CH(*ptr)) return(NULL); 4539: 4540: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! 
*/ 4541: if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4542: } 4543: SKIP_BLANKS; 4544: URI = xmlParseSystemLiteral(ctxt); 4545: if (URI == NULL) { 4546: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4547: } 4548: } 4549: return(URI); 4550: } 4551: 4552: /** 4553: * xmlParseCommentComplex: 4554: * @ctxt: an XML parser context 4555: * @buf: the already parsed part of the buffer 4556: * @len: number of bytes filles in the buffer 4557: * @size: allocated size of the buffer 4558: * 4559: * Skip an XML (SGML) comment  4560: * The spec says that "For compatibility, the string "--" (double-hyphen) 4561: * must not occur within comments. " 4562: * This is the slow routine in case the accelerator for ascii didn't work 4563: * 4564: * [15] Comment ::= '' 4565: */ 4566: static void 4567: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4568: int q, ql; 4569: int r, rl; 4570: int cur, l; 4571: int count = 0; 4572: int inputid; 4573: 4574: inputid = ctxt->input->id; 4575: 4576: if (buf == NULL) { 4577: len = 0; 4578: size = XML_PARSER_BUFFER_SIZE; 4579: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4580: if (buf == NULL) { 4581: xmlErrMemory(ctxt, NULL); 4582: return; 4583: } 4584: } 4585: GROW; /* Assure there's enough input data */ 4586: q = CUR_CHAR(ql); 4587: if (q == 0) 4588: goto not_terminated; 4589: if (!IS_CHAR(q)) { 4590: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4591: "xmlParseComment: invalid xmlChar value %d\n", 4592: q); 4593: xmlFree (buf); 4594: return; 4595: } 4596: NEXTL(ql); 4597: r = CUR_CHAR(rl); 4598: if (r == 0) 4599: goto not_terminated; 4600: if (!IS_CHAR(r)) { 4601: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4602: "xmlParseComment: invalid xmlChar value %d\n", 4603: q); 4604: xmlFree (buf); 4605: return; 4606: } 4607: NEXTL(rl); 4608: cur = CUR_CHAR(l); 4609: if (cur == 0) 4610: goto not_terminated; 4611: while (IS_CHAR(cur) && /* checked */ 4612: ((cur != '>') || 4613: (r != '-') || (q != 
'-'))) { 4614: if ((r == '-') && (q == '-')) { 4615: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4616: } 4617: if (len + 5 >= size) { 4618: xmlChar *new_buf; 4619: size *= 2; 4620: new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4621: if (new_buf == NULL) { 4622: xmlFree (buf); 4623: xmlErrMemory(ctxt, NULL); 4624: return; 4625: } 4626: buf = new_buf; 4627: } 4628: COPY_BUF(ql,buf,len,q); 4629: q = r; 4630: ql = rl; 4631: r = cur; 4632: rl = l; 4633: 4634: count++; 4635: if (count > 50) { 4636: GROW; 4637: count = 0; 4638: } 4639: NEXTL(l); 4640: cur = CUR_CHAR(l); 4641: if (cur == 0) { 4642: SHRINK; 4643: GROW; 4644: cur = CUR_CHAR(l); 4645: } 4646: } 4647: buf[len] = 0; 4648: if (cur == 0) { 4649: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4650: "Comment not terminated \n 4679: * The spec says that "For compatibility, the string "--" (double-hyphen) 4680: * must not occur within comments. " 4681: * 4682: * [15] Comment ::= '' 4683: */ 4684: void 4685: xmlParseComment(xmlParserCtxtPtr ctxt) { 4686: xmlChar *buf = NULL; 4687: int size = XML_PARSER_BUFFER_SIZE; 4688: int len = 0; 4689: xmlParserInputState state; 4690: const xmlChar *in; 4691: int nbchar = 0, ccol; 4692: int inputid; 4693: 4694: /* 4695: * Check that there is a comment right here. 4696: */ 4697: if ((RAW != '<') || (NXT(1) != '!') || 4698: (NXT(2) != '-') || (NXT(3) != '-')) return; 4699: state = ctxt->instate; 4700: ctxt->instate = XML_PARSER_COMMENT; 4701: inputid = ctxt->input->id; 4702: SKIP(4); 4703: SHRINK; 4704: GROW; 4705: 4706: /* 4707: * Accelerated common case where input don't need to be 4708: * modified before passing it to the handler. 
4709: */ 4710: in = ctxt->input->cur; 4711: do { 4712: if (*in == 0xA) { 4713: do { 4714: ctxt->input->line++; ctxt->input->col = 1; 4715: in++; 4716: } while (*in == 0xA); 4717: } 4718: get_more: 4719: ccol = ctxt->input->col; 4720: while (((*in > '-') && (*in <= 0x7F)) || 4721: ((*in >= 0x20) && (*in < '-')) || 4722: (*in == 0x09)) { 4723: in++; 4724: ccol++; 4725: } 4726: ctxt->input->col = ccol; 4727: if (*in == 0xA) { 4728: do { 4729: ctxt->input->line++; ctxt->input->col = 1; 4730: in++; 4731: } while (*in == 0xA); 4732: goto get_more; 4733: } 4734: nbchar = in - ctxt->input->cur; 4735: /* 4736: * save current set of data 4737: */ 4738: if (nbchar > 0) { 4739: if ((ctxt->sax != NULL) && 4740: (ctxt->sax->comment != NULL)) { 4741: if (buf == NULL) { 4742: if ((*in == '-') && (in[1] == '-')) 4743: size = nbchar + 1; 4744: else 4745: size = XML_PARSER_BUFFER_SIZE + nbchar; 4746: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4747: if (buf == NULL) { 4748: xmlErrMemory(ctxt, NULL); 4749: ctxt->instate = state; 4750: return; 4751: } 4752: len = 0; 4753: } else if (len + nbchar + 1 >= size) { 4754: xmlChar *new_buf; 4755: size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4756: new_buf = (xmlChar *) xmlRealloc(buf, 4757: size * sizeof(xmlChar)); 4758: if (new_buf == NULL) { 4759: xmlFree (buf); 4760: xmlErrMemory(ctxt, NULL); 4761: ctxt->instate = state; 4762: return; 4763: } 4764: buf = new_buf; 4765: } 4766: memcpy(&buf[len], ctxt->input->cur, nbchar); 4767: len += nbchar; 4768: buf[len] = 0; 4769: } 4770: } 4771: ctxt->input->cur = in; 4772: if (*in == 0xA) { 4773: in++; 4774: ctxt->input->line++; ctxt->input->col = 1; 4775: } 4776: if (*in == 0xD) { 4777: in++; 4778: if (*in == 0xA) { 4779: ctxt->input->cur = in; 4780: in++; 4781: ctxt->input->line++; ctxt->input->col = 1; 4782: continue; /* while */ 4783: } 4784: in--; 4785: } 4786: SHRINK; 4787: GROW; 4788: in = ctxt->input->cur; 4789: if (*in == '-') { 4790: if (in[1] == '-') { 4791: if (in[2] == 
'>') { 4792: if (ctxt->input->id != inputid) { 4793: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4794: "comment doesn't start and stop in the same entity\n"); 4795: } 4796: SKIP(3); 4797: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4798: (!ctxt->disableSAX)) { 4799: if (buf != NULL) 4800: ctxt->sax->comment(ctxt->userData, buf); 4801: else 4802: ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4803: } 4804: if (buf != NULL) 4805: xmlFree(buf); 4806: ctxt->instate = state; 4807: return; 4808: } 4809: if (buf != NULL) { 4810: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4811: "Double hyphen within comment: " 4812: "<!--%.50s\n", 4813: buf); 4814: } else 4815: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4816: "Double hyphen within comment\n", NULL); 4817: in++; 4818: ctxt->input->col++; 4819: } 4820: in++; 4821: ctxt->input->col++; 4822: goto get_more; 4823: } 4824: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4825: xmlParseCommentComplex(ctxt, buf, len, size); 4826: ctxt->instate = state; 4827: return; 4828: } 4829: 4830: 4831: /** 4832: * xmlParsePITarget: 4833: * @ctxt: an XML parser context 4834: * 4835: * parse the name of a PI 4836: * 4837: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4838: * 4839: * Returns the PITarget name or NULL 4840: */ 4841: 4842: const xmlChar * 4843: xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4844: const xmlChar *name; 4845: 4846: name = xmlParseName(ctxt); 4847: if ((name != NULL) && 4848: ((name[0] == 'x') || (name[0] == 'X')) && 4849: ((name[1] == 'm') || (name[1] == 'M')) && 4850: ((name[2] == 'l') || (name[2] == 'L'))) { 4851: int i; 4852: if ((name[0] == 'x') && (name[1] == 'm') && 4853: (name[2] == 'l') && (name[3] == 0)) { 4854: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4855: "XML declaration allowed only at the start of the document\n"); 4856: return(name); 4857: } else if (name[3] == 0) { 4858: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4859: 
return(name); 4860: } 4861: for (i = 0;;i++) { 4862: if (xmlW3CPIs[i] == NULL) break; 4863: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4864: return(name); 4865: } 4866: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4867: "xmlParsePITarget: invalid name prefix 'xml'\n", 4868: NULL, NULL); 4869: } 4870: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 4871: xmlNsErr(ctxt, XML_NS_ERR_COLON, 4872: "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 4873: } 4874: return(name); 4875: } 4876: 4877: #ifdef LIBXML_CATALOG_ENABLED 4878: /** 4879: * xmlParseCatalogPI: 4880: * @ctxt: an XML parser context 4881: * @catalog: the PI value string 4882: * 4883: * parse an XML Catalog Processing Instruction. 4884: * 4885: * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4886: * 4887: * Occurs only if allowed by the user and if happening in the Misc 4888: * part of the document before any doctype informations 4889: * This will add the given catalog to the parsing context in order 4890: * to be used if there is a resolution need further down in the document 4891: */ 4892: 4893: static void 4894: xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4895: xmlChar *URL = NULL; 4896: const xmlChar *tmp, *base; 4897: xmlChar marker; 4898: 4899: tmp = catalog; 4900: while (IS_BLANK_CH(*tmp)) tmp++; 4901: if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4902: goto error; 4903: tmp += 7; 4904: while (IS_BLANK_CH(*tmp)) tmp++; 4905: if (*tmp != '=') { 4906: return; 4907: } 4908: tmp++; 4909: while (IS_BLANK_CH(*tmp)) tmp++; 4910: marker = *tmp; 4911: if ((marker != '\'') && (marker != '"')) 4912: goto error; 4913: tmp++; 4914: base = tmp; 4915: while ((*tmp != 0) && (*tmp != marker)) tmp++; 4916: if (*tmp == 0) 4917: goto error; 4918: URL = xmlStrndup(base, tmp - base); 4919: tmp++; 4920: while (IS_BLANK_CH(*tmp)) tmp++; 4921: if (*tmp != 0) 4922: goto error; 4923: 4924: if (URL != NULL) { 4925: ctxt->catalogs = 
xmlCatalogAddLocal(ctxt->catalogs, URL); 4926: xmlFree(URL); 4927: } 4928: return; 4929: 4930: error: 4931: xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4932: "Catalog PI syntax error: %s\n", 4933: catalog, NULL); 4934: if (URL != NULL) 4935: xmlFree(URL); 4936: } 4937: #endif 4938: 4939: /** 4940: * xmlParsePI: 4941: * @ctxt: an XML parser context 4942: * 4943: * parse an XML Processing Instruction. 4944: * 4945: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4946: * 4947: * The processing is transfered to SAX once parsed. 4948: */ 4949: 4950: void 4951: xmlParsePI(xmlParserCtxtPtr ctxt) { 4952: xmlChar *buf = NULL; 4953: int len = 0; 4954: int size = XML_PARSER_BUFFER_SIZE; 4955: int cur, l; 4956: const xmlChar *target; 4957: xmlParserInputState state; 4958: int count = 0; 4959: 4960: if ((RAW == '<') && (NXT(1) == '?')) { 4961: xmlParserInputPtr input = ctxt->input; 4962: state = ctxt->instate; 4963: ctxt->instate = XML_PARSER_PI; 4964: /* 4965: * this is a Processing Instruction. 4966: */ 4967: SKIP(2); 4968: SHRINK; 4969: 4970: /* 4971: * Parse the target name and check for special support like 4972: * namespace. 4973: */ 4974: target = xmlParsePITarget(ctxt); 4975: if (target != NULL) { 4976: if ((RAW == '?') && (NXT(1) == '>')) { 4977: if (input != ctxt->input) { 4978: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4979: "PI declaration doesn't start and stop in the same entity\n"); 4980: } 4981: SKIP(2); 4982: 4983: /* 4984: * SAX: PI detected. 
4985: */ 4986: if ((ctxt->sax) && (!ctxt->disableSAX) && 4987: (ctxt->sax->processingInstruction != NULL)) 4988: ctxt->sax->processingInstruction(ctxt->userData, 4989: target, NULL); 4990: if (ctxt->instate != XML_PARSER_EOF) 4991: ctxt->instate = state; 4992: return; 4993: } 4994: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4995: if (buf == NULL) { 4996: xmlErrMemory(ctxt, NULL); 4997: ctxt->instate = state; 4998: return; 4999: } 5000: cur = CUR; 5001: if (!IS_BLANK(cur)) { 5002: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5003: "ParsePI: PI %s space expected\n", target); 5004: } 5005: SKIP_BLANKS; 5006: cur = CUR_CHAR(l); 5007: while (IS_CHAR(cur) && /* checked */ 5008: ((cur != '?') || (NXT(1) != '>'))) { 5009: if (len + 5 >= size) { 5010: xmlChar *tmp; 5011: 5012: size *= 2; 5013: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 5014: if (tmp == NULL) { 5015: xmlErrMemory(ctxt, NULL); 5016: xmlFree(buf); 5017: ctxt->instate = state; 5018: return; 5019: } 5020: buf = tmp; 5021: } 5022: count++; 5023: if (count > 50) { 5024: GROW; 5025: count = 0; 5026: } 5027: COPY_BUF(l,buf,len,cur); 5028: NEXTL(l); 5029: cur = CUR_CHAR(l); 5030: if (cur == 0) { 5031: SHRINK; 5032: GROW; 5033: cur = CUR_CHAR(l); 5034: } 5035: } 5036: buf[len] = 0; 5037: if (cur != '?') { 5038: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5039: "ParsePI: PI %s never end ...\n", target); 5040: } else { 5041: if (input != ctxt->input) { 5042: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5043: "PI declaration doesn't start and stop in the same entity\n"); 5044: } 5045: SKIP(2); 5046: 5047: #ifdef LIBXML_CATALOG_ENABLED 5048: if (((state == XML_PARSER_MISC) || 5049: (state == XML_PARSER_START)) && 5050: (xmlStrEqual(target, XML_CATALOG_PI))) { 5051: xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5052: if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5053: (allow == XML_CATA_ALLOW_ALL)) 5054: xmlParseCatalogPI(ctxt, buf); 5055: } 5056: #endif 5057: 5058: 5059: /* 5060: * 
SAX: PI detected. 5061: */ 5062: if ((ctxt->sax) && (!ctxt->disableSAX) && 5063: (ctxt->sax->processingInstruction != NULL)) 5064: ctxt->sax->processingInstruction(ctxt->userData, 5065: target, buf); 5066: } 5067: xmlFree(buf); 5068: } else { 5069: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5070: } 5071: if (ctxt->instate != XML_PARSER_EOF) 5072: ctxt->instate = state; 5073: } 5074: } 5075: 5076: /** 5077: * xmlParseNotationDecl: 5078: * @ctxt: an XML parser context 5079: * 5080: * parse a notation declaration 5081: * 5082: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5083: * 5084: * Hence there is actually 3 choices: 5085: * 'PUBLIC' S PubidLiteral 5086: * 'PUBLIC' S PubidLiteral S SystemLiteral 5087: * and 'SYSTEM' S SystemLiteral 5088: * 5089: * See the NOTE on xmlParseExternalID(). 5090: */ 5091: 5092: void 5093: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5094: const xmlChar *name; 5095: xmlChar *Pubid; 5096: xmlChar *Systemid; 5097: 5098: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5099: xmlParserInputPtr input = ctxt->input; 5100: SHRINK; 5101: SKIP(10); 5102: if (!IS_BLANK_CH(CUR)) { 5103: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5104: "Space required after '<!NOTATION'\n"); 5105: return; 5106: } 5107: SKIP_BLANKS; 5108: 5109: name = xmlParseName(ctxt); 5110: if (name == NULL) { 5111: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5112: return; 5113: } 5114: if (!IS_BLANK_CH(CUR)) { 5115: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5116: "Space required after the NOTATION name'\n"); 5117: return; 5118: } 5119: if (xmlStrchr(name, ':') != NULL) { 5120: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5121: "colon are forbidden from notation names '%s'\n", 5122: name, NULL, NULL); 5123: } 5124: SKIP_BLANKS; 5125: 5126: /* 5127: * Parse the IDs. 
5128: */ 5129: Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5130: SKIP_BLANKS; 5131: 5132: if (RAW == '>') { 5133: if (input != ctxt->input) { 5134: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5135: "Notation declaration doesn't start and stop in the same entity\n"); 5136: } 5137: NEXT; 5138: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5139: (ctxt->sax->notationDecl != NULL)) 5140: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5141: } else { 5142: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5143: } 5144: if (Systemid != NULL) xmlFree(Systemid); 5145: if (Pubid != NULL) xmlFree(Pubid); 5146: } 5147: } 5148: 5149: /** 5150: * xmlParseEntityDecl: 5151: * @ctxt: an XML parser context 5152: * 5153: * parse <!ENTITY declarations 5154: * 5155: * [70] EntityDecl ::= GEDecl | PEDecl 5156: * 5157: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5158: * 5159: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5160: * 5161: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5162: * 5163: * [74] PEDef ::= EntityValue | ExternalID 5164: * 5165: * [76] NDataDecl ::= S 'NDATA' S Name 5166: * 5167: * [ VC: Notation Declared ] 5168: * The Name must match the declared name of a notation. 
5169: */ 5170: 5171: void 5172: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5173: const xmlChar *name = NULL; 5174: xmlChar *value = NULL; 5175: xmlChar *URI = NULL, *literal = NULL; 5176: const xmlChar *ndata = NULL; 5177: int isParameter = 0; 5178: xmlChar *orig = NULL; 5179: int skipped; 5180: 5181: /* GROW; done in the caller */ 5182: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5183: xmlParserInputPtr input = ctxt->input; 5184: SHRINK; 5185: SKIP(8); 5186: skipped = SKIP_BLANKS; 5187: if (skipped == 0) { 5188: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5189: "Space required after '<!ENTITY'\n"); 5190: } 5191: 5192: if (RAW == '%') { 5193: NEXT; 5194: skipped = SKIP_BLANKS; 5195: if (skipped == 0) { 5196: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5197: "Space required after '%'\n"); 5198: } 5199: isParameter = 1; 5200: } 5201: 5202: name = xmlParseName(ctxt); 5203: if (name == NULL) { 5204: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5205: "xmlParseEntityDecl: no name\n"); 5206: return; 5207: } 5208: if (xmlStrchr(name, ':') != NULL) { 5209: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5210: "colon are forbidden from entities names '%s'\n", 5211: name, NULL, NULL); 5212: } 5213: skipped = SKIP_BLANKS; 5214: if (skipped == 0) { 5215: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5216: "Space required after the entity name\n"); 5217: } 5218: 5219: ctxt->instate = XML_PARSER_ENTITY_DECL; 5220: /* 5221: * handle the various case of definitions... 
5222: */ 5223: if (isParameter) { 5224: if ((RAW == '"') || (RAW == '\'')) { 5225: value = xmlParseEntityValue(ctxt, &orig); 5226: if (value) { 5227: if ((ctxt->sax != NULL) && 5228: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5229: ctxt->sax->entityDecl(ctxt->userData, name, 5230: XML_INTERNAL_PARAMETER_ENTITY, 5231: NULL, NULL, value); 5232: } 5233: } else { 5234: URI = xmlParseExternalID(ctxt, &literal, 1); 5235: if ((URI == NULL) && (literal == NULL)) { 5236: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5237: } 5238: if (URI) { 5239: xmlURIPtr uri; 5240: 5241: uri = xmlParseURI((const char *) URI); 5242: if (uri == NULL) { 5243: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5244: "Invalid URI: %s\n", URI); 5245: /* 5246: * This really ought to be a well formedness error 5247: * but the XML Core WG decided otherwise c.f. issue 5248: * E26 of the XML erratas. 5249: */ 5250: } else { 5251: if (uri->fragment != NULL) { 5252: /* 5253: * Okay this is foolish to block those but not 5254: * invalid URIs. 5255: */ 5256: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5257: } else { 5258: if ((ctxt->sax != NULL) && 5259: (!ctxt->disableSAX) && 5260: (ctxt->sax->entityDecl != NULL)) 5261: ctxt->sax->entityDecl(ctxt->userData, name, 5262: XML_EXTERNAL_PARAMETER_ENTITY, 5263: literal, URI, NULL); 5264: } 5265: xmlFreeURI(uri); 5266: } 5267: } 5268: } 5269: } else { 5270: if ((RAW == '"') || (RAW == '\'')) { 5271: value = xmlParseEntityValue(ctxt, &orig); 5272: if ((ctxt->sax != NULL) && 5273: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5274: ctxt->sax->entityDecl(ctxt->userData, name, 5275: XML_INTERNAL_GENERAL_ENTITY, 5276: NULL, NULL, value); 5277: /* 5278: * For expat compatibility in SAX mode. 
5279: */ 5280: if ((ctxt->myDoc == NULL) || 5281: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5282: if (ctxt->myDoc == NULL) { 5283: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5284: if (ctxt->myDoc == NULL) { 5285: xmlErrMemory(ctxt, "New Doc failed"); 5286: return; 5287: } 5288: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5289: } 5290: if (ctxt->myDoc->intSubset == NULL) 5291: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5292: BAD_CAST "fake", NULL, NULL); 5293: 5294: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5295: NULL, NULL, value); 5296: } 5297: } else { 5298: URI = xmlParseExternalID(ctxt, &literal, 1); 5299: if ((URI == NULL) && (literal == NULL)) { 5300: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5301: } 5302: if (URI) { 5303: xmlURIPtr uri; 5304: 5305: uri = xmlParseURI((const char *)URI); 5306: if (uri == NULL) { 5307: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5308: "Invalid URI: %s\n", URI); 5309: /* 5310: * This really ought to be a well formedness error 5311: * but the XML Core WG decided otherwise c.f. issue 5312: * E26 of the XML erratas. 5313: */ 5314: } else { 5315: if (uri->fragment != NULL) { 5316: /* 5317: * Okay this is foolish to block those but not 5318: * invalid URIs. 
5319: */ 5320: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5321: } 5322: xmlFreeURI(uri); 5323: } 5324: } 5325: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5326: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5327: "Space required before 'NDATA'\n"); 5328: } 5329: SKIP_BLANKS; 5330: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5331: SKIP(5); 5332: if (!IS_BLANK_CH(CUR)) { 5333: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5334: "Space required after 'NDATA'\n"); 5335: } 5336: SKIP_BLANKS; 5337: ndata = xmlParseName(ctxt); 5338: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5339: (ctxt->sax->unparsedEntityDecl != NULL)) 5340: ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5341: literal, URI, ndata); 5342: } else { 5343: if ((ctxt->sax != NULL) && 5344: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5345: ctxt->sax->entityDecl(ctxt->userData, name, 5346: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5347: literal, URI, NULL); 5348: /* 5349: * For expat compatibility in SAX mode. 5350: * assuming the entity repalcement was asked for 5351: */ 5352: if ((ctxt->replaceEntities != 0) && 5353: ((ctxt->myDoc == NULL) || 5354: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5355: if (ctxt->myDoc == NULL) { 5356: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5357: if (ctxt->myDoc == NULL) { 5358: xmlErrMemory(ctxt, "New Doc failed"); 5359: return; 5360: } 5361: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5362: } 5363: 5364: if (ctxt->myDoc->intSubset == NULL) 5365: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5366: BAD_CAST "fake", NULL, NULL); 5367: xmlSAX2EntityDecl(ctxt, name, 5368: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5369: literal, URI, NULL); 5370: } 5371: } 5372: } 5373: } 5374: SKIP_BLANKS; 5375: if (RAW != '>') { 5376: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5377: "xmlParseEntityDecl: entity %s not terminated\n", name); 5378: } else { 5379: if (input != ctxt->input) { 5380: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5381: "Entity 
declaration doesn't start and stop in the same entity\n"); 5382: } 5383: NEXT; 5384: } 5385: if (orig != NULL) { 5386: /* 5387: * Ugly mechanism to save the raw entity value. 5388: */ 5389: xmlEntityPtr cur = NULL; 5390: 5391: if (isParameter) { 5392: if ((ctxt->sax != NULL) && 5393: (ctxt->sax->getParameterEntity != NULL)) 5394: cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5395: } else { 5396: if ((ctxt->sax != NULL) && 5397: (ctxt->sax->getEntity != NULL)) 5398: cur = ctxt->sax->getEntity(ctxt->userData, name); 5399: if ((cur == NULL) && (ctxt->userData==ctxt)) { 5400: cur = xmlSAX2GetEntity(ctxt, name); 5401: } 5402: } 5403: if (cur != NULL) { 5404: if (cur->orig != NULL) 5405: xmlFree(orig); 5406: else 5407: cur->orig = orig; 5408: } else 5409: xmlFree(orig); 5410: } 5411: if (value != NULL) xmlFree(value); 5412: if (URI != NULL) xmlFree(URI); 5413: if (literal != NULL) xmlFree(literal); 5414: } 5415: } 5416: 5417: /** 5418: * xmlParseDefaultDecl: 5419: * @ctxt: an XML parser context 5420: * @value: Receive a possible fixed default value for the attribute 5421: * 5422: * Parse an attribute default declaration 5423: * 5424: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5425: * 5426: * [ VC: Required Attribute ] 5427: * if the default declaration is the keyword #REQUIRED, then the 5428: * attribute must be specified for all elements of the type in the 5429: * attribute-list declaration. 5430: * 5431: * [ VC: Attribute Default Legal ] 5432: * The declared default value must meet the lexical constraints of 5433: * the declared attribute type c.f. xmlValidateAttributeDecl() 5434: * 5435: * [ VC: Fixed Attribute Default ] 5436: * if an attribute has a default value declared with the #FIXED 5437: * keyword, instances of that attribute must match the default value. 
5438: * 5439: * [ WFC: No < in Attribute Values ] 5440: * handled in xmlParseAttValue() 5441: * 5442: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5443: * or XML_ATTRIBUTE_FIXED. 5444: */ 5445: 5446: int 5447: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5448: int val; 5449: xmlChar *ret; 5450: 5451: *value = NULL; 5452: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5453: SKIP(9); 5454: return(XML_ATTRIBUTE_REQUIRED); 5455: } 5456: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5457: SKIP(8); 5458: return(XML_ATTRIBUTE_IMPLIED); 5459: } 5460: val = XML_ATTRIBUTE_NONE; 5461: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5462: SKIP(6); 5463: val = XML_ATTRIBUTE_FIXED; 5464: if (!IS_BLANK_CH(CUR)) { 5465: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5466: "Space required after '#FIXED'\n"); 5467: } 5468: SKIP_BLANKS; 5469: } 5470: ret = xmlParseAttValue(ctxt); 5471: ctxt->instate = XML_PARSER_DTD; 5472: if (ret == NULL) { 5473: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5474: "Attribute default value declaration error\n"); 5475: } else 5476: *value = ret; 5477: return(val); 5478: } 5479: 5480: /** 5481: * xmlParseNotationType: 5482: * @ctxt: an XML parser context 5483: * 5484: * parse an Notation attribute type. 5485: * 5486: * Note: the leading 'NOTATION' S part has already being parsed... 5487: * 5488: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5489: * 5490: * [ VC: Notation Attributes ] 5491: * Values of this type must match one of the notation names included 5492: * in the declaration; all notation names in the declaration must be declared. 
5493: * 5494: * Returns: the notation attribute tree built while parsing 5495: */ 5496: 5497: xmlEnumerationPtr 5498: xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5499: const xmlChar *name; 5500: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5501: 5502: if (RAW != '(') { 5503: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5504: return(NULL); 5505: } 5506: SHRINK; 5507: do { 5508: NEXT; 5509: SKIP_BLANKS; 5510: name = xmlParseName(ctxt); 5511: if (name == NULL) { 5512: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5513: "Name expected in NOTATION declaration\n"); 5514: xmlFreeEnumeration(ret); 5515: return(NULL); 5516: } 5517: tmp = ret; 5518: while (tmp != NULL) { 5519: if (xmlStrEqual(name, tmp->name)) { 5520: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5521: "standalone: attribute notation value token %s duplicated\n", 5522: name, NULL); 5523: if (!xmlDictOwns(ctxt->dict, name)) 5524: xmlFree((xmlChar *) name); 5525: break; 5526: } 5527: tmp = tmp->next; 5528: } 5529: if (tmp == NULL) { 5530: cur = xmlCreateEnumeration(name); 5531: if (cur == NULL) { 5532: xmlFreeEnumeration(ret); 5533: return(NULL); 5534: } 5535: if (last == NULL) ret = last = cur; 5536: else { 5537: last->next = cur; 5538: last = cur; 5539: } 5540: } 5541: SKIP_BLANKS; 5542: } while (RAW == '|'); 5543: if (RAW != ')') { 5544: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5545: xmlFreeEnumeration(ret); 5546: return(NULL); 5547: } 5548: NEXT; 5549: return(ret); 5550: } 5551: 5552: /** 5553: * xmlParseEnumerationType: 5554: * @ctxt: an XML parser context 5555: * 5556: * parse an Enumeration attribute type. 5557: * 5558: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * NOTE(review): on a missing Nmtoken or a missing ')' this function
 * returns the list built so far, whereas xmlParseNotationType() frees its
 * list and returns NULL on the equivalent errors -- confirm the
 * difference is intended.
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlChar *name;
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    do {
        /* skips the opening '(' on the first pass, a '|' afterwards */
        NEXT;
        SKIP_BLANKS;
        name = xmlParseNmtoken(ctxt);
        if (name == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(ret);
        }
        /* look for an earlier occurrence of the same token */
        tmp = ret;
        while (tmp != NULL) {
            if (xmlStrEqual(name, tmp->name)) {
                xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
          "standalone: attribute enumeration value token %s duplicated\n",
                                 name, NULL);
                if (!xmlDictOwns(ctxt->dict, name))
                    xmlFree(name);
                break;
            }
            tmp = tmp->next;
        }
        /* tmp == NULL means no duplicate was found: append a new node */
        if (tmp == NULL) {
            cur = xmlCreateEnumeration(name);
            /* name is released right after the node has been created */
            if (!xmlDictOwns(ctxt->dict, name))
                xmlFree(name);
            if (cur == NULL) {
                xmlFreeEnumeration(ret);
                return(NULL);
            }
            if (last == NULL) ret = last = cur;
            else {
                last->next = cur;
                last = cur;
            }
        }
        SKIP_BLANKS;
    } while (RAW == '|');
    if (RAW != ')') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
        return(ret);
    }
    NEXT;
    return(ret);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 when
 *          no enumeration tree could be produced
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    int kind;

    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        SKIP(8);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'NOTATION'\n");
            return(0);
        }
        SKIP_BLANKS;
        *tree = xmlParseNotationType(ctxt);
        kind = XML_ATTRIBUTE_NOTATION;
    } else {
        *tree = xmlParseEnumerationType(ctxt);
        kind = XML_ATTRIBUTE_ENUMERATION;
    }

    /* A NULL tree means the sub-parser failed, whichever one ran. */
    return((*tree == NULL) ? 0 : kind);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5682: * 5683: * [ VC: IDREF ] 5684: * Values of type IDREF must match the Name production, and values 5685: * of type IDREFS must match Names; each IDREF Name must match the value 5686: * of an ID attribute on some element in the XML document; i.e. IDREF 5687: * values must match the value of some ID attribute. 5688: * 5689: * [ VC: Entity Name ] 5690: * Values of type ENTITY must match the Name production, values 5691: * of type ENTITIES must match Names; each Entity Name must match the 5692: * name of an unparsed entity declared in the DTD. 5693: * 5694: * [ VC: Name Token ] 5695: * Values of type NMTOKEN must match the Nmtoken production; values 5696: * of type NMTOKENS must match Nmtokens. 5697: * 5698: * Returns the attribute type 5699: */ 5700: int 5701: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5702: SHRINK; 5703: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5704: SKIP(5); 5705: return(XML_ATTRIBUTE_CDATA); 5706: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5707: SKIP(6); 5708: return(XML_ATTRIBUTE_IDREFS); 5709: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5710: SKIP(5); 5711: return(XML_ATTRIBUTE_IDREF); 5712: } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5713: SKIP(2); 5714: return(XML_ATTRIBUTE_ID); 5715: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5716: SKIP(6); 5717: return(XML_ATTRIBUTE_ENTITY); 5718: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5719: SKIP(8); 5720: return(XML_ATTRIBUTE_ENTITIES); 5721: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5722: SKIP(8); 5723: return(XML_ATTRIBUTE_NMTOKENS); 5724: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5725: SKIP(7); 5726: return(XML_ATTRIBUTE_NMTOKEN); 5727: } 5728: return(xmlParseEnumeratedType(ctxt, tree)); 5729: } 5730: 5731: /** 5732: * xmlParseAttributeListDecl: 5733: * @ctxt: an XML parser context 5734: * 5735: * : parse the Attribute list def for an element 5736: * 
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
 *
 * [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 * Nothing is done unless the input starts with '<!ATTLIST'. Each AttDef
 * that parses completely is reported through the SAX attributeDecl
 * callback (when SAX is enabled) and, for SAX2 contexts, recorded with
 * xmlAddDefAttrs() / xmlAddSpecialAttr(). The loop stops at the first
 * malformed AttDef.
 */
void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *elemName;
    const xmlChar *attrName;
    xmlEnumerationPtr tree;

    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
        /* Remembered to detect a declaration ending in another entity. */
        xmlParserInputPtr input = ctxt->input;

        SKIP(9);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                                 "Space required after '<!ATTLIST'\n");
        }
        SKIP_BLANKS;
        elemName = xmlParseName(ctxt);
        if (elemName == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "ATTLIST: no name for Element\n");
            return;
        }
        SKIP_BLANKS;
        GROW;
        /* One iteration per AttDef, until the closing '>' or an error. */
        while (RAW != '>') {
            /* Start position, used below to detect lack of progress. */
            const xmlChar *check = CUR_PTR;
            int type;
            int def;
            xmlChar *defaultValue = NULL;

            GROW;
            tree = NULL;
            attrName = xmlParseName(ctxt);
            if (attrName == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                               "ATTLIST: no name for Attribute\n");
                break;
            }
            GROW;
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute name\n");
                break;
            }
            SKIP_BLANKS;

            /* For enumerated types this also fills in tree. */
            type = xmlParseAttributeType(ctxt, &tree);
            if (type <= 0) {
                break;
            }

            GROW;
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                               "Space required after the attribute type\n");
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            SKIP_BLANKS;

            def = xmlParseDefaultDecl(ctxt, &defaultValue);
            if (def <= 0) {
                if (defaultValue != NULL)
                    xmlFree(defaultValue);
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            /* Non-CDATA default values are space-normalized in place. */
            if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
                xmlAttrNormalizeSpace(defaultValue, defaultValue);

            GROW;
            if (RAW != '>') {
                if (!IS_BLANK_CH(CUR)) {
                    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute default value\n");
                    if (defaultValue != NULL)
                        xmlFree(defaultValue);
                    if (tree != NULL)
                        xmlFreeEnumeration(tree);
                    break;
                }
                SKIP_BLANKS;
            }
            /* No input consumed by this iteration: bail out of the loop. */
            if (check == CUR_PTR) {
                xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                            "in xmlParseAttributeListDecl\n");
                if (defaultValue != NULL)
                    xmlFree(defaultValue);
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            /*
             * tree is passed to the SAX callback and is not freed here
             * afterwards; it is freed here only when no callback runs.
             * NOTE(review): presumably the callback takes ownership of
             * tree - confirm against the SAX handler.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                (ctxt->sax->attributeDecl != NULL))
                ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
                                type, def, defaultValue, tree);
            else if (tree != NULL)
                xmlFreeEnumeration(tree);

            /* SAX2 only: record defaults other than #IMPLIED/#REQUIRED. */
            if ((ctxt->sax2) && (defaultValue != NULL) &&
                (def != XML_ATTRIBUTE_IMPLIED) &&
                (def != XML_ATTRIBUTE_REQUIRED)) {
                xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
            }
            if (ctxt->sax2) {
                xmlAddSpecialAttr(ctxt, elemName, attrName, type);
            }
            /* defaultValue is always released by this function. */
            if (defaultValue != NULL)
                xmlFree(defaultValue);
            GROW;
        }
        if (RAW == '>') {
            if (input != ctxt->input) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Attribute list declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
        }
    }
}

/**
 * xmlParseElementMixedContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a Mixed Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
 *                '(' S? '#PCDATA' S?
')' 5875: * 5876: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 5877: * 5878: * [ VC: No Duplicate Types ] 5879: * The same name must not appear more than once in a single 5880: * mixed-content declaration. 5881: * 5882: * returns: the list of the xmlElementContentPtr describing the element choices 5883: */ 5884: xmlElementContentPtr 5885: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 5886: xmlElementContentPtr ret = NULL, cur = NULL, n; 5887: const xmlChar *elem = NULL; 5888: 5889: GROW; 5890: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5891: SKIP(7); 5892: SKIP_BLANKS; 5893: SHRINK; 5894: if (RAW == ')') { 5895: if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5896: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5897: "Element content declaration doesn't start and stop in the same entity\n", 5898: NULL, NULL); 5899: } 5900: NEXT; 5901: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5902: if (ret == NULL) 5903: return(NULL); 5904: if (RAW == '*') { 5905: ret->ocur = XML_ELEMENT_CONTENT_MULT; 5906: NEXT; 5907: } 5908: return(ret); 5909: } 5910: if ((RAW == '(') || (RAW == '|')) { 5911: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5912: if (ret == NULL) return(NULL); 5913: } 5914: while (RAW == '|') { 5915: NEXT; 5916: if (elem == NULL) { 5917: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5918: if (ret == NULL) return(NULL); 5919: ret->c1 = cur; 5920: if (cur != NULL) 5921: cur->parent = ret; 5922: cur = ret; 5923: } else { 5924: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5925: if (n == NULL) return(NULL); 5926: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5927: if (n->c1 != NULL) 5928: n->c1->parent = n; 5929: cur->c2 = n; 5930: if (n != NULL) 5931: n->parent = cur; 5932: cur = n; 5933: } 5934: SKIP_BLANKS; 5935: elem = xmlParseName(ctxt); 
5936: if (elem == NULL) { 5937: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5938: "xmlParseElementMixedContentDecl : Name expected\n"); 5939: xmlFreeDocElementContent(ctxt->myDoc, cur); 5940: return(NULL); 5941: } 5942: SKIP_BLANKS; 5943: GROW; 5944: } 5945: if ((RAW == ')') && (NXT(1) == '*')) { 5946: if (elem != NULL) { 5947: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 5948: XML_ELEMENT_CONTENT_ELEMENT); 5949: if (cur->c2 != NULL) 5950: cur->c2->parent = cur; 5951: } 5952: if (ret != NULL) 5953: ret->ocur = XML_ELEMENT_CONTENT_MULT; 5954: if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5955: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5956: "Element content declaration doesn't start and stop in the same entity\n", 5957: NULL, NULL); 5958: } 5959: SKIP(2); 5960: } else { 5961: xmlFreeDocElementContent(ctxt->myDoc, ret); 5962: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 5963: return(NULL); 5964: } 5965: 5966: } else { 5967: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 5968: } 5969: return(ret); 5970: } 5971: 5972: /** 5973: * xmlParseElementChildrenContentDeclPriv: 5974: * @ctxt: an XML parser context 5975: * @inputchk: the input used for the current entity, needed for boundary checks 5976: * @depth: the level of recursion 5977: * 5978: * parse the declaration for a Mixed Element content 5979: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5980: * 5981: * 5982: * [47] children ::= (choice | seq) ('?' | '*' | '+')? 5983: * 5984: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 5985: * 5986: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 5987: * 5988: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 5989: * 5990: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 5991: * TODO Parameter-entity replacement text must be properly nested 5992: * with parenthesized groups. 
That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * Nested groups are handled by recursion; @depth is checked on entry and
 * the parse is refused beyond 128 levels (2048 when XML_PARSE_HUGE is set).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    /*
     * ret:  root of the tree built for this group
     * cur:  most recent operator node (or the first child before any
     *       separator has been seen)
     * last: most recently parsed child not yet linked under an operator
     * op:   operator node (SEQ or OR) created for the current separator
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    /* Separator used by this group (',' or '|'); 0 until the first one. */
    xmlChar type = 0;

    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth > 2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
        return(NULL);
    }
    SKIP_BLANKS;
    GROW;
    if (RAW == '(') {
        int inputid = ctxt->input->id;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        SKIP_BLANKS;
        GROW;
    } else {
        /* First child is a plain name with an optional occurrence mark. */
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
            return(NULL);
        }
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
        if (cur == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        GROW;
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while (RAW != ')') {
        /*
         * Each loop we parse one separator and one element.
         */
        if (RAW == ',') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name | Name , Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                /* last is freed separately only while it is not yet linked. */
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the operator becomes the new root. */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /* Chain a new operator to the right, adopting last as c1. */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else if (RAW == '|') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name , Name | Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the operator becomes the new root. */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /* Chain a new operator to the right, adopting last as c1. */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else {
            /* Neither a separator nor the closing parenthesis. */
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
            if ((last != NULL) && (last != ret))
                xmlFreeDocElementContent(ctxt->myDoc, last);
            if (ret != NULL)
                xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        GROW;
        SKIP_BLANKS;
        GROW;
        if (RAW == '(') {
            int inputid = ctxt->input->id;
            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }
    /* Link the final child as the right operand of the last operator. */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        if (last != NULL)
            last->parent = cur;
    }
    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                         NULL, NULL);
    }
    /* Consume the closing ')' and apply the group's occurrence mark. */
    NEXT;
    if (RAW == '?') {
        if (ret != NULL) {
            if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_OPT;
        }
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             *
             * NOTE(review): unlike the '*' branch, cur is not reset to
             * ret before this walk, so it starts from the last operator
             * node - confirm this is intended.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for an Element children content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    /* stub left for API/ABI compat; starts the recursion at depth 1 */
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
}

/**
 * xmlParseElementContentDecl:
 * @ctxt:  an XML parser context
 * @name:  the name of the element being defined.
6310: * @result: the Element Content pointer will be stored here if any 6311: * 6312: * parse the declaration for an Element content either Mixed or Children, 6313: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6314: * 6315: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6316: * 6317: * returns: the type of element content XML_ELEMENT_TYPE_xxx 6318: */ 6319: 6320: int 6321: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6322: xmlElementContentPtr *result) { 6323: 6324: xmlElementContentPtr tree = NULL; 6325: int inputid = ctxt->input->id; 6326: int res; 6327: 6328: *result = NULL; 6329: 6330: if (RAW != '(') { 6331: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6332: "xmlParseElementContentDecl : %s '(' expected\n", name); 6333: return(-1); 6334: } 6335: NEXT; 6336: GROW; 6337: SKIP_BLANKS; 6338: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6339: tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6340: res = XML_ELEMENT_TYPE_MIXED; 6341: } else { 6342: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6343: res = XML_ELEMENT_TYPE_ELEMENT; 6344: } 6345: SKIP_BLANKS; 6346: *result = tree; 6347: return(res); 6348: } 6349: 6350: /** 6351: * xmlParseElementDecl: 6352: * @ctxt: an XML parser context 6353: * 6354: * parse an Element declaration. 6355: * 6356: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    int ret = -1;
    xmlElementContentPtr content = NULL;
    xmlParserInputPtr startInput;

    /* GROW; done in the caller */
    if (!(CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')))
        return(ret);

    /* Remembered to detect a declaration ending in another entity. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') &&
               (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        /* Unterminated declaration: drop the content model, keep ret. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(ret);
    }

    if (startInput != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
    }

    NEXT;
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        (ctxt->sax->elementDecl != NULL)) {
        if (content != NULL)
            content->parent = NULL;
        ctxt->sax->elementDecl(ctxt->userData, name, ret,
                               content);
        if ((content != NULL) && (content->parent == NULL)) {
            /*
             * this is a trick: if xmlAddElementDecl is called,
             * instead of copying the full tree it is plugged directly
             * if called from the parser. Avoid duplicating the
             * interfaces or change the API/ABI
             */
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
    } else if (content != NULL) {
        xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6472: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6473: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6474: */ 6475: 6476: static void 6477: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6478: int id = ctxt->input->id; 6479: 6480: SKIP(3); 6481: SKIP_BLANKS; 6482: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6483: SKIP(7); 6484: SKIP_BLANKS; 6485: if (RAW != '[') { 6486: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6487: } else { 6488: if (ctxt->input->id != id) { 6489: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6490: "All markup of the conditional section is not in the same entity\n", 6491: NULL, NULL); 6492: } 6493: NEXT; 6494: } 6495: if (xmlParserDebugEntities) { 6496: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6497: xmlGenericError(xmlGenericErrorContext, 6498: "%s(%d): ", ctxt->input->filename, 6499: ctxt->input->line); 6500: xmlGenericError(xmlGenericErrorContext, 6501: "Entering INCLUDE Conditional Section\n"); 6502: } 6503: 6504: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6505: (NXT(2) != '>'))) { 6506: const xmlChar *check = CUR_PTR; 6507: unsigned int cons = ctxt->input->consumed; 6508: 6509: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6510: xmlParseConditionalSections(ctxt); 6511: } else if (IS_BLANK_CH(CUR)) { 6512: NEXT; 6513: } else if (RAW == '%') { 6514: xmlParsePEReference(ctxt); 6515: } else 6516: xmlParseMarkupDecl(ctxt); 6517: 6518: /* 6519: * Pop-up of finished entities. 
6520: */ 6521: while ((RAW == 0) && (ctxt->inputNr > 1)) 6522: xmlPopInput(ctxt); 6523: 6524: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6525: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6526: break; 6527: } 6528: } 6529: if (xmlParserDebugEntities) { 6530: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6531: xmlGenericError(xmlGenericErrorContext, 6532: "%s(%d): ", ctxt->input->filename, 6533: ctxt->input->line); 6534: xmlGenericError(xmlGenericErrorContext, 6535: "Leaving INCLUDE Conditional Section\n"); 6536: } 6537: 6538: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6539: int state; 6540: xmlParserInputState instate; 6541: int depth = 0; 6542: 6543: SKIP(6); 6544: SKIP_BLANKS; 6545: if (RAW != '[') { 6546: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6547: } else { 6548: if (ctxt->input->id != id) { 6549: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6550: "All markup of the conditional section is not in the same entity\n", 6551: NULL, NULL); 6552: } 6553: NEXT; 6554: } 6555: if (xmlParserDebugEntities) { 6556: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6557: xmlGenericError(xmlGenericErrorContext, 6558: "%s(%d): ", ctxt->input->filename, 6559: ctxt->input->line); 6560: xmlGenericError(xmlGenericErrorContext, 6561: "Entering IGNORE Conditional Section\n"); 6562: } 6563: 6564: /* 6565: * Parse up to the end of the conditional section 6566: * But disable SAX event generating DTD building in the meantime 6567: */ 6568: state = ctxt->disableSAX; 6569: instate = ctxt->instate; 6570: if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6571: ctxt->instate = XML_PARSER_IGNORE; 6572: 6573: while ((depth >= 0) && (RAW != 0)) { 6574: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6575: depth++; 6576: SKIP(3); 6577: continue; 6578: } 6579: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6580: if (--depth >= 0) SKIP(3); 6581: continue; 6582: } 6583: NEXT; 6584: continue; 6585: } 6586: 
6587: ctxt->disableSAX = state; 6588: ctxt->instate = instate; 6589: 6590: if (xmlParserDebugEntities) { 6591: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6592: xmlGenericError(xmlGenericErrorContext, 6593: "%s(%d): ", ctxt->input->filename, 6594: ctxt->input->line); 6595: xmlGenericError(xmlGenericErrorContext, 6596: "Leaving IGNORE Conditional Section\n"); 6597: } 6598: 6599: } else { 6600: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6601: } 6602: 6603: if (RAW == 0) 6604: SHRINK; 6605: 6606: if (RAW == 0) { 6607: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6608: } else { 6609: if (ctxt->input->id != id) { 6610: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6611: "All markup of the conditional section is not in the same entity\n", 6612: NULL, NULL); 6613: } 6614: SKIP(3); 6615: } 6616: } 6617: 6618: /** 6619: * xmlParseMarkupDecl: 6620: * @ctxt: an XML parser context 6621: * 6622: * parse Markup declarations 6623: * 6624: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6625: * NotationDecl | PI | Comment 6626: * 6627: * [ VC: Proper Declaration/PE Nesting ] 6628: * Parameter-entity replacement text must be properly nested with 6629: * markup declarations. That is to say, if either the first character 6630: * or the last character of a markup declaration (markupdecl above) is 6631: * contained in the replacement text for a parameter-entity reference, 6632: * both must be contained in the same replacement text. 6633: * 6634: * [ WFC: PEs in Internal Subset ] 6635: * In the internal DTD subset, parameter-entity references can occur 6636: * only where markup declarations can occur, not within markup declarations. 6637: * (This does not apply to references that occur in external parameter 6638: * entities or to the external subset.) 
6639: */ 6640: void 6641: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6642: GROW; 6643: if (CUR == '<') { 6644: if (NXT(1) == '!') { 6645: switch (NXT(2)) { 6646: case 'E': 6647: if (NXT(3) == 'L') 6648: xmlParseElementDecl(ctxt); 6649: else if (NXT(3) == 'N') 6650: xmlParseEntityDecl(ctxt); 6651: break; 6652: case 'A': 6653: xmlParseAttributeListDecl(ctxt); 6654: break; 6655: case 'N': 6656: xmlParseNotationDecl(ctxt); 6657: break; 6658: case '-': 6659: xmlParseComment(ctxt); 6660: break; 6661: default: 6662: /* there is an error but it will be detected later */ 6663: break; 6664: } 6665: } else if (NXT(1) == '?') { 6666: xmlParsePI(ctxt); 6667: } 6668: } 6669: /* 6670: * This is only for internal subset. On external entities, 6671: * the replacement is done before parsing stage 6672: */ 6673: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6674: xmlParsePEReference(ctxt); 6675: 6676: /* 6677: * Conditional sections are allowed from entities included 6678: * by PE References in the internal subset. 6679: */ 6680: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6681: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6682: xmlParseConditionalSections(ctxt); 6683: } 6684: } 6685: 6686: ctxt->instate = XML_PARSER_DTD; 6687: } 6688: 6689: /** 6690: * xmlParseTextDecl: 6691: * @ctxt: an XML parser context 6692: * 6693: * parse an XML declaration header for external entities 6694: * 6695: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6696: */ 6697: 6698: void 6699: xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6700: xmlChar *version; 6701: const xmlChar *encoding; 6702: 6703: /* 6704: * We know that '<?xml' is here. 
6705: */ 6706: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6707: SKIP(5); 6708: } else { 6709: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6710: return; 6711: } 6712: 6713: if (!IS_BLANK_CH(CUR)) { 6714: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6715: "Space needed after '<?xml'\n"); 6716: } 6717: SKIP_BLANKS; 6718: 6719: /* 6720: * We may have the VersionInfo here. 6721: */ 6722: version = xmlParseVersionInfo(ctxt); 6723: if (version == NULL) 6724: version = xmlCharStrdup(XML_DEFAULT_VERSION); 6725: else { 6726: if (!IS_BLANK_CH(CUR)) { 6727: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6728: "Space needed here\n"); 6729: } 6730: } 6731: ctxt->input->version = version; 6732: 6733: /* 6734: * We must have the encoding declaration 6735: */ 6736: encoding = xmlParseEncodingDecl(ctxt); 6737: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6738: /* 6739: * The XML REC instructs us to stop parsing right here 6740: */ 6741: return; 6742: } 6743: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6744: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6745: "Missing encoding in text declaration\n"); 6746: } 6747: 6748: SKIP_BLANKS; 6749: if ((RAW == '?') && (NXT(1) == '>')) { 6750: SKIP(2); 6751: } else if (RAW == '>') { 6752: /* Deprecated old WD ... */ 6753: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6754: NEXT; 6755: } else { 6756: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6757: MOVETO_ENDTAG(CUR_PTR); 6758: NEXT; 6759: } 6760: } 6761: 6762: /** 6763: * xmlParseExternalSubset: 6764: * @ctxt: an XML parser context 6765: * @ExternalID: the external identifier 6766: * @SystemID: the system identifier (or URL) 6767: * 6768: * parse Markup declarations from an external subset 6769: * 6770: * [30] extSubset ::= textDecl? 
extSubsetDecl 6771: * 6772: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6773: */ 6774: void 6775: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6776: const xmlChar *SystemID) { 6777: xmlDetectSAX2(ctxt); 6778: GROW; 6779: 6780: if ((ctxt->encoding == NULL) && 6781: (ctxt->input->end - ctxt->input->cur >= 4)) { 6782: xmlChar start[4]; 6783: xmlCharEncoding enc; 6784: 6785: start[0] = RAW; 6786: start[1] = NXT(1); 6787: start[2] = NXT(2); 6788: start[3] = NXT(3); 6789: enc = xmlDetectCharEncoding(start, 4); 6790: if (enc != XML_CHAR_ENCODING_NONE) 6791: xmlSwitchEncoding(ctxt, enc); 6792: } 6793: 6794: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6795: xmlParseTextDecl(ctxt); 6796: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6797: /* 6798: * The XML REC instructs us to stop parsing right here 6799: */ 6800: ctxt->instate = XML_PARSER_EOF; 6801: return; 6802: } 6803: } 6804: if (ctxt->myDoc == NULL) { 6805: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6806: if (ctxt->myDoc == NULL) { 6807: xmlErrMemory(ctxt, "New Doc failed"); 6808: return; 6809: } 6810: ctxt->myDoc->properties = XML_DOC_INTERNAL; 6811: } 6812: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6813: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6814: 6815: ctxt->instate = XML_PARSER_DTD; 6816: ctxt->external = 1; 6817: while (((RAW == '<') && (NXT(1) == '?')) || 6818: ((RAW == '<') && (NXT(1) == '!')) || 6819: (RAW == '%') || IS_BLANK_CH(CUR)) { 6820: const xmlChar *check = CUR_PTR; 6821: unsigned int cons = ctxt->input->consumed; 6822: 6823: GROW; 6824: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6825: xmlParseConditionalSections(ctxt); 6826: } else if (IS_BLANK_CH(CUR)) { 6827: NEXT; 6828: } else if (RAW == '%') { 6829: xmlParsePEReference(ctxt); 6830: } else 6831: xmlParseMarkupDecl(ctxt); 6832: 6833: /* 6834: * Pop-up of finished entities. 
6835: */ 6836: while ((RAW == 0) && (ctxt->inputNr > 1)) 6837: xmlPopInput(ctxt); 6838: 6839: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6840: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6841: break; 6842: } 6843: } 6844: 6845: if (RAW != 0) { 6846: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6847: } 6848: 6849: } 6850: 6851: /** 6852: * xmlParseReference: 6853: * @ctxt: an XML parser context 6854: * 6855: * parse and handle entity references in content, depending on the SAX 6856: * interface, this may end-up in a call to character() if this is a 6857: * CharRef, a predefined entity, if there is no reference() callback. 6858: * or if the parser was asked to switch to that mode. 6859: * 6860: * [67] Reference ::= EntityRef | CharRef 6861: */ 6862: void 6863: xmlParseReference(xmlParserCtxtPtr ctxt) { 6864: xmlEntityPtr ent; 6865: xmlChar *val; 6866: int was_checked; 6867: xmlNodePtr list = NULL; 6868: xmlParserErrors ret = XML_ERR_OK; 6869: 6870: 6871: if (RAW != '&') 6872: return; 6873: 6874: /* 6875: * Simple case of a CharRef 6876: */ 6877: if (NXT(1) == '#') { 6878: int i = 0; 6879: xmlChar out[10]; 6880: int hex = NXT(2); 6881: int value = xmlParseCharRef(ctxt); 6882: 6883: if (value == 0) 6884: return; 6885: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6886: /* 6887: * So we are using non-UTF-8 buffers 6888: * Check that the char fit on 8bits, if not 6889: * generate a CharRef. 
6890: */ 6891: if (value <= 0xFF) { 6892: out[0] = value; 6893: out[1] = 0; 6894: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6895: (!ctxt->disableSAX)) 6896: ctxt->sax->characters(ctxt->userData, out, 1); 6897: } else { 6898: if ((hex == 'x') || (hex == 'X')) 6899: snprintf((char *)out, sizeof(out), "#x%X", value); 6900: else 6901: snprintf((char *)out, sizeof(out), "#%d", value); 6902: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6903: (!ctxt->disableSAX)) 6904: ctxt->sax->reference(ctxt->userData, out); 6905: } 6906: } else { 6907: /* 6908: * Just encode the value in UTF-8 6909: */ 6910: COPY_BUF(0 ,out, i, value); 6911: out[i] = 0; 6912: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6913: (!ctxt->disableSAX)) 6914: ctxt->sax->characters(ctxt->userData, out, i); 6915: } 6916: return; 6917: } 6918: 6919: /* 6920: * We are seeing an entity reference 6921: */ 6922: ent = xmlParseEntityRef(ctxt); 6923: if (ent == NULL) return; 6924: if (!ctxt->wellFormed) 6925: return; 6926: was_checked = ent->checked; 6927: 6928: /* special case of predefined entities */ 6929: if ((ent->name == NULL) || 6930: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6931: val = ent->content; 6932: if (val == NULL) return; 6933: /* 6934: * inline the entity. 6935: */ 6936: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6937: (!ctxt->disableSAX)) 6938: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6939: return; 6940: } 6941: 6942: /* 6943: * The first reference to the entity trigger a parsing phase 6944: * where the ent->children is filled with the result from 6945: * the parsing. 6946: */ 6947: if (ent->checked == 0) { 6948: unsigned long oldnbent = ctxt->nbentities; 6949: 6950: /* 6951: * This is a bit hackish but this seems the best 6952: * way to make sure both SAX and DOM entity support 6953: * behaves okay. 
6954: */ 6955: void *user_data; 6956: if (ctxt->userData == ctxt) 6957: user_data = NULL; 6958: else 6959: user_data = ctxt->userData; 6960: 6961: /* 6962: * Check that this entity is well formed 6963: * 4.3.2: An internal general parsed entity is well-formed 6964: * if its replacement text matches the production labeled 6965: * content. 6966: */ 6967: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6968: ctxt->depth++; 6969: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6970: user_data, &list); 6971: ctxt->depth--; 6972: 6973: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6974: ctxt->depth++; 6975: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6976: user_data, ctxt->depth, ent->URI, 6977: ent->ExternalID, &list); 6978: ctxt->depth--; 6979: } else { 6980: ret = XML_ERR_ENTITY_PE_INTERNAL; 6981: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6982: "invalid entity type found\n", NULL); 6983: } 6984: 6985: /* 6986: * Store the number of entities needing parsing for this entity 6987: * content and do checkings 6988: */ 6989: ent->checked = ctxt->nbentities - oldnbent; 6990: if (ret == XML_ERR_ENTITY_LOOP) { 6991: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6992: xmlFreeNodeList(list); 6993: return; 6994: } 6995: if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 6996: xmlFreeNodeList(list); 6997: return; 6998: } 6999: 7000: if ((ret == XML_ERR_OK) && (list != NULL)) { 7001: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7002: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7003: (ent->children == NULL)) { 7004: ent->children = list; 7005: if (ctxt->replaceEntities) { 7006: /* 7007: * Prune it directly in the generated document 7008: * except for single text nodes. 
7009: */ 7010: if (((list->type == XML_TEXT_NODE) && 7011: (list->next == NULL)) || 7012: (ctxt->parseMode == XML_PARSE_READER)) { 7013: list->parent = (xmlNodePtr) ent; 7014: list = NULL; 7015: ent->owner = 1; 7016: } else { 7017: ent->owner = 0; 7018: while (list != NULL) { 7019: list->parent = (xmlNodePtr) ctxt->node; 7020: list->doc = ctxt->myDoc; 7021: if (list->next == NULL) 7022: ent->last = list; 7023: list = list->next; 7024: } 7025: list = ent->children; 7026: #ifdef LIBXML_LEGACY_ENABLED 7027: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7028: xmlAddEntityReference(ent, list, NULL); 7029: #endif /* LIBXML_LEGACY_ENABLED */ 7030: } 7031: } else { 7032: ent->owner = 1; 7033: while (list != NULL) { 7034: list->parent = (xmlNodePtr) ent; 7035: xmlSetTreeDoc(list, ent->doc); 7036: if (list->next == NULL) 7037: ent->last = list; 7038: list = list->next; 7039: } 7040: } 7041: } else { 7042: xmlFreeNodeList(list); 7043: list = NULL; 7044: } 7045: } else if ((ret != XML_ERR_OK) && 7046: (ret != XML_WAR_UNDECLARED_ENTITY)) { 7047: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7048: "Entity '%s' failed to parse\n", ent->name); 7049: } else if (list != NULL) { 7050: xmlFreeNodeList(list); 7051: list = NULL; 7052: } 7053: if (ent->checked == 0) 7054: ent->checked = 1; 7055: } else if (ent->checked != 1) { 7056: ctxt->nbentities += ent->checked; 7057: } 7058: 7059: /* 7060: * Now that the entity content has been gathered 7061: * provide it to the application, this can take different forms based 7062: * on the parsing modes. 7063: */ 7064: if (ent->children == NULL) { 7065: /* 7066: * Probably running in SAX mode and the callbacks don't 7067: * build the entity content. 
So unless we already went 7068: * though parsing for first checking go though the entity 7069: * content to generate callbacks associated to the entity 7070: */ 7071: if (was_checked != 0) { 7072: void *user_data; 7073: /* 7074: * This is a bit hackish but this seems the best 7075: * way to make sure both SAX and DOM entity support 7076: * behaves okay. 7077: */ 7078: if (ctxt->userData == ctxt) 7079: user_data = NULL; 7080: else 7081: user_data = ctxt->userData; 7082: 7083: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7084: ctxt->depth++; 7085: ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7086: ent->content, user_data, NULL); 7087: ctxt->depth--; 7088: } else if (ent->etype == 7089: XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7090: ctxt->depth++; 7091: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7092: ctxt->sax, user_data, ctxt->depth, 7093: ent->URI, ent->ExternalID, NULL); 7094: ctxt->depth--; 7095: } else { 7096: ret = XML_ERR_ENTITY_PE_INTERNAL; 7097: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7098: "invalid entity type found\n", NULL); 7099: } 7100: if (ret == XML_ERR_ENTITY_LOOP) { 7101: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7102: return; 7103: } 7104: } 7105: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7106: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7107: /* 7108: * Entity reference callback comes second, it's somewhat 7109: * superfluous but a compatibility to historical behaviour 7110: */ 7111: ctxt->sax->reference(ctxt->userData, ent->name); 7112: } 7113: return; 7114: } 7115: 7116: /* 7117: * If we didn't get any children for the entity being built 7118: */ 7119: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7120: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7121: /* 7122: * Create a node. 
7123: */ 7124: ctxt->sax->reference(ctxt->userData, ent->name); 7125: return; 7126: } 7127: 7128: if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7129: /* 7130: * There is a problem on the handling of _private for entities 7131: * (bug 155816): Should we copy the content of the field from 7132: * the entity (possibly overwriting some value set by the user 7133: * when a copy is created), should we leave it alone, or should 7134: * we try to take care of different situations? The problem 7135: * is exacerbated by the usage of this field by the xmlReader. 7136: * To fix this bug, we look at _private on the created node 7137: * and, if it's NULL, we copy in whatever was in the entity. 7138: * If it's not NULL we leave it alone. This is somewhat of a 7139: * hack - maybe we should have further tests to determine 7140: * what to do. 7141: */ 7142: if ((ctxt->node != NULL) && (ent->children != NULL)) { 7143: /* 7144: * Seems we are generating the DOM content, do 7145: * a simple tree copy for all references except the first 7146: * In the first occurrence list contains the replacement. 7147: * progressive == 2 means we are operating on the Reader 7148: * and since nodes are discarded we must copy all the time. 7149: */ 7150: if (((list == NULL) && (ent->owner == 0)) || 7151: (ctxt->parseMode == XML_PARSE_READER)) { 7152: xmlNodePtr nw = NULL, cur, firstChild = NULL; 7153: 7154: /* 7155: * We are copying here, make sure there is no abuse 7156: */ 7157: ctxt->sizeentcopy += ent->length; 7158: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7159: return; 7160: 7161: /* 7162: * when operating on a reader, the entities definitions 7163: * are always owning the entities subtree. 
7164: if (ctxt->parseMode == XML_PARSE_READER) 7165: ent->owner = 1; 7166: */ 7167: 7168: cur = ent->children; 7169: while (cur != NULL) { 7170: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7171: if (nw != NULL) { 7172: if (nw->_private == NULL) 7173: nw->_private = cur->_private; 7174: if (firstChild == NULL){ 7175: firstChild = nw; 7176: } 7177: nw = xmlAddChild(ctxt->node, nw); 7178: } 7179: if (cur == ent->last) { 7180: /* 7181: * needed to detect some strange empty 7182: * node cases in the reader tests 7183: */ 7184: if ((ctxt->parseMode == XML_PARSE_READER) && 7185: (nw != NULL) && 7186: (nw->type == XML_ELEMENT_NODE) && 7187: (nw->children == NULL)) 7188: nw->extra = 1; 7189: 7190: break; 7191: } 7192: cur = cur->next; 7193: } 7194: #ifdef LIBXML_LEGACY_ENABLED 7195: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7196: xmlAddEntityReference(ent, firstChild, nw); 7197: #endif /* LIBXML_LEGACY_ENABLED */ 7198: } else if (list == NULL) { 7199: xmlNodePtr nw = NULL, cur, next, last, 7200: firstChild = NULL; 7201: 7202: /* 7203: * We are copying here, make sure there is no abuse 7204: */ 7205: ctxt->sizeentcopy += ent->length; 7206: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7207: return; 7208: 7209: /* 7210: * Copy the entity child list and make it the new 7211: * entity child list. The goal is to make sure any 7212: * ID or REF referenced will be the one from the 7213: * document content and not the entity copy. 
7214: */ 7215: cur = ent->children; 7216: ent->children = NULL; 7217: last = ent->last; 7218: ent->last = NULL; 7219: while (cur != NULL) { 7220: next = cur->next; 7221: cur->next = NULL; 7222: cur->parent = NULL; 7223: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7224: if (nw != NULL) { 7225: if (nw->_private == NULL) 7226: nw->_private = cur->_private; 7227: if (firstChild == NULL){ 7228: firstChild = cur; 7229: } 7230: xmlAddChild((xmlNodePtr) ent, nw); 7231: xmlAddChild(ctxt->node, cur); 7232: } 7233: if (cur == last) 7234: break; 7235: cur = next; 7236: } 7237: if (ent->owner == 0) 7238: ent->owner = 1; 7239: #ifdef LIBXML_LEGACY_ENABLED 7240: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7241: xmlAddEntityReference(ent, firstChild, nw); 7242: #endif /* LIBXML_LEGACY_ENABLED */ 7243: } else { 7244: const xmlChar *nbktext; 7245: 7246: /* 7247: * the name change is to avoid coalescing of the 7248: * node with a possible previous text one which 7249: * would make ent->children a dangling pointer 7250: */ 7251: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7252: -1); 7253: if (ent->children->type == XML_TEXT_NODE) 7254: ent->children->name = nbktext; 7255: if ((ent->last != ent->children) && 7256: (ent->last->type == XML_TEXT_NODE)) 7257: ent->last->name = nbktext; 7258: xmlAddChildList(ctxt->node, ent->children); 7259: } 7260: 7261: /* 7262: * This is to avoid a nasty side effect, see 7263: * characters() in SAX.c 7264: */ 7265: ctxt->nodemem = 0; 7266: ctxt->nodelen = 0; 7267: return; 7268: } 7269: } 7270: } 7271: 7272: /** 7273: * xmlParseEntityRef: 7274: * @ctxt: an XML parser context 7275: * 7276: * parse ENTITY references declarations 7277: * 7278: * [68] EntityRef ::= '&' Name ';' 7279: * 7280: * [ WFC: Entity Declared ] 7281: * In a document without any DTD, a document with only an internal DTD 7282: * subset which contains no parameter entity references, or a document 7283: * with "standalone='yes'", the Name given in the entity reference 
7284: * must match that in an entity declaration, except that well-formed 7285: * documents need not declare any of the following entities: amp, lt, 7286: * gt, apos, quot. The declaration of a parameter entity must precede 7287: * any reference to it. Similarly, the declaration of a general entity 7288: * must precede any reference to it which appears in a default value in an 7289: * attribute-list declaration. Note that if entities are declared in the 7290: * external subset or in external parameter entities, a non-validating 7291: * processor is not obligated to read and process their declarations; 7292: * for such documents, the rule that an entity must be declared is a 7293: * well-formedness constraint only if standalone='yes'. 7294: * 7295: * [ WFC: Parsed Entity ] 7296: * An entity reference must not contain the name of an unparsed entity 7297: * 7298: * Returns the xmlEntityPtr if found, or NULL otherwise. 7299: */ 7300: xmlEntityPtr 7301: xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7302: const xmlChar *name; 7303: xmlEntityPtr ent = NULL; 7304: 7305: GROW; 7306: 7307: if (RAW != '&') 7308: return(NULL); 7309: NEXT; 7310: name = xmlParseName(ctxt); 7311: if (name == NULL) { 7312: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7313: "xmlParseEntityRef: no name\n"); 7314: return(NULL); 7315: } 7316: if (RAW != ';') { 7317: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7318: return(NULL); 7319: } 7320: NEXT; 7321: 7322: /* 7323: * Predefined entites override any extra definition 7324: */ 7325: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7326: ent = xmlGetPredefinedEntity(name); 7327: if (ent != NULL) 7328: return(ent); 7329: } 7330: 7331: /* 7332: * Increate the number of entity references parsed 7333: */ 7334: ctxt->nbentities++; 7335: 7336: /* 7337: * Ask first SAX for entity resolution, otherwise try the 7338: * entities which may have stored in the parser context. 
7339: */ 7340: if (ctxt->sax != NULL) { 7341: if (ctxt->sax->getEntity != NULL) 7342: ent = ctxt->sax->getEntity(ctxt->userData, name); 7343: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7344: (ctxt->options & XML_PARSE_OLDSAX)) 7345: ent = xmlGetPredefinedEntity(name); 7346: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7347: (ctxt->userData==ctxt)) { 7348: ent = xmlSAX2GetEntity(ctxt, name); 7349: } 7350: } 7351: /* 7352: * [ WFC: Entity Declared ] 7353: * In a document without any DTD, a document with only an 7354: * internal DTD subset which contains no parameter entity 7355: * references, or a document with "standalone='yes'", the 7356: * Name given in the entity reference must match that in an 7357: * entity declaration, except that well-formed documents 7358: * need not declare any of the following entities: amp, lt, 7359: * gt, apos, quot. 7360: * The declaration of a parameter entity must precede any 7361: * reference to it. 7362: * Similarly, the declaration of a general entity must 7363: * precede any reference to it which appears in a default 7364: * value in an attribute-list declaration. Note that if 7365: * entities are declared in the external subset or in 7366: * external parameter entities, a non-validating processor 7367: * is not obligated to read and process their declarations; 7368: * for such documents, the rule that an entity must be 7369: * declared is a well-formedness constraint only if 7370: * standalone='yes'. 
7371: */ 7372: if (ent == NULL) { 7373: if ((ctxt->standalone == 1) || 7374: ((ctxt->hasExternalSubset == 0) && 7375: (ctxt->hasPErefs == 0))) { 7376: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7377: "Entity '%s' not defined\n", name); 7378: } else { 7379: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7380: "Entity '%s' not defined\n", name); 7381: if ((ctxt->inSubset == 0) && 7382: (ctxt->sax != NULL) && 7383: (ctxt->sax->reference != NULL)) { 7384: ctxt->sax->reference(ctxt->userData, name); 7385: } 7386: } 7387: ctxt->valid = 0; 7388: } 7389: 7390: /* 7391: * [ WFC: Parsed Entity ] 7392: * An entity reference must not contain the name of an 7393: * unparsed entity 7394: */ 7395: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7396: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7397: "Entity reference to unparsed entity %s\n", name); 7398: } 7399: 7400: /* 7401: * [ WFC: No External Entity References ] 7402: * Attribute values cannot contain direct or indirect 7403: * entity references to external entities. 7404: */ 7405: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7406: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7407: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7408: "Attribute references external entity '%s'\n", name); 7409: } 7410: /* 7411: * [ WFC: No < in Attribute Values ] 7412: * The replacement text of any entity referred to directly or 7413: * indirectly in an attribute value (other than "<") must 7414: * not contain a <. 7415: */ 7416: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7417: (ent != NULL) && (ent->content != NULL) && 7418: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7419: (xmlStrchr(ent->content, '<'))) { 7420: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7421: "'<' in entity '%s' is not allowed in attributes values\n", name); 7422: } 7423: 7424: /* 7425: * Internal check, no parameter entities here ... 
7426: */ 7427: else { 7428: switch (ent->etype) { 7429: case XML_INTERNAL_PARAMETER_ENTITY: 7430: case XML_EXTERNAL_PARAMETER_ENTITY: 7431: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7432: "Attempt to reference the parameter entity '%s'\n", 7433: name); 7434: break; 7435: default: 7436: break; 7437: } 7438: } 7439: 7440: /* 7441: * [ WFC: No Recursion ] 7442: * A parsed entity must not contain a recursive reference 7443: * to itself, either directly or indirectly. 7444: * Done somewhere else 7445: */ 7446: return(ent); 7447: } 7448: 7449: /** 7450: * xmlParseStringEntityRef: 7451: * @ctxt: an XML parser context 7452: * @str: a pointer to an index in the string 7453: * 7454: * parse ENTITY references declarations, but this version parses it from 7455: * a string value. 7456: * 7457: * [68] EntityRef ::= '&' Name ';' 7458: * 7459: * [ WFC: Entity Declared ] 7460: * In a document without any DTD, a document with only an internal DTD 7461: * subset which contains no parameter entity references, or a document 7462: * with "standalone='yes'", the Name given in the entity reference 7463: * must match that in an entity declaration, except that well-formed 7464: * documents need not declare any of the following entities: amp, lt, 7465: * gt, apos, quot. The declaration of a parameter entity must precede 7466: * any reference to it. Similarly, the declaration of a general entity 7467: * must precede any reference to it which appears in a default value in an 7468: * attribute-list declaration. Note that if entities are declared in the 7469: * external subset or in external parameter entities, a non-validating 7470: * processor is not obligated to read and process their declarations; 7471: * for such documents, the rule that an entity must be declared is a 7472: * well-formedness constraint only if standalone='yes'. 
7473: * 7474: * [ WFC: Parsed Entity ] 7475: * An entity reference must not contain the name of an unparsed entity 7476: * 7477: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 7478: * is updated to the current location in the string. 7479: */ 7480: static xmlEntityPtr 7481: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7482: xmlChar *name; 7483: const xmlChar *ptr; 7484: xmlChar cur; 7485: xmlEntityPtr ent = NULL; 7486: 7487: if ((str == NULL) || (*str == NULL)) 7488: return(NULL); 7489: ptr = *str; 7490: cur = *ptr; 7491: if (cur != '&') 7492: return(NULL); 7493: 7494: ptr++; 7495: name = xmlParseStringName(ctxt, &ptr); 7496: if (name == NULL) { 7497: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7498: "xmlParseStringEntityRef: no name\n"); 7499: *str = ptr; 7500: return(NULL); 7501: } 7502: if (*ptr != ';') { 7503: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7504: xmlFree(name); 7505: *str = ptr; 7506: return(NULL); 7507: } 7508: ptr++; 7509: 7510: 7511: /* 7512: * Predefined entites override any extra definition 7513: */ 7514: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7515: ent = xmlGetPredefinedEntity(name); 7516: if (ent != NULL) { 7517: xmlFree(name); 7518: *str = ptr; 7519: return(ent); 7520: } 7521: } 7522: 7523: /* 7524: * Increate the number of entity references parsed 7525: */ 7526: ctxt->nbentities++; 7527: 7528: /* 7529: * Ask first SAX for entity resolution, otherwise try the 7530: * entities which may have stored in the parser context. 
7531: */ 7532: if (ctxt->sax != NULL) { 7533: if (ctxt->sax->getEntity != NULL) 7534: ent = ctxt->sax->getEntity(ctxt->userData, name); 7535: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7536: ent = xmlGetPredefinedEntity(name); 7537: if ((ent == NULL) && (ctxt->userData==ctxt)) { 7538: ent = xmlSAX2GetEntity(ctxt, name); 7539: } 7540: } 7541: 7542: /* 7543: * [ WFC: Entity Declared ] 7544: * In a document without any DTD, a document with only an 7545: * internal DTD subset which contains no parameter entity 7546: * references, or a document with "standalone='yes'", the 7547: * Name given in the entity reference must match that in an 7548: * entity declaration, except that well-formed documents 7549: * need not declare any of the following entities: amp, lt, 7550: * gt, apos, quot. 7551: * The declaration of a parameter entity must precede any 7552: * reference to it. 7553: * Similarly, the declaration of a general entity must 7554: * precede any reference to it which appears in a default 7555: * value in an attribute-list declaration. Note that if 7556: * entities are declared in the external subset or in 7557: * external parameter entities, a non-validating processor 7558: * is not obligated to read and process their declarations; 7559: * for such documents, the rule that an entity must be 7560: * declared is a well-formedness constraint only if 7561: * standalone='yes'. 7562: */ 7563: if (ent == NULL) { 7564: if ((ctxt->standalone == 1) || 7565: ((ctxt->hasExternalSubset == 0) && 7566: (ctxt->hasPErefs == 0))) { 7567: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7568: "Entity '%s' not defined\n", name); 7569: } else { 7570: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7571: "Entity '%s' not defined\n", 7572: name); 7573: } 7574: /* TODO ? 
check regressions ctxt->valid = 0; */ 7575: } 7576: 7577: /* 7578: * [ WFC: Parsed Entity ] 7579: * An entity reference must not contain the name of an 7580: * unparsed entity 7581: */ 7582: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7583: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7584: "Entity reference to unparsed entity %s\n", name); 7585: } 7586: 7587: /* 7588: * [ WFC: No External Entity References ] 7589: * Attribute values cannot contain direct or indirect 7590: * entity references to external entities. 7591: */ 7592: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7593: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7594: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7595: "Attribute references external entity '%s'\n", name); 7596: } 7597: /* 7598: * [ WFC: No < in Attribute Values ] 7599: * The replacement text of any entity referred to directly or 7600: * indirectly in an attribute value (other than "<") must 7601: * not contain a <. 7602: */ 7603: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7604: (ent != NULL) && (ent->content != NULL) && 7605: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7606: (xmlStrchr(ent->content, '<'))) { 7607: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7608: "'<' in entity '%s' is not allowed in attributes values\n", 7609: name); 7610: } 7611: 7612: /* 7613: * Internal check, no parameter entities here ... 7614: */ 7615: else { 7616: switch (ent->etype) { 7617: case XML_INTERNAL_PARAMETER_ENTITY: 7618: case XML_EXTERNAL_PARAMETER_ENTITY: 7619: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7620: "Attempt to reference the parameter entity '%s'\n", 7621: name); 7622: break; 7623: default: 7624: break; 7625: } 7626: } 7627: 7628: /* 7629: * [ WFC: No Recursion ] 7630: * A parsed entity must not contain a recursive reference 7631: * to itself, either directly or indirectly. 
7632: * Done somewhere else 7633: */ 7634: 7635: xmlFree(name); 7636: *str = ptr; 7637: return(ent); 7638: } 7639: 7640: /** 7641: * xmlParsePEReference: 7642: * @ctxt: an XML parser context 7643: * 7644: * parse PEReference declarations 7645: * The entity content is handled directly by pushing it's content as 7646: * a new input stream. 7647: * 7648: * [69] PEReference ::= '%' Name ';' 7649: * 7650: * [ WFC: No Recursion ] 7651: * A parsed entity must not contain a recursive 7652: * reference to itself, either directly or indirectly. 7653: * 7654: * [ WFC: Entity Declared ] 7655: * In a document without any DTD, a document with only an internal DTD 7656: * subset which contains no parameter entity references, or a document 7657: * with "standalone='yes'", ... ... The declaration of a parameter 7658: * entity must precede any reference to it... 7659: * 7660: * [ VC: Entity Declared ] 7661: * In a document with an external subset or external parameter entities 7662: * with "standalone='no'", ... ... The declaration of a parameter entity 7663: * must precede any reference to it... 7664: * 7665: * [ WFC: In DTD ] 7666: * Parameter-entity references may only appear in the DTD. 7667: * NOTE: misleading but this is handled. 
7668: */ 7669: void 7670: xmlParsePEReference(xmlParserCtxtPtr ctxt) 7671: { 7672: const xmlChar *name; 7673: xmlEntityPtr entity = NULL; 7674: xmlParserInputPtr input; 7675: 7676: if (RAW != '%') 7677: return; 7678: NEXT; 7679: name = xmlParseName(ctxt); 7680: if (name == NULL) { 7681: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7682: "xmlParsePEReference: no name\n"); 7683: return; 7684: } 7685: if (RAW != ';') { 7686: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7687: return; 7688: } 7689: 7690: NEXT; 7691: 7692: /* 7693: * Increate the number of entity references parsed 7694: */ 7695: ctxt->nbentities++; 7696: 7697: /* 7698: * Request the entity from SAX 7699: */ 7700: if ((ctxt->sax != NULL) && 7701: (ctxt->sax->getParameterEntity != NULL)) 7702: entity = ctxt->sax->getParameterEntity(ctxt->userData, 7703: name); 7704: if (entity == NULL) { 7705: /* 7706: * [ WFC: Entity Declared ] 7707: * In a document without any DTD, a document with only an 7708: * internal DTD subset which contains no parameter entity 7709: * references, or a document with "standalone='yes'", ... 7710: * ... The declaration of a parameter entity must precede 7711: * any reference to it... 7712: */ 7713: if ((ctxt->standalone == 1) || 7714: ((ctxt->hasExternalSubset == 0) && 7715: (ctxt->hasPErefs == 0))) { 7716: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7717: "PEReference: %%%s; not found\n", 7718: name); 7719: } else { 7720: /* 7721: * [ VC: Entity Declared ] 7722: * In a document with an external subset or external 7723: * parameter entities with "standalone='no'", ... 7724: * ... The declaration of a parameter entity must 7725: * precede any reference to it... 
7726: */ 7727: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7728: "PEReference: %%%s; not found\n", 7729: name, NULL); 7730: ctxt->valid = 0; 7731: } 7732: } else { 7733: /* 7734: * Internal checking in case the entity quest barfed 7735: */ 7736: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7737: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7738: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7739: "Internal: %%%s; is not a parameter entity\n", 7740: name, NULL); 7741: } else if (ctxt->input->free != deallocblankswrapper) { 7742: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7743: if (xmlPushInput(ctxt, input) < 0) 7744: return; 7745: } else { 7746: /* 7747: * TODO !!! 7748: * handle the extra spaces added before and after 7749: * c.f. http://www.w3.org/TR/REC-xml#as-PE 7750: */ 7751: input = xmlNewEntityInputStream(ctxt, entity); 7752: if (xmlPushInput(ctxt, input) < 0) 7753: return; 7754: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7755: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7756: (IS_BLANK_CH(NXT(5)))) { 7757: xmlParseTextDecl(ctxt); 7758: if (ctxt->errNo == 7759: XML_ERR_UNSUPPORTED_ENCODING) { 7760: /* 7761: * The XML REC instructs us to stop parsing 7762: * right here 7763: */ 7764: ctxt->instate = XML_PARSER_EOF; 7765: return; 7766: } 7767: } 7768: } 7769: } 7770: ctxt->hasPErefs = 1; 7771: } 7772: 7773: /** 7774: * xmlLoadEntityContent: 7775: * @ctxt: an XML parser context 7776: * @entity: an unloaded system entity 7777: * 7778: * Load the original content of the given system entity from the 7779: * ExternalID/SystemID given. 
This is to be used for Included in Literal 7780: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7781: * 7782: * Returns 0 in case of success and -1 in case of failure 7783: */ 7784: static int 7785: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7786: xmlParserInputPtr input; 7787: xmlBufferPtr buf; 7788: int l, c; 7789: int count = 0; 7790: 7791: if ((ctxt == NULL) || (entity == NULL) || 7792: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7793: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7794: (entity->content != NULL)) { 7795: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7796: "xmlLoadEntityContent parameter error"); 7797: return(-1); 7798: } 7799: 7800: if (xmlParserDebugEntities) 7801: xmlGenericError(xmlGenericErrorContext, 7802: "Reading %s entity content input\n", entity->name); 7803: 7804: buf = xmlBufferCreate(); 7805: if (buf == NULL) { 7806: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7807: "xmlLoadEntityContent parameter error"); 7808: return(-1); 7809: } 7810: 7811: input = xmlNewEntityInputStream(ctxt, entity); 7812: if (input == NULL) { 7813: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7814: "xmlLoadEntityContent input error"); 7815: xmlBufferFree(buf); 7816: return(-1); 7817: } 7818: 7819: /* 7820: * Push the entity as the current input, read char by char 7821: * saving to the buffer until the end of the entity or an error 7822: */ 7823: if (xmlPushInput(ctxt, input) < 0) { 7824: xmlBufferFree(buf); 7825: return(-1); 7826: } 7827: 7828: GROW; 7829: c = CUR_CHAR(l); 7830: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7831: (IS_CHAR(c))) { 7832: xmlBufferAdd(buf, ctxt->input->cur, l); 7833: if (count++ > 100) { 7834: count = 0; 7835: GROW; 7836: } 7837: NEXTL(l); 7838: c = CUR_CHAR(l); 7839: } 7840: 7841: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7842: xmlPopInput(ctxt); 7843: } else if (!IS_CHAR(c)) { 7844: xmlFatalErrMsgInt(ctxt, 
XML_ERR_INVALID_CHAR, 7845: "xmlLoadEntityContent: invalid char value %d\n", 7846: c); 7847: xmlBufferFree(buf); 7848: return(-1); 7849: } 7850: entity->content = buf->content; 7851: buf->content = NULL; 7852: xmlBufferFree(buf); 7853: 7854: return(0); 7855: } 7856: 7857: /** 7858: * xmlParseStringPEReference: 7859: * @ctxt: an XML parser context 7860: * @str: a pointer to an index in the string 7861: * 7862: * parse PEReference declarations 7863: * 7864: * [69] PEReference ::= '%' Name ';' 7865: * 7866: * [ WFC: No Recursion ] 7867: * A parsed entity must not contain a recursive 7868: * reference to itself, either directly or indirectly. 7869: * 7870: * [ WFC: Entity Declared ] 7871: * In a document without any DTD, a document with only an internal DTD 7872: * subset which contains no parameter entity references, or a document 7873: * with "standalone='yes'", ... ... The declaration of a parameter 7874: * entity must precede any reference to it... 7875: * 7876: * [ VC: Entity Declared ] 7877: * In a document with an external subset or external parameter entities 7878: * with "standalone='no'", ... ... The declaration of a parameter entity 7879: * must precede any reference to it... 7880: * 7881: * [ WFC: In DTD ] 7882: * Parameter-entity references may only appear in the DTD. 7883: * NOTE: misleading but this is handled. 7884: * 7885: * Returns the string of the entity content. 
7886: * str is updated to the current value of the index 7887: */ 7888: static xmlEntityPtr 7889: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7890: const xmlChar *ptr; 7891: xmlChar cur; 7892: xmlChar *name; 7893: xmlEntityPtr entity = NULL; 7894: 7895: if ((str == NULL) || (*str == NULL)) return(NULL); 7896: ptr = *str; 7897: cur = *ptr; 7898: if (cur != '%') 7899: return(NULL); 7900: ptr++; 7901: name = xmlParseStringName(ctxt, &ptr); 7902: if (name == NULL) { 7903: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7904: "xmlParseStringPEReference: no name\n"); 7905: *str = ptr; 7906: return(NULL); 7907: } 7908: cur = *ptr; 7909: if (cur != ';') { 7910: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7911: xmlFree(name); 7912: *str = ptr; 7913: return(NULL); 7914: } 7915: ptr++; 7916: 7917: /* 7918: * Increate the number of entity references parsed 7919: */ 7920: ctxt->nbentities++; 7921: 7922: /* 7923: * Request the entity from SAX 7924: */ 7925: if ((ctxt->sax != NULL) && 7926: (ctxt->sax->getParameterEntity != NULL)) 7927: entity = ctxt->sax->getParameterEntity(ctxt->userData, 7928: name); 7929: if (entity == NULL) { 7930: /* 7931: * [ WFC: Entity Declared ] 7932: * In a document without any DTD, a document with only an 7933: * internal DTD subset which contains no parameter entity 7934: * references, or a document with "standalone='yes'", ... 7935: * ... The declaration of a parameter entity must precede 7936: * any reference to it... 7937: */ 7938: if ((ctxt->standalone == 1) || 7939: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7940: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7941: "PEReference: %%%s; not found\n", name); 7942: } else { 7943: /* 7944: * [ VC: Entity Declared ] 7945: * In a document with an external subset or external 7946: * parameter entities with "standalone='no'", ... 7947: * ... The declaration of a parameter entity must 7948: * precede any reference to it... 
7949: */ 7950: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7951: "PEReference: %%%s; not found\n", 7952: name, NULL); 7953: ctxt->valid = 0; 7954: } 7955: } else { 7956: /* 7957: * Internal checking in case the entity quest barfed 7958: */ 7959: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7960: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7961: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7962: "%%%s; is not a parameter entity\n", 7963: name, NULL); 7964: } 7965: } 7966: ctxt->hasPErefs = 1; 7967: xmlFree(name); 7968: *str = ptr; 7969: return(entity); 7970: } 7971: 7972: /** 7973: * xmlParseDocTypeDecl: 7974: * @ctxt: an XML parser context 7975: * 7976: * parse a DOCTYPE declaration 7977: * 7978: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7979: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7980: * 7981: * [ VC: Root Element Type ] 7982: * The Name in the document type declaration must match the element 7983: * type of the root element. 7984: */ 7985: 7986: void 7987: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7988: const xmlChar *name = NULL; 7989: xmlChar *ExternalID = NULL; 7990: xmlChar *URI = NULL; 7991: 7992: /* 7993: * We know that '<!DOCTYPE' has been detected. 7994: */ 7995: SKIP(9); 7996: 7997: SKIP_BLANKS; 7998: 7999: /* 8000: * Parse the DOCTYPE name. 8001: */ 8002: name = xmlParseName(ctxt); 8003: if (name == NULL) { 8004: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8005: "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8006: } 8007: ctxt->intSubName = name; 8008: 8009: SKIP_BLANKS; 8010: 8011: /* 8012: * Check for SystemID and ExternalID 8013: */ 8014: URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8015: 8016: if ((URI != NULL) || (ExternalID != NULL)) { 8017: ctxt->hasExternalSubset = 1; 8018: } 8019: ctxt->extSubURI = URI; 8020: ctxt->extSubSystem = ExternalID; 8021: 8022: SKIP_BLANKS; 8023: 8024: /* 8025: * Create and update the internal subset. 
8026: */ 8027: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8028: (!ctxt->disableSAX)) 8029: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8030: 8031: /* 8032: * Is there any internal subset declarations ? 8033: * they are handled separately in xmlParseInternalSubset() 8034: */ 8035: if (RAW == '[') 8036: return; 8037: 8038: /* 8039: * We should be at the end of the DOCTYPE declaration. 8040: */ 8041: if (RAW != '>') { 8042: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8043: } 8044: NEXT; 8045: } 8046: 8047: /** 8048: * xmlParseInternalSubset: 8049: * @ctxt: an XML parser context 8050: * 8051: * parse the internal subset declaration 8052: * 8053: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8054: */ 8055: 8056: static void 8057: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8058: /* 8059: * Is there any DTD definition ? 8060: */ 8061: if (RAW == '[') { 8062: ctxt->instate = XML_PARSER_DTD; 8063: NEXT; 8064: /* 8065: * Parse the succession of Markup declarations and 8066: * PEReferences. 8067: * Subsequence (markupdecl | PEReference | S)* 8068: */ 8069: while (RAW != ']') { 8070: const xmlChar *check = CUR_PTR; 8071: unsigned int cons = ctxt->input->consumed; 8072: 8073: SKIP_BLANKS; 8074: xmlParseMarkupDecl(ctxt); 8075: xmlParsePEReference(ctxt); 8076: 8077: /* 8078: * Pop-up of finished entities. 8079: */ 8080: while ((RAW == 0) && (ctxt->inputNr > 1)) 8081: xmlPopInput(ctxt); 8082: 8083: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8084: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8085: "xmlParseInternalSubset: error detected in Markup declaration\n"); 8086: break; 8087: } 8088: } 8089: if (RAW == ']') { 8090: NEXT; 8091: SKIP_BLANKS; 8092: } 8093: } 8094: 8095: /* 8096: * We should be at the end of the DOCTYPE declaration. 
8097: */ 8098: if (RAW != '>') { 8099: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8100: } 8101: NEXT; 8102: } 8103: 8104: #ifdef LIBXML_SAX1_ENABLED 8105: /** 8106: * xmlParseAttribute: 8107: * @ctxt: an XML parser context 8108: * @value: a xmlChar ** used to store the value of the attribute 8109: * 8110: * parse an attribute 8111: * 8112: * [41] Attribute ::= Name Eq AttValue 8113: * 8114: * [ WFC: No External Entity References ] 8115: * Attribute values cannot contain direct or indirect entity references 8116: * to external entities. 8117: * 8118: * [ WFC: No < in Attribute Values ] 8119: * The replacement text of any entity referred to directly or indirectly in 8120: * an attribute value (other than "<") must not contain a <. 8121: * 8122: * [ VC: Attribute Value Type ] 8123: * The attribute must have been declared; the value must be of the type 8124: * declared for it. 8125: * 8126: * [25] Eq ::= S? '=' S? 8127: * 8128: * With namespace: 8129: * 8130: * [NS 11] Attribute ::= QName Eq AttValue 8131: * 8132: * Also the case QName == xmlns:??? is handled independently as a namespace 8133: * definition. 8134: * 8135: * Returns the attribute name, and the value in *value. 
8136: */ 8137: 8138: const xmlChar * 8139: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8140: const xmlChar *name; 8141: xmlChar *val; 8142: 8143: *value = NULL; 8144: GROW; 8145: name = xmlParseName(ctxt); 8146: if (name == NULL) { 8147: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8148: "error parsing attribute name\n"); 8149: return(NULL); 8150: } 8151: 8152: /* 8153: * read the value 8154: */ 8155: SKIP_BLANKS; 8156: if (RAW == '=') { 8157: NEXT; 8158: SKIP_BLANKS; 8159: val = xmlParseAttValue(ctxt); 8160: ctxt->instate = XML_PARSER_CONTENT; 8161: } else { 8162: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8163: "Specification mandate value for attribute %s\n", name); 8164: return(NULL); 8165: } 8166: 8167: /* 8168: * Check that xml:lang conforms to the specification 8169: * No more registered as an error, just generate a warning now 8170: * since this was deprecated in XML second edition 8171: */ 8172: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8173: if (!xmlCheckLanguageID(val)) { 8174: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8175: "Malformed value for xml:lang : %s\n", 8176: val, NULL); 8177: } 8178: } 8179: 8180: /* 8181: * Check that xml:space conforms to the specification 8182: */ 8183: if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8184: if (xmlStrEqual(val, BAD_CAST "default")) 8185: *(ctxt->space) = 0; 8186: else if (xmlStrEqual(val, BAD_CAST "preserve")) 8187: *(ctxt->space) = 1; 8188: else { 8189: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8190: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8191: val, NULL); 8192: } 8193: } 8194: 8195: *value = val; 8196: return(name); 8197: } 8198: 8199: /** 8200: * xmlParseStartTag: 8201: * @ctxt: an XML parser context 8202: * 8203: * parse a start of tag either for rule element or 8204: * EmptyElement. In both case we don't parse the tag closing chars. 8205: * 8206: * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8207: * 8208: * [ WFC: Unique Att Spec ] 8209: * No attribute name may appear more than once in the same start-tag or 8210: * empty-element tag. 8211: * 8212: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8213: * 8214: * [ WFC: Unique Att Spec ] 8215: * No attribute name may appear more than once in the same start-tag or 8216: * empty-element tag. 8217: * 8218: * With namespace: 8219: * 8220: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8221: * 8222: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8223: * 8224: * Returns the element name parsed 8225: */ 8226: 8227: const xmlChar * 8228: xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8229: const xmlChar *name; 8230: const xmlChar *attname; 8231: xmlChar *attvalue; 8232: const xmlChar **atts = ctxt->atts; 8233: int nbatts = 0; 8234: int maxatts = ctxt->maxatts; 8235: int i; 8236: 8237: if (RAW != '<') return(NULL); 8238: NEXT1; 8239: 8240: name = xmlParseName(ctxt); 8241: if (name == NULL) { 8242: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8243: "xmlParseStartTag: invalid element name\n"); 8244: return(NULL); 8245: } 8246: 8247: /* 8248: * Now parse the attributes, it ends up with the ending 8249: * 8250: * (S Attribute)* S? 8251: */ 8252: SKIP_BLANKS; 8253: GROW; 8254: 8255: while ((RAW != '>') && 8256: ((RAW != '/') || (NXT(1) != '>')) && 8257: (IS_BYTE_CHAR(RAW))) { 8258: const xmlChar *q = CUR_PTR; 8259: unsigned int cons = ctxt->input->consumed; 8260: 8261: attname = xmlParseAttribute(ctxt, &attvalue); 8262: if ((attname != NULL) && (attvalue != NULL)) { 8263: /* 8264: * [ WFC: Unique Att Spec ] 8265: * No attribute name may appear more than once in the same 8266: * start-tag or empty-element tag. 
8267: */ 8268: for (i = 0; i < nbatts;i += 2) { 8269: if (xmlStrEqual(atts[i], attname)) { 8270: xmlErrAttributeDup(ctxt, NULL, attname); 8271: xmlFree(attvalue); 8272: goto failed; 8273: } 8274: } 8275: /* 8276: * Add the pair to atts 8277: */ 8278: if (atts == NULL) { 8279: maxatts = 22; /* allow for 10 attrs by default */ 8280: atts = (const xmlChar **) 8281: xmlMalloc(maxatts * sizeof(xmlChar *)); 8282: if (atts == NULL) { 8283: xmlErrMemory(ctxt, NULL); 8284: if (attvalue != NULL) 8285: xmlFree(attvalue); 8286: goto failed; 8287: } 8288: ctxt->atts = atts; 8289: ctxt->maxatts = maxatts; 8290: } else if (nbatts + 4 > maxatts) { 8291: const xmlChar **n; 8292: 8293: maxatts *= 2; 8294: n = (const xmlChar **) xmlRealloc((void *) atts, 8295: maxatts * sizeof(const xmlChar *)); 8296: if (n == NULL) { 8297: xmlErrMemory(ctxt, NULL); 8298: if (attvalue != NULL) 8299: xmlFree(attvalue); 8300: goto failed; 8301: } 8302: atts = n; 8303: ctxt->atts = atts; 8304: ctxt->maxatts = maxatts; 8305: } 8306: atts[nbatts++] = attname; 8307: atts[nbatts++] = attvalue; 8308: atts[nbatts] = NULL; 8309: atts[nbatts + 1] = NULL; 8310: } else { 8311: if (attvalue != NULL) 8312: xmlFree(attvalue); 8313: } 8314: 8315: failed: 8316: 8317: GROW 8318: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8319: break; 8320: if (!IS_BLANK_CH(RAW)) { 8321: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8322: "attributes construct error\n"); 8323: } 8324: SKIP_BLANKS; 8325: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8326: (attname == NULL) && (attvalue == NULL)) { 8327: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8328: "xmlParseStartTag: problem parsing attributes\n"); 8329: break; 8330: } 8331: SHRINK; 8332: GROW; 8333: } 8334: 8335: /* 8336: * SAX: Start of Element ! 
8337: */ 8338: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8339: (!ctxt->disableSAX)) { 8340: if (nbatts > 0) 8341: ctxt->sax->startElement(ctxt->userData, name, atts); 8342: else 8343: ctxt->sax->startElement(ctxt->userData, name, NULL); 8344: } 8345: 8346: if (atts != NULL) { 8347: /* Free only the content strings */ 8348: for (i = 1;i < nbatts;i+=2) 8349: if (atts[i] != NULL) 8350: xmlFree((xmlChar *) atts[i]); 8351: } 8352: return(name); 8353: } 8354: 8355: /** 8356: * xmlParseEndTag1: 8357: * @ctxt: an XML parser context 8358: * @line: line of the start tag 8359: * @nsNr: number of namespaces on the start tag 8360: * 8361: * parse an end of tag 8362: * 8363: * [42] ETag ::= '</' Name S? '>' 8364: * 8365: * With namespace 8366: * 8367: * [NS 9] ETag ::= '</' QName S? '>' 8368: */ 8369: 8370: static void 8371: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8372: const xmlChar *name; 8373: 8374: GROW; 8375: if ((RAW != '<') || (NXT(1) != '/')) { 8376: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8377: "xmlParseEndTag: '</' not found\n"); 8378: return; 8379: } 8380: SKIP(2); 8381: 8382: name = xmlParseNameAndCompare(ctxt,ctxt->name); 8383: 8384: /* 8385: * We should definitely be at the ending "S? '>'" part 8386: */ 8387: GROW; 8388: SKIP_BLANKS; 8389: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8390: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8391: } else 8392: NEXT1; 8393: 8394: /* 8395: * [ WFC: Element Type Match ] 8396: * The Name in an element's end-tag must match the element type in the 8397: * start-tag. 
8398: * 8399: */ 8400: if (name != (xmlChar*)1) { 8401: if (name == NULL) name = BAD_CAST "unparseable"; 8402: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8403: "Opening and ending tag mismatch: %s line %d and %s\n", 8404: ctxt->name, line, name); 8405: } 8406: 8407: /* 8408: * SAX: End of Tag 8409: */ 8410: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8411: (!ctxt->disableSAX)) 8412: ctxt->sax->endElement(ctxt->userData, ctxt->name); 8413: 8414: namePop(ctxt); 8415: spacePop(ctxt); 8416: return; 8417: } 8418: 8419: /** 8420: * xmlParseEndTag: 8421: * @ctxt: an XML parser context 8422: * 8423: * parse an end of tag 8424: * 8425: * [42] ETag ::= '</' Name S? '>' 8426: * 8427: * With namespace 8428: * 8429: * [NS 9] ETag ::= '</' QName S? '>' 8430: */ 8431: 8432: void 8433: xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8434: xmlParseEndTag1(ctxt, 0); 8435: } 8436: #endif /* LIBXML_SAX1_ENABLED */ 8437: 8438: /************************************************************************ 8439: * * 8440: * SAX 2 specific operations * 8441: * * 8442: ************************************************************************/ 8443: 8444: /* 8445: * xmlGetNamespace: 8446: * @ctxt: an XML parser context 8447: * @prefix: the prefix to lookup 8448: * 8449: * Lookup the namespace name for the @prefix (which ca be NULL) 8450: * The prefix must come from the @ctxt->dict dictionnary 8451: * 8452: * Returns the namespace name or NULL if not bound 8453: */ 8454: static const xmlChar * 8455: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8456: int i; 8457: 8458: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8459: for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8460: if (ctxt->nsTab[i] == prefix) { 8461: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8462: return(NULL); 8463: return(ctxt->nsTab[i + 1]); 8464: } 8465: return(NULL); 8466: } 8467: 8468: /** 8469: * xmlParseQName: 8470: * @ctxt: an XML parser context 8471: * @prefix: pointer to store 
the prefix part 8472: * 8473: * parse an XML Namespace QName 8474: * 8475: * [6] QName ::= (Prefix ':')? LocalPart 8476: * [7] Prefix ::= NCName 8477: * [8] LocalPart ::= NCName 8478: * 8479: * Returns the Name parsed or NULL 8480: */ 8481: 8482: static const xmlChar * 8483: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8484: const xmlChar *l, *p; 8485: 8486: GROW; 8487: 8488: l = xmlParseNCName(ctxt); 8489: if (l == NULL) { 8490: if (CUR == ':') { 8491: l = xmlParseName(ctxt); 8492: if (l != NULL) { 8493: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8494: "Failed to parse QName '%s'\n", l, NULL, NULL); 8495: *prefix = NULL; 8496: return(l); 8497: } 8498: } 8499: return(NULL); 8500: } 8501: if (CUR == ':') { 8502: NEXT; 8503: p = l; 8504: l = xmlParseNCName(ctxt); 8505: if (l == NULL) { 8506: xmlChar *tmp; 8507: 8508: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8509: "Failed to parse QName '%s:'\n", p, NULL, NULL); 8510: l = xmlParseNmtoken(ctxt); 8511: if (l == NULL) 8512: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8513: else { 8514: tmp = xmlBuildQName(l, p, NULL, 0); 8515: xmlFree((char *)l); 8516: } 8517: p = xmlDictLookup(ctxt->dict, tmp, -1); 8518: if (tmp != NULL) xmlFree(tmp); 8519: *prefix = NULL; 8520: return(p); 8521: } 8522: if (CUR == ':') { 8523: xmlChar *tmp; 8524: 8525: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8526: "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8527: NEXT; 8528: tmp = (xmlChar *) xmlParseName(ctxt); 8529: if (tmp != NULL) { 8530: tmp = xmlBuildQName(tmp, l, NULL, 0); 8531: l = xmlDictLookup(ctxt->dict, tmp, -1); 8532: if (tmp != NULL) xmlFree(tmp); 8533: *prefix = p; 8534: return(l); 8535: } 8536: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8537: l = xmlDictLookup(ctxt->dict, tmp, -1); 8538: if (tmp != NULL) xmlFree(tmp); 8539: *prefix = p; 8540: return(l); 8541: } 8542: *prefix = p; 8543: } else 8544: *prefix = NULL; 8545: return(l); 8546: } 8547: 8548: /** 8549: * xmlParseQNameAndCompare: 8550: * @ctxt: an XML parser context 8551: * 
@name: the localname 8552: * @prefix: the prefix, if any. 8553: * 8554: * parse an XML name and compares for match 8555: * (specialized for endtag parsing) 8556: * 8557: * Returns NULL for an illegal name, (xmlChar*) 1 for success 8558: * and the name for mismatch 8559: */ 8560: 8561: static const xmlChar * 8562: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8563: xmlChar const *prefix) { 8564: const xmlChar *cmp; 8565: const xmlChar *in; 8566: const xmlChar *ret; 8567: const xmlChar *prefix2; 8568: 8569: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8570: 8571: GROW; 8572: in = ctxt->input->cur; 8573: 8574: cmp = prefix; 8575: while (*in != 0 && *in == *cmp) { 8576: ++in; 8577: ++cmp; 8578: } 8579: if ((*cmp == 0) && (*in == ':')) { 8580: in++; 8581: cmp = name; 8582: while (*in != 0 && *in == *cmp) { 8583: ++in; 8584: ++cmp; 8585: } 8586: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8587: /* success */ 8588: ctxt->input->cur = in; 8589: return((const xmlChar*) 1); 8590: } 8591: } 8592: /* 8593: * all strings coms from the dictionary, equality can be done directly 8594: */ 8595: ret = xmlParseQName (ctxt, &prefix2); 8596: if ((ret == name) && (prefix == prefix2)) 8597: return((const xmlChar*) 1); 8598: return ret; 8599: } 8600: 8601: /** 8602: * xmlParseAttValueInternal: 8603: * @ctxt: an XML parser context 8604: * @len: attribute len result 8605: * @alloc: whether the attribute was reallocated as a new string 8606: * @normalize: if 1 then further non-CDATA normalization must be done 8607: * 8608: * parse a value for an attribute. 8609: * NOTE: if no normalization is needed, the routine will return pointers 8610: * directly from the data buffer. 
8611: * 8612: * 3.3.3 Attribute-Value Normalization: 8613: * Before the value of an attribute is passed to the application or 8614: * checked for validity, the XML processor must normalize it as follows: 8615: * - a character reference is processed by appending the referenced 8616: * character to the attribute value 8617: * - an entity reference is processed by recursively processing the 8618: * replacement text of the entity 8619: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8620: * appending #x20 to the normalized value, except that only a single 8621: * #x20 is appended for a "#xD#xA" sequence that is part of an external 8622: * parsed entity or the literal entity value of an internal parsed entity 8623: * - other characters are processed by appending them to the normalized value 8624: * If the declared value is not CDATA, then the XML processor must further 8625: * process the normalized attribute value by discarding any leading and 8626: * trailing space (#x20) characters, and by replacing sequences of space 8627: * (#x20) characters by a single space (#x20) character. 8628: * All attributes for which no declaration has been read should be treated 8629: * by a non-validating parser as if declared CDATA. 8630: * 8631: * Returns the AttValue parsed or NULL. The value has to be freed by the 8632: * caller if it was copied, this can be detected by val[*len] == 0. 
8633: */ 8634: 8635: static xmlChar * 8636: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8637: int normalize) 8638: { 8639: xmlChar limit = 0; 8640: const xmlChar *in = NULL, *start, *end, *last; 8641: xmlChar *ret = NULL; 8642: 8643: GROW; 8644: in = (xmlChar *) CUR_PTR; 8645: if (*in != '"' && *in != '\'') { 8646: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8647: return (NULL); 8648: } 8649: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8650: 8651: /* 8652: * try to handle in this routine the most common case where no 8653: * allocation of a new string is required and where content is 8654: * pure ASCII. 8655: */ 8656: limit = *in++; 8657: end = ctxt->input->end; 8658: start = in; 8659: if (in >= end) { 8660: const xmlChar *oldbase = ctxt->input->base; 8661: GROW; 8662: if (oldbase != ctxt->input->base) { 8663: long delta = ctxt->input->base - oldbase; 8664: start = start + delta; 8665: in = in + delta; 8666: } 8667: end = ctxt->input->end; 8668: } 8669: if (normalize) { 8670: /* 8671: * Skip any leading spaces 8672: */ 8673: while ((in < end) && (*in != limit) && 8674: ((*in == 0x20) || (*in == 0x9) || 8675: (*in == 0xA) || (*in == 0xD))) { 8676: in++; 8677: start = in; 8678: if (in >= end) { 8679: const xmlChar *oldbase = ctxt->input->base; 8680: GROW; 8681: if (oldbase != ctxt->input->base) { 8682: long delta = ctxt->input->base - oldbase; 8683: start = start + delta; 8684: in = in + delta; 8685: } 8686: end = ctxt->input->end; 8687: } 8688: } 8689: while ((in < end) && (*in != limit) && (*in >= 0x20) && 8690: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8691: if ((*in++ == 0x20) && (*in == 0x20)) break; 8692: if (in >= end) { 8693: const xmlChar *oldbase = ctxt->input->base; 8694: GROW; 8695: if (oldbase != ctxt->input->base) { 8696: long delta = ctxt->input->base - oldbase; 8697: start = start + delta; 8698: in = in + delta; 8699: } 8700: end = ctxt->input->end; 8701: } 8702: } 8703: last = in; 8704: /* 8705: * skip the 
trailing blanks 8706: */ 8707: while ((last[-1] == 0x20) && (last > start)) last--; 8708: while ((in < end) && (*in != limit) && 8709: ((*in == 0x20) || (*in == 0x9) || 8710: (*in == 0xA) || (*in == 0xD))) { 8711: in++; 8712: if (in >= end) { 8713: const xmlChar *oldbase = ctxt->input->base; 8714: GROW; 8715: if (oldbase != ctxt->input->base) { 8716: long delta = ctxt->input->base - oldbase; 8717: start = start + delta; 8718: in = in + delta; 8719: last = last + delta; 8720: } 8721: end = ctxt->input->end; 8722: } 8723: } 8724: if (*in != limit) goto need_complex; 8725: } else { 8726: while ((in < end) && (*in != limit) && (*in >= 0x20) && 8727: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8728: in++; 8729: if (in >= end) { 8730: const xmlChar *oldbase = ctxt->input->base; 8731: GROW; 8732: if (oldbase != ctxt->input->base) { 8733: long delta = ctxt->input->base - oldbase; 8734: start = start + delta; 8735: in = in + delta; 8736: } 8737: end = ctxt->input->end; 8738: } 8739: } 8740: last = in; 8741: if (*in != limit) goto need_complex; 8742: } 8743: in++; 8744: if (len != NULL) { 8745: *len = last - start; 8746: ret = (xmlChar *) start; 8747: } else { 8748: if (alloc) *alloc = 1; 8749: ret = xmlStrndup(start, last - start); 8750: } 8751: CUR_PTR = in; 8752: if (alloc) *alloc = 0; 8753: return ret; 8754: need_complex: 8755: if (alloc) *alloc = 1; 8756: return xmlParseAttValueComplex(ctxt, len, normalize); 8757: } 8758: 8759: /** 8760: * xmlParseAttribute2: 8761: * @ctxt: an XML parser context 8762: * @pref: the element prefix 8763: * @elem: the element name 8764: * @prefix: a xmlChar ** used to store the value of the attribute prefix 8765: * @value: a xmlChar ** used to store the value of the attribute 8766: * @len: an int * to save the length of the attribute 8767: * @alloc: an int * to indicate if the attribute was allocated 8768: * 8769: * parse an attribute in the new SAX2 framework. 8770: * 8771: * Returns the attribute name, and the value in *value, . 
8772: */ 8773: 8774: static const xmlChar * 8775: xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8776: const xmlChar * pref, const xmlChar * elem, 8777: const xmlChar ** prefix, xmlChar ** value, 8778: int *len, int *alloc) 8779: { 8780: const xmlChar *name; 8781: xmlChar *val, *internal_val = NULL; 8782: int normalize = 0; 8783: 8784: *value = NULL; 8785: GROW; 8786: name = xmlParseQName(ctxt, prefix); 8787: if (name == NULL) { 8788: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8789: "error parsing attribute name\n"); 8790: return (NULL); 8791: } 8792: 8793: /* 8794: * get the type if needed 8795: */ 8796: if (ctxt->attsSpecial != NULL) { 8797: int type; 8798: 8799: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8800: pref, elem, *prefix, name); 8801: if (type != 0) 8802: normalize = 1; 8803: } 8804: 8805: /* 8806: * read the value 8807: */ 8808: SKIP_BLANKS; 8809: if (RAW == '=') { 8810: NEXT; 8811: SKIP_BLANKS; 8812: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8813: if (normalize) { 8814: /* 8815: * Sometimes a second normalisation pass for spaces is needed 8816: * but that only happens if charrefs or entities refernces 8817: * have been used in the attribute value, i.e. the attribute 8818: * value have been extracted in an allocated string already. 
8819: */ 8820: if (*alloc) { 8821: const xmlChar *val2; 8822: 8823: val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8824: if ((val2 != NULL) && (val2 != val)) { 8825: xmlFree(val); 8826: val = (xmlChar *) val2; 8827: } 8828: } 8829: } 8830: ctxt->instate = XML_PARSER_CONTENT; 8831: } else { 8832: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8833: "Specification mandate value for attribute %s\n", 8834: name); 8835: return (NULL); 8836: } 8837: 8838: if (*prefix == ctxt->str_xml) { 8839: /* 8840: * Check that xml:lang conforms to the specification 8841: * No more registered as an error, just generate a warning now 8842: * since this was deprecated in XML second edition 8843: */ 8844: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8845: internal_val = xmlStrndup(val, *len); 8846: if (!xmlCheckLanguageID(internal_val)) { 8847: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8848: "Malformed value for xml:lang : %s\n", 8849: internal_val, NULL); 8850: } 8851: } 8852: 8853: /* 8854: * Check that xml:space conforms to the specification 8855: */ 8856: if (xmlStrEqual(name, BAD_CAST "space")) { 8857: internal_val = xmlStrndup(val, *len); 8858: if (xmlStrEqual(internal_val, BAD_CAST "default")) 8859: *(ctxt->space) = 0; 8860: else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8861: *(ctxt->space) = 1; 8862: else { 8863: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8864: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8865: internal_val, NULL); 8866: } 8867: } 8868: if (internal_val) { 8869: xmlFree(internal_val); 8870: } 8871: } 8872: 8873: *value = val; 8874: return (name); 8875: } 8876: /** 8877: * xmlParseStartTag2: 8878: * @ctxt: an XML parser context 8879: * 8880: * parse a start of tag either for rule element or 8881: * EmptyElement. In both case we don't parse the tag closing chars. 8882: * This routine is called when running SAX2 parsing 8883: * 8884: * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8885: * 8886: * [ WFC: Unique Att Spec ] 8887: * No attribute name may appear more than once in the same start-tag or 8888: * empty-element tag. 8889: * 8890: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8891: * 8892: * [ WFC: Unique Att Spec ] 8893: * No attribute name may appear more than once in the same start-tag or 8894: * empty-element tag. 8895: * 8896: * With namespace: 8897: * 8898: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8899: * 8900: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8901: * 8902: * Returns the element name parsed 8903: */ 8904: 8905: static const xmlChar * 8906: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8907: const xmlChar **URI, int *tlen) { 8908: const xmlChar *localname; 8909: const xmlChar *prefix; 8910: const xmlChar *attname; 8911: const xmlChar *aprefix; 8912: const xmlChar *nsname; 8913: xmlChar *attvalue; 8914: const xmlChar **atts = ctxt->atts; 8915: int maxatts = ctxt->maxatts; 8916: int nratts, nbatts, nbdef; 8917: int i, j, nbNs, attval, oldline, oldcol; 8918: const xmlChar *base; 8919: unsigned long cur; 8920: int nsNr = ctxt->nsNr; 8921: 8922: if (RAW != '<') return(NULL); 8923: NEXT1; 8924: 8925: /* 8926: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8927: * point since the attribute values may be stored as pointers to 8928: * the buffer and calling SHRINK would destroy them ! 8929: * The Shrinking is only possible once the full set of attribute 8930: * callbacks have been done. 8931: */ 8932: reparse: 8933: SHRINK; 8934: base = ctxt->input->base; 8935: cur = ctxt->input->cur - ctxt->input->base; 8936: oldline = ctxt->input->line; 8937: oldcol = ctxt->input->col; 8938: nbatts = 0; 8939: nratts = 0; 8940: nbdef = 0; 8941: nbNs = 0; 8942: attval = 0; 8943: /* Forget any namespaces added during an earlier parse of this element. 
*/ 8944: ctxt->nsNr = nsNr; 8945: 8946: localname = xmlParseQName(ctxt, &prefix); 8947: if (localname == NULL) { 8948: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8949: "StartTag: invalid element name\n"); 8950: return(NULL); 8951: } 8952: *tlen = ctxt->input->cur - ctxt->input->base - cur; 8953: 8954: /* 8955: * Now parse the attributes, it ends up with the ending 8956: * 8957: * (S Attribute)* S? 8958: */ 8959: SKIP_BLANKS; 8960: GROW; 8961: if (ctxt->input->base != base) goto base_changed; 8962: 8963: while ((RAW != '>') && 8964: ((RAW != '/') || (NXT(1) != '>')) && 8965: (IS_BYTE_CHAR(RAW))) { 8966: const xmlChar *q = CUR_PTR; 8967: unsigned int cons = ctxt->input->consumed; 8968: int len = -1, alloc = 0; 8969: 8970: attname = xmlParseAttribute2(ctxt, prefix, localname, 8971: &aprefix, &attvalue, &len, &alloc); 8972: if (ctxt->input->base != base) { 8973: if ((attvalue != NULL) && (alloc != 0)) 8974: xmlFree(attvalue); 8975: attvalue = NULL; 8976: goto base_changed; 8977: } 8978: if ((attname != NULL) && (attvalue != NULL)) { 8979: if (len < 0) len = xmlStrlen(attvalue); 8980: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8981: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8982: xmlURIPtr uri; 8983: 8984: if (*URL != 0) { 8985: uri = xmlParseURI((const char *) URL); 8986: if (uri == NULL) { 8987: xmlNsErr(ctxt, XML_WAR_NS_URI, 8988: "xmlns: '%s' is not a valid URI\n", 8989: URL, NULL, NULL); 8990: } else { 8991: if (uri->scheme == NULL) { 8992: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8993: "xmlns: URI %s is not absolute\n", 8994: URL, NULL, NULL); 8995: } 8996: xmlFreeURI(uri); 8997: } 8998: if (URL == ctxt->str_xml_ns) { 8999: if (attname != ctxt->str_xml) { 9000: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9001: "xml namespace URI cannot be the default namespace\n", 9002: NULL, NULL, NULL); 9003: } 9004: goto skip_default_ns; 9005: } 9006: if ((len == 29) && 9007: (xmlStrEqual(URL, 9008: BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 
9009: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9010: "reuse of the xmlns namespace name is forbidden\n", 9011: NULL, NULL, NULL); 9012: goto skip_default_ns; 9013: } 9014: } 9015: /* 9016: * check that it's not a defined namespace 9017: */ 9018: for (j = 1;j <= nbNs;j++) 9019: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9020: break; 9021: if (j <= nbNs) 9022: xmlErrAttributeDup(ctxt, NULL, attname); 9023: else 9024: if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9025: skip_default_ns: 9026: if (alloc != 0) xmlFree(attvalue); 9027: SKIP_BLANKS; 9028: continue; 9029: } 9030: if (aprefix == ctxt->str_xmlns) { 9031: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9032: xmlURIPtr uri; 9033: 9034: if (attname == ctxt->str_xml) { 9035: if (URL != ctxt->str_xml_ns) { 9036: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9037: "xml namespace prefix mapped to wrong URI\n", 9038: NULL, NULL, NULL); 9039: } 9040: /* 9041: * Do not keep a namespace definition node 9042: */ 9043: goto skip_ns; 9044: } 9045: if (URL == ctxt->str_xml_ns) { 9046: if (attname != ctxt->str_xml) { 9047: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9048: "xml namespace URI mapped to wrong prefix\n", 9049: NULL, NULL, NULL); 9050: } 9051: goto skip_ns; 9052: } 9053: if (attname == ctxt->str_xmlns) { 9054: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9055: "redefinition of the xmlns prefix is forbidden\n", 9056: NULL, NULL, NULL); 9057: goto skip_ns; 9058: } 9059: if ((len == 29) && 9060: (xmlStrEqual(URL, 9061: BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9062: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9063: "reuse of the xmlns namespace name is forbidden\n", 9064: NULL, NULL, NULL); 9065: goto skip_ns; 9066: } 9067: if ((URL == NULL) || (URL[0] == 0)) { 9068: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9069: "xmlns:%s: Empty XML namespace is not allowed\n", 9070: attname, NULL, NULL); 9071: goto skip_ns; 9072: } else { 9073: uri = xmlParseURI((const char *) URL); 9074: if (uri == NULL) { 9075: xmlNsErr(ctxt, 
XML_WAR_NS_URI, 9076: "xmlns:%s: '%s' is not a valid URI\n", 9077: attname, URL, NULL); 9078: } else { 9079: if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9080: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9081: "xmlns:%s: URI %s is not absolute\n", 9082: attname, URL, NULL); 9083: } 9084: xmlFreeURI(uri); 9085: } 9086: } 9087: 9088: /* 9089: * check that it's not a defined namespace 9090: */ 9091: for (j = 1;j <= nbNs;j++) 9092: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9093: break; 9094: if (j <= nbNs) 9095: xmlErrAttributeDup(ctxt, aprefix, attname); 9096: else 9097: if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9098: skip_ns: 9099: if (alloc != 0) xmlFree(attvalue); 9100: SKIP_BLANKS; 9101: if (ctxt->input->base != base) goto base_changed; 9102: continue; 9103: } 9104: 9105: /* 9106: * Add the pair to atts 9107: */ 9108: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9109: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9110: if (attvalue[len] == 0) 9111: xmlFree(attvalue); 9112: goto failed; 9113: } 9114: maxatts = ctxt->maxatts; 9115: atts = ctxt->atts; 9116: } 9117: ctxt->attallocs[nratts++] = alloc; 9118: atts[nbatts++] = attname; 9119: atts[nbatts++] = aprefix; 9120: atts[nbatts++] = NULL; /* the URI will be fetched later */ 9121: atts[nbatts++] = attvalue; 9122: attvalue += len; 9123: atts[nbatts++] = attvalue; 9124: /* 9125: * tag if some deallocation is needed 9126: */ 9127: if (alloc != 0) attval = 1; 9128: } else { 9129: if ((attvalue != NULL) && (attvalue[len] == 0)) 9130: xmlFree(attvalue); 9131: } 9132: 9133: failed: 9134: 9135: GROW 9136: if (ctxt->input->base != base) goto base_changed; 9137: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9138: break; 9139: if (!IS_BLANK_CH(RAW)) { 9140: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9141: "attributes construct error\n"); 9142: break; 9143: } 9144: SKIP_BLANKS; 9145: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9146: (attname == NULL) && (attvalue == NULL)) { 9147: 
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9148: "xmlParseStartTag: problem parsing attributes\n"); 9149: break; 9150: } 9151: GROW; 9152: if (ctxt->input->base != base) goto base_changed; 9153: } 9154: 9155: /* 9156: * The attributes defaulting 9157: */ 9158: if (ctxt->attsDefault != NULL) { 9159: xmlDefAttrsPtr defaults; 9160: 9161: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9162: if (defaults != NULL) { 9163: for (i = 0;i < defaults->nbAttrs;i++) { 9164: attname = defaults->values[5 * i]; 9165: aprefix = defaults->values[5 * i + 1]; 9166: 9167: /* 9168: * special work for namespaces defaulted defs 9169: */ 9170: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9171: /* 9172: * check that it's not a defined namespace 9173: */ 9174: for (j = 1;j <= nbNs;j++) 9175: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9176: break; 9177: if (j <= nbNs) continue; 9178: 9179: nsname = xmlGetNamespace(ctxt, NULL); 9180: if (nsname != defaults->values[5 * i + 2]) { 9181: if (nsPush(ctxt, NULL, 9182: defaults->values[5 * i + 2]) > 0) 9183: nbNs++; 9184: } 9185: } else if (aprefix == ctxt->str_xmlns) { 9186: /* 9187: * check that it's not a defined namespace 9188: */ 9189: for (j = 1;j <= nbNs;j++) 9190: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9191: break; 9192: if (j <= nbNs) continue; 9193: 9194: nsname = xmlGetNamespace(ctxt, attname); 9195: if (nsname != defaults->values[2]) { 9196: if (nsPush(ctxt, attname, 9197: defaults->values[5 * i + 2]) > 0) 9198: nbNs++; 9199: } 9200: } else { 9201: /* 9202: * check that it's not a defined attribute 9203: */ 9204: for (j = 0;j < nbatts;j+=5) { 9205: if ((attname == atts[j]) && (aprefix == atts[j+1])) 9206: break; 9207: } 9208: if (j < nbatts) continue; 9209: 9210: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9211: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9212: return(NULL); 9213: } 9214: maxatts = ctxt->maxatts; 9215: atts = ctxt->atts; 9216: } 9217: atts[nbatts++] = attname; 9218: 
atts[nbatts++] = aprefix; 9219: if (aprefix == NULL) 9220: atts[nbatts++] = NULL; 9221: else 9222: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9223: atts[nbatts++] = defaults->values[5 * i + 2]; 9224: atts[nbatts++] = defaults->values[5 * i + 3]; 9225: if ((ctxt->standalone == 1) && 9226: (defaults->values[5 * i + 4] != NULL)) { 9227: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9228: "standalone: attribute %s on %s defaulted from external subset\n", 9229: attname, localname); 9230: } 9231: nbdef++; 9232: } 9233: } 9234: } 9235: } 9236: 9237: /* 9238: * The attributes checkings 9239: */ 9240: for (i = 0; i < nbatts;i += 5) { 9241: /* 9242: * The default namespace does not apply to attribute names. 9243: */ 9244: if (atts[i + 1] != NULL) { 9245: nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9246: if (nsname == NULL) { 9247: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9248: "Namespace prefix %s for %s on %s is not defined\n", 9249: atts[i + 1], atts[i], localname); 9250: } 9251: atts[i + 2] = nsname; 9252: } else 9253: nsname = NULL; 9254: /* 9255: * [ WFC: Unique Att Spec ] 9256: * No attribute name may appear more than once in the same 9257: * start-tag or empty-element tag. 9258: * As extended by the Namespace in XML REC. 
9259: */ 9260: for (j = 0; j < i;j += 5) { 9261: if (atts[i] == atts[j]) { 9262: if (atts[i+1] == atts[j+1]) { 9263: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9264: break; 9265: } 9266: if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9267: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9268: "Namespaced Attribute %s in '%s' redefined\n", 9269: atts[i], nsname, NULL); 9270: break; 9271: } 9272: } 9273: } 9274: } 9275: 9276: nsname = xmlGetNamespace(ctxt, prefix); 9277: if ((prefix != NULL) && (nsname == NULL)) { 9278: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9279: "Namespace prefix %s on %s is not defined\n", 9280: prefix, localname, NULL); 9281: } 9282: *pref = prefix; 9283: *URI = nsname; 9284: 9285: /* 9286: * SAX: Start of Element ! 9287: */ 9288: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9289: (!ctxt->disableSAX)) { 9290: if (nbNs > 0) 9291: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9292: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9293: nbatts / 5, nbdef, atts); 9294: else 9295: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9296: nsname, 0, NULL, nbatts / 5, nbdef, atts); 9297: } 9298: 9299: /* 9300: * Free up attribute allocated strings if needed 9301: */ 9302: if (attval != 0) { 9303: for (i = 3,j = 0; j < nratts;i += 5,j++) 9304: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9305: xmlFree((xmlChar *) atts[i]); 9306: } 9307: 9308: return(localname); 9309: 9310: base_changed: 9311: /* 9312: * the attribute strings are valid iif the base didn't changed 9313: */ 9314: if (attval != 0) { 9315: for (i = 3,j = 0; j < nratts;i += 5,j++) 9316: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9317: xmlFree((xmlChar *) atts[i]); 9318: } 9319: ctxt->input->cur = ctxt->input->base + cur; 9320: ctxt->input->line = oldline; 9321: ctxt->input->col = oldcol; 9322: if (ctxt->wellFormed == 1) { 9323: goto reparse; 9324: } 9325: return(NULL); 9326: } 9327: 9328: /** 9329: * xmlParseEndTag2: 
9330: * @ctxt: an XML parser context 9331: * @line: line of the start tag 9332: * @nsNr: number of namespaces on the start tag 9333: * 9334: * parse an end of tag 9335: * 9336: * [42] ETag ::= '</' Name S? '>' 9337: * 9338: * With namespace 9339: * 9340: * [NS 9] ETag ::= '</' QName S? '>' 9341: */ 9342: 9343: static void 9344: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9345: const xmlChar *URI, int line, int nsNr, int tlen) { 9346: const xmlChar *name; 9347: 9348: GROW; 9349: if ((RAW != '<') || (NXT(1) != '/')) { 9350: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9351: return; 9352: } 9353: SKIP(2); 9354: 9355: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9356: if (ctxt->input->cur[tlen] == '>') { 9357: ctxt->input->cur += tlen + 1; 9358: goto done; 9359: } 9360: ctxt->input->cur += tlen; 9361: name = (xmlChar*)1; 9362: } else { 9363: if (prefix == NULL) 9364: name = xmlParseNameAndCompare(ctxt, ctxt->name); 9365: else 9366: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9367: } 9368: 9369: /* 9370: * We should definitely be at the ending "S? '>'" part 9371: */ 9372: GROW; 9373: SKIP_BLANKS; 9374: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9375: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9376: } else 9377: NEXT1; 9378: 9379: /* 9380: * [ WFC: Element Type Match ] 9381: * The Name in an element's end-tag must match the element type in the 9382: * start-tag. 
9383: * 9384: */ 9385: if (name != (xmlChar*)1) { 9386: if (name == NULL) name = BAD_CAST "unparseable"; 9387: if ((line == 0) && (ctxt->node != NULL)) 9388: line = ctxt->node->line; 9389: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9390: "Opening and ending tag mismatch: %s line %d and %s\n", 9391: ctxt->name, line, name); 9392: } 9393: 9394: /* 9395: * SAX: End of Tag 9396: */ 9397: done: 9398: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9399: (!ctxt->disableSAX)) 9400: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9401: 9402: spacePop(ctxt); 9403: if (nsNr != 0) 9404: nsPop(ctxt, nsNr); 9405: return; 9406: } 9407: 9408: /** 9409: * xmlParseCDSect: 9410: * @ctxt: an XML parser context 9411: * 9412: * Parse escaped pure raw content. 9413: * 9414: * [18] CDSect ::= CDStart CData CDEnd 9415: * 9416: * [19] CDStart ::= '<![CDATA[' 9417: * 9418: * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9419: * 9420: * [21] CDEnd ::= ']]>' 9421: */ 9422: void 9423: xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9424: xmlChar *buf = NULL; 9425: int len = 0; 9426: int size = XML_PARSER_BUFFER_SIZE; 9427: int r, rl; 9428: int s, sl; 9429: int cur, l; 9430: int count = 0; 9431: 9432: /* Check 2.6.0 was NXT(0) not RAW */ 9433: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9434: SKIP(9); 9435: } else 9436: return; 9437: 9438: ctxt->instate = XML_PARSER_CDATA_SECTION; 9439: r = CUR_CHAR(rl); 9440: if (!IS_CHAR(r)) { 9441: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9442: ctxt->instate = XML_PARSER_CONTENT; 9443: return; 9444: } 9445: NEXTL(rl); 9446: s = CUR_CHAR(sl); 9447: if (!IS_CHAR(s)) { 9448: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9449: ctxt->instate = XML_PARSER_CONTENT; 9450: return; 9451: } 9452: NEXTL(sl); 9453: cur = CUR_CHAR(l); 9454: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9455: if (buf == NULL) { 9456: xmlErrMemory(ctxt, NULL); 9457: return; 9458: } 9459: while 
(IS_CHAR(cur) && 9460: ((r != ']') || (s != ']') || (cur != '>'))) { 9461: if (len + 5 >= size) { 9462: xmlChar *tmp; 9463: 9464: size *= 2; 9465: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9466: if (tmp == NULL) { 9467: xmlFree(buf); 9468: xmlErrMemory(ctxt, NULL); 9469: return; 9470: } 9471: buf = tmp; 9472: } 9473: COPY_BUF(rl,buf,len,r); 9474: r = s; 9475: rl = sl; 9476: s = cur; 9477: sl = l; 9478: count++; 9479: if (count > 50) { 9480: GROW; 9481: count = 0; 9482: } 9483: NEXTL(l); 9484: cur = CUR_CHAR(l); 9485: } 9486: buf[len] = 0; 9487: ctxt->instate = XML_PARSER_CONTENT; 9488: if (cur != '>') { 9489: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9490: "CData section not finished\n%.50s\n", buf); 9491: xmlFree(buf); 9492: return; 9493: } 9494: NEXTL(l); 9495: 9496: /* 9497: * OK the buffer is to be consumed as cdata. 9498: */ 9499: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9500: if (ctxt->sax->cdataBlock != NULL) 9501: ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9502: else if (ctxt->sax->characters != NULL) 9503: ctxt->sax->characters(ctxt->userData, buf, len); 9504: } 9505: xmlFree(buf); 9506: } 9507: 9508: /** 9509: * xmlParseContent: 9510: * @ctxt: an XML parser context 9511: * 9512: * Parse a content: 9513: * 9514: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9515: */ 9516: 9517: void 9518: xmlParseContent(xmlParserCtxtPtr ctxt) { 9519: GROW; 9520: while ((RAW != 0) && 9521: ((RAW != '<') || (NXT(1) != '/')) && 9522: (ctxt->instate != XML_PARSER_EOF)) { 9523: const xmlChar *test = CUR_PTR; 9524: unsigned int cons = ctxt->input->consumed; 9525: const xmlChar *cur = ctxt->input->cur; 9526: 9527: /* 9528: * First case : a Processing Instruction. 
9529: */ 9530: if ((*cur == '<') && (cur[1] == '?')) { 9531: xmlParsePI(ctxt); 9532: } 9533: 9534: /* 9535: * Second case : a CDSection 9536: */ 9537: /* 2.6.0 test was *cur not RAW */ 9538: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9539: xmlParseCDSect(ctxt); 9540: } 9541: 9542: /* 9543: * Third case : a comment 9544: */ 9545: else if ((*cur == '<') && (NXT(1) == '!') && 9546: (NXT(2) == '-') && (NXT(3) == '-')) { 9547: xmlParseComment(ctxt); 9548: ctxt->instate = XML_PARSER_CONTENT; 9549: } 9550: 9551: /* 9552: * Fourth case : a sub-element. 9553: */ 9554: else if (*cur == '<') { 9555: xmlParseElement(ctxt); 9556: } 9557: 9558: /* 9559: * Fifth case : a reference. If if has not been resolved, 9560: * parsing returns it's Name, create the node 9561: */ 9562: 9563: else if (*cur == '&') { 9564: xmlParseReference(ctxt); 9565: } 9566: 9567: /* 9568: * Last case, text. Note that References are handled directly. 9569: */ 9570: else { 9571: xmlParseCharData(ctxt, 0); 9572: } 9573: 9574: GROW; 9575: /* 9576: * Pop-up of finished entities. 9577: */ 9578: while ((RAW == 0) && (ctxt->inputNr > 1)) 9579: xmlPopInput(ctxt); 9580: SHRINK; 9581: 9582: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9583: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9584: "detected an error in element content\n"); 9585: ctxt->instate = XML_PARSER_EOF; 9586: break; 9587: } 9588: } 9589: } 9590: 9591: /** 9592: * xmlParseElement: 9593: * @ctxt: an XML parser context 9594: * 9595: * parse an XML element, this is highly recursive 9596: * 9597: * [39] element ::= EmptyElemTag | STag content ETag 9598: * 9599: * [ WFC: Element Type Match ] 9600: * The Name in an element's end-tag must match the element type in the 9601: * start-tag. 
9602: * 9603: */ 9604: 9605: void 9606: xmlParseElement(xmlParserCtxtPtr ctxt) { 9607: const xmlChar *name; 9608: const xmlChar *prefix = NULL; 9609: const xmlChar *URI = NULL; 9610: xmlParserNodeInfo node_info; 9611: int line, tlen = 0; 9612: xmlNodePtr ret; 9613: int nsNr = ctxt->nsNr; 9614: 9615: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9616: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9617: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9618: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9619: xmlParserMaxDepth); 9620: ctxt->instate = XML_PARSER_EOF; 9621: return; 9622: } 9623: 9624: /* Capture start position */ 9625: if (ctxt->record_info) { 9626: node_info.begin_pos = ctxt->input->consumed + 9627: (CUR_PTR - ctxt->input->base); 9628: node_info.begin_line = ctxt->input->line; 9629: } 9630: 9631: if (ctxt->spaceNr == 0) 9632: spacePush(ctxt, -1); 9633: else if (*ctxt->space == -2) 9634: spacePush(ctxt, -1); 9635: else 9636: spacePush(ctxt, *ctxt->space); 9637: 9638: line = ctxt->input->line; 9639: #ifdef LIBXML_SAX1_ENABLED 9640: if (ctxt->sax2) 9641: #endif /* LIBXML_SAX1_ENABLED */ 9642: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9643: #ifdef LIBXML_SAX1_ENABLED 9644: else 9645: name = xmlParseStartTag(ctxt); 9646: #endif /* LIBXML_SAX1_ENABLED */ 9647: if (ctxt->instate == XML_PARSER_EOF) 9648: return; 9649: if (name == NULL) { 9650: spacePop(ctxt); 9651: return; 9652: } 9653: namePush(ctxt, name); 9654: ret = ctxt->node; 9655: 9656: #ifdef LIBXML_VALID_ENABLED 9657: /* 9658: * [ VC: Root Element Type ] 9659: * The Name in the document type declaration must match the element 9660: * type of the root element. 9661: */ 9662: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9663: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9664: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9665: #endif /* LIBXML_VALID_ENABLED */ 9666: 9667: /* 9668: * Check for an Empty Element. 
9669: */ 9670: if ((RAW == '/') && (NXT(1) == '>')) { 9671: SKIP(2); 9672: if (ctxt->sax2) { 9673: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9674: (!ctxt->disableSAX)) 9675: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9676: #ifdef LIBXML_SAX1_ENABLED 9677: } else { 9678: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9679: (!ctxt->disableSAX)) 9680: ctxt->sax->endElement(ctxt->userData, name); 9681: #endif /* LIBXML_SAX1_ENABLED */ 9682: } 9683: namePop(ctxt); 9684: spacePop(ctxt); 9685: if (nsNr != ctxt->nsNr) 9686: nsPop(ctxt, ctxt->nsNr - nsNr); 9687: if ( ret != NULL && ctxt->record_info ) { 9688: node_info.end_pos = ctxt->input->consumed + 9689: (CUR_PTR - ctxt->input->base); 9690: node_info.end_line = ctxt->input->line; 9691: node_info.node = ret; 9692: xmlParserAddNodeInfo(ctxt, &node_info); 9693: } 9694: return; 9695: } 9696: if (RAW == '>') { 9697: NEXT1; 9698: } else { 9699: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9700: "Couldn't find end of Start Tag %s line %d\n", 9701: name, line, NULL); 9702: 9703: /* 9704: * end of parsing of this node. 9705: */ 9706: nodePop(ctxt); 9707: namePop(ctxt); 9708: spacePop(ctxt); 9709: if (nsNr != ctxt->nsNr) 9710: nsPop(ctxt, ctxt->nsNr - nsNr); 9711: 9712: /* 9713: * Capture end position and add node 9714: */ 9715: if ( ret != NULL && ctxt->record_info ) { 9716: node_info.end_pos = ctxt->input->consumed + 9717: (CUR_PTR - ctxt->input->base); 9718: node_info.end_line = ctxt->input->line; 9719: node_info.node = ret; 9720: xmlParserAddNodeInfo(ctxt, &node_info); 9721: } 9722: return; 9723: } 9724: 9725: /* 9726: * Parse the content of the element: 9727: */ 9728: xmlParseContent(ctxt); 9729: if (!IS_BYTE_CHAR(RAW)) { 9730: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9731: "Premature end of data in tag %s line %d\n", 9732: name, line, NULL); 9733: 9734: /* 9735: * end of parsing of this node. 
9736: */ 9737: nodePop(ctxt); 9738: namePop(ctxt); 9739: spacePop(ctxt); 9740: if (nsNr != ctxt->nsNr) 9741: nsPop(ctxt, ctxt->nsNr - nsNr); 9742: return; 9743: } 9744: 9745: /* 9746: * parse the end of tag: '</' should be here. 9747: */ 9748: if (ctxt->sax2) { 9749: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 9750: namePop(ctxt); 9751: } 9752: #ifdef LIBXML_SAX1_ENABLED 9753: else 9754: xmlParseEndTag1(ctxt, line); 9755: #endif /* LIBXML_SAX1_ENABLED */ 9756: 9757: /* 9758: * Capture end position and add node 9759: */ 9760: if ( ret != NULL && ctxt->record_info ) { 9761: node_info.end_pos = ctxt->input->consumed + 9762: (CUR_PTR - ctxt->input->base); 9763: node_info.end_line = ctxt->input->line; 9764: node_info.node = ret; 9765: xmlParserAddNodeInfo(ctxt, &node_info); 9766: } 9767: } 9768: 9769: /** 9770: * xmlParseVersionNum: 9771: * @ctxt: an XML parser context 9772: * 9773: * parse the XML version value. 9774: * 9775: * [26] VersionNum ::= '1.' [0-9]+ 9776: * 9777: * In practice allow [0-9].[0-9]+ at that level 9778: * 9779: * Returns the string giving the XML version number, or NULL 9780: */ 9781: xmlChar * 9782: xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9783: xmlChar *buf = NULL; 9784: int len = 0; 9785: int size = 10; 9786: xmlChar cur; 9787: 9788: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9789: if (buf == NULL) { 9790: xmlErrMemory(ctxt, NULL); 9791: return(NULL); 9792: } 9793: cur = CUR; 9794: if (!((cur >= '0') && (cur <= '9'))) { 9795: xmlFree(buf); 9796: return(NULL); 9797: } 9798: buf[len++] = cur; 9799: NEXT; 9800: cur=CUR; 9801: if (cur != '.') { 9802: xmlFree(buf); 9803: return(NULL); 9804: } 9805: buf[len++] = cur; 9806: NEXT; 9807: cur=CUR; 9808: while ((cur >= '0') && (cur <= '9')) { 9809: if (len + 1 >= size) { 9810: xmlChar *tmp; 9811: 9812: size *= 2; 9813: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9814: if (tmp == NULL) { 9815: xmlFree(buf); 9816: xmlErrMemory(ctxt, NULL); 9817: 
return(NULL); 9818: } 9819: buf = tmp; 9820: } 9821: buf[len++] = cur; 9822: NEXT; 9823: cur=CUR; 9824: } 9825: buf[len] = 0; 9826: return(buf); 9827: } 9828: 9829: /** 9830: * xmlParseVersionInfo: 9831: * @ctxt: an XML parser context 9832: * 9833: * parse the XML version. 9834: * 9835: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9836: * 9837: * [25] Eq ::= S? '=' S? 9838: * 9839: * Returns the version string, e.g. "1.0" 9840: */ 9841: 9842: xmlChar * 9843: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9844: xmlChar *version = NULL; 9845: 9846: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9847: SKIP(7); 9848: SKIP_BLANKS; 9849: if (RAW != '=') { 9850: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9851: return(NULL); 9852: } 9853: NEXT; 9854: SKIP_BLANKS; 9855: if (RAW == '"') { 9856: NEXT; 9857: version = xmlParseVersionNum(ctxt); 9858: if (RAW != '"') { 9859: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9860: } else 9861: NEXT; 9862: } else if (RAW == '\''){ 9863: NEXT; 9864: version = xmlParseVersionNum(ctxt); 9865: if (RAW != '\'') { 9866: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9867: } else 9868: NEXT; 9869: } else { 9870: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9871: } 9872: } 9873: return(version); 9874: } 9875: 9876: /** 9877: * xmlParseEncName: 9878: * @ctxt: an XML parser context 9879: * 9880: * parse the XML encoding name 9881: * 9882: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9883: * 9884: * Returns the encoding name value or NULL 9885: */ 9886: xmlChar * 9887: xmlParseEncName(xmlParserCtxtPtr ctxt) { 9888: xmlChar *buf = NULL; 9889: int len = 0; 9890: int size = 10; 9891: xmlChar cur; 9892: 9893: cur = CUR; 9894: if (((cur >= 'a') && (cur <= 'z')) || 9895: ((cur >= 'A') && (cur <= 'Z'))) { 9896: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9897: if (buf == NULL) { 9898: xmlErrMemory(ctxt, NULL); 9899: return(NULL); 9900: } 9901: 9902: buf[len++] = cur; 9903: 
NEXT; 9904: cur = CUR; 9905: while (((cur >= 'a') && (cur <= 'z')) || 9906: ((cur >= 'A') && (cur <= 'Z')) || 9907: ((cur >= '0') && (cur <= '9')) || 9908: (cur == '.') || (cur == '_') || 9909: (cur == '-')) { 9910: if (len + 1 >= size) { 9911: xmlChar *tmp; 9912: 9913: size *= 2; 9914: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9915: if (tmp == NULL) { 9916: xmlErrMemory(ctxt, NULL); 9917: xmlFree(buf); 9918: return(NULL); 9919: } 9920: buf = tmp; 9921: } 9922: buf[len++] = cur; 9923: NEXT; 9924: cur = CUR; 9925: if (cur == 0) { 9926: SHRINK; 9927: GROW; 9928: cur = CUR; 9929: } 9930: } 9931: buf[len] = 0; 9932: } else { 9933: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9934: } 9935: return(buf); 9936: } 9937: 9938: /** 9939: * xmlParseEncodingDecl: 9940: * @ctxt: an XML parser context 9941: * 9942: * parse the XML encoding declaration 9943: * 9944: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9945: * 9946: * this setups the conversion filters. 
9947: * 9948: * Returns the encoding value or NULL 9949: */ 9950: 9951: const xmlChar * 9952: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9953: xmlChar *encoding = NULL; 9954: 9955: SKIP_BLANKS; 9956: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9957: SKIP(8); 9958: SKIP_BLANKS; 9959: if (RAW != '=') { 9960: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9961: return(NULL); 9962: } 9963: NEXT; 9964: SKIP_BLANKS; 9965: if (RAW == '"') { 9966: NEXT; 9967: encoding = xmlParseEncName(ctxt); 9968: if (RAW != '"') { 9969: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9970: } else 9971: NEXT; 9972: } else if (RAW == '\''){ 9973: NEXT; 9974: encoding = xmlParseEncName(ctxt); 9975: if (RAW != '\'') { 9976: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9977: } else 9978: NEXT; 9979: } else { 9980: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9981: } 9982: 9983: /* 9984: * Non standard parsing, allowing the user to ignore encoding 9985: */ 9986: if (ctxt->options & XML_PARSE_IGNORE_ENC) 9987: return(encoding); 9988: 9989: /* 9990: * UTF-16 encoding stwich has already taken place at this stage, 9991: * more over the little-endian/big-endian selection is already done 9992: */ 9993: if ((encoding != NULL) && 9994: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9995: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9996: /* 9997: * If no encoding was passed to the parser, that we are 9998: * using UTF-16 and no decoder is present i.e. 
the 9999: * document is apparently UTF-8 compatible, then raise an 10000: * encoding mismatch fatal error 10001: */ 10002: if ((ctxt->encoding == NULL) && 10003: (ctxt->input->buf != NULL) && 10004: (ctxt->input->buf->encoder == NULL)) { 10005: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10006: "Document labelled UTF-16 but has UTF-8 content\n"); 10007: } 10008: if (ctxt->encoding != NULL) 10009: xmlFree((xmlChar *) ctxt->encoding); 10010: ctxt->encoding = encoding; 10011: } 10012: /* 10013: * UTF-8 encoding is handled natively 10014: */ 10015: else if ((encoding != NULL) && 10016: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10017: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10018: if (ctxt->encoding != NULL) 10019: xmlFree((xmlChar *) ctxt->encoding); 10020: ctxt->encoding = encoding; 10021: } 10022: else if (encoding != NULL) { 10023: xmlCharEncodingHandlerPtr handler; 10024: 10025: if (ctxt->input->encoding != NULL) 10026: xmlFree((xmlChar *) ctxt->input->encoding); 10027: ctxt->input->encoding = encoding; 10028: 10029: handler = xmlFindCharEncodingHandler((const char *) encoding); 10030: if (handler != NULL) { 10031: xmlSwitchToEncoding(ctxt, handler); 10032: } else { 10033: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10034: "Unsupported encoding %s\n", encoding); 10035: return(NULL); 10036: } 10037: } 10038: } 10039: return(encoding); 10040: } 10041: 10042: /** 10043: * xmlParseSDDecl: 10044: * @ctxt: an XML parser context 10045: * 10046: * parse the XML standalone declaration 10047: * 10048: * [32] SDDecl ::= S 'standalone' Eq 10049: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10050: * 10051: * [ VC: Standalone Document Declaration ] 10052: * TODO The standalone document declaration must have the value "no" 10053: * if any external markup declarations contain declarations of: 10054: * - attributes with default values, if elements to which these 10055: * attributes apply appear in the document without specifications 10056: * of 
values for these attributes, or 10057: * - entities (other than amp, lt, gt, apos, quot), if references 10058: * to those entities appear in the document, or 10059: * - attributes with values subject to normalization, where the 10060: * attribute appears in the document with a value which will change 10061: * as a result of normalization, or 10062: * - element types with element content, if white space occurs directly 10063: * within any instance of those types. 10064: * 10065: * Returns: 10066: * 1 if standalone="yes" 10067: * 0 if standalone="no" 10068: * -2 if standalone attribute is missing or invalid 10069: * (A standalone value of -2 means that the XML declaration was found, 10070: * but no value was specified for the standalone attribute). 10071: */ 10072: 10073: int 10074: xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10075: int standalone = -2; 10076: 10077: SKIP_BLANKS; 10078: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10079: SKIP(10); 10080: SKIP_BLANKS; 10081: if (RAW != '=') { 10082: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10083: return(standalone); 10084: } 10085: NEXT; 10086: SKIP_BLANKS; 10087: if (RAW == '\''){ 10088: NEXT; 10089: if ((RAW == 'n') && (NXT(1) == 'o')) { 10090: standalone = 0; 10091: SKIP(2); 10092: } else if ((RAW == 'y') && (NXT(1) == 'e') && 10093: (NXT(2) == 's')) { 10094: standalone = 1; 10095: SKIP(3); 10096: } else { 10097: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10098: } 10099: if (RAW != '\'') { 10100: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10101: } else 10102: NEXT; 10103: } else if (RAW == '"'){ 10104: NEXT; 10105: if ((RAW == 'n') && (NXT(1) == 'o')) { 10106: standalone = 0; 10107: SKIP(2); 10108: } else if ((RAW == 'y') && (NXT(1) == 'e') && 10109: (NXT(2) == 's')) { 10110: standalone = 1; 10111: SKIP(3); 10112: } else { 10113: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10114: } 10115: if (RAW != '"') { 10116: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, 
NULL); 10117: } else 10118: NEXT; 10119: } else { 10120: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10121: } 10122: } 10123: return(standalone); 10124: } 10125: 10126: /** 10127: * xmlParseXMLDecl: 10128: * @ctxt: an XML parser context 10129: * 10130: * parse an XML declaration header 10131: * 10132: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10133: */ 10134: 10135: void 10136: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10137: xmlChar *version; 10138: 10139: /* 10140: * This value for standalone indicates that the document has an 10141: * XML declaration but it does not have a standalone attribute. 10142: * It will be overwritten later if a standalone attribute is found. 10143: */ 10144: ctxt->input->standalone = -2; 10145: 10146: /* 10147: * We know that '<?xml' is here. 10148: */ 10149: SKIP(5); 10150: 10151: if (!IS_BLANK_CH(RAW)) { 10152: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10153: "Blank needed after '<?xml'\n"); 10154: } 10155: SKIP_BLANKS; 10156: 10157: /* 10158: * We must have the VersionInfo here. 
10159: */ 10160: version = xmlParseVersionInfo(ctxt); 10161: if (version == NULL) { 10162: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10163: } else { 10164: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10165: /* 10166: * Changed here for XML-1.0 5th edition 10167: */ 10168: if (ctxt->options & XML_PARSE_OLD10) { 10169: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10170: "Unsupported version '%s'\n", 10171: version); 10172: } else { 10173: if ((version[0] == '1') && ((version[1] == '.'))) { 10174: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10175: "Unsupported version '%s'\n", 10176: version, NULL); 10177: } else { 10178: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10179: "Unsupported version '%s'\n", 10180: version); 10181: } 10182: } 10183: } 10184: if (ctxt->version != NULL) 10185: xmlFree((void *) ctxt->version); 10186: ctxt->version = version; 10187: } 10188: 10189: /* 10190: * We may have the encoding declaration 10191: */ 10192: if (!IS_BLANK_CH(RAW)) { 10193: if ((RAW == '?') && (NXT(1) == '>')) { 10194: SKIP(2); 10195: return; 10196: } 10197: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10198: } 10199: xmlParseEncodingDecl(ctxt); 10200: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10201: /* 10202: * The XML REC instructs us to stop parsing right here 10203: */ 10204: return; 10205: } 10206: 10207: /* 10208: * We may have the standalone status. 
10209: */ 10210: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10211: if ((RAW == '?') && (NXT(1) == '>')) { 10212: SKIP(2); 10213: return; 10214: } 10215: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10216: } 10217: 10218: /* 10219: * We can grow the input buffer freely at that point 10220: */ 10221: GROW; 10222: 10223: SKIP_BLANKS; 10224: ctxt->input->standalone = xmlParseSDDecl(ctxt); 10225: 10226: SKIP_BLANKS; 10227: if ((RAW == '?') && (NXT(1) == '>')) { 10228: SKIP(2); 10229: } else if (RAW == '>') { 10230: /* Deprecated old WD ... */ 10231: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10232: NEXT; 10233: } else { 10234: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10235: MOVETO_ENDTAG(CUR_PTR); 10236: NEXT; 10237: } 10238: } 10239: 10240: /** 10241: * xmlParseMisc: 10242: * @ctxt: an XML parser context 10243: * 10244: * parse an XML Misc* optional field. 10245: * 10246: * [27] Misc ::= Comment | PI | S 10247: */ 10248: 10249: void 10250: xmlParseMisc(xmlParserCtxtPtr ctxt) { 10251: while (((RAW == '<') && (NXT(1) == '?')) || 10252: (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10253: IS_BLANK_CH(CUR)) { 10254: if ((RAW == '<') && (NXT(1) == '?')) { 10255: xmlParsePI(ctxt); 10256: } else if (IS_BLANK_CH(CUR)) { 10257: NEXT; 10258: } else 10259: xmlParseComment(ctxt); 10260: } 10261: } 10262: 10263: /** 10264: * xmlParseDocument: 10265: * @ctxt: an XML parser context 10266: * 10267: * parse an XML document (and build a tree if using the standard SAX 10268: * interface). 10269: * 10270: * [1] document ::= prolog element Misc* 10271: * 10272: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10273: * 10274: * Returns 0, -1 in case of error. the parser context is augmented 10275: * as a result of the parsing. 
10276: */ 10277: 10278: int 10279: xmlParseDocument(xmlParserCtxtPtr ctxt) { 10280: xmlChar start[4]; 10281: xmlCharEncoding enc; 10282: 10283: xmlInitParser(); 10284: 10285: if ((ctxt == NULL) || (ctxt->input == NULL)) 10286: return(-1); 10287: 10288: GROW; 10289: 10290: /* 10291: * SAX: detecting the level. 10292: */ 10293: xmlDetectSAX2(ctxt); 10294: 10295: /* 10296: * SAX: beginning of the document processing. 10297: */ 10298: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10299: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10300: 10301: if ((ctxt->encoding == NULL) && 10302: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10303: /* 10304: * Get the 4 first bytes and decode the charset 10305: * if enc != XML_CHAR_ENCODING_NONE 10306: * plug some encoding conversion routines. 10307: */ 10308: start[0] = RAW; 10309: start[1] = NXT(1); 10310: start[2] = NXT(2); 10311: start[3] = NXT(3); 10312: enc = xmlDetectCharEncoding(&start[0], 4); 10313: if (enc != XML_CHAR_ENCODING_NONE) { 10314: xmlSwitchEncoding(ctxt, enc); 10315: } 10316: } 10317: 10318: 10319: if (CUR == 0) { 10320: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10321: } 10322: 10323: /* 10324: * Check for the XMLDecl in the Prolog. 10325: * do not GROW here to avoid the detected encoder to decode more 10326: * than just the first line, unless the amount of data is really 10327: * too small to hold "<?xml version="1.0" encoding="foo" 10328: */ 10329: if ((ctxt->input->end - ctxt->input->cur) < 35) { 10330: GROW; 10331: } 10332: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10333: 10334: /* 10335: * Note that we will switch encoding on the fly. 
10336: */ 10337: xmlParseXMLDecl(ctxt); 10338: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10339: /* 10340: * The XML REC instructs us to stop parsing right here 10341: */ 10342: return(-1); 10343: } 10344: ctxt->standalone = ctxt->input->standalone; 10345: SKIP_BLANKS; 10346: } else { 10347: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10348: } 10349: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10350: ctxt->sax->startDocument(ctxt->userData); 10351: 10352: /* 10353: * The Misc part of the Prolog 10354: */ 10355: GROW; 10356: xmlParseMisc(ctxt); 10357: 10358: /* 10359: * Then possibly doc type declaration(s) and more Misc 10360: * (doctypedecl Misc*)? 10361: */ 10362: GROW; 10363: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10364: 10365: ctxt->inSubset = 1; 10366: xmlParseDocTypeDecl(ctxt); 10367: if (RAW == '[') { 10368: ctxt->instate = XML_PARSER_DTD; 10369: xmlParseInternalSubset(ctxt); 10370: } 10371: 10372: /* 10373: * Create and update the external subset. 
10374: */ 10375: ctxt->inSubset = 2; 10376: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10377: (!ctxt->disableSAX)) 10378: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10379: ctxt->extSubSystem, ctxt->extSubURI); 10380: ctxt->inSubset = 0; 10381: 10382: xmlCleanSpecialAttr(ctxt); 10383: 10384: ctxt->instate = XML_PARSER_PROLOG; 10385: xmlParseMisc(ctxt); 10386: } 10387: 10388: /* 10389: * Time to start parsing the tree itself 10390: */ 10391: GROW; 10392: if (RAW != '<') { 10393: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10394: "Start tag expected, '<' not found\n"); 10395: } else { 10396: ctxt->instate = XML_PARSER_CONTENT; 10397: xmlParseElement(ctxt); 10398: ctxt->instate = XML_PARSER_EPILOG; 10399: 10400: 10401: /* 10402: * The Misc part at the end 10403: */ 10404: xmlParseMisc(ctxt); 10405: 10406: if (RAW != 0) { 10407: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10408: } 10409: ctxt->instate = XML_PARSER_EOF; 10410: } 10411: 10412: /* 10413: * SAX: end of the document processing. 10414: */ 10415: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10416: ctxt->sax->endDocument(ctxt->userData); 10417: 10418: /* 10419: * Remove locally kept entity definitions if the tree was not built 10420: */ 10421: if ((ctxt->myDoc != NULL) && 10422: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10423: xmlFreeDoc(ctxt->myDoc); 10424: ctxt->myDoc = NULL; 10425: } 10426: 10427: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10428: ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10429: if (ctxt->valid) 10430: ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10431: if (ctxt->nsWellFormed) 10432: ctxt->myDoc->properties |= XML_DOC_NSVALID; 10433: if (ctxt->options & XML_PARSE_OLD10) 10434: ctxt->myDoc->properties |= XML_DOC_OLD10; 10435: } 10436: if (! 
ctxt->wellFormed) { 10437: ctxt->valid = 0; 10438: return(-1); 10439: } 10440: return(0); 10441: } 10442: 10443: /** 10444: * xmlParseExtParsedEnt: 10445: * @ctxt: an XML parser context 10446: * 10447: * parse a general parsed entity 10448: * An external general parsed entity is well-formed if it matches the 10449: * production labeled extParsedEnt. 10450: * 10451: * [78] extParsedEnt ::= TextDecl? content 10452: * 10453: * Returns 0, -1 in case of error. the parser context is augmented 10454: * as a result of the parsing. 10455: */ 10456: 10457: int 10458: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10459: xmlChar start[4]; 10460: xmlCharEncoding enc; 10461: 10462: if ((ctxt == NULL) || (ctxt->input == NULL)) 10463: return(-1); 10464: 10465: xmlDefaultSAXHandlerInit(); 10466: 10467: xmlDetectSAX2(ctxt); 10468: 10469: GROW; 10470: 10471: /* 10472: * SAX: beginning of the document processing. 10473: */ 10474: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10475: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10476: 10477: /* 10478: * Get the 4 first bytes and decode the charset 10479: * if enc != XML_CHAR_ENCODING_NONE 10480: * plug some encoding conversion routines. 10481: */ 10482: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10483: start[0] = RAW; 10484: start[1] = NXT(1); 10485: start[2] = NXT(2); 10486: start[3] = NXT(3); 10487: enc = xmlDetectCharEncoding(start, 4); 10488: if (enc != XML_CHAR_ENCODING_NONE) { 10489: xmlSwitchEncoding(ctxt, enc); 10490: } 10491: } 10492: 10493: 10494: if (CUR == 0) { 10495: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10496: } 10497: 10498: /* 10499: * Check for the XMLDecl in the Prolog. 10500: */ 10501: GROW; 10502: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10503: 10504: /* 10505: * Note that we will switch encoding on the fly. 
10506: */ 10507: xmlParseXMLDecl(ctxt); 10508: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10509: /* 10510: * The XML REC instructs us to stop parsing right here 10511: */ 10512: return(-1); 10513: } 10514: SKIP_BLANKS; 10515: } else { 10516: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10517: } 10518: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10519: ctxt->sax->startDocument(ctxt->userData); 10520: 10521: /* 10522: * Doing validity checking on chunk doesn't make sense 10523: */ 10524: ctxt->instate = XML_PARSER_CONTENT; 10525: ctxt->validate = 0; 10526: ctxt->loadsubset = 0; 10527: ctxt->depth = 0; 10528: 10529: xmlParseContent(ctxt); 10530: 10531: if ((RAW == '<') && (NXT(1) == '/')) { 10532: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10533: } else if (RAW != 0) { 10534: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10535: } 10536: 10537: /* 10538: * SAX: end of the document processing. 10539: */ 10540: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10541: ctxt->sax->endDocument(ctxt->userData); 10542: 10543: if (! ctxt->wellFormed) return(-1); 10544: return(0); 10545: } 10546: 10547: #ifdef LIBXML_PUSH_ENABLED 10548: /************************************************************************ 10549: * * 10550: * Progressive parsing interfaces * 10551: * * 10552: ************************************************************************/ 10553: 10554: /** 10555: * xmlParseLookupSequence: 10556: * @ctxt: an XML parser context 10557: * @first: the first char to lookup 10558: * @next: the next char to lookup or zero 10559: * @third: the next char to lookup or zero 10560: * 10561: * Try to find if a sequence (first, next, third) or just (first next) or 10562: * (first) is available in the input stream. 10563: * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10564: * to avoid rescanning sequences of bytes, it DOES change the state of the 10565: * parser, do not use liberally. 
10566: * 10567: * Returns the index to the current parsing point if the full sequence 10568: * is available, -1 otherwise. 10569: */ 10570: static int 10571: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10572: xmlChar next, xmlChar third) { 10573: int base, len; 10574: xmlParserInputPtr in; 10575: const xmlChar *buf; 10576: 10577: in = ctxt->input; 10578: if (in == NULL) return(-1); 10579: base = in->cur - in->base; 10580: if (base < 0) return(-1); 10581: if (ctxt->checkIndex > base) 10582: base = ctxt->checkIndex; 10583: if (in->buf == NULL) { 10584: buf = in->base; 10585: len = in->length; 10586: } else { 10587: buf = in->buf->buffer->content; 10588: len = in->buf->buffer->use; 10589: } 10590: /* take into account the sequence length */ 10591: if (third) len -= 2; 10592: else if (next) len --; 10593: for (;base < len;base++) { 10594: if (buf[base] == first) { 10595: if (third != 0) { 10596: if ((buf[base + 1] != next) || 10597: (buf[base + 2] != third)) continue; 10598: } else if (next != 0) { 10599: if (buf[base + 1] != next) continue; 10600: } 10601: ctxt->checkIndex = 0; 10602: #ifdef DEBUG_PUSH 10603: if (next == 0) 10604: xmlGenericError(xmlGenericErrorContext, 10605: "PP: lookup '%c' found at %d\n", 10606: first, base); 10607: else if (third == 0) 10608: xmlGenericError(xmlGenericErrorContext, 10609: "PP: lookup '%c%c' found at %d\n", 10610: first, next, base); 10611: else 10612: xmlGenericError(xmlGenericErrorContext, 10613: "PP: lookup '%c%c%c' found at %d\n", 10614: first, next, third, base); 10615: #endif 10616: return(base - (in->cur - in->base)); 10617: } 10618: } 10619: ctxt->checkIndex = base; 10620: #ifdef DEBUG_PUSH 10621: if (next == 0) 10622: xmlGenericError(xmlGenericErrorContext, 10623: "PP: lookup '%c' failed\n", first); 10624: else if (third == 0) 10625: xmlGenericError(xmlGenericErrorContext, 10626: "PP: lookup '%c%c' failed\n", first, next); 10627: else 10628: xmlGenericError(xmlGenericErrorContext, 10629: "PP: lookup 
'%c%c%c' failed\n", first, next, third); 10630: #endif 10631: return(-1); 10632: } 10633: 10634: /** 10635: * xmlParseGetLasts: 10636: * @ctxt: an XML parser context 10637: * @lastlt: pointer to store the last '<' from the input 10638: * @lastgt: pointer to store the last '>' from the input 10639: * 10640: * Lookup the last < and > in the current chunk 10641: */ 10642: static void 10643: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10644: const xmlChar **lastgt) { 10645: const xmlChar *tmp; 10646: 10647: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10648: xmlGenericError(xmlGenericErrorContext, 10649: "Internal error: xmlParseGetLasts\n"); 10650: return; 10651: } 10652: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10653: tmp = ctxt->input->end; 10654: tmp--; 10655: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10656: if (tmp < ctxt->input->base) { 10657: *lastlt = NULL; 10658: *lastgt = NULL; 10659: } else { 10660: *lastlt = tmp; 10661: tmp++; 10662: while ((tmp < ctxt->input->end) && (*tmp != '>')) { 10663: if (*tmp == '\'') { 10664: tmp++; 10665: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 10666: if (tmp < ctxt->input->end) tmp++; 10667: } else if (*tmp == '"') { 10668: tmp++; 10669: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10670: if (tmp < ctxt->input->end) tmp++; 10671: } else 10672: tmp++; 10673: } 10674: if (tmp < ctxt->input->end) 10675: *lastgt = tmp; 10676: else { 10677: tmp = *lastlt; 10678: tmp--; 10679: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 10680: if (tmp >= ctxt->input->base) 10681: *lastgt = tmp; 10682: else 10683: *lastgt = NULL; 10684: } 10685: } 10686: } else { 10687: *lastlt = NULL; 10688: *lastgt = NULL; 10689: } 10690: } 10691: /** 10692: * xmlCheckCdataPush: 10693: * @cur: pointer to the bock of characters 10694: * @len: length of the block in bytes 10695: * 10696: * Check that the block of characters is okay as SCdata content [20] 
10697: * 10698: * Returns the number of bytes to pass if okay, a negative index where an 10699: * UTF-8 error occured otherwise 10700: */ 10701: static int 10702: xmlCheckCdataPush(const xmlChar *utf, int len) { 10703: int ix; 10704: unsigned char c; 10705: int codepoint; 10706: 10707: if ((utf == NULL) || (len <= 0)) 10708: return(0); 10709: 10710: for (ix = 0; ix < len;) { /* string is 0-terminated */ 10711: c = utf[ix]; 10712: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 10713: if (c >= 0x20) 10714: ix++; 10715: else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 10716: ix++; 10717: else 10718: return(-ix); 10719: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 10720: if (ix + 2 > len) return(ix); 10721: if ((utf[ix+1] & 0xc0 ) != 0x80) 10722: return(-ix); 10723: codepoint = (utf[ix] & 0x1f) << 6; 10724: codepoint |= utf[ix+1] & 0x3f; 10725: if (!xmlIsCharQ(codepoint)) 10726: return(-ix); 10727: ix += 2; 10728: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 10729: if (ix + 3 > len) return(ix); 10730: if (((utf[ix+1] & 0xc0) != 0x80) || 10731: ((utf[ix+2] & 0xc0) != 0x80)) 10732: return(-ix); 10733: codepoint = (utf[ix] & 0xf) << 12; 10734: codepoint |= (utf[ix+1] & 0x3f) << 6; 10735: codepoint |= utf[ix+2] & 0x3f; 10736: if (!xmlIsCharQ(codepoint)) 10737: return(-ix); 10738: ix += 3; 10739: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 10740: if (ix + 4 > len) return(ix); 10741: if (((utf[ix+1] & 0xc0) != 0x80) || 10742: ((utf[ix+2] & 0xc0) != 0x80) || 10743: ((utf[ix+3] & 0xc0) != 0x80)) 10744: return(-ix); 10745: codepoint = (utf[ix] & 0x7) << 18; 10746: codepoint |= (utf[ix+1] & 0x3f) << 12; 10747: codepoint |= (utf[ix+2] & 0x3f) << 6; 10748: codepoint |= utf[ix+3] & 0x3f; 10749: if (!xmlIsCharQ(codepoint)) 10750: return(-ix); 10751: ix += 4; 10752: } else /* unknown encoding */ 10753: return(-ix); 10754: } 10755: return(ix); 10756: } 10757: 10758: /** 10759: * xmlParseTryOrFinish: 
10760: * @ctxt: an XML parser context 10761: * @terminate: last chunk indicator 10762: * 10763: * Try to progress on parsing 10764: * 10765: * Returns zero if no parsing was possible 10766: */ 10767: static int 10768: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 10769: int ret = 0; 10770: int avail, tlen; 10771: xmlChar cur, next; 10772: const xmlChar *lastlt, *lastgt; 10773: 10774: if (ctxt->input == NULL) 10775: return(0); 10776: 10777: #ifdef DEBUG_PUSH 10778: switch (ctxt->instate) { 10779: case XML_PARSER_EOF: 10780: xmlGenericError(xmlGenericErrorContext, 10781: "PP: try EOF\n"); break; 10782: case XML_PARSER_START: 10783: xmlGenericError(xmlGenericErrorContext, 10784: "PP: try START\n"); break; 10785: case XML_PARSER_MISC: 10786: xmlGenericError(xmlGenericErrorContext, 10787: "PP: try MISC\n");break; 10788: case XML_PARSER_COMMENT: 10789: xmlGenericError(xmlGenericErrorContext, 10790: "PP: try COMMENT\n");break; 10791: case XML_PARSER_PROLOG: 10792: xmlGenericError(xmlGenericErrorContext, 10793: "PP: try PROLOG\n");break; 10794: case XML_PARSER_START_TAG: 10795: xmlGenericError(xmlGenericErrorContext, 10796: "PP: try START_TAG\n");break; 10797: case XML_PARSER_CONTENT: 10798: xmlGenericError(xmlGenericErrorContext, 10799: "PP: try CONTENT\n");break; 10800: case XML_PARSER_CDATA_SECTION: 10801: xmlGenericError(xmlGenericErrorContext, 10802: "PP: try CDATA_SECTION\n");break; 10803: case XML_PARSER_END_TAG: 10804: xmlGenericError(xmlGenericErrorContext, 10805: "PP: try END_TAG\n");break; 10806: case XML_PARSER_ENTITY_DECL: 10807: xmlGenericError(xmlGenericErrorContext, 10808: "PP: try ENTITY_DECL\n");break; 10809: case XML_PARSER_ENTITY_VALUE: 10810: xmlGenericError(xmlGenericErrorContext, 10811: "PP: try ENTITY_VALUE\n");break; 10812: case XML_PARSER_ATTRIBUTE_VALUE: 10813: xmlGenericError(xmlGenericErrorContext, 10814: "PP: try ATTRIBUTE_VALUE\n");break; 10815: case XML_PARSER_DTD: 10816: xmlGenericError(xmlGenericErrorContext, 10817: "PP: try 
DTD\n");break; 10818: case XML_PARSER_EPILOG: 10819: xmlGenericError(xmlGenericErrorContext, 10820: "PP: try EPILOG\n");break; 10821: case XML_PARSER_PI: 10822: xmlGenericError(xmlGenericErrorContext, 10823: "PP: try PI\n");break; 10824: case XML_PARSER_IGNORE: 10825: xmlGenericError(xmlGenericErrorContext, 10826: "PP: try IGNORE\n");break; 10827: } 10828: #endif 10829: 10830: if ((ctxt->input != NULL) && 10831: (ctxt->input->cur - ctxt->input->base > 4096)) { 10832: xmlSHRINK(ctxt); 10833: ctxt->checkIndex = 0; 10834: } 10835: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10836: 10837: while (1) { 10838: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10839: return(0); 10840: 10841: 10842: /* 10843: * Pop-up of finished entities. 10844: */ 10845: while ((RAW == 0) && (ctxt->inputNr > 1)) 10846: xmlPopInput(ctxt); 10847: 10848: if (ctxt->input == NULL) break; 10849: if (ctxt->input->buf == NULL) 10850: avail = ctxt->input->length - 10851: (ctxt->input->cur - ctxt->input->base); 10852: else { 10853: /* 10854: * If we are operating on converted input, try to flush 10855: * remainng chars to avoid them stalling in the non-converted 10856: * buffer. 10857: */ 10858: if ((ctxt->input->buf->raw != NULL) && 10859: (ctxt->input->buf->raw->use > 0)) { 10860: int base = ctxt->input->base - 10861: ctxt->input->buf->buffer->content; 10862: int current = ctxt->input->cur - ctxt->input->base; 10863: 10864: xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10865: ctxt->input->base = ctxt->input->buf->buffer->content + base; 10866: ctxt->input->cur = ctxt->input->base + current; 10867: ctxt->input->end = 10868: &ctxt->input->buf->buffer->content[ 10869: ctxt->input->buf->buffer->use]; 10870: } 10871: avail = ctxt->input->buf->buffer->use - 10872: (ctxt->input->cur - ctxt->input->base); 10873: } 10874: if (avail < 1) 10875: goto done; 10876: switch (ctxt->instate) { 10877: case XML_PARSER_EOF: 10878: /* 10879: * Document parsing is done ! 
10880: */ 10881: goto done; 10882: case XML_PARSER_START: 10883: if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10884: xmlChar start[4]; 10885: xmlCharEncoding enc; 10886: 10887: /* 10888: * Very first chars read from the document flow. 10889: */ 10890: if (avail < 4) 10891: goto done; 10892: 10893: /* 10894: * Get the 4 first bytes and decode the charset 10895: * if enc != XML_CHAR_ENCODING_NONE 10896: * plug some encoding conversion routines, 10897: * else xmlSwitchEncoding will set to (default) 10898: * UTF8. 10899: */ 10900: start[0] = RAW; 10901: start[1] = NXT(1); 10902: start[2] = NXT(2); 10903: start[3] = NXT(3); 10904: enc = xmlDetectCharEncoding(start, 4); 10905: xmlSwitchEncoding(ctxt, enc); 10906: break; 10907: } 10908: 10909: if (avail < 2) 10910: goto done; 10911: cur = ctxt->input->cur[0]; 10912: next = ctxt->input->cur[1]; 10913: if (cur == 0) { 10914: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10915: ctxt->sax->setDocumentLocator(ctxt->userData, 10916: &xmlDefaultSAXLocator); 10917: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10918: ctxt->instate = XML_PARSER_EOF; 10919: #ifdef DEBUG_PUSH 10920: xmlGenericError(xmlGenericErrorContext, 10921: "PP: entering EOF\n"); 10922: #endif 10923: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10924: ctxt->sax->endDocument(ctxt->userData); 10925: goto done; 10926: } 10927: if ((cur == '<') && (next == '?')) { 10928: /* PI or XML decl */ 10929: if (avail < 5) return(ret); 10930: if ((!terminate) && 10931: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10932: return(ret); 10933: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10934: ctxt->sax->setDocumentLocator(ctxt->userData, 10935: &xmlDefaultSAXLocator); 10936: if ((ctxt->input->cur[2] == 'x') && 10937: (ctxt->input->cur[3] == 'm') && 10938: (ctxt->input->cur[4] == 'l') && 10939: (IS_BLANK_CH(ctxt->input->cur[5]))) { 10940: ret += 5; 10941: #ifdef DEBUG_PUSH 10942: xmlGenericError(xmlGenericErrorContext, 10943: "PP: Parsing XML 
Decl\n"); 10944: #endif 10945: xmlParseXMLDecl(ctxt); 10946: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10947: /* 10948: * The XML REC instructs us to stop parsing right 10949: * here 10950: */ 10951: ctxt->instate = XML_PARSER_EOF; 10952: return(0); 10953: } 10954: ctxt->standalone = ctxt->input->standalone; 10955: if ((ctxt->encoding == NULL) && 10956: (ctxt->input->encoding != NULL)) 10957: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10958: if ((ctxt->sax) && (ctxt->sax->startDocument) && 10959: (!ctxt->disableSAX)) 10960: ctxt->sax->startDocument(ctxt->userData); 10961: ctxt->instate = XML_PARSER_MISC; 10962: #ifdef DEBUG_PUSH 10963: xmlGenericError(xmlGenericErrorContext, 10964: "PP: entering MISC\n"); 10965: #endif 10966: } else { 10967: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10968: if ((ctxt->sax) && (ctxt->sax->startDocument) && 10969: (!ctxt->disableSAX)) 10970: ctxt->sax->startDocument(ctxt->userData); 10971: ctxt->instate = XML_PARSER_MISC; 10972: #ifdef DEBUG_PUSH 10973: xmlGenericError(xmlGenericErrorContext, 10974: "PP: entering MISC\n"); 10975: #endif 10976: } 10977: } else { 10978: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10979: ctxt->sax->setDocumentLocator(ctxt->userData, 10980: &xmlDefaultSAXLocator); 10981: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10982: if (ctxt->version == NULL) { 10983: xmlErrMemory(ctxt, NULL); 10984: break; 10985: } 10986: if ((ctxt->sax) && (ctxt->sax->startDocument) && 10987: (!ctxt->disableSAX)) 10988: ctxt->sax->startDocument(ctxt->userData); 10989: ctxt->instate = XML_PARSER_MISC; 10990: #ifdef DEBUG_PUSH 10991: xmlGenericError(xmlGenericErrorContext, 10992: "PP: entering MISC\n"); 10993: #endif 10994: } 10995: break; 10996: case XML_PARSER_START_TAG: { 10997: const xmlChar *name; 10998: const xmlChar *prefix = NULL; 10999: const xmlChar *URI = NULL; 11000: int nsNr = ctxt->nsNr; 11001: 11002: if ((avail < 2) && (ctxt->inputNr == 1)) 11003: goto done; 11004: cur = 
ctxt->input->cur[0]; 11005: if (cur != '<') { 11006: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11007: ctxt->instate = XML_PARSER_EOF; 11008: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11009: ctxt->sax->endDocument(ctxt->userData); 11010: goto done; 11011: } 11012: if (!terminate) { 11013: if (ctxt->progressive) { 11014: /* > can be found unescaped in attribute values */ 11015: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11016: goto done; 11017: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11018: goto done; 11019: } 11020: } 11021: if (ctxt->spaceNr == 0) 11022: spacePush(ctxt, -1); 11023: else if (*ctxt->space == -2) 11024: spacePush(ctxt, -1); 11025: else 11026: spacePush(ctxt, *ctxt->space); 11027: #ifdef LIBXML_SAX1_ENABLED 11028: if (ctxt->sax2) 11029: #endif /* LIBXML_SAX1_ENABLED */ 11030: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11031: #ifdef LIBXML_SAX1_ENABLED 11032: else 11033: name = xmlParseStartTag(ctxt); 11034: #endif /* LIBXML_SAX1_ENABLED */ 11035: if (ctxt->instate == XML_PARSER_EOF) 11036: goto done; 11037: if (name == NULL) { 11038: spacePop(ctxt); 11039: ctxt->instate = XML_PARSER_EOF; 11040: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11041: ctxt->sax->endDocument(ctxt->userData); 11042: goto done; 11043: } 11044: #ifdef LIBXML_VALID_ENABLED 11045: /* 11046: * [ VC: Root Element Type ] 11047: * The Name in the document type declaration must match 11048: * the element type of the root element. 11049: */ 11050: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11051: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11052: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11053: #endif /* LIBXML_VALID_ENABLED */ 11054: 11055: /* 11056: * Check for an Empty Element. 
11057: */ 11058: if ((RAW == '/') && (NXT(1) == '>')) { 11059: SKIP(2); 11060: 11061: if (ctxt->sax2) { 11062: if ((ctxt->sax != NULL) && 11063: (ctxt->sax->endElementNs != NULL) && 11064: (!ctxt->disableSAX)) 11065: ctxt->sax->endElementNs(ctxt->userData, name, 11066: prefix, URI); 11067: if (ctxt->nsNr - nsNr > 0) 11068: nsPop(ctxt, ctxt->nsNr - nsNr); 11069: #ifdef LIBXML_SAX1_ENABLED 11070: } else { 11071: if ((ctxt->sax != NULL) && 11072: (ctxt->sax->endElement != NULL) && 11073: (!ctxt->disableSAX)) 11074: ctxt->sax->endElement(ctxt->userData, name); 11075: #endif /* LIBXML_SAX1_ENABLED */ 11076: } 11077: spacePop(ctxt); 11078: if (ctxt->nameNr == 0) { 11079: ctxt->instate = XML_PARSER_EPILOG; 11080: } else { 11081: ctxt->instate = XML_PARSER_CONTENT; 11082: } 11083: break; 11084: } 11085: if (RAW == '>') { 11086: NEXT; 11087: } else { 11088: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11089: "Couldn't find end of Start Tag %s\n", 11090: name); 11091: nodePop(ctxt); 11092: spacePop(ctxt); 11093: } 11094: if (ctxt->sax2) 11095: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11096: #ifdef LIBXML_SAX1_ENABLED 11097: else 11098: namePush(ctxt, name); 11099: #endif /* LIBXML_SAX1_ENABLED */ 11100: 11101: ctxt->instate = XML_PARSER_CONTENT; 11102: break; 11103: } 11104: case XML_PARSER_CONTENT: { 11105: const xmlChar *test; 11106: unsigned int cons; 11107: if ((avail < 2) && (ctxt->inputNr == 1)) 11108: goto done; 11109: cur = ctxt->input->cur[0]; 11110: next = ctxt->input->cur[1]; 11111: 11112: test = CUR_PTR; 11113: cons = ctxt->input->consumed; 11114: if ((cur == '<') && (next == '/')) { 11115: ctxt->instate = XML_PARSER_END_TAG; 11116: break; 11117: } else if ((cur == '<') && (next == '?')) { 11118: if ((!terminate) && 11119: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11120: goto done; 11121: xmlParsePI(ctxt); 11122: } else if ((cur == '<') && (next != '!')) { 11123: ctxt->instate = XML_PARSER_START_TAG; 11124: break; 11125: } else if ((cur == 
'<') && (next == '!') && 11126: (ctxt->input->cur[2] == '-') && 11127: (ctxt->input->cur[3] == '-')) { 11128: int term; 11129: 11130: if (avail < 4) 11131: goto done; 11132: ctxt->input->cur += 4; 11133: term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11134: ctxt->input->cur -= 4; 11135: if ((!terminate) && (term < 0)) 11136: goto done; 11137: xmlParseComment(ctxt); 11138: ctxt->instate = XML_PARSER_CONTENT; 11139: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11140: (ctxt->input->cur[2] == '[') && 11141: (ctxt->input->cur[3] == 'C') && 11142: (ctxt->input->cur[4] == 'D') && 11143: (ctxt->input->cur[5] == 'A') && 11144: (ctxt->input->cur[6] == 'T') && 11145: (ctxt->input->cur[7] == 'A') && 11146: (ctxt->input->cur[8] == '[')) { 11147: SKIP(9); 11148: ctxt->instate = XML_PARSER_CDATA_SECTION; 11149: break; 11150: } else if ((cur == '<') && (next == '!') && 11151: (avail < 9)) { 11152: goto done; 11153: } else if (cur == '&') { 11154: if ((!terminate) && 11155: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11156: goto done; 11157: xmlParseReference(ctxt); 11158: } else { 11159: /* TODO Avoid the extra copy, handle directly !!! */ 11160: /* 11161: * Goal of the following test is: 11162: * - minimize calls to the SAX 'character' callback 11163: * when they are mergeable 11164: * - handle an problem for isBlank when we only parse 11165: * a sequence of blank chars and the next one is 11166: * not available to check against '<' presence. 11167: * - tries to homogenize the differences in SAX 11168: * callbacks between the push and pull versions 11169: * of the parser. 
11170: */ 11171: if ((ctxt->inputNr == 1) && 11172: (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11173: if (!terminate) { 11174: if (ctxt->progressive) { 11175: if ((lastlt == NULL) || 11176: (ctxt->input->cur > lastlt)) 11177: goto done; 11178: } else if (xmlParseLookupSequence(ctxt, 11179: '<', 0, 0) < 0) { 11180: goto done; 11181: } 11182: } 11183: } 11184: ctxt->checkIndex = 0; 11185: xmlParseCharData(ctxt, 0); 11186: } 11187: /* 11188: * Pop-up of finished entities. 11189: */ 11190: while ((RAW == 0) && (ctxt->inputNr > 1)) 11191: xmlPopInput(ctxt); 11192: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11193: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11194: "detected an error in element content\n"); 11195: ctxt->instate = XML_PARSER_EOF; 11196: break; 11197: } 11198: break; 11199: } 11200: case XML_PARSER_END_TAG: 11201: if (avail < 2) 11202: goto done; 11203: if (!terminate) { 11204: if (ctxt->progressive) { 11205: /* > can be found unescaped in attribute values */ 11206: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11207: goto done; 11208: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11209: goto done; 11210: } 11211: } 11212: if (ctxt->sax2) { 11213: xmlParseEndTag2(ctxt, 11214: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11215: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11216: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11217: nameNsPop(ctxt); 11218: } 11219: #ifdef LIBXML_SAX1_ENABLED 11220: else 11221: xmlParseEndTag1(ctxt, 0); 11222: #endif /* LIBXML_SAX1_ENABLED */ 11223: if (ctxt->instate == XML_PARSER_EOF) { 11224: /* Nothing */ 11225: } else if (ctxt->nameNr == 0) { 11226: ctxt->instate = XML_PARSER_EPILOG; 11227: } else { 11228: ctxt->instate = XML_PARSER_CONTENT; 11229: } 11230: break; 11231: case XML_PARSER_CDATA_SECTION: { 11232: /* 11233: * The Push mode need to have the SAX callback for 11234: * cdataBlock merge back contiguous callbacks. 
11235: */ 11236: int base; 11237: 11238: base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11239: if (base < 0) { 11240: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11241: int tmp; 11242: 11243: tmp = xmlCheckCdataPush(ctxt->input->cur, 11244: XML_PARSER_BIG_BUFFER_SIZE); 11245: if (tmp < 0) { 11246: tmp = -tmp; 11247: ctxt->input->cur += tmp; 11248: goto encoding_error; 11249: } 11250: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11251: if (ctxt->sax->cdataBlock != NULL) 11252: ctxt->sax->cdataBlock(ctxt->userData, 11253: ctxt->input->cur, tmp); 11254: else if (ctxt->sax->characters != NULL) 11255: ctxt->sax->characters(ctxt->userData, 11256: ctxt->input->cur, tmp); 11257: } 11258: SKIPL(tmp); 11259: ctxt->checkIndex = 0; 11260: } 11261: goto done; 11262: } else { 11263: int tmp; 11264: 11265: tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11266: if ((tmp < 0) || (tmp != base)) { 11267: tmp = -tmp; 11268: ctxt->input->cur += tmp; 11269: goto encoding_error; 11270: } 11271: if ((ctxt->sax != NULL) && (base == 0) && 11272: (ctxt->sax->cdataBlock != NULL) && 11273: (!ctxt->disableSAX)) { 11274: /* 11275: * Special case to provide identical behaviour 11276: * between pull and push parsers on enpty CDATA 11277: * sections 11278: */ 11279: if ((ctxt->input->cur - ctxt->input->base >= 9) && 11280: (!strncmp((const char *)&ctxt->input->cur[-9], 11281: "<![CDATA[", 9))) 11282: ctxt->sax->cdataBlock(ctxt->userData, 11283: BAD_CAST "", 0); 11284: } else if ((ctxt->sax != NULL) && (base > 0) && 11285: (!ctxt->disableSAX)) { 11286: if (ctxt->sax->cdataBlock != NULL) 11287: ctxt->sax->cdataBlock(ctxt->userData, 11288: ctxt->input->cur, base); 11289: else if (ctxt->sax->characters != NULL) 11290: ctxt->sax->characters(ctxt->userData, 11291: ctxt->input->cur, base); 11292: } 11293: SKIPL(base + 3); 11294: ctxt->checkIndex = 0; 11295: ctxt->instate = XML_PARSER_CONTENT; 11296: #ifdef DEBUG_PUSH 11297: xmlGenericError(xmlGenericErrorContext, 11298: "PP: entering 
CONTENT\n"); 11299: #endif 11300: } 11301: break; 11302: } 11303: case XML_PARSER_MISC: 11304: SKIP_BLANKS; 11305: if (ctxt->input->buf == NULL) 11306: avail = ctxt->input->length - 11307: (ctxt->input->cur - ctxt->input->base); 11308: else 11309: avail = ctxt->input->buf->buffer->use - 11310: (ctxt->input->cur - ctxt->input->base); 11311: if (avail < 2) 11312: goto done; 11313: cur = ctxt->input->cur[0]; 11314: next = ctxt->input->cur[1]; 11315: if ((cur == '<') && (next == '?')) { 11316: if ((!terminate) && 11317: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11318: goto done; 11319: #ifdef DEBUG_PUSH 11320: xmlGenericError(xmlGenericErrorContext, 11321: "PP: Parsing PI\n"); 11322: #endif 11323: xmlParsePI(ctxt); 11324: ctxt->checkIndex = 0; 11325: } else if ((cur == '<') && (next == '!') && 11326: (ctxt->input->cur[2] == '-') && 11327: (ctxt->input->cur[3] == '-')) { 11328: if ((!terminate) && 11329: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11330: goto done; 11331: #ifdef DEBUG_PUSH 11332: xmlGenericError(xmlGenericErrorContext, 11333: "PP: Parsing Comment\n"); 11334: #endif 11335: xmlParseComment(ctxt); 11336: ctxt->instate = XML_PARSER_MISC; 11337: ctxt->checkIndex = 0; 11338: } else if ((cur == '<') && (next == '!') && 11339: (ctxt->input->cur[2] == 'D') && 11340: (ctxt->input->cur[3] == 'O') && 11341: (ctxt->input->cur[4] == 'C') && 11342: (ctxt->input->cur[5] == 'T') && 11343: (ctxt->input->cur[6] == 'Y') && 11344: (ctxt->input->cur[7] == 'P') && 11345: (ctxt->input->cur[8] == 'E')) { 11346: if ((!terminate) && 11347: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 11348: goto done; 11349: #ifdef DEBUG_PUSH 11350: xmlGenericError(xmlGenericErrorContext, 11351: "PP: Parsing internal subset\n"); 11352: #endif 11353: ctxt->inSubset = 1; 11354: xmlParseDocTypeDecl(ctxt); 11355: if (RAW == '[') { 11356: ctxt->instate = XML_PARSER_DTD; 11357: #ifdef DEBUG_PUSH 11358: xmlGenericError(xmlGenericErrorContext, 11359: "PP: entering DTD\n"); 11360: #endif 
11361: } else { 11362: /* 11363: * Create and update the external subset. 11364: */ 11365: ctxt->inSubset = 2; 11366: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11367: (ctxt->sax->externalSubset != NULL)) 11368: ctxt->sax->externalSubset(ctxt->userData, 11369: ctxt->intSubName, ctxt->extSubSystem, 11370: ctxt->extSubURI); 11371: ctxt->inSubset = 0; 11372: xmlCleanSpecialAttr(ctxt); 11373: ctxt->instate = XML_PARSER_PROLOG; 11374: #ifdef DEBUG_PUSH 11375: xmlGenericError(xmlGenericErrorContext, 11376: "PP: entering PROLOG\n"); 11377: #endif 11378: } 11379: } else if ((cur == '<') && (next == '!') && 11380: (avail < 9)) { 11381: goto done; 11382: } else { 11383: ctxt->instate = XML_PARSER_START_TAG; 11384: ctxt->progressive = 1; 11385: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11386: #ifdef DEBUG_PUSH 11387: xmlGenericError(xmlGenericErrorContext, 11388: "PP: entering START_TAG\n"); 11389: #endif 11390: } 11391: break; 11392: case XML_PARSER_PROLOG: 11393: SKIP_BLANKS; 11394: if (ctxt->input->buf == NULL) 11395: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11396: else 11397: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11398: if (avail < 2) 11399: goto done; 11400: cur = ctxt->input->cur[0]; 11401: next = ctxt->input->cur[1]; 11402: if ((cur == '<') && (next == '?')) { 11403: if ((!terminate) && 11404: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11405: goto done; 11406: #ifdef DEBUG_PUSH 11407: xmlGenericError(xmlGenericErrorContext, 11408: "PP: Parsing PI\n"); 11409: #endif 11410: xmlParsePI(ctxt); 11411: } else if ((cur == '<') && (next == '!') && 11412: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11413: if ((!terminate) && 11414: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11415: goto done; 11416: #ifdef DEBUG_PUSH 11417: xmlGenericError(xmlGenericErrorContext, 11418: "PP: Parsing Comment\n"); 11419: #endif 11420: xmlParseComment(ctxt); 11421: ctxt->instate = 
XML_PARSER_PROLOG; 11422: } else if ((cur == '<') && (next == '!') && 11423: (avail < 4)) { 11424: goto done; 11425: } else { 11426: ctxt->instate = XML_PARSER_START_TAG; 11427: if (ctxt->progressive == 0) 11428: ctxt->progressive = 1; 11429: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11430: #ifdef DEBUG_PUSH 11431: xmlGenericError(xmlGenericErrorContext, 11432: "PP: entering START_TAG\n"); 11433: #endif 11434: } 11435: break; 11436: case XML_PARSER_EPILOG: 11437: SKIP_BLANKS; 11438: if (ctxt->input->buf == NULL) 11439: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11440: else 11441: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11442: if (avail < 2) 11443: goto done; 11444: cur = ctxt->input->cur[0]; 11445: next = ctxt->input->cur[1]; 11446: if ((cur == '<') && (next == '?')) { 11447: if ((!terminate) && 11448: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11449: goto done; 11450: #ifdef DEBUG_PUSH 11451: xmlGenericError(xmlGenericErrorContext, 11452: "PP: Parsing PI\n"); 11453: #endif 11454: xmlParsePI(ctxt); 11455: ctxt->instate = XML_PARSER_EPILOG; 11456: } else if ((cur == '<') && (next == '!') && 11457: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11458: if ((!terminate) && 11459: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11460: goto done; 11461: #ifdef DEBUG_PUSH 11462: xmlGenericError(xmlGenericErrorContext, 11463: "PP: Parsing Comment\n"); 11464: #endif 11465: xmlParseComment(ctxt); 11466: ctxt->instate = XML_PARSER_EPILOG; 11467: } else if ((cur == '<') && (next == '!') && 11468: (avail < 4)) { 11469: goto done; 11470: } else { 11471: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11472: ctxt->instate = XML_PARSER_EOF; 11473: #ifdef DEBUG_PUSH 11474: xmlGenericError(xmlGenericErrorContext, 11475: "PP: entering EOF\n"); 11476: #endif 11477: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11478: ctxt->sax->endDocument(ctxt->userData); 11479: goto done; 11480: } 
11481: break; 11482: case XML_PARSER_DTD: { 11483: /* 11484: * Sorry but progressive parsing of the internal subset 11485: * is not expected to be supported. We first check that 11486: * the full content of the internal subset is available and 11487: * the parsing is launched only at that point. 11488: * Internal subset ends up with "']' S? '>'" in an unescaped 11489: * section and not in a ']]>' sequence which are conditional 11490: * sections (whoever argued to keep that crap in XML deserve 11491: * a place in hell !). 11492: */ 11493: int base, i; 11494: xmlChar *buf; 11495: xmlChar quote = 0; 11496: 11497: base = ctxt->input->cur - ctxt->input->base; 11498: if (base < 0) return(0); 11499: if (ctxt->checkIndex > base) 11500: base = ctxt->checkIndex; 11501: buf = ctxt->input->buf->buffer->content; 11502: for (;(unsigned int) base < ctxt->input->buf->buffer->use; 11503: base++) { 11504: if (quote != 0) { 11505: if (buf[base] == quote) 11506: quote = 0; 11507: continue; 11508: } 11509: if ((quote == 0) && (buf[base] == '<')) { 11510: int found = 0; 11511: /* special handling of comments */ 11512: if (((unsigned int) base + 4 < 11513: ctxt->input->buf->buffer->use) && 11514: (buf[base + 1] == '!') && 11515: (buf[base + 2] == '-') && 11516: (buf[base + 3] == '-')) { 11517: for (;(unsigned int) base + 3 < 11518: ctxt->input->buf->buffer->use; base++) { 11519: if ((buf[base] == '-') && 11520: (buf[base + 1] == '-') && 11521: (buf[base + 2] == '>')) { 11522: found = 1; 11523: base += 2; 11524: break; 11525: } 11526: } 11527: if (!found) { 11528: #if 0 11529: fprintf(stderr, "unfinished comment\n"); 11530: #endif 11531: break; /* for */ 11532: } 11533: continue; 11534: } 11535: } 11536: if (buf[base] == '"') { 11537: quote = '"'; 11538: continue; 11539: } 11540: if (buf[base] == '\'') { 11541: quote = '\''; 11542: continue; 11543: } 11544: if (buf[base] == ']') { 11545: #if 0 11546: fprintf(stderr, "%c%c%c%c: ", buf[base], 11547: buf[base + 1], buf[base + 2], buf[base + 
3]); 11548: #endif 11549: if ((unsigned int) base +1 >= 11550: ctxt->input->buf->buffer->use) 11551: break; 11552: if (buf[base + 1] == ']') { 11553: /* conditional crap, skip both ']' ! */ 11554: base++; 11555: continue; 11556: } 11557: for (i = 1; 11558: (unsigned int) base + i < ctxt->input->buf->buffer->use; 11559: i++) { 11560: if (buf[base + i] == '>') { 11561: #if 0 11562: fprintf(stderr, "found\n"); 11563: #endif 11564: goto found_end_int_subset; 11565: } 11566: if (!IS_BLANK_CH(buf[base + i])) { 11567: #if 0 11568: fprintf(stderr, "not found\n"); 11569: #endif 11570: goto not_end_of_int_subset; 11571: } 11572: } 11573: #if 0 11574: fprintf(stderr, "end of stream\n"); 11575: #endif 11576: break; 11577: 11578: } 11579: not_end_of_int_subset: 11580: continue; /* for */ 11581: } 11582: /* 11583: * We didn't found the end of the Internal subset 11584: */ 11585: #ifdef DEBUG_PUSH 11586: if (next == 0) 11587: xmlGenericError(xmlGenericErrorContext, 11588: "PP: lookup of int subset end filed\n"); 11589: #endif 11590: goto done; 11591: 11592: found_end_int_subset: 11593: xmlParseInternalSubset(ctxt); 11594: ctxt->inSubset = 2; 11595: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11596: (ctxt->sax->externalSubset != NULL)) 11597: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11598: ctxt->extSubSystem, ctxt->extSubURI); 11599: ctxt->inSubset = 0; 11600: xmlCleanSpecialAttr(ctxt); 11601: ctxt->instate = XML_PARSER_PROLOG; 11602: ctxt->checkIndex = 0; 11603: #ifdef DEBUG_PUSH 11604: xmlGenericError(xmlGenericErrorContext, 11605: "PP: entering PROLOG\n"); 11606: #endif 11607: break; 11608: } 11609: case XML_PARSER_COMMENT: 11610: xmlGenericError(xmlGenericErrorContext, 11611: "PP: internal error, state == COMMENT\n"); 11612: ctxt->instate = XML_PARSER_CONTENT; 11613: #ifdef DEBUG_PUSH 11614: xmlGenericError(xmlGenericErrorContext, 11615: "PP: entering CONTENT\n"); 11616: #endif 11617: break; 11618: case XML_PARSER_IGNORE: 11619: 
xmlGenericError(xmlGenericErrorContext, 11620: "PP: internal error, state == IGNORE"); 11621: ctxt->instate = XML_PARSER_DTD; 11622: #ifdef DEBUG_PUSH 11623: xmlGenericError(xmlGenericErrorContext, 11624: "PP: entering DTD\n"); 11625: #endif 11626: break; 11627: case XML_PARSER_PI: 11628: xmlGenericError(xmlGenericErrorContext, 11629: "PP: internal error, state == PI\n"); 11630: ctxt->instate = XML_PARSER_CONTENT; 11631: #ifdef DEBUG_PUSH 11632: xmlGenericError(xmlGenericErrorContext, 11633: "PP: entering CONTENT\n"); 11634: #endif 11635: break; 11636: case XML_PARSER_ENTITY_DECL: 11637: xmlGenericError(xmlGenericErrorContext, 11638: "PP: internal error, state == ENTITY_DECL\n"); 11639: ctxt->instate = XML_PARSER_DTD; 11640: #ifdef DEBUG_PUSH 11641: xmlGenericError(xmlGenericErrorContext, 11642: "PP: entering DTD\n"); 11643: #endif 11644: break; 11645: case XML_PARSER_ENTITY_VALUE: 11646: xmlGenericError(xmlGenericErrorContext, 11647: "PP: internal error, state == ENTITY_VALUE\n"); 11648: ctxt->instate = XML_PARSER_CONTENT; 11649: #ifdef DEBUG_PUSH 11650: xmlGenericError(xmlGenericErrorContext, 11651: "PP: entering DTD\n"); 11652: #endif 11653: break; 11654: case XML_PARSER_ATTRIBUTE_VALUE: 11655: xmlGenericError(xmlGenericErrorContext, 11656: "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11657: ctxt->instate = XML_PARSER_START_TAG; 11658: #ifdef DEBUG_PUSH 11659: xmlGenericError(xmlGenericErrorContext, 11660: "PP: entering START_TAG\n"); 11661: #endif 11662: break; 11663: case XML_PARSER_SYSTEM_LITERAL: 11664: xmlGenericError(xmlGenericErrorContext, 11665: "PP: internal error, state == SYSTEM_LITERAL\n"); 11666: ctxt->instate = XML_PARSER_START_TAG; 11667: #ifdef DEBUG_PUSH 11668: xmlGenericError(xmlGenericErrorContext, 11669: "PP: entering START_TAG\n"); 11670: #endif 11671: break; 11672: case XML_PARSER_PUBLIC_LITERAL: 11673: xmlGenericError(xmlGenericErrorContext, 11674: "PP: internal error, state == PUBLIC_LITERAL\n"); 11675: ctxt->instate = 
XML_PARSER_START_TAG; 11676: #ifdef DEBUG_PUSH 11677: xmlGenericError(xmlGenericErrorContext, 11678: "PP: entering START_TAG\n"); 11679: #endif 11680: break; 11681: } 11682: } 11683: done: 11684: #ifdef DEBUG_PUSH 11685: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11686: #endif 11687: return(ret); 11688: encoding_error: 11689: { 11690: char buffer[150]; 11691: 11692: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11693: ctxt->input->cur[0], ctxt->input->cur[1], 11694: ctxt->input->cur[2], ctxt->input->cur[3]); 11695: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11696: "Input is not proper UTF-8, indicate encoding !\n%s", 11697: BAD_CAST buffer, NULL); 11698: } 11699: return(0); 11700: } 11701: 11702: /** 11703: * xmlParseChunk: 11704: * @ctxt: an XML parser context 11705: * @chunk: an char array 11706: * @size: the size in byte of the chunk 11707: * @terminate: last chunk indicator 11708: * 11709: * Parse a Chunk of memory 11710: * 11711: * Returns zero if no error, the xmlParserErrors otherwise. 
11712: */ 11713: int 11714: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 11715: int terminate) { 11716: int end_in_lf = 0; 11717: int remain = 0; 11718: 11719: if (ctxt == NULL) 11720: return(XML_ERR_INTERNAL_ERROR); 11721: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11722: return(ctxt->errNo); 11723: if (ctxt->instate == XML_PARSER_START) 11724: xmlDetectSAX2(ctxt); 11725: if ((size > 0) && (chunk != NULL) && (!terminate) && 11726: (chunk[size - 1] == '\r')) { 11727: end_in_lf = 1; 11728: size--; 11729: } 11730: 11731: xmldecl_done: 11732: 11733: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11734: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 11735: int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11736: int cur = ctxt->input->cur - ctxt->input->base; 11737: int res; 11738: 11739: /* 11740: * Specific handling if we autodetected an encoding, we should not 11741: * push more than the first line ... which depend on the encoding 11742: * And only push the rest once the final encoding was detected 11743: */ 11744: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 11745: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 11746: unsigned int len = 45; 11747: 11748: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11749: BAD_CAST "UTF-16")) || 11750: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11751: BAD_CAST "UTF16"))) 11752: len = 90; 11753: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11754: BAD_CAST "UCS-4")) || 11755: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11756: BAD_CAST "UCS4"))) 11757: len = 180; 11758: 11759: if (ctxt->input->buf->rawconsumed < len) 11760: len -= ctxt->input->buf->rawconsumed; 11761: 11762: /* 11763: * Change size for reading the initial declaration only 11764: * if size is greater than len. 
Otherwise, memmove in xmlBufferAdd 11765: * will blindly copy extra bytes from memory. 11766: */ 11767: if ((unsigned int) size > len) { 11768: remain = size - len; 11769: size = len; 11770: } else { 11771: remain = 0; 11772: } 11773: } 11774: res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11775: if (res < 0) { 11776: ctxt->errNo = XML_PARSER_EOF; 11777: ctxt->disableSAX = 1; 11778: return (XML_PARSER_EOF); 11779: } 11780: ctxt->input->base = ctxt->input->buf->buffer->content + base; 11781: ctxt->input->cur = ctxt->input->base + cur; 11782: ctxt->input->end = 11783: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11784: #ifdef DEBUG_PUSH 11785: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11786: #endif 11787: 11788: } else if (ctxt->instate != XML_PARSER_EOF) { 11789: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 11790: xmlParserInputBufferPtr in = ctxt->input->buf; 11791: if ((in->encoder != NULL) && (in->buffer != NULL) && 11792: (in->raw != NULL)) { 11793: int nbchars; 11794: 11795: nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 11796: if (nbchars < 0) { 11797: /* TODO 2.6.0 */ 11798: xmlGenericError(xmlGenericErrorContext, 11799: "xmlParseChunk: encoder error\n"); 11800: return(XML_ERR_INVALID_ENCODING); 11801: } 11802: } 11803: } 11804: } 11805: if (remain != 0) 11806: xmlParseTryOrFinish(ctxt, 0); 11807: else 11808: xmlParseTryOrFinish(ctxt, terminate); 11809: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11810: return(ctxt->errNo); 11811: 11812: if (remain != 0) { 11813: chunk += size; 11814: size = remain; 11815: remain = 0; 11816: goto xmldecl_done; 11817: } 11818: if ((end_in_lf == 1) && (ctxt->input != NULL) && 11819: (ctxt->input->buf != NULL)) { 11820: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 11821: } 11822: if (terminate) { 11823: /* 11824: * Check for termination 11825: */ 11826: int avail = 0; 11827: 11828: if (ctxt->input != NULL) { 11829: if 
(ctxt->input->buf == NULL) 11830: avail = ctxt->input->length - 11831: (ctxt->input->cur - ctxt->input->base); 11832: else 11833: avail = ctxt->input->buf->buffer->use - 11834: (ctxt->input->cur - ctxt->input->base); 11835: } 11836: 11837: if ((ctxt->instate != XML_PARSER_EOF) && 11838: (ctxt->instate != XML_PARSER_EPILOG)) { 11839: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11840: } 11841: if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 11842: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11843: } 11844: if (ctxt->instate != XML_PARSER_EOF) { 11845: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11846: ctxt->sax->endDocument(ctxt->userData); 11847: } 11848: ctxt->instate = XML_PARSER_EOF; 11849: } 11850: return((xmlParserErrors) ctxt->errNo); 11851: } 11852: 11853: /************************************************************************ 11854: * * 11855: * I/O front end functions to the parser * 11856: * * 11857: ************************************************************************/ 11858: 11859: /** 11860: * xmlCreatePushParserCtxt: 11861: * @sax: a SAX handler 11862: * @user_data: The user data returned on SAX callbacks 11863: * @chunk: a pointer to an array of chars 11864: * @size: number of chars in the array 11865: * @filename: an optional file name or URI 11866: * 11867: * Create a parser context for using the XML parser in push mode. 11868: * If @buffer and @size are non-NULL, the data is used to detect 11869: * the encoding. The remaining characters will be parsed so they 11870: * don't need to be fed in again through xmlParseChunk. 11871: * To allow content encoding detection, @size should be >= 4 11872: * The value of @filename is used for fetching external entities 11873: * and error/warning reports. 
11874: * 11875: * Returns the new parser context or NULL 11876: */ 11877: 11878: xmlParserCtxtPtr 11879: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11880: const char *chunk, int size, const char *filename) { 11881: xmlParserCtxtPtr ctxt; 11882: xmlParserInputPtr inputStream; 11883: xmlParserInputBufferPtr buf; 11884: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11885: 11886: /* 11887: * plug some encoding conversion routines 11888: */ 11889: if ((chunk != NULL) && (size >= 4)) 11890: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11891: 11892: buf = xmlAllocParserInputBuffer(enc); 11893: if (buf == NULL) return(NULL); 11894: 11895: ctxt = xmlNewParserCtxt(); 11896: if (ctxt == NULL) { 11897: xmlErrMemory(NULL, "creating parser: out of memory\n"); 11898: xmlFreeParserInputBuffer(buf); 11899: return(NULL); 11900: } 11901: ctxt->dictNames = 1; 11902: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11903: if (ctxt->pushTab == NULL) { 11904: xmlErrMemory(ctxt, NULL); 11905: xmlFreeParserInputBuffer(buf); 11906: xmlFreeParserCtxt(ctxt); 11907: return(NULL); 11908: } 11909: if (sax != NULL) { 11910: #ifdef LIBXML_SAX1_ENABLED 11911: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11912: #endif /* LIBXML_SAX1_ENABLED */ 11913: xmlFree(ctxt->sax); 11914: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11915: if (ctxt->sax == NULL) { 11916: xmlErrMemory(ctxt, NULL); 11917: xmlFreeParserInputBuffer(buf); 11918: xmlFreeParserCtxt(ctxt); 11919: return(NULL); 11920: } 11921: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11922: if (sax->initialized == XML_SAX2_MAGIC) 11923: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11924: else 11925: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11926: if (user_data != NULL) 11927: ctxt->userData = user_data; 11928: } 11929: if (filename == NULL) { 11930: ctxt->directory = NULL; 11931: } else { 11932: ctxt->directory = xmlParserGetDirectory(filename); 
11933: } 11934: 11935: inputStream = xmlNewInputStream(ctxt); 11936: if (inputStream == NULL) { 11937: xmlFreeParserCtxt(ctxt); 11938: xmlFreeParserInputBuffer(buf); 11939: return(NULL); 11940: } 11941: 11942: if (filename == NULL) 11943: inputStream->filename = NULL; 11944: else { 11945: inputStream->filename = (char *) 11946: xmlCanonicPath((const xmlChar *) filename); 11947: if (inputStream->filename == NULL) { 11948: xmlFreeParserCtxt(ctxt); 11949: xmlFreeParserInputBuffer(buf); 11950: return(NULL); 11951: } 11952: } 11953: inputStream->buf = buf; 11954: inputStream->base = inputStream->buf->buffer->content; 11955: inputStream->cur = inputStream->buf->buffer->content; 11956: inputStream->end = 11957: &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11958: 11959: inputPush(ctxt, inputStream); 11960: 11961: /* 11962: * If the caller didn't provide an initial 'chunk' for determining 11963: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11964: * that it can be automatically determined later 11965: */ 11966: if ((size == 0) || (chunk == NULL)) { 11967: ctxt->charset = XML_CHAR_ENCODING_NONE; 11968: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11969: int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11970: int cur = ctxt->input->cur - ctxt->input->base; 11971: 11972: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11973: 11974: ctxt->input->base = ctxt->input->buf->buffer->content + base; 11975: ctxt->input->cur = ctxt->input->base + cur; 11976: ctxt->input->end = 11977: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11978: #ifdef DEBUG_PUSH 11979: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11980: #endif 11981: } 11982: 11983: if (enc != XML_CHAR_ENCODING_NONE) { 11984: xmlSwitchEncoding(ctxt, enc); 11985: } 11986: 11987: return(ctxt); 11988: } 11989: #endif /* LIBXML_PUSH_ENABLED */ 11990: 11991: /** 11992: * xmlStopParser: 11993: * @ctxt: an 
XML parser context 11994: * 11995: * Blocks further parser processing 11996: */ 11997: void 11998: xmlStopParser(xmlParserCtxtPtr ctxt) { 11999: if (ctxt == NULL) 12000: return; 12001: ctxt->instate = XML_PARSER_EOF; 12002: ctxt->disableSAX = 1; 12003: if (ctxt->input != NULL) { 12004: ctxt->input->cur = BAD_CAST""; 12005: ctxt->input->base = ctxt->input->cur; 12006: } 12007: } 12008: 12009: /** 12010: * xmlCreateIOParserCtxt: 12011: * @sax: a SAX handler 12012: * @user_data: The user data returned on SAX callbacks 12013: * @ioread: an I/O read function 12014: * @ioclose: an I/O close function 12015: * @ioctx: an I/O handler 12016: * @enc: the charset encoding if known 12017: * 12018: * Create a parser context for using the XML parser with an existing 12019: * I/O stream 12020: * 12021: * Returns the new parser context or NULL 12022: */ 12023: xmlParserCtxtPtr 12024: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12025: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12026: void *ioctx, xmlCharEncoding enc) { 12027: xmlParserCtxtPtr ctxt; 12028: xmlParserInputPtr inputStream; 12029: xmlParserInputBufferPtr buf; 12030: 12031: if (ioread == NULL) return(NULL); 12032: 12033: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12034: if (buf == NULL) { 12035: if (ioclose != NULL) 12036: ioclose(ioctx); 12037: return (NULL); 12038: } 12039: 12040: ctxt = xmlNewParserCtxt(); 12041: if (ctxt == NULL) { 12042: xmlFreeParserInputBuffer(buf); 12043: return(NULL); 12044: } 12045: if (sax != NULL) { 12046: #ifdef LIBXML_SAX1_ENABLED 12047: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12048: #endif /* LIBXML_SAX1_ENABLED */ 12049: xmlFree(ctxt->sax); 12050: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12051: if (ctxt->sax == NULL) { 12052: xmlErrMemory(ctxt, NULL); 12053: xmlFreeParserCtxt(ctxt); 12054: return(NULL); 12055: } 12056: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12057: if (sax->initialized 
== XML_SAX2_MAGIC) 12058: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12059: else 12060: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12061: if (user_data != NULL) 12062: ctxt->userData = user_data; 12063: } 12064: 12065: inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12066: if (inputStream == NULL) { 12067: xmlFreeParserCtxt(ctxt); 12068: return(NULL); 12069: } 12070: inputPush(ctxt, inputStream); 12071: 12072: return(ctxt); 12073: } 12074: 12075: #ifdef LIBXML_VALID_ENABLED 12076: /************************************************************************ 12077: * * 12078: * Front ends when parsing a DTD * 12079: * * 12080: ************************************************************************/ 12081: 12082: /** 12083: * xmlIOParseDTD: 12084: * @sax: the SAX handler block or NULL 12085: * @input: an Input Buffer 12086: * @enc: the charset encoding if known 12087: * 12088: * Load and parse a DTD 12089: * 12090: * Returns the resulting xmlDtdPtr or NULL in case of error. 12091: * @input will be freed by the function in any case. 
12092: */ 12093: 12094: xmlDtdPtr 12095: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12096: xmlCharEncoding enc) { 12097: xmlDtdPtr ret = NULL; 12098: xmlParserCtxtPtr ctxt; 12099: xmlParserInputPtr pinput = NULL; 12100: xmlChar start[4]; 12101: 12102: if (input == NULL) 12103: return(NULL); 12104: 12105: ctxt = xmlNewParserCtxt(); 12106: if (ctxt == NULL) { 12107: xmlFreeParserInputBuffer(input); 12108: return(NULL); 12109: } 12110: 12111: /* 12112: * Set-up the SAX context 12113: */ 12114: if (sax != NULL) { 12115: if (ctxt->sax != NULL) 12116: xmlFree(ctxt->sax); 12117: ctxt->sax = sax; 12118: ctxt->userData = ctxt; 12119: } 12120: xmlDetectSAX2(ctxt); 12121: 12122: /* 12123: * generate a parser input from the I/O handler 12124: */ 12125: 12126: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12127: if (pinput == NULL) { 12128: if (sax != NULL) ctxt->sax = NULL; 12129: xmlFreeParserInputBuffer(input); 12130: xmlFreeParserCtxt(ctxt); 12131: return(NULL); 12132: } 12133: 12134: /* 12135: * plug some encoding conversion routines here. 12136: */ 12137: if (xmlPushInput(ctxt, pinput) < 0) { 12138: if (sax != NULL) ctxt->sax = NULL; 12139: xmlFreeParserCtxt(ctxt); 12140: return(NULL); 12141: } 12142: if (enc != XML_CHAR_ENCODING_NONE) { 12143: xmlSwitchEncoding(ctxt, enc); 12144: } 12145: 12146: pinput->filename = NULL; 12147: pinput->line = 1; 12148: pinput->col = 1; 12149: pinput->base = ctxt->input->cur; 12150: pinput->cur = ctxt->input->cur; 12151: pinput->free = NULL; 12152: 12153: /* 12154: * let's parse that entity knowing it's an external subset. 
12155: */ 12156: ctxt->inSubset = 2; 12157: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12158: if (ctxt->myDoc == NULL) { 12159: xmlErrMemory(ctxt, "New Doc failed"); 12160: return(NULL); 12161: } 12162: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12163: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12164: BAD_CAST "none", BAD_CAST "none"); 12165: 12166: if ((enc == XML_CHAR_ENCODING_NONE) && 12167: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12168: /* 12169: * Get the 4 first bytes and decode the charset 12170: * if enc != XML_CHAR_ENCODING_NONE 12171: * plug some encoding conversion routines. 12172: */ 12173: start[0] = RAW; 12174: start[1] = NXT(1); 12175: start[2] = NXT(2); 12176: start[3] = NXT(3); 12177: enc = xmlDetectCharEncoding(start, 4); 12178: if (enc != XML_CHAR_ENCODING_NONE) { 12179: xmlSwitchEncoding(ctxt, enc); 12180: } 12181: } 12182: 12183: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12184: 12185: if (ctxt->myDoc != NULL) { 12186: if (ctxt->wellFormed) { 12187: ret = ctxt->myDoc->extSubset; 12188: ctxt->myDoc->extSubset = NULL; 12189: if (ret != NULL) { 12190: xmlNodePtr tmp; 12191: 12192: ret->doc = NULL; 12193: tmp = ret->children; 12194: while (tmp != NULL) { 12195: tmp->doc = NULL; 12196: tmp = tmp->next; 12197: } 12198: } 12199: } else { 12200: ret = NULL; 12201: } 12202: xmlFreeDoc(ctxt->myDoc); 12203: ctxt->myDoc = NULL; 12204: } 12205: if (sax != NULL) ctxt->sax = NULL; 12206: xmlFreeParserCtxt(ctxt); 12207: 12208: return(ret); 12209: } 12210: 12211: /** 12212: * xmlSAXParseDTD: 12213: * @sax: the SAX handler block 12214: * @ExternalID: a NAME* containing the External ID of the DTD 12215: * @SystemID: a NAME* containing the URL to the DTD 12216: * 12217: * Load and parse an external subset. 12218: * 12219: * Returns the resulting xmlDtdPtr or NULL in case of error. 
12220: */ 12221: 12222: xmlDtdPtr 12223: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12224: const xmlChar *SystemID) { 12225: xmlDtdPtr ret = NULL; 12226: xmlParserCtxtPtr ctxt; 12227: xmlParserInputPtr input = NULL; 12228: xmlCharEncoding enc; 12229: xmlChar* systemIdCanonic; 12230: 12231: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12232: 12233: ctxt = xmlNewParserCtxt(); 12234: if (ctxt == NULL) { 12235: return(NULL); 12236: } 12237: 12238: /* 12239: * Set-up the SAX context 12240: */ 12241: if (sax != NULL) { 12242: if (ctxt->sax != NULL) 12243: xmlFree(ctxt->sax); 12244: ctxt->sax = sax; 12245: ctxt->userData = ctxt; 12246: } 12247: 12248: /* 12249: * Canonicalise the system ID 12250: */ 12251: systemIdCanonic = xmlCanonicPath(SystemID); 12252: if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12253: xmlFreeParserCtxt(ctxt); 12254: return(NULL); 12255: } 12256: 12257: /* 12258: * Ask the Entity resolver to load the damn thing 12259: */ 12260: 12261: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12262: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12263: systemIdCanonic); 12264: if (input == NULL) { 12265: if (sax != NULL) ctxt->sax = NULL; 12266: xmlFreeParserCtxt(ctxt); 12267: if (systemIdCanonic != NULL) 12268: xmlFree(systemIdCanonic); 12269: return(NULL); 12270: } 12271: 12272: /* 12273: * plug some encoding conversion routines here. 
12274: */ 12275: if (xmlPushInput(ctxt, input) < 0) { 12276: if (sax != NULL) ctxt->sax = NULL; 12277: xmlFreeParserCtxt(ctxt); 12278: if (systemIdCanonic != NULL) 12279: xmlFree(systemIdCanonic); 12280: return(NULL); 12281: } 12282: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12283: enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12284: xmlSwitchEncoding(ctxt, enc); 12285: } 12286: 12287: if (input->filename == NULL) 12288: input->filename = (char *) systemIdCanonic; 12289: else 12290: xmlFree(systemIdCanonic); 12291: input->line = 1; 12292: input->col = 1; 12293: input->base = ctxt->input->cur; 12294: input->cur = ctxt->input->cur; 12295: input->free = NULL; 12296: 12297: /* 12298: * let's parse that entity knowing it's an external subset. 12299: */ 12300: ctxt->inSubset = 2; 12301: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12302: if (ctxt->myDoc == NULL) { 12303: xmlErrMemory(ctxt, "New Doc failed"); 12304: if (sax != NULL) ctxt->sax = NULL; 12305: xmlFreeParserCtxt(ctxt); 12306: return(NULL); 12307: } 12308: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12309: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12310: ExternalID, SystemID); 12311: xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12312: 12313: if (ctxt->myDoc != NULL) { 12314: if (ctxt->wellFormed) { 12315: ret = ctxt->myDoc->extSubset; 12316: ctxt->myDoc->extSubset = NULL; 12317: if (ret != NULL) { 12318: xmlNodePtr tmp; 12319: 12320: ret->doc = NULL; 12321: tmp = ret->children; 12322: while (tmp != NULL) { 12323: tmp->doc = NULL; 12324: tmp = tmp->next; 12325: } 12326: } 12327: } else { 12328: ret = NULL; 12329: } 12330: xmlFreeDoc(ctxt->myDoc); 12331: ctxt->myDoc = NULL; 12332: } 12333: if (sax != NULL) ctxt->sax = NULL; 12334: xmlFreeParserCtxt(ctxt); 12335: 12336: return(ret); 12337: } 12338: 12339: 12340: /** 12341: * xmlParseDTD: 12342: * @ExternalID: a NAME* containing the External ID of the DTD 12343: * @SystemID: a NAME* containing the URL to the DTD 12344: * 
12345: * Load and parse an external subset. 12346: * 12347: * Returns the resulting xmlDtdPtr or NULL in case of error. 12348: */ 12349: 12350: xmlDtdPtr 12351: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12352: return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12353: } 12354: #endif /* LIBXML_VALID_ENABLED */ 12355: 12356: /************************************************************************ 12357: * * 12358: * Front ends when parsing an Entity * 12359: * * 12360: ************************************************************************/ 12361: 12362: /** 12363: * xmlParseCtxtExternalEntity: 12364: * @ctx: the existing parsing context 12365: * @URL: the URL for the entity to load 12366: * @ID: the System ID for the entity to load 12367: * @lst: the return value for the set of parsed nodes 12368: * 12369: * Parse an external general entity within an existing parsing context 12370: * An external general parsed entity is well-formed if it matches the 12371: * production labeled extParsedEnt. 12372: * 12373: * [78] extParsedEnt ::= TextDecl? 
content 12374: * 12375: * Returns 0 if the entity is well formed, -1 in case of args problem and 12376: * the parser error code otherwise 12377: */ 12378: 12379: int 12380: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12381: const xmlChar *ID, xmlNodePtr *lst) { 12382: xmlParserCtxtPtr ctxt; 12383: xmlDocPtr newDoc; 12384: xmlNodePtr newRoot; 12385: xmlSAXHandlerPtr oldsax = NULL; 12386: int ret = 0; 12387: xmlChar start[4]; 12388: xmlCharEncoding enc; 12389: 12390: if (ctx == NULL) return(-1); 12391: 12392: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12393: (ctx->depth > 1024)) { 12394: return(XML_ERR_ENTITY_LOOP); 12395: } 12396: 12397: if (lst != NULL) 12398: *lst = NULL; 12399: if ((URL == NULL) && (ID == NULL)) 12400: return(-1); 12401: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12402: return(-1); 12403: 12404: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12405: if (ctxt == NULL) { 12406: return(-1); 12407: } 12408: 12409: oldsax = ctxt->sax; 12410: ctxt->sax = ctx->sax; 12411: xmlDetectSAX2(ctxt); 12412: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12413: if (newDoc == NULL) { 12414: xmlFreeParserCtxt(ctxt); 12415: return(-1); 12416: } 12417: newDoc->properties = XML_DOC_INTERNAL; 12418: if (ctx->myDoc->dict) { 12419: newDoc->dict = ctx->myDoc->dict; 12420: xmlDictReference(newDoc->dict); 12421: } 12422: if (ctx->myDoc != NULL) { 12423: newDoc->intSubset = ctx->myDoc->intSubset; 12424: newDoc->extSubset = ctx->myDoc->extSubset; 12425: } 12426: if (ctx->myDoc->URL != NULL) { 12427: newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12428: } 12429: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12430: if (newRoot == NULL) { 12431: ctxt->sax = oldsax; 12432: xmlFreeParserCtxt(ctxt); 12433: newDoc->intSubset = NULL; 12434: newDoc->extSubset = NULL; 12435: xmlFreeDoc(newDoc); 12436: return(-1); 12437: } 12438: xmlAddChild((xmlNodePtr) newDoc, newRoot); 12439: nodePush(ctxt, 
newDoc->children); 12440: if (ctx->myDoc == NULL) { 12441: ctxt->myDoc = newDoc; 12442: } else { 12443: ctxt->myDoc = ctx->myDoc; 12444: newDoc->children->doc = ctx->myDoc; 12445: } 12446: 12447: /* 12448: * Get the 4 first bytes and decode the charset 12449: * if enc != XML_CHAR_ENCODING_NONE 12450: * plug some encoding conversion routines. 12451: */ 12452: GROW 12453: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12454: start[0] = RAW; 12455: start[1] = NXT(1); 12456: start[2] = NXT(2); 12457: start[3] = NXT(3); 12458: enc = xmlDetectCharEncoding(start, 4); 12459: if (enc != XML_CHAR_ENCODING_NONE) { 12460: xmlSwitchEncoding(ctxt, enc); 12461: } 12462: } 12463: 12464: /* 12465: * Parse a possible text declaration first 12466: */ 12467: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12468: xmlParseTextDecl(ctxt); 12469: /* 12470: * An XML-1.0 document can't reference an entity not XML-1.0 12471: */ 12472: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12473: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12474: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12475: "Version mismatch between document and entity\n"); 12476: } 12477: } 12478: 12479: /* 12480: * If the user provided its own SAX callbacks then reuse the 12481: * useData callback field, otherwise the expected setup in a 12482: * DOM builder is to have userData == ctxt 12483: */ 12484: if (ctx->userData == ctx) 12485: ctxt->userData = ctxt; 12486: else 12487: ctxt->userData = ctx->userData; 12488: 12489: /* 12490: * Doing validity checking on chunk doesn't make sense 12491: */ 12492: ctxt->instate = XML_PARSER_CONTENT; 12493: ctxt->validate = ctx->validate; 12494: ctxt->valid = ctx->valid; 12495: ctxt->loadsubset = ctx->loadsubset; 12496: ctxt->depth = ctx->depth + 1; 12497: ctxt->replaceEntities = ctx->replaceEntities; 12498: if (ctxt->validate) { 12499: ctxt->vctxt.error = ctx->vctxt.error; 12500: ctxt->vctxt.warning = ctx->vctxt.warning; 12501: } else { 12502: 
ctxt->vctxt.error = NULL; 12503: ctxt->vctxt.warning = NULL; 12504: } 12505: ctxt->vctxt.nodeTab = NULL; 12506: ctxt->vctxt.nodeNr = 0; 12507: ctxt->vctxt.nodeMax = 0; 12508: ctxt->vctxt.node = NULL; 12509: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12510: ctxt->dict = ctx->dict; 12511: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12512: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12513: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12514: ctxt->dictNames = ctx->dictNames; 12515: ctxt->attsDefault = ctx->attsDefault; 12516: ctxt->attsSpecial = ctx->attsSpecial; 12517: ctxt->linenumbers = ctx->linenumbers; 12518: 12519: xmlParseContent(ctxt); 12520: 12521: ctx->validate = ctxt->validate; 12522: ctx->valid = ctxt->valid; 12523: if ((RAW == '<') && (NXT(1) == '/')) { 12524: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12525: } else if (RAW != 0) { 12526: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12527: } 12528: if (ctxt->node != newDoc->children) { 12529: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12530: } 12531: 12532: if (!ctxt->wellFormed) { 12533: if (ctxt->errNo == 0) 12534: ret = 1; 12535: else 12536: ret = ctxt->errNo; 12537: } else { 12538: if (lst != NULL) { 12539: xmlNodePtr cur; 12540: 12541: /* 12542: * Return the newly created nodeset after unlinking it from 12543: * they pseudo parent. 
12544: */ 12545: cur = newDoc->children->children; 12546: *lst = cur; 12547: while (cur != NULL) { 12548: cur->parent = NULL; 12549: cur = cur->next; 12550: } 12551: newDoc->children->children = NULL; 12552: } 12553: ret = 0; 12554: } 12555: ctxt->sax = oldsax; 12556: ctxt->dict = NULL; 12557: ctxt->attsDefault = NULL; 12558: ctxt->attsSpecial = NULL; 12559: xmlFreeParserCtxt(ctxt); 12560: newDoc->intSubset = NULL; 12561: newDoc->extSubset = NULL; 12562: xmlFreeDoc(newDoc); 12563: 12564: return(ret); 12565: } 12566: 12567: /** 12568: * xmlParseExternalEntityPrivate: 12569: * @doc: the document the chunk pertains to 12570: * @oldctxt: the previous parser context if available 12571: * @sax: the SAX handler bloc (possibly NULL) 12572: * @user_data: The user data returned on SAX callbacks (possibly NULL) 12573: * @depth: Used for loop detection, use 0 12574: * @URL: the URL for the entity to load 12575: * @ID: the System ID for the entity to load 12576: * @list: the return value for the set of parsed nodes 12577: * 12578: * Private version of xmlParseExternalEntity() 12579: * 12580: * Returns 0 if the entity is well formed, -1 in case of args problem and 12581: * the parser error code otherwise 12582: */ 12583: 12584: static xmlParserErrors 12585: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12586: xmlSAXHandlerPtr sax, 12587: void *user_data, int depth, const xmlChar *URL, 12588: const xmlChar *ID, xmlNodePtr *list) { 12589: xmlParserCtxtPtr ctxt; 12590: xmlDocPtr newDoc; 12591: xmlNodePtr newRoot; 12592: xmlSAXHandlerPtr oldsax = NULL; 12593: xmlParserErrors ret = XML_ERR_OK; 12594: xmlChar start[4]; 12595: xmlCharEncoding enc; 12596: 12597: if (((depth > 40) && 12598: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12599: (depth > 1024)) { 12600: return(XML_ERR_ENTITY_LOOP); 12601: } 12602: 12603: if (list != NULL) 12604: *list = NULL; 12605: if ((URL == NULL) && (ID == NULL)) 12606: return(XML_ERR_INTERNAL_ERROR); 
12607: if (doc == NULL) 12608: return(XML_ERR_INTERNAL_ERROR); 12609: 12610: 12611: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 12612: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12613: ctxt->userData = ctxt; 12614: if (oldctxt != NULL) { 12615: ctxt->_private = oldctxt->_private; 12616: ctxt->loadsubset = oldctxt->loadsubset; 12617: ctxt->validate = oldctxt->validate; 12618: ctxt->external = oldctxt->external; 12619: ctxt->record_info = oldctxt->record_info; 12620: ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12621: ctxt->node_seq.length = oldctxt->node_seq.length; 12622: ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12623: } else { 12624: /* 12625: * Doing validity checking on chunk without context 12626: * doesn't make sense 12627: */ 12628: ctxt->_private = NULL; 12629: ctxt->validate = 0; 12630: ctxt->external = 2; 12631: ctxt->loadsubset = 0; 12632: } 12633: if (sax != NULL) { 12634: oldsax = ctxt->sax; 12635: ctxt->sax = sax; 12636: if (user_data != NULL) 12637: ctxt->userData = user_data; 12638: } 12639: xmlDetectSAX2(ctxt); 12640: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12641: if (newDoc == NULL) { 12642: ctxt->node_seq.maximum = 0; 12643: ctxt->node_seq.length = 0; 12644: ctxt->node_seq.buffer = NULL; 12645: xmlFreeParserCtxt(ctxt); 12646: return(XML_ERR_INTERNAL_ERROR); 12647: } 12648: newDoc->properties = XML_DOC_INTERNAL; 12649: newDoc->intSubset = doc->intSubset; 12650: newDoc->extSubset = doc->extSubset; 12651: newDoc->dict = doc->dict; 12652: xmlDictReference(newDoc->dict); 12653: 12654: if (doc->URL != NULL) { 12655: newDoc->URL = xmlStrdup(doc->URL); 12656: } 12657: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12658: if (newRoot == NULL) { 12659: if (sax != NULL) 12660: ctxt->sax = oldsax; 12661: ctxt->node_seq.maximum = 0; 12662: ctxt->node_seq.length = 0; 12663: ctxt->node_seq.buffer = NULL; 12664: xmlFreeParserCtxt(ctxt); 12665: newDoc->intSubset = NULL; 12666: newDoc->extSubset = 
NULL; 12667: xmlFreeDoc(newDoc); 12668: return(XML_ERR_INTERNAL_ERROR); 12669: } 12670: xmlAddChild((xmlNodePtr) newDoc, newRoot); 12671: nodePush(ctxt, newDoc->children); 12672: ctxt->myDoc = doc; 12673: newRoot->doc = doc; 12674: 12675: /* 12676: * Get the 4 first bytes and decode the charset 12677: * if enc != XML_CHAR_ENCODING_NONE 12678: * plug some encoding conversion routines. 12679: */ 12680: GROW; 12681: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12682: start[0] = RAW; 12683: start[1] = NXT(1); 12684: start[2] = NXT(2); 12685: start[3] = NXT(3); 12686: enc = xmlDetectCharEncoding(start, 4); 12687: if (enc != XML_CHAR_ENCODING_NONE) { 12688: xmlSwitchEncoding(ctxt, enc); 12689: } 12690: } 12691: 12692: /* 12693: * Parse a possible text declaration first 12694: */ 12695: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12696: xmlParseTextDecl(ctxt); 12697: } 12698: 12699: ctxt->instate = XML_PARSER_CONTENT; 12700: ctxt->depth = depth; 12701: 12702: xmlParseContent(ctxt); 12703: 12704: if ((RAW == '<') && (NXT(1) == '/')) { 12705: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12706: } else if (RAW != 0) { 12707: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12708: } 12709: if (ctxt->node != newDoc->children) { 12710: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12711: } 12712: 12713: if (!ctxt->wellFormed) { 12714: if (ctxt->errNo == 0) 12715: ret = XML_ERR_INTERNAL_ERROR; 12716: else 12717: ret = (xmlParserErrors)ctxt->errNo; 12718: } else { 12719: if (list != NULL) { 12720: xmlNodePtr cur; 12721: 12722: /* 12723: * Return the newly created nodeset after unlinking it from 12724: * they pseudo parent. 
12725: */ 12726: cur = newDoc->children->children; 12727: *list = cur; 12728: while (cur != NULL) { 12729: cur->parent = NULL; 12730: cur = cur->next; 12731: } 12732: newDoc->children->children = NULL; 12733: } 12734: ret = XML_ERR_OK; 12735: } 12736: 12737: /* 12738: * Record in the parent context the number of entities replacement 12739: * done when parsing that reference. 12740: */ 12741: if (oldctxt != NULL) 12742: oldctxt->nbentities += ctxt->nbentities; 12743: 12744: /* 12745: * Also record the size of the entity parsed 12746: */ 12747: if (ctxt->input != NULL) { 12748: oldctxt->sizeentities += ctxt->input->consumed; 12749: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 12750: } 12751: /* 12752: * And record the last error if any 12753: */ 12754: if (ctxt->lastError.code != XML_ERR_OK) 12755: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12756: 12757: if (sax != NULL) 12758: ctxt->sax = oldsax; 12759: oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12760: oldctxt->node_seq.length = ctxt->node_seq.length; 12761: oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12762: ctxt->node_seq.maximum = 0; 12763: ctxt->node_seq.length = 0; 12764: ctxt->node_seq.buffer = NULL; 12765: xmlFreeParserCtxt(ctxt); 12766: newDoc->intSubset = NULL; 12767: newDoc->extSubset = NULL; 12768: xmlFreeDoc(newDoc); 12769: 12770: return(ret); 12771: } 12772: 12773: #ifdef LIBXML_SAX1_ENABLED 12774: /** 12775: * xmlParseExternalEntity: 12776: * @doc: the document the chunk pertains to 12777: * @sax: the SAX handler bloc (possibly NULL) 12778: * @user_data: The user data returned on SAX callbacks (possibly NULL) 12779: * @depth: Used for loop detection, use 0 12780: * @URL: the URL for the entity to load 12781: * @ID: the System ID for the entity to load 12782: * @lst: the return value for the set of parsed nodes 12783: * 12784: * Parse an external general entity 12785: * An external general parsed entity is well-formed if it matches the 12786: * production 
labeled extParsedEnt. 12787: * 12788: * [78] extParsedEnt ::= TextDecl? content 12789: * 12790: * Returns 0 if the entity is well formed, -1 in case of args problem and 12791: * the parser error code otherwise 12792: */ 12793: 12794: int 12795: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12796: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12797: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12798: ID, lst)); 12799: } 12800: 12801: /** 12802: * xmlParseBalancedChunkMemory: 12803: * @doc: the document the chunk pertains to 12804: * @sax: the SAX handler bloc (possibly NULL) 12805: * @user_data: The user data returned on SAX callbacks (possibly NULL) 12806: * @depth: Used for loop detection, use 0 12807: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12808: * @lst: the return value for the set of parsed nodes 12809: * 12810: * Parse a well-balanced chunk of an XML document 12811: * called by the parser 12812: * The allowed sequence for the Well Balanced Chunk is the one defined by 12813: * the content production in the XML grammar: 12814: * 12815: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12816: * 12817: * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12818: * the parser error code otherwise 12819: */ 12820: 12821: int 12822: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12823: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12824: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12825: depth, string, lst, 0 ); 12826: } 12827: #endif /* LIBXML_SAX1_ENABLED */ 12828: 12829: /** 12830: * xmlParseBalancedChunkMemoryInternal: 12831: * @oldctxt: the existing parsing context 12832: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12833: * @user_data: the user data field for the parser context 12834: * @lst: the return value for the set 
of parsed nodes 12835: * 12836: * 12837: * Parse a well-balanced chunk of an XML document 12838: * called by the parser 12839: * The allowed sequence for the Well Balanced Chunk is the one defined by 12840: * the content production in the XML grammar: 12841: * 12842: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12843: * 12844: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12845: * error code otherwise 12846: * 12847: * In case recover is set to 1, the nodelist will not be empty even if 12848: * the parsed chunk is not well balanced. 12849: */ 12850: static xmlParserErrors 12851: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12852: const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12853: xmlParserCtxtPtr ctxt; 12854: xmlDocPtr newDoc = NULL; 12855: xmlNodePtr newRoot; 12856: xmlSAXHandlerPtr oldsax = NULL; 12857: xmlNodePtr content = NULL; 12858: xmlNodePtr last = NULL; 12859: int size; 12860: xmlParserErrors ret = XML_ERR_OK; 12861: #ifdef SAX2 12862: int i; 12863: #endif 12864: 12865: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12866: (oldctxt->depth > 1024)) { 12867: return(XML_ERR_ENTITY_LOOP); 12868: } 12869: 12870: 12871: if (lst != NULL) 12872: *lst = NULL; 12873: if (string == NULL) 12874: return(XML_ERR_INTERNAL_ERROR); 12875: 12876: size = xmlStrlen(string); 12877: 12878: ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12879: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12880: if (user_data != NULL) 12881: ctxt->userData = user_data; 12882: else 12883: ctxt->userData = ctxt; 12884: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12885: ctxt->dict = oldctxt->dict; 12886: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12887: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12888: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12889: 12890: #ifdef SAX2 12891: /* propagate 
namespaces down the entity */ 12892: for (i = 0;i < oldctxt->nsNr;i += 2) { 12893: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 12894: } 12895: #endif 12896: 12897: oldsax = ctxt->sax; 12898: ctxt->sax = oldctxt->sax; 12899: xmlDetectSAX2(ctxt); 12900: ctxt->replaceEntities = oldctxt->replaceEntities; 12901: ctxt->options = oldctxt->options; 12902: 12903: ctxt->_private = oldctxt->_private; 12904: if (oldctxt->myDoc == NULL) { 12905: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12906: if (newDoc == NULL) { 12907: ctxt->sax = oldsax; 12908: ctxt->dict = NULL; 12909: xmlFreeParserCtxt(ctxt); 12910: return(XML_ERR_INTERNAL_ERROR); 12911: } 12912: newDoc->properties = XML_DOC_INTERNAL; 12913: newDoc->dict = ctxt->dict; 12914: xmlDictReference(newDoc->dict); 12915: ctxt->myDoc = newDoc; 12916: } else { 12917: ctxt->myDoc = oldctxt->myDoc; 12918: content = ctxt->myDoc->children; 12919: last = ctxt->myDoc->last; 12920: } 12921: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 12922: if (newRoot == NULL) { 12923: ctxt->sax = oldsax; 12924: ctxt->dict = NULL; 12925: xmlFreeParserCtxt(ctxt); 12926: if (newDoc != NULL) { 12927: xmlFreeDoc(newDoc); 12928: } 12929: return(XML_ERR_INTERNAL_ERROR); 12930: } 12931: ctxt->myDoc->children = NULL; 12932: ctxt->myDoc->last = NULL; 12933: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 12934: nodePush(ctxt, ctxt->myDoc->children); 12935: ctxt->instate = XML_PARSER_CONTENT; 12936: ctxt->depth = oldctxt->depth + 1; 12937: 12938: ctxt->validate = 0; 12939: ctxt->loadsubset = oldctxt->loadsubset; 12940: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 12941: /* 12942: * ID/IDREF registration will be done in xmlValidateElement below 12943: */ 12944: ctxt->loadsubset |= XML_SKIP_IDS; 12945: } 12946: ctxt->dictNames = oldctxt->dictNames; 12947: ctxt->attsDefault = oldctxt->attsDefault; 12948: ctxt->attsSpecial = oldctxt->attsSpecial; 12949: 12950: xmlParseContent(ctxt); 12951: if ((RAW == '<') && (NXT(1) 
== '/')) { 12952: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12953: } else if (RAW != 0) { 12954: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12955: } 12956: if (ctxt->node != ctxt->myDoc->children) { 12957: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12958: } 12959: 12960: if (!ctxt->wellFormed) { 12961: if (ctxt->errNo == 0) 12962: ret = XML_ERR_INTERNAL_ERROR; 12963: else 12964: ret = (xmlParserErrors)ctxt->errNo; 12965: } else { 12966: ret = XML_ERR_OK; 12967: } 12968: 12969: if ((lst != NULL) && (ret == XML_ERR_OK)) { 12970: xmlNodePtr cur; 12971: 12972: /* 12973: * Return the newly created nodeset after unlinking it from 12974: * they pseudo parent. 12975: */ 12976: cur = ctxt->myDoc->children->children; 12977: *lst = cur; 12978: while (cur != NULL) { 12979: #ifdef LIBXML_VALID_ENABLED 12980: if ((oldctxt->validate) && (oldctxt->wellFormed) && 12981: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12982: (cur->type == XML_ELEMENT_NODE)) { 12983: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12984: oldctxt->myDoc, cur); 12985: } 12986: #endif /* LIBXML_VALID_ENABLED */ 12987: cur->parent = NULL; 12988: cur = cur->next; 12989: } 12990: ctxt->myDoc->children->children = NULL; 12991: } 12992: if (ctxt->myDoc != NULL) { 12993: xmlFreeNode(ctxt->myDoc->children); 12994: ctxt->myDoc->children = content; 12995: ctxt->myDoc->last = last; 12996: } 12997: 12998: /* 12999: * Record in the parent context the number of entities replacement 13000: * done when parsing that reference. 
13001: */ 13002: if (oldctxt != NULL) 13003: oldctxt->nbentities += ctxt->nbentities; 13004: 13005: /* 13006: * Also record the last error if any 13007: */ 13008: if (ctxt->lastError.code != XML_ERR_OK) 13009: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13010: 13011: ctxt->sax = oldsax; 13012: ctxt->dict = NULL; 13013: ctxt->attsDefault = NULL; 13014: ctxt->attsSpecial = NULL; 13015: xmlFreeParserCtxt(ctxt); 13016: if (newDoc != NULL) { 13017: xmlFreeDoc(newDoc); 13018: } 13019: 13020: return(ret); 13021: } 13022: 13023: /** 13024: * xmlParseInNodeContext: 13025: * @node: the context node 13026: * @data: the input string 13027: * @datalen: the input string length in bytes 13028: * @options: a combination of xmlParserOption 13029: * @lst: the return value for the set of parsed nodes 13030: * 13031: * Parse a well-balanced chunk of an XML document 13032: * within the context (DTD, namespaces, etc ...) of the given node. 13033: * 13034: * The allowed sequence for the data is a Well Balanced Chunk defined by 13035: * the content production in the XML grammar: 13036: * 13037: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13038: * 13039: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13040: * error code otherwise 13041: */ 13042: xmlParserErrors 13043: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13044: int options, xmlNodePtr *lst) { 13045: #ifdef SAX2 13046: xmlParserCtxtPtr ctxt; 13047: xmlDocPtr doc = NULL; 13048: xmlNodePtr fake, cur; 13049: int nsnr = 0; 13050: 13051: xmlParserErrors ret = XML_ERR_OK; 13052: 13053: /* 13054: * check all input parameters, grab the document 13055: */ 13056: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13057: return(XML_ERR_INTERNAL_ERROR); 13058: switch (node->type) { 13059: case XML_ELEMENT_NODE: 13060: case XML_ATTRIBUTE_NODE: 13061: case XML_TEXT_NODE: 13062: case XML_CDATA_SECTION_NODE: 13063: case XML_ENTITY_REF_NODE: 
13064: case XML_PI_NODE: 13065: case XML_COMMENT_NODE: 13066: case XML_DOCUMENT_NODE: 13067: case XML_HTML_DOCUMENT_NODE: 13068: break; 13069: default: 13070: return(XML_ERR_INTERNAL_ERROR); 13071: 13072: } 13073: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13074: (node->type != XML_DOCUMENT_NODE) && 13075: (node->type != XML_HTML_DOCUMENT_NODE)) 13076: node = node->parent; 13077: if (node == NULL) 13078: return(XML_ERR_INTERNAL_ERROR); 13079: if (node->type == XML_ELEMENT_NODE) 13080: doc = node->doc; 13081: else 13082: doc = (xmlDocPtr) node; 13083: if (doc == NULL) 13084: return(XML_ERR_INTERNAL_ERROR); 13085: 13086: /* 13087: * allocate a context and set-up everything not related to the 13088: * node position in the tree 13089: */ 13090: if (doc->type == XML_DOCUMENT_NODE) 13091: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13092: #ifdef LIBXML_HTML_ENABLED 13093: else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13094: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13095: /* 13096: * When parsing in context, it makes no sense to add implied 13097: * elements like html/body/etc... 13098: */ 13099: options |= HTML_PARSE_NOIMPLIED; 13100: } 13101: #endif 13102: else 13103: return(XML_ERR_INTERNAL_ERROR); 13104: 13105: if (ctxt == NULL) 13106: return(XML_ERR_NO_MEMORY); 13107: 13108: /* 13109: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13110: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13111: * we must wait until the last moment to free the original one. 
13112: */ 13113: if (doc->dict != NULL) { 13114: if (ctxt->dict != NULL) 13115: xmlDictFree(ctxt->dict); 13116: ctxt->dict = doc->dict; 13117: } else 13118: options |= XML_PARSE_NODICT; 13119: 13120: if (doc->encoding != NULL) { 13121: xmlCharEncodingHandlerPtr hdlr; 13122: 13123: if (ctxt->encoding != NULL) 13124: xmlFree((xmlChar *) ctxt->encoding); 13125: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13126: 13127: hdlr = xmlFindCharEncodingHandler(doc->encoding); 13128: if (hdlr != NULL) { 13129: xmlSwitchToEncoding(ctxt, hdlr); 13130: } else { 13131: return(XML_ERR_UNSUPPORTED_ENCODING); 13132: } 13133: } 13134: 13135: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13136: xmlDetectSAX2(ctxt); 13137: ctxt->myDoc = doc; 13138: 13139: fake = xmlNewComment(NULL); 13140: if (fake == NULL) { 13141: xmlFreeParserCtxt(ctxt); 13142: return(XML_ERR_NO_MEMORY); 13143: } 13144: xmlAddChild(node, fake); 13145: 13146: if (node->type == XML_ELEMENT_NODE) { 13147: nodePush(ctxt, node); 13148: /* 13149: * initialize the SAX2 namespaces stack 13150: */ 13151: cur = node; 13152: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13153: xmlNsPtr ns = cur->nsDef; 13154: const xmlChar *iprefix, *ihref; 13155: 13156: while (ns != NULL) { 13157: if (ctxt->dict) { 13158: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13159: ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13160: } else { 13161: iprefix = ns->prefix; 13162: ihref = ns->href; 13163: } 13164: 13165: if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13166: nsPush(ctxt, iprefix, ihref); 13167: nsnr++; 13168: } 13169: ns = ns->next; 13170: } 13171: cur = cur->parent; 13172: } 13173: ctxt->instate = XML_PARSER_CONTENT; 13174: } 13175: 13176: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13177: /* 13178: * ID/IDREF registration will be done in xmlValidateElement below 13179: */ 13180: ctxt->loadsubset |= XML_SKIP_IDS; 13181: } 13182: 13183: #ifdef LIBXML_HTML_ENABLED 13184: if (doc->type == 
XML_HTML_DOCUMENT_NODE) 13185: __htmlParseContent(ctxt); 13186: else 13187: #endif 13188: xmlParseContent(ctxt); 13189: 13190: nsPop(ctxt, nsnr); 13191: if ((RAW == '<') && (NXT(1) == '/')) { 13192: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13193: } else if (RAW != 0) { 13194: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13195: } 13196: if ((ctxt->node != NULL) && (ctxt->node != node)) { 13197: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13198: ctxt->wellFormed = 0; 13199: } 13200: 13201: if (!ctxt->wellFormed) { 13202: if (ctxt->errNo == 0) 13203: ret = XML_ERR_INTERNAL_ERROR; 13204: else 13205: ret = (xmlParserErrors)ctxt->errNo; 13206: } else { 13207: ret = XML_ERR_OK; 13208: } 13209: 13210: /* 13211: * Return the newly created nodeset after unlinking it from 13212: * the pseudo sibling. 13213: */ 13214: 13215: cur = fake->next; 13216: fake->next = NULL; 13217: node->last = fake; 13218: 13219: if (cur != NULL) { 13220: cur->prev = NULL; 13221: } 13222: 13223: *lst = cur; 13224: 13225: while (cur != NULL) { 13226: cur->parent = NULL; 13227: cur = cur->next; 13228: } 13229: 13230: xmlUnlinkNode(fake); 13231: xmlFreeNode(fake); 13232: 13233: 13234: if (ret != XML_ERR_OK) { 13235: xmlFreeNodeList(*lst); 13236: *lst = NULL; 13237: } 13238: 13239: if (doc->dict != NULL) 13240: ctxt->dict = NULL; 13241: xmlFreeParserCtxt(ctxt); 13242: 13243: return(ret); 13244: #else /* !SAX2 */ 13245: return(XML_ERR_INTERNAL_ERROR); 13246: #endif 13247: } 13248: 13249: #ifdef LIBXML_SAX1_ENABLED 13250: /** 13251: * xmlParseBalancedChunkMemoryRecover: 13252: * @doc: the document the chunk pertains to 13253: * @sax: the SAX handler bloc (possibly NULL) 13254: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13255: * @depth: Used for loop detection, use 0 13256: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13257: * @lst: the return value for the set of parsed nodes 13258: * @recover: return nodes even if the data is broken 
(use 0) 13259: * 13260: * 13261: * Parse a well-balanced chunk of an XML document 13262: * called by the parser 13263: * The allowed sequence for the Well Balanced Chunk is the one defined by 13264: * the content production in the XML grammar: 13265: * 13266: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13267: * 13268: * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13269: * the parser error code otherwise 13270: * 13271: * In case recover is set to 1, the nodelist will not be empty even if 13272: * the parsed chunk is not well balanced, assuming the parsing succeeded to 13273: * some extent. 13274: */ 13275: int 13276: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13277: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13278: int recover) { 13279: xmlParserCtxtPtr ctxt; 13280: xmlDocPtr newDoc; 13281: xmlSAXHandlerPtr oldsax = NULL; 13282: xmlNodePtr content, newRoot; 13283: int size; 13284: int ret = 0; 13285: 13286: if (depth > 40) { 13287: return(XML_ERR_ENTITY_LOOP); 13288: } 13289: 13290: 13291: if (lst != NULL) 13292: *lst = NULL; 13293: if (string == NULL) 13294: return(-1); 13295: 13296: size = xmlStrlen(string); 13297: 13298: ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13299: if (ctxt == NULL) return(-1); 13300: ctxt->userData = ctxt; 13301: if (sax != NULL) { 13302: oldsax = ctxt->sax; 13303: ctxt->sax = sax; 13304: if (user_data != NULL) 13305: ctxt->userData = user_data; 13306: } 13307: newDoc = xmlNewDoc(BAD_CAST "1.0"); 13308: if (newDoc == NULL) { 13309: xmlFreeParserCtxt(ctxt); 13310: return(-1); 13311: } 13312: newDoc->properties = XML_DOC_INTERNAL; 13313: if ((doc != NULL) && (doc->dict != NULL)) { 13314: xmlDictFree(ctxt->dict); 13315: ctxt->dict = doc->dict; 13316: xmlDictReference(ctxt->dict); 13317: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13318: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 
5); 13319: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13320: ctxt->dictNames = 1; 13321: } else { 13322: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13323: } 13324: if (doc != NULL) { 13325: newDoc->intSubset = doc->intSubset; 13326: newDoc->extSubset = doc->extSubset; 13327: } 13328: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13329: if (newRoot == NULL) { 13330: if (sax != NULL) 13331: ctxt->sax = oldsax; 13332: xmlFreeParserCtxt(ctxt); 13333: newDoc->intSubset = NULL; 13334: newDoc->extSubset = NULL; 13335: xmlFreeDoc(newDoc); 13336: return(-1); 13337: } 13338: xmlAddChild((xmlNodePtr) newDoc, newRoot); 13339: nodePush(ctxt, newRoot); 13340: if (doc == NULL) { 13341: ctxt->myDoc = newDoc; 13342: } else { 13343: ctxt->myDoc = newDoc; 13344: newDoc->children->doc = doc; 13345: /* Ensure that doc has XML spec namespace */ 13346: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13347: newDoc->oldNs = doc->oldNs; 13348: } 13349: ctxt->instate = XML_PARSER_CONTENT; 13350: ctxt->depth = depth; 13351: 13352: /* 13353: * Doing validity checking on chunk doesn't make sense 13354: */ 13355: ctxt->validate = 0; 13356: ctxt->loadsubset = 0; 13357: xmlDetectSAX2(ctxt); 13358: 13359: if ( doc != NULL ){ 13360: content = doc->children; 13361: doc->children = NULL; 13362: xmlParseContent(ctxt); 13363: doc->children = content; 13364: } 13365: else { 13366: xmlParseContent(ctxt); 13367: } 13368: if ((RAW == '<') && (NXT(1) == '/')) { 13369: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13370: } else if (RAW != 0) { 13371: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13372: } 13373: if (ctxt->node != newDoc->children) { 13374: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13375: } 13376: 13377: if (!ctxt->wellFormed) { 13378: if (ctxt->errNo == 0) 13379: ret = 1; 13380: else 13381: ret = ctxt->errNo; 13382: } else { 13383: ret = 0; 13384: } 13385: 13386: if ((lst != NULL) && ((ret == 0) || 
(recover == 1))) { 13387: xmlNodePtr cur; 13388: 13389: /* 13390: * Return the newly created nodeset after unlinking it from 13391: * they pseudo parent. 13392: */ 13393: cur = newDoc->children->children; 13394: *lst = cur; 13395: while (cur != NULL) { 13396: xmlSetTreeDoc(cur, doc); 13397: cur->parent = NULL; 13398: cur = cur->next; 13399: } 13400: newDoc->children->children = NULL; 13401: } 13402: 13403: if (sax != NULL) 13404: ctxt->sax = oldsax; 13405: xmlFreeParserCtxt(ctxt); 13406: newDoc->intSubset = NULL; 13407: newDoc->extSubset = NULL; 13408: newDoc->oldNs = NULL; 13409: xmlFreeDoc(newDoc); 13410: 13411: return(ret); 13412: } 13413: 13414: /** 13415: * xmlSAXParseEntity: 13416: * @sax: the SAX handler block 13417: * @filename: the filename 13418: * 13419: * parse an XML external entity out of context and build a tree. 13420: * It use the given SAX function block to handle the parsing callback. 13421: * If sax is NULL, fallback to the default DOM tree building routines. 13422: * 13423: * [78] extParsedEnt ::= TextDecl? 
content 13424: * 13425: * This correspond to a "Well Balanced" chunk 13426: * 13427: * Returns the resulting document tree 13428: */ 13429: 13430: xmlDocPtr 13431: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13432: xmlDocPtr ret; 13433: xmlParserCtxtPtr ctxt; 13434: 13435: ctxt = xmlCreateFileParserCtxt(filename); 13436: if (ctxt == NULL) { 13437: return(NULL); 13438: } 13439: if (sax != NULL) { 13440: if (ctxt->sax != NULL) 13441: xmlFree(ctxt->sax); 13442: ctxt->sax = sax; 13443: ctxt->userData = NULL; 13444: } 13445: 13446: xmlParseExtParsedEnt(ctxt); 13447: 13448: if (ctxt->wellFormed) 13449: ret = ctxt->myDoc; 13450: else { 13451: ret = NULL; 13452: xmlFreeDoc(ctxt->myDoc); 13453: ctxt->myDoc = NULL; 13454: } 13455: if (sax != NULL) 13456: ctxt->sax = NULL; 13457: xmlFreeParserCtxt(ctxt); 13458: 13459: return(ret); 13460: } 13461: 13462: /** 13463: * xmlParseEntity: 13464: * @filename: the filename 13465: * 13466: * parse an XML external entity out of context and build a tree. 13467: * 13468: * [78] extParsedEnt ::= TextDecl? content 13469: * 13470: * This correspond to a "Well Balanced" chunk 13471: * 13472: * Returns the resulting document tree 13473: */ 13474: 13475: xmlDocPtr 13476: xmlParseEntity(const char *filename) { 13477: return(xmlSAXParseEntity(NULL, filename)); 13478: } 13479: #endif /* LIBXML_SAX1_ENABLED */ 13480: 13481: /** 13482: * xmlCreateEntityParserCtxtInternal: 13483: * @URL: the entity URL 13484: * @ID: the entity PUBLIC ID 13485: * @base: a possible base for the target URI 13486: * @pctx: parser context used to set options on new context 13487: * 13488: * Create a parser context for an external entity 13489: * Automatic support for ZLIB/Compress compressed document is provided 13490: * by default if found at compile-time. 
13491: * 13492: * Returns the new parser context or NULL 13493: */ 13494: static xmlParserCtxtPtr 13495: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13496: const xmlChar *base, xmlParserCtxtPtr pctx) { 13497: xmlParserCtxtPtr ctxt; 13498: xmlParserInputPtr inputStream; 13499: char *directory = NULL; 13500: xmlChar *uri; 13501: 13502: ctxt = xmlNewParserCtxt(); 13503: if (ctxt == NULL) { 13504: return(NULL); 13505: } 13506: 13507: if (pctx != NULL) { 13508: ctxt->options = pctx->options; 13509: ctxt->_private = pctx->_private; 13510: } 13511: 13512: uri = xmlBuildURI(URL, base); 13513: 13514: if (uri == NULL) { 13515: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13516: if (inputStream == NULL) { 13517: xmlFreeParserCtxt(ctxt); 13518: return(NULL); 13519: } 13520: 13521: inputPush(ctxt, inputStream); 13522: 13523: if ((ctxt->directory == NULL) && (directory == NULL)) 13524: directory = xmlParserGetDirectory((char *)URL); 13525: if ((ctxt->directory == NULL) && (directory != NULL)) 13526: ctxt->directory = directory; 13527: } else { 13528: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13529: if (inputStream == NULL) { 13530: xmlFree(uri); 13531: xmlFreeParserCtxt(ctxt); 13532: return(NULL); 13533: } 13534: 13535: inputPush(ctxt, inputStream); 13536: 13537: if ((ctxt->directory == NULL) && (directory == NULL)) 13538: directory = xmlParserGetDirectory((char *)uri); 13539: if ((ctxt->directory == NULL) && (directory != NULL)) 13540: ctxt->directory = directory; 13541: xmlFree(uri); 13542: } 13543: return(ctxt); 13544: } 13545: 13546: /** 13547: * xmlCreateEntityParserCtxt: 13548: * @URL: the entity URL 13549: * @ID: the entity PUBLIC ID 13550: * @base: a possible base for the target URI 13551: * 13552: * Create a parser context for an external entity 13553: * Automatic support for ZLIB/Compress compressed document is provided 13554: * by default if found at compile-time. 
13555: * 13556: * Returns the new parser context or NULL 13557: */ 13558: xmlParserCtxtPtr 13559: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13560: const xmlChar *base) { 13561: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 13562: 13563: } 13564: 13565: /************************************************************************ 13566: * * 13567: * Front ends when parsing from a file * 13568: * * 13569: ************************************************************************/ 13570: 13571: /** 13572: * xmlCreateURLParserCtxt: 13573: * @filename: the filename or URL 13574: * @options: a combination of xmlParserOption 13575: * 13576: * Create a parser context for a file or URL content. 13577: * Automatic support for ZLIB/Compress compressed document is provided 13578: * by default if found at compile-time and for file accesses 13579: * 13580: * Returns the new parser context or NULL 13581: */ 13582: xmlParserCtxtPtr 13583: xmlCreateURLParserCtxt(const char *filename, int options) 13584: { 13585: xmlParserCtxtPtr ctxt; 13586: xmlParserInputPtr inputStream; 13587: char *directory = NULL; 13588: 13589: ctxt = xmlNewParserCtxt(); 13590: if (ctxt == NULL) { 13591: xmlErrMemory(NULL, "cannot allocate parser context"); 13592: return(NULL); 13593: } 13594: 13595: if (options) 13596: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13597: ctxt->linenumbers = 1; 13598: 13599: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13600: if (inputStream == NULL) { 13601: xmlFreeParserCtxt(ctxt); 13602: return(NULL); 13603: } 13604: 13605: inputPush(ctxt, inputStream); 13606: if ((ctxt->directory == NULL) && (directory == NULL)) 13607: directory = xmlParserGetDirectory(filename); 13608: if ((ctxt->directory == NULL) && (directory != NULL)) 13609: ctxt->directory = directory; 13610: 13611: return(ctxt); 13612: } 13613: 13614: /** 13615: * xmlCreateFileParserCtxt: 13616: * @filename: the filename 13617: * 13618: * Create a parser context for 
a file content. 13619: * Automatic support for ZLIB/Compress compressed document is provided 13620: * by default if found at compile-time. 13621: * 13622: * Returns the new parser context or NULL 13623: */ 13624: xmlParserCtxtPtr 13625: xmlCreateFileParserCtxt(const char *filename) 13626: { 13627: return(xmlCreateURLParserCtxt(filename, 0)); 13628: } 13629: 13630: #ifdef LIBXML_SAX1_ENABLED 13631: /** 13632: * xmlSAXParseFileWithData: 13633: * @sax: the SAX handler block 13634: * @filename: the filename 13635: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13636: * documents 13637: * @data: the userdata 13638: * 13639: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13640: * compressed document is provided by default if found at compile-time. 13641: * It use the given SAX function block to handle the parsing callback. 13642: * If sax is NULL, fallback to the default DOM tree building routines. 13643: * 13644: * User data (void *) is stored within the parser context in the 13645: * context's _private member, so it is available nearly everywhere in libxml 13646: * 13647: * Returns the resulting document tree 13648: */ 13649: 13650: xmlDocPtr 13651: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13652: int recovery, void *data) { 13653: xmlDocPtr ret; 13654: xmlParserCtxtPtr ctxt; 13655: 13656: xmlInitParser(); 13657: 13658: ctxt = xmlCreateFileParserCtxt(filename); 13659: if (ctxt == NULL) { 13660: return(NULL); 13661: } 13662: if (sax != NULL) { 13663: if (ctxt->sax != NULL) 13664: xmlFree(ctxt->sax); 13665: ctxt->sax = sax; 13666: } 13667: xmlDetectSAX2(ctxt); 13668: if (data!=NULL) { 13669: ctxt->_private = data; 13670: } 13671: 13672: if (ctxt->directory == NULL) 13673: ctxt->directory = xmlParserGetDirectory(filename); 13674: 13675: ctxt->recovery = recovery; 13676: 13677: xmlParseDocument(ctxt); 13678: 13679: if ((ctxt->wellFormed) || recovery) { 13680: ret = ctxt->myDoc; 13681: if (ret != 
NULL) { 13682: if (ctxt->input->buf->compressed > 0) 13683: ret->compression = 9; 13684: else 13685: ret->compression = ctxt->input->buf->compressed; 13686: } 13687: } 13688: else { 13689: ret = NULL; 13690: xmlFreeDoc(ctxt->myDoc); 13691: ctxt->myDoc = NULL; 13692: } 13693: if (sax != NULL) 13694: ctxt->sax = NULL; 13695: xmlFreeParserCtxt(ctxt); 13696: 13697: return(ret); 13698: } 13699: 13700: /** 13701: * xmlSAXParseFile: 13702: * @sax: the SAX handler block 13703: * @filename: the filename 13704: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13705: * documents 13706: * 13707: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13708: * compressed document is provided by default if found at compile-time. 13709: * It use the given SAX function block to handle the parsing callback. 13710: * If sax is NULL, fallback to the default DOM tree building routines. 13711: * 13712: * Returns the resulting document tree 13713: */ 13714: 13715: xmlDocPtr 13716: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13717: int recovery) { 13718: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13719: } 13720: 13721: /** 13722: * xmlRecoverDoc: 13723: * @cur: a pointer to an array of xmlChar 13724: * 13725: * parse an XML in-memory document and build a tree. 13726: * In the case the document is not Well Formed, a attempt to build a 13727: * tree is tried anyway 13728: * 13729: * Returns the resulting document tree or NULL in case of failure 13730: */ 13731: 13732: xmlDocPtr 13733: xmlRecoverDoc(const xmlChar *cur) { 13734: return(xmlSAXParseDoc(NULL, cur, 1)); 13735: } 13736: 13737: /** 13738: * xmlParseFile: 13739: * @filename: the filename 13740: * 13741: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13742: * compressed document is provided by default if found at compile-time. 13743: * 13744: * Returns the resulting document tree if the file was wellformed, 13745: * NULL otherwise. 
13746: */ 13747: 13748: xmlDocPtr 13749: xmlParseFile(const char *filename) { 13750: return(xmlSAXParseFile(NULL, filename, 0)); 13751: } 13752: 13753: /** 13754: * xmlRecoverFile: 13755: * @filename: the filename 13756: * 13757: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13758: * compressed document is provided by default if found at compile-time. 13759: * In the case the document is not Well Formed, it attempts to build 13760: * a tree anyway 13761: * 13762: * Returns the resulting document tree or NULL in case of failure 13763: */ 13764: 13765: xmlDocPtr 13766: xmlRecoverFile(const char *filename) { 13767: return(xmlSAXParseFile(NULL, filename, 1)); 13768: } 13769: 13770: 13771: /** 13772: * xmlSetupParserForBuffer: 13773: * @ctxt: an XML parser context 13774: * @buffer: a xmlChar * buffer 13775: * @filename: a file name 13776: * 13777: * Setup the parser context to parse a new buffer; Clears any prior 13778: * contents from the parser context. The buffer parameter must not be 13779: * NULL, but the filename parameter can be 13780: */ 13781: void 13782: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13783: const char* filename) 13784: { 13785: xmlParserInputPtr input; 13786: 13787: if ((ctxt == NULL) || (buffer == NULL)) 13788: return; 13789: 13790: input = xmlNewInputStream(ctxt); 13791: if (input == NULL) { 13792: xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13793: xmlClearParserCtxt(ctxt); 13794: return; 13795: } 13796: 13797: xmlClearParserCtxt(ctxt); 13798: if (filename != NULL) 13799: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13800: input->base = buffer; 13801: input->cur = buffer; 13802: input->end = &buffer[xmlStrlen(buffer)]; 13803: inputPush(ctxt, input); 13804: } 13805: 13806: /** 13807: * xmlSAXUserParseFile: 13808: * @sax: a SAX handler 13809: * @user_data: The user data returned on SAX callbacks 13810: * @filename: a file name 13811: * 13812: * parse an 
XML file and call the given SAX handler routines. 13813: * Automatic support for ZLIB/Compress compressed document is provided 13814: * 13815: * Returns 0 in case of success or a error number otherwise 13816: */ 13817: int 13818: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13819: const char *filename) { 13820: int ret = 0; 13821: xmlParserCtxtPtr ctxt; 13822: 13823: ctxt = xmlCreateFileParserCtxt(filename); 13824: if (ctxt == NULL) return -1; 13825: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13826: xmlFree(ctxt->sax); 13827: ctxt->sax = sax; 13828: xmlDetectSAX2(ctxt); 13829: 13830: if (user_data != NULL) 13831: ctxt->userData = user_data; 13832: 13833: xmlParseDocument(ctxt); 13834: 13835: if (ctxt->wellFormed) 13836: ret = 0; 13837: else { 13838: if (ctxt->errNo != 0) 13839: ret = ctxt->errNo; 13840: else 13841: ret = -1; 13842: } 13843: if (sax != NULL) 13844: ctxt->sax = NULL; 13845: if (ctxt->myDoc != NULL) { 13846: xmlFreeDoc(ctxt->myDoc); 13847: ctxt->myDoc = NULL; 13848: } 13849: xmlFreeParserCtxt(ctxt); 13850: 13851: return ret; 13852: } 13853: #endif /* LIBXML_SAX1_ENABLED */ 13854: 13855: /************************************************************************ 13856: * * 13857: * Front ends when parsing from memory * 13858: * * 13859: ************************************************************************/ 13860: 13861: /** 13862: * xmlCreateMemoryParserCtxt: 13863: * @buffer: a pointer to a char array 13864: * @size: the size of the array 13865: * 13866: * Create a parser context for an XML in-memory document. 
13867: * 13868: * Returns the new parser context or NULL 13869: */ 13870: xmlParserCtxtPtr 13871: xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13872: xmlParserCtxtPtr ctxt; 13873: xmlParserInputPtr input; 13874: xmlParserInputBufferPtr buf; 13875: 13876: if (buffer == NULL) 13877: return(NULL); 13878: if (size <= 0) 13879: return(NULL); 13880: 13881: ctxt = xmlNewParserCtxt(); 13882: if (ctxt == NULL) 13883: return(NULL); 13884: 13885: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 13886: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13887: if (buf == NULL) { 13888: xmlFreeParserCtxt(ctxt); 13889: return(NULL); 13890: } 13891: 13892: input = xmlNewInputStream(ctxt); 13893: if (input == NULL) { 13894: xmlFreeParserInputBuffer(buf); 13895: xmlFreeParserCtxt(ctxt); 13896: return(NULL); 13897: } 13898: 13899: input->filename = NULL; 13900: input->buf = buf; 13901: input->base = input->buf->buffer->content; 13902: input->cur = input->buf->buffer->content; 13903: input->end = &input->buf->buffer->content[input->buf->buffer->use]; 13904: 13905: inputPush(ctxt, input); 13906: return(ctxt); 13907: } 13908: 13909: #ifdef LIBXML_SAX1_ENABLED 13910: /** 13911: * xmlSAXParseMemoryWithData: 13912: * @sax: the SAX handler block 13913: * @buffer: an pointer to a char array 13914: * @size: the size of the array 13915: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13916: * documents 13917: * @data: the userdata 13918: * 13919: * parse an XML in-memory block and use the given SAX function block 13920: * to handle the parsing callback. If sax is NULL, fallback to the default 13921: * DOM tree building routines. 
13922: * 13923: * User data (void *) is stored within the parser context in the 13924: * context's _private member, so it is available nearly everywhere in libxml 13925: * 13926: * Returns the resulting document tree 13927: */ 13928: 13929: xmlDocPtr 13930: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 13931: int size, int recovery, void *data) { 13932: xmlDocPtr ret; 13933: xmlParserCtxtPtr ctxt; 13934: 13935: xmlInitParser(); 13936: 13937: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13938: if (ctxt == NULL) return(NULL); 13939: if (sax != NULL) { 13940: if (ctxt->sax != NULL) 13941: xmlFree(ctxt->sax); 13942: ctxt->sax = sax; 13943: } 13944: xmlDetectSAX2(ctxt); 13945: if (data!=NULL) { 13946: ctxt->_private=data; 13947: } 13948: 13949: ctxt->recovery = recovery; 13950: 13951: xmlParseDocument(ctxt); 13952: 13953: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13954: else { 13955: ret = NULL; 13956: xmlFreeDoc(ctxt->myDoc); 13957: ctxt->myDoc = NULL; 13958: } 13959: if (sax != NULL) 13960: ctxt->sax = NULL; 13961: xmlFreeParserCtxt(ctxt); 13962: 13963: return(ret); 13964: } 13965: 13966: /** 13967: * xmlSAXParseMemory: 13968: * @sax: the SAX handler block 13969: * @buffer: an pointer to a char array 13970: * @size: the size of the array 13971: * @recovery: work in recovery mode, i.e. tries to read not Well Formed 13972: * documents 13973: * 13974: * parse an XML in-memory block and use the given SAX function block 13975: * to handle the parsing callback. If sax is NULL, fallback to the default 13976: * DOM tree building routines. 
13977: * 13978: * Returns the resulting document tree 13979: */ 13980: xmlDocPtr 13981: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 13982: int size, int recovery) { 13983: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 13984: } 13985: 13986: /** 13987: * xmlParseMemory: 13988: * @buffer: an pointer to a char array 13989: * @size: the size of the array 13990: * 13991: * parse an XML in-memory block and build a tree. 13992: * 13993: * Returns the resulting document tree 13994: */ 13995: 13996: xmlDocPtr xmlParseMemory(const char *buffer, int size) { 13997: return(xmlSAXParseMemory(NULL, buffer, size, 0)); 13998: } 13999: 14000: /** 14001: * xmlRecoverMemory: 14002: * @buffer: an pointer to a char array 14003: * @size: the size of the array 14004: * 14005: * parse an XML in-memory block and build a tree. 14006: * In the case the document is not Well Formed, an attempt to 14007: * build a tree is tried anyway 14008: * 14009: * Returns the resulting document tree or NULL in case of error 14010: */ 14011: 14012: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14013: return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14014: } 14015: 14016: /** 14017: * xmlSAXUserParseMemory: 14018: * @sax: a SAX handler 14019: * @user_data: The user data returned on SAX callbacks 14020: * @buffer: an in-memory XML document input 14021: * @size: the length of the XML document in bytes 14022: * 14023: * A better SAX parsing routine. 14024: * parse an XML in-memory buffer and call the given SAX handler routines. 
14025: * 14026: * Returns 0 in case of success or a error number otherwise 14027: */ 14028: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14029: const char *buffer, int size) { 14030: int ret = 0; 14031: xmlParserCtxtPtr ctxt; 14032: 14033: xmlInitParser(); 14034: 14035: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14036: if (ctxt == NULL) return -1; 14037: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14038: xmlFree(ctxt->sax); 14039: ctxt->sax = sax; 14040: xmlDetectSAX2(ctxt); 14041: 14042: if (user_data != NULL) 14043: ctxt->userData = user_data; 14044: 14045: xmlParseDocument(ctxt); 14046: 14047: if (ctxt->wellFormed) 14048: ret = 0; 14049: else { 14050: if (ctxt->errNo != 0) 14051: ret = ctxt->errNo; 14052: else 14053: ret = -1; 14054: } 14055: if (sax != NULL) 14056: ctxt->sax = NULL; 14057: if (ctxt->myDoc != NULL) { 14058: xmlFreeDoc(ctxt->myDoc); 14059: ctxt->myDoc = NULL; 14060: } 14061: xmlFreeParserCtxt(ctxt); 14062: 14063: return ret; 14064: } 14065: #endif /* LIBXML_SAX1_ENABLED */ 14066: 14067: /** 14068: * xmlCreateDocParserCtxt: 14069: * @cur: a pointer to an array of xmlChar 14070: * 14071: * Creates a parser context for an XML in-memory document. 14072: * 14073: * Returns the new parser context or NULL 14074: */ 14075: xmlParserCtxtPtr 14076: xmlCreateDocParserCtxt(const xmlChar *cur) { 14077: int len; 14078: 14079: if (cur == NULL) 14080: return(NULL); 14081: len = xmlStrlen(cur); 14082: return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14083: } 14084: 14085: #ifdef LIBXML_SAX1_ENABLED 14086: /** 14087: * xmlSAXParseDoc: 14088: * @sax: the SAX handler block 14089: * @cur: a pointer to an array of xmlChar 14090: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14091: * documents 14092: * 14093: * parse an XML in-memory document and build a tree. 14094: * It use the given SAX function block to handle the parsing callback. 
14095: * If sax is NULL, fallback to the default DOM tree building routines. 14096: * 14097: * Returns the resulting document tree 14098: */ 14099: 14100: xmlDocPtr 14101: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14102: xmlDocPtr ret; 14103: xmlParserCtxtPtr ctxt; 14104: xmlSAXHandlerPtr oldsax = NULL; 14105: 14106: if (cur == NULL) return(NULL); 14107: 14108: 14109: ctxt = xmlCreateDocParserCtxt(cur); 14110: if (ctxt == NULL) return(NULL); 14111: if (sax != NULL) { 14112: oldsax = ctxt->sax; 14113: ctxt->sax = sax; 14114: ctxt->userData = NULL; 14115: } 14116: xmlDetectSAX2(ctxt); 14117: 14118: xmlParseDocument(ctxt); 14119: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14120: else { 14121: ret = NULL; 14122: xmlFreeDoc(ctxt->myDoc); 14123: ctxt->myDoc = NULL; 14124: } 14125: if (sax != NULL) 14126: ctxt->sax = oldsax; 14127: xmlFreeParserCtxt(ctxt); 14128: 14129: return(ret); 14130: } 14131: 14132: /** 14133: * xmlParseDoc: 14134: * @cur: a pointer to an array of xmlChar 14135: * 14136: * parse an XML in-memory document and build a tree. 
14137: * 14138: * Returns the resulting document tree 14139: */ 14140: 14141: xmlDocPtr 14142: xmlParseDoc(const xmlChar *cur) { 14143: return(xmlSAXParseDoc(NULL, cur, 0)); 14144: } 14145: #endif /* LIBXML_SAX1_ENABLED */ 14146: 14147: #ifdef LIBXML_LEGACY_ENABLED 14148: /************************************************************************ 14149: * * 14150: * Specific function to keep track of entities references * 14151: * and used by the XSLT debugger * 14152: * * 14153: ************************************************************************/ 14154: 14155: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14156: 14157: /** 14158: * xmlAddEntityReference: 14159: * @ent : A valid entity 14160: * @firstNode : A valid first node for children of entity 14161: * @lastNode : A valid last node of children entity 14162: * 14163: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14164: */ 14165: static void 14166: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14167: xmlNodePtr lastNode) 14168: { 14169: if (xmlEntityRefFunc != NULL) { 14170: (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14171: } 14172: } 14173: 14174: 14175: /** 14176: * xmlSetEntityReferenceFunc: 14177: * @func: A valid function 14178: * 14179: * Set the function to call call back when a xml reference has been made 14180: */ 14181: void 14182: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14183: { 14184: xmlEntityRefFunc = func; 14185: } 14186: #endif /* LIBXML_LEGACY_ENABLED */ 14187: 14188: /************************************************************************ 14189: * * 14190: * Miscellaneous * 14191: * * 14192: ************************************************************************/ 14193: 14194: #ifdef LIBXML_XPATH_ENABLED 14195: #include <libxml/xpath.h> 14196: #endif 14197: 14198: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14199: static int xmlParserInitialized = 0; 14200: 14201: /** 14202: 
* xmlInitParser: 14203: * 14204: * Initialization function for the XML parser. 14205: * This is not reentrant. Call once before processing in case of 14206: * use in multithreaded programs. 14207: */ 14208: 14209: void 14210: xmlInitParser(void) { 14211: if (xmlParserInitialized != 0) 14212: return; 14213: 14214: #ifdef LIBXML_THREAD_ENABLED 14215: __xmlGlobalInitMutexLock(); 14216: if (xmlParserInitialized == 0) { 14217: #endif 14218: xmlInitThreads(); 14219: xmlInitGlobals(); 14220: if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14221: (xmlGenericError == NULL)) 14222: initGenericErrorDefaultFunc(NULL); 14223: xmlInitMemory(); 14224: xmlInitializeDict(); 14225: xmlInitCharEncodingHandlers(); 14226: xmlDefaultSAXHandlerInit(); 14227: xmlRegisterDefaultInputCallbacks(); 14228: #ifdef LIBXML_OUTPUT_ENABLED 14229: xmlRegisterDefaultOutputCallbacks(); 14230: #endif /* LIBXML_OUTPUT_ENABLED */ 14231: #ifdef LIBXML_HTML_ENABLED 14232: htmlInitAutoClose(); 14233: htmlDefaultSAXHandlerInit(); 14234: #endif 14235: #ifdef LIBXML_XPATH_ENABLED 14236: xmlXPathInit(); 14237: #endif 14238: xmlParserInitialized = 1; 14239: #ifdef LIBXML_THREAD_ENABLED 14240: } 14241: __xmlGlobalInitMutexUnlock(); 14242: #endif 14243: } 14244: 14245: /** 14246: * xmlCleanupParser: 14247: * 14248: * This function name is somewhat misleading. It does not clean up 14249: * parser state, it cleans up memory allocated by the library itself. 14250: * It is a cleanup function for the XML library. It tries to reclaim all 14251: * related global memory allocated for the library processing. 14252: * It doesn't deallocate any document related memory. One should 14253: * call xmlCleanupParser() only when the process has finished using 14254: * the library and all XML/HTML documents built with it. 14255: * See also xmlInitParser() which has the opposite function of preparing 14256: * the library for operations. 
14257: * 14258: * WARNING: if your application is multithreaded or has plugin support 14259: * calling this may crash the application if another thread or 14260: * a plugin is still using libxml2. It's sometimes very hard to 14261: * guess if libxml2 is in use in the application, some libraries 14262: * or plugins may use it without notice. In case of doubt abstain 14263: * from calling this function or do it just before calling exit() 14264: * to avoid leak reports from valgrind ! 14265: */ 14266: 14267: void 14268: xmlCleanupParser(void) { 14269: if (!xmlParserInitialized) 14270: return; 14271: 14272: xmlCleanupCharEncodingHandlers(); 14273: #ifdef LIBXML_CATALOG_ENABLED 14274: xmlCatalogCleanup(); 14275: #endif 14276: xmlDictCleanup(); 14277: xmlCleanupInputCallbacks(); 14278: #ifdef LIBXML_OUTPUT_ENABLED 14279: xmlCleanupOutputCallbacks(); 14280: #endif 14281: #ifdef LIBXML_SCHEMAS_ENABLED 14282: xmlSchemaCleanupTypes(); 14283: xmlRelaxNGCleanupTypes(); 14284: #endif 14285: xmlCleanupGlobals(); 14286: xmlResetLastError(); 14287: xmlCleanupThreads(); /* must be last if called not from the main thread */ 14288: xmlCleanupMemory(); 14289: xmlParserInitialized = 0; 14290: } 14291: 14292: /************************************************************************ 14293: * * 14294: * New set (2.6.0) of simpler and more flexible APIs * 14295: * * 14296: ************************************************************************/ 14297: 14298: /** 14299: * DICT_FREE: 14300: * @str: a string 14301: * 14302: * Free a string if it is not owned by the "dict" dictionnary in the 14303: * current scope 14304: */ 14305: #define DICT_FREE(str) \ 14306: if ((str) && ((!dict) || \ 14307: (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14308: xmlFree((char *)(str)); 14309: 14310: /** 14311: * xmlCtxtReset: 14312: * @ctxt: an XML parser context 14313: * 14314: * Reset a parser context 14315: */ 14316: void 14317: xmlCtxtReset(xmlParserCtxtPtr ctxt) 14318: { 14319: xmlParserInputPtr 
input; 14320: xmlDictPtr dict; 14321: 14322: if (ctxt == NULL) 14323: return; 14324: 14325: dict = ctxt->dict; 14326: 14327: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14328: xmlFreeInputStream(input); 14329: } 14330: ctxt->inputNr = 0; 14331: ctxt->input = NULL; 14332: 14333: ctxt->spaceNr = 0; 14334: if (ctxt->spaceTab != NULL) { 14335: ctxt->spaceTab[0] = -1; 14336: ctxt->space = &ctxt->spaceTab[0]; 14337: } else { 14338: ctxt->space = NULL; 14339: } 14340: 14341: 14342: ctxt->nodeNr = 0; 14343: ctxt->node = NULL; 14344: 14345: ctxt->nameNr = 0; 14346: ctxt->name = NULL; 14347: 14348: DICT_FREE(ctxt->version); 14349: ctxt->version = NULL; 14350: DICT_FREE(ctxt->encoding); 14351: ctxt->encoding = NULL; 14352: DICT_FREE(ctxt->directory); 14353: ctxt->directory = NULL; 14354: DICT_FREE(ctxt->extSubURI); 14355: ctxt->extSubURI = NULL; 14356: DICT_FREE(ctxt->extSubSystem); 14357: ctxt->extSubSystem = NULL; 14358: if (ctxt->myDoc != NULL) 14359: xmlFreeDoc(ctxt->myDoc); 14360: ctxt->myDoc = NULL; 14361: 14362: ctxt->standalone = -1; 14363: ctxt->hasExternalSubset = 0; 14364: ctxt->hasPErefs = 0; 14365: ctxt->html = 0; 14366: ctxt->external = 0; 14367: ctxt->instate = XML_PARSER_START; 14368: ctxt->token = 0; 14369: 14370: ctxt->wellFormed = 1; 14371: ctxt->nsWellFormed = 1; 14372: ctxt->disableSAX = 0; 14373: ctxt->valid = 1; 14374: #if 0 14375: ctxt->vctxt.userData = ctxt; 14376: ctxt->vctxt.error = xmlParserValidityError; 14377: ctxt->vctxt.warning = xmlParserValidityWarning; 14378: #endif 14379: ctxt->record_info = 0; 14380: ctxt->nbChars = 0; 14381: ctxt->checkIndex = 0; 14382: ctxt->inSubset = 0; 14383: ctxt->errNo = XML_ERR_OK; 14384: ctxt->depth = 0; 14385: ctxt->charset = XML_CHAR_ENCODING_UTF8; 14386: ctxt->catalogs = NULL; 14387: ctxt->nbentities = 0; 14388: ctxt->sizeentities = 0; 14389: ctxt->sizeentcopy = 0; 14390: xmlInitNodeInfoSeq(&ctxt->node_seq); 14391: 14392: if (ctxt->attsDefault != NULL) { 14393: xmlHashFree(ctxt->attsDefault, 
(xmlHashDeallocator) xmlFree); 14394: ctxt->attsDefault = NULL; 14395: } 14396: if (ctxt->attsSpecial != NULL) { 14397: xmlHashFree(ctxt->attsSpecial, NULL); 14398: ctxt->attsSpecial = NULL; 14399: } 14400: 14401: #ifdef LIBXML_CATALOG_ENABLED 14402: if (ctxt->catalogs != NULL) 14403: xmlCatalogFreeLocal(ctxt->catalogs); 14404: #endif 14405: if (ctxt->lastError.code != XML_ERR_OK) 14406: xmlResetError(&ctxt->lastError); 14407: } 14408: 14409: /** 14410: * xmlCtxtResetPush: 14411: * @ctxt: an XML parser context 14412: * @chunk: a pointer to an array of chars 14413: * @size: number of chars in the array 14414: * @filename: an optional file name or URI 14415: * @encoding: the document encoding, or NULL 14416: * 14417: * Reset a push parser context 14418: * 14419: * Returns 0 in case of success and 1 in case of error 14420: */ 14421: int 14422: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14423: int size, const char *filename, const char *encoding) 14424: { 14425: xmlParserInputPtr inputStream; 14426: xmlParserInputBufferPtr buf; 14427: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14428: 14429: if (ctxt == NULL) 14430: return(1); 14431: 14432: if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14433: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14434: 14435: buf = xmlAllocParserInputBuffer(enc); 14436: if (buf == NULL) 14437: return(1); 14438: 14439: if (ctxt == NULL) { 14440: xmlFreeParserInputBuffer(buf); 14441: return(1); 14442: } 14443: 14444: xmlCtxtReset(ctxt); 14445: 14446: if (ctxt->pushTab == NULL) { 14447: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14448: sizeof(xmlChar *)); 14449: if (ctxt->pushTab == NULL) { 14450: xmlErrMemory(ctxt, NULL); 14451: xmlFreeParserInputBuffer(buf); 14452: return(1); 14453: } 14454: } 14455: 14456: if (filename == NULL) { 14457: ctxt->directory = NULL; 14458: } else { 14459: ctxt->directory = xmlParserGetDirectory(filename); 14460: } 14461: 14462: inputStream = 
xmlNewInputStream(ctxt); 14463: if (inputStream == NULL) { 14464: xmlFreeParserInputBuffer(buf); 14465: return(1); 14466: } 14467: 14468: if (filename == NULL) 14469: inputStream->filename = NULL; 14470: else 14471: inputStream->filename = (char *) 14472: xmlCanonicPath((const xmlChar *) filename); 14473: inputStream->buf = buf; 14474: inputStream->base = inputStream->buf->buffer->content; 14475: inputStream->cur = inputStream->buf->buffer->content; 14476: inputStream->end = 14477: &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 14478: 14479: inputPush(ctxt, inputStream); 14480: 14481: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14482: (ctxt->input->buf != NULL)) { 14483: int base = ctxt->input->base - ctxt->input->buf->buffer->content; 14484: int cur = ctxt->input->cur - ctxt->input->base; 14485: 14486: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14487: 14488: ctxt->input->base = ctxt->input->buf->buffer->content + base; 14489: ctxt->input->cur = ctxt->input->base + cur; 14490: ctxt->input->end = 14491: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 14492: use]; 14493: #ifdef DEBUG_PUSH 14494: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14495: #endif 14496: } 14497: 14498: if (encoding != NULL) { 14499: xmlCharEncodingHandlerPtr hdlr; 14500: 14501: if (ctxt->encoding != NULL) 14502: xmlFree((xmlChar *) ctxt->encoding); 14503: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14504: 14505: hdlr = xmlFindCharEncodingHandler(encoding); 14506: if (hdlr != NULL) { 14507: xmlSwitchToEncoding(ctxt, hdlr); 14508: } else { 14509: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14510: "Unsupported encoding %s\n", BAD_CAST encoding); 14511: } 14512: } else if (enc != XML_CHAR_ENCODING_NONE) { 14513: xmlSwitchEncoding(ctxt, enc); 14514: } 14515: 14516: return(0); 14517: } 14518: 14519: 14520: /** 14521: * xmlCtxtUseOptionsInternal: 14522: * @ctxt: an XML parser context 14523: * 
@options: a combination of xmlParserOption 14524: * @encoding: the user provided encoding to use 14525: * 14526: * Applies the options to the parser context 14527: * 14528: * Returns 0 in case of success, the set of unknown or unimplemented options 14529: * in case of error. 14530: */ 14531: static int 14532: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14533: { 14534: if (ctxt == NULL) 14535: return(-1); 14536: if (encoding != NULL) { 14537: if (ctxt->encoding != NULL) 14538: xmlFree((xmlChar *) ctxt->encoding); 14539: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14540: } 14541: if (options & XML_PARSE_RECOVER) { 14542: ctxt->recovery = 1; 14543: options -= XML_PARSE_RECOVER; 14544: ctxt->options |= XML_PARSE_RECOVER; 14545: } else 14546: ctxt->recovery = 0; 14547: if (options & XML_PARSE_DTDLOAD) { 14548: ctxt->loadsubset = XML_DETECT_IDS; 14549: options -= XML_PARSE_DTDLOAD; 14550: ctxt->options |= XML_PARSE_DTDLOAD; 14551: } else 14552: ctxt->loadsubset = 0; 14553: if (options & XML_PARSE_DTDATTR) { 14554: ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14555: options -= XML_PARSE_DTDATTR; 14556: ctxt->options |= XML_PARSE_DTDATTR; 14557: } 14558: if (options & XML_PARSE_NOENT) { 14559: ctxt->replaceEntities = 1; 14560: /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14561: options -= XML_PARSE_NOENT; 14562: ctxt->options |= XML_PARSE_NOENT; 14563: } else 14564: ctxt->replaceEntities = 0; 14565: if (options & XML_PARSE_PEDANTIC) { 14566: ctxt->pedantic = 1; 14567: options -= XML_PARSE_PEDANTIC; 14568: ctxt->options |= XML_PARSE_PEDANTIC; 14569: } else 14570: ctxt->pedantic = 0; 14571: if (options & XML_PARSE_NOBLANKS) { 14572: ctxt->keepBlanks = 0; 14573: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14574: options -= XML_PARSE_NOBLANKS; 14575: ctxt->options |= XML_PARSE_NOBLANKS; 14576: } else 14577: ctxt->keepBlanks = 1; 14578: if (options & XML_PARSE_DTDVALID) { 14579: ctxt->validate = 1; 14580: if (options 
& XML_PARSE_NOWARNING) 14581: ctxt->vctxt.warning = NULL; 14582: if (options & XML_PARSE_NOERROR) 14583: ctxt->vctxt.error = NULL; 14584: options -= XML_PARSE_DTDVALID; 14585: ctxt->options |= XML_PARSE_DTDVALID; 14586: } else 14587: ctxt->validate = 0; 14588: if (options & XML_PARSE_NOWARNING) { 14589: ctxt->sax->warning = NULL; 14590: options -= XML_PARSE_NOWARNING; 14591: } 14592: if (options & XML_PARSE_NOERROR) { 14593: ctxt->sax->error = NULL; 14594: ctxt->sax->fatalError = NULL; 14595: options -= XML_PARSE_NOERROR; 14596: } 14597: #ifdef LIBXML_SAX1_ENABLED 14598: if (options & XML_PARSE_SAX1) { 14599: ctxt->sax->startElement = xmlSAX2StartElement; 14600: ctxt->sax->endElement = xmlSAX2EndElement; 14601: ctxt->sax->startElementNs = NULL; 14602: ctxt->sax->endElementNs = NULL; 14603: ctxt->sax->initialized = 1; 14604: options -= XML_PARSE_SAX1; 14605: ctxt->options |= XML_PARSE_SAX1; 14606: } 14607: #endif /* LIBXML_SAX1_ENABLED */ 14608: if (options & XML_PARSE_NODICT) { 14609: ctxt->dictNames = 0; 14610: options -= XML_PARSE_NODICT; 14611: ctxt->options |= XML_PARSE_NODICT; 14612: } else { 14613: ctxt->dictNames = 1; 14614: } 14615: if (options & XML_PARSE_NOCDATA) { 14616: ctxt->sax->cdataBlock = NULL; 14617: options -= XML_PARSE_NOCDATA; 14618: ctxt->options |= XML_PARSE_NOCDATA; 14619: } 14620: if (options & XML_PARSE_NSCLEAN) { 14621: ctxt->options |= XML_PARSE_NSCLEAN; 14622: options -= XML_PARSE_NSCLEAN; 14623: } 14624: if (options & XML_PARSE_NONET) { 14625: ctxt->options |= XML_PARSE_NONET; 14626: options -= XML_PARSE_NONET; 14627: } 14628: if (options & XML_PARSE_COMPACT) { 14629: ctxt->options |= XML_PARSE_COMPACT; 14630: options -= XML_PARSE_COMPACT; 14631: } 14632: if (options & XML_PARSE_OLD10) { 14633: ctxt->options |= XML_PARSE_OLD10; 14634: options -= XML_PARSE_OLD10; 14635: } 14636: if (options & XML_PARSE_NOBASEFIX) { 14637: ctxt->options |= XML_PARSE_NOBASEFIX; 14638: options -= XML_PARSE_NOBASEFIX; 14639: } 14640: if (options & 
XML_PARSE_HUGE) { 14641: ctxt->options |= XML_PARSE_HUGE; 14642: options -= XML_PARSE_HUGE; 14643: } 14644: if (options & XML_PARSE_OLDSAX) { 14645: ctxt->options |= XML_PARSE_OLDSAX; 14646: options -= XML_PARSE_OLDSAX; 14647: } 14648: if (options & XML_PARSE_IGNORE_ENC) { 14649: ctxt->options |= XML_PARSE_IGNORE_ENC; 14650: options -= XML_PARSE_IGNORE_ENC; 14651: } 14652: ctxt->linenumbers = 1; 14653: return (options); 14654: } 14655: 14656: /** 14657: * xmlCtxtUseOptions: 14658: * @ctxt: an XML parser context 14659: * @options: a combination of xmlParserOption 14660: * 14661: * Applies the options to the parser context 14662: * 14663: * Returns 0 in case of success, the set of unknown or unimplemented options 14664: * in case of error. 14665: */ 14666: int 14667: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14668: { 14669: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 14670: } 14671: 14672: /** 14673: * xmlDoRead: 14674: * @ctxt: an XML parser context 14675: * @URL: the base URL to use for the document 14676: * @encoding: the document encoding, or NULL 14677: * @options: a combination of xmlParserOption 14678: * @reuse: keep the context for reuse 14679: * 14680: * Common front-end for the xmlRead functions 14681: * 14682: * Returns the resulting document tree or NULL 14683: */ 14684: static xmlDocPtr 14685: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14686: int options, int reuse) 14687: { 14688: xmlDocPtr ret; 14689: 14690: xmlCtxtUseOptionsInternal(ctxt, options, encoding); 14691: if (encoding != NULL) { 14692: xmlCharEncodingHandlerPtr hdlr; 14693: 14694: hdlr = xmlFindCharEncodingHandler(encoding); 14695: if (hdlr != NULL) 14696: xmlSwitchToEncoding(ctxt, hdlr); 14697: } 14698: if ((URL != NULL) && (ctxt->input != NULL) && 14699: (ctxt->input->filename == NULL)) 14700: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14701: xmlParseDocument(ctxt); 14702: if ((ctxt->wellFormed) || ctxt->recovery) 
14703: ret = ctxt->myDoc; 14704: else { 14705: ret = NULL; 14706: if (ctxt->myDoc != NULL) { 14707: xmlFreeDoc(ctxt->myDoc); 14708: } 14709: } 14710: ctxt->myDoc = NULL; 14711: if (!reuse) { 14712: xmlFreeParserCtxt(ctxt); 14713: } 14714: 14715: return (ret); 14716: } 14717: 14718: /** 14719: * xmlReadDoc: 14720: * @cur: a pointer to a zero terminated string 14721: * @URL: the base URL to use for the document 14722: * @encoding: the document encoding, or NULL 14723: * @options: a combination of xmlParserOption 14724: * 14725: * parse an XML in-memory document and build a tree. 14726: * 14727: * Returns the resulting document tree 14728: */ 14729: xmlDocPtr 14730: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14731: { 14732: xmlParserCtxtPtr ctxt; 14733: 14734: if (cur == NULL) 14735: return (NULL); 14736: 14737: ctxt = xmlCreateDocParserCtxt(cur); 14738: if (ctxt == NULL) 14739: return (NULL); 14740: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14741: } 14742: 14743: /** 14744: * xmlReadFile: 14745: * @filename: a file or URL 14746: * @encoding: the document encoding, or NULL 14747: * @options: a combination of xmlParserOption 14748: * 14749: * parse an XML file from the filesystem or the network. 14750: * 14751: * Returns the resulting document tree 14752: */ 14753: xmlDocPtr 14754: xmlReadFile(const char *filename, const char *encoding, int options) 14755: { 14756: xmlParserCtxtPtr ctxt; 14757: 14758: ctxt = xmlCreateURLParserCtxt(filename, options); 14759: if (ctxt == NULL) 14760: return (NULL); 14761: return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14762: } 14763: 14764: /** 14765: * xmlReadMemory: 14766: * @buffer: a pointer to a char array 14767: * @size: the size of the array 14768: * @URL: the base URL to use for the document 14769: * @encoding: the document encoding, or NULL 14770: * @options: a combination of xmlParserOption 14771: * 14772: * parse an XML in-memory document and build a tree. 
14773: * 14774: * Returns the resulting document tree 14775: */ 14776: xmlDocPtr 14777: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14778: { 14779: xmlParserCtxtPtr ctxt; 14780: 14781: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14782: if (ctxt == NULL) 14783: return (NULL); 14784: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14785: } 14786: 14787: /** 14788: * xmlReadFd: 14789: * @fd: an open file descriptor 14790: * @URL: the base URL to use for the document 14791: * @encoding: the document encoding, or NULL 14792: * @options: a combination of xmlParserOption 14793: * 14794: * parse an XML from a file descriptor and build a tree. 14795: * NOTE that the file descriptor will not be closed when the 14796: * reader is closed or reset. 14797: * 14798: * Returns the resulting document tree 14799: */ 14800: xmlDocPtr 14801: xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14802: { 14803: xmlParserCtxtPtr ctxt; 14804: xmlParserInputBufferPtr input; 14805: xmlParserInputPtr stream; 14806: 14807: if (fd < 0) 14808: return (NULL); 14809: 14810: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14811: if (input == NULL) 14812: return (NULL); 14813: input->closecallback = NULL; 14814: ctxt = xmlNewParserCtxt(); 14815: if (ctxt == NULL) { 14816: xmlFreeParserInputBuffer(input); 14817: return (NULL); 14818: } 14819: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14820: if (stream == NULL) { 14821: xmlFreeParserInputBuffer(input); 14822: xmlFreeParserCtxt(ctxt); 14823: return (NULL); 14824: } 14825: inputPush(ctxt, stream); 14826: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14827: } 14828: 14829: /** 14830: * xmlReadIO: 14831: * @ioread: an I/O read function 14832: * @ioclose: an I/O close function 14833: * @ioctx: an I/O handler 14834: * @URL: the base URL to use for the document 14835: * @encoding: the document encoding, or NULL 14836: * @options: a 
combination of xmlParserOption 14837: * 14838: * parse an XML document from I/O functions and source and build a tree. 14839: * 14840: * Returns the resulting document tree 14841: */ 14842: xmlDocPtr 14843: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14844: void *ioctx, const char *URL, const char *encoding, int options) 14845: { 14846: xmlParserCtxtPtr ctxt; 14847: xmlParserInputBufferPtr input; 14848: xmlParserInputPtr stream; 14849: 14850: if (ioread == NULL) 14851: return (NULL); 14852: 14853: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14854: XML_CHAR_ENCODING_NONE); 14855: if (input == NULL) { 14856: if (ioclose != NULL) 14857: ioclose(ioctx); 14858: return (NULL); 14859: } 14860: ctxt = xmlNewParserCtxt(); 14861: if (ctxt == NULL) { 14862: xmlFreeParserInputBuffer(input); 14863: return (NULL); 14864: } 14865: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14866: if (stream == NULL) { 14867: xmlFreeParserInputBuffer(input); 14868: xmlFreeParserCtxt(ctxt); 14869: return (NULL); 14870: } 14871: inputPush(ctxt, stream); 14872: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14873: } 14874: 14875: /** 14876: * xmlCtxtReadDoc: 14877: * @ctxt: an XML parser context 14878: * @cur: a pointer to a zero terminated string 14879: * @URL: the base URL to use for the document 14880: * @encoding: the document encoding, or NULL 14881: * @options: a combination of xmlParserOption 14882: * 14883: * parse an XML in-memory document and build a tree. 
14884: * This reuses the existing @ctxt parser context 14885: * 14886: * Returns the resulting document tree 14887: */ 14888: xmlDocPtr 14889: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 14890: const char *URL, const char *encoding, int options) 14891: { 14892: xmlParserInputPtr stream; 14893: 14894: if (cur == NULL) 14895: return (NULL); 14896: if (ctxt == NULL) 14897: return (NULL); 14898: 14899: xmlCtxtReset(ctxt); 14900: 14901: stream = xmlNewStringInputStream(ctxt, cur); 14902: if (stream == NULL) { 14903: return (NULL); 14904: } 14905: inputPush(ctxt, stream); 14906: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14907: } 14908: 14909: /** 14910: * xmlCtxtReadFile: 14911: * @ctxt: an XML parser context 14912: * @filename: a file or URL 14913: * @encoding: the document encoding, or NULL 14914: * @options: a combination of xmlParserOption 14915: * 14916: * parse an XML file from the filesystem or the network. 14917: * This reuses the existing @ctxt parser context 14918: * 14919: * Returns the resulting document tree 14920: */ 14921: xmlDocPtr 14922: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14923: const char *encoding, int options) 14924: { 14925: xmlParserInputPtr stream; 14926: 14927: if (filename == NULL) 14928: return (NULL); 14929: if (ctxt == NULL) 14930: return (NULL); 14931: 14932: xmlCtxtReset(ctxt); 14933: 14934: stream = xmlLoadExternalEntity(filename, NULL, ctxt); 14935: if (stream == NULL) { 14936: return (NULL); 14937: } 14938: inputPush(ctxt, stream); 14939: return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 14940: } 14941: 14942: /** 14943: * xmlCtxtReadMemory: 14944: * @ctxt: an XML parser context 14945: * @buffer: a pointer to a char array 14946: * @size: the size of the array 14947: * @URL: the base URL to use for the document 14948: * @encoding: the document encoding, or NULL 14949: * @options: a combination of xmlParserOption 14950: * 14951: * parse an XML in-memory document and build a tree. 
14952: * This reuses the existing @ctxt parser context 14953: * 14954: * Returns the resulting document tree 14955: */ 14956: xmlDocPtr 14957: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 14958: const char *URL, const char *encoding, int options) 14959: { 14960: xmlParserInputBufferPtr input; 14961: xmlParserInputPtr stream; 14962: 14963: if (ctxt == NULL) 14964: return (NULL); 14965: if (buffer == NULL) 14966: return (NULL); 14967: 14968: xmlCtxtReset(ctxt); 14969: 14970: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14971: if (input == NULL) { 14972: return(NULL); 14973: } 14974: 14975: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14976: if (stream == NULL) { 14977: xmlFreeParserInputBuffer(input); 14978: return(NULL); 14979: } 14980: 14981: inputPush(ctxt, stream); 14982: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14983: } 14984: 14985: /** 14986: * xmlCtxtReadFd: 14987: * @ctxt: an XML parser context 14988: * @fd: an open file descriptor 14989: * @URL: the base URL to use for the document 14990: * @encoding: the document encoding, or NULL 14991: * @options: a combination of xmlParserOption 14992: * 14993: * parse an XML from a file descriptor and build a tree. 14994: * This reuses the existing @ctxt parser context 14995: * NOTE that the file descriptor will not be closed when the 14996: * reader is closed or reset. 
14997: * 14998: * Returns the resulting document tree 14999: */ 15000: xmlDocPtr 15001: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15002: const char *URL, const char *encoding, int options) 15003: { 15004: xmlParserInputBufferPtr input; 15005: xmlParserInputPtr stream; 15006: 15007: if (fd < 0) 15008: return (NULL); 15009: if (ctxt == NULL) 15010: return (NULL); 15011: 15012: xmlCtxtReset(ctxt); 15013: 15014: 15015: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15016: if (input == NULL) 15017: return (NULL); 15018: input->closecallback = NULL; 15019: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15020: if (stream == NULL) { 15021: xmlFreeParserInputBuffer(input); 15022: return (NULL); 15023: } 15024: inputPush(ctxt, stream); 15025: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15026: } 15027: 15028: /** 15029: * xmlCtxtReadIO: 15030: * @ctxt: an XML parser context 15031: * @ioread: an I/O read function 15032: * @ioclose: an I/O close function 15033: * @ioctx: an I/O handler 15034: * @URL: the base URL to use for the document 15035: * @encoding: the document encoding, or NULL 15036: * @options: a combination of xmlParserOption 15037: * 15038: * parse an XML document from I/O functions and source and build a tree. 
15039: * This reuses the existing @ctxt parser context 15040: * 15041: * Returns the resulting document tree 15042: */ 15043: xmlDocPtr 15044: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15045: xmlInputCloseCallback ioclose, void *ioctx, 15046: const char *URL, 15047: const char *encoding, int options) 15048: { 15049: xmlParserInputBufferPtr input; 15050: xmlParserInputPtr stream; 15051: 15052: if (ioread == NULL) 15053: return (NULL); 15054: if (ctxt == NULL) 15055: return (NULL); 15056: 15057: xmlCtxtReset(ctxt); 15058: 15059: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15060: XML_CHAR_ENCODING_NONE); 15061: if (input == NULL) { 15062: if (ioclose != NULL) 15063: ioclose(ioctx); 15064: return (NULL); 15065: } 15066: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15067: if (stream == NULL) { 15068: xmlFreeParserInputBuffer(input); 15069: return (NULL); 15070: } 15071: inputPush(ctxt, stream); 15072: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15073: } 15074: 15075: #define bottom_parser 15076: #include "elfgcchack.h"