embedaddon/libxml2/parser.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parser.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:22:20 2013 UTC (10 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD

2.8.0

1: /* 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3: * implemented on top of the SAX interfaces 4: * 5: * References: 6: * The XML specification: 7: * http://www.w3.org/TR/REC-xml 8: * Original 1.0 version: 9: * http://www.w3.org/TR/1998/REC-xml-19980210 10: * XML second edition working draft 11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12: * 13: * Okay this is a big file, the parser core is around 7000 lines, then it 14: * is followed by the progressive parser top routines, then the various 15: * high level APIs to call the parser and a few miscellaneous functions. 16: * A number of helper functions and deprecated ones have been moved to 17: * parserInternals.c to reduce this file size. 18: * As much as possible the functions are associated with their relative 19: * production in the XML specification. A few productions defining the 20: * different ranges of character are actually implanted either in 21: * parserInternals.h or parserInternals.c 22: * The DOM tree build is realized from the default SAX callbacks in 23: * the module SAX.c. 24: * The routines doing the validation checks are in valid.c and called either 25: * from the SAX callbacks or as standalone functions using a preparsed 26: * document. 27: * 28: * See Copyright for the status of this software. 
29: * 30: * daniel@veillard.com 31: */ 32: 33: #define IN_LIBXML 34: #include "libxml.h" 35: 36: #if defined(WIN32) && !defined (__CYGWIN__) 37: #define XML_DIR_SEP '\\' 38: #else 39: #define XML_DIR_SEP '/' 40: #endif 41: 42: #include <stdlib.h> 43: #include <string.h> 44: #include <stdarg.h> 45: #include <libxml/xmlmemory.h> 46: #include <libxml/threads.h> 47: #include <libxml/globals.h> 48: #include <libxml/tree.h> 49: #include <libxml/parser.h> 50: #include <libxml/parserInternals.h> 51: #include <libxml/valid.h> 52: #include <libxml/entities.h> 53: #include <libxml/xmlerror.h> 54: #include <libxml/encoding.h> 55: #include <libxml/xmlIO.h> 56: #include <libxml/uri.h> 57: #ifdef LIBXML_CATALOG_ENABLED 58: #include <libxml/catalog.h> 59: #endif 60: #ifdef LIBXML_SCHEMAS_ENABLED 61: #include <libxml/xmlschemastypes.h> 62: #include <libxml/relaxng.h> 63: #endif 64: #ifdef HAVE_CTYPE_H 65: #include <ctype.h> 66: #endif 67: #ifdef HAVE_STDLIB_H 68: #include <stdlib.h> 69: #endif 70: #ifdef HAVE_SYS_STAT_H 71: #include <sys/stat.h> 72: #endif 73: #ifdef HAVE_FCNTL_H 74: #include <fcntl.h> 75: #endif 76: #ifdef HAVE_UNISTD_H 77: #include <unistd.h> 78: #endif 79: #ifdef HAVE_ZLIB_H 80: #include <zlib.h> 81: #endif 82: #ifdef HAVE_LZMA_H 83: #include <lzma.h> 84: #endif 85: 86: static void 87: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 88: 89: static xmlParserCtxtPtr 90: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 91: const xmlChar *base, xmlParserCtxtPtr pctx); 92: 93: /************************************************************************ 94: * * 95: * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 96: * * 97: ************************************************************************/ 98: 99: #define XML_PARSER_BIG_ENTITY 1000 100: #define XML_PARSER_LOT_ENTITY 5000 101: 102: /* 103: * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 104: * replacement over the size in byte of the input indicates that you have 105: * and eponential behaviour. A value of 10 correspond to at least 3 entity 106: * replacement per byte of input. 107: */ 108: #define XML_PARSER_NON_LINEAR 10 109: 110: /* 111: * xmlParserEntityCheck 112: * 113: * Function to check non-linear entity expansion behaviour 114: * This is here to detect and stop exponential linear entity expansion 115: * This is not a limitation of the parser but a safety 116: * boundary feature. It can be disabled with the XML_PARSE_HUGE 117: * parser option. 118: */ 119: static int 120: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, 121: xmlEntityPtr ent) 122: { 123: unsigned long consumed = 0; 124: 125: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 126: return (0); 127: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 128: return (1); 129: if (size != 0) { 130: /* 131: * Do the check based on the replacement size of the entity 132: */ 133: if (size < XML_PARSER_BIG_ENTITY) 134: return(0); 135: 136: /* 137: * A limit on the amount of text data reasonably used 138: */ 139: if (ctxt->input != NULL) { 140: consumed = ctxt->input->consumed + 141: (ctxt->input->cur - ctxt->input->base); 142: } 143: consumed += ctxt->sizeentities; 144: 145: if ((size < XML_PARSER_NON_LINEAR * consumed) && 146: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 147: return (0); 148: } else if (ent != NULL) { 149: /* 150: * use the number of parsed entities in the replacement 151: */ 152: size = ent->checked; 153: 154: /* 155: * The amount of data parsed counting entities size only once 156: */ 157: if (ctxt->input != NULL) { 158: consumed = ctxt->input->consumed + 
159: (ctxt->input->cur - ctxt->input->base); 160: } 161: consumed += ctxt->sizeentities; 162: 163: /* 164: * Check the density of entities for the amount of data 165: * knowing an entity reference will take at least 3 bytes 166: */ 167: if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 168: return (0); 169: } else { 170: /* 171: * strange we got no data for checking just return 172: */ 173: return (0); 174: } 175: 176: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 177: return (1); 178: } 179: 180: /** 181: * xmlParserMaxDepth: 182: * 183: * arbitrary depth limit for the XML documents that we allow to 184: * process. This is not a limitation of the parser but a safety 185: * boundary feature. It can be disabled with the XML_PARSE_HUGE 186: * parser option. 187: */ 188: unsigned int xmlParserMaxDepth = 256; 189: 190: 191: 192: #define SAX2 1 193: #define XML_PARSER_BIG_BUFFER_SIZE 300 194: #define XML_PARSER_BUFFER_SIZE 100 195: #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 196: 197: /* 198: * List of XML prefixed PI allowed by W3C specs 199: */ 200: 201: static const char *xmlW3CPIs[] = { 202: "xml-stylesheet", 203: "xml-model", 204: NULL 205: }; 206: 207: 208: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 209: static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 210: const xmlChar **str); 211: 212: static xmlParserErrors 213: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 214: xmlSAXHandlerPtr sax, 215: void *user_data, int depth, const xmlChar *URL, 216: const xmlChar *ID, xmlNodePtr *list); 217: 218: static int 219: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 220: const char *encoding); 221: #ifdef LIBXML_LEGACY_ENABLED 222: static void 223: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 224: xmlNodePtr lastNode); 225: #endif /* LIBXML_LEGACY_ENABLED */ 226: 227: static xmlParserErrors 228: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 
229: const xmlChar *string, void *user_data, xmlNodePtr *lst); 230: 231: static int 232: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 233: 234: /************************************************************************ 235: * * 236: * Some factorized error routines * 237: * * 238: ************************************************************************/ 239: 240: /** 241: * xmlErrAttributeDup: 242: * @ctxt: an XML parser context 243: * @prefix: the attribute prefix 244: * @localname: the attribute localname 245: * 246: * Handle a redefinition of attribute error 247: */ 248: static void 249: xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 250: const xmlChar * localname) 251: { 252: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 253: (ctxt->instate == XML_PARSER_EOF)) 254: return; 255: if (ctxt != NULL) 256: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 257: 258: if (prefix == NULL) 259: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 260: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 261: (const char *) localname, NULL, NULL, 0, 0, 262: "Attribute %s redefined\n", localname); 263: else 264: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 265: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 266: (const char *) prefix, (const char *) localname, 267: NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 268: localname); 269: if (ctxt != NULL) { 270: ctxt->wellFormed = 0; 271: if (ctxt->recovery == 0) 272: ctxt->disableSAX = 1; 273: } 274: } 275: 276: /** 277: * xmlFatalErr: 278: * @ctxt: an XML parser context 279: * @error: the error number 280: * @extra: extra information string 281: * 282: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 283: */ 284: static void 285: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 286: { 287: const char *errmsg; 288: 289: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 290: (ctxt->instate == XML_PARSER_EOF)) 291: return; 292: switch (error) { 293: case XML_ERR_INVALID_HEX_CHARREF: 294: errmsg = "CharRef: invalid hexadecimal value\n"; 295: break; 296: case XML_ERR_INVALID_DEC_CHARREF: 297: errmsg = "CharRef: invalid decimal value\n"; 298: break; 299: case XML_ERR_INVALID_CHARREF: 300: errmsg = "CharRef: invalid value\n"; 301: break; 302: case XML_ERR_INTERNAL_ERROR: 303: errmsg = "internal error"; 304: break; 305: case XML_ERR_PEREF_AT_EOF: 306: errmsg = "PEReference at end of document\n"; 307: break; 308: case XML_ERR_PEREF_IN_PROLOG: 309: errmsg = "PEReference in prolog\n"; 310: break; 311: case XML_ERR_PEREF_IN_EPILOG: 312: errmsg = "PEReference in epilog\n"; 313: break; 314: case XML_ERR_PEREF_NO_NAME: 315: errmsg = "PEReference: no name\n"; 316: break; 317: case XML_ERR_PEREF_SEMICOL_MISSING: 318: errmsg = "PEReference: expecting ';'\n"; 319: break; 320: case XML_ERR_ENTITY_LOOP: 321: errmsg = "Detected an entity reference loop\n"; 322: break; 323: case XML_ERR_ENTITY_NOT_STARTED: 324: errmsg = "EntityValue: \" or ' expected\n"; 325: break; 326: case XML_ERR_ENTITY_PE_INTERNAL: 327: errmsg = "PEReferences forbidden in internal subset\n"; 328: break; 329: case XML_ERR_ENTITY_NOT_FINISHED: 330: errmsg = "EntityValue: \" or ' expected\n"; 331: break; 332: case XML_ERR_ATTRIBUTE_NOT_STARTED: 333: errmsg = "AttValue: \" or ' expected\n"; 334: break; 335: case XML_ERR_LT_IN_ATTRIBUTE: 336: errmsg = "Unescaped '<' not allowed in attributes values\n"; 337: break; 338: case XML_ERR_LITERAL_NOT_STARTED: 339: errmsg = "SystemLiteral \" or ' expected\n"; 340: break; 341: case XML_ERR_LITERAL_NOT_FINISHED: 342: errmsg = "Unfinished System or Public ID \" or ' expected\n"; 343: break; 344: case 
XML_ERR_MISPLACED_CDATA_END: 345: errmsg = "Sequence ']]>' not allowed in content\n"; 346: break; 347: case XML_ERR_URI_REQUIRED: 348: errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 349: break; 350: case XML_ERR_PUBID_REQUIRED: 351: errmsg = "PUBLIC, the Public Identifier is missing\n"; 352: break; 353: case XML_ERR_HYPHEN_IN_COMMENT: 354: errmsg = "Comment must not contain '--' (double-hyphen)\n"; 355: break; 356: case XML_ERR_PI_NOT_STARTED: 357: errmsg = "xmlParsePI : no target name\n"; 358: break; 359: case XML_ERR_RESERVED_XML_NAME: 360: errmsg = "Invalid PI name\n"; 361: break; 362: case XML_ERR_NOTATION_NOT_STARTED: 363: errmsg = "NOTATION: Name expected here\n"; 364: break; 365: case XML_ERR_NOTATION_NOT_FINISHED: 366: errmsg = "'>' required to close NOTATION declaration\n"; 367: break; 368: case XML_ERR_VALUE_REQUIRED: 369: errmsg = "Entity value required\n"; 370: break; 371: case XML_ERR_URI_FRAGMENT: 372: errmsg = "Fragment not allowed"; 373: break; 374: case XML_ERR_ATTLIST_NOT_STARTED: 375: errmsg = "'(' required to start ATTLIST enumeration\n"; 376: break; 377: case XML_ERR_NMTOKEN_REQUIRED: 378: errmsg = "NmToken expected in ATTLIST enumeration\n"; 379: break; 380: case XML_ERR_ATTLIST_NOT_FINISHED: 381: errmsg = "')' required to finish ATTLIST enumeration\n"; 382: break; 383: case XML_ERR_MIXED_NOT_STARTED: 384: errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 385: break; 386: case XML_ERR_PCDATA_REQUIRED: 387: errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 388: break; 389: case XML_ERR_ELEMCONTENT_NOT_STARTED: 390: errmsg = "ContentDecl : Name or '(' expected\n"; 391: break; 392: case XML_ERR_ELEMCONTENT_NOT_FINISHED: 393: errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 394: break; 395: case XML_ERR_PEREF_IN_INT_SUBSET: 396: errmsg = 397: "PEReference: forbidden within markup decl in internal subset\n"; 398: break; 399: case XML_ERR_GT_REQUIRED: 400: errmsg = "expected '>'\n"; 401: break; 402: case XML_ERR_CONDSEC_INVALID: 403: 
errmsg = "XML conditional section '[' expected\n"; 404: break; 405: case XML_ERR_EXT_SUBSET_NOT_FINISHED: 406: errmsg = "Content error in the external subset\n"; 407: break; 408: case XML_ERR_CONDSEC_INVALID_KEYWORD: 409: errmsg = 410: "conditional section INCLUDE or IGNORE keyword expected\n"; 411: break; 412: case XML_ERR_CONDSEC_NOT_FINISHED: 413: errmsg = "XML conditional section not closed\n"; 414: break; 415: case XML_ERR_XMLDECL_NOT_STARTED: 416: errmsg = "Text declaration '<?xml' required\n"; 417: break; 418: case XML_ERR_XMLDECL_NOT_FINISHED: 419: errmsg = "parsing XML declaration: '?>' expected\n"; 420: break; 421: case XML_ERR_EXT_ENTITY_STANDALONE: 422: errmsg = "external parsed entities cannot be standalone\n"; 423: break; 424: case XML_ERR_ENTITYREF_SEMICOL_MISSING: 425: errmsg = "EntityRef: expecting ';'\n"; 426: break; 427: case XML_ERR_DOCTYPE_NOT_FINISHED: 428: errmsg = "DOCTYPE improperly terminated\n"; 429: break; 430: case XML_ERR_LTSLASH_REQUIRED: 431: errmsg = "EndTag: '</' not found\n"; 432: break; 433: case XML_ERR_EQUAL_REQUIRED: 434: errmsg = "expected '='\n"; 435: break; 436: case XML_ERR_STRING_NOT_CLOSED: 437: errmsg = "String not closed expecting \" or '\n"; 438: break; 439: case XML_ERR_STRING_NOT_STARTED: 440: errmsg = "String not started expecting ' or \"\n"; 441: break; 442: case XML_ERR_ENCODING_NAME: 443: errmsg = "Invalid XML encoding name\n"; 444: break; 445: case XML_ERR_STANDALONE_VALUE: 446: errmsg = "standalone accepts only 'yes' or 'no'\n"; 447: break; 448: case XML_ERR_DOCUMENT_EMPTY: 449: errmsg = "Document is empty\n"; 450: break; 451: case XML_ERR_DOCUMENT_END: 452: errmsg = "Extra content at the end of the document\n"; 453: break; 454: case XML_ERR_NOT_WELL_BALANCED: 455: errmsg = "chunk is not well balanced\n"; 456: break; 457: case XML_ERR_EXTRA_CONTENT: 458: errmsg = "extra content at the end of well balanced chunk\n"; 459: break; 460: case XML_ERR_VERSION_MISSING: 461: errmsg = "Malformed declaration expecting 
version\n"; 462: break; 463: #if 0 464: case: 465: errmsg = "\n"; 466: break; 467: #endif 468: default: 469: errmsg = "Unregistered error message\n"; 470: } 471: if (ctxt != NULL) 472: ctxt->errNo = error; 473: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 474: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 475: info); 476: if (ctxt != NULL) { 477: ctxt->wellFormed = 0; 478: if (ctxt->recovery == 0) 479: ctxt->disableSAX = 1; 480: } 481: } 482: 483: /** 484: * xmlFatalErrMsg: 485: * @ctxt: an XML parser context 486: * @error: the error number 487: * @msg: the error message 488: * 489: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 490: */ 491: static void 492: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 493: const char *msg) 494: { 495: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 496: (ctxt->instate == XML_PARSER_EOF)) 497: return; 498: if (ctxt != NULL) 499: ctxt->errNo = error; 500: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 501: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 502: if (ctxt != NULL) { 503: ctxt->wellFormed = 0; 504: if (ctxt->recovery == 0) 505: ctxt->disableSAX = 1; 506: } 507: } 508: 509: /** 510: * xmlWarningMsg: 511: * @ctxt: an XML parser context 512: * @error: the error number 513: * @msg: the error message 514: * @str1: extra data 515: * @str2: extra data 516: * 517: * Handle a warning. 518: */ 519: static void 520: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 521: const char *msg, const xmlChar *str1, const xmlChar *str2) 522: { 523: xmlStructuredErrorFunc schannel = NULL; 524: 525: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 526: (ctxt->instate == XML_PARSER_EOF)) 527: return; 528: if ((ctxt != NULL) && (ctxt->sax != NULL) && 529: (ctxt->sax->initialized == XML_SAX2_MAGIC)) 530: schannel = ctxt->sax->serror; 531: if (ctxt != NULL) { 532: __xmlRaiseError(schannel, 533: (ctxt->sax) ? 
ctxt->sax->warning : NULL, 534: ctxt->userData, 535: ctxt, NULL, XML_FROM_PARSER, error, 536: XML_ERR_WARNING, NULL, 0, 537: (const char *) str1, (const char *) str2, NULL, 0, 0, 538: msg, (const char *) str1, (const char *) str2); 539: } else { 540: __xmlRaiseError(schannel, NULL, NULL, 541: ctxt, NULL, XML_FROM_PARSER, error, 542: XML_ERR_WARNING, NULL, 0, 543: (const char *) str1, (const char *) str2, NULL, 0, 0, 544: msg, (const char *) str1, (const char *) str2); 545: } 546: } 547: 548: /** 549: * xmlValidityError: 550: * @ctxt: an XML parser context 551: * @error: the error number 552: * @msg: the error message 553: * @str1: extra data 554: * 555: * Handle a validity error. 556: */ 557: static void 558: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 559: const char *msg, const xmlChar *str1, const xmlChar *str2) 560: { 561: xmlStructuredErrorFunc schannel = NULL; 562: 563: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 564: (ctxt->instate == XML_PARSER_EOF)) 565: return; 566: if (ctxt != NULL) { 567: ctxt->errNo = error; 568: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 569: schannel = ctxt->sax->serror; 570: } 571: if (ctxt != NULL) { 572: __xmlRaiseError(schannel, 573: ctxt->vctxt.error, ctxt->vctxt.userData, 574: ctxt, NULL, XML_FROM_DTD, error, 575: XML_ERR_ERROR, NULL, 0, (const char *) str1, 576: (const char *) str2, NULL, 0, 0, 577: msg, (const char *) str1, (const char *) str2); 578: ctxt->valid = 0; 579: } else { 580: __xmlRaiseError(schannel, NULL, NULL, 581: ctxt, NULL, XML_FROM_DTD, error, 582: XML_ERR_ERROR, NULL, 0, (const char *) str1, 583: (const char *) str2, NULL, 0, 0, 584: msg, (const char *) str1, (const char *) str2); 585: } 586: } 587: 588: /** 589: * xmlFatalErrMsgInt: 590: * @ctxt: an XML parser context 591: * @error: the error number 592: * @msg: the error message 593: * @val: an integer value 594: * 595: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 596: */ 597: static void 598: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 599: const char *msg, int val) 600: { 601: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 602: (ctxt->instate == XML_PARSER_EOF)) 603: return; 604: if (ctxt != NULL) 605: ctxt->errNo = error; 606: __xmlRaiseError(NULL, NULL, NULL, 607: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 608: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 609: if (ctxt != NULL) { 610: ctxt->wellFormed = 0; 611: if (ctxt->recovery == 0) 612: ctxt->disableSAX = 1; 613: } 614: } 615: 616: /** 617: * xmlFatalErrMsgStrIntStr: 618: * @ctxt: an XML parser context 619: * @error: the error number 620: * @msg: the error message 621: * @str1: an string info 622: * @val: an integer value 623: * @str2: an string info 624: * 625: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 626: */ 627: static void 628: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 629: const char *msg, const xmlChar *str1, int val, 630: const xmlChar *str2) 631: { 632: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 633: (ctxt->instate == XML_PARSER_EOF)) 634: return; 635: if (ctxt != NULL) 636: ctxt->errNo = error; 637: __xmlRaiseError(NULL, NULL, NULL, 638: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 639: NULL, 0, (const char *) str1, (const char *) str2, 640: NULL, val, 0, msg, str1, val, str2); 641: if (ctxt != NULL) { 642: ctxt->wellFormed = 0; 643: if (ctxt->recovery == 0) 644: ctxt->disableSAX = 1; 645: } 646: } 647: 648: /** 649: * xmlFatalErrMsgStr: 650: * @ctxt: an XML parser context 651: * @error: the error number 652: * @msg: the error message 653: * @val: a string value 654: * 655: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 656: */ 657: static void 658: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 659: const char *msg, const xmlChar * val) 660: { 661: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 662: (ctxt->instate == XML_PARSER_EOF)) 663: return; 664: if (ctxt != NULL) 665: ctxt->errNo = error; 666: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 667: XML_FROM_PARSER, error, XML_ERR_FATAL, 668: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 669: val); 670: if (ctxt != NULL) { 671: ctxt->wellFormed = 0; 672: if (ctxt->recovery == 0) 673: ctxt->disableSAX = 1; 674: } 675: } 676: 677: /** 678: * xmlErrMsgStr: 679: * @ctxt: an XML parser context 680: * @error: the error number 681: * @msg: the error message 682: * @val: a string value 683: * 684: * Handle a non fatal parser error 685: */ 686: static void 687: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 688: const char *msg, const xmlChar * val) 689: { 690: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 691: (ctxt->instate == XML_PARSER_EOF)) 692: return; 693: if (ctxt != NULL) 694: ctxt->errNo = error; 695: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 696: XML_FROM_PARSER, error, XML_ERR_ERROR, 697: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 698: val); 699: } 700: 701: /** 702: * xmlNsErr: 703: * @ctxt: an XML parser context 704: * @error: the error number 705: * @msg: the message 706: * @info1: extra information string 707: * @info2: extra information string 708: * 709: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 710: */ 711: static void 712: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 713: const char *msg, 714: const xmlChar * info1, const xmlChar * info2, 715: const xmlChar * info3) 716: { 717: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 718: (ctxt->instate == XML_PARSER_EOF)) 719: return; 720: if (ctxt != NULL) 721: ctxt->errNo = error; 722: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 723: XML_ERR_ERROR, NULL, 0, (const char *) info1, 724: (const char *) info2, (const char *) info3, 0, 0, msg, 725: info1, info2, info3); 726: if (ctxt != NULL) 727: ctxt->nsWellFormed = 0; 728: } 729: 730: /** 731: * xmlNsWarn 732: * @ctxt: an XML parser context 733: * @error: the error number 734: * @msg: the message 735: * @info1: extra information string 736: * @info2: extra information string 737: * 738: * Handle a namespace warning error 739: */ 740: static void 741: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 742: const char *msg, 743: const xmlChar * info1, const xmlChar * info2, 744: const xmlChar * info3) 745: { 746: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 747: (ctxt->instate == XML_PARSER_EOF)) 748: return; 749: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 750: XML_ERR_WARNING, NULL, 0, (const char *) info1, 751: (const char *) info2, (const char *) info3, 0, 0, msg, 752: info1, info2, info3); 753: } 754: 755: /************************************************************************ 756: * * 757: * Library wide options * 758: * * 759: ************************************************************************/ 760: 761: /** 762: * xmlHasFeature: 763: * @feature: the feature to be examined 764: * 765: * Examines if the library has been compiled with a given feature. 766: * 767: * Returns a non-zero value if the feature exist, otherwise zero. 
768: * Returns zero (0) if the feature does not exist or an unknown 769: * unknown feature is requested, non-zero otherwise. 770: */ 771: int 772: xmlHasFeature(xmlFeature feature) 773: { 774: switch (feature) { 775: case XML_WITH_THREAD: 776: #ifdef LIBXML_THREAD_ENABLED 777: return(1); 778: #else 779: return(0); 780: #endif 781: case XML_WITH_TREE: 782: #ifdef LIBXML_TREE_ENABLED 783: return(1); 784: #else 785: return(0); 786: #endif 787: case XML_WITH_OUTPUT: 788: #ifdef LIBXML_OUTPUT_ENABLED 789: return(1); 790: #else 791: return(0); 792: #endif 793: case XML_WITH_PUSH: 794: #ifdef LIBXML_PUSH_ENABLED 795: return(1); 796: #else 797: return(0); 798: #endif 799: case XML_WITH_READER: 800: #ifdef LIBXML_READER_ENABLED 801: return(1); 802: #else 803: return(0); 804: #endif 805: case XML_WITH_PATTERN: 806: #ifdef LIBXML_PATTERN_ENABLED 807: return(1); 808: #else 809: return(0); 810: #endif 811: case XML_WITH_WRITER: 812: #ifdef LIBXML_WRITER_ENABLED 813: return(1); 814: #else 815: return(0); 816: #endif 817: case XML_WITH_SAX1: 818: #ifdef LIBXML_SAX1_ENABLED 819: return(1); 820: #else 821: return(0); 822: #endif 823: case XML_WITH_FTP: 824: #ifdef LIBXML_FTP_ENABLED 825: return(1); 826: #else 827: return(0); 828: #endif 829: case XML_WITH_HTTP: 830: #ifdef LIBXML_HTTP_ENABLED 831: return(1); 832: #else 833: return(0); 834: #endif 835: case XML_WITH_VALID: 836: #ifdef LIBXML_VALID_ENABLED 837: return(1); 838: #else 839: return(0); 840: #endif 841: case XML_WITH_HTML: 842: #ifdef LIBXML_HTML_ENABLED 843: return(1); 844: #else 845: return(0); 846: #endif 847: case XML_WITH_LEGACY: 848: #ifdef LIBXML_LEGACY_ENABLED 849: return(1); 850: #else 851: return(0); 852: #endif 853: case XML_WITH_C14N: 854: #ifdef LIBXML_C14N_ENABLED 855: return(1); 856: #else 857: return(0); 858: #endif 859: case XML_WITH_CATALOG: 860: #ifdef LIBXML_CATALOG_ENABLED 861: return(1); 862: #else 863: return(0); 864: #endif 865: case XML_WITH_XPATH: 866: #ifdef LIBXML_XPATH_ENABLED 867: return(1); 
868: #else 869: return(0); 870: #endif 871: case XML_WITH_XPTR: 872: #ifdef LIBXML_XPTR_ENABLED 873: return(1); 874: #else 875: return(0); 876: #endif 877: case XML_WITH_XINCLUDE: 878: #ifdef LIBXML_XINCLUDE_ENABLED 879: return(1); 880: #else 881: return(0); 882: #endif 883: case XML_WITH_ICONV: 884: #ifdef LIBXML_ICONV_ENABLED 885: return(1); 886: #else 887: return(0); 888: #endif 889: case XML_WITH_ISO8859X: 890: #ifdef LIBXML_ISO8859X_ENABLED 891: return(1); 892: #else 893: return(0); 894: #endif 895: case XML_WITH_UNICODE: 896: #ifdef LIBXML_UNICODE_ENABLED 897: return(1); 898: #else 899: return(0); 900: #endif 901: case XML_WITH_REGEXP: 902: #ifdef LIBXML_REGEXP_ENABLED 903: return(1); 904: #else 905: return(0); 906: #endif 907: case XML_WITH_AUTOMATA: 908: #ifdef LIBXML_AUTOMATA_ENABLED 909: return(1); 910: #else 911: return(0); 912: #endif 913: case XML_WITH_EXPR: 914: #ifdef LIBXML_EXPR_ENABLED 915: return(1); 916: #else 917: return(0); 918: #endif 919: case XML_WITH_SCHEMAS: 920: #ifdef LIBXML_SCHEMAS_ENABLED 921: return(1); 922: #else 923: return(0); 924: #endif 925: case XML_WITH_SCHEMATRON: 926: #ifdef LIBXML_SCHEMATRON_ENABLED 927: return(1); 928: #else 929: return(0); 930: #endif 931: case XML_WITH_MODULES: 932: #ifdef LIBXML_MODULES_ENABLED 933: return(1); 934: #else 935: return(0); 936: #endif 937: case XML_WITH_DEBUG: 938: #ifdef LIBXML_DEBUG_ENABLED 939: return(1); 940: #else 941: return(0); 942: #endif 943: case XML_WITH_DEBUG_MEM: 944: #ifdef DEBUG_MEMORY_LOCATION 945: return(1); 946: #else 947: return(0); 948: #endif 949: case XML_WITH_DEBUG_RUN: 950: #ifdef LIBXML_DEBUG_RUNTIME 951: return(1); 952: #else 953: return(0); 954: #endif 955: case XML_WITH_ZLIB: 956: #ifdef LIBXML_ZLIB_ENABLED 957: return(1); 958: #else 959: return(0); 960: #endif 961: case XML_WITH_LZMA: 962: #ifdef LIBXML_LZMA_ENABLED 963: return(1); 964: #else 965: return(0); 966: #endif 967: case XML_WITH_ICU: 968: #ifdef LIBXML_ICU_ENABLED 969: return(1); 970: #else 971: 
return(0); 972: #endif 973: default: 974: break; 975: } 976: return(0); 977: } 978: 979: /************************************************************************ 980: * * 981: * SAX2 defaulted attributes handling * 982: * * 983: ************************************************************************/ 984: 985: /** 986: * xmlDetectSAX2: 987: * @ctxt: an XML parser context 988: * 989: * Do the SAX2 detection and specific intialization 990: */ 991: static void 992: xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 993: if (ctxt == NULL) return; 994: #ifdef LIBXML_SAX1_ENABLED 995: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 996: ((ctxt->sax->startElementNs != NULL) || 997: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 998: #else 999: ctxt->sax2 = 1; 1000: #endif /* LIBXML_SAX1_ENABLED */ 1001: 1002: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1003: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1004: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1005: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1006: (ctxt->str_xml_ns == NULL)) { 1007: xmlErrMemory(ctxt, NULL); 1008: } 1009: } 1010: 1011: typedef struct _xmlDefAttrs xmlDefAttrs; 1012: typedef xmlDefAttrs *xmlDefAttrsPtr; 1013: struct _xmlDefAttrs { 1014: int nbAttrs; /* number of defaulted attributes on that element */ 1015: int maxAttrs; /* the size of the array */ 1016: const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1017: }; 1018: 1019: /** 1020: * xmlAttrNormalizeSpace: 1021: * @src: the source string 1022: * @dst: the target string 1023: * 1024: * Normalize the space in non CDATA attribute values: 1025: * If the attribute type is not CDATA, then the XML processor MUST further 1026: * process the normalized attribute value by discarding any leading and 1027: * trailing space (#x20) characters, and by replacing sequences of space 1028: * (#x20) characters by a single space (#x20) character. 
1029: * Note that the size of dst need to be at least src, and if one doesn't need 1030: * to preserve dst (and it doesn't come from a dictionary or read-only) then 1031: * passing src as dst is just fine. 1032: * 1033: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1034: * is needed. 1035: */ 1036: static xmlChar * 1037: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1038: { 1039: if ((src == NULL) || (dst == NULL)) 1040: return(NULL); 1041: 1042: while (*src == 0x20) src++; 1043: while (*src != 0) { 1044: if (*src == 0x20) { 1045: while (*src == 0x20) src++; 1046: if (*src != 0) 1047: *dst++ = 0x20; 1048: } else { 1049: *dst++ = *src++; 1050: } 1051: } 1052: *dst = 0; 1053: if (dst == src) 1054: return(NULL); 1055: return(dst); 1056: } 1057: 1058: /** 1059: * xmlAttrNormalizeSpace2: 1060: * @src: the source string 1061: * 1062: * Normalize the space in non CDATA attribute values, a slightly more complex 1063: * front end to avoid allocation problems when running on attribute values 1064: * coming from the input. 1065: * 1066: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1067: * is needed. 
1068: */ 1069: static const xmlChar * 1070: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1071: { 1072: int i; 1073: int remove_head = 0; 1074: int need_realloc = 0; 1075: const xmlChar *cur; 1076: 1077: if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1078: return(NULL); 1079: i = *len; 1080: if (i <= 0) 1081: return(NULL); 1082: 1083: cur = src; 1084: while (*cur == 0x20) { 1085: cur++; 1086: remove_head++; 1087: } 1088: while (*cur != 0) { 1089: if (*cur == 0x20) { 1090: cur++; 1091: if ((*cur == 0x20) || (*cur == 0)) { 1092: need_realloc = 1; 1093: break; 1094: } 1095: } else 1096: cur++; 1097: } 1098: if (need_realloc) { 1099: xmlChar *ret; 1100: 1101: ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1102: if (ret == NULL) { 1103: xmlErrMemory(ctxt, NULL); 1104: return(NULL); 1105: } 1106: xmlAttrNormalizeSpace(ret, ret); 1107: *len = (int) strlen((const char *)ret); 1108: return(ret); 1109: } else if (remove_head) { 1110: *len -= remove_head; 1111: memmove(src, src + remove_head, 1 + *len); 1112: return(src); 1113: } 1114: return(NULL); 1115: } 1116: 1117: /** 1118: * xmlAddDefAttrs: 1119: * @ctxt: an XML parser context 1120: * @fullname: the element fullname 1121: * @fullattr: the attribute fullname 1122: * @value: the attribute value 1123: * 1124: * Add a defaulted attribute for an element 1125: */ 1126: static void 1127: xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1128: const xmlChar *fullname, 1129: const xmlChar *fullattr, 1130: const xmlChar *value) { 1131: xmlDefAttrsPtr defaults; 1132: int len; 1133: const xmlChar *name; 1134: const xmlChar *prefix; 1135: 1136: /* 1137: * Allows to detect attribute redefinitions 1138: */ 1139: if (ctxt->attsSpecial != NULL) { 1140: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1141: return; 1142: } 1143: 1144: if (ctxt->attsDefault == NULL) { 1145: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1146: if (ctxt->attsDefault == NULL) 1147: goto mem_error; 1148: 
} 1149: 1150: /* 1151: * split the element name into prefix:localname , the string found 1152: * are within the DTD and then not associated to namespace names. 1153: */ 1154: name = xmlSplitQName3(fullname, &len); 1155: if (name == NULL) { 1156: name = xmlDictLookup(ctxt->dict, fullname, -1); 1157: prefix = NULL; 1158: } else { 1159: name = xmlDictLookup(ctxt->dict, name, -1); 1160: prefix = xmlDictLookup(ctxt->dict, fullname, len); 1161: } 1162: 1163: /* 1164: * make sure there is some storage 1165: */ 1166: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1167: if (defaults == NULL) { 1168: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1169: (4 * 5) * sizeof(const xmlChar *)); 1170: if (defaults == NULL) 1171: goto mem_error; 1172: defaults->nbAttrs = 0; 1173: defaults->maxAttrs = 4; 1174: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1175: defaults, NULL) < 0) { 1176: xmlFree(defaults); 1177: goto mem_error; 1178: } 1179: } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1180: xmlDefAttrsPtr temp; 1181: 1182: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1183: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1184: if (temp == NULL) 1185: goto mem_error; 1186: defaults = temp; 1187: defaults->maxAttrs *= 2; 1188: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1189: defaults, NULL) < 0) { 1190: xmlFree(defaults); 1191: goto mem_error; 1192: } 1193: } 1194: 1195: /* 1196: * Split the element name into prefix:localname , the string found 1197: * are within the DTD and hen not associated to namespace names. 
1198: */ 1199: name = xmlSplitQName3(fullattr, &len); 1200: if (name == NULL) { 1201: name = xmlDictLookup(ctxt->dict, fullattr, -1); 1202: prefix = NULL; 1203: } else { 1204: name = xmlDictLookup(ctxt->dict, name, -1); 1205: prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1206: } 1207: 1208: defaults->values[5 * defaults->nbAttrs] = name; 1209: defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1210: /* intern the string and precompute the end */ 1211: len = xmlStrlen(value); 1212: value = xmlDictLookup(ctxt->dict, value, len); 1213: defaults->values[5 * defaults->nbAttrs + 2] = value; 1214: defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1215: if (ctxt->external) 1216: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1217: else 1218: defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1219: defaults->nbAttrs++; 1220: 1221: return; 1222: 1223: mem_error: 1224: xmlErrMemory(ctxt, NULL); 1225: return; 1226: } 1227: 1228: /** 1229: * xmlAddSpecialAttr: 1230: * @ctxt: an XML parser context 1231: * @fullname: the element fullname 1232: * @fullattr: the attribute fullname 1233: * @type: the attribute type 1234: * 1235: * Register this attribute type 1236: */ 1237: static void 1238: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1239: const xmlChar *fullname, 1240: const xmlChar *fullattr, 1241: int type) 1242: { 1243: if (ctxt->attsSpecial == NULL) { 1244: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1245: if (ctxt->attsSpecial == NULL) 1246: goto mem_error; 1247: } 1248: 1249: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1250: return; 1251: 1252: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1253: (void *) (long) type); 1254: return; 1255: 1256: mem_error: 1257: xmlErrMemory(ctxt, NULL); 1258: return; 1259: } 1260: 1261: /** 1262: * xmlCleanSpecialAttrCallback: 1263: * 1264: * Removes CDATA attributes from the special attribute table 1265: */ 1266: static void 1267: xmlCleanSpecialAttrCallback(void 
*payload, void *data, 1268: const xmlChar *fullname, const xmlChar *fullattr, 1269: const xmlChar *unused ATTRIBUTE_UNUSED) { 1270: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1271: 1272: if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1273: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1274: } 1275: } 1276: 1277: /** 1278: * xmlCleanSpecialAttr: 1279: * @ctxt: an XML parser context 1280: * 1281: * Trim the list of attributes defined to remove all those of type 1282: * CDATA as they are not special. This call should be done when finishing 1283: * to parse the DTD and before starting to parse the document root. 1284: */ 1285: static void 1286: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1287: { 1288: if (ctxt->attsSpecial == NULL) 1289: return; 1290: 1291: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1292: 1293: if (xmlHashSize(ctxt->attsSpecial) == 0) { 1294: xmlHashFree(ctxt->attsSpecial, NULL); 1295: ctxt->attsSpecial = NULL; 1296: } 1297: return; 1298: } 1299: 1300: /** 1301: * xmlCheckLanguageID: 1302: * @lang: pointer to the string value 1303: * 1304: * Checks that the value conforms to the LanguageID production: 1305: * 1306: * NOTE: this is somewhat deprecated, those productions were removed from 1307: * the XML Second edition. 
1308: * 1309: * [33] LanguageID ::= Langcode ('-' Subcode)* 1310: * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1311: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1312: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1313: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1314: * [38] Subcode ::= ([a-z] | [A-Z])+ 1315: * 1316: * The current REC reference the sucessors of RFC 1766, currently 5646 1317: * 1318: * http://www.rfc-editor.org/rfc/rfc5646.txt 1319: * langtag = language 1320: * ["-" script] 1321: * ["-" region] 1322: * *("-" variant) 1323: * *("-" extension) 1324: * ["-" privateuse] 1325: * language = 2*3ALPHA ; shortest ISO 639 code 1326: * ["-" extlang] ; sometimes followed by 1327: * ; extended language subtags 1328: * / 4ALPHA ; or reserved for future use 1329: * / 5*8ALPHA ; or registered language subtag 1330: * 1331: * extlang = 3ALPHA ; selected ISO 639 codes 1332: * *2("-" 3ALPHA) ; permanently reserved 1333: * 1334: * script = 4ALPHA ; ISO 15924 code 1335: * 1336: * region = 2ALPHA ; ISO 3166-1 code 1337: * / 3DIGIT ; UN M.49 code 1338: * 1339: * variant = 5*8alphanum ; registered variants 1340: * / (DIGIT 3alphanum) 1341: * 1342: * extension = singleton 1*("-" (2*8alphanum)) 1343: * 1344: * ; Single alphanumerics 1345: * ; "x" reserved for private use 1346: * singleton = DIGIT ; 0 - 9 1347: * / %x41-57 ; A - W 1348: * / %x59-5A ; Y - Z 1349: * / %x61-77 ; a - w 1350: * / %x79-7A ; y - z 1351: * 1352: * it sounds right to still allow Irregular i-xxx IANA and user codes too 1353: * The parser below doesn't try to cope with extension or privateuse 1354: * that could be added but that's not interoperable anyway 1355: * 1356: * Returns 1 if correct 0 otherwise 1357: **/ 1358: int 1359: xmlCheckLanguageID(const xmlChar * lang) 1360: { 1361: const xmlChar *cur = lang, *nxt; 1362: 1363: if (cur == NULL) 1364: return (0); 1365: if (((cur[0] == 'i') && (cur[1] == '-')) || 1366: ((cur[0] == 'I') && (cur[1] == '-')) || 1367: ((cur[0] 
== 'x') && (cur[1] == '-')) || 1368: ((cur[0] == 'X') && (cur[1] == '-'))) { 1369: /* 1370: * Still allow IANA code and user code which were coming 1371: * from the previous version of the XML-1.0 specification 1372: * it's deprecated but we should not fail 1373: */ 1374: cur += 2; 1375: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1376: ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1377: cur++; 1378: return(cur[0] == 0); 1379: } 1380: nxt = cur; 1381: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1382: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1383: nxt++; 1384: if (nxt - cur >= 4) { 1385: /* 1386: * Reserved 1387: */ 1388: if ((nxt - cur > 8) || (nxt[0] != 0)) 1389: return(0); 1390: return(1); 1391: } 1392: if (nxt - cur < 2) 1393: return(0); 1394: /* we got an ISO 639 code */ 1395: if (nxt[0] == 0) 1396: return(1); 1397: if (nxt[0] != '-') 1398: return(0); 1399: 1400: nxt++; 1401: cur = nxt; 1402: /* now we can have extlang or script or region or variant */ 1403: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1404: goto region_m49; 1405: 1406: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1407: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1408: nxt++; 1409: if (nxt - cur == 4) 1410: goto script; 1411: if (nxt - cur == 2) 1412: goto region; 1413: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1414: goto variant; 1415: if (nxt - cur != 3) 1416: return(0); 1417: /* we parsed an extlang */ 1418: if (nxt[0] == 0) 1419: return(1); 1420: if (nxt[0] != '-') 1421: return(0); 1422: 1423: nxt++; 1424: cur = nxt; 1425: /* now we can have script or region or variant */ 1426: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1427: goto region_m49; 1428: 1429: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1430: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1431: nxt++; 1432: if (nxt - cur == 2) 1433: goto region; 1434: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1435: goto variant; 1436: if (nxt - cur != 4) 1437: return(0); 1438: /* we parsed a script */ 1439: script: 1440: if (nxt[0] == 0) 1441: return(1); 1442: if 
(nxt[0] != '-') 1443: return(0); 1444: 1445: nxt++; 1446: cur = nxt; 1447: /* now we can have region or variant */ 1448: if ((nxt[0] >= '0') && (nxt[0] <= '9')) 1449: goto region_m49; 1450: 1451: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1452: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1453: nxt++; 1454: 1455: if ((nxt - cur >= 5) && (nxt - cur <= 8)) 1456: goto variant; 1457: if (nxt - cur != 2) 1458: return(0); 1459: /* we parsed a region */ 1460: region: 1461: if (nxt[0] == 0) 1462: return(1); 1463: if (nxt[0] != '-') 1464: return(0); 1465: 1466: nxt++; 1467: cur = nxt; 1468: /* now we can just have a variant */ 1469: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || 1470: ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) 1471: nxt++; 1472: 1473: if ((nxt - cur < 5) || (nxt - cur > 8)) 1474: return(0); 1475: 1476: /* we parsed a variant */ 1477: variant: 1478: if (nxt[0] == 0) 1479: return(1); 1480: if (nxt[0] != '-') 1481: return(0); 1482: /* extensions and private use subtags not checked */ 1483: return (1); 1484: 1485: region_m49: 1486: if (((nxt[1] >= '0') && (nxt[1] <= '9')) && 1487: ((nxt[2] >= '0') && (nxt[2] <= '9'))) { 1488: nxt += 3; 1489: goto region; 1490: } 1491: return(0); 1492: } 1493: 1494: /************************************************************************ 1495: * * 1496: * Parser stacks related functions and macros * 1497: * * 1498: ************************************************************************/ 1499: 1500: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1501: const xmlChar ** str); 1502: 1503: #ifdef SAX2 1504: /** 1505: * nsPush: 1506: * @ctxt: an XML parser context 1507: * @prefix: the namespace prefix or NULL 1508: * @URL: the namespace name 1509: * 1510: * Pushes a new parser namespace on top of the ns stack 1511: * 1512: * Returns -1 in case of error, -2 if the namespace should be discarded 1513: * and the index in the stack otherwise. 
1514: */ 1515: static int 1516: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1517: { 1518: if (ctxt->options & XML_PARSE_NSCLEAN) { 1519: int i; 1520: for (i = 0;i < ctxt->nsNr;i += 2) { 1521: if (ctxt->nsTab[i] == prefix) { 1522: /* in scope */ 1523: if (ctxt->nsTab[i + 1] == URL) 1524: return(-2); 1525: /* out of scope keep it */ 1526: break; 1527: } 1528: } 1529: } 1530: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1531: ctxt->nsMax = 10; 1532: ctxt->nsNr = 0; 1533: ctxt->nsTab = (const xmlChar **) 1534: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1535: if (ctxt->nsTab == NULL) { 1536: xmlErrMemory(ctxt, NULL); 1537: ctxt->nsMax = 0; 1538: return (-1); 1539: } 1540: } else if (ctxt->nsNr >= ctxt->nsMax) { 1541: const xmlChar ** tmp; 1542: ctxt->nsMax *= 2; 1543: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1544: ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1545: if (tmp == NULL) { 1546: xmlErrMemory(ctxt, NULL); 1547: ctxt->nsMax /= 2; 1548: return (-1); 1549: } 1550: ctxt->nsTab = tmp; 1551: } 1552: ctxt->nsTab[ctxt->nsNr++] = prefix; 1553: ctxt->nsTab[ctxt->nsNr++] = URL; 1554: return (ctxt->nsNr); 1555: } 1556: /** 1557: * nsPop: 1558: * @ctxt: an XML parser context 1559: * @nr: the number to pop 1560: * 1561: * Pops the top @nr parser prefix/namespace from the ns stack 1562: * 1563: * Returns the number of namespaces removed 1564: */ 1565: static int 1566: nsPop(xmlParserCtxtPtr ctxt, int nr) 1567: { 1568: int i; 1569: 1570: if (ctxt->nsTab == NULL) return(0); 1571: if (ctxt->nsNr < nr) { 1572: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1573: nr = ctxt->nsNr; 1574: } 1575: if (ctxt->nsNr <= 0) 1576: return (0); 1577: 1578: for (i = 0;i < nr;i++) { 1579: ctxt->nsNr--; 1580: ctxt->nsTab[ctxt->nsNr] = NULL; 1581: } 1582: return(nr); 1583: } 1584: #endif 1585: 1586: static int 1587: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1588: const xmlChar **atts; 1589: int *attallocs; 1590: int 
maxatts; 1591: 1592: if (ctxt->atts == NULL) { 1593: maxatts = 55; /* allow for 10 attrs by default */ 1594: atts = (const xmlChar **) 1595: xmlMalloc(maxatts * sizeof(xmlChar *)); 1596: if (atts == NULL) goto mem_error; 1597: ctxt->atts = atts; 1598: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1599: if (attallocs == NULL) goto mem_error; 1600: ctxt->attallocs = attallocs; 1601: ctxt->maxatts = maxatts; 1602: } else if (nr + 5 > ctxt->maxatts) { 1603: maxatts = (nr + 5) * 2; 1604: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1605: maxatts * sizeof(const xmlChar *)); 1606: if (atts == NULL) goto mem_error; 1607: ctxt->atts = atts; 1608: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1609: (maxatts / 5) * sizeof(int)); 1610: if (attallocs == NULL) goto mem_error; 1611: ctxt->attallocs = attallocs; 1612: ctxt->maxatts = maxatts; 1613: } 1614: return(ctxt->maxatts); 1615: mem_error: 1616: xmlErrMemory(ctxt, NULL); 1617: return(-1); 1618: } 1619: 1620: /** 1621: * inputPush: 1622: * @ctxt: an XML parser context 1623: * @value: the parser input 1624: * 1625: * Pushes a new parser input on top of the input stack 1626: * 1627: * Returns -1 in case of error, the index in the stack otherwise 1628: */ 1629: int 1630: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1631: { 1632: if ((ctxt == NULL) || (value == NULL)) 1633: return(-1); 1634: if (ctxt->inputNr >= ctxt->inputMax) { 1635: ctxt->inputMax *= 2; 1636: ctxt->inputTab = 1637: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1638: ctxt->inputMax * 1639: sizeof(ctxt->inputTab[0])); 1640: if (ctxt->inputTab == NULL) { 1641: xmlErrMemory(ctxt, NULL); 1642: xmlFreeInputStream(value); 1643: ctxt->inputMax /= 2; 1644: value = NULL; 1645: return (-1); 1646: } 1647: } 1648: ctxt->inputTab[ctxt->inputNr] = value; 1649: ctxt->input = value; 1650: return (ctxt->inputNr++); 1651: } 1652: /** 1653: * inputPop: 1654: * @ctxt: an XML parser context 1655: * 1656: * Pops the top parser 
input from the input stack 1657: * 1658: * Returns the input just removed 1659: */ 1660: xmlParserInputPtr 1661: inputPop(xmlParserCtxtPtr ctxt) 1662: { 1663: xmlParserInputPtr ret; 1664: 1665: if (ctxt == NULL) 1666: return(NULL); 1667: if (ctxt->inputNr <= 0) 1668: return (NULL); 1669: ctxt->inputNr--; 1670: if (ctxt->inputNr > 0) 1671: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1672: else 1673: ctxt->input = NULL; 1674: ret = ctxt->inputTab[ctxt->inputNr]; 1675: ctxt->inputTab[ctxt->inputNr] = NULL; 1676: return (ret); 1677: } 1678: /** 1679: * nodePush: 1680: * @ctxt: an XML parser context 1681: * @value: the element node 1682: * 1683: * Pushes a new element node on top of the node stack 1684: * 1685: * Returns -1 in case of error, the index in the stack otherwise 1686: */ 1687: int 1688: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1689: { 1690: if (ctxt == NULL) return(0); 1691: if (ctxt->nodeNr >= ctxt->nodeMax) { 1692: xmlNodePtr *tmp; 1693: 1694: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1695: ctxt->nodeMax * 2 * 1696: sizeof(ctxt->nodeTab[0])); 1697: if (tmp == NULL) { 1698: xmlErrMemory(ctxt, NULL); 1699: return (-1); 1700: } 1701: ctxt->nodeTab = tmp; 1702: ctxt->nodeMax *= 2; 1703: } 1704: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1705: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1706: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1707: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1708: xmlParserMaxDepth); 1709: ctxt->instate = XML_PARSER_EOF; 1710: return(-1); 1711: } 1712: ctxt->nodeTab[ctxt->nodeNr] = value; 1713: ctxt->node = value; 1714: return (ctxt->nodeNr++); 1715: } 1716: 1717: /** 1718: * nodePop: 1719: * @ctxt: an XML parser context 1720: * 1721: * Pops the top element node from the node stack 1722: * 1723: * Returns the node just removed 1724: */ 1725: xmlNodePtr 1726: nodePop(xmlParserCtxtPtr ctxt) 1727: { 1728: xmlNodePtr ret; 1729: 1730: if (ctxt == NULL) return(NULL); 1731: if 
(ctxt->nodeNr <= 0) 1732: return (NULL); 1733: ctxt->nodeNr--; 1734: if (ctxt->nodeNr > 0) 1735: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1736: else 1737: ctxt->node = NULL; 1738: ret = ctxt->nodeTab[ctxt->nodeNr]; 1739: ctxt->nodeTab[ctxt->nodeNr] = NULL; 1740: return (ret); 1741: } 1742: 1743: #ifdef LIBXML_PUSH_ENABLED 1744: /** 1745: * nameNsPush: 1746: * @ctxt: an XML parser context 1747: * @value: the element name 1748: * @prefix: the element prefix 1749: * @URI: the element namespace name 1750: * 1751: * Pushes a new element name/prefix/URL on top of the name stack 1752: * 1753: * Returns -1 in case of error, the index in the stack otherwise 1754: */ 1755: static int 1756: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1757: const xmlChar *prefix, const xmlChar *URI, int nsNr) 1758: { 1759: if (ctxt->nameNr >= ctxt->nameMax) { 1760: const xmlChar * *tmp; 1761: void **tmp2; 1762: ctxt->nameMax *= 2; 1763: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1764: ctxt->nameMax * 1765: sizeof(ctxt->nameTab[0])); 1766: if (tmp == NULL) { 1767: ctxt->nameMax /= 2; 1768: goto mem_error; 1769: } 1770: ctxt->nameTab = tmp; 1771: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1772: ctxt->nameMax * 3 * 1773: sizeof(ctxt->pushTab[0])); 1774: if (tmp2 == NULL) { 1775: ctxt->nameMax /= 2; 1776: goto mem_error; 1777: } 1778: ctxt->pushTab = tmp2; 1779: } 1780: ctxt->nameTab[ctxt->nameNr] = value; 1781: ctxt->name = value; 1782: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1783: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1784: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1785: return (ctxt->nameNr++); 1786: mem_error: 1787: xmlErrMemory(ctxt, NULL); 1788: return (-1); 1789: } 1790: /** 1791: * nameNsPop: 1792: * @ctxt: an XML parser context 1793: * 1794: * Pops the top element/prefix/URI name from the name stack 1795: * 1796: * Returns the name just removed 1797: */ 1798: static const xmlChar * 1799: 
nameNsPop(xmlParserCtxtPtr ctxt) 1800: { 1801: const xmlChar *ret; 1802: 1803: if (ctxt->nameNr <= 0) 1804: return (NULL); 1805: ctxt->nameNr--; 1806: if (ctxt->nameNr > 0) 1807: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1808: else 1809: ctxt->name = NULL; 1810: ret = ctxt->nameTab[ctxt->nameNr]; 1811: ctxt->nameTab[ctxt->nameNr] = NULL; 1812: return (ret); 1813: } 1814: #endif /* LIBXML_PUSH_ENABLED */ 1815: 1816: /** 1817: * namePush: 1818: * @ctxt: an XML parser context 1819: * @value: the element name 1820: * 1821: * Pushes a new element name on top of the name stack 1822: * 1823: * Returns -1 in case of error, the index in the stack otherwise 1824: */ 1825: int 1826: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1827: { 1828: if (ctxt == NULL) return (-1); 1829: 1830: if (ctxt->nameNr >= ctxt->nameMax) { 1831: const xmlChar * *tmp; 1832: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1833: ctxt->nameMax * 2 * 1834: sizeof(ctxt->nameTab[0])); 1835: if (tmp == NULL) { 1836: goto mem_error; 1837: } 1838: ctxt->nameTab = tmp; 1839: ctxt->nameMax *= 2; 1840: } 1841: ctxt->nameTab[ctxt->nameNr] = value; 1842: ctxt->name = value; 1843: return (ctxt->nameNr++); 1844: mem_error: 1845: xmlErrMemory(ctxt, NULL); 1846: return (-1); 1847: } 1848: /** 1849: * namePop: 1850: * @ctxt: an XML parser context 1851: * 1852: * Pops the top element name from the name stack 1853: * 1854: * Returns the name just removed 1855: */ 1856: const xmlChar * 1857: namePop(xmlParserCtxtPtr ctxt) 1858: { 1859: const xmlChar *ret; 1860: 1861: if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1862: return (NULL); 1863: ctxt->nameNr--; 1864: if (ctxt->nameNr > 0) 1865: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1866: else 1867: ctxt->name = NULL; 1868: ret = ctxt->nameTab[ctxt->nameNr]; 1869: ctxt->nameTab[ctxt->nameNr] = NULL; 1870: return (ret); 1871: } 1872: 1873: static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1874: if (ctxt->spaceNr >= ctxt->spaceMax) { 
1875: int *tmp; 1876: 1877: ctxt->spaceMax *= 2; 1878: tmp = (int *) xmlRealloc(ctxt->spaceTab, 1879: ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1880: if (tmp == NULL) { 1881: xmlErrMemory(ctxt, NULL); 1882: ctxt->spaceMax /=2; 1883: return(-1); 1884: } 1885: ctxt->spaceTab = tmp; 1886: } 1887: ctxt->spaceTab[ctxt->spaceNr] = val; 1888: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1889: return(ctxt->spaceNr++); 1890: } 1891: 1892: static int spacePop(xmlParserCtxtPtr ctxt) { 1893: int ret; 1894: if (ctxt->spaceNr <= 0) return(0); 1895: ctxt->spaceNr--; 1896: if (ctxt->spaceNr > 0) 1897: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1898: else 1899: ctxt->space = &ctxt->spaceTab[0]; 1900: ret = ctxt->spaceTab[ctxt->spaceNr]; 1901: ctxt->spaceTab[ctxt->spaceNr] = -1; 1902: return(ret); 1903: } 1904: 1905: /* 1906: * Macros for accessing the content. Those should be used only by the parser, 1907: * and not exported. 1908: * 1909: * Dirty macros, i.e. one often need to make assumption on the context to 1910: * use them 1911: * 1912: * CUR_PTR return the current pointer to the xmlChar to be parsed. 1913: * To be used with extreme caution since operations consuming 1914: * characters may move the input buffer to a different location ! 1915: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1916: * This should be used internally by the parser 1917: * only to compare to ASCII values otherwise it would break when 1918: * running with UTF-8 encoding. 1919: * RAW same as CUR but in the input buffer, bypass any token 1920: * extraction that may have been done 1921: * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1922: * to compare on ASCII based substring. 1923: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1924: * strings without newlines within the parser. 1925: * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1926: * defined char within the parser. 
1927: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1928: * 1929: * NEXT Skip to the next character, this does the proper decoding 1930: * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1931: * NEXTL(l) Skip the current unicode character of l xmlChars long. 1932: * CUR_CHAR(l) returns the current unicode character (int), set l 1933: * to the number of xmlChars used for the encoding [0-5]. 1934: * CUR_SCHAR same but operate on a string instead of the context 1935: * COPY_BUF copy the current unicode char to the target buffer, increment 1936: * the index 1937: * GROW, SHRINK handling of input buffers 1938: */ 1939: 1940: #define RAW (*ctxt->input->cur) 1941: #define CUR (*ctxt->input->cur) 1942: #define NXT(val) ctxt->input->cur[(val)] 1943: #define CUR_PTR ctxt->input->cur 1944: 1945: #define CMP4( s, c1, c2, c3, c4 ) \ 1946: ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1947: ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1948: #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1949: ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1950: #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1951: ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1952: #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1953: ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1954: #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1955: ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1956: #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1957: ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1958: ((unsigned char *) s)[ 8 ] == c9 ) 1959: #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1960: ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1961: ((unsigned char *) s)[ 9 ] == c10 ) 1962: 1963: #define SKIP(val) do { \ 1964: ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 
1965: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1966: if ((*ctxt->input->cur == 0) && \ 1967: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1968: xmlPopInput(ctxt); \ 1969: } while (0) 1970: 1971: #define SKIPL(val) do { \ 1972: int skipl; \ 1973: for(skipl=0; skipl<val; skipl++) { \ 1974: if (*(ctxt->input->cur) == '\n') { \ 1975: ctxt->input->line++; ctxt->input->col = 1; \ 1976: } else ctxt->input->col++; \ 1977: ctxt->nbChars++; \ 1978: ctxt->input->cur++; \ 1979: } \ 1980: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1981: if ((*ctxt->input->cur == 0) && \ 1982: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1983: xmlPopInput(ctxt); \ 1984: } while (0) 1985: 1986: #define SHRINK if ((ctxt->progressive == 0) && \ 1987: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1988: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1989: xmlSHRINK (ctxt); 1990: 1991: static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1992: xmlParserInputShrink(ctxt->input); 1993: if ((*ctxt->input->cur == 0) && 1994: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1995: xmlPopInput(ctxt); 1996: } 1997: 1998: #define GROW if ((ctxt->progressive == 0) && \ 1999: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2000: xmlGROW (ctxt); 2001: 2002: static void xmlGROW (xmlParserCtxtPtr ctxt) { 2003: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2004: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2005: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2006: xmlPopInput(ctxt); 2007: } 2008: 2009: #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2010: 2011: #define NEXT xmlNextChar(ctxt) 2012: 2013: #define NEXT1 { \ 2014: ctxt->input->col++; \ 2015: ctxt->input->cur++; \ 2016: ctxt->nbChars++; \ 2017: if (*ctxt->input->cur == 0) \ 2018: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2019: } 2020: 2021: #define NEXTL(l) do { \ 2022: if (*(ctxt->input->cur) == '\n') { \ 2023: 
ctxt->input->line++; ctxt->input->col = 1; \ 2024: } else ctxt->input->col++; \ 2025: ctxt->input->cur += l; \ 2026: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2027: } while (0) 2028: 2029: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2030: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2031: 2032: #define COPY_BUF(l,b,i,v) \ 2033: if (l == 1) b[i++] = (xmlChar) v; \ 2034: else i += xmlCopyCharMultiByte(&b[i],v) 2035: 2036: /** 2037: * xmlSkipBlankChars: 2038: * @ctxt: the XML parser context 2039: * 2040: * skip all blanks character found at that point in the input streams. 2041: * It pops up finished entities in the process if allowable at that point. 2042: * 2043: * Returns the number of space chars skipped 2044: */ 2045: 2046: int 2047: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2048: int res = 0; 2049: 2050: /* 2051: * It's Okay to use CUR/NEXT here since all the blanks are on 2052: * the ASCII range. 2053: */ 2054: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2055: const xmlChar *cur; 2056: /* 2057: * if we are in the document content, go really fast 2058: */ 2059: cur = ctxt->input->cur; 2060: while (IS_BLANK_CH(*cur)) { 2061: if (*cur == '\n') { 2062: ctxt->input->line++; ctxt->input->col = 1; 2063: } 2064: cur++; 2065: res++; 2066: if (*cur == 0) { 2067: ctxt->input->cur = cur; 2068: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2069: cur = ctxt->input->cur; 2070: } 2071: } 2072: ctxt->input->cur = cur; 2073: } else { 2074: int cur; 2075: do { 2076: cur = CUR; 2077: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2078: NEXT; 2079: cur = CUR; 2080: res++; 2081: } 2082: while ((cur == 0) && (ctxt->inputNr > 1) && 2083: (ctxt->instate != XML_PARSER_COMMENT)) { 2084: xmlPopInput(ctxt); 2085: cur = CUR; 2086: } 2087: /* 2088: * Need to handle support of entities branching here 2089: */ 2090: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2091: } while (IS_BLANK(cur)); /* CHECKED 
tstblanks.xml */ 2092: } 2093: return(res); 2094: } 2095: 2096: /************************************************************************ 2097: * * 2098: * Commodity functions to handle entities * 2099: * * 2100: ************************************************************************/ 2101: 2102: /** 2103: * xmlPopInput: 2104: * @ctxt: an XML parser context 2105: * 2106: * xmlPopInput: the current input pointed by ctxt->input came to an end 2107: * pop it and return the next char. 2108: * 2109: * Returns the current xmlChar in the parser context 2110: */ 2111: xmlChar 2112: xmlPopInput(xmlParserCtxtPtr ctxt) { 2113: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2114: if (xmlParserDebugEntities) 2115: xmlGenericError(xmlGenericErrorContext, 2116: "Popping input %d\n", ctxt->inputNr); 2117: xmlFreeInputStream(inputPop(ctxt)); 2118: if ((*ctxt->input->cur == 0) && 2119: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2120: return(xmlPopInput(ctxt)); 2121: return(CUR); 2122: } 2123: 2124: /** 2125: * xmlPushInput: 2126: * @ctxt: an XML parser context 2127: * @input: an XML parser input fragment (entity, XML fragment ...). 2128: * 2129: * xmlPushInput: switch to a new input stream which is stacked on top 2130: * of the previous one(s). 
2131: * Returns -1 in case of error or the index in the input stack 2132: */ 2133: int 2134: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2135: int ret; 2136: if (input == NULL) return(-1); 2137: 2138: if (xmlParserDebugEntities) { 2139: if ((ctxt->input != NULL) && (ctxt->input->filename)) 2140: xmlGenericError(xmlGenericErrorContext, 2141: "%s(%d): ", ctxt->input->filename, 2142: ctxt->input->line); 2143: xmlGenericError(xmlGenericErrorContext, 2144: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2145: } 2146: ret = inputPush(ctxt, input); 2147: GROW; 2148: return(ret); 2149: } 2150: 2151: /** 2152: * xmlParseCharRef: 2153: * @ctxt: an XML parser context 2154: * 2155: * parse Reference declarations 2156: * 2157: * [66] CharRef ::= '&#' [0-9]+ ';' | 2158: * '&#x' [0-9a-fA-F]+ ';' 2159: * 2160: * [ WFC: Legal Character ] 2161: * Characters referred to using character references must match the 2162: * production for Char. 2163: * 2164: * Returns the value parsed (as an int), 0 in case of error 2165: */ 2166: int 2167: xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2168: unsigned int val = 0; 2169: int count = 0; 2170: unsigned int outofrange = 0; 2171: 2172: /* 2173: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2174: */ 2175: if ((RAW == '&') && (NXT(1) == '#') && 2176: (NXT(2) == 'x')) { 2177: SKIP(3); 2178: GROW; 2179: while (RAW != ';') { /* loop blocked by count */ 2180: if (count++ > 20) { 2181: count = 0; 2182: GROW; 2183: } 2184: if ((RAW >= '0') && (RAW <= '9')) 2185: val = val * 16 + (CUR - '0'); 2186: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2187: val = val * 16 + (CUR - 'a') + 10; 2188: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2189: val = val * 16 + (CUR - 'A') + 10; 2190: else { 2191: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2192: val = 0; 2193: break; 2194: } 2195: if (val > 0x10FFFF) 2196: outofrange = val; 2197: 2198: NEXT; 2199: count++; 2200: } 2201: if (RAW 
== ';') { 2202: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2203: ctxt->input->col++; 2204: ctxt->nbChars ++; 2205: ctxt->input->cur++; 2206: } 2207: } else if ((RAW == '&') && (NXT(1) == '#')) { 2208: SKIP(2); 2209: GROW; 2210: while (RAW != ';') { /* loop blocked by count */ 2211: if (count++ > 20) { 2212: count = 0; 2213: GROW; 2214: } 2215: if ((RAW >= '0') && (RAW <= '9')) 2216: val = val * 10 + (CUR - '0'); 2217: else { 2218: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2219: val = 0; 2220: break; 2221: } 2222: if (val > 0x10FFFF) 2223: outofrange = val; 2224: 2225: NEXT; 2226: count++; 2227: } 2228: if (RAW == ';') { 2229: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2230: ctxt->input->col++; 2231: ctxt->nbChars ++; 2232: ctxt->input->cur++; 2233: } 2234: } else { 2235: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2236: } 2237: 2238: /* 2239: * [ WFC: Legal Character ] 2240: * Characters referred to using character references must match the 2241: * production for Char. 2242: */ 2243: if ((IS_CHAR(val) && (outofrange == 0))) { 2244: return(val); 2245: } else { 2246: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2247: "xmlParseCharRef: invalid xmlChar value %d\n", 2248: val); 2249: } 2250: return(0); 2251: } 2252: 2253: /** 2254: * xmlParseStringCharRef: 2255: * @ctxt: an XML parser context 2256: * @str: a pointer to an index in the string 2257: * 2258: * parse Reference declarations, variant parsing from a string rather 2259: * than an an input flow. 2260: * 2261: * [66] CharRef ::= '&#' [0-9]+ ';' | 2262: * '&#x' [0-9a-fA-F]+ ';' 2263: * 2264: * [ WFC: Legal Character ] 2265: * Characters referred to using character references must match the 2266: * production for Char. 
2267: * 2268: * Returns the value parsed (as an int), 0 in case of error, str will be 2269: * updated to the current value of the index 2270: */ 2271: static int 2272: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2273: const xmlChar *ptr; 2274: xmlChar cur; 2275: unsigned int val = 0; 2276: unsigned int outofrange = 0; 2277: 2278: if ((str == NULL) || (*str == NULL)) return(0); 2279: ptr = *str; 2280: cur = *ptr; 2281: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2282: ptr += 3; 2283: cur = *ptr; 2284: while (cur != ';') { /* Non input consuming loop */ 2285: if ((cur >= '0') && (cur <= '9')) 2286: val = val * 16 + (cur - '0'); 2287: else if ((cur >= 'a') && (cur <= 'f')) 2288: val = val * 16 + (cur - 'a') + 10; 2289: else if ((cur >= 'A') && (cur <= 'F')) 2290: val = val * 16 + (cur - 'A') + 10; 2291: else { 2292: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2293: val = 0; 2294: break; 2295: } 2296: if (val > 0x10FFFF) 2297: outofrange = val; 2298: 2299: ptr++; 2300: cur = *ptr; 2301: } 2302: if (cur == ';') 2303: ptr++; 2304: } else if ((cur == '&') && (ptr[1] == '#')){ 2305: ptr += 2; 2306: cur = *ptr; 2307: while (cur != ';') { /* Non input consuming loops */ 2308: if ((cur >= '0') && (cur <= '9')) 2309: val = val * 10 + (cur - '0'); 2310: else { 2311: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2312: val = 0; 2313: break; 2314: } 2315: if (val > 0x10FFFF) 2316: outofrange = val; 2317: 2318: ptr++; 2319: cur = *ptr; 2320: } 2321: if (cur == ';') 2322: ptr++; 2323: } else { 2324: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2325: return(0); 2326: } 2327: *str = ptr; 2328: 2329: /* 2330: * [ WFC: Legal Character ] 2331: * Characters referred to using character references must match the 2332: * production for Char. 
2333: */ 2334: if ((IS_CHAR(val) && (outofrange == 0))) { 2335: return(val); 2336: } else { 2337: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2338: "xmlParseStringCharRef: invalid xmlChar value %d\n", 2339: val); 2340: } 2341: return(0); 2342: } 2343: 2344: /** 2345: * xmlNewBlanksWrapperInputStream: 2346: * @ctxt: an XML parser context 2347: * @entity: an Entity pointer 2348: * 2349: * Create a new input stream for wrapping 2350: * blanks around a PEReference 2351: * 2352: * Returns the new input stream or NULL 2353: */ 2354: 2355: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2356: 2357: static xmlParserInputPtr 2358: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2359: xmlParserInputPtr input; 2360: xmlChar *buffer; 2361: size_t length; 2362: if (entity == NULL) { 2363: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2364: "xmlNewBlanksWrapperInputStream entity\n"); 2365: return(NULL); 2366: } 2367: if (xmlParserDebugEntities) 2368: xmlGenericError(xmlGenericErrorContext, 2369: "new blanks wrapper for entity: %s\n", entity->name); 2370: input = xmlNewInputStream(ctxt); 2371: if (input == NULL) { 2372: return(NULL); 2373: } 2374: length = xmlStrlen(entity->name) + 5; 2375: buffer = xmlMallocAtomic(length); 2376: if (buffer == NULL) { 2377: xmlErrMemory(ctxt, NULL); 2378: xmlFree(input); 2379: return(NULL); 2380: } 2381: buffer [0] = ' '; 2382: buffer [1] = '%'; 2383: buffer [length-3] = ';'; 2384: buffer [length-2] = ' '; 2385: buffer [length-1] = 0; 2386: memcpy(buffer + 2, entity->name, length - 5); 2387: input->free = deallocblankswrapper; 2388: input->base = buffer; 2389: input->cur = buffer; 2390: input->length = length; 2391: input->end = &buffer[length]; 2392: return(input); 2393: } 2394: 2395: /** 2396: * xmlParserHandlePEReference: 2397: * @ctxt: the parser context 2398: * 2399: * [69] PEReference ::= '%' Name ';' 2400: * 2401: * [ WFC: No Recursion ] 2402: * A parsed entity must not contain a recursive 2403: * 
reference to itself, either directly or indirectly. 2404: * 2405: * [ WFC: Entity Declared ] 2406: * In a document without any DTD, a document with only an internal DTD 2407: * subset which contains no parameter entity references, or a document 2408: * with "standalone='yes'", ... ... The declaration of a parameter 2409: * entity must precede any reference to it... 2410: * 2411: * [ VC: Entity Declared ] 2412: * In a document with an external subset or external parameter entities 2413: * with "standalone='no'", ... ... The declaration of a parameter entity 2414: * must precede any reference to it... 2415: * 2416: * [ WFC: In DTD ] 2417: * Parameter-entity references may only appear in the DTD. 2418: * NOTE: misleading but this is handled. 2419: * 2420: * A PEReference may have been detected in the current input stream 2421: * the handling is done accordingly to 2422: * http://www.w3.org/TR/REC-xml#entproc 2423: * i.e. 2424: * - Included in literal in entity values 2425: * - Included as Parameter Entity reference within DTDs 2426: */ 2427: void 2428: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2429: const xmlChar *name; 2430: xmlEntityPtr entity = NULL; 2431: xmlParserInputPtr input; 2432: 2433: if (RAW != '%') return; 2434: switch(ctxt->instate) { 2435: case XML_PARSER_CDATA_SECTION: 2436: return; 2437: case XML_PARSER_COMMENT: 2438: return; 2439: case XML_PARSER_START_TAG: 2440: return; 2441: case XML_PARSER_END_TAG: 2442: return; 2443: case XML_PARSER_EOF: 2444: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2445: return; 2446: case XML_PARSER_PROLOG: 2447: case XML_PARSER_START: 2448: case XML_PARSER_MISC: 2449: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2450: return; 2451: case XML_PARSER_ENTITY_DECL: 2452: case XML_PARSER_CONTENT: 2453: case XML_PARSER_ATTRIBUTE_VALUE: 2454: case XML_PARSER_PI: 2455: case XML_PARSER_SYSTEM_LITERAL: 2456: case XML_PARSER_PUBLIC_LITERAL: 2457: /* we just ignore it there */ 2458: return; 2459: case 
XML_PARSER_EPILOG: 2460: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2461: return; 2462: case XML_PARSER_ENTITY_VALUE: 2463: /* 2464: * NOTE: in the case of entity values, we don't do the 2465: * substitution here since we need the literal 2466: * entity value to be able to save the internal 2467: * subset of the document. 2468: * This will be handled by xmlStringDecodeEntities 2469: */ 2470: return; 2471: case XML_PARSER_DTD: 2472: /* 2473: * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2474: * In the internal DTD subset, parameter-entity references 2475: * can occur only where markup declarations can occur, not 2476: * within markup declarations. 2477: * In that case this is handled in xmlParseMarkupDecl 2478: */ 2479: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2480: return; 2481: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2482: return; 2483: break; 2484: case XML_PARSER_IGNORE: 2485: return; 2486: } 2487: 2488: NEXT; 2489: name = xmlParseName(ctxt); 2490: if (xmlParserDebugEntities) 2491: xmlGenericError(xmlGenericErrorContext, 2492: "PEReference: %s\n", name); 2493: if (name == NULL) { 2494: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2495: } else { 2496: if (RAW == ';') { 2497: NEXT; 2498: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2499: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2500: if (entity == NULL) { 2501: 2502: /* 2503: * [ WFC: Entity Declared ] 2504: * In a document without any DTD, a document with only an 2505: * internal DTD subset which contains no parameter entity 2506: * references, or a document with "standalone='yes'", ... 2507: * ... The declaration of a parameter entity must precede 2508: * any reference to it... 
2509: */ 2510: if ((ctxt->standalone == 1) || 2511: ((ctxt->hasExternalSubset == 0) && 2512: (ctxt->hasPErefs == 0))) { 2513: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2514: "PEReference: %%%s; not found\n", name); 2515: } else { 2516: /* 2517: * [ VC: Entity Declared ] 2518: * In a document with an external subset or external 2519: * parameter entities with "standalone='no'", ... 2520: * ... The declaration of a parameter entity must precede 2521: * any reference to it... 2522: */ 2523: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2524: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2525: "PEReference: %%%s; not found\n", 2526: name, NULL); 2527: } else 2528: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2529: "PEReference: %%%s; not found\n", 2530: name, NULL); 2531: ctxt->valid = 0; 2532: } 2533: } else if (ctxt->input->free != deallocblankswrapper) { 2534: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2535: if (xmlPushInput(ctxt, input) < 0) 2536: return; 2537: } else { 2538: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2539: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2540: xmlChar start[4]; 2541: xmlCharEncoding enc; 2542: 2543: /* 2544: * handle the extra spaces added before and after 2545: * c.f. http://www.w3.org/TR/REC-xml#as-PE 2546: * this is done independently. 2547: */ 2548: input = xmlNewEntityInputStream(ctxt, entity); 2549: if (xmlPushInput(ctxt, input) < 0) 2550: return; 2551: 2552: /* 2553: * Get the 4 first bytes and decode the charset 2554: * if enc != XML_CHAR_ENCODING_NONE 2555: * plug some encoding conversion routines. 2556: * Note that, since we may have some non-UTF8 2557: * encoding (like UTF16, bug 135229), the 'length' 2558: * is not known, but we can calculate based upon 2559: * the amount of data in the buffer. 
2560: */ 2561: GROW 2562: if ((ctxt->input->end - ctxt->input->cur)>=4) { 2563: start[0] = RAW; 2564: start[1] = NXT(1); 2565: start[2] = NXT(2); 2566: start[3] = NXT(3); 2567: enc = xmlDetectCharEncoding(start, 4); 2568: if (enc != XML_CHAR_ENCODING_NONE) { 2569: xmlSwitchEncoding(ctxt, enc); 2570: } 2571: } 2572: 2573: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2574: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2575: (IS_BLANK_CH(NXT(5)))) { 2576: xmlParseTextDecl(ctxt); 2577: } 2578: } else { 2579: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2580: "PEReference: %s is not a parameter entity\n", 2581: name); 2582: } 2583: } 2584: } else { 2585: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2586: } 2587: } 2588: } 2589: 2590: /* 2591: * Macro used to grow the current buffer. 2592: */ 2593: #define growBuffer(buffer, n) { \ 2594: xmlChar *tmp; \ 2595: buffer##_size *= 2; \ 2596: buffer##_size += n; \ 2597: tmp = (xmlChar *) \ 2598: xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2599: if (tmp == NULL) goto mem_error; \ 2600: buffer = tmp; \ 2601: } 2602: 2603: /** 2604: * xmlStringLenDecodeEntities: 2605: * @ctxt: the parser context 2606: * @str: the input string 2607: * @len: the string length 2608: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2609: * @end: an end marker xmlChar, 0 if none 2610: * @end2: an end marker xmlChar, 0 if none 2611: * @end3: an end marker xmlChar, 0 if none 2612: * 2613: * Takes a entity string content and process to do the adequate substitutions. 2614: * 2615: * [67] Reference ::= EntityRef | CharRef 2616: * 2617: * [69] PEReference ::= '%' Name ';' 2618: * 2619: * Returns A newly allocated string with the substitution done. The caller 2620: * must deallocate it ! 
2621: */ 2622: xmlChar * 2623: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2624: int what, xmlChar end, xmlChar end2, xmlChar end3) { 2625: xmlChar *buffer = NULL; 2626: int buffer_size = 0; 2627: 2628: xmlChar *current = NULL; 2629: xmlChar *rep = NULL; 2630: const xmlChar *last; 2631: xmlEntityPtr ent; 2632: int c,l; 2633: int nbchars = 0; 2634: 2635: if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2636: return(NULL); 2637: last = str + len; 2638: 2639: if (((ctxt->depth > 40) && 2640: ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2641: (ctxt->depth > 1024)) { 2642: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2643: return(NULL); 2644: } 2645: 2646: /* 2647: * allocate a translation buffer. 2648: */ 2649: buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2650: buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2651: if (buffer == NULL) goto mem_error; 2652: 2653: /* 2654: * OK loop until we reach one of the ending char or a size limit. 2655: * we are operating on already parsed values. 
2656: */ 2657: if (str < last) 2658: c = CUR_SCHAR(str, l); 2659: else 2660: c = 0; 2661: while ((c != 0) && (c != end) && /* non input consuming loop */ 2662: (c != end2) && (c != end3)) { 2663: 2664: if (c == 0) break; 2665: if ((c == '&') && (str[1] == '#')) { 2666: int val = xmlParseStringCharRef(ctxt, &str); 2667: if (val != 0) { 2668: COPY_BUF(0,buffer,nbchars,val); 2669: } 2670: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2671: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2672: } 2673: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2674: if (xmlParserDebugEntities) 2675: xmlGenericError(xmlGenericErrorContext, 2676: "String decoding Entity Reference: %.30s\n", 2677: str); 2678: ent = xmlParseStringEntityRef(ctxt, &str); 2679: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2680: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2681: goto int_error; 2682: if (ent != NULL) 2683: ctxt->nbentities += ent->checked; 2684: if ((ent != NULL) && 2685: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2686: if (ent->content != NULL) { 2687: COPY_BUF(0,buffer,nbchars,ent->content[0]); 2688: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2689: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2690: } 2691: } else { 2692: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2693: "predefined entity has no content\n"); 2694: } 2695: } else if ((ent != NULL) && (ent->content != NULL)) { 2696: ctxt->depth++; 2697: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2698: 0, 0, 0); 2699: ctxt->depth--; 2700: 2701: if (rep != NULL) { 2702: current = rep; 2703: while (*current != 0) { /* non input consuming loop */ 2704: buffer[nbchars++] = *current++; 2705: if (nbchars > 2706: buffer_size - XML_PARSER_BUFFER_SIZE) { 2707: if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2708: goto int_error; 2709: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2710: } 2711: } 2712: xmlFree(rep); 2713: rep = NULL; 2714: } 2715: } else if (ent != NULL) { 2716: int i = 
xmlStrlen(ent->name); 2717: const xmlChar *cur = ent->name; 2718: 2719: buffer[nbchars++] = '&'; 2720: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2721: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2722: } 2723: for (;i > 0;i--) 2724: buffer[nbchars++] = *cur++; 2725: buffer[nbchars++] = ';'; 2726: } 2727: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2728: if (xmlParserDebugEntities) 2729: xmlGenericError(xmlGenericErrorContext, 2730: "String decoding PE Reference: %.30s\n", str); 2731: ent = xmlParseStringPEReference(ctxt, &str); 2732: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2733: goto int_error; 2734: if (ent != NULL) 2735: ctxt->nbentities += ent->checked; 2736: if (ent != NULL) { 2737: if (ent->content == NULL) { 2738: xmlLoadEntityContent(ctxt, ent); 2739: } 2740: ctxt->depth++; 2741: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2742: 0, 0, 0); 2743: ctxt->depth--; 2744: if (rep != NULL) { 2745: current = rep; 2746: while (*current != 0) { /* non input consuming loop */ 2747: buffer[nbchars++] = *current++; 2748: if (nbchars > 2749: buffer_size - XML_PARSER_BUFFER_SIZE) { 2750: if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2751: goto int_error; 2752: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2753: } 2754: } 2755: xmlFree(rep); 2756: rep = NULL; 2757: } 2758: } 2759: } else { 2760: COPY_BUF(l,buffer,nbchars,c); 2761: str += l; 2762: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2763: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2764: } 2765: } 2766: if (str < last) 2767: c = CUR_SCHAR(str, l); 2768: else 2769: c = 0; 2770: } 2771: buffer[nbchars] = 0; 2772: return(buffer); 2773: 2774: mem_error: 2775: xmlErrMemory(ctxt, NULL); 2776: int_error: 2777: if (rep != NULL) 2778: xmlFree(rep); 2779: if (buffer != NULL) 2780: xmlFree(buffer); 2781: return(NULL); 2782: } 2783: 2784: /** 2785: * xmlStringDecodeEntities: 2786: * @ctxt: the parser context 2787: * @str: the input string 2788: * @what: combination of 
XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2789: * @end: an end marker xmlChar, 0 if none 2790: * @end2: an end marker xmlChar, 0 if none 2791: * @end3: an end marker xmlChar, 0 if none 2792: * 2793: * Takes a entity string content and process to do the adequate substitutions. 2794: * 2795: * [67] Reference ::= EntityRef | CharRef 2796: * 2797: * [69] PEReference ::= '%' Name ';' 2798: * 2799: * Returns A newly allocated string with the substitution done. The caller 2800: * must deallocate it ! 2801: */ 2802: xmlChar * 2803: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2804: xmlChar end, xmlChar end2, xmlChar end3) { 2805: if ((ctxt == NULL) || (str == NULL)) return(NULL); 2806: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2807: end, end2, end3)); 2808: } 2809: 2810: /************************************************************************ 2811: * * 2812: * Commodity functions, cleanup needed ? * 2813: * * 2814: ************************************************************************/ 2815: 2816: /** 2817: * areBlanks: 2818: * @ctxt: an XML parser context 2819: * @str: a xmlChar * 2820: * @len: the size of @str 2821: * @blank_chars: we know the chars are blanks 2822: * 2823: * Is this a sequence of blank chars that one can ignore ? 2824: * 2825: * Returns 1 if ignorable 0 otherwise. 2826: */ 2827: 2828: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2829: int blank_chars) { 2830: int i, ret; 2831: xmlNodePtr lastChild; 2832: 2833: /* 2834: * Don't spend time trying to differentiate them, the same callback is 2835: * used ! 2836: */ 2837: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2838: return(0); 2839: 2840: /* 2841: * Check for xml:space value. 
2842: */ 2843: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2844: (*(ctxt->space) == -2)) 2845: return(0); 2846: 2847: /* 2848: * Check that the string is made of blanks 2849: */ 2850: if (blank_chars == 0) { 2851: for (i = 0;i < len;i++) 2852: if (!(IS_BLANK_CH(str[i]))) return(0); 2853: } 2854: 2855: /* 2856: * Look if the element is mixed content in the DTD if available 2857: */ 2858: if (ctxt->node == NULL) return(0); 2859: if (ctxt->myDoc != NULL) { 2860: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2861: if (ret == 0) return(1); 2862: if (ret == 1) return(0); 2863: } 2864: 2865: /* 2866: * Otherwise, heuristic :-\ 2867: */ 2868: if ((RAW != '<') && (RAW != 0xD)) return(0); 2869: if ((ctxt->node->children == NULL) && 2870: (RAW == '<') && (NXT(1) == '/')) return(0); 2871: 2872: lastChild = xmlGetLastChild(ctxt->node); 2873: if (lastChild == NULL) { 2874: if ((ctxt->node->type != XML_ELEMENT_NODE) && 2875: (ctxt->node->content != NULL)) return(0); 2876: } else if (xmlNodeIsText(lastChild)) 2877: return(0); 2878: else if ((ctxt->node->children != NULL) && 2879: (xmlNodeIsText(ctxt->node->children))) 2880: return(0); 2881: return(1); 2882: } 2883: 2884: /************************************************************************ 2885: * * 2886: * Extra stuff for namespace support * 2887: * Relates to http://www.w3.org/TR/WD-xml-names * 2888: * * 2889: ************************************************************************/ 2890: 2891: /** 2892: * xmlSplitQName: 2893: * @ctxt: an XML parser context 2894: * @name: an XML parser context 2895: * @prefix: a xmlChar ** 2896: * 2897: * parse an UTF8 encoded XML qualified name string 2898: * 2899: * [NS 5] QName ::= (Prefix ':')? LocalPart 2900: * 2901: * [NS 6] Prefix ::= NCName 2902: * 2903: * [NS 7] LocalPart ::= NCName 2904: * 2905: * Returns the local part, and prefix is updated 2906: * to get the Prefix if any. 
2907: */ 2908: 2909: xmlChar * 2910: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2911: xmlChar buf[XML_MAX_NAMELEN + 5]; 2912: xmlChar *buffer = NULL; 2913: int len = 0; 2914: int max = XML_MAX_NAMELEN; 2915: xmlChar *ret = NULL; 2916: const xmlChar *cur = name; 2917: int c; 2918: 2919: if (prefix == NULL) return(NULL); 2920: *prefix = NULL; 2921: 2922: if (cur == NULL) return(NULL); 2923: 2924: #ifndef XML_XML_NAMESPACE 2925: /* xml: prefix is not really a namespace */ 2926: if ((cur[0] == 'x') && (cur[1] == 'm') && 2927: (cur[2] == 'l') && (cur[3] == ':')) 2928: return(xmlStrdup(name)); 2929: #endif 2930: 2931: /* nasty but well=formed */ 2932: if (cur[0] == ':') 2933: return(xmlStrdup(name)); 2934: 2935: c = *cur++; 2936: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2937: buf[len++] = c; 2938: c = *cur++; 2939: } 2940: if (len >= max) { 2941: /* 2942: * Okay someone managed to make a huge name, so he's ready to pay 2943: * for the processing speed. 
2944: */ 2945: max = len * 2; 2946: 2947: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2948: if (buffer == NULL) { 2949: xmlErrMemory(ctxt, NULL); 2950: return(NULL); 2951: } 2952: memcpy(buffer, buf, len); 2953: while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2954: if (len + 10 > max) { 2955: xmlChar *tmp; 2956: 2957: max *= 2; 2958: tmp = (xmlChar *) xmlRealloc(buffer, 2959: max * sizeof(xmlChar)); 2960: if (tmp == NULL) { 2961: xmlFree(buffer); 2962: xmlErrMemory(ctxt, NULL); 2963: return(NULL); 2964: } 2965: buffer = tmp; 2966: } 2967: buffer[len++] = c; 2968: c = *cur++; 2969: } 2970: buffer[len] = 0; 2971: } 2972: 2973: if ((c == ':') && (*cur == 0)) { 2974: if (buffer != NULL) 2975: xmlFree(buffer); 2976: *prefix = NULL; 2977: return(xmlStrdup(name)); 2978: } 2979: 2980: if (buffer == NULL) 2981: ret = xmlStrndup(buf, len); 2982: else { 2983: ret = buffer; 2984: buffer = NULL; 2985: max = XML_MAX_NAMELEN; 2986: } 2987: 2988: 2989: if (c == ':') { 2990: c = *cur; 2991: *prefix = ret; 2992: if (c == 0) { 2993: return(xmlStrndup(BAD_CAST "", 0)); 2994: } 2995: len = 0; 2996: 2997: /* 2998: * Check that the first character is proper to start 2999: * a new name 3000: */ 3001: if (!(((c >= 0x61) && (c <= 0x7A)) || 3002: ((c >= 0x41) && (c <= 0x5A)) || 3003: (c == '_') || (c == ':'))) { 3004: int l; 3005: int first = CUR_SCHAR(cur, l); 3006: 3007: if (!IS_LETTER(first) && (first != '_')) { 3008: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3009: "Name %s is not XML Namespace compliant\n", 3010: name); 3011: } 3012: } 3013: cur++; 3014: 3015: while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3016: buf[len++] = c; 3017: c = *cur++; 3018: } 3019: if (len >= max) { 3020: /* 3021: * Okay someone managed to make a huge name, so he's ready to pay 3022: * for the processing speed. 
3023: */ 3024: max = len * 2; 3025: 3026: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3027: if (buffer == NULL) { 3028: xmlErrMemory(ctxt, NULL); 3029: return(NULL); 3030: } 3031: memcpy(buffer, buf, len); 3032: while (c != 0) { /* tested bigname2.xml */ 3033: if (len + 10 > max) { 3034: xmlChar *tmp; 3035: 3036: max *= 2; 3037: tmp = (xmlChar *) xmlRealloc(buffer, 3038: max * sizeof(xmlChar)); 3039: if (tmp == NULL) { 3040: xmlErrMemory(ctxt, NULL); 3041: xmlFree(buffer); 3042: return(NULL); 3043: } 3044: buffer = tmp; 3045: } 3046: buffer[len++] = c; 3047: c = *cur++; 3048: } 3049: buffer[len] = 0; 3050: } 3051: 3052: if (buffer == NULL) 3053: ret = xmlStrndup(buf, len); 3054: else { 3055: ret = buffer; 3056: } 3057: } 3058: 3059: return(ret); 3060: } 3061: 3062: /************************************************************************ 3063: * * 3064: * The parser itself * 3065: * Relates to http://www.w3.org/TR/REC-xml * 3066: * * 3067: ************************************************************************/ 3068: 3069: /************************************************************************ 3070: * * 3071: * Routines to parse Name, NCName and NmToken * 3072: * * 3073: ************************************************************************/ 3074: #ifdef DEBUG 3075: static unsigned long nbParseName = 0; 3076: static unsigned long nbParseNmToken = 0; 3077: static unsigned long nbParseNCName = 0; 3078: static unsigned long nbParseNCNameComplex = 0; 3079: static unsigned long nbParseNameComplex = 0; 3080: static unsigned long nbParseStringName = 0; 3081: #endif 3082: 3083: /* 3084: * The two following functions are related to the change of accepted 3085: * characters for Name and NmToken in the Revision 5 of XML-1.0 3086: * They correspond to the modified production [4] and the new production [4a] 3087: * changes in that revision. 
Also note that the macros used for the 3088: * productions Letter, Digit, CombiningChar and Extender are not needed 3089: * anymore. 3090: * We still keep compatibility to pre-revision5 parsing semantic if the 3091: * new XML_PARSE_OLD10 option is given to the parser. 3092: */ 3093: static int 3094: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3095: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3096: /* 3097: * Use the new checks of production [4] [4a] amd [5] of the 3098: * Update 5 of XML-1.0 3099: */ 3100: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3101: (((c >= 'a') && (c <= 'z')) || 3102: ((c >= 'A') && (c <= 'Z')) || 3103: (c == '_') || (c == ':') || 3104: ((c >= 0xC0) && (c <= 0xD6)) || 3105: ((c >= 0xD8) && (c <= 0xF6)) || 3106: ((c >= 0xF8) && (c <= 0x2FF)) || 3107: ((c >= 0x370) && (c <= 0x37D)) || 3108: ((c >= 0x37F) && (c <= 0x1FFF)) || 3109: ((c >= 0x200C) && (c <= 0x200D)) || 3110: ((c >= 0x2070) && (c <= 0x218F)) || 3111: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3112: ((c >= 0x3001) && (c <= 0xD7FF)) || 3113: ((c >= 0xF900) && (c <= 0xFDCF)) || 3114: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3115: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3116: return(1); 3117: } else { 3118: if (IS_LETTER(c) || (c == '_') || (c == ':')) 3119: return(1); 3120: } 3121: return(0); 3122: } 3123: 3124: static int 3125: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3126: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3127: /* 3128: * Use the new checks of production [4] [4a] amd [5] of the 3129: * Update 5 of XML-1.0 3130: */ 3131: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3132: (((c >= 'a') && (c <= 'z')) || 3133: ((c >= 'A') && (c <= 'Z')) || 3134: ((c >= '0') && (c <= '9')) || /* !start */ 3135: (c == '_') || (c == ':') || 3136: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3137: ((c >= 0xC0) && (c <= 0xD6)) || 3138: ((c >= 0xD8) && (c <= 0xF6)) || 3139: ((c >= 0xF8) && (c <= 0x2FF)) || 3140: ((c >= 0x300) && (c <= 0x36F)) || 
/* !start */ 3141: ((c >= 0x370) && (c <= 0x37D)) || 3142: ((c >= 0x37F) && (c <= 0x1FFF)) || 3143: ((c >= 0x200C) && (c <= 0x200D)) || 3144: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3145: ((c >= 0x2070) && (c <= 0x218F)) || 3146: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3147: ((c >= 0x3001) && (c <= 0xD7FF)) || 3148: ((c >= 0xF900) && (c <= 0xFDCF)) || 3149: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3150: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3151: return(1); 3152: } else { 3153: if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3154: (c == '.') || (c == '-') || 3155: (c == '_') || (c == ':') || 3156: (IS_COMBINING(c)) || 3157: (IS_EXTENDER(c))) 3158: return(1); 3159: } 3160: return(0); 3161: } 3162: 3163: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3164: int *len, int *alloc, int normalize); 3165: 3166: static const xmlChar * 3167: xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3168: int len = 0, l; 3169: int c; 3170: int count = 0; 3171: 3172: #ifdef DEBUG 3173: nbParseNameComplex++; 3174: #endif 3175: 3176: /* 3177: * Handler for more complex cases 3178: */ 3179: GROW; 3180: c = CUR_CHAR(l); 3181: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3182: /* 3183: * Use the new checks of production [4] [4a] amd [5] of the 3184: * Update 5 of XML-1.0 3185: */ 3186: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3187: (!(((c >= 'a') && (c <= 'z')) || 3188: ((c >= 'A') && (c <= 'Z')) || 3189: (c == '_') || (c == ':') || 3190: ((c >= 0xC0) && (c <= 0xD6)) || 3191: ((c >= 0xD8) && (c <= 0xF6)) || 3192: ((c >= 0xF8) && (c <= 0x2FF)) || 3193: ((c >= 0x370) && (c <= 0x37D)) || 3194: ((c >= 0x37F) && (c <= 0x1FFF)) || 3195: ((c >= 0x200C) && (c <= 0x200D)) || 3196: ((c >= 0x2070) && (c <= 0x218F)) || 3197: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3198: ((c >= 0x3001) && (c <= 0xD7FF)) || 3199: ((c >= 0xF900) && (c <= 0xFDCF)) || 3200: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3201: ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3202: return(NULL); 3203: } 3204: len += l; 
3205: NEXTL(l); 3206: c = CUR_CHAR(l); 3207: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3208: (((c >= 'a') && (c <= 'z')) || 3209: ((c >= 'A') && (c <= 'Z')) || 3210: ((c >= '0') && (c <= '9')) || /* !start */ 3211: (c == '_') || (c == ':') || 3212: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3213: ((c >= 0xC0) && (c <= 0xD6)) || 3214: ((c >= 0xD8) && (c <= 0xF6)) || 3215: ((c >= 0xF8) && (c <= 0x2FF)) || 3216: ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3217: ((c >= 0x370) && (c <= 0x37D)) || 3218: ((c >= 0x37F) && (c <= 0x1FFF)) || 3219: ((c >= 0x200C) && (c <= 0x200D)) || 3220: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3221: ((c >= 0x2070) && (c <= 0x218F)) || 3222: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3223: ((c >= 0x3001) && (c <= 0xD7FF)) || 3224: ((c >= 0xF900) && (c <= 0xFDCF)) || 3225: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3226: ((c >= 0x10000) && (c <= 0xEFFFF)) 3227: )) { 3228: if (count++ > 100) { 3229: count = 0; 3230: GROW; 3231: } 3232: len += l; 3233: NEXTL(l); 3234: c = CUR_CHAR(l); 3235: } 3236: } else { 3237: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3238: (!IS_LETTER(c) && (c != '_') && 3239: (c != ':'))) { 3240: return(NULL); 3241: } 3242: len += l; 3243: NEXTL(l); 3244: c = CUR_CHAR(l); 3245: 3246: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3247: ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3248: (c == '.') || (c == '-') || 3249: (c == '_') || (c == ':') || 3250: (IS_COMBINING(c)) || 3251: (IS_EXTENDER(c)))) { 3252: if (count++ > 100) { 3253: count = 0; 3254: GROW; 3255: } 3256: len += l; 3257: NEXTL(l); 3258: c = CUR_CHAR(l); 3259: } 3260: } 3261: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3262: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3263: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3264: } 3265: 3266: /** 3267: * xmlParseName: 3268: * @ctxt: an XML parser context 3269: * 3270: * 
parse an XML name. 3271: * 3272: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3273: * CombiningChar | Extender 3274: * 3275: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3276: * 3277: * [6] Names ::= Name (#x20 Name)* 3278: * 3279: * Returns the Name parsed or NULL 3280: */ 3281: 3282: const xmlChar * 3283: xmlParseName(xmlParserCtxtPtr ctxt) { 3284: const xmlChar *in; 3285: const xmlChar *ret; 3286: int count = 0; 3287: 3288: GROW; 3289: 3290: #ifdef DEBUG 3291: nbParseName++; 3292: #endif 3293: 3294: /* 3295: * Accelerator for simple ASCII names: the bytes are scanned 3296: * directly in the input buffer, without decoding characters. */ 3297: in = ctxt->input->cur; 3298: if (((*in >= 0x61) && (*in <= 0x7A)) || 3299: ((*in >= 0x41) && (*in <= 0x5A)) || 3300: (*in == '_') || (*in == ':')) { 3301: in++; 3302: while (((*in >= 0x61) && (*in <= 0x7A)) || 3303: ((*in >= 0x41) && (*in <= 0x5A)) || 3304: ((*in >= 0x30) && (*in <= 0x39)) || 3305: (*in == '_') || (*in == '-') || 3306: (*in == ':') || (*in == '.')) 3307: in++; 3308: /* the fast path is taken only when the scan stopped on a non-NUL ASCII byte; a 0 byte or a byte >= 0x80 is left to xmlParseNameComplex() */ if ((*in > 0) && (*in < 0x80)) { 3309: count = in - ctxt->input->cur; 3310: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3311: ctxt->input->cur = in; 3312: ctxt->nbChars += count; 3313: ctxt->input->col += count; 3314: if (ret == NULL) 3315: xmlErrMemory(ctxt, NULL); 3316: return(ret); 3317: } 3318: } 3319: /* fast path not applicable: fall back to the character-by-character parser */ 3320: return(xmlParseNameComplex(ctxt)); 3321: } 3322: /* xmlParseNCNameComplex: fallback of xmlParseNCName(). Reads the name one character at a time using xmlIsNameStartChar()/xmlIsNameChar(), rejecting ':' both as first and as following character. Returns the xmlDictLookup() result for the bytes consumed, or NULL when the first character cannot start an NCName. */ 3323: static const xmlChar * 3324: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3325: int len = 0, l; 3326: int c; 3327: int count = 0; 3328: 3329: #ifdef DEBUG 3330: nbParseNCNameComplex++; 3331: #endif 3332: 3333: /* 3334: * Handler for more complex cases 3335: */ 3336: GROW; 3337: c = CUR_CHAR(l); 3338: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3339: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3340: return(NULL); 3341: } 3342: 3343: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3344: (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3345: if (count++ > 100) 
{ 3346: count = 0; 3347: GROW; 3348: } 3349: len += l; 3350: NEXTL(l); 3351: c = CUR_CHAR(l); 3352: } 3353: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3354: } 3355: 3356: /** 3357: * xmlParseNCName: 3358: * @ctxt: an XML parser context 3359: * 3360: * 3361: * parse an XML NCName, i.e. a name that contains no ':'. 3362: * 3363: * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3364: * CombiningChar | Extender 3365: * 3366: * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3367: * 3368: * Returns the Name parsed (the result of xmlDictLookup on ctxt->dict) or NULL 3369: */ 3370: 3371: static const xmlChar * 3372: xmlParseNCName(xmlParserCtxtPtr ctxt) { 3373: const xmlChar *in; 3374: const xmlChar *ret; 3375: int count = 0; 3376: 3377: #ifdef DEBUG 3378: nbParseNCName++; 3379: #endif 3380: 3381: /* 3382: * Accelerator for simple ASCII names 3383: */ 3384: in = ctxt->input->cur; 3385: if (((*in >= 0x61) && (*in <= 0x7A)) || 3386: ((*in >= 0x41) && (*in <= 0x5A)) || 3387: (*in == '_')) { 3388: in++; 3389: while (((*in >= 0x61) && (*in <= 0x7A)) || 3390: ((*in >= 0x41) && (*in <= 0x5A)) || 3391: ((*in >= 0x30) && (*in <= 0x39)) || 3392: (*in == '_') || (*in == '-') || 3393: (*in == '.')) 3394: in++; 3395: /* fast path only when the scan stopped on a non-NUL ASCII byte (this includes ':'); anything else goes to xmlParseNCNameComplex() */ if ((*in > 0) && (*in < 0x80)) { 3396: count = in - ctxt->input->cur; 3397: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3398: ctxt->input->cur = in; 3399: ctxt->nbChars += count; 3400: ctxt->input->col += count; 3401: if (ret == NULL) { 3402: xmlErrMemory(ctxt, NULL); 3403: } 3404: return(ret); 3405: } 3406: } 3407: return(xmlParseNCNameComplex(ctxt)); 3408: } 3409: 3410: /** 3411: * xmlParseNameAndCompare: 3412: * @ctxt: an XML parser context 3413: * @other: the name the input is expected to match 3414: * parse an XML name and compares for match 3415: * (specialized for endtag parsing) 3416: * 3417: * Returns NULL for an illegal name, (xmlChar*) 1 for success 3418: * and the name for mismatch 3419: */ 3420: 3421: static const xmlChar * 3422: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3423: 
register const xmlChar *cmp = other; 3424: register const xmlChar *in; 3425: const xmlChar *ret; 3426: 3427: GROW; 3428: 3429: /* speculative byte-wise compare against @other; nothing is consumed and no position counter is touched until the match is confirmed */ in = ctxt->input->cur; 3430: while (*in != 0 && *in == *cmp) { 3431: ++in; 3432: ++cmp; 3433: /* the column is accounted for once, below, and only on success: on a mismatch xmlParseName() re-reads the same bytes from input->cur and counts them itself */ 3434: } 3435: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3436: /* success: consume the matched bytes and advance the column by their number */ ctxt->input->col += in - ctxt->input->cur; 3437: ctxt->input->cur = in; 3438: return (const xmlChar*) 1; 3439: } 3440: /* failure (or end of input buffer), check with full function */ 3441: ret = xmlParseName (ctxt); 3442: /* strings coming from the dictionary: a direct pointer compare is possible */ 3443: if (ret == other) { 3444: return (const xmlChar*) 1; 3445: } 3446: return ret; 3447: } 3448: 3449: /** 3450: * xmlParseStringName: 3451: * @ctxt: an XML parser context 3452: * @str: a pointer to the string pointer (IN/OUT) 3453: * 3454: * parse an XML name. 3455: * 3456: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3457: * CombiningChar | Extender 3458: * 3459: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3460: * 3461: * [6] Names ::= Name (#x20 Name)* 3462: * 3463: * Returns the Name parsed, as a newly allocated string, or NULL. The @str pointer 3464: * is updated to the current location in the string (it is left unchanged when NULL is returned). 3465: */ 3466: 3467: static xmlChar * 3468: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3469: xmlChar buf[XML_MAX_NAMELEN + 5]; 3470: const xmlChar *cur = *str; 3471: int len = 0, l; 3472: int c; 3473: 3474: #ifdef DEBUG 3475: nbParseStringName++; 3476: #endif 3477: 3478: c = CUR_SCHAR(cur, l); 3479: if (!xmlIsNameStartChar(ctxt, c)) { 3480: return(NULL); 3481: } 3482: 3483: COPY_BUF(l,buf,len,c); 3484: cur += l; 3485: c = CUR_SCHAR(cur, l); 3486: while (xmlIsNameChar(ctxt, c)) { 3487: COPY_BUF(l,buf,len,c); 3488: cur += l; 3489: c = CUR_SCHAR(cur, l); 3490: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3491: /* 3492: * Okay someone managed to make a huge name, so he's ready to pay 3493: * for the processing speed. 
3494: */ 3495: xmlChar *buffer; 3496: int max = len * 2; 3497: 3498: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3499: if (buffer == NULL) { 3500: xmlErrMemory(ctxt, NULL); 3501: return(NULL); 3502: } 3503: /* carry over what was collected in the stack buffer so far */ memcpy(buffer, buf, len); 3504: while (xmlIsNameChar(ctxt, c)) { 3505: /* grow before copying so that at least 10 bytes stay free */ if (len + 10 > max) { 3506: xmlChar *tmp; 3507: max *= 2; 3508: tmp = (xmlChar *) xmlRealloc(buffer, 3509: max * sizeof(xmlChar)); 3510: if (tmp == NULL) { 3511: xmlErrMemory(ctxt, NULL); 3512: xmlFree(buffer); 3513: return(NULL); 3514: } 3515: buffer = tmp; 3516: } 3517: COPY_BUF(l,buffer,len,c); 3518: cur += l; 3519: c = CUR_SCHAR(cur, l); 3520: } 3521: buffer[len] = 0; 3522: *str = cur; 3523: /* the heap buffer is handed to the caller */ return(buffer); 3524: } 3525: } 3526: *str = cur; 3527: return(xmlStrndup(buf, len)); 3528: } 3529: 3530: /** 3531: * xmlParseNmtoken: 3532: * @ctxt: an XML parser context 3533: * 3534: * parse an XML Nmtoken. 3535: * 3536: * [7] Nmtoken ::= (NameChar)+ 3537: * 3538: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3539: * 3540: * Returns the Nmtoken parsed as a newly allocated string, or NULL when no NameChar is found or an allocation fails 3541: */ 3542: 3543: xmlChar * 3544: xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3545: xmlChar buf[XML_MAX_NAMELEN + 5]; 3546: int len = 0, l; 3547: int c; 3548: int count = 0; 3549: 3550: #ifdef DEBUG 3551: nbParseNmToken++; 3552: #endif 3553: 3554: GROW; 3555: c = CUR_CHAR(l); 3556: 3557: while (xmlIsNameChar(ctxt, c)) { 3558: if (count++ > 100) { 3559: count = 0; 3560: GROW; 3561: } 3562: COPY_BUF(l,buf,len,c); 3563: NEXTL(l); 3564: c = CUR_CHAR(l); 3565: if (len >= XML_MAX_NAMELEN) { 3566: /* 3567: * Okay someone managed to make a huge token, so he's ready to pay 3568: * for the processing speed. 
3569: */ 3570: xmlChar *buffer; 3571: int max = len * 2; 3572: 3573: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3574: if (buffer == NULL) { 3575: xmlErrMemory(ctxt, NULL); 3576: return(NULL); 3577: } 3578: memcpy(buffer, buf, len); 3579: while (xmlIsNameChar(ctxt, c)) { 3580: if (count++ > 100) { 3581: count = 0; 3582: GROW; 3583: } 3584: if (len + 10 > max) { 3585: xmlChar *tmp; 3586: 3587: max *= 2; 3588: tmp = (xmlChar *) xmlRealloc(buffer, 3589: max * sizeof(xmlChar)); 3590: if (tmp == NULL) { 3591: xmlErrMemory(ctxt, NULL); 3592: xmlFree(buffer); 3593: return(NULL); 3594: } 3595: buffer = tmp; 3596: } 3597: COPY_BUF(l,buffer,len,c); 3598: NEXTL(l); 3599: c = CUR_CHAR(l); 3600: } 3601: buffer[len] = 0; 3602: return(buffer); 3603: } 3604: } 3605: if (len == 0) 3606: return(NULL); 3607: return(xmlStrndup(buf, len)); 3608: } 3609: 3610: /** 3611: * xmlParseEntityValue: 3612: * @ctxt: an XML parser context 3613: * @orig: if non-NULL store a copy of the original entity value 3614: * 3615: * parse a value for ENTITY declarations 3616: * 3617: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3618: * "'" ([^%&'] | PEReference | Reference)* "'" 3619: * 3620: * Returns the EntityValue parsed with reference substituted or NULL 3621: */ 3622: 3623: xmlChar * 3624: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3625: xmlChar *buf = NULL; 3626: int len = 0; 3627: int size = XML_PARSER_BUFFER_SIZE; 3628: int c, l; 3629: xmlChar stop; 3630: xmlChar *ret = NULL; 3631: const xmlChar *cur = NULL; 3632: xmlParserInputPtr input; 3633: 3634: if (RAW == '"') stop = '"'; 3635: else if (RAW == '\'') stop = '\''; 3636: else { 3637: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3638: return(NULL); 3639: } 3640: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3641: if (buf == NULL) { 3642: xmlErrMemory(ctxt, NULL); 3643: return(NULL); 3644: } 3645: 3646: /* 3647: * The content of the entity definition is copied in a 
buffer. 3648: */ 3649: 3650: ctxt->instate = XML_PARSER_ENTITY_VALUE; 3651: input = ctxt->input; 3652: GROW; 3653: NEXT; 3654: c = CUR_CHAR(l); 3655: /* 3656: * NOTE: 4.4.5 Included in Literal 3657: * When a parameter entity reference appears in a literal entity 3658: * value, ... a single or double quote character in the replacement 3659: * text is always treated as a normal data character and will not 3660: * terminate the literal. 3661: * In practice it means we stop the loop only when back at parsing 3662: * the initial entity and the quote is found 3663: */ 3664: while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3665: (ctxt->input != input))) { 3666: if (len + 5 >= size) { 3667: xmlChar *tmp; 3668: 3669: size *= 2; 3670: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3671: if (tmp == NULL) { 3672: xmlErrMemory(ctxt, NULL); 3673: xmlFree(buf); 3674: return(NULL); 3675: } 3676: buf = tmp; 3677: } 3678: COPY_BUF(l,buf,len,c); 3679: NEXTL(l); 3680: /* 3681: * Pop-up of finished entities. 3682: */ 3683: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3684: xmlPopInput(ctxt); 3685: 3686: GROW; 3687: c = CUR_CHAR(l); 3688: if (c == 0) { 3689: GROW; 3690: c = CUR_CHAR(l); 3691: } 3692: } 3693: buf[len] = 0; 3694: 3695: /* 3696: * Raise problem w.r.t. '&' and '%' being used in non-entities 3697: * reference constructs. 
Note Charref will be handled in 3698: * xmlStringDecodeEntities() 3699: */ 3700: cur = buf; 3701: while (*cur != 0) { /* non input consuming */ 3702: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3703: xmlChar *name; 3704: xmlChar tmp = *cur; 3705: 3706: cur++; 3707: /* name is a temporary copy used only for this check; it is freed below */ name = xmlParseStringName(ctxt, &cur); 3708: if ((name == NULL) || (*cur != ';')) { 3709: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3710: "EntityValue: '%c' forbidden except for entities references\n", 3711: tmp); 3712: } 3713: if ((tmp == '%') && (ctxt->inSubset == 1) && 3714: (ctxt->inputNr == 1)) { 3715: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3716: } 3717: if (name != NULL) 3718: xmlFree(name); 3719: if (*cur == 0) 3720: break; 3721: } 3722: cur++; 3723: } 3724: 3725: /* 3726: * Then PEReference entities are substituted, provided the literal 3727: * was properly closed; otherwise buf is discarded and NULL is returned. */ 3728: if (c != stop) { 3729: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3730: xmlFree(buf); 3731: } else { 3732: NEXT; 3733: /* 3734: * NOTE: 4.4.7 Bypassed 3735: * When a general entity reference appears in the EntityValue in 3736: * an entity declaration, it is bypassed and left as is. 3737: * so XML_SUBSTITUTE_REF is not set here. 3738: */ 3739: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3740: 0, 0, 0); 3741: /* when @orig is given the unsubstituted buffer is handed to the caller, otherwise it is released here */ if (orig != NULL) 3742: *orig = buf; 3743: else 3744: xmlFree(buf); 3745: } 3746: 3747: return(ret); 3748: } 3749: 3750: /** 3751: * xmlParseAttValueComplex: 3752: * @ctxt: an XML parser context 3753: * @attlen: if non-NULL, receives the length of the returned value 3754: * @normalize: whether to apply the inner normalization 3755: * 3756: * parse a value for an attribute, this is the fallback function 3757: * of xmlParseAttValue() when the attribute parsing requires handling 3758: * of non-ASCII characters, or normalization compaction. 3759: * 3760: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3761: */ 3762: static xmlChar * 3763: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3764: xmlChar limit = 0; 3765: xmlChar *buf = NULL; 3766: xmlChar *rep = NULL; 3767: int len = 0; 3768: int buf_size = 0; 3769: int c, l, in_space = 0; 3770: xmlChar *current = NULL; 3771: xmlEntityPtr ent; 3772: 3773: if (NXT(0) == '"') { 3774: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3775: limit = '"'; 3776: NEXT; 3777: } else if (NXT(0) == '\'') { 3778: limit = '\''; 3779: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3780: NEXT; 3781: } else { 3782: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3783: return(NULL); 3784: } 3785: 3786: /* 3787: * allocate a translation buffer. 3788: */ 3789: buf_size = XML_PARSER_BUFFER_SIZE; 3790: buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3791: if (buf == NULL) goto mem_error; 3792: 3793: /* 3794: * OK loop until we reach one of the ending char or a size limit. 3795: */ 3796: c = CUR_CHAR(l); 3797: while ((NXT(0) != limit) && /* checked */ 3798: (IS_CHAR(c)) && (c != '<')) { 3799: if (c == 0) break; 3800: if (c == '&') { 3801: in_space = 0; 3802: if (NXT(1) == '#') { 3803: int val = xmlParseCharRef(ctxt); 3804: 3805: if (val == '&') { 3806: if (ctxt->replaceEntities) { 3807: if (len > buf_size - 10) { 3808: growBuffer(buf, 10); 3809: } 3810: buf[len++] = '&'; 3811: } else { 3812: /* 3813: * The reparsing will be done in xmlStringGetNodeList() 3814: * called by the attribute() function in SAX.c 3815: */ 3816: if (len > buf_size - 10) { 3817: growBuffer(buf, 10); 3818: } 3819: buf[len++] = '&'; 3820: buf[len++] = '#'; 3821: buf[len++] = '3'; 3822: buf[len++] = '8'; 3823: buf[len++] = ';'; 3824: } 3825: } else if (val != 0) { 3826: if (len > buf_size - 10) { 3827: growBuffer(buf, 10); 3828: } 3829: len += xmlCopyChar(0, &buf[len], val); 3830: } 3831: } else { 3832: ent = xmlParseEntityRef(ctxt); 3833: ctxt->nbentities++; 3834: if (ent != NULL) 3835: ctxt->nbentities += ent->owner; 3836: 
if ((ent != NULL) && 3837: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3838: if (len > buf_size - 10) { 3839: growBuffer(buf, 10); 3840: } 3841: if ((ctxt->replaceEntities == 0) && 3842: (ent->content[0] == '&')) { 3843: buf[len++] = '&'; 3844: buf[len++] = '#'; 3845: buf[len++] = '3'; 3846: buf[len++] = '8'; 3847: buf[len++] = ';'; 3848: } else { 3849: buf[len++] = ent->content[0]; 3850: } 3851: } else if ((ent != NULL) && 3852: (ctxt->replaceEntities != 0)) { 3853: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3854: rep = xmlStringDecodeEntities(ctxt, ent->content, 3855: XML_SUBSTITUTE_REF, 3856: 0, 0, 0); 3857: if (rep != NULL) { 3858: current = rep; 3859: while (*current != 0) { /* non input consuming */ 3860: if ((*current == 0xD) || (*current == 0xA) || 3861: (*current == 0x9)) { 3862: buf[len++] = 0x20; 3863: current++; 3864: } else 3865: buf[len++] = *current++; 3866: if (len > buf_size - 10) { 3867: growBuffer(buf, 10); 3868: } 3869: } 3870: xmlFree(rep); 3871: rep = NULL; 3872: } 3873: } else { 3874: if (len > buf_size - 10) { 3875: growBuffer(buf, 10); 3876: } 3877: if (ent->content != NULL) 3878: buf[len++] = ent->content[0]; 3879: } 3880: } else if (ent != NULL) { 3881: int i = xmlStrlen(ent->name); 3882: const xmlChar *cur = ent->name; 3883: 3884: /* 3885: * This may look absurd but is needed to detect 3886: * entities problems 3887: */ 3888: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3889: (ent->content != NULL)) { 3890: rep = xmlStringDecodeEntities(ctxt, ent->content, 3891: XML_SUBSTITUTE_REF, 0, 0, 0); 3892: if (rep != NULL) { 3893: xmlFree(rep); 3894: rep = NULL; 3895: } 3896: } 3897: 3898: /* 3899: * Just output the reference 3900: */ 3901: buf[len++] = '&'; 3902: while (len > buf_size - i - 10) { 3903: growBuffer(buf, i + 10); 3904: } 3905: for (;i > 0;i--) 3906: buf[len++] = *cur++; 3907: buf[len++] = ';'; 3908: } 3909: } 3910: } else { 3911: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3912: if ((len != 
0) || (!normalize)) { 3913: if ((!normalize) || (!in_space)) { 3914: COPY_BUF(l,buf,len,0x20); 3915: while (len > buf_size - 10) { 3916: growBuffer(buf, 10); 3917: } 3918: } 3919: in_space = 1; 3920: } 3921: } else { 3922: in_space = 0; 3923: COPY_BUF(l,buf,len,c); 3924: if (len > buf_size - 10) { 3925: growBuffer(buf, 10); 3926: } 3927: } 3928: NEXTL(l); 3929: } 3930: GROW; 3931: c = CUR_CHAR(l); 3932: } 3933: if ((in_space) && (normalize)) { 3934: /* normalization: drop trailing spaces. len is checked first because the buffer may hold nothing but spaces (e.g. a space character reference followed by a literal space), and buf[len - 1] must never be read with len == 0 */ while ((len > 0) && (buf[len - 1] == 0x20)) len--; 3935: } 3936: buf[len] = 0; 3937: /* report why the loop stopped; the buffer collected so far is returned in every case */ if (RAW == '<') { 3938: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3939: } else if (RAW != limit) { 3940: if ((c != 0) && (!IS_CHAR(c))) { 3941: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3942: "invalid character in attribute value\n"); 3943: } else { 3944: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3945: "AttValue: ' expected\n"); 3946: } 3947: } else 3948: NEXT; 3949: if (attlen != NULL) *attlen = len; 3950: return(buf); 3951: 3952: mem_error: 3953: xmlErrMemory(ctxt, NULL); 3954: if (buf != NULL) 3955: xmlFree(buf); 3956: if (rep != NULL) 3957: xmlFree(rep); 3958: return(NULL); 3959: } 3960: 3961: /** 3962: * xmlParseAttValue: 3963: * @ctxt: an XML parser context 3964: * 3965: * parse a value for an attribute 3966: * Note: the parser won't do substitution of entities here, this 3967: * will be handled later in xmlStringGetNodeList 3968: * 3969: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3970: * "'" ([^<&'] | Reference)* "'" 3971: * 3972: * 3.3.3 Attribute-Value Normalization: 3973: * Before the value of an attribute is passed to the application or 3974: * checked for validity, the XML processor must normalize it as follows: 3975: * - a character reference is processed by appending the referenced 3976: * character to the attribute value 3977: * - an entity reference is processed by recursively processing the 3978: * replacement text of the entity 3979: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3980: * 
appending #x20 to the normalized value, except that only a single 3981: * #x20 is appended for a "#xD#xA" sequence that is part of an external 3982: * parsed entity or the literal entity value of an internal parsed entity 3983: * - other characters are processed by appending them to the normalized value 3984: * If the declared value is not CDATA, then the XML processor must further 3985: * process the normalized attribute value by discarding any leading and 3986: * trailing space (#x20) characters, and by replacing sequences of space 3987: * (#x20) characters by a single space (#x20) character. 3988: * All attributes for which no declaration has been read should be treated 3989: * by a non-validating parser as if declared CDATA. 3990: * 3991: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3992: */ 3993: 3994: 3995: xmlChar * 3996: xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3997: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3998: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3999: } 4000: 4001: /** 4002: * xmlParseSystemLiteral: 4003: * @ctxt: an XML parser context 4004: * 4005: * parse an XML Literal 4006: * 4007: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4008: * 4009: * Returns the SystemLiteral parsed or NULL 4010: */ 4011: 4012: xmlChar * 4013: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4014: xmlChar *buf = NULL; 4015: int len = 0; 4016: int size = XML_PARSER_BUFFER_SIZE; 4017: int cur, l; 4018: xmlChar stop; 4019: int state = ctxt->instate; 4020: int count = 0; 4021: 4022: SHRINK; 4023: if (RAW == '"') { 4024: NEXT; 4025: stop = '"'; 4026: } else if (RAW == '\'') { 4027: NEXT; 4028: stop = '\''; 4029: } else { 4030: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4031: return(NULL); 4032: } 4033: 4034: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4035: if (buf == NULL) { 4036: xmlErrMemory(ctxt, NULL); 4037: return(NULL); 4038: } 4039: ctxt->instate = 
XML_PARSER_SYSTEM_LITERAL; 4040: cur = CUR_CHAR(l); 4041: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4042: /* grow while fewer than 5 bytes remain -- presumably room for one COPY_BUF plus the final 0; confirm against the COPY_BUF macro */ if (len + 5 >= size) { 4043: xmlChar *tmp; 4044: 4045: size *= 2; 4046: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4047: if (tmp == NULL) { 4048: xmlFree(buf); 4049: xmlErrMemory(ctxt, NULL); 4050: ctxt->instate = (xmlParserInputState) state; 4051: return(NULL); 4052: } 4053: buf = tmp; 4054: } 4055: count++; 4056: if (count > 50) { 4057: GROW; 4058: count = 0; 4059: } 4060: COPY_BUF(l,buf,len,cur); 4061: NEXTL(l); 4062: cur = CUR_CHAR(l); 4063: /* a 0 may only mean the current buffer is exhausted: refill and read again */ if (cur == 0) { 4064: GROW; 4065: SHRINK; 4066: cur = CUR_CHAR(l); 4067: } 4068: } 4069: buf[len] = 0; 4070: ctxt->instate = (xmlParserInputState) state; 4071: /* the loop ends either on the closing quote or on a non-Char; in the latter case the literal is unterminated, which is reported, but buf is still returned */ if (!IS_CHAR(cur)) { 4072: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4073: } else { 4074: NEXT; 4075: } 4076: return(buf); 4077: } 4078: 4079: /** 4080: * xmlParsePubidLiteral: 4081: * @ctxt: an XML parser context 4082: * 4083: * parse an XML public literal 4084: * 4085: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4086: * 4087: * Returns the PubidLiteral parsed or NULL. 
4088: */ 4089: 4090: xmlChar * 4091: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4092: xmlChar *buf = NULL; 4093: int len = 0; 4094: int size = XML_PARSER_BUFFER_SIZE; 4095: xmlChar cur; 4096: xmlChar stop; 4097: int count = 0; 4098: xmlParserInputState oldstate = ctxt->instate; 4099: 4100: SHRINK; 4101: if (RAW == '"') { 4102: NEXT; 4103: stop = '"'; 4104: } else if (RAW == '\'') { 4105: NEXT; 4106: stop = '\''; 4107: } else { 4108: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4109: return(NULL); 4110: } 4111: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4112: if (buf == NULL) { 4113: xmlErrMemory(ctxt, NULL); 4114: return(NULL); 4115: } 4116: ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4117: cur = CUR; 4118: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4119: if (len + 1 >= size) { 4120: xmlChar *tmp; 4121: 4122: size *= 2; 4123: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4124: if (tmp == NULL) { 4125: xmlErrMemory(ctxt, NULL); 4126: xmlFree(buf); 4127: return(NULL); 4128: } 4129: buf = tmp; 4130: } 4131: buf[len++] = cur; 4132: count++; 4133: if (count > 50) { 4134: GROW; 4135: count = 0; 4136: } 4137: NEXT; 4138: cur = CUR; 4139: if (cur == 0) { 4140: GROW; 4141: SHRINK; 4142: cur = CUR; 4143: } 4144: } 4145: buf[len] = 0; 4146: if (cur != stop) { 4147: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4148: } else { 4149: NEXT; 4150: } 4151: ctxt->instate = oldstate; 4152: return(buf); 4153: } 4154: 4155: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4156: 4157: /* 4158: * used for the test in the inner loop of the char data testing 4159: */ 4160: static const unsigned char test_char_data[256] = { 4161: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4162: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4163: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4164: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4165: 0x20, 0x21, 0x22, 0x23, 0x24, 
0x25, 0x00, 0x27, /* & */ 4166: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4167: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4168: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4169: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4170: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4171: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4172: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4173: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4174: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4175: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4176: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4177: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4178: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4179: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4180: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4181: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4182: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4183: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4184: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4185: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4186: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4187: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4188: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4189: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4190: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4191: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4192: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4193: }; 4194: 4195: /** 4196: * xmlParseCharData: 4197: * @ctxt: an XML parser context 4198: * @cdata: int indicating whether we are within a CDATA section 4199: * 4200: * parse a CharData section. 4201: * if we are within a CDATA section ']]>' marks an end of section. 
4202: * 4203: * The right angle bracket (>) may be represented using the string "&gt;", 4204: * and must, for compatibility, be escaped using "&gt;" or a character 4205: * reference when it appears in the string "]]>" in content, when that 4206: * string is not marking the end of a CDATA section. 4207: * 4208: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4209: */ 4210: 4211: void 4212: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4213: const xmlChar *in; 4214: int nbchar = 0; 4215: int line = ctxt->input->line; 4216: int col = ctxt->input->col; 4217: int ccol; 4218: 4219: SHRINK; 4220: GROW; 4221: /* 4222: * Accelerated common case where input doesn't need to be 4223: * modified before passing it to the handler. 4224: */ 4225: if (!cdata) { 4226: in = ctxt->input->cur; 4227: do { 4228: get_more_space: 4229: while (*in == 0x20) { in++; ctxt->input->col++; } 4230: if (*in == 0xA) { 4231: do { 4232: ctxt->input->line++; ctxt->input->col = 1; 4233: in++; 4234: } while (*in == 0xA); 4235: goto get_more_space; 4236: } 4237: /* only spaces and line feeds were seen before the next '<': deliver that run and stop */ if (*in == '<') { 4238: nbchar = in - ctxt->input->cur; 4239: if (nbchar > 0) { 4240: const xmlChar *tmp = ctxt->input->cur; 4241: ctxt->input->cur = in; 4242: 4243: /* areBlanks() is consulted only when the two callbacks differ */ if ((ctxt->sax != NULL) && 4244: (ctxt->sax->ignorableWhitespace != 4245: ctxt->sax->characters)) { 4246: if (areBlanks(ctxt, tmp, nbchar, 1)) { 4247: if (ctxt->sax->ignorableWhitespace != NULL) 4248: ctxt->sax->ignorableWhitespace(ctxt->userData, 4249: tmp, nbchar); 4250: } else { 4251: if (ctxt->sax->characters != NULL) 4252: ctxt->sax->characters(ctxt->userData, 4253: tmp, nbchar); 4254: if (*ctxt->space == -1) 4255: *ctxt->space = -2; 4256: } 4257: } else if ((ctxt->sax != NULL) && 4258: (ctxt->sax->characters != NULL)) { 4259: ctxt->sax->characters(ctxt->userData, 4260: tmp, nbchar); 4261: } 4262: } 4263: return; 4264: } 4265: 4266: get_more: 4267: ccol = ctxt->input->col; 4268: /* test_char_data[] is non-zero for the bytes that can be skipped here: tab and the ASCII range 0x20..0x7F except '&', '<' and ']' */ while (test_char_data[*in]) { 4269: in++; 4270: ccol++; 4271: } 4272: ctxt->input->col = ccol; 4273: 
if (*in == 0xA) { 4274: do { 4275: ctxt->input->line++; ctxt->input->col = 1; 4276: in++; 4277: } while (*in == 0xA); 4278: goto get_more; 4279: } 4280: if (*in == ']') { 4281: if ((in[1] == ']') && (in[2] == '>')) { 4282: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4283: ctxt->input->cur = in; 4284: return; 4285: } 4286: in++; 4287: ctxt->input->col++; 4288: goto get_more; 4289: } 4290: nbchar = in - ctxt->input->cur; 4291: if (nbchar > 0) { 4292: if ((ctxt->sax != NULL) && 4293: (ctxt->sax->ignorableWhitespace != 4294: ctxt->sax->characters) && 4295: (IS_BLANK_CH(*ctxt->input->cur))) { 4296: const xmlChar *tmp = ctxt->input->cur; 4297: ctxt->input->cur = in; 4298: 4299: if (areBlanks(ctxt, tmp, nbchar, 0)) { 4300: if (ctxt->sax->ignorableWhitespace != NULL) 4301: ctxt->sax->ignorableWhitespace(ctxt->userData, 4302: tmp, nbchar); 4303: } else { 4304: if (ctxt->sax->characters != NULL) 4305: ctxt->sax->characters(ctxt->userData, 4306: tmp, nbchar); 4307: if (*ctxt->space == -1) 4308: *ctxt->space = -2; 4309: } 4310: line = ctxt->input->line; 4311: col = ctxt->input->col; 4312: } else if (ctxt->sax != NULL) { 4313: if (ctxt->sax->characters != NULL) 4314: ctxt->sax->characters(ctxt->userData, 4315: ctxt->input->cur, nbchar); 4316: line = ctxt->input->line; 4317: col = ctxt->input->col; 4318: } 4319: /* something really bad happened in the SAX callback */ 4320: if (ctxt->instate != XML_PARSER_CONTENT) 4321: return; 4322: } 4323: ctxt->input->cur = in; 4324: if (*in == 0xD) { 4325: in++; 4326: if (*in == 0xA) { 4327: ctxt->input->cur = in; 4328: in++; 4329: ctxt->input->line++; ctxt->input->col = 1; 4330: continue; /* while */ 4331: } 4332: in--; 4333: } 4334: if (*in == '<') { 4335: return; 4336: } 4337: if (*in == '&') { 4338: return; 4339: } 4340: SHRINK; 4341: GROW; 4342: in = ctxt->input->cur; 4343: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4344: nbchar = 0; 4345: } 4346: ctxt->input->line = line; 4347: ctxt->input->col = col; 4348: 
xmlParseCharDataComplex(ctxt, cdata); 4349: } 4350: 4351: /** 4352: * xmlParseCharDataComplex: 4353: * @ctxt: an XML parser context 4354: * @cdata: int indicating whether we are within a CDATA section 4355: * 4356: * parse a CharData section.this is the fallback function 4357: * of xmlParseCharData() when the parsing requires handling 4358: * of non-ASCII characters. 4359: */ 4360: static void 4361: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4362: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4363: int nbchar = 0; 4364: int cur, l; 4365: int count = 0; 4366: 4367: SHRINK; 4368: GROW; 4369: cur = CUR_CHAR(l); 4370: while ((cur != '<') && /* checked */ 4371: (cur != '&') && 4372: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4373: if ((cur == ']') && (NXT(1) == ']') && 4374: (NXT(2) == '>')) { 4375: if (cdata) break; 4376: else { 4377: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4378: } 4379: } 4380: COPY_BUF(l,buf,nbchar,cur); 4381: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4382: buf[nbchar] = 0; 4383: 4384: /* 4385: * OK the segment is to be consumed as chars. 4386: */ 4387: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4388: if (areBlanks(ctxt, buf, nbchar, 0)) { 4389: if (ctxt->sax->ignorableWhitespace != NULL) 4390: ctxt->sax->ignorableWhitespace(ctxt->userData, 4391: buf, nbchar); 4392: } else { 4393: if (ctxt->sax->characters != NULL) 4394: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4395: if ((ctxt->sax->characters != 4396: ctxt->sax->ignorableWhitespace) && 4397: (*ctxt->space == -1)) 4398: *ctxt->space = -2; 4399: } 4400: } 4401: nbchar = 0; 4402: /* something really bad happened in the SAX callback */ 4403: if (ctxt->instate != XML_PARSER_CONTENT) 4404: return; 4405: } 4406: count++; 4407: if (count > 50) { 4408: GROW; 4409: count = 0; 4410: } 4411: NEXTL(l); 4412: cur = CUR_CHAR(l); 4413: } 4414: if (nbchar != 0) { 4415: buf[nbchar] = 0; 4416: /* 4417: * OK the segment is to be consumed as chars. 
4418: */ 4419: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4420: if (areBlanks(ctxt, buf, nbchar, 0)) { 4421: if (ctxt->sax->ignorableWhitespace != NULL) 4422: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4423: } else { 4424: if (ctxt->sax->characters != NULL) 4425: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4426: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4427: (*ctxt->space == -1)) 4428: *ctxt->space = -2; 4429: } 4430: } 4431: } 4432: if ((cur != 0) && (!IS_CHAR(cur))) { 4433: /* Generate the error and skip the offending character */ 4434: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4435: "PCDATA invalid Char value %d\n", 4436: cur); 4437: NEXTL(l); 4438: } 4439: } 4440: 4441: /** 4442: * xmlParseExternalID: 4443: * @ctxt: an XML parser context 4444: * @publicID: a xmlChar** receiving PubidLiteral 4445: * @strict: indicate whether we should restrict parsing to only 4446: * production [75], see NOTE below 4447: * 4448: * Parse an External ID or a Public ID 4449: * 4450: * NOTE: Productions [75] and [83] interact badly since [75] can generate 4451: * 'PUBLIC' S PubidLiteral S SystemLiteral 4452: * 4453: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4454: * | 'PUBLIC' S PubidLiteral S SystemLiteral 4455: * 4456: * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4457: * 4458: * Returns the SystemLiteral; in the 'PUBLIC' case @publicID also 4459: * receives the PubidLiteral (it is set to NULL otherwise). If strict is off 4460: * it is possible to return NULL and have publicID set. 
4461: */ 4462: 4463: xmlChar * 4464: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4465: xmlChar *URI = NULL; 4466: 4467: SHRINK; 4468: 4469: *publicID = NULL; 4470: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4471: SKIP(6); 4472: if (!IS_BLANK_CH(CUR)) { 4473: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4474: "Space required after 'SYSTEM'\n"); 4475: } 4476: SKIP_BLANKS; 4477: URI = xmlParseSystemLiteral(ctxt); 4478: if (URI == NULL) { 4479: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4480: } 4481: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4482: SKIP(6); 4483: if (!IS_BLANK_CH(CUR)) { 4484: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4485: "Space required after 'PUBLIC'\n"); 4486: } 4487: SKIP_BLANKS; 4488: *publicID = xmlParsePubidLiteral(ctxt); 4489: if (*publicID == NULL) { 4490: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4491: } 4492: if (strict) { 4493: /* 4494: * We don't handle [83] so "S SystemLiteral" is required. 4495: */ 4496: if (!IS_BLANK_CH(CUR)) { 4497: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4498: "Space required after the Public Identifier\n"); 4499: } 4500: } else { 4501: /* 4502: * We handle [83] so we return immediately, if 4503: * "S SystemLiteral" is not detected. From a purely parsing 4504: * point of view that's a nice mess. 4505: */ 4506: const xmlChar *ptr; 4507: GROW; 4508: 4509: ptr = CUR_PTR; 4510: if (!IS_BLANK_CH(*ptr)) return(NULL); 4511: 4512: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! 
*/ 4513: if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4514: } 4515: SKIP_BLANKS; 4516: URI = xmlParseSystemLiteral(ctxt); 4517: if (URI == NULL) { 4518: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4519: } 4520: } 4521: return(URI); 4522: } 4523: 4524: /** 4525: * xmlParseCommentComplex: 4526: * @ctxt: an XML parser context 4527: * @buf: the already parsed part of the buffer 4528: * @len: number of bytes filles in the buffer 4529: * @size: allocated size of the buffer 4530: * 4531: * Skip an XML (SGML) comment  4532: * The spec says that "For compatibility, the string "--" (double-hyphen) 4533: * must not occur within comments. " 4534: * This is the slow routine in case the accelerator for ascii didn't work 4535: * 4536: * [15] Comment ::= '' 4537: */ 4538: static void 4539: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4540: int q, ql; 4541: int r, rl; 4542: int cur, l; 4543: int count = 0; 4544: int inputid; 4545: 4546: inputid = ctxt->input->id; 4547: 4548: if (buf == NULL) { 4549: len = 0; 4550: size = XML_PARSER_BUFFER_SIZE; 4551: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4552: if (buf == NULL) { 4553: xmlErrMemory(ctxt, NULL); 4554: return; 4555: } 4556: } 4557: GROW; /* Assure there's enough input data */ 4558: q = CUR_CHAR(ql); 4559: if (q == 0) 4560: goto not_terminated; 4561: if (!IS_CHAR(q)) { 4562: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4563: "xmlParseComment: invalid xmlChar value %d\n", 4564: q); 4565: xmlFree (buf); 4566: return; 4567: } 4568: NEXTL(ql); 4569: r = CUR_CHAR(rl); 4570: if (r == 0) 4571: goto not_terminated; 4572: if (!IS_CHAR(r)) { 4573: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4574: "xmlParseComment: invalid xmlChar value %d\n", 4575: q); 4576: xmlFree (buf); 4577: return; 4578: } 4579: NEXTL(rl); 4580: cur = CUR_CHAR(l); 4581: if (cur == 0) 4582: goto not_terminated; 4583: while (IS_CHAR(cur) && /* checked */ 4584: ((cur != '>') || 4585: (r != '-') || (q != 
'-'))) { 4586: if ((r == '-') && (q == '-')) { 4587: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4588: } 4589: if (len + 5 >= size) { 4590: xmlChar *new_buf; 4591: size *= 2; 4592: new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4593: if (new_buf == NULL) { 4594: xmlFree (buf); 4595: xmlErrMemory(ctxt, NULL); 4596: return; 4597: } 4598: buf = new_buf; 4599: } 4600: COPY_BUF(ql,buf,len,q); 4601: q = r; 4602: ql = rl; 4603: r = cur; 4604: rl = l; 4605: 4606: count++; 4607: if (count > 50) { 4608: GROW; 4609: count = 0; 4610: } 4611: NEXTL(l); 4612: cur = CUR_CHAR(l); 4613: if (cur == 0) { 4614: SHRINK; 4615: GROW; 4616: cur = CUR_CHAR(l); 4617: } 4618: } 4619: buf[len] = 0; 4620: if (cur == 0) { 4621: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4622: "Comment not terminated \n 4651: * The spec says that "For compatibility, the string "--" (double-hyphen) 4652: * must not occur within comments. " 4653: * 4654: * [15] Comment ::= '' 4655: */ 4656: void 4657: xmlParseComment(xmlParserCtxtPtr ctxt) { 4658: xmlChar *buf = NULL; 4659: int size = XML_PARSER_BUFFER_SIZE; 4660: int len = 0; 4661: xmlParserInputState state; 4662: const xmlChar *in; 4663: int nbchar = 0, ccol; 4664: int inputid; 4665: 4666: /* 4667: * Check that there is a comment right here. 4668: */ 4669: if ((RAW != '<') || (NXT(1) != '!') || 4670: (NXT(2) != '-') || (NXT(3) != '-')) return; 4671: state = ctxt->instate; 4672: ctxt->instate = XML_PARSER_COMMENT; 4673: inputid = ctxt->input->id; 4674: SKIP(4); 4675: SHRINK; 4676: GROW; 4677: 4678: /* 4679: * Accelerated common case where input don't need to be 4680: * modified before passing it to the handler. 
4681: */ 4682: in = ctxt->input->cur; 4683: do { 4684: if (*in == 0xA) { 4685: do { 4686: ctxt->input->line++; ctxt->input->col = 1; 4687: in++; 4688: } while (*in == 0xA); 4689: } 4690: get_more: 4691: ccol = ctxt->input->col; 4692: while (((*in > '-') && (*in <= 0x7F)) || 4693: ((*in >= 0x20) && (*in < '-')) || 4694: (*in == 0x09)) { 4695: in++; 4696: ccol++; 4697: } 4698: ctxt->input->col = ccol; 4699: if (*in == 0xA) { 4700: do { 4701: ctxt->input->line++; ctxt->input->col = 1; 4702: in++; 4703: } while (*in == 0xA); 4704: goto get_more; 4705: } 4706: nbchar = in - ctxt->input->cur; 4707: /* 4708: * save current set of data 4709: */ 4710: if (nbchar > 0) { 4711: if ((ctxt->sax != NULL) && 4712: (ctxt->sax->comment != NULL)) { 4713: if (buf == NULL) { 4714: if ((*in == '-') && (in[1] == '-')) 4715: size = nbchar + 1; 4716: else 4717: size = XML_PARSER_BUFFER_SIZE + nbchar; 4718: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4719: if (buf == NULL) { 4720: xmlErrMemory(ctxt, NULL); 4721: ctxt->instate = state; 4722: return; 4723: } 4724: len = 0; 4725: } else if (len + nbchar + 1 >= size) { 4726: xmlChar *new_buf; 4727: size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4728: new_buf = (xmlChar *) xmlRealloc(buf, 4729: size * sizeof(xmlChar)); 4730: if (new_buf == NULL) { 4731: xmlFree (buf); 4732: xmlErrMemory(ctxt, NULL); 4733: ctxt->instate = state; 4734: return; 4735: } 4736: buf = new_buf; 4737: } 4738: memcpy(&buf[len], ctxt->input->cur, nbchar); 4739: len += nbchar; 4740: buf[len] = 0; 4741: } 4742: } 4743: ctxt->input->cur = in; 4744: if (*in == 0xA) { 4745: in++; 4746: ctxt->input->line++; ctxt->input->col = 1; 4747: } 4748: if (*in == 0xD) { 4749: in++; 4750: if (*in == 0xA) { 4751: ctxt->input->cur = in; 4752: in++; 4753: ctxt->input->line++; ctxt->input->col = 1; 4754: continue; /* while */ 4755: } 4756: in--; 4757: } 4758: SHRINK; 4759: GROW; 4760: in = ctxt->input->cur; 4761: if (*in == '-') { 4762: if (in[1] == '-') { 4763: if (in[2] == 
'>') { 4764: if (ctxt->input->id != inputid) { 4765: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4766: "comment doesn't start and stop in the same entity\n"); 4767: } 4768: SKIP(3); 4769: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4770: (!ctxt->disableSAX)) { 4771: if (buf != NULL) 4772: ctxt->sax->comment(ctxt->userData, buf); 4773: else 4774: ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4775: } 4776: if (buf != NULL) 4777: xmlFree(buf); 4778: ctxt->instate = state; 4779: return; 4780: } 4781: if (buf != NULL) { 4782: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4783: "Double hyphen within comment: " 4784: "<!--%.50s\n", 4785: buf); 4786: } else 4787: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 4788: "Double hyphen within comment\n", NULL); 4789: in++; 4790: ctxt->input->col++; 4791: } 4792: in++; 4793: ctxt->input->col++; 4794: goto get_more; 4795: } 4796: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4797: xmlParseCommentComplex(ctxt, buf, len, size); 4798: ctxt->instate = state; 4799: return; 4800: } 4801: 4802: 4803: /** 4804: * xmlParsePITarget: 4805: * @ctxt: an XML parser context 4806: * 4807: * parse the name of a PI 4808: * 4809: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4810: * 4811: * Returns the PITarget name or NULL 4812: */ 4813: 4814: const xmlChar * 4815: xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4816: const xmlChar *name; 4817: 4818: name = xmlParseName(ctxt); 4819: if ((name != NULL) && 4820: ((name[0] == 'x') || (name[0] == 'X')) && 4821: ((name[1] == 'm') || (name[1] == 'M')) && 4822: ((name[2] == 'l') || (name[2] == 'L'))) { 4823: int i; 4824: if ((name[0] == 'x') && (name[1] == 'm') && 4825: (name[2] == 'l') && (name[3] == 0)) { 4826: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4827: "XML declaration allowed only at the start of the document\n"); 4828: return(name); 4829: } else if (name[3] == 0) { 4830: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4831: 
return(name); 4832: } 4833: for (i = 0;;i++) { 4834: if (xmlW3CPIs[i] == NULL) break; 4835: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4836: return(name); 4837: } 4838: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4839: "xmlParsePITarget: invalid name prefix 'xml'\n", 4840: NULL, NULL); 4841: } 4842: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 4843: xmlNsErr(ctxt, XML_NS_ERR_COLON, 4844: "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 4845: } 4846: return(name); 4847: } 4848: 4849: #ifdef LIBXML_CATALOG_ENABLED 4850: /** 4851: * xmlParseCatalogPI: 4852: * @ctxt: an XML parser context 4853: * @catalog: the PI value string 4854: * 4855: * parse an XML Catalog Processing Instruction. 4856: * 4857: * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4858: * 4859: * Occurs only if allowed by the user and if happening in the Misc 4860: * part of the document before any doctype informations 4861: * This will add the given catalog to the parsing context in order 4862: * to be used if there is a resolution need further down in the document 4863: */ 4864: 4865: static void 4866: xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4867: xmlChar *URL = NULL; 4868: const xmlChar *tmp, *base; 4869: xmlChar marker; 4870: 4871: tmp = catalog; 4872: while (IS_BLANK_CH(*tmp)) tmp++; 4873: if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4874: goto error; 4875: tmp += 7; 4876: while (IS_BLANK_CH(*tmp)) tmp++; 4877: if (*tmp != '=') { 4878: return; 4879: } 4880: tmp++; 4881: while (IS_BLANK_CH(*tmp)) tmp++; 4882: marker = *tmp; 4883: if ((marker != '\'') && (marker != '"')) 4884: goto error; 4885: tmp++; 4886: base = tmp; 4887: while ((*tmp != 0) && (*tmp != marker)) tmp++; 4888: if (*tmp == 0) 4889: goto error; 4890: URL = xmlStrndup(base, tmp - base); 4891: tmp++; 4892: while (IS_BLANK_CH(*tmp)) tmp++; 4893: if (*tmp != 0) 4894: goto error; 4895: 4896: if (URL != NULL) { 4897: ctxt->catalogs = 
xmlCatalogAddLocal(ctxt->catalogs, URL); 4898: xmlFree(URL); 4899: } 4900: return; 4901: 4902: error: 4903: xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4904: "Catalog PI syntax error: %s\n", 4905: catalog, NULL); 4906: if (URL != NULL) 4907: xmlFree(URL); 4908: } 4909: #endif 4910: 4911: /** 4912: * xmlParsePI: 4913: * @ctxt: an XML parser context 4914: * 4915: * parse an XML Processing Instruction. 4916: * 4917: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4918: * 4919: * The processing is transfered to SAX once parsed. 4920: */ 4921: 4922: void 4923: xmlParsePI(xmlParserCtxtPtr ctxt) { 4924: xmlChar *buf = NULL; 4925: int len = 0; 4926: int size = XML_PARSER_BUFFER_SIZE; 4927: int cur, l; 4928: const xmlChar *target; 4929: xmlParserInputState state; 4930: int count = 0; 4931: 4932: if ((RAW == '<') && (NXT(1) == '?')) { 4933: xmlParserInputPtr input = ctxt->input; 4934: state = ctxt->instate; 4935: ctxt->instate = XML_PARSER_PI; 4936: /* 4937: * this is a Processing Instruction. 4938: */ 4939: SKIP(2); 4940: SHRINK; 4941: 4942: /* 4943: * Parse the target name and check for special support like 4944: * namespace. 4945: */ 4946: target = xmlParsePITarget(ctxt); 4947: if (target != NULL) { 4948: if ((RAW == '?') && (NXT(1) == '>')) { 4949: if (input != ctxt->input) { 4950: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4951: "PI declaration doesn't start and stop in the same entity\n"); 4952: } 4953: SKIP(2); 4954: 4955: /* 4956: * SAX: PI detected. 
4957: */ 4958: if ((ctxt->sax) && (!ctxt->disableSAX) && 4959: (ctxt->sax->processingInstruction != NULL)) 4960: ctxt->sax->processingInstruction(ctxt->userData, 4961: target, NULL); 4962: if (ctxt->instate != XML_PARSER_EOF) 4963: ctxt->instate = state; 4964: return; 4965: } 4966: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4967: if (buf == NULL) { 4968: xmlErrMemory(ctxt, NULL); 4969: ctxt->instate = state; 4970: return; 4971: } 4972: cur = CUR; 4973: if (!IS_BLANK(cur)) { 4974: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4975: "ParsePI: PI %s space expected\n", target); 4976: } 4977: SKIP_BLANKS; 4978: cur = CUR_CHAR(l); 4979: while (IS_CHAR(cur) && /* checked */ 4980: ((cur != '?') || (NXT(1) != '>'))) { 4981: if (len + 5 >= size) { 4982: xmlChar *tmp; 4983: 4984: size *= 2; 4985: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4986: if (tmp == NULL) { 4987: xmlErrMemory(ctxt, NULL); 4988: xmlFree(buf); 4989: ctxt->instate = state; 4990: return; 4991: } 4992: buf = tmp; 4993: } 4994: count++; 4995: if (count > 50) { 4996: GROW; 4997: count = 0; 4998: } 4999: COPY_BUF(l,buf,len,cur); 5000: NEXTL(l); 5001: cur = CUR_CHAR(l); 5002: if (cur == 0) { 5003: SHRINK; 5004: GROW; 5005: cur = CUR_CHAR(l); 5006: } 5007: } 5008: buf[len] = 0; 5009: if (cur != '?') { 5010: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5011: "ParsePI: PI %s never end ...\n", target); 5012: } else { 5013: if (input != ctxt->input) { 5014: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5015: "PI declaration doesn't start and stop in the same entity\n"); 5016: } 5017: SKIP(2); 5018: 5019: #ifdef LIBXML_CATALOG_ENABLED 5020: if (((state == XML_PARSER_MISC) || 5021: (state == XML_PARSER_START)) && 5022: (xmlStrEqual(target, XML_CATALOG_PI))) { 5023: xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5024: if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5025: (allow == XML_CATA_ALLOW_ALL)) 5026: xmlParseCatalogPI(ctxt, buf); 5027: } 5028: #endif 5029: 5030: 5031: /* 5032: * 
SAX: PI detected. 5033: */ 5034: if ((ctxt->sax) && (!ctxt->disableSAX) && 5035: (ctxt->sax->processingInstruction != NULL)) 5036: ctxt->sax->processingInstruction(ctxt->userData, 5037: target, buf); 5038: } 5039: xmlFree(buf); 5040: } else { 5041: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5042: } 5043: if (ctxt->instate != XML_PARSER_EOF) 5044: ctxt->instate = state; 5045: } 5046: } 5047: 5048: /** 5049: * xmlParseNotationDecl: 5050: * @ctxt: an XML parser context 5051: * 5052: * parse a notation declaration 5053: * 5054: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5055: * 5056: * Hence there is actually 3 choices: 5057: * 'PUBLIC' S PubidLiteral 5058: * 'PUBLIC' S PubidLiteral S SystemLiteral 5059: * and 'SYSTEM' S SystemLiteral 5060: * 5061: * See the NOTE on xmlParseExternalID(). 5062: */ 5063: 5064: void 5065: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5066: const xmlChar *name; 5067: xmlChar *Pubid; 5068: xmlChar *Systemid; 5069: 5070: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5071: xmlParserInputPtr input = ctxt->input; 5072: SHRINK; 5073: SKIP(10); 5074: if (!IS_BLANK_CH(CUR)) { 5075: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5076: "Space required after '<!NOTATION'\n"); 5077: return; 5078: } 5079: SKIP_BLANKS; 5080: 5081: name = xmlParseName(ctxt); 5082: if (name == NULL) { 5083: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5084: return; 5085: } 5086: if (!IS_BLANK_CH(CUR)) { 5087: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5088: "Space required after the NOTATION name'\n"); 5089: return; 5090: } 5091: if (xmlStrchr(name, ':') != NULL) { 5092: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5093: "colon are forbidden from notation names '%s'\n", 5094: name, NULL, NULL); 5095: } 5096: SKIP_BLANKS; 5097: 5098: /* 5099: * Parse the IDs. 
5100: */ 5101: Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5102: SKIP_BLANKS; 5103: 5104: if (RAW == '>') { 5105: if (input != ctxt->input) { 5106: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5107: "Notation declaration doesn't start and stop in the same entity\n"); 5108: } 5109: NEXT; 5110: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5111: (ctxt->sax->notationDecl != NULL)) 5112: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5113: } else { 5114: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5115: } 5116: if (Systemid != NULL) xmlFree(Systemid); 5117: if (Pubid != NULL) xmlFree(Pubid); 5118: } 5119: } 5120: 5121: /** 5122: * xmlParseEntityDecl: 5123: * @ctxt: an XML parser context 5124: * 5125: * parse <!ENTITY declarations 5126: * 5127: * [70] EntityDecl ::= GEDecl | PEDecl 5128: * 5129: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5130: * 5131: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5132: * 5133: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5134: * 5135: * [74] PEDef ::= EntityValue | ExternalID 5136: * 5137: * [76] NDataDecl ::= S 'NDATA' S Name 5138: * 5139: * [ VC: Notation Declared ] 5140: * The Name must match the declared name of a notation. 
5141: */ 5142: 5143: void 5144: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5145: const xmlChar *name = NULL; 5146: xmlChar *value = NULL; 5147: xmlChar *URI = NULL, *literal = NULL; 5148: const xmlChar *ndata = NULL; 5149: int isParameter = 0; 5150: xmlChar *orig = NULL; 5151: int skipped; 5152: 5153: /* GROW; done in the caller */ 5154: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5155: xmlParserInputPtr input = ctxt->input; 5156: SHRINK; 5157: SKIP(8); 5158: skipped = SKIP_BLANKS; 5159: if (skipped == 0) { 5160: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5161: "Space required after '<!ENTITY'\n"); 5162: } 5163: 5164: if (RAW == '%') { 5165: NEXT; 5166: skipped = SKIP_BLANKS; 5167: if (skipped == 0) { 5168: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5169: "Space required after '%'\n"); 5170: } 5171: isParameter = 1; 5172: } 5173: 5174: name = xmlParseName(ctxt); 5175: if (name == NULL) { 5176: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5177: "xmlParseEntityDecl: no name\n"); 5178: return; 5179: } 5180: if (xmlStrchr(name, ':') != NULL) { 5181: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5182: "colon are forbidden from entities names '%s'\n", 5183: name, NULL, NULL); 5184: } 5185: skipped = SKIP_BLANKS; 5186: if (skipped == 0) { 5187: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5188: "Space required after the entity name\n"); 5189: } 5190: 5191: ctxt->instate = XML_PARSER_ENTITY_DECL; 5192: /* 5193: * handle the various case of definitions... 
5194: */ 5195: if (isParameter) { 5196: if ((RAW == '"') || (RAW == '\'')) { 5197: value = xmlParseEntityValue(ctxt, &orig); 5198: if (value) { 5199: if ((ctxt->sax != NULL) && 5200: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5201: ctxt->sax->entityDecl(ctxt->userData, name, 5202: XML_INTERNAL_PARAMETER_ENTITY, 5203: NULL, NULL, value); 5204: } 5205: } else { 5206: URI = xmlParseExternalID(ctxt, &literal, 1); 5207: if ((URI == NULL) && (literal == NULL)) { 5208: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5209: } 5210: if (URI) { 5211: xmlURIPtr uri; 5212: 5213: uri = xmlParseURI((const char *) URI); 5214: if (uri == NULL) { 5215: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5216: "Invalid URI: %s\n", URI); 5217: /* 5218: * This really ought to be a well formedness error 5219: * but the XML Core WG decided otherwise c.f. issue 5220: * E26 of the XML erratas. 5221: */ 5222: } else { 5223: if (uri->fragment != NULL) { 5224: /* 5225: * Okay this is foolish to block those but not 5226: * invalid URIs. 5227: */ 5228: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5229: } else { 5230: if ((ctxt->sax != NULL) && 5231: (!ctxt->disableSAX) && 5232: (ctxt->sax->entityDecl != NULL)) 5233: ctxt->sax->entityDecl(ctxt->userData, name, 5234: XML_EXTERNAL_PARAMETER_ENTITY, 5235: literal, URI, NULL); 5236: } 5237: xmlFreeURI(uri); 5238: } 5239: } 5240: } 5241: } else { 5242: if ((RAW == '"') || (RAW == '\'')) { 5243: value = xmlParseEntityValue(ctxt, &orig); 5244: if ((ctxt->sax != NULL) && 5245: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5246: ctxt->sax->entityDecl(ctxt->userData, name, 5247: XML_INTERNAL_GENERAL_ENTITY, 5248: NULL, NULL, value); 5249: /* 5250: * For expat compatibility in SAX mode. 
5251: */ 5252: if ((ctxt->myDoc == NULL) || 5253: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5254: if (ctxt->myDoc == NULL) { 5255: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5256: if (ctxt->myDoc == NULL) { 5257: xmlErrMemory(ctxt, "New Doc failed"); 5258: return; 5259: } 5260: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5261: } 5262: if (ctxt->myDoc->intSubset == NULL) 5263: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5264: BAD_CAST "fake", NULL, NULL); 5265: 5266: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5267: NULL, NULL, value); 5268: } 5269: } else { 5270: URI = xmlParseExternalID(ctxt, &literal, 1); 5271: if ((URI == NULL) && (literal == NULL)) { 5272: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5273: } 5274: if (URI) { 5275: xmlURIPtr uri; 5276: 5277: uri = xmlParseURI((const char *)URI); 5278: if (uri == NULL) { 5279: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5280: "Invalid URI: %s\n", URI); 5281: /* 5282: * This really ought to be a well formedness error 5283: * but the XML Core WG decided otherwise c.f. issue 5284: * E26 of the XML erratas. 5285: */ 5286: } else { 5287: if (uri->fragment != NULL) { 5288: /* 5289: * Okay this is foolish to block those but not 5290: * invalid URIs. 
5291: */ 5292: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5293: } 5294: xmlFreeURI(uri); 5295: } 5296: } 5297: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5298: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5299: "Space required before 'NDATA'\n"); 5300: } 5301: SKIP_BLANKS; 5302: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5303: SKIP(5); 5304: if (!IS_BLANK_CH(CUR)) { 5305: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5306: "Space required after 'NDATA'\n"); 5307: } 5308: SKIP_BLANKS; 5309: ndata = xmlParseName(ctxt); 5310: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5311: (ctxt->sax->unparsedEntityDecl != NULL)) 5312: ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5313: literal, URI, ndata); 5314: } else { 5315: if ((ctxt->sax != NULL) && 5316: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5317: ctxt->sax->entityDecl(ctxt->userData, name, 5318: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5319: literal, URI, NULL); 5320: /* 5321: * For expat compatibility in SAX mode. 5322: * assuming the entity repalcement was asked for 5323: */ 5324: if ((ctxt->replaceEntities != 0) && 5325: ((ctxt->myDoc == NULL) || 5326: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5327: if (ctxt->myDoc == NULL) { 5328: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5329: if (ctxt->myDoc == NULL) { 5330: xmlErrMemory(ctxt, "New Doc failed"); 5331: return; 5332: } 5333: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5334: } 5335: 5336: if (ctxt->myDoc->intSubset == NULL) 5337: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5338: BAD_CAST "fake", NULL, NULL); 5339: xmlSAX2EntityDecl(ctxt, name, 5340: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5341: literal, URI, NULL); 5342: } 5343: } 5344: } 5345: } 5346: SKIP_BLANKS; 5347: if (RAW != '>') { 5348: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5349: "xmlParseEntityDecl: entity %s not terminated\n", name); 5350: } else { 5351: if (input != ctxt->input) { 5352: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5353: "Entity 
declaration doesn't start and stop in the same entity\n"); 5354: } 5355: NEXT; 5356: } 5357: if (orig != NULL) { 5358: /* 5359: * Ugly mechanism to save the raw entity value. 5360: */ 5361: xmlEntityPtr cur = NULL; 5362: 5363: if (isParameter) { 5364: if ((ctxt->sax != NULL) && 5365: (ctxt->sax->getParameterEntity != NULL)) 5366: cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5367: } else { 5368: if ((ctxt->sax != NULL) && 5369: (ctxt->sax->getEntity != NULL)) 5370: cur = ctxt->sax->getEntity(ctxt->userData, name); 5371: if ((cur == NULL) && (ctxt->userData==ctxt)) { 5372: cur = xmlSAX2GetEntity(ctxt, name); 5373: } 5374: } 5375: if (cur != NULL) { 5376: if (cur->orig != NULL) 5377: xmlFree(orig); 5378: else 5379: cur->orig = orig; 5380: } else 5381: xmlFree(orig); 5382: } 5383: if (value != NULL) xmlFree(value); 5384: if (URI != NULL) xmlFree(URI); 5385: if (literal != NULL) xmlFree(literal); 5386: } 5387: } 5388: 5389: /** 5390: * xmlParseDefaultDecl: 5391: * @ctxt: an XML parser context 5392: * @value: Receive a possible fixed default value for the attribute 5393: * 5394: * Parse an attribute default declaration 5395: * 5396: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5397: * 5398: * [ VC: Required Attribute ] 5399: * if the default declaration is the keyword #REQUIRED, then the 5400: * attribute must be specified for all elements of the type in the 5401: * attribute-list declaration. 5402: * 5403: * [ VC: Attribute Default Legal ] 5404: * The declared default value must meet the lexical constraints of 5405: * the declared attribute type c.f. xmlValidateAttributeDecl() 5406: * 5407: * [ VC: Fixed Attribute Default ] 5408: * if an attribute has a default value declared with the #FIXED 5409: * keyword, instances of that attribute must match the default value. 
5410: * 5411: * [ WFC: No < in Attribute Values ] 5412: * handled in xmlParseAttValue() 5413: * 5414: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5415: * or XML_ATTRIBUTE_FIXED. 5416: */ 5417: 5418: int 5419: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5420: int val; 5421: xmlChar *ret; 5422: 5423: *value = NULL; 5424: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5425: SKIP(9); 5426: return(XML_ATTRIBUTE_REQUIRED); 5427: } 5428: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5429: SKIP(8); 5430: return(XML_ATTRIBUTE_IMPLIED); 5431: } 5432: val = XML_ATTRIBUTE_NONE; 5433: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5434: SKIP(6); 5435: val = XML_ATTRIBUTE_FIXED; 5436: if (!IS_BLANK_CH(CUR)) { 5437: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5438: "Space required after '#FIXED'\n"); 5439: } 5440: SKIP_BLANKS; 5441: } 5442: ret = xmlParseAttValue(ctxt); 5443: ctxt->instate = XML_PARSER_DTD; 5444: if (ret == NULL) { 5445: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5446: "Attribute default value declaration error\n"); 5447: } else 5448: *value = ret; 5449: return(val); 5450: } 5451: 5452: /** 5453: * xmlParseNotationType: 5454: * @ctxt: an XML parser context 5455: * 5456: * parse an Notation attribute type. 5457: * 5458: * Note: the leading 'NOTATION' S part has already being parsed... 5459: * 5460: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5461: * 5462: * [ VC: Notation Attributes ] 5463: * Values of this type must match one of the notation names included 5464: * in the declaration; all notation names in the declaration must be declared. 
5465: * 5466: * Returns: the notation attribute tree built while parsing 5467: */ 5468: 5469: xmlEnumerationPtr 5470: xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5471: const xmlChar *name; 5472: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5473: 5474: if (RAW != '(') { 5475: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5476: return(NULL); 5477: } 5478: SHRINK; 5479: do { 5480: NEXT; 5481: SKIP_BLANKS; 5482: name = xmlParseName(ctxt); 5483: if (name == NULL) { 5484: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5485: "Name expected in NOTATION declaration\n"); 5486: xmlFreeEnumeration(ret); 5487: return(NULL); 5488: } 5489: tmp = ret; 5490: while (tmp != NULL) { 5491: if (xmlStrEqual(name, tmp->name)) { 5492: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5493: "standalone: attribute notation value token %s duplicated\n", 5494: name, NULL); 5495: if (!xmlDictOwns(ctxt->dict, name)) 5496: xmlFree((xmlChar *) name); 5497: break; 5498: } 5499: tmp = tmp->next; 5500: } 5501: if (tmp == NULL) { 5502: cur = xmlCreateEnumeration(name); 5503: if (cur == NULL) { 5504: xmlFreeEnumeration(ret); 5505: return(NULL); 5506: } 5507: if (last == NULL) ret = last = cur; 5508: else { 5509: last->next = cur; 5510: last = cur; 5511: } 5512: } 5513: SKIP_BLANKS; 5514: } while (RAW == '|'); 5515: if (RAW != ')') { 5516: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5517: xmlFreeEnumeration(ret); 5518: return(NULL); 5519: } 5520: NEXT; 5521: return(ret); 5522: } 5523: 5524: /** 5525: * xmlParseEnumerationType: 5526: * @ctxt: an XML parser context 5527: * 5528: * parse an Enumeration attribute type. 5529: * 5530: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * A token listed twice raises a validity error and is kept only once.
 * When a token is missing or the closing ')' is not found, the list
 * built so far is returned as is.
 *
 * Returns: the enumeration attribute tree built while parsing
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlChar *name;
    xmlEnumerationPtr head = NULL;  /* first node, the value returned */
    xmlEnumerationPtr tail = NULL;  /* last node, where new ones go */
    xmlEnumerationPtr node = NULL, dup;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    do {
        /* steps over the '(' on the first round, over a '|' afterwards */
        NEXT;
        SKIP_BLANKS;
        name = xmlParseNmtoken(ctxt);
        if (name == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }

        /* look for an earlier occurrence of the same token */
        for (dup = head; dup != NULL; dup = dup->next) {
            if (xmlStrEqual(name, dup->name))
                break;
        }

        if (dup != NULL) {
            xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
          "standalone: attribute enumeration value token %s duplicated\n",
                             name, NULL);
        } else {
            node = xmlCreateEnumeration(name);
        }
        /* the token string is not needed past this point in either case */
        if (!xmlDictOwns(ctxt->dict, name))
            xmlFree(name);

        if (dup == NULL) {
            if (node == NULL) {
                xmlFreeEnumeration(head);
                return(NULL);
            }
            if (tail == NULL) {
                head = node;
            } else {
                tail->next = node;
            }
            tail = node;
        }
        SKIP_BLANKS;
    } while (RAW == '|');

    if (RAW != ')') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
        return(head);
    }
    NEXT;
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')' 5603: * 5604: * 5605: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5606: */ 5607: 5608: int 5609: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5610: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5611: SKIP(8); 5612: if (!IS_BLANK_CH(CUR)) { 5613: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5614: "Space required after 'NOTATION'\n"); 5615: return(0); 5616: } 5617: SKIP_BLANKS; 5618: *tree = xmlParseNotationType(ctxt); 5619: if (*tree == NULL) return(0); 5620: return(XML_ATTRIBUTE_NOTATION); 5621: } 5622: *tree = xmlParseEnumerationType(ctxt); 5623: if (*tree == NULL) return(0); 5624: return(XML_ATTRIBUTE_ENUMERATION); 5625: } 5626: 5627: /** 5628: * xmlParseAttributeType: 5629: * @ctxt: an XML parser context 5630: * @tree: the enumeration tree built while parsing 5631: * 5632: * parse the Attribute list def for an element 5633: * 5634: * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5635: * 5636: * [55] StringType ::= 'CDATA' 5637: * 5638: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5639: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5640: * 5641: * Validity constraints for attribute values syntax are checked in 5642: * xmlValidateAttributeValue() 5643: * 5644: * [ VC: ID ] 5645: * Values of type ID must match the Name production. A name must not 5646: * appear more than once in an XML document as a value of this type; 5647: * i.e., ID values must uniquely identify the elements which bear them. 5648: * 5649: * [ VC: One ID per Element Type ] 5650: * No element type may have more than one ID attribute specified. 5651: * 5652: * [ VC: ID Attribute Default ] 5653: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 
5654: * 5655: * [ VC: IDREF ] 5656: * Values of type IDREF must match the Name production, and values 5657: * of type IDREFS must match Names; each IDREF Name must match the value 5658: * of an ID attribute on some element in the XML document; i.e. IDREF 5659: * values must match the value of some ID attribute. 5660: * 5661: * [ VC: Entity Name ] 5662: * Values of type ENTITY must match the Name production, values 5663: * of type ENTITIES must match Names; each Entity Name must match the 5664: * name of an unparsed entity declared in the DTD. 5665: * 5666: * [ VC: Name Token ] 5667: * Values of type NMTOKEN must match the Nmtoken production; values 5668: * of type NMTOKENS must match Nmtokens. 5669: * 5670: * Returns the attribute type 5671: */ 5672: int 5673: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5674: SHRINK; 5675: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5676: SKIP(5); 5677: return(XML_ATTRIBUTE_CDATA); 5678: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5679: SKIP(6); 5680: return(XML_ATTRIBUTE_IDREFS); 5681: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5682: SKIP(5); 5683: return(XML_ATTRIBUTE_IDREF); 5684: } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5685: SKIP(2); 5686: return(XML_ATTRIBUTE_ID); 5687: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5688: SKIP(6); 5689: return(XML_ATTRIBUTE_ENTITY); 5690: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5691: SKIP(8); 5692: return(XML_ATTRIBUTE_ENTITIES); 5693: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5694: SKIP(8); 5695: return(XML_ATTRIBUTE_NMTOKENS); 5696: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5697: SKIP(7); 5698: return(XML_ATTRIBUTE_NMTOKEN); 5699: } 5700: return(xmlParseEnumeratedType(ctxt, tree)); 5701: } 5702: 5703: /** 5704: * xmlParseAttributeListDecl: 5705: * @ctxt: an XML parser context 5706: * 5707: * : parse the Attribute list def for an element 5708: * 
5709: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 5710: * 5711: * [53] AttDef ::= S Name S AttType S DefaultDecl 5712: * 5713: */ 5714: void 5715: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5716: const xmlChar *elemName; 5717: const xmlChar *attrName; 5718: xmlEnumerationPtr tree; 5719: 5720: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5721: xmlParserInputPtr input = ctxt->input; 5722: 5723: SKIP(9); 5724: if (!IS_BLANK_CH(CUR)) { 5725: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5726: "Space required after '<!ATTLIST'\n"); 5727: } 5728: SKIP_BLANKS; 5729: elemName = xmlParseName(ctxt); 5730: if (elemName == NULL) { 5731: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5732: "ATTLIST: no name for Element\n"); 5733: return; 5734: } 5735: SKIP_BLANKS; 5736: GROW; 5737: while (RAW != '>') { 5738: const xmlChar *check = CUR_PTR; 5739: int type; 5740: int def; 5741: xmlChar *defaultValue = NULL; 5742: 5743: GROW; 5744: tree = NULL; 5745: attrName = xmlParseName(ctxt); 5746: if (attrName == NULL) { 5747: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5748: "ATTLIST: no name for Attribute\n"); 5749: break; 5750: } 5751: GROW; 5752: if (!IS_BLANK_CH(CUR)) { 5753: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5754: "Space required after the attribute name\n"); 5755: break; 5756: } 5757: SKIP_BLANKS; 5758: 5759: type = xmlParseAttributeType(ctxt, &tree); 5760: if (type <= 0) { 5761: break; 5762: } 5763: 5764: GROW; 5765: if (!IS_BLANK_CH(CUR)) { 5766: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5767: "Space required after the attribute type\n"); 5768: if (tree != NULL) 5769: xmlFreeEnumeration(tree); 5770: break; 5771: } 5772: SKIP_BLANKS; 5773: 5774: def = xmlParseDefaultDecl(ctxt, &defaultValue); 5775: if (def <= 0) { 5776: if (defaultValue != NULL) 5777: xmlFree(defaultValue); 5778: if (tree != NULL) 5779: xmlFreeEnumeration(tree); 5780: break; 5781: } 5782: if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5783: 
xmlAttrNormalizeSpace(defaultValue, defaultValue); 5784: 5785: GROW; 5786: if (RAW != '>') { 5787: if (!IS_BLANK_CH(CUR)) { 5788: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5789: "Space required after the attribute default value\n"); 5790: if (defaultValue != NULL) 5791: xmlFree(defaultValue); 5792: if (tree != NULL) 5793: xmlFreeEnumeration(tree); 5794: break; 5795: } 5796: SKIP_BLANKS; 5797: } 5798: if (check == CUR_PTR) { 5799: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 5800: "in xmlParseAttributeListDecl\n"); 5801: if (defaultValue != NULL) 5802: xmlFree(defaultValue); 5803: if (tree != NULL) 5804: xmlFreeEnumeration(tree); 5805: break; 5806: } 5807: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5808: (ctxt->sax->attributeDecl != NULL)) 5809: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5810: type, def, defaultValue, tree); 5811: else if (tree != NULL) 5812: xmlFreeEnumeration(tree); 5813: 5814: if ((ctxt->sax2) && (defaultValue != NULL) && 5815: (def != XML_ATTRIBUTE_IMPLIED) && 5816: (def != XML_ATTRIBUTE_REQUIRED)) { 5817: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5818: } 5819: if (ctxt->sax2) { 5820: xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5821: } 5822: if (defaultValue != NULL) 5823: xmlFree(defaultValue); 5824: GROW; 5825: } 5826: if (RAW == '>') { 5827: if (input != ctxt->input) { 5828: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5829: "Attribute list declaration doesn't start and stop in the same entity\n", 5830: NULL, NULL); 5831: } 5832: NEXT; 5833: } 5834: } 5835: } 5836: 5837: /** 5838: * xmlParseElementMixedContentDecl: 5839: * @ctxt: an XML parser context 5840: * @inputchk: the input used for the current entity, needed for boundary checks 5841: * 5842: * parse the declaration for a Mixed Element content 5843: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5844: * 5845: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 5846: * '(' S? '#PCDATA' S? 
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * returns: the list of the xmlElementContentPtr describing the element
 * choices, or NULL on error. On every error path the partially built tree
 * is released before returning.
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        SKIP(7);
        SKIP_BLANKS;
        SHRINK;
        if (RAW == ')') {
            /* '(#PCDATA)' possibly followed by '*' */
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
            ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                          XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL)
                return(NULL);
            if (RAW == '*') {
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            }
            return(ret);
        }
        if ((RAW == '(') || (RAW == '|')) {
            ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                                XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL)
                return(NULL);
        }
        while (RAW == '|') {
            NEXT;
            n = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                        XML_ELEMENT_CONTENT_OR);
            if (n == NULL) {
                /* ret still roots everything built so far: release it all */
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (elem == NULL) {
                /* First alternative: the new OR node becomes the root. */
                n->c1 = cur;
                if (cur != NULL)
                    cur->parent = n;
                ret = cur = n;
            } else {
                /*
                 * Later alternatives: the name read on the previous pass
                 * becomes the left child of a new OR node chained on c2.
                 */
                n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                                XML_ELEMENT_CONTENT_ELEMENT);
                if (n->c1 != NULL)
                    n->c1->parent = n;
                cur->c2 = n;
                n->parent = cur;
                cur = n;
            }
            SKIP_BLANKS;
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                        "xmlParseElementMixedContentDecl : Name expected\n");
                /*
                 * Free from the root: cur may be an inner node, and
                 * releasing only that sub-tree would leak its ancestors.
                 */
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            SKIP_BLANKS;
            GROW;
        }
        if ((RAW == ')') && (NXT(1) == '*')) {
            if (elem != NULL) {
                /* attach the last name read as the final right child */
                cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                                  XML_ELEMENT_CONTENT_ELEMENT);
                if (cur->c2 != NULL)
                    cur->c2->parent = cur;
            }
            if (ret != NULL)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            SKIP(2);
        } else {
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
            return(NULL);
        }

    } else {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt: an XML parser context
 * @inputchk: the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an Element children content (choice or sequence)
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 * with parenthesized groups.
That is to say, if either of the 5965: * opening or closing parentheses in a choice, seq, or Mixed 5966: * construct is contained in the replacement text for a parameter 5967: * entity, both must be contained in the same replacement text. For 5968: * interoperability, if a parameter-entity reference appears in a 5969: * choice, seq, or Mixed construct, its replacement text should not 5970: * be empty, and neither the first nor last non-blank character of 5971: * the replacement text should be a connector (| or ,). 5972: * 5973: * Returns the tree of xmlElementContentPtr describing the element 5974: * hierarchy. 5975: */ 5976: static xmlElementContentPtr 5977: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 5978: int depth) { 5979: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5980: const xmlChar *elem; 5981: xmlChar type = 0; 5982: 5983: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 5984: (depth > 2048)) { 5985: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 5986: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 5987: depth); 5988: return(NULL); 5989: } 5990: SKIP_BLANKS; 5991: GROW; 5992: if (RAW == '(') { 5993: int inputid = ctxt->input->id; 5994: 5995: /* Recurse on first child */ 5996: NEXT; 5997: SKIP_BLANKS; 5998: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 5999: depth + 1); 6000: SKIP_BLANKS; 6001: GROW; 6002: } else { 6003: elem = xmlParseName(ctxt); 6004: if (elem == NULL) { 6005: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6006: return(NULL); 6007: } 6008: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6009: if (cur == NULL) { 6010: xmlErrMemory(ctxt, NULL); 6011: return(NULL); 6012: } 6013: GROW; 6014: if (RAW == '?') { 6015: cur->ocur = XML_ELEMENT_CONTENT_OPT; 6016: NEXT; 6017: } else if (RAW == '*') { 6018: cur->ocur = XML_ELEMENT_CONTENT_MULT; 6019: NEXT; 6020: } 
else if (RAW == '+') { 6021: cur->ocur = XML_ELEMENT_CONTENT_PLUS; 6022: NEXT; 6023: } else { 6024: cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6025: } 6026: GROW; 6027: } 6028: SKIP_BLANKS; 6029: SHRINK; 6030: while (RAW != ')') { 6031: /* 6032: * Each loop we parse one separator and one element. 6033: */ 6034: if (RAW == ',') { 6035: if (type == 0) type = CUR; 6036: 6037: /* 6038: * Detect "Name | Name , Name" error 6039: */ 6040: else if (type != CUR) { 6041: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6042: "xmlParseElementChildrenContentDecl : '%c' expected\n", 6043: type); 6044: if ((last != NULL) && (last != ret)) 6045: xmlFreeDocElementContent(ctxt->myDoc, last); 6046: if (ret != NULL) 6047: xmlFreeDocElementContent(ctxt->myDoc, ret); 6048: return(NULL); 6049: } 6050: NEXT; 6051: 6052: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6053: if (op == NULL) { 6054: if ((last != NULL) && (last != ret)) 6055: xmlFreeDocElementContent(ctxt->myDoc, last); 6056: xmlFreeDocElementContent(ctxt->myDoc, ret); 6057: return(NULL); 6058: } 6059: if (last == NULL) { 6060: op->c1 = ret; 6061: if (ret != NULL) 6062: ret->parent = op; 6063: ret = cur = op; 6064: } else { 6065: cur->c2 = op; 6066: if (op != NULL) 6067: op->parent = cur; 6068: op->c1 = last; 6069: if (last != NULL) 6070: last->parent = op; 6071: cur =op; 6072: last = NULL; 6073: } 6074: } else if (RAW == '|') { 6075: if (type == 0) type = CUR; 6076: 6077: /* 6078: * Detect "Name , Name | Name" error 6079: */ 6080: else if (type != CUR) { 6081: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6082: "xmlParseElementChildrenContentDecl : '%c' expected\n", 6083: type); 6084: if ((last != NULL) && (last != ret)) 6085: xmlFreeDocElementContent(ctxt->myDoc, last); 6086: if (ret != NULL) 6087: xmlFreeDocElementContent(ctxt->myDoc, ret); 6088: return(NULL); 6089: } 6090: NEXT; 6091: 6092: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6093: if (op == NULL) { 6094: 
if ((last != NULL) && (last != ret)) 6095: xmlFreeDocElementContent(ctxt->myDoc, last); 6096: if (ret != NULL) 6097: xmlFreeDocElementContent(ctxt->myDoc, ret); 6098: return(NULL); 6099: } 6100: if (last == NULL) { 6101: op->c1 = ret; 6102: if (ret != NULL) 6103: ret->parent = op; 6104: ret = cur = op; 6105: } else { 6106: cur->c2 = op; 6107: if (op != NULL) 6108: op->parent = cur; 6109: op->c1 = last; 6110: if (last != NULL) 6111: last->parent = op; 6112: cur =op; 6113: last = NULL; 6114: } 6115: } else { 6116: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6117: if ((last != NULL) && (last != ret)) 6118: xmlFreeDocElementContent(ctxt->myDoc, last); 6119: if (ret != NULL) 6120: xmlFreeDocElementContent(ctxt->myDoc, ret); 6121: return(NULL); 6122: } 6123: GROW; 6124: SKIP_BLANKS; 6125: GROW; 6126: if (RAW == '(') { 6127: int inputid = ctxt->input->id; 6128: /* Recurse on second child */ 6129: NEXT; 6130: SKIP_BLANKS; 6131: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6132: depth + 1); 6133: SKIP_BLANKS; 6134: } else { 6135: elem = xmlParseName(ctxt); 6136: if (elem == NULL) { 6137: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6138: if (ret != NULL) 6139: xmlFreeDocElementContent(ctxt->myDoc, ret); 6140: return(NULL); 6141: } 6142: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6143: if (last == NULL) { 6144: if (ret != NULL) 6145: xmlFreeDocElementContent(ctxt->myDoc, ret); 6146: return(NULL); 6147: } 6148: if (RAW == '?') { 6149: last->ocur = XML_ELEMENT_CONTENT_OPT; 6150: NEXT; 6151: } else if (RAW == '*') { 6152: last->ocur = XML_ELEMENT_CONTENT_MULT; 6153: NEXT; 6154: } else if (RAW == '+') { 6155: last->ocur = XML_ELEMENT_CONTENT_PLUS; 6156: NEXT; 6157: } else { 6158: last->ocur = XML_ELEMENT_CONTENT_ONCE; 6159: } 6160: } 6161: SKIP_BLANKS; 6162: GROW; 6163: } 6164: if ((cur != NULL) && (last != NULL)) { 6165: cur->c2 = last; 6166: if (last != NULL) 6167: last->parent = cur; 6168: } 
6169: if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6170: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6171: "Element content declaration doesn't start and stop in the same entity\n", 6172: NULL, NULL); 6173: } 6174: NEXT; 6175: if (RAW == '?') { 6176: if (ret != NULL) { 6177: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6178: (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6179: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6180: else 6181: ret->ocur = XML_ELEMENT_CONTENT_OPT; 6182: } 6183: NEXT; 6184: } else if (RAW == '*') { 6185: if (ret != NULL) { 6186: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6187: cur = ret; 6188: /* 6189: * Some normalization: 6190: * (a | b* | c?)* == (a | b | c)* 6191: */ 6192: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6193: if ((cur->c1 != NULL) && 6194: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6195: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6196: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6197: if ((cur->c2 != NULL) && 6198: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6199: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6200: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6201: cur = cur->c2; 6202: } 6203: } 6204: NEXT; 6205: } else if (RAW == '+') { 6206: if (ret != NULL) { 6207: int found = 0; 6208: 6209: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6210: (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6211: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6212: else 6213: ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6214: /* 6215: * Some normalization: 6216: * (a | b*)+ == (a | b)* 6217: * (a | b?)+ == (a | b)* 6218: */ 6219: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6220: if ((cur->c1 != NULL) && 6221: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6222: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6223: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6224: found = 1; 6225: } 6226: if ((cur->c2 != NULL) && 6227: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6228: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 
{ 6229: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6230: found = 1; 6231: } 6232: cur = cur->c2; 6233: } 6234: if (found) 6235: ret->ocur = XML_ELEMENT_CONTENT_MULT; 6236: } 6237: NEXT; 6238: } 6239: return(ret); 6240: } 6241: 6242: /** 6243: * xmlParseElementChildrenContentDecl: 6244: * @ctxt: an XML parser context 6245: * @inputchk: the input used for the current entity, needed for boundary checks 6246: * 6247: * parse the declaration for a Mixed Element content 6248: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6249: * 6250: * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6251: * 6252: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6253: * 6254: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6255: * 6256: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6257: * 6258: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6259: * TODO Parameter-entity replacement text must be properly nested 6260: * with parenthesized groups. That is to say, if either of the 6261: * opening or closing parentheses in a choice, seq, or Mixed 6262: * construct is contained in the replacement text for a parameter 6263: * entity, both must be contained in the same replacement text. For 6264: * interoperability, if a parameter-entity reference appears in a 6265: * choice, seq, or Mixed construct, its replacement text should not 6266: * be empty, and neither the first nor last non-blank character of 6267: * the replacement text should be a connector (| or ,). 6268: * 6269: * Returns the tree of xmlElementContentPtr describing the element 6270: * hierarchy. 6271: */ 6272: xmlElementContentPtr 6273: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6274: /* stub left for API/ABI compat */ 6275: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6276: } 6277: 6278: /** 6279: * xmlParseElementContentDecl: 6280: * @ctxt: an XML parser context 6281: * @name: the name of the element being defined. 
6282: * @result: the Element Content pointer will be stored here if any 6283: * 6284: * parse the declaration for an Element content either Mixed or Children, 6285: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6286: * 6287: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6288: * 6289: * returns: the type of element content XML_ELEMENT_TYPE_xxx 6290: */ 6291: 6292: int 6293: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6294: xmlElementContentPtr *result) { 6295: 6296: xmlElementContentPtr tree = NULL; 6297: int inputid = ctxt->input->id; 6298: int res; 6299: 6300: *result = NULL; 6301: 6302: if (RAW != '(') { 6303: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6304: "xmlParseElementContentDecl : %s '(' expected\n", name); 6305: return(-1); 6306: } 6307: NEXT; 6308: GROW; 6309: SKIP_BLANKS; 6310: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6311: tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6312: res = XML_ELEMENT_TYPE_MIXED; 6313: } else { 6314: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6315: res = XML_ELEMENT_TYPE_ELEMENT; 6316: } 6317: SKIP_BLANKS; 6318: *result = tree; 6319: return(res); 6320: } 6321: 6322: /** 6323: * xmlParseElementDecl: 6324: * @ctxt: an XML parser context 6325: * 6326: * parse an Element declaration. 6327: * 6328: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    int ret = -1;
    xmlElementContentPtr content = NULL;

    /* GROW; done in the caller */
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
        /*
         * Remember the input by id rather than by address: inputs are
         * popped below, and a later input may be allocated at the address
         * of a released one, which would hide an entity boundary error.
         */
        int inputid = ctxt->input->id;

        SKIP(9);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'ELEMENT'\n");
        }
        SKIP_BLANKS;
        name = xmlParseName(ctxt);
        if (name == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "xmlParseElementDecl: no name for Element\n");
            return(-1);
        }
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after the element name\n");
        }
        SKIP_BLANKS;
        if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
            SKIP(5);
            /*
             * Element must always be empty.
             */
            ret = XML_ELEMENT_TYPE_EMPTY;
        } else if ((RAW == 'A') && (NXT(1) == 'N') &&
                   (NXT(2) == 'Y')) {
            SKIP(3);
            /*
             * Element is a generic container.
             */
            ret = XML_ELEMENT_TYPE_ANY;
        } else if (RAW == '(') {
            ret = xmlParseElementContentDecl(ctxt, name, &content);
        } else {
            /*
             * [ WFC: PEs in Internal Subset ] error handling.
             */
            if ((RAW == '%') && (ctxt->external == 0) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
          "PEReference: forbidden within markup decl in internal subset\n");
            } else {
                xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
            }
            return(-1);
        }

        SKIP_BLANKS;
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
        SKIP_BLANKS;

        if (RAW != '>') {
            xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
            if (content != NULL) {
                xmlFreeDocElementContent(ctxt->myDoc, content);
            }
        } else {
            if (inputid != ctxt->input->id) {
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
            }

            NEXT;
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                (ctxt->sax->elementDecl != NULL)) {
                /* parent is cleared first so the check below can tell
                 * whether the callback plugged the tree somewhere */
                if (content != NULL)
                    content->parent = NULL;
                ctxt->sax->elementDecl(ctxt->userData, name, ret,
                                       content);
                if ((content != NULL) && (content->parent == NULL)) {
                    /*
                     * this is a trick: if xmlAddElementDecl is called,
                     * instead of copying the full tree it is plugged directly
                     * if called from the parser. Avoid duplicating the
                     * interfaces or change the API/ABI
                     */
                    xmlFreeDocElementContent(ctxt->myDoc, content);
                }
            } else if (content != NULL) {
                xmlFreeDocElementContent(ctxt->myDoc, content);
            }
        }
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt: an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6444: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6445: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6446: */ 6447: 6448: static void 6449: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6450: int id = ctxt->input->id; 6451: 6452: SKIP(3); 6453: SKIP_BLANKS; 6454: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6455: SKIP(7); 6456: SKIP_BLANKS; 6457: if (RAW != '[') { 6458: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6459: } else { 6460: if (ctxt->input->id != id) { 6461: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6462: "All markup of the conditional section is not in the same entity\n", 6463: NULL, NULL); 6464: } 6465: NEXT; 6466: } 6467: if (xmlParserDebugEntities) { 6468: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6469: xmlGenericError(xmlGenericErrorContext, 6470: "%s(%d): ", ctxt->input->filename, 6471: ctxt->input->line); 6472: xmlGenericError(xmlGenericErrorContext, 6473: "Entering INCLUDE Conditional Section\n"); 6474: } 6475: 6476: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6477: (NXT(2) != '>'))) { 6478: const xmlChar *check = CUR_PTR; 6479: unsigned int cons = ctxt->input->consumed; 6480: 6481: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6482: xmlParseConditionalSections(ctxt); 6483: } else if (IS_BLANK_CH(CUR)) { 6484: NEXT; 6485: } else if (RAW == '%') { 6486: xmlParsePEReference(ctxt); 6487: } else 6488: xmlParseMarkupDecl(ctxt); 6489: 6490: /* 6491: * Pop-up of finished entities. 
6492: */ 6493: while ((RAW == 0) && (ctxt->inputNr > 1)) 6494: xmlPopInput(ctxt); 6495: 6496: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6497: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6498: break; 6499: } 6500: } 6501: if (xmlParserDebugEntities) { 6502: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6503: xmlGenericError(xmlGenericErrorContext, 6504: "%s(%d): ", ctxt->input->filename, 6505: ctxt->input->line); 6506: xmlGenericError(xmlGenericErrorContext, 6507: "Leaving INCLUDE Conditional Section\n"); 6508: } 6509: 6510: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6511: int state; 6512: xmlParserInputState instate; 6513: int depth = 0; 6514: 6515: SKIP(6); 6516: SKIP_BLANKS; 6517: if (RAW != '[') { 6518: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6519: } else { 6520: if (ctxt->input->id != id) { 6521: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6522: "All markup of the conditional section is not in the same entity\n", 6523: NULL, NULL); 6524: } 6525: NEXT; 6526: } 6527: if (xmlParserDebugEntities) { 6528: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6529: xmlGenericError(xmlGenericErrorContext, 6530: "%s(%d): ", ctxt->input->filename, 6531: ctxt->input->line); 6532: xmlGenericError(xmlGenericErrorContext, 6533: "Entering IGNORE Conditional Section\n"); 6534: } 6535: 6536: /* 6537: * Parse up to the end of the conditional section 6538: * But disable SAX event generating DTD building in the meantime 6539: */ 6540: state = ctxt->disableSAX; 6541: instate = ctxt->instate; 6542: if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6543: ctxt->instate = XML_PARSER_IGNORE; 6544: 6545: while ((depth >= 0) && (RAW != 0)) { 6546: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6547: depth++; 6548: SKIP(3); 6549: continue; 6550: } 6551: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6552: if (--depth >= 0) SKIP(3); 6553: continue; 6554: } 6555: NEXT; 6556: continue; 6557: } 6558: 
6559: ctxt->disableSAX = state; 6560: ctxt->instate = instate; 6561: 6562: if (xmlParserDebugEntities) { 6563: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6564: xmlGenericError(xmlGenericErrorContext, 6565: "%s(%d): ", ctxt->input->filename, 6566: ctxt->input->line); 6567: xmlGenericError(xmlGenericErrorContext, 6568: "Leaving IGNORE Conditional Section\n"); 6569: } 6570: 6571: } else { 6572: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6573: } 6574: 6575: if (RAW == 0) 6576: SHRINK; 6577: 6578: if (RAW == 0) { 6579: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6580: } else { 6581: if (ctxt->input->id != id) { 6582: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6583: "All markup of the conditional section is not in the same entity\n", 6584: NULL, NULL); 6585: } 6586: SKIP(3); 6587: } 6588: } 6589: 6590: /** 6591: * xmlParseMarkupDecl: 6592: * @ctxt: an XML parser context 6593: * 6594: * parse Markup declarations 6595: * 6596: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6597: * NotationDecl | PI | Comment 6598: * 6599: * [ VC: Proper Declaration/PE Nesting ] 6600: * Parameter-entity replacement text must be properly nested with 6601: * markup declarations. That is to say, if either the first character 6602: * or the last character of a markup declaration (markupdecl above) is 6603: * contained in the replacement text for a parameter-entity reference, 6604: * both must be contained in the same replacement text. 6605: * 6606: * [ WFC: PEs in Internal Subset ] 6607: * In the internal DTD subset, parameter-entity references can occur 6608: * only where markup declarations can occur, not within markup declarations. 6609: * (This does not apply to references that occur in external parameter 6610: * entities or to the external subset.) 
6611: */ 6612: void 6613: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6614: GROW; 6615: if (CUR == '<') { 6616: if (NXT(1) == '!') { 6617: switch (NXT(2)) { 6618: case 'E': 6619: if (NXT(3) == 'L') 6620: xmlParseElementDecl(ctxt); 6621: else if (NXT(3) == 'N') 6622: xmlParseEntityDecl(ctxt); 6623: break; 6624: case 'A': 6625: xmlParseAttributeListDecl(ctxt); 6626: break; 6627: case 'N': 6628: xmlParseNotationDecl(ctxt); 6629: break; 6630: case '-': 6631: xmlParseComment(ctxt); 6632: break; 6633: default: 6634: /* there is an error but it will be detected later */ 6635: break; 6636: } 6637: } else if (NXT(1) == '?') { 6638: xmlParsePI(ctxt); 6639: } 6640: } 6641: /* 6642: * This is only for internal subset. On external entities, 6643: * the replacement is done before parsing stage 6644: */ 6645: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6646: xmlParsePEReference(ctxt); 6647: 6648: /* 6649: * Conditional sections are allowed from entities included 6650: * by PE References in the internal subset. 6651: */ 6652: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6653: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6654: xmlParseConditionalSections(ctxt); 6655: } 6656: } 6657: 6658: ctxt->instate = XML_PARSER_DTD; 6659: } 6660: 6661: /** 6662: * xmlParseTextDecl: 6663: * @ctxt: an XML parser context 6664: * 6665: * parse an XML declaration header for external entities 6666: * 6667: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6668: */ 6669: 6670: void 6671: xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6672: xmlChar *version; 6673: const xmlChar *encoding; 6674: 6675: /* 6676: * We know that '<?xml' is here. 
6677: */ 6678: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6679: SKIP(5); 6680: } else { 6681: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6682: return; 6683: } 6684: 6685: if (!IS_BLANK_CH(CUR)) { 6686: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6687: "Space needed after '<?xml'\n"); 6688: } 6689: SKIP_BLANKS; 6690: 6691: /* 6692: * We may have the VersionInfo here. 6693: */ 6694: version = xmlParseVersionInfo(ctxt); 6695: if (version == NULL) 6696: version = xmlCharStrdup(XML_DEFAULT_VERSION); 6697: else { 6698: if (!IS_BLANK_CH(CUR)) { 6699: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6700: "Space needed here\n"); 6701: } 6702: } 6703: ctxt->input->version = version; 6704: 6705: /* 6706: * We must have the encoding declaration 6707: */ 6708: encoding = xmlParseEncodingDecl(ctxt); 6709: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6710: /* 6711: * The XML REC instructs us to stop parsing right here 6712: */ 6713: return; 6714: } 6715: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6716: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6717: "Missing encoding in text declaration\n"); 6718: } 6719: 6720: SKIP_BLANKS; 6721: if ((RAW == '?') && (NXT(1) == '>')) { 6722: SKIP(2); 6723: } else if (RAW == '>') { 6724: /* Deprecated old WD ... */ 6725: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6726: NEXT; 6727: } else { 6728: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6729: MOVETO_ENDTAG(CUR_PTR); 6730: NEXT; 6731: } 6732: } 6733: 6734: /** 6735: * xmlParseExternalSubset: 6736: * @ctxt: an XML parser context 6737: * @ExternalID: the external identifier 6738: * @SystemID: the system identifier (or URL) 6739: * 6740: * parse Markup declarations from an external subset 6741: * 6742: * [30] extSubset ::= textDecl? 
extSubsetDecl 6743: * 6744: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6745: */ 6746: void 6747: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6748: const xmlChar *SystemID) { 6749: xmlDetectSAX2(ctxt); 6750: GROW; 6751: 6752: if ((ctxt->encoding == NULL) && 6753: (ctxt->input->end - ctxt->input->cur >= 4)) { 6754: xmlChar start[4]; 6755: xmlCharEncoding enc; 6756: 6757: start[0] = RAW; 6758: start[1] = NXT(1); 6759: start[2] = NXT(2); 6760: start[3] = NXT(3); 6761: enc = xmlDetectCharEncoding(start, 4); 6762: if (enc != XML_CHAR_ENCODING_NONE) 6763: xmlSwitchEncoding(ctxt, enc); 6764: } 6765: 6766: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6767: xmlParseTextDecl(ctxt); 6768: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6769: /* 6770: * The XML REC instructs us to stop parsing right here 6771: */ 6772: ctxt->instate = XML_PARSER_EOF; 6773: return; 6774: } 6775: } 6776: if (ctxt->myDoc == NULL) { 6777: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6778: if (ctxt->myDoc == NULL) { 6779: xmlErrMemory(ctxt, "New Doc failed"); 6780: return; 6781: } 6782: ctxt->myDoc->properties = XML_DOC_INTERNAL; 6783: } 6784: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6785: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6786: 6787: ctxt->instate = XML_PARSER_DTD; 6788: ctxt->external = 1; 6789: while (((RAW == '<') && (NXT(1) == '?')) || 6790: ((RAW == '<') && (NXT(1) == '!')) || 6791: (RAW == '%') || IS_BLANK_CH(CUR)) { 6792: const xmlChar *check = CUR_PTR; 6793: unsigned int cons = ctxt->input->consumed; 6794: 6795: GROW; 6796: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6797: xmlParseConditionalSections(ctxt); 6798: } else if (IS_BLANK_CH(CUR)) { 6799: NEXT; 6800: } else if (RAW == '%') { 6801: xmlParsePEReference(ctxt); 6802: } else 6803: xmlParseMarkupDecl(ctxt); 6804: 6805: /* 6806: * Pop-up of finished entities. 
6807: */ 6808: while ((RAW == 0) && (ctxt->inputNr > 1)) 6809: xmlPopInput(ctxt); 6810: 6811: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6812: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6813: break; 6814: } 6815: } 6816: 6817: if (RAW != 0) { 6818: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6819: } 6820: 6821: } 6822: 6823: /** 6824: * xmlParseReference: 6825: * @ctxt: an XML parser context 6826: * 6827: * parse and handle entity references in content, depending on the SAX 6828: * interface, this may end-up in a call to character() if this is a 6829: * CharRef, a predefined entity, if there is no reference() callback. 6830: * or if the parser was asked to switch to that mode. 6831: * 6832: * [67] Reference ::= EntityRef | CharRef 6833: */ 6834: void 6835: xmlParseReference(xmlParserCtxtPtr ctxt) { 6836: xmlEntityPtr ent; 6837: xmlChar *val; 6838: int was_checked; 6839: xmlNodePtr list = NULL; 6840: xmlParserErrors ret = XML_ERR_OK; 6841: 6842: 6843: if (RAW != '&') 6844: return; 6845: 6846: /* 6847: * Simple case of a CharRef 6848: */ 6849: if (NXT(1) == '#') { 6850: int i = 0; 6851: xmlChar out[10]; 6852: int hex = NXT(2); 6853: int value = xmlParseCharRef(ctxt); 6854: 6855: if (value == 0) 6856: return; 6857: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6858: /* 6859: * So we are using non-UTF-8 buffers 6860: * Check that the char fit on 8bits, if not 6861: * generate a CharRef. 
6862: */ 6863: if (value <= 0xFF) { 6864: out[0] = value; 6865: out[1] = 0; 6866: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6867: (!ctxt->disableSAX)) 6868: ctxt->sax->characters(ctxt->userData, out, 1); 6869: } else { 6870: if ((hex == 'x') || (hex == 'X')) 6871: snprintf((char *)out, sizeof(out), "#x%X", value); 6872: else 6873: snprintf((char *)out, sizeof(out), "#%d", value); 6874: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6875: (!ctxt->disableSAX)) 6876: ctxt->sax->reference(ctxt->userData, out); 6877: } 6878: } else { 6879: /* 6880: * Just encode the value in UTF-8 6881: */ 6882: COPY_BUF(0 ,out, i, value); 6883: out[i] = 0; 6884: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6885: (!ctxt->disableSAX)) 6886: ctxt->sax->characters(ctxt->userData, out, i); 6887: } 6888: return; 6889: } 6890: 6891: /* 6892: * We are seeing an entity reference 6893: */ 6894: ent = xmlParseEntityRef(ctxt); 6895: if (ent == NULL) return; 6896: if (!ctxt->wellFormed) 6897: return; 6898: was_checked = ent->checked; 6899: 6900: /* special case of predefined entities */ 6901: if ((ent->name == NULL) || 6902: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6903: val = ent->content; 6904: if (val == NULL) return; 6905: /* 6906: * inline the entity. 6907: */ 6908: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6909: (!ctxt->disableSAX)) 6910: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6911: return; 6912: } 6913: 6914: /* 6915: * The first reference to the entity trigger a parsing phase 6916: * where the ent->children is filled with the result from 6917: * the parsing. 6918: */ 6919: if (ent->checked == 0) { 6920: unsigned long oldnbent = ctxt->nbentities; 6921: 6922: /* 6923: * This is a bit hackish but this seems the best 6924: * way to make sure both SAX and DOM entity support 6925: * behaves okay. 
6926: */ 6927: void *user_data; 6928: if (ctxt->userData == ctxt) 6929: user_data = NULL; 6930: else 6931: user_data = ctxt->userData; 6932: 6933: /* 6934: * Check that this entity is well formed 6935: * 4.3.2: An internal general parsed entity is well-formed 6936: * if its replacement text matches the production labeled 6937: * content. 6938: */ 6939: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6940: ctxt->depth++; 6941: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6942: user_data, &list); 6943: ctxt->depth--; 6944: 6945: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6946: ctxt->depth++; 6947: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6948: user_data, ctxt->depth, ent->URI, 6949: ent->ExternalID, &list); 6950: ctxt->depth--; 6951: } else { 6952: ret = XML_ERR_ENTITY_PE_INTERNAL; 6953: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6954: "invalid entity type found\n", NULL); 6955: } 6956: 6957: /* 6958: * Store the number of entities needing parsing for this entity 6959: * content and do checkings 6960: */ 6961: ent->checked = ctxt->nbentities - oldnbent; 6962: if (ret == XML_ERR_ENTITY_LOOP) { 6963: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6964: xmlFreeNodeList(list); 6965: return; 6966: } 6967: if (xmlParserEntityCheck(ctxt, 0, ent)) { 6968: xmlFreeNodeList(list); 6969: return; 6970: } 6971: 6972: if ((ret == XML_ERR_OK) && (list != NULL)) { 6973: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6974: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6975: (ent->children == NULL)) { 6976: ent->children = list; 6977: if (ctxt->replaceEntities) { 6978: /* 6979: * Prune it directly in the generated document 6980: * except for single text nodes. 
6981: */ 6982: if (((list->type == XML_TEXT_NODE) && 6983: (list->next == NULL)) || 6984: (ctxt->parseMode == XML_PARSE_READER)) { 6985: list->parent = (xmlNodePtr) ent; 6986: list = NULL; 6987: ent->owner = 1; 6988: } else { 6989: ent->owner = 0; 6990: while (list != NULL) { 6991: list->parent = (xmlNodePtr) ctxt->node; 6992: list->doc = ctxt->myDoc; 6993: if (list->next == NULL) 6994: ent->last = list; 6995: list = list->next; 6996: } 6997: list = ent->children; 6998: #ifdef LIBXML_LEGACY_ENABLED 6999: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7000: xmlAddEntityReference(ent, list, NULL); 7001: #endif /* LIBXML_LEGACY_ENABLED */ 7002: } 7003: } else { 7004: ent->owner = 1; 7005: while (list != NULL) { 7006: list->parent = (xmlNodePtr) ent; 7007: xmlSetTreeDoc(list, ent->doc); 7008: if (list->next == NULL) 7009: ent->last = list; 7010: list = list->next; 7011: } 7012: } 7013: } else { 7014: xmlFreeNodeList(list); 7015: list = NULL; 7016: } 7017: } else if ((ret != XML_ERR_OK) && 7018: (ret != XML_WAR_UNDECLARED_ENTITY)) { 7019: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7020: "Entity '%s' failed to parse\n", ent->name); 7021: } else if (list != NULL) { 7022: xmlFreeNodeList(list); 7023: list = NULL; 7024: } 7025: if (ent->checked == 0) 7026: ent->checked = 1; 7027: } else if (ent->checked != 1) { 7028: ctxt->nbentities += ent->checked; 7029: } 7030: 7031: /* 7032: * Now that the entity content has been gathered 7033: * provide it to the application, this can take different forms based 7034: * on the parsing modes. 7035: */ 7036: if (ent->children == NULL) { 7037: /* 7038: * Probably running in SAX mode and the callbacks don't 7039: * build the entity content. 
So unless we already went 7040: * though parsing for first checking go though the entity 7041: * content to generate callbacks associated to the entity 7042: */ 7043: if (was_checked != 0) { 7044: void *user_data; 7045: /* 7046: * This is a bit hackish but this seems the best 7047: * way to make sure both SAX and DOM entity support 7048: * behaves okay. 7049: */ 7050: if (ctxt->userData == ctxt) 7051: user_data = NULL; 7052: else 7053: user_data = ctxt->userData; 7054: 7055: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7056: ctxt->depth++; 7057: ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7058: ent->content, user_data, NULL); 7059: ctxt->depth--; 7060: } else if (ent->etype == 7061: XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7062: ctxt->depth++; 7063: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7064: ctxt->sax, user_data, ctxt->depth, 7065: ent->URI, ent->ExternalID, NULL); 7066: ctxt->depth--; 7067: } else { 7068: ret = XML_ERR_ENTITY_PE_INTERNAL; 7069: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7070: "invalid entity type found\n", NULL); 7071: } 7072: if (ret == XML_ERR_ENTITY_LOOP) { 7073: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7074: return; 7075: } 7076: } 7077: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7078: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7079: /* 7080: * Entity reference callback comes second, it's somewhat 7081: * superfluous but a compatibility to historical behaviour 7082: */ 7083: ctxt->sax->reference(ctxt->userData, ent->name); 7084: } 7085: return; 7086: } 7087: 7088: /* 7089: * If we didn't get any children for the entity being built 7090: */ 7091: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7092: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7093: /* 7094: * Create a node. 
7095: */ 7096: ctxt->sax->reference(ctxt->userData, ent->name); 7097: return; 7098: } 7099: 7100: if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7101: /* 7102: * There is a problem on the handling of _private for entities 7103: * (bug 155816): Should we copy the content of the field from 7104: * the entity (possibly overwriting some value set by the user 7105: * when a copy is created), should we leave it alone, or should 7106: * we try to take care of different situations? The problem 7107: * is exacerbated by the usage of this field by the xmlReader. 7108: * To fix this bug, we look at _private on the created node 7109: * and, if it's NULL, we copy in whatever was in the entity. 7110: * If it's not NULL we leave it alone. This is somewhat of a 7111: * hack - maybe we should have further tests to determine 7112: * what to do. 7113: */ 7114: if ((ctxt->node != NULL) && (ent->children != NULL)) { 7115: /* 7116: * Seems we are generating the DOM content, do 7117: * a simple tree copy for all references except the first 7118: * In the first occurrence list contains the replacement. 7119: * progressive == 2 means we are operating on the Reader 7120: * and since nodes are discarded we must copy all the time. 7121: */ 7122: if (((list == NULL) && (ent->owner == 0)) || 7123: (ctxt->parseMode == XML_PARSE_READER)) { 7124: xmlNodePtr nw = NULL, cur, firstChild = NULL; 7125: 7126: /* 7127: * when operating on a reader, the entities definitions 7128: * are always owning the entities subtree. 
7129: if (ctxt->parseMode == XML_PARSE_READER) 7130: ent->owner = 1; 7131: */ 7132: 7133: cur = ent->children; 7134: while (cur != NULL) { 7135: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7136: if (nw != NULL) { 7137: if (nw->_private == NULL) 7138: nw->_private = cur->_private; 7139: if (firstChild == NULL){ 7140: firstChild = nw; 7141: } 7142: nw = xmlAddChild(ctxt->node, nw); 7143: } 7144: if (cur == ent->last) { 7145: /* 7146: * needed to detect some strange empty 7147: * node cases in the reader tests 7148: */ 7149: if ((ctxt->parseMode == XML_PARSE_READER) && 7150: (nw != NULL) && 7151: (nw->type == XML_ELEMENT_NODE) && 7152: (nw->children == NULL)) 7153: nw->extra = 1; 7154: 7155: break; 7156: } 7157: cur = cur->next; 7158: } 7159: #ifdef LIBXML_LEGACY_ENABLED 7160: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7161: xmlAddEntityReference(ent, firstChild, nw); 7162: #endif /* LIBXML_LEGACY_ENABLED */ 7163: } else if (list == NULL) { 7164: xmlNodePtr nw = NULL, cur, next, last, 7165: firstChild = NULL; 7166: /* 7167: * Copy the entity child list and make it the new 7168: * entity child list. The goal is to make sure any 7169: * ID or REF referenced will be the one from the 7170: * document content and not the entity copy. 
7171: */ 7172: cur = ent->children; 7173: ent->children = NULL; 7174: last = ent->last; 7175: ent->last = NULL; 7176: while (cur != NULL) { 7177: next = cur->next; 7178: cur->next = NULL; 7179: cur->parent = NULL; 7180: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7181: if (nw != NULL) { 7182: if (nw->_private == NULL) 7183: nw->_private = cur->_private; 7184: if (firstChild == NULL){ 7185: firstChild = cur; 7186: } 7187: xmlAddChild((xmlNodePtr) ent, nw); 7188: xmlAddChild(ctxt->node, cur); 7189: } 7190: if (cur == last) 7191: break; 7192: cur = next; 7193: } 7194: if (ent->owner == 0) 7195: ent->owner = 1; 7196: #ifdef LIBXML_LEGACY_ENABLED 7197: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7198: xmlAddEntityReference(ent, firstChild, nw); 7199: #endif /* LIBXML_LEGACY_ENABLED */ 7200: } else { 7201: const xmlChar *nbktext; 7202: 7203: /* 7204: * the name change is to avoid coalescing of the 7205: * node with a possible previous text one which 7206: * would make ent->children a dangling pointer 7207: */ 7208: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7209: -1); 7210: if (ent->children->type == XML_TEXT_NODE) 7211: ent->children->name = nbktext; 7212: if ((ent->last != ent->children) && 7213: (ent->last->type == XML_TEXT_NODE)) 7214: ent->last->name = nbktext; 7215: xmlAddChildList(ctxt->node, ent->children); 7216: } 7217: 7218: /* 7219: * This is to avoid a nasty side effect, see 7220: * characters() in SAX.c 7221: */ 7222: ctxt->nodemem = 0; 7223: ctxt->nodelen = 0; 7224: return; 7225: } 7226: } 7227: } 7228: 7229: /** 7230: * xmlParseEntityRef: 7231: * @ctxt: an XML parser context 7232: * 7233: * parse ENTITY references declarations 7234: * 7235: * [68] EntityRef ::= '&' Name ';' 7236: * 7237: * [ WFC: Entity Declared ] 7238: * In a document without any DTD, a document with only an internal DTD 7239: * subset which contains no parameter entity references, or a document 7240: * with "standalone='yes'", the Name given in the entity reference 
7241: * must match that in an entity declaration, except that well-formed 7242: * documents need not declare any of the following entities: amp, lt, 7243: * gt, apos, quot. The declaration of a parameter entity must precede 7244: * any reference to it. Similarly, the declaration of a general entity 7245: * must precede any reference to it which appears in a default value in an 7246: * attribute-list declaration. Note that if entities are declared in the 7247: * external subset or in external parameter entities, a non-validating 7248: * processor is not obligated to read and process their declarations; 7249: * for such documents, the rule that an entity must be declared is a 7250: * well-formedness constraint only if standalone='yes'. 7251: * 7252: * [ WFC: Parsed Entity ] 7253: * An entity reference must not contain the name of an unparsed entity 7254: * 7255: * Returns the xmlEntityPtr if found, or NULL otherwise. 7256: */ 7257: xmlEntityPtr 7258: xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7259: const xmlChar *name; 7260: xmlEntityPtr ent = NULL; 7261: 7262: GROW; 7263: 7264: if (RAW != '&') 7265: return(NULL); 7266: NEXT; 7267: name = xmlParseName(ctxt); 7268: if (name == NULL) { 7269: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7270: "xmlParseEntityRef: no name\n"); 7271: return(NULL); 7272: } 7273: if (RAW != ';') { 7274: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7275: return(NULL); 7276: } 7277: NEXT; 7278: 7279: /* 7280: * Predefined entites override any extra definition 7281: */ 7282: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7283: ent = xmlGetPredefinedEntity(name); 7284: if (ent != NULL) 7285: return(ent); 7286: } 7287: 7288: /* 7289: * Increate the number of entity references parsed 7290: */ 7291: ctxt->nbentities++; 7292: 7293: /* 7294: * Ask first SAX for entity resolution, otherwise try the 7295: * entities which may have stored in the parser context. 
7296: */ 7297: if (ctxt->sax != NULL) { 7298: if (ctxt->sax->getEntity != NULL) 7299: ent = ctxt->sax->getEntity(ctxt->userData, name); 7300: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7301: (ctxt->options & XML_PARSE_OLDSAX)) 7302: ent = xmlGetPredefinedEntity(name); 7303: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7304: (ctxt->userData==ctxt)) { 7305: ent = xmlSAX2GetEntity(ctxt, name); 7306: } 7307: } 7308: /* 7309: * [ WFC: Entity Declared ] 7310: * In a document without any DTD, a document with only an 7311: * internal DTD subset which contains no parameter entity 7312: * references, or a document with "standalone='yes'", the 7313: * Name given in the entity reference must match that in an 7314: * entity declaration, except that well-formed documents 7315: * need not declare any of the following entities: amp, lt, 7316: * gt, apos, quot. 7317: * The declaration of a parameter entity must precede any 7318: * reference to it. 7319: * Similarly, the declaration of a general entity must 7320: * precede any reference to it which appears in a default 7321: * value in an attribute-list declaration. Note that if 7322: * entities are declared in the external subset or in 7323: * external parameter entities, a non-validating processor 7324: * is not obligated to read and process their declarations; 7325: * for such documents, the rule that an entity must be 7326: * declared is a well-formedness constraint only if 7327: * standalone='yes'. 
7328: */ 7329: if (ent == NULL) { 7330: if ((ctxt->standalone == 1) || 7331: ((ctxt->hasExternalSubset == 0) && 7332: (ctxt->hasPErefs == 0))) { 7333: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7334: "Entity '%s' not defined\n", name); 7335: } else { 7336: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7337: "Entity '%s' not defined\n", name); 7338: if ((ctxt->inSubset == 0) && 7339: (ctxt->sax != NULL) && 7340: (ctxt->sax->reference != NULL)) { 7341: ctxt->sax->reference(ctxt->userData, name); 7342: } 7343: } 7344: ctxt->valid = 0; 7345: } 7346: 7347: /* 7348: * [ WFC: Parsed Entity ] 7349: * An entity reference must not contain the name of an 7350: * unparsed entity 7351: */ 7352: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7353: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7354: "Entity reference to unparsed entity %s\n", name); 7355: } 7356: 7357: /* 7358: * [ WFC: No External Entity References ] 7359: * Attribute values cannot contain direct or indirect 7360: * entity references to external entities. 7361: */ 7362: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7363: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7364: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7365: "Attribute references external entity '%s'\n", name); 7366: } 7367: /* 7368: * [ WFC: No < in Attribute Values ] 7369: * The replacement text of any entity referred to directly or 7370: * indirectly in an attribute value (other than "<") must 7371: * not contain a <. 7372: */ 7373: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7374: (ent != NULL) && (ent->content != NULL) && 7375: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7376: (xmlStrchr(ent->content, '<'))) { 7377: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7378: "'<' in entity '%s' is not allowed in attributes values\n", name); 7379: } 7380: 7381: /* 7382: * Internal check, no parameter entities here ... 
7383: */ 7384: else { 7385: switch (ent->etype) { 7386: case XML_INTERNAL_PARAMETER_ENTITY: 7387: case XML_EXTERNAL_PARAMETER_ENTITY: 7388: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7389: "Attempt to reference the parameter entity '%s'\n", 7390: name); 7391: break; 7392: default: 7393: break; 7394: } 7395: } 7396: 7397: /* 7398: * [ WFC: No Recursion ] 7399: * A parsed entity must not contain a recursive reference 7400: * to itself, either directly or indirectly. 7401: * Done somewhere else 7402: */ 7403: return(ent); 7404: } 7405: 7406: /** 7407: * xmlParseStringEntityRef: 7408: * @ctxt: an XML parser context 7409: * @str: a pointer to an index in the string 7410: * 7411: * parse ENTITY references declarations, but this version parses it from 7412: * a string value. 7413: * 7414: * [68] EntityRef ::= '&' Name ';' 7415: * 7416: * [ WFC: Entity Declared ] 7417: * In a document without any DTD, a document with only an internal DTD 7418: * subset which contains no parameter entity references, or a document 7419: * with "standalone='yes'", the Name given in the entity reference 7420: * must match that in an entity declaration, except that well-formed 7421: * documents need not declare any of the following entities: amp, lt, 7422: * gt, apos, quot. The declaration of a parameter entity must precede 7423: * any reference to it. Similarly, the declaration of a general entity 7424: * must precede any reference to it which appears in a default value in an 7425: * attribute-list declaration. Note that if entities are declared in the 7426: * external subset or in external parameter entities, a non-validating 7427: * processor is not obligated to read and process their declarations; 7428: * for such documents, the rule that an entity must be declared is a 7429: * well-formedness constraint only if standalone='yes'. 
7430: * 7431: * [ WFC: Parsed Entity ] 7432: * An entity reference must not contain the name of an unparsed entity 7433: * 7434: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 7435: * is updated to the current location in the string. 7436: */ 7437: static xmlEntityPtr 7438: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7439: xmlChar *name; 7440: const xmlChar *ptr; 7441: xmlChar cur; 7442: xmlEntityPtr ent = NULL; 7443: 7444: if ((str == NULL) || (*str == NULL)) 7445: return(NULL); 7446: ptr = *str; 7447: cur = *ptr; 7448: if (cur != '&') 7449: return(NULL); 7450: 7451: ptr++; 7452: name = xmlParseStringName(ctxt, &ptr); 7453: if (name == NULL) { 7454: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7455: "xmlParseStringEntityRef: no name\n"); 7456: *str = ptr; 7457: return(NULL); 7458: } 7459: if (*ptr != ';') { 7460: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7461: xmlFree(name); 7462: *str = ptr; 7463: return(NULL); 7464: } 7465: ptr++; 7466: 7467: 7468: /* 7469: * Predefined entites override any extra definition 7470: */ 7471: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7472: ent = xmlGetPredefinedEntity(name); 7473: if (ent != NULL) { 7474: xmlFree(name); 7475: *str = ptr; 7476: return(ent); 7477: } 7478: } 7479: 7480: /* 7481: * Increate the number of entity references parsed 7482: */ 7483: ctxt->nbentities++; 7484: 7485: /* 7486: * Ask first SAX for entity resolution, otherwise try the 7487: * entities which may have stored in the parser context. 
7488: */ 7489: if (ctxt->sax != NULL) { 7490: if (ctxt->sax->getEntity != NULL) 7491: ent = ctxt->sax->getEntity(ctxt->userData, name); 7492: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7493: ent = xmlGetPredefinedEntity(name); 7494: if ((ent == NULL) && (ctxt->userData==ctxt)) { 7495: ent = xmlSAX2GetEntity(ctxt, name); 7496: } 7497: } 7498: 7499: /* 7500: * [ WFC: Entity Declared ] 7501: * In a document without any DTD, a document with only an 7502: * internal DTD subset which contains no parameter entity 7503: * references, or a document with "standalone='yes'", the 7504: * Name given in the entity reference must match that in an 7505: * entity declaration, except that well-formed documents 7506: * need not declare any of the following entities: amp, lt, 7507: * gt, apos, quot. 7508: * The declaration of a parameter entity must precede any 7509: * reference to it. 7510: * Similarly, the declaration of a general entity must 7511: * precede any reference to it which appears in a default 7512: * value in an attribute-list declaration. Note that if 7513: * entities are declared in the external subset or in 7514: * external parameter entities, a non-validating processor 7515: * is not obligated to read and process their declarations; 7516: * for such documents, the rule that an entity must be 7517: * declared is a well-formedness constraint only if 7518: * standalone='yes'. 7519: */ 7520: if (ent == NULL) { 7521: if ((ctxt->standalone == 1) || 7522: ((ctxt->hasExternalSubset == 0) && 7523: (ctxt->hasPErefs == 0))) { 7524: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7525: "Entity '%s' not defined\n", name); 7526: } else { 7527: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7528: "Entity '%s' not defined\n", 7529: name); 7530: } 7531: /* TODO ? 
check regressions ctxt->valid = 0; */ 7532: } 7533: 7534: /* 7535: * [ WFC: Parsed Entity ] 7536: * An entity reference must not contain the name of an 7537: * unparsed entity 7538: */ 7539: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7540: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7541: "Entity reference to unparsed entity %s\n", name); 7542: } 7543: 7544: /* 7545: * [ WFC: No External Entity References ] 7546: * Attribute values cannot contain direct or indirect 7547: * entity references to external entities. 7548: */ 7549: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7550: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7551: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7552: "Attribute references external entity '%s'\n", name); 7553: } 7554: /* 7555: * [ WFC: No < in Attribute Values ] 7556: * The replacement text of any entity referred to directly or 7557: * indirectly in an attribute value (other than "<") must 7558: * not contain a <. 7559: */ 7560: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7561: (ent != NULL) && (ent->content != NULL) && 7562: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7563: (xmlStrchr(ent->content, '<'))) { 7564: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7565: "'<' in entity '%s' is not allowed in attributes values\n", 7566: name); 7567: } 7568: 7569: /* 7570: * Internal check, no parameter entities here ... 7571: */ 7572: else { 7573: switch (ent->etype) { 7574: case XML_INTERNAL_PARAMETER_ENTITY: 7575: case XML_EXTERNAL_PARAMETER_ENTITY: 7576: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7577: "Attempt to reference the parameter entity '%s'\n", 7578: name); 7579: break; 7580: default: 7581: break; 7582: } 7583: } 7584: 7585: /* 7586: * [ WFC: No Recursion ] 7587: * A parsed entity must not contain a recursive reference 7588: * to itself, either directly or indirectly. 
7589: * Done somewhere else 7590: */ 7591: 7592: xmlFree(name); 7593: *str = ptr; 7594: return(ent); 7595: } 7596: 7597: /** 7598: * xmlParsePEReference: 7599: * @ctxt: an XML parser context 7600: * 7601: * parse PEReference declarations 7602: * The entity content is handled directly by pushing it's content as 7603: * a new input stream. 7604: * 7605: * [69] PEReference ::= '%' Name ';' 7606: * 7607: * [ WFC: No Recursion ] 7608: * A parsed entity must not contain a recursive 7609: * reference to itself, either directly or indirectly. 7610: * 7611: * [ WFC: Entity Declared ] 7612: * In a document without any DTD, a document with only an internal DTD 7613: * subset which contains no parameter entity references, or a document 7614: * with "standalone='yes'", ... ... The declaration of a parameter 7615: * entity must precede any reference to it... 7616: * 7617: * [ VC: Entity Declared ] 7618: * In a document with an external subset or external parameter entities 7619: * with "standalone='no'", ... ... The declaration of a parameter entity 7620: * must precede any reference to it... 7621: * 7622: * [ WFC: In DTD ] 7623: * Parameter-entity references may only appear in the DTD. 7624: * NOTE: misleading but this is handled. 
7625: */ 7626: void 7627: xmlParsePEReference(xmlParserCtxtPtr ctxt) 7628: { 7629: const xmlChar *name; 7630: xmlEntityPtr entity = NULL; 7631: xmlParserInputPtr input; 7632: 7633: if (RAW != '%') 7634: return; 7635: NEXT; 7636: name = xmlParseName(ctxt); 7637: if (name == NULL) { 7638: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7639: "xmlParsePEReference: no name\n"); 7640: return; 7641: } 7642: if (RAW != ';') { 7643: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7644: return; 7645: } 7646: 7647: NEXT; 7648: 7649: /* 7650: * Increate the number of entity references parsed 7651: */ 7652: ctxt->nbentities++; 7653: 7654: /* 7655: * Request the entity from SAX 7656: */ 7657: if ((ctxt->sax != NULL) && 7658: (ctxt->sax->getParameterEntity != NULL)) 7659: entity = ctxt->sax->getParameterEntity(ctxt->userData, 7660: name); 7661: if (entity == NULL) { 7662: /* 7663: * [ WFC: Entity Declared ] 7664: * In a document without any DTD, a document with only an 7665: * internal DTD subset which contains no parameter entity 7666: * references, or a document with "standalone='yes'", ... 7667: * ... The declaration of a parameter entity must precede 7668: * any reference to it... 7669: */ 7670: if ((ctxt->standalone == 1) || 7671: ((ctxt->hasExternalSubset == 0) && 7672: (ctxt->hasPErefs == 0))) { 7673: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7674: "PEReference: %%%s; not found\n", 7675: name); 7676: } else { 7677: /* 7678: * [ VC: Entity Declared ] 7679: * In a document with an external subset or external 7680: * parameter entities with "standalone='no'", ... 7681: * ... The declaration of a parameter entity must 7682: * precede any reference to it... 
7683: */ 7684: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7685: "PEReference: %%%s; not found\n", 7686: name, NULL); 7687: ctxt->valid = 0; 7688: } 7689: } else { 7690: /* 7691: * Internal checking in case the entity quest barfed 7692: */ 7693: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7694: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7695: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7696: "Internal: %%%s; is not a parameter entity\n", 7697: name, NULL); 7698: } else if (ctxt->input->free != deallocblankswrapper) { 7699: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7700: if (xmlPushInput(ctxt, input) < 0) 7701: return; 7702: } else { 7703: /* 7704: * TODO !!! 7705: * handle the extra spaces added before and after 7706: * c.f. http://www.w3.org/TR/REC-xml#as-PE 7707: */ 7708: input = xmlNewEntityInputStream(ctxt, entity); 7709: if (xmlPushInput(ctxt, input) < 0) 7710: return; 7711: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7712: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7713: (IS_BLANK_CH(NXT(5)))) { 7714: xmlParseTextDecl(ctxt); 7715: if (ctxt->errNo == 7716: XML_ERR_UNSUPPORTED_ENCODING) { 7717: /* 7718: * The XML REC instructs us to stop parsing 7719: * right here 7720: */ 7721: ctxt->instate = XML_PARSER_EOF; 7722: return; 7723: } 7724: } 7725: } 7726: } 7727: ctxt->hasPErefs = 1; 7728: } 7729: 7730: /** 7731: * xmlLoadEntityContent: 7732: * @ctxt: an XML parser context 7733: * @entity: an unloaded system entity 7734: * 7735: * Load the original content of the given system entity from the 7736: * ExternalID/SystemID given. 
This is to be used for Included in Literal 7737: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7738: * 7739: * Returns 0 in case of success and -1 in case of failure 7740: */ 7741: static int 7742: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7743: xmlParserInputPtr input; 7744: xmlBufferPtr buf; 7745: int l, c; 7746: int count = 0; 7747: 7748: if ((ctxt == NULL) || (entity == NULL) || 7749: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7750: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7751: (entity->content != NULL)) { 7752: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7753: "xmlLoadEntityContent parameter error"); 7754: return(-1); 7755: } 7756: 7757: if (xmlParserDebugEntities) 7758: xmlGenericError(xmlGenericErrorContext, 7759: "Reading %s entity content input\n", entity->name); 7760: 7761: buf = xmlBufferCreate(); 7762: if (buf == NULL) { 7763: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7764: "xmlLoadEntityContent parameter error"); 7765: return(-1); 7766: } 7767: 7768: input = xmlNewEntityInputStream(ctxt, entity); 7769: if (input == NULL) { 7770: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7771: "xmlLoadEntityContent input error"); 7772: xmlBufferFree(buf); 7773: return(-1); 7774: } 7775: 7776: /* 7777: * Push the entity as the current input, read char by char 7778: * saving to the buffer until the end of the entity or an error 7779: */ 7780: if (xmlPushInput(ctxt, input) < 0) { 7781: xmlBufferFree(buf); 7782: return(-1); 7783: } 7784: 7785: GROW; 7786: c = CUR_CHAR(l); 7787: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7788: (IS_CHAR(c))) { 7789: xmlBufferAdd(buf, ctxt->input->cur, l); 7790: if (count++ > 100) { 7791: count = 0; 7792: GROW; 7793: } 7794: NEXTL(l); 7795: c = CUR_CHAR(l); 7796: } 7797: 7798: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7799: xmlPopInput(ctxt); 7800: } else if (!IS_CHAR(c)) { 7801: xmlFatalErrMsgInt(ctxt, 
XML_ERR_INVALID_CHAR, 7802: "xmlLoadEntityContent: invalid char value %d\n", 7803: c); 7804: xmlBufferFree(buf); 7805: return(-1); 7806: } 7807: entity->content = buf->content; 7808: buf->content = NULL; 7809: xmlBufferFree(buf); 7810: 7811: return(0); 7812: } 7813: 7814: /** 7815: * xmlParseStringPEReference: 7816: * @ctxt: an XML parser context 7817: * @str: a pointer to an index in the string 7818: * 7819: * parse PEReference declarations 7820: * 7821: * [69] PEReference ::= '%' Name ';' 7822: * 7823: * [ WFC: No Recursion ] 7824: * A parsed entity must not contain a recursive 7825: * reference to itself, either directly or indirectly. 7826: * 7827: * [ WFC: Entity Declared ] 7828: * In a document without any DTD, a document with only an internal DTD 7829: * subset which contains no parameter entity references, or a document 7830: * with "standalone='yes'", ... ... The declaration of a parameter 7831: * entity must precede any reference to it... 7832: * 7833: * [ VC: Entity Declared ] 7834: * In a document with an external subset or external parameter entities 7835: * with "standalone='no'", ... ... The declaration of a parameter entity 7836: * must precede any reference to it... 7837: * 7838: * [ WFC: In DTD ] 7839: * Parameter-entity references may only appear in the DTD. 7840: * NOTE: misleading but this is handled. 7841: * 7842: * Returns the string of the entity content. 
7843: * str is updated to the current value of the index 7844: */ 7845: static xmlEntityPtr 7846: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7847: const xmlChar *ptr; 7848: xmlChar cur; 7849: xmlChar *name; 7850: xmlEntityPtr entity = NULL; 7851: 7852: if ((str == NULL) || (*str == NULL)) return(NULL); 7853: ptr = *str; 7854: cur = *ptr; 7855: if (cur != '%') 7856: return(NULL); 7857: ptr++; 7858: name = xmlParseStringName(ctxt, &ptr); 7859: if (name == NULL) { 7860: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7861: "xmlParseStringPEReference: no name\n"); 7862: *str = ptr; 7863: return(NULL); 7864: } 7865: cur = *ptr; 7866: if (cur != ';') { 7867: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7868: xmlFree(name); 7869: *str = ptr; 7870: return(NULL); 7871: } 7872: ptr++; 7873: 7874: /* 7875: * Increate the number of entity references parsed 7876: */ 7877: ctxt->nbentities++; 7878: 7879: /* 7880: * Request the entity from SAX 7881: */ 7882: if ((ctxt->sax != NULL) && 7883: (ctxt->sax->getParameterEntity != NULL)) 7884: entity = ctxt->sax->getParameterEntity(ctxt->userData, 7885: name); 7886: if (entity == NULL) { 7887: /* 7888: * [ WFC: Entity Declared ] 7889: * In a document without any DTD, a document with only an 7890: * internal DTD subset which contains no parameter entity 7891: * references, or a document with "standalone='yes'", ... 7892: * ... The declaration of a parameter entity must precede 7893: * any reference to it... 7894: */ 7895: if ((ctxt->standalone == 1) || 7896: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7897: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7898: "PEReference: %%%s; not found\n", name); 7899: } else { 7900: /* 7901: * [ VC: Entity Declared ] 7902: * In a document with an external subset or external 7903: * parameter entities with "standalone='no'", ... 7904: * ... The declaration of a parameter entity must 7905: * precede any reference to it... 
7906: */ 7907: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7908: "PEReference: %%%s; not found\n", 7909: name, NULL); 7910: ctxt->valid = 0; 7911: } 7912: } else { 7913: /* 7914: * Internal checking in case the entity quest barfed 7915: */ 7916: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7917: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7918: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7919: "%%%s; is not a parameter entity\n", 7920: name, NULL); 7921: } 7922: } 7923: ctxt->hasPErefs = 1; 7924: xmlFree(name); 7925: *str = ptr; 7926: return(entity); 7927: } 7928: 7929: /** 7930: * xmlParseDocTypeDecl: 7931: * @ctxt: an XML parser context 7932: * 7933: * parse a DOCTYPE declaration 7934: * 7935: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7936: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7937: * 7938: * [ VC: Root Element Type ] 7939: * The Name in the document type declaration must match the element 7940: * type of the root element. 7941: */ 7942: 7943: void 7944: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7945: const xmlChar *name = NULL; 7946: xmlChar *ExternalID = NULL; 7947: xmlChar *URI = NULL; 7948: 7949: /* 7950: * We know that '<!DOCTYPE' has been detected. 7951: */ 7952: SKIP(9); 7953: 7954: SKIP_BLANKS; 7955: 7956: /* 7957: * Parse the DOCTYPE name. 7958: */ 7959: name = xmlParseName(ctxt); 7960: if (name == NULL) { 7961: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7962: "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7963: } 7964: ctxt->intSubName = name; 7965: 7966: SKIP_BLANKS; 7967: 7968: /* 7969: * Check for SystemID and ExternalID 7970: */ 7971: URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7972: 7973: if ((URI != NULL) || (ExternalID != NULL)) { 7974: ctxt->hasExternalSubset = 1; 7975: } 7976: ctxt->extSubURI = URI; 7977: ctxt->extSubSystem = ExternalID; 7978: 7979: SKIP_BLANKS; 7980: 7981: /* 7982: * Create and update the internal subset. 
7983: */ 7984: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7985: (!ctxt->disableSAX)) 7986: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7987: 7988: /* 7989: * Is there any internal subset declarations ? 7990: * they are handled separately in xmlParseInternalSubset() 7991: */ 7992: if (RAW == '[') 7993: return; 7994: 7995: /* 7996: * We should be at the end of the DOCTYPE declaration. 7997: */ 7998: if (RAW != '>') { 7999: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8000: } 8001: NEXT; 8002: } 8003: 8004: /** 8005: * xmlParseInternalSubset: 8006: * @ctxt: an XML parser context 8007: * 8008: * parse the internal subset declaration 8009: * 8010: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8011: */ 8012: 8013: static void 8014: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8015: /* 8016: * Is there any DTD definition ? 8017: */ 8018: if (RAW == '[') { 8019: ctxt->instate = XML_PARSER_DTD; 8020: NEXT; 8021: /* 8022: * Parse the succession of Markup declarations and 8023: * PEReferences. 8024: * Subsequence (markupdecl | PEReference | S)* 8025: */ 8026: while (RAW != ']') { 8027: const xmlChar *check = CUR_PTR; 8028: unsigned int cons = ctxt->input->consumed; 8029: 8030: SKIP_BLANKS; 8031: xmlParseMarkupDecl(ctxt); 8032: xmlParsePEReference(ctxt); 8033: 8034: /* 8035: * Pop-up of finished entities. 8036: */ 8037: while ((RAW == 0) && (ctxt->inputNr > 1)) 8038: xmlPopInput(ctxt); 8039: 8040: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8041: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8042: "xmlParseInternalSubset: error detected in Markup declaration\n"); 8043: break; 8044: } 8045: } 8046: if (RAW == ']') { 8047: NEXT; 8048: SKIP_BLANKS; 8049: } 8050: } 8051: 8052: /* 8053: * We should be at the end of the DOCTYPE declaration. 
8054: */ 8055: if (RAW != '>') { 8056: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8057: } 8058: NEXT; 8059: } 8060: 8061: #ifdef LIBXML_SAX1_ENABLED 8062: /** 8063: * xmlParseAttribute: 8064: * @ctxt: an XML parser context 8065: * @value: a xmlChar ** used to store the value of the attribute 8066: * 8067: * parse an attribute 8068: * 8069: * [41] Attribute ::= Name Eq AttValue 8070: * 8071: * [ WFC: No External Entity References ] 8072: * Attribute values cannot contain direct or indirect entity references 8073: * to external entities. 8074: * 8075: * [ WFC: No < in Attribute Values ] 8076: * The replacement text of any entity referred to directly or indirectly in 8077: * an attribute value (other than "<") must not contain a <. 8078: * 8079: * [ VC: Attribute Value Type ] 8080: * The attribute must have been declared; the value must be of the type 8081: * declared for it. 8082: * 8083: * [25] Eq ::= S? '=' S? 8084: * 8085: * With namespace: 8086: * 8087: * [NS 11] Attribute ::= QName Eq AttValue 8088: * 8089: * Also the case QName == xmlns:??? is handled independently as a namespace 8090: * definition. 8091: * 8092: * Returns the attribute name, and the value in *value. 
8093: */ 8094: 8095: const xmlChar * 8096: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8097: const xmlChar *name; 8098: xmlChar *val; 8099: 8100: *value = NULL; 8101: GROW; 8102: name = xmlParseName(ctxt); 8103: if (name == NULL) { 8104: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8105: "error parsing attribute name\n"); 8106: return(NULL); 8107: } 8108: 8109: /* 8110: * read the value 8111: */ 8112: SKIP_BLANKS; 8113: if (RAW == '=') { 8114: NEXT; 8115: SKIP_BLANKS; 8116: val = xmlParseAttValue(ctxt); 8117: ctxt->instate = XML_PARSER_CONTENT; 8118: } else { 8119: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8120: "Specification mandate value for attribute %s\n", name); 8121: return(NULL); 8122: } 8123: 8124: /* 8125: * Check that xml:lang conforms to the specification 8126: * No more registered as an error, just generate a warning now 8127: * since this was deprecated in XML second edition 8128: */ 8129: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8130: if (!xmlCheckLanguageID(val)) { 8131: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8132: "Malformed value for xml:lang : %s\n", 8133: val, NULL); 8134: } 8135: } 8136: 8137: /* 8138: * Check that xml:space conforms to the specification 8139: */ 8140: if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8141: if (xmlStrEqual(val, BAD_CAST "default")) 8142: *(ctxt->space) = 0; 8143: else if (xmlStrEqual(val, BAD_CAST "preserve")) 8144: *(ctxt->space) = 1; 8145: else { 8146: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8147: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8148: val, NULL); 8149: } 8150: } 8151: 8152: *value = val; 8153: return(name); 8154: } 8155: 8156: /** 8157: * xmlParseStartTag: 8158: * @ctxt: an XML parser context 8159: * 8160: * parse a start of tag either for rule element or 8161: * EmptyElement. In both case we don't parse the tag closing chars. 8162: * 8163: * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8164: * 8165: * [ WFC: Unique Att Spec ] 8166: * No attribute name may appear more than once in the same start-tag or 8167: * empty-element tag. 8168: * 8169: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8170: * 8171: * [ WFC: Unique Att Spec ] 8172: * No attribute name may appear more than once in the same start-tag or 8173: * empty-element tag. 8174: * 8175: * With namespace: 8176: * 8177: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8178: * 8179: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8180: * 8181: * Returns the element name parsed 8182: */ 8183: 8184: const xmlChar * 8185: xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8186: const xmlChar *name; 8187: const xmlChar *attname; 8188: xmlChar *attvalue; 8189: const xmlChar **atts = ctxt->atts; 8190: int nbatts = 0; 8191: int maxatts = ctxt->maxatts; 8192: int i; 8193: 8194: if (RAW != '<') return(NULL); 8195: NEXT1; 8196: 8197: name = xmlParseName(ctxt); 8198: if (name == NULL) { 8199: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8200: "xmlParseStartTag: invalid element name\n"); 8201: return(NULL); 8202: } 8203: 8204: /* 8205: * Now parse the attributes, it ends up with the ending 8206: * 8207: * (S Attribute)* S? 8208: */ 8209: SKIP_BLANKS; 8210: GROW; 8211: 8212: while ((RAW != '>') && 8213: ((RAW != '/') || (NXT(1) != '>')) && 8214: (IS_BYTE_CHAR(RAW))) { 8215: const xmlChar *q = CUR_PTR; 8216: unsigned int cons = ctxt->input->consumed; 8217: 8218: attname = xmlParseAttribute(ctxt, &attvalue); 8219: if ((attname != NULL) && (attvalue != NULL)) { 8220: /* 8221: * [ WFC: Unique Att Spec ] 8222: * No attribute name may appear more than once in the same 8223: * start-tag or empty-element tag. 
8224: */ 8225: for (i = 0; i < nbatts;i += 2) { 8226: if (xmlStrEqual(atts[i], attname)) { 8227: xmlErrAttributeDup(ctxt, NULL, attname); 8228: xmlFree(attvalue); 8229: goto failed; 8230: } 8231: } 8232: /* 8233: * Add the pair to atts 8234: */ 8235: if (atts == NULL) { 8236: maxatts = 22; /* allow for 10 attrs by default */ 8237: atts = (const xmlChar **) 8238: xmlMalloc(maxatts * sizeof(xmlChar *)); 8239: if (atts == NULL) { 8240: xmlErrMemory(ctxt, NULL); 8241: if (attvalue != NULL) 8242: xmlFree(attvalue); 8243: goto failed; 8244: } 8245: ctxt->atts = atts; 8246: ctxt->maxatts = maxatts; 8247: } else if (nbatts + 4 > maxatts) { 8248: const xmlChar **n; 8249: 8250: maxatts *= 2; 8251: n = (const xmlChar **) xmlRealloc((void *) atts, 8252: maxatts * sizeof(const xmlChar *)); 8253: if (n == NULL) { 8254: xmlErrMemory(ctxt, NULL); 8255: if (attvalue != NULL) 8256: xmlFree(attvalue); 8257: goto failed; 8258: } 8259: atts = n; 8260: ctxt->atts = atts; 8261: ctxt->maxatts = maxatts; 8262: } 8263: atts[nbatts++] = attname; 8264: atts[nbatts++] = attvalue; 8265: atts[nbatts] = NULL; 8266: atts[nbatts + 1] = NULL; 8267: } else { 8268: if (attvalue != NULL) 8269: xmlFree(attvalue); 8270: } 8271: 8272: failed: 8273: 8274: GROW 8275: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8276: break; 8277: if (!IS_BLANK_CH(RAW)) { 8278: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8279: "attributes construct error\n"); 8280: } 8281: SKIP_BLANKS; 8282: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8283: (attname == NULL) && (attvalue == NULL)) { 8284: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8285: "xmlParseStartTag: problem parsing attributes\n"); 8286: break; 8287: } 8288: SHRINK; 8289: GROW; 8290: } 8291: 8292: /* 8293: * SAX: Start of Element ! 
8294: */ 8295: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8296: (!ctxt->disableSAX)) { 8297: if (nbatts > 0) 8298: ctxt->sax->startElement(ctxt->userData, name, atts); 8299: else 8300: ctxt->sax->startElement(ctxt->userData, name, NULL); 8301: } 8302: 8303: if (atts != NULL) { 8304: /* Free only the content strings */ 8305: for (i = 1;i < nbatts;i+=2) 8306: if (atts[i] != NULL) 8307: xmlFree((xmlChar *) atts[i]); 8308: } 8309: return(name); 8310: } 8311: 8312: /** 8313: * xmlParseEndTag1: 8314: * @ctxt: an XML parser context 8315: * @line: line of the start tag 8316: * @nsNr: number of namespaces on the start tag 8317: * 8318: * parse an end of tag 8319: * 8320: * [42] ETag ::= '</' Name S? '>' 8321: * 8322: * With namespace 8323: * 8324: * [NS 9] ETag ::= '</' QName S? '>' 8325: */ 8326: 8327: static void 8328: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8329: const xmlChar *name; 8330: 8331: GROW; 8332: if ((RAW != '<') || (NXT(1) != '/')) { 8333: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8334: "xmlParseEndTag: '</' not found\n"); 8335: return; 8336: } 8337: SKIP(2); 8338: 8339: name = xmlParseNameAndCompare(ctxt,ctxt->name); 8340: 8341: /* 8342: * We should definitely be at the ending "S? '>'" part 8343: */ 8344: GROW; 8345: SKIP_BLANKS; 8346: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8347: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8348: } else 8349: NEXT1; 8350: 8351: /* 8352: * [ WFC: Element Type Match ] 8353: * The Name in an element's end-tag must match the element type in the 8354: * start-tag. 
8355: * 8356: */ 8357: if (name != (xmlChar*)1) { 8358: if (name == NULL) name = BAD_CAST "unparseable"; 8359: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8360: "Opening and ending tag mismatch: %s line %d and %s\n", 8361: ctxt->name, line, name); 8362: } 8363: 8364: /* 8365: * SAX: End of Tag 8366: */ 8367: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8368: (!ctxt->disableSAX)) 8369: ctxt->sax->endElement(ctxt->userData, ctxt->name); 8370: 8371: namePop(ctxt); 8372: spacePop(ctxt); 8373: return; 8374: } 8375: 8376: /** 8377: * xmlParseEndTag: 8378: * @ctxt: an XML parser context 8379: * 8380: * parse an end of tag 8381: * 8382: * [42] ETag ::= '</' Name S? '>' 8383: * 8384: * With namespace 8385: * 8386: * [NS 9] ETag ::= '</' QName S? '>' 8387: */ 8388: 8389: void 8390: xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8391: xmlParseEndTag1(ctxt, 0); 8392: } 8393: #endif /* LIBXML_SAX1_ENABLED */ 8394: 8395: /************************************************************************ 8396: * * 8397: * SAX 2 specific operations * 8398: * * 8399: ************************************************************************/ 8400: 8401: /* 8402: * xmlGetNamespace: 8403: * @ctxt: an XML parser context 8404: * @prefix: the prefix to lookup 8405: * 8406: * Lookup the namespace name for the @prefix (which ca be NULL) 8407: * The prefix must come from the @ctxt->dict dictionnary 8408: * 8409: * Returns the namespace name or NULL if not bound 8410: */ 8411: static const xmlChar * 8412: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8413: int i; 8414: 8415: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8416: for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8417: if (ctxt->nsTab[i] == prefix) { 8418: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8419: return(NULL); 8420: return(ctxt->nsTab[i + 1]); 8421: } 8422: return(NULL); 8423: } 8424: 8425: /** 8426: * xmlParseQName: 8427: * @ctxt: an XML parser context 8428: * @prefix: pointer to store 
the prefix part 8429: * 8430: * parse an XML Namespace QName 8431: * 8432: * [6] QName ::= (Prefix ':')? LocalPart 8433: * [7] Prefix ::= NCName 8434: * [8] LocalPart ::= NCName 8435: * 8436: * Returns the Name parsed or NULL 8437: */ 8438: 8439: static const xmlChar * 8440: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8441: const xmlChar *l, *p; 8442: 8443: GROW; 8444: 8445: l = xmlParseNCName(ctxt); 8446: if (l == NULL) { 8447: if (CUR == ':') { 8448: l = xmlParseName(ctxt); 8449: if (l != NULL) { 8450: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8451: "Failed to parse QName '%s'\n", l, NULL, NULL); 8452: *prefix = NULL; 8453: return(l); 8454: } 8455: } 8456: return(NULL); 8457: } 8458: if (CUR == ':') { 8459: NEXT; 8460: p = l; 8461: l = xmlParseNCName(ctxt); 8462: if (l == NULL) { 8463: xmlChar *tmp; 8464: 8465: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8466: "Failed to parse QName '%s:'\n", p, NULL, NULL); 8467: l = xmlParseNmtoken(ctxt); 8468: if (l == NULL) 8469: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8470: else { 8471: tmp = xmlBuildQName(l, p, NULL, 0); 8472: xmlFree((char *)l); 8473: } 8474: p = xmlDictLookup(ctxt->dict, tmp, -1); 8475: if (tmp != NULL) xmlFree(tmp); 8476: *prefix = NULL; 8477: return(p); 8478: } 8479: if (CUR == ':') { 8480: xmlChar *tmp; 8481: 8482: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8483: "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8484: NEXT; 8485: tmp = (xmlChar *) xmlParseName(ctxt); 8486: if (tmp != NULL) { 8487: tmp = xmlBuildQName(tmp, l, NULL, 0); 8488: l = xmlDictLookup(ctxt->dict, tmp, -1); 8489: if (tmp != NULL) xmlFree(tmp); 8490: *prefix = p; 8491: return(l); 8492: } 8493: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8494: l = xmlDictLookup(ctxt->dict, tmp, -1); 8495: if (tmp != NULL) xmlFree(tmp); 8496: *prefix = p; 8497: return(l); 8498: } 8499: *prefix = p; 8500: } else 8501: *prefix = NULL; 8502: return(l); 8503: } 8504: 8505: /** 8506: * xmlParseQNameAndCompare: 8507: * @ctxt: an XML parser context 8508: * 
@name: the localname 8509: * @prefix: the prefix, if any. 8510: * 8511: * parse an XML name and compares for match 8512: * (specialized for endtag parsing) 8513: * 8514: * Returns NULL for an illegal name, (xmlChar*) 1 for success 8515: * and the name for mismatch 8516: */ 8517: 8518: static const xmlChar * 8519: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8520: xmlChar const *prefix) { 8521: const xmlChar *cmp; 8522: const xmlChar *in; 8523: const xmlChar *ret; 8524: const xmlChar *prefix2; 8525: 8526: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8527: 8528: GROW; 8529: in = ctxt->input->cur; 8530: 8531: cmp = prefix; 8532: while (*in != 0 && *in == *cmp) { 8533: ++in; 8534: ++cmp; 8535: } 8536: if ((*cmp == 0) && (*in == ':')) { 8537: in++; 8538: cmp = name; 8539: while (*in != 0 && *in == *cmp) { 8540: ++in; 8541: ++cmp; 8542: } 8543: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8544: /* success */ 8545: ctxt->input->cur = in; 8546: return((const xmlChar*) 1); 8547: } 8548: } 8549: /* 8550: * all strings coms from the dictionary, equality can be done directly 8551: */ 8552: ret = xmlParseQName (ctxt, &prefix2); 8553: if ((ret == name) && (prefix == prefix2)) 8554: return((const xmlChar*) 1); 8555: return ret; 8556: } 8557: 8558: /** 8559: * xmlParseAttValueInternal: 8560: * @ctxt: an XML parser context 8561: * @len: attribute len result 8562: * @alloc: whether the attribute was reallocated as a new string 8563: * @normalize: if 1 then further non-CDATA normalization must be done 8564: * 8565: * parse a value for an attribute. 8566: * NOTE: if no normalization is needed, the routine will return pointers 8567: * directly from the data buffer. 
8568: * 8569: * 3.3.3 Attribute-Value Normalization: 8570: * Before the value of an attribute is passed to the application or 8571: * checked for validity, the XML processor must normalize it as follows: 8572: * - a character reference is processed by appending the referenced 8573: * character to the attribute value 8574: * - an entity reference is processed by recursively processing the 8575: * replacement text of the entity 8576: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8577: * appending #x20 to the normalized value, except that only a single 8578: * #x20 is appended for a "#xD#xA" sequence that is part of an external 8579: * parsed entity or the literal entity value of an internal parsed entity 8580: * - other characters are processed by appending them to the normalized value 8581: * If the declared value is not CDATA, then the XML processor must further 8582: * process the normalized attribute value by discarding any leading and 8583: * trailing space (#x20) characters, and by replacing sequences of space 8584: * (#x20) characters by a single space (#x20) character. 8585: * All attributes for which no declaration has been read should be treated 8586: * by a non-validating parser as if declared CDATA. 8587: * 8588: * Returns the AttValue parsed or NULL. The value has to be freed by the 8589: * caller if it was copied, this can be detected by val[*len] == 0. 
8590: */ 8591: 8592: static xmlChar * 8593: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8594: int normalize) 8595: { 8596: xmlChar limit = 0; 8597: const xmlChar *in = NULL, *start, *end, *last; 8598: xmlChar *ret = NULL; 8599: 8600: GROW; 8601: in = (xmlChar *) CUR_PTR; 8602: if (*in != '"' && *in != '\'') { 8603: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8604: return (NULL); 8605: } 8606: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8607: 8608: /* 8609: * try to handle in this routine the most common case where no 8610: * allocation of a new string is required and where content is 8611: * pure ASCII. 8612: */ 8613: limit = *in++; 8614: end = ctxt->input->end; 8615: start = in; 8616: if (in >= end) { 8617: const xmlChar *oldbase = ctxt->input->base; 8618: GROW; 8619: if (oldbase != ctxt->input->base) { 8620: long delta = ctxt->input->base - oldbase; 8621: start = start + delta; 8622: in = in + delta; 8623: } 8624: end = ctxt->input->end; 8625: } 8626: if (normalize) { 8627: /* 8628: * Skip any leading spaces 8629: */ 8630: while ((in < end) && (*in != limit) && 8631: ((*in == 0x20) || (*in == 0x9) || 8632: (*in == 0xA) || (*in == 0xD))) { 8633: in++; 8634: start = in; 8635: if (in >= end) { 8636: const xmlChar *oldbase = ctxt->input->base; 8637: GROW; 8638: if (oldbase != ctxt->input->base) { 8639: long delta = ctxt->input->base - oldbase; 8640: start = start + delta; 8641: in = in + delta; 8642: } 8643: end = ctxt->input->end; 8644: } 8645: } 8646: while ((in < end) && (*in != limit) && (*in >= 0x20) && 8647: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8648: if ((*in++ == 0x20) && (*in == 0x20)) break; 8649: if (in >= end) { 8650: const xmlChar *oldbase = ctxt->input->base; 8651: GROW; 8652: if (oldbase != ctxt->input->base) { 8653: long delta = ctxt->input->base - oldbase; 8654: start = start + delta; 8655: in = in + delta; 8656: } 8657: end = ctxt->input->end; 8658: } 8659: } 8660: last = in; 8661: /* 8662: * skip the 
trailing blanks 8663: */ 8664: while ((last[-1] == 0x20) && (last > start)) last--; 8665: while ((in < end) && (*in != limit) && 8666: ((*in == 0x20) || (*in == 0x9) || 8667: (*in == 0xA) || (*in == 0xD))) { 8668: in++; 8669: if (in >= end) { 8670: const xmlChar *oldbase = ctxt->input->base; 8671: GROW; 8672: if (oldbase != ctxt->input->base) { 8673: long delta = ctxt->input->base - oldbase; 8674: start = start + delta; 8675: in = in + delta; 8676: last = last + delta; 8677: } 8678: end = ctxt->input->end; 8679: } 8680: } 8681: if (*in != limit) goto need_complex; 8682: } else { 8683: while ((in < end) && (*in != limit) && (*in >= 0x20) && 8684: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8685: in++; 8686: if (in >= end) { 8687: const xmlChar *oldbase = ctxt->input->base; 8688: GROW; 8689: if (oldbase != ctxt->input->base) { 8690: long delta = ctxt->input->base - oldbase; 8691: start = start + delta; 8692: in = in + delta; 8693: } 8694: end = ctxt->input->end; 8695: } 8696: } 8697: last = in; 8698: if (*in != limit) goto need_complex; 8699: } 8700: in++; 8701: if (len != NULL) { 8702: *len = last - start; 8703: ret = (xmlChar *) start; 8704: } else { 8705: if (alloc) *alloc = 1; 8706: ret = xmlStrndup(start, last - start); 8707: } 8708: CUR_PTR = in; 8709: if (alloc) *alloc = 0; 8710: return ret; 8711: need_complex: 8712: if (alloc) *alloc = 1; 8713: return xmlParseAttValueComplex(ctxt, len, normalize); 8714: } 8715: 8716: /** 8717: * xmlParseAttribute2: 8718: * @ctxt: an XML parser context 8719: * @pref: the element prefix 8720: * @elem: the element name 8721: * @prefix: a xmlChar ** used to store the value of the attribute prefix 8722: * @value: a xmlChar ** used to store the value of the attribute 8723: * @len: an int * to save the length of the attribute 8724: * @alloc: an int * to indicate if the attribute was allocated 8725: * 8726: * parse an attribute in the new SAX2 framework. 8727: * 8728: * Returns the attribute name, and the value in *value, . 
8729: */ 8730: 8731: static const xmlChar * 8732: xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8733: const xmlChar * pref, const xmlChar * elem, 8734: const xmlChar ** prefix, xmlChar ** value, 8735: int *len, int *alloc) 8736: { 8737: const xmlChar *name; 8738: xmlChar *val, *internal_val = NULL; 8739: int normalize = 0; 8740: 8741: *value = NULL; 8742: GROW; 8743: name = xmlParseQName(ctxt, prefix); 8744: if (name == NULL) { 8745: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8746: "error parsing attribute name\n"); 8747: return (NULL); 8748: } 8749: 8750: /* 8751: * get the type if needed 8752: */ 8753: if (ctxt->attsSpecial != NULL) { 8754: int type; 8755: 8756: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8757: pref, elem, *prefix, name); 8758: if (type != 0) 8759: normalize = 1; 8760: } 8761: 8762: /* 8763: * read the value 8764: */ 8765: SKIP_BLANKS; 8766: if (RAW == '=') { 8767: NEXT; 8768: SKIP_BLANKS; 8769: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8770: if (normalize) { 8771: /* 8772: * Sometimes a second normalisation pass for spaces is needed 8773: * but that only happens if charrefs or entities refernces 8774: * have been used in the attribute value, i.e. the attribute 8775: * value have been extracted in an allocated string already. 
8776: */ 8777: if (*alloc) { 8778: const xmlChar *val2; 8779: 8780: val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8781: if ((val2 != NULL) && (val2 != val)) { 8782: xmlFree(val); 8783: val = (xmlChar *) val2; 8784: } 8785: } 8786: } 8787: ctxt->instate = XML_PARSER_CONTENT; 8788: } else { 8789: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8790: "Specification mandate value for attribute %s\n", 8791: name); 8792: return (NULL); 8793: } 8794: 8795: if (*prefix == ctxt->str_xml) { 8796: /* 8797: * Check that xml:lang conforms to the specification 8798: * No more registered as an error, just generate a warning now 8799: * since this was deprecated in XML second edition 8800: */ 8801: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8802: internal_val = xmlStrndup(val, *len); 8803: if (!xmlCheckLanguageID(internal_val)) { 8804: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8805: "Malformed value for xml:lang : %s\n", 8806: internal_val, NULL); 8807: } 8808: } 8809: 8810: /* 8811: * Check that xml:space conforms to the specification 8812: */ 8813: if (xmlStrEqual(name, BAD_CAST "space")) { 8814: internal_val = xmlStrndup(val, *len); 8815: if (xmlStrEqual(internal_val, BAD_CAST "default")) 8816: *(ctxt->space) = 0; 8817: else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8818: *(ctxt->space) = 1; 8819: else { 8820: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8821: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8822: internal_val, NULL); 8823: } 8824: } 8825: if (internal_val) { 8826: xmlFree(internal_val); 8827: } 8828: } 8829: 8830: *value = val; 8831: return (name); 8832: } 8833: /** 8834: * xmlParseStartTag2: 8835: * @ctxt: an XML parser context 8836: * 8837: * parse a start of tag either for rule element or 8838: * EmptyElement. In both case we don't parse the tag closing chars. 8839: * This routine is called when running SAX2 parsing 8840: * 8841: * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8842: * 8843: * [ WFC: Unique Att Spec ] 8844: * No attribute name may appear more than once in the same start-tag or 8845: * empty-element tag. 8846: * 8847: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8848: * 8849: * [ WFC: Unique Att Spec ] 8850: * No attribute name may appear more than once in the same start-tag or 8851: * empty-element tag. 8852: * 8853: * With namespace: 8854: * 8855: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8856: * 8857: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8858: * 8859: * Returns the element name parsed 8860: */ 8861: 8862: static const xmlChar * 8863: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8864: const xmlChar **URI, int *tlen) { 8865: const xmlChar *localname; 8866: const xmlChar *prefix; 8867: const xmlChar *attname; 8868: const xmlChar *aprefix; 8869: const xmlChar *nsname; 8870: xmlChar *attvalue; 8871: const xmlChar **atts = ctxt->atts; 8872: int maxatts = ctxt->maxatts; 8873: int nratts, nbatts, nbdef; 8874: int i, j, nbNs, attval, oldline, oldcol; 8875: const xmlChar *base; 8876: unsigned long cur; 8877: int nsNr = ctxt->nsNr; 8878: 8879: if (RAW != '<') return(NULL); 8880: NEXT1; 8881: 8882: /* 8883: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8884: * point since the attribute values may be stored as pointers to 8885: * the buffer and calling SHRINK would destroy them ! 8886: * The Shrinking is only possible once the full set of attribute 8887: * callbacks have been done. 8888: */ 8889: reparse: 8890: SHRINK; 8891: base = ctxt->input->base; 8892: cur = ctxt->input->cur - ctxt->input->base; 8893: oldline = ctxt->input->line; 8894: oldcol = ctxt->input->col; 8895: nbatts = 0; 8896: nratts = 0; 8897: nbdef = 0; 8898: nbNs = 0; 8899: attval = 0; 8900: /* Forget any namespaces added during an earlier parse of this element. 
*/ 8901: ctxt->nsNr = nsNr; 8902: 8903: localname = xmlParseQName(ctxt, &prefix); 8904: if (localname == NULL) { 8905: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8906: "StartTag: invalid element name\n"); 8907: return(NULL); 8908: } 8909: *tlen = ctxt->input->cur - ctxt->input->base - cur; 8910: 8911: /* 8912: * Now parse the attributes, it ends up with the ending 8913: * 8914: * (S Attribute)* S? 8915: */ 8916: SKIP_BLANKS; 8917: GROW; 8918: if (ctxt->input->base != base) goto base_changed; 8919: 8920: while ((RAW != '>') && 8921: ((RAW != '/') || (NXT(1) != '>')) && 8922: (IS_BYTE_CHAR(RAW))) { 8923: const xmlChar *q = CUR_PTR; 8924: unsigned int cons = ctxt->input->consumed; 8925: int len = -1, alloc = 0; 8926: 8927: attname = xmlParseAttribute2(ctxt, prefix, localname, 8928: &aprefix, &attvalue, &len, &alloc); 8929: if (ctxt->input->base != base) { 8930: if ((attvalue != NULL) && (alloc != 0)) 8931: xmlFree(attvalue); 8932: attvalue = NULL; 8933: goto base_changed; 8934: } 8935: if ((attname != NULL) && (attvalue != NULL)) { 8936: if (len < 0) len = xmlStrlen(attvalue); 8937: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8938: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8939: xmlURIPtr uri; 8940: 8941: if (*URL != 0) { 8942: uri = xmlParseURI((const char *) URL); 8943: if (uri == NULL) { 8944: xmlNsErr(ctxt, XML_WAR_NS_URI, 8945: "xmlns: '%s' is not a valid URI\n", 8946: URL, NULL, NULL); 8947: } else { 8948: if (uri->scheme == NULL) { 8949: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8950: "xmlns: URI %s is not absolute\n", 8951: URL, NULL, NULL); 8952: } 8953: xmlFreeURI(uri); 8954: } 8955: if (URL == ctxt->str_xml_ns) { 8956: if (attname != ctxt->str_xml) { 8957: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8958: "xml namespace URI cannot be the default namespace\n", 8959: NULL, NULL, NULL); 8960: } 8961: goto skip_default_ns; 8962: } 8963: if ((len == 29) && 8964: (xmlStrEqual(URL, 8965: BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 
8966: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8967: "reuse of the xmlns namespace name is forbidden\n", 8968: NULL, NULL, NULL); 8969: goto skip_default_ns; 8970: } 8971: } 8972: /* 8973: * check that it's not a defined namespace 8974: */ 8975: for (j = 1;j <= nbNs;j++) 8976: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8977: break; 8978: if (j <= nbNs) 8979: xmlErrAttributeDup(ctxt, NULL, attname); 8980: else 8981: if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8982: skip_default_ns: 8983: if (alloc != 0) xmlFree(attvalue); 8984: SKIP_BLANKS; /* same base recheck as the skip_ns path: stored attribute values point into the input buffer */ if (ctxt->input->base != base) goto base_changed; 8985: continue; 8986: } 8987: if (aprefix == ctxt->str_xmlns) { 8988: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8989: xmlURIPtr uri; 8990: 8991: if (attname == ctxt->str_xml) { 8992: if (URL != ctxt->str_xml_ns) { 8993: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8994: "xml namespace prefix mapped to wrong URI\n", 8995: NULL, NULL, NULL); 8996: } 8997: /* 8998: * Do not keep a namespace definition node 8999: */ 9000: goto skip_ns; 9001: } 9002: if (URL == ctxt->str_xml_ns) { 9003: if (attname != ctxt->str_xml) { 9004: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9005: "xml namespace URI mapped to wrong prefix\n", 9006: NULL, NULL, NULL); 9007: } 9008: goto skip_ns; 9009: } 9010: if (attname == ctxt->str_xmlns) { 9011: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9012: "redefinition of the xmlns prefix is forbidden\n", 9013: NULL, NULL, NULL); 9014: goto skip_ns; 9015: } 9016: if ((len == 29) && 9017: (xmlStrEqual(URL, 9018: BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9019: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9020: "reuse of the xmlns namespace name is forbidden\n", 9021: NULL, NULL, NULL); 9022: goto skip_ns; 9023: } 9024: if ((URL == NULL) || (URL[0] == 0)) { 9025: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9026: "xmlns:%s: Empty XML namespace is not allowed\n", 9027: attname, NULL, NULL); 9028: goto skip_ns; 9029: } else { 9030: uri = xmlParseURI((const char *) URL); 9031: if (uri == NULL) { 9032: xmlNsErr(ctxt,
XML_WAR_NS_URI, 9033: "xmlns:%s: '%s' is not a valid URI\n", 9034: attname, URL, NULL); 9035: } else { 9036: if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9037: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9038: "xmlns:%s: URI %s is not absolute\n", 9039: attname, URL, NULL); 9040: } 9041: xmlFreeURI(uri); 9042: } 9043: } 9044: 9045: /* 9046: * check that it's not a defined namespace 9047: */ 9048: for (j = 1;j <= nbNs;j++) 9049: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9050: break; 9051: if (j <= nbNs) 9052: xmlErrAttributeDup(ctxt, aprefix, attname); 9053: else 9054: if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9055: skip_ns: 9056: if (alloc != 0) xmlFree(attvalue); 9057: SKIP_BLANKS; 9058: if (ctxt->input->base != base) goto base_changed; 9059: continue; 9060: } 9061: 9062: /* 9063: * Add the pair to atts 9064: */ 9065: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9066: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9067: if (alloc != 0) /* free only values flagged as allocated, as the other paths of this loop do */ 9068: xmlFree(attvalue); 9069: goto failed; 9070: } 9071: maxatts = ctxt->maxatts; 9072: atts = ctxt->atts; 9073: } 9074: ctxt->attallocs[nratts++] = alloc; 9075: atts[nbatts++] = attname; 9076: atts[nbatts++] = aprefix; 9077: atts[nbatts++] = NULL; /* the URI will be fetched later */ 9078: atts[nbatts++] = attvalue; 9079: attvalue += len; 9080: atts[nbatts++] = attvalue; 9081: /* 9082: * tag if some deallocation is needed 9083: */ 9084: if (alloc != 0) attval = 1; 9085: } else { 9086: if ((attvalue != NULL) && (alloc != 0)) /* do not index attvalue[len] here: len can still be -1 */ 9087: xmlFree(attvalue); 9088: } 9089: 9090: failed: 9091: 9092: GROW 9093: if (ctxt->input->base != base) goto base_changed; 9094: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9095: break; 9096: if (!IS_BLANK_CH(RAW)) { 9097: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9098: "attributes construct error\n"); 9099: break; 9100: } 9101: SKIP_BLANKS; 9102: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9103: (attname == NULL) && (attvalue == NULL)) { 9104:
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9105: "xmlParseStartTag: problem parsing attributes\n"); 9106: break; 9107: } 9108: GROW; 9109: if (ctxt->input->base != base) goto base_changed; 9110: } 9111: 9112: /* 9113: * The attributes defaulting 9114: */ 9115: if (ctxt->attsDefault != NULL) { 9116: xmlDefAttrsPtr defaults; 9117: 9118: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9119: if (defaults != NULL) { 9120: for (i = 0;i < defaults->nbAttrs;i++) { 9121: attname = defaults->values[5 * i]; 9122: aprefix = defaults->values[5 * i + 1]; 9123: 9124: /* 9125: * special work for namespaces defaulted defs 9126: */ 9127: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9128: /* 9129: * check that it's not a defined namespace 9130: */ 9131: for (j = 1;j <= nbNs;j++) 9132: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9133: break; 9134: if (j <= nbNs) continue; 9135: 9136: nsname = xmlGetNamespace(ctxt, NULL); 9137: if (nsname != defaults->values[5 * i + 2]) { 9138: if (nsPush(ctxt, NULL, 9139: defaults->values[5 * i + 2]) > 0) 9140: nbNs++; 9141: } 9142: } else if (aprefix == ctxt->str_xmlns) { 9143: /* 9144: * check that it's not a defined namespace 9145: */ 9146: for (j = 1;j <= nbNs;j++) 9147: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9148: break; 9149: if (j <= nbNs) continue; 9150: 9151: nsname = xmlGetNamespace(ctxt, attname); 9152: if (nsname != defaults->values[5 * i + 2]) { /* compare with this default's own value, not the value of entry 0 */ 9153: if (nsPush(ctxt, attname, 9154: defaults->values[5 * i + 2]) > 0) 9155: nbNs++; 9156: } 9157: } else { 9158: /* 9159: * check that it's not a defined attribute 9160: */ 9161: for (j = 0;j < nbatts;j+=5) { 9162: if ((attname == atts[j]) && (aprefix == atts[j+1])) 9163: break; 9164: } 9165: if (j < nbatts) continue; 9166: 9167: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9168: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { /* NOTE(review): this return skips the release of values flagged in ctxt->attallocs that the normal exit performs - confirm whether it leaks */ 9169: return(NULL); 9170: } 9171: maxatts = ctxt->maxatts; 9172: atts = ctxt->atts; 9173: } 9174: atts[nbatts++] = attname; 9175:
atts[nbatts++] = aprefix; 9176: if (aprefix == NULL) 9177: atts[nbatts++] = NULL; 9178: else 9179: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9180: atts[nbatts++] = defaults->values[5 * i + 2]; 9181: atts[nbatts++] = defaults->values[5 * i + 3]; 9182: if ((ctxt->standalone == 1) && 9183: (defaults->values[5 * i + 4] != NULL)) { 9184: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9185: "standalone: attribute %s on %s defaulted from external subset\n", 9186: attname, localname); 9187: } 9188: nbdef++; 9189: } 9190: } 9191: } 9192: } 9193: 9194: /* 9195: * The attributes checkings 9196: */ 9197: for (i = 0; i < nbatts;i += 5) { 9198: /* 9199: * The default namespace does not apply to attribute names. 9200: */ 9201: if (atts[i + 1] != NULL) { 9202: nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9203: if (nsname == NULL) { 9204: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9205: "Namespace prefix %s for %s on %s is not defined\n", 9206: atts[i + 1], atts[i], localname); 9207: } 9208: atts[i + 2] = nsname; 9209: } else 9210: nsname = NULL; 9211: /* 9212: * [ WFC: Unique Att Spec ] 9213: * No attribute name may appear more than once in the same 9214: * start-tag or empty-element tag. 9215: * As extended by the Namespace in XML REC. 
9216: */ 9217: for (j = 0; j < i;j += 5) { 9218: if (atts[i] == atts[j]) { 9219: if (atts[i+1] == atts[j+1]) { 9220: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9221: break; 9222: } 9223: if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9224: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9225: "Namespaced Attribute %s in '%s' redefined\n", 9226: atts[i], nsname, NULL); 9227: break; 9228: } 9229: } 9230: } 9231: } 9232: 9233: nsname = xmlGetNamespace(ctxt, prefix); 9234: if ((prefix != NULL) && (nsname == NULL)) { 9235: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9236: "Namespace prefix %s on %s is not defined\n", 9237: prefix, localname, NULL); 9238: } 9239: *pref = prefix; 9240: *URI = nsname; 9241: 9242: /* 9243: * SAX: Start of Element ! 9244: */ 9245: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9246: (!ctxt->disableSAX)) { 9247: if (nbNs > 0) 9248: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9249: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9250: nbatts / 5, nbdef, atts); 9251: else 9252: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9253: nsname, 0, NULL, nbatts / 5, nbdef, atts); 9254: } 9255: 9256: /* 9257: * Free up attribute allocated strings if needed 9258: */ 9259: if (attval != 0) { 9260: for (i = 3,j = 0; j < nratts;i += 5,j++) 9261: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9262: xmlFree((xmlChar *) atts[i]); 9263: } 9264: 9265: return(localname); 9266: 9267: base_changed: 9268: /* 9269: * the attribute strings are valid iif the base didn't changed 9270: */ 9271: if (attval != 0) { 9272: for (i = 3,j = 0; j < nratts;i += 5,j++) 9273: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9274: xmlFree((xmlChar *) atts[i]); 9275: } 9276: ctxt->input->cur = ctxt->input->base + cur; 9277: ctxt->input->line = oldline; 9278: ctxt->input->col = oldcol; 9279: if (ctxt->wellFormed == 1) { 9280: goto reparse; 9281: } 9282: return(NULL); 9283: } 9284: 9285: /** 9286: * xmlParseEndTag2: 
9287: * @ctxt: an XML parser context 9288: * @line: line of the start tag 9289: * @nsNr: number of namespaces on the start tag 9290: * 9291: * parse an end of tag 9292: * 9293: * [42] ETag ::= '</' Name S? '>' 9294: * 9295: * With namespace 9296: * 9297: * [NS 9] ETag ::= '</' QName S? '>' 9298: */ 9299: 9300: static void 9301: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9302: const xmlChar *URI, int line, int nsNr, int tlen) { 9303: const xmlChar *name; 9304: 9305: GROW; 9306: if ((RAW != '<') || (NXT(1) != '/')) { 9307: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9308: return; 9309: } 9310: SKIP(2); 9311: 9312: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9313: if (ctxt->input->cur[tlen] == '>') { 9314: ctxt->input->cur += tlen + 1; 9315: goto done; 9316: } 9317: ctxt->input->cur += tlen; 9318: name = (xmlChar*)1; 9319: } else { 9320: if (prefix == NULL) 9321: name = xmlParseNameAndCompare(ctxt, ctxt->name); 9322: else 9323: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9324: } 9325: 9326: /* 9327: * We should definitely be at the ending "S? '>'" part 9328: */ 9329: GROW; 9330: SKIP_BLANKS; 9331: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9332: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9333: } else 9334: NEXT1; 9335: 9336: /* 9337: * [ WFC: Element Type Match ] 9338: * The Name in an element's end-tag must match the element type in the 9339: * start-tag. 
9340: * 9341: */ 9342: if (name != (xmlChar*)1) { 9343: if (name == NULL) name = BAD_CAST "unparseable"; 9344: if ((line == 0) && (ctxt->node != NULL)) 9345: line = ctxt->node->line; 9346: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9347: "Opening and ending tag mismatch: %s line %d and %s\n", 9348: ctxt->name, line, name); 9349: } 9350: 9351: /* 9352: * SAX: End of Tag 9353: */ 9354: done: 9355: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9356: (!ctxt->disableSAX)) 9357: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9358: 9359: spacePop(ctxt); 9360: if (nsNr != 0) 9361: nsPop(ctxt, nsNr); 9362: return; 9363: } 9364: 9365: /** 9366: * xmlParseCDSect: 9367: * @ctxt: an XML parser context 9368: * 9369: * Parse escaped pure raw content. 9370: * 9371: * [18] CDSect ::= CDStart CData CDEnd 9372: * 9373: * [19] CDStart ::= '<![CDATA[' 9374: * 9375: * [20] CData ::= (Char* - (Char* ']]>' Char*)) 9376: * 9377: * [21] CDEnd ::= ']]>' 9378: */ 9379: void 9380: xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9381: xmlChar *buf = NULL; 9382: int len = 0; 9383: int size = XML_PARSER_BUFFER_SIZE; 9384: int r, rl; /* oldest of the three pending characters and the length CUR_CHAR reported for it */ 9385: int s, sl; /* middle pending character and its length */ 9386: int cur, l; /* most recently read character and its length */ 9387: int count = 0; 9388: 9389: /* Check 2.6.0 was NXT(0) not RAW */ 9390: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9391: SKIP(9); 9392: } else 9393: return; 9394: 9395: ctxt->instate = XML_PARSER_CDATA_SECTION; 9396: r = CUR_CHAR(rl); 9397: if (!IS_CHAR(r)) { 9398: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9399: ctxt->instate = XML_PARSER_CONTENT; 9400: return; 9401: } 9402: NEXTL(rl); 9403: s = CUR_CHAR(sl); 9404: if (!IS_CHAR(s)) { 9405: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9406: ctxt->instate = XML_PARSER_CONTENT; 9407: return; 9408: } 9409: NEXTL(sl); 9410: cur = CUR_CHAR(l); 9411: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9412: if (buf == NULL) { 9413: xmlErrMemory(ctxt, NULL); 9414: return; 9415: } 9416: while
(IS_CHAR(cur) && 9417: ((r != ']') || (s != ']') || (cur != '>'))) { 9418: if (len + 5 >= size) { 9419: xmlChar *tmp; 9420: 9421: size *= 2; 9422: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9423: if (tmp == NULL) { 9424: xmlFree(buf); 9425: xmlErrMemory(ctxt, NULL); 9426: return; 9427: } 9428: buf = tmp; 9429: } 9430: COPY_BUF(rl,buf,len,r); 9431: r = s; 9432: rl = sl; 9433: s = cur; 9434: sl = l; 9435: count++; 9436: if (count > 50) { 9437: GROW; 9438: count = 0; 9439: } 9440: NEXTL(l); 9441: cur = CUR_CHAR(l); 9442: } 9443: buf[len] = 0; 9444: ctxt->instate = XML_PARSER_CONTENT; 9445: if (cur != '>') { 9446: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9447: "CData section not finished\n%.50s\n", buf); 9448: xmlFree(buf); 9449: return; 9450: } 9451: NEXTL(l); 9452: 9453: /* 9454: * OK the buffer is to be consumed as cdata. 9455: */ 9456: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9457: if (ctxt->sax->cdataBlock != NULL) 9458: ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9459: else if (ctxt->sax->characters != NULL) 9460: ctxt->sax->characters(ctxt->userData, buf, len); 9461: } 9462: xmlFree(buf); 9463: } 9464: 9465: /** 9466: * xmlParseContent: 9467: * @ctxt: an XML parser context 9468: * 9469: * Parse a content: 9470: * 9471: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9472: */ 9473: 9474: void 9475: xmlParseContent(xmlParserCtxtPtr ctxt) { 9476: GROW; 9477: while ((RAW != 0) && 9478: ((RAW != '<') || (NXT(1) != '/')) && 9479: (ctxt->instate != XML_PARSER_EOF)) { 9480: const xmlChar *test = CUR_PTR; 9481: unsigned int cons = ctxt->input->consumed; 9482: const xmlChar *cur = ctxt->input->cur; 9483: 9484: /* 9485: * First case : a Processing Instruction. 
9486: */ 9487: if ((*cur == '<') && (cur[1] == '?')) { 9488: xmlParsePI(ctxt); 9489: } 9490: 9491: /* 9492: * Second case : a CDSection 9493: */ 9494: /* 2.6.0 test was *cur not RAW */ 9495: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9496: xmlParseCDSect(ctxt); 9497: } 9498: 9499: /* 9500: * Third case : a comment 9501: */ 9502: else if ((*cur == '<') && (NXT(1) == '!') && 9503: (NXT(2) == '-') && (NXT(3) == '-')) { 9504: xmlParseComment(ctxt); 9505: ctxt->instate = XML_PARSER_CONTENT; 9506: } 9507: 9508: /* 9509: * Fourth case : a sub-element. 9510: */ 9511: else if (*cur == '<') { 9512: xmlParseElement(ctxt); 9513: } 9514: 9515: /* 9516: * Fifth case : a reference. If it has not been resolved, 9517: * parsing returns its Name, create the node 9518: */ 9519: 9520: else if (*cur == '&') { 9521: xmlParseReference(ctxt); 9522: } 9523: 9524: /* 9525: * Last case, text. Note that References are handled directly. 9526: */ 9527: else { 9528: xmlParseCharData(ctxt, 0); 9529: } 9530: 9531: GROW; 9532: /* 9533: * Pop-up of finished entities. 9534: */ 9535: while ((RAW == 0) && (ctxt->inputNr > 1)) 9536: xmlPopInput(ctxt); 9537: SHRINK; 9538: /* no-progress guard: neither the consumed counter nor the cursor moved since the top of this iteration, so report an error and stop */ 9539: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9540: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9541: "detected an error in element content\n"); 9542: ctxt->instate = XML_PARSER_EOF; 9543: break; 9544: } 9545: } 9546: } 9547: 9548: /** 9549: * xmlParseElement: 9550: * @ctxt: an XML parser context 9551: * 9552: * parse an XML element, this is highly recursive 9553: * 9554: * [39] element ::= EmptyElemTag | STag content ETag 9555: * 9556: * [ WFC: Element Type Match ] 9557: * The Name in an element's end-tag must match the element type in the 9558: * start-tag.
9559: * 9560: */ 9561: 9562: void 9563: xmlParseElement(xmlParserCtxtPtr ctxt) { 9564: const xmlChar *name; 9565: const xmlChar *prefix = NULL; 9566: const xmlChar *URI = NULL; 9567: xmlParserNodeInfo node_info; 9568: int line, tlen = 0; 9569: xmlNodePtr ret; 9570: int nsNr = ctxt->nsNr; 9571: 9572: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9573: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9574: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9575: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9576: xmlParserMaxDepth); 9577: ctxt->instate = XML_PARSER_EOF; 9578: return; 9579: } 9580: 9581: /* Capture start position */ 9582: if (ctxt->record_info) { 9583: node_info.begin_pos = ctxt->input->consumed + 9584: (CUR_PTR - ctxt->input->base); 9585: node_info.begin_line = ctxt->input->line; 9586: } 9587: 9588: if (ctxt->spaceNr == 0) 9589: spacePush(ctxt, -1); 9590: else if (*ctxt->space == -2) 9591: spacePush(ctxt, -1); 9592: else 9593: spacePush(ctxt, *ctxt->space); 9594: 9595: line = ctxt->input->line; 9596: #ifdef LIBXML_SAX1_ENABLED 9597: if (ctxt->sax2) 9598: #endif /* LIBXML_SAX1_ENABLED */ 9599: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9600: #ifdef LIBXML_SAX1_ENABLED 9601: else 9602: name = xmlParseStartTag(ctxt); 9603: #endif /* LIBXML_SAX1_ENABLED */ 9604: if (ctxt->instate == XML_PARSER_EOF) 9605: return; 9606: if (name == NULL) { 9607: spacePop(ctxt); 9608: return; 9609: } 9610: namePush(ctxt, name); 9611: ret = ctxt->node; 9612: 9613: #ifdef LIBXML_VALID_ENABLED 9614: /* 9615: * [ VC: Root Element Type ] 9616: * The Name in the document type declaration must match the element 9617: * type of the root element. 9618: */ 9619: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9620: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9621: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9622: #endif /* LIBXML_VALID_ENABLED */ 9623: 9624: /* 9625: * Check for an Empty Element. 
9626: */ 9627: if ((RAW == '/') && (NXT(1) == '>')) { 9628: SKIP(2); 9629: if (ctxt->sax2) { 9630: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9631: (!ctxt->disableSAX)) 9632: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9633: #ifdef LIBXML_SAX1_ENABLED 9634: } else { 9635: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9636: (!ctxt->disableSAX)) 9637: ctxt->sax->endElement(ctxt->userData, name); 9638: #endif /* LIBXML_SAX1_ENABLED */ 9639: } 9640: namePop(ctxt); 9641: spacePop(ctxt); 9642: if (nsNr != ctxt->nsNr) 9643: nsPop(ctxt, ctxt->nsNr - nsNr); 9644: if ( ret != NULL && ctxt->record_info ) { 9645: node_info.end_pos = ctxt->input->consumed + 9646: (CUR_PTR - ctxt->input->base); 9647: node_info.end_line = ctxt->input->line; 9648: node_info.node = ret; 9649: xmlParserAddNodeInfo(ctxt, &node_info); 9650: } 9651: return; 9652: } 9653: if (RAW == '>') { 9654: NEXT1; 9655: } else { 9656: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9657: "Couldn't find end of Start Tag %s line %d\n", 9658: name, line, NULL); 9659: 9660: /* 9661: * end of parsing of this node. 9662: */ 9663: nodePop(ctxt); 9664: namePop(ctxt); 9665: spacePop(ctxt); 9666: if (nsNr != ctxt->nsNr) 9667: nsPop(ctxt, ctxt->nsNr - nsNr); 9668: 9669: /* 9670: * Capture end position and add node 9671: */ 9672: if ( ret != NULL && ctxt->record_info ) { 9673: node_info.end_pos = ctxt->input->consumed + 9674: (CUR_PTR - ctxt->input->base); 9675: node_info.end_line = ctxt->input->line; 9676: node_info.node = ret; 9677: xmlParserAddNodeInfo(ctxt, &node_info); 9678: } 9679: return; 9680: } 9681: 9682: /* 9683: * Parse the content of the element: 9684: */ 9685: xmlParseContent(ctxt); 9686: if (!IS_BYTE_CHAR(RAW)) { 9687: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9688: "Premature end of data in tag %s line %d\n", 9689: name, line, NULL); 9690: 9691: /* 9692: * end of parsing of this node. 
9693: */ 9694: nodePop(ctxt); 9695: namePop(ctxt); 9696: spacePop(ctxt); 9697: if (nsNr != ctxt->nsNr) 9698: nsPop(ctxt, ctxt->nsNr - nsNr); 9699: return; 9700: } 9701: 9702: /* 9703: * parse the end of tag: '</' should be here. 9704: */ 9705: if (ctxt->sax2) { 9706: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 9707: namePop(ctxt); 9708: } 9709: #ifdef LIBXML_SAX1_ENABLED 9710: else 9711: xmlParseEndTag1(ctxt, line); 9712: #endif /* LIBXML_SAX1_ENABLED */ 9713: 9714: /* 9715: * Capture end position and add node 9716: */ 9717: if ( ret != NULL && ctxt->record_info ) { 9718: node_info.end_pos = ctxt->input->consumed + 9719: (CUR_PTR - ctxt->input->base); 9720: node_info.end_line = ctxt->input->line; 9721: node_info.node = ret; 9722: xmlParserAddNodeInfo(ctxt, &node_info); 9723: } 9724: } 9725: 9726: /** 9727: * xmlParseVersionNum: 9728: * @ctxt: an XML parser context 9729: * 9730: * parse the XML version value. 9731: * 9732: * [26] VersionNum ::= '1.' [0-9]+ 9733: * 9734: * In practice allow [0-9].[0-9]+ at that level 9735: * 9736: * Returns the string giving the XML version number, or NULL 9737: */ 9738: xmlChar * 9739: xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9740: xmlChar *buf = NULL; 9741: int len = 0; 9742: int size = 10; 9743: xmlChar cur; 9744: 9745: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9746: if (buf == NULL) { 9747: xmlErrMemory(ctxt, NULL); 9748: return(NULL); 9749: } 9750: cur = CUR; 9751: if (!((cur >= '0') && (cur <= '9'))) { 9752: xmlFree(buf); 9753: return(NULL); 9754: } 9755: buf[len++] = cur; 9756: NEXT; 9757: cur=CUR; 9758: if (cur != '.') { 9759: xmlFree(buf); 9760: return(NULL); 9761: } 9762: buf[len++] = cur; 9763: NEXT; 9764: cur=CUR; 9765: while ((cur >= '0') && (cur <= '9')) { 9766: if (len + 1 >= size) { 9767: xmlChar *tmp; 9768: 9769: size *= 2; 9770: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9771: if (tmp == NULL) { 9772: xmlFree(buf); 9773: xmlErrMemory(ctxt, NULL); 9774: 
return(NULL); 9775: } 9776: buf = tmp; 9777: } 9778: buf[len++] = cur; 9779: NEXT; 9780: cur=CUR; 9781: } 9782: buf[len] = 0; 9783: return(buf); 9784: } 9785: 9786: /** 9787: * xmlParseVersionInfo: 9788: * @ctxt: an XML parser context 9789: * 9790: * parse the XML version. 9791: * 9792: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9793: * 9794: * [25] Eq ::= S? '=' S? 9795: * 9796: * Returns the version string, e.g. "1.0" 9797: */ 9798: 9799: xmlChar * 9800: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9801: xmlChar *version = NULL; 9802: 9803: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9804: SKIP(7); 9805: SKIP_BLANKS; 9806: if (RAW != '=') { 9807: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9808: return(NULL); 9809: } 9810: NEXT; 9811: SKIP_BLANKS; 9812: if (RAW == '"') { 9813: NEXT; 9814: version = xmlParseVersionNum(ctxt); 9815: if (RAW != '"') { 9816: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9817: } else 9818: NEXT; 9819: } else if (RAW == '\''){ 9820: NEXT; 9821: version = xmlParseVersionNum(ctxt); 9822: if (RAW != '\'') { 9823: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9824: } else 9825: NEXT; 9826: } else { 9827: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9828: } 9829: } 9830: return(version); 9831: } 9832: 9833: /** 9834: * xmlParseEncName: 9835: * @ctxt: an XML parser context 9836: * 9837: * parse the XML encoding name 9838: * 9839: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9840: * 9841: * Returns the encoding name value or NULL 9842: */ 9843: xmlChar * 9844: xmlParseEncName(xmlParserCtxtPtr ctxt) { 9845: xmlChar *buf = NULL; 9846: int len = 0; 9847: int size = 10; 9848: xmlChar cur; 9849: 9850: cur = CUR; 9851: if (((cur >= 'a') && (cur <= 'z')) || 9852: ((cur >= 'A') && (cur <= 'Z'))) { 9853: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9854: if (buf == NULL) { 9855: xmlErrMemory(ctxt, NULL); 9856: return(NULL); 9857: } 9858: 9859: buf[len++] = cur; 9860: 
NEXT; 9861: cur = CUR; 9862: while (((cur >= 'a') && (cur <= 'z')) || 9863: ((cur >= 'A') && (cur <= 'Z')) || 9864: ((cur >= '0') && (cur <= '9')) || 9865: (cur == '.') || (cur == '_') || 9866: (cur == '-')) { 9867: if (len + 1 >= size) { 9868: xmlChar *tmp; 9869: 9870: size *= 2; 9871: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9872: if (tmp == NULL) { 9873: xmlErrMemory(ctxt, NULL); 9874: xmlFree(buf); 9875: return(NULL); 9876: } 9877: buf = tmp; 9878: } 9879: buf[len++] = cur; 9880: NEXT; 9881: cur = CUR; 9882: if (cur == 0) { 9883: SHRINK; 9884: GROW; 9885: cur = CUR; 9886: } 9887: } 9888: buf[len] = 0; 9889: } else { 9890: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9891: } 9892: return(buf); 9893: } 9894: 9895: /** 9896: * xmlParseEncodingDecl: 9897: * @ctxt: an XML parser context 9898: * 9899: * parse the XML encoding declaration 9900: * 9901: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9902: * 9903: * this setups the conversion filters. 
9904: * 9905: * Returns the encoding value or NULL 9906: */ 9907: 9908: const xmlChar * 9909: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9910: xmlChar *encoding = NULL; 9911: 9912: SKIP_BLANKS; 9913: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9914: SKIP(8); 9915: SKIP_BLANKS; 9916: if (RAW != '=') { 9917: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9918: return(NULL); 9919: } 9920: NEXT; 9921: SKIP_BLANKS; 9922: if (RAW == '"') { 9923: NEXT; 9924: encoding = xmlParseEncName(ctxt); 9925: if (RAW != '"') { 9926: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9927: } else 9928: NEXT; 9929: } else if (RAW == '\''){ 9930: NEXT; 9931: encoding = xmlParseEncName(ctxt); 9932: if (RAW != '\'') { 9933: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9934: } else 9935: NEXT; 9936: } else { 9937: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9938: } 9939: 9940: /* 9941: * Non standard parsing, allowing the user to ignore encoding 9942: */ 9943: if (ctxt->options & XML_PARSE_IGNORE_ENC) 9944: return(encoding); 9945: 9946: /* 9947: * UTF-16 encoding stwich has already taken place at this stage, 9948: * more over the little-endian/big-endian selection is already done 9949: */ 9950: if ((encoding != NULL) && 9951: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9952: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9953: /* 9954: * If no encoding was passed to the parser, that we are 9955: * using UTF-16 and no decoder is present i.e. 
the 9956: * document is apparently UTF-8 compatible, then raise an 9957: * encoding mismatch fatal error 9958: */ 9959: if ((ctxt->encoding == NULL) && 9960: (ctxt->input->buf != NULL) && 9961: (ctxt->input->buf->encoder == NULL)) { 9962: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 9963: "Document labelled UTF-16 but has UTF-8 content\n"); 9964: } 9965: if (ctxt->encoding != NULL) 9966: xmlFree((xmlChar *) ctxt->encoding); 9967: ctxt->encoding = encoding; 9968: } 9969: /* 9970: * UTF-8 encoding is handled natively 9971: */ 9972: else if ((encoding != NULL) && 9973: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9974: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9975: if (ctxt->encoding != NULL) 9976: xmlFree((xmlChar *) ctxt->encoding); 9977: ctxt->encoding = encoding; 9978: } 9979: else if (encoding != NULL) { 9980: xmlCharEncodingHandlerPtr handler; 9981: 9982: if (ctxt->input->encoding != NULL) 9983: xmlFree((xmlChar *) ctxt->input->encoding); 9984: ctxt->input->encoding = encoding; 9985: 9986: handler = xmlFindCharEncodingHandler((const char *) encoding); 9987: if (handler != NULL) { 9988: xmlSwitchToEncoding(ctxt, handler); 9989: } else { 9990: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9991: "Unsupported encoding %s\n", encoding); 9992: return(NULL); 9993: } 9994: } 9995: } 9996: return(encoding); 9997: } 9998: 9999: /** 10000: * xmlParseSDDecl: 10001: * @ctxt: an XML parser context 10002: * 10003: * parse the XML standalone declaration 10004: * 10005: * [32] SDDecl ::= S 'standalone' Eq 10006: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10007: * 10008: * [ VC: Standalone Document Declaration ] 10009: * TODO The standalone document declaration must have the value "no" 10010: * if any external markup declarations contain declarations of: 10011: * - attributes with default values, if elements to which these 10012: * attributes apply appear in the document without specifications 10013: * of values for these attributes, or 10014: * - 
entities (other than amp, lt, gt, apos, quot), if references 10015: * to those entities appear in the document, or 10016: * - attributes with values subject to normalization, where the 10017: * attribute appears in the document with a value which will change 10018: * as a result of normalization, or 10019: * - element types with element content, if white space occurs directly 10020: * within any instance of those types. 10021: * 10022: * Returns: 10023: * 1 if standalone="yes" 10024: * 0 if standalone="no" 10025: * -2 if standalone attribute is missing or invalid 10026: * (A standalone value of -2 means that the XML declaration was found, 10027: * but no value was specified for the standalone attribute). 10028: */ 10029: 10030: int 10031: xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10032: int standalone = -2; 10033: 10034: SKIP_BLANKS; 10035: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10036: SKIP(10); 10037: SKIP_BLANKS; 10038: if (RAW != '=') { 10039: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10040: return(standalone); 10041: } 10042: NEXT; 10043: SKIP_BLANKS; 10044: if (RAW == '\''){ 10045: NEXT; 10046: if ((RAW == 'n') && (NXT(1) == 'o')) { 10047: standalone = 0; 10048: SKIP(2); 10049: } else if ((RAW == 'y') && (NXT(1) == 'e') && 10050: (NXT(2) == 's')) { 10051: standalone = 1; 10052: SKIP(3); 10053: } else { 10054: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10055: } 10056: if (RAW != '\'') { 10057: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10058: } else 10059: NEXT; 10060: } else if (RAW == '"'){ 10061: NEXT; 10062: if ((RAW == 'n') && (NXT(1) == 'o')) { 10063: standalone = 0; 10064: SKIP(2); 10065: } else if ((RAW == 'y') && (NXT(1) == 'e') && 10066: (NXT(2) == 's')) { 10067: standalone = 1; 10068: SKIP(3); 10069: } else { 10070: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10071: } 10072: if (RAW != '"') { 10073: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10074: } else 10075: NEXT; 10076: } 
else { 10077: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10078: } 10079: } 10080: return(standalone); 10081: } 10082: 10083: /** 10084: * xmlParseXMLDecl: 10085: * @ctxt: an XML parser context 10086: * 10087: * parse an XML declaration header 10088: * 10089: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10090: */ 10091: 10092: void 10093: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10094: xmlChar *version; 10095: 10096: /* 10097: * This value for standalone indicates that the document has an 10098: * XML declaration but it does not have a standalone attribute. 10099: * It will be overwritten later if a standalone attribute is found. 10100: */ 10101: ctxt->input->standalone = -2; 10102: 10103: /* 10104: * We know that '<?xml' is here. 10105: */ 10106: SKIP(5); 10107: 10108: if (!IS_BLANK_CH(RAW)) { 10109: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10110: "Blank needed after '<?xml'\n"); 10111: } 10112: SKIP_BLANKS; 10113: 10114: /* 10115: * We must have the VersionInfo here. 
10116: */ 10117: version = xmlParseVersionInfo(ctxt); 10118: if (version == NULL) { 10119: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10120: } else { 10121: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10122: /* 10123: * Changed here for XML-1.0 5th edition 10124: */ 10125: if (ctxt->options & XML_PARSE_OLD10) { 10126: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10127: "Unsupported version '%s'\n", 10128: version); 10129: } else { 10130: if ((version[0] == '1') && ((version[1] == '.'))) { 10131: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10132: "Unsupported version '%s'\n", 10133: version, NULL); 10134: } else { 10135: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10136: "Unsupported version '%s'\n", 10137: version); 10138: } 10139: } 10140: } 10141: if (ctxt->version != NULL) 10142: xmlFree((void *) ctxt->version); 10143: ctxt->version = version; 10144: } 10145: 10146: /* 10147: * We may have the encoding declaration 10148: */ 10149: if (!IS_BLANK_CH(RAW)) { 10150: if ((RAW == '?') && (NXT(1) == '>')) { 10151: SKIP(2); 10152: return; 10153: } 10154: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10155: } 10156: xmlParseEncodingDecl(ctxt); 10157: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10158: /* 10159: * The XML REC instructs us to stop parsing right here 10160: */ 10161: return; 10162: } 10163: 10164: /* 10165: * We may have the standalone status. 
10166: */ 10167: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10168: if ((RAW == '?') && (NXT(1) == '>')) { 10169: SKIP(2); 10170: return; 10171: } 10172: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10173: } 10174: 10175: /* 10176: * We can grow the input buffer freely at that point 10177: */ 10178: GROW; 10179: 10180: SKIP_BLANKS; 10181: ctxt->input->standalone = xmlParseSDDecl(ctxt); 10182: 10183: SKIP_BLANKS; 10184: if ((RAW == '?') && (NXT(1) == '>')) { 10185: SKIP(2); 10186: } else if (RAW == '>') { 10187: /* Deprecated old WD ... */ 10188: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10189: NEXT; 10190: } else { 10191: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10192: MOVETO_ENDTAG(CUR_PTR); 10193: NEXT; 10194: } 10195: } 10196: 10197: /** 10198: * xmlParseMisc: 10199: * @ctxt: an XML parser context 10200: * 10201: * parse an XML Misc* optional field. 10202: * 10203: * [27] Misc ::= Comment | PI | S 10204: */ 10205: 10206: void 10207: xmlParseMisc(xmlParserCtxtPtr ctxt) { 10208: while (((RAW == '<') && (NXT(1) == '?')) || 10209: (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10210: IS_BLANK_CH(CUR)) { 10211: if ((RAW == '<') && (NXT(1) == '?')) { 10212: xmlParsePI(ctxt); 10213: } else if (IS_BLANK_CH(CUR)) { 10214: NEXT; 10215: } else 10216: xmlParseComment(ctxt); 10217: } 10218: } 10219: 10220: /** 10221: * xmlParseDocument: 10222: * @ctxt: an XML parser context 10223: * 10224: * parse an XML document (and build a tree if using the standard SAX 10225: * interface). 10226: * 10227: * [1] document ::= prolog element Misc* 10228: * 10229: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10230: * 10231: * Returns 0, -1 in case of error. the parser context is augmented 10232: * as a result of the parsing. 
10233: */ 10234: 10235: int 10236: xmlParseDocument(xmlParserCtxtPtr ctxt) { 10237: xmlChar start[4]; 10238: xmlCharEncoding enc; 10239: 10240: xmlInitParser(); 10241: 10242: if ((ctxt == NULL) || (ctxt->input == NULL)) 10243: return(-1); 10244: 10245: GROW; 10246: 10247: /* 10248: * SAX: detecting the level. 10249: */ 10250: xmlDetectSAX2(ctxt); 10251: 10252: /* 10253: * SAX: beginning of the document processing. 10254: */ 10255: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10256: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10257: 10258: if ((ctxt->encoding == NULL) && 10259: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10260: /* 10261: * Get the 4 first bytes and decode the charset 10262: * if enc != XML_CHAR_ENCODING_NONE 10263: * plug some encoding conversion routines. 10264: */ 10265: start[0] = RAW; 10266: start[1] = NXT(1); 10267: start[2] = NXT(2); 10268: start[3] = NXT(3); 10269: enc = xmlDetectCharEncoding(&start[0], 4); 10270: if (enc != XML_CHAR_ENCODING_NONE) { 10271: xmlSwitchEncoding(ctxt, enc); 10272: } 10273: } 10274: 10275: 10276: if (CUR == 0) { 10277: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10278: } 10279: 10280: /* 10281: * Check for the XMLDecl in the Prolog. 10282: * do not GROW here to avoid the detected encoder to decode more 10283: * than just the first line, unless the amount of data is really 10284: * too small to hold "<?xml version="1.0" encoding="foo" 10285: */ 10286: if ((ctxt->input->end - ctxt->input->cur) < 35) { 10287: GROW; 10288: } 10289: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10290: 10291: /* 10292: * Note that we will switch encoding on the fly. 
10293: */ 10294: xmlParseXMLDecl(ctxt); 10295: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10296: /* 10297: * The XML REC instructs us to stop parsing right here 10298: */ 10299: return(-1); 10300: } 10301: ctxt->standalone = ctxt->input->standalone; 10302: SKIP_BLANKS; 10303: } else { 10304: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10305: } 10306: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10307: ctxt->sax->startDocument(ctxt->userData); 10308: 10309: /* 10310: * The Misc part of the Prolog 10311: */ 10312: GROW; 10313: xmlParseMisc(ctxt); 10314: 10315: /* 10316: * Then possibly doc type declaration(s) and more Misc 10317: * (doctypedecl Misc*)? 10318: */ 10319: GROW; 10320: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10321: 10322: ctxt->inSubset = 1; 10323: xmlParseDocTypeDecl(ctxt); 10324: if (RAW == '[') { 10325: ctxt->instate = XML_PARSER_DTD; 10326: xmlParseInternalSubset(ctxt); 10327: } 10328: 10329: /* 10330: * Create and update the external subset. 
10331: */ 10332: ctxt->inSubset = 2; 10333: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10334: (!ctxt->disableSAX)) 10335: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10336: ctxt->extSubSystem, ctxt->extSubURI); 10337: ctxt->inSubset = 0; 10338: 10339: xmlCleanSpecialAttr(ctxt); 10340: 10341: ctxt->instate = XML_PARSER_PROLOG; 10342: xmlParseMisc(ctxt); 10343: } 10344: 10345: /* 10346: * Time to start parsing the tree itself 10347: */ 10348: GROW; 10349: if (RAW != '<') { 10350: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10351: "Start tag expected, '<' not found\n"); 10352: } else { 10353: ctxt->instate = XML_PARSER_CONTENT; 10354: xmlParseElement(ctxt); 10355: ctxt->instate = XML_PARSER_EPILOG; 10356: 10357: 10358: /* 10359: * The Misc part at the end 10360: */ 10361: xmlParseMisc(ctxt); 10362: 10363: if (RAW != 0) { 10364: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10365: } 10366: ctxt->instate = XML_PARSER_EOF; 10367: } 10368: 10369: /* 10370: * SAX: end of the document processing. 10371: */ 10372: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10373: ctxt->sax->endDocument(ctxt->userData); 10374: 10375: /* 10376: * Remove locally kept entity definitions if the tree was not built 10377: */ 10378: if ((ctxt->myDoc != NULL) && 10379: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10380: xmlFreeDoc(ctxt->myDoc); 10381: ctxt->myDoc = NULL; 10382: } 10383: 10384: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10385: ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10386: if (ctxt->valid) 10387: ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10388: if (ctxt->nsWellFormed) 10389: ctxt->myDoc->properties |= XML_DOC_NSVALID; 10390: if (ctxt->options & XML_PARSE_OLD10) 10391: ctxt->myDoc->properties |= XML_DOC_OLD10; 10392: } 10393: if (! 
ctxt->wellFormed) { 10394: ctxt->valid = 0; 10395: return(-1); 10396: } 10397: return(0); 10398: } 10399: 10400: /** 10401: * xmlParseExtParsedEnt: 10402: * @ctxt: an XML parser context 10403: * 10404: * parse a general parsed entity 10405: * An external general parsed entity is well-formed if it matches the 10406: * production labeled extParsedEnt. 10407: * 10408: * [78] extParsedEnt ::= TextDecl? content 10409: * 10410: * Returns 0, -1 in case of error. the parser context is augmented 10411: * as a result of the parsing. 10412: */ 10413: 10414: int 10415: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10416: xmlChar start[4]; 10417: xmlCharEncoding enc; 10418: 10419: if ((ctxt == NULL) || (ctxt->input == NULL)) 10420: return(-1); 10421: 10422: xmlDefaultSAXHandlerInit(); 10423: 10424: xmlDetectSAX2(ctxt); 10425: 10426: GROW; 10427: 10428: /* 10429: * SAX: beginning of the document processing. 10430: */ 10431: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10432: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10433: 10434: /* 10435: * Get the 4 first bytes and decode the charset 10436: * if enc != XML_CHAR_ENCODING_NONE 10437: * plug some encoding conversion routines. 10438: */ 10439: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10440: start[0] = RAW; 10441: start[1] = NXT(1); 10442: start[2] = NXT(2); 10443: start[3] = NXT(3); 10444: enc = xmlDetectCharEncoding(start, 4); 10445: if (enc != XML_CHAR_ENCODING_NONE) { 10446: xmlSwitchEncoding(ctxt, enc); 10447: } 10448: } 10449: 10450: 10451: if (CUR == 0) { 10452: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10453: } 10454: 10455: /* 10456: * Check for the XMLDecl in the Prolog. 10457: */ 10458: GROW; 10459: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10460: 10461: /* 10462: * Note that we will switch encoding on the fly. 
10463: */ 10464: xmlParseXMLDecl(ctxt); 10465: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10466: /* 10467: * The XML REC instructs us to stop parsing right here 10468: */ 10469: return(-1); 10470: } 10471: SKIP_BLANKS; 10472: } else { 10473: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10474: } 10475: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10476: ctxt->sax->startDocument(ctxt->userData); 10477: 10478: /* 10479: * Doing validity checking on chunk doesn't make sense 10480: */ 10481: ctxt->instate = XML_PARSER_CONTENT; 10482: ctxt->validate = 0; 10483: ctxt->loadsubset = 0; 10484: ctxt->depth = 0; 10485: 10486: xmlParseContent(ctxt); 10487: 10488: if ((RAW == '<') && (NXT(1) == '/')) { 10489: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10490: } else if (RAW != 0) { 10491: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10492: } 10493: 10494: /* 10495: * SAX: end of the document processing. 10496: */ 10497: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10498: ctxt->sax->endDocument(ctxt->userData); 10499: 10500: if (! ctxt->wellFormed) return(-1); 10501: return(0); 10502: } 10503: 10504: #ifdef LIBXML_PUSH_ENABLED 10505: /************************************************************************ 10506: * * 10507: * Progressive parsing interfaces * 10508: * * 10509: ************************************************************************/ 10510: 10511: /** 10512: * xmlParseLookupSequence: 10513: * @ctxt: an XML parser context 10514: * @first: the first char to lookup 10515: * @next: the next char to lookup or zero 10516: * @third: the next char to lookup or zero 10517: * 10518: * Try to find if a sequence (first, next, third) or just (first next) or 10519: * (first) is available in the input stream. 10520: * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10521: * to avoid rescanning sequences of bytes, it DOES change the state of the 10522: * parser, do not use liberally. 
10523: * 10524: * Returns the index to the current parsing point if the full sequence 10525: * is available, -1 otherwise. 10526: */ 10527: static int 10528: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10529: xmlChar next, xmlChar third) { 10530: int base, len; 10531: xmlParserInputPtr in; 10532: const xmlChar *buf; 10533: 10534: in = ctxt->input; 10535: if (in == NULL) return(-1); 10536: base = in->cur - in->base; 10537: if (base < 0) return(-1); 10538: if (ctxt->checkIndex > base) 10539: base = ctxt->checkIndex; 10540: if (in->buf == NULL) { 10541: buf = in->base; 10542: len = in->length; 10543: } else { 10544: buf = in->buf->buffer->content; 10545: len = in->buf->buffer->use; 10546: } 10547: /* take into account the sequence length */ 10548: if (third) len -= 2; 10549: else if (next) len --; 10550: for (;base < len;base++) { 10551: if (buf[base] == first) { 10552: if (third != 0) { 10553: if ((buf[base + 1] != next) || 10554: (buf[base + 2] != third)) continue; 10555: } else if (next != 0) { 10556: if (buf[base + 1] != next) continue; 10557: } 10558: ctxt->checkIndex = 0; 10559: #ifdef DEBUG_PUSH 10560: if (next == 0) 10561: xmlGenericError(xmlGenericErrorContext, 10562: "PP: lookup '%c' found at %d\n", 10563: first, base); 10564: else if (third == 0) 10565: xmlGenericError(xmlGenericErrorContext, 10566: "PP: lookup '%c%c' found at %d\n", 10567: first, next, base); 10568: else 10569: xmlGenericError(xmlGenericErrorContext, 10570: "PP: lookup '%c%c%c' found at %d\n", 10571: first, next, third, base); 10572: #endif 10573: return(base - (in->cur - in->base)); 10574: } 10575: } 10576: ctxt->checkIndex = base; 10577: #ifdef DEBUG_PUSH 10578: if (next == 0) 10579: xmlGenericError(xmlGenericErrorContext, 10580: "PP: lookup '%c' failed\n", first); 10581: else if (third == 0) 10582: xmlGenericError(xmlGenericErrorContext, 10583: "PP: lookup '%c%c' failed\n", first, next); 10584: else 10585: xmlGenericError(xmlGenericErrorContext, 10586: "PP: lookup 
'%c%c%c' failed\n", first, next, third); 10587: #endif 10588: return(-1); 10589: } 10590: 10591: /** 10592: * xmlParseGetLasts: 10593: * @ctxt: an XML parser context 10594: * @lastlt: pointer to store the last '<' from the input 10595: * @lastgt: pointer to store the last '>' from the input 10596: * 10597: * Lookup the last < and > in the current chunk 10598: */ 10599: static void 10600: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10601: const xmlChar **lastgt) { 10602: const xmlChar *tmp; 10603: 10604: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10605: xmlGenericError(xmlGenericErrorContext, 10606: "Internal error: xmlParseGetLasts\n"); 10607: return; 10608: } 10609: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10610: tmp = ctxt->input->end; 10611: tmp--; 10612: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10613: if (tmp < ctxt->input->base) { 10614: *lastlt = NULL; 10615: *lastgt = NULL; 10616: } else { 10617: *lastlt = tmp; 10618: tmp++; 10619: while ((tmp < ctxt->input->end) && (*tmp != '>')) { 10620: if (*tmp == '\'') { 10621: tmp++; 10622: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 10623: if (tmp < ctxt->input->end) tmp++; 10624: } else if (*tmp == '"') { 10625: tmp++; 10626: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10627: if (tmp < ctxt->input->end) tmp++; 10628: } else 10629: tmp++; 10630: } 10631: if (tmp < ctxt->input->end) 10632: *lastgt = tmp; 10633: else { 10634: tmp = *lastlt; 10635: tmp--; 10636: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 10637: if (tmp >= ctxt->input->base) 10638: *lastgt = tmp; 10639: else 10640: *lastgt = NULL; 10641: } 10642: } 10643: } else { 10644: *lastlt = NULL; 10645: *lastgt = NULL; 10646: } 10647: } 10648: /** 10649: * xmlCheckCdataPush: 10650: * @cur: pointer to the bock of characters 10651: * @len: length of the block in bytes 10652: * 10653: * Check that the block of characters is okay as SCdata content [20] 
10654: * 10655: * Returns the number of bytes to pass if okay, a negative index where an 10656: * UTF-8 error occured otherwise 10657: */ 10658: static int 10659: xmlCheckCdataPush(const xmlChar *utf, int len) { 10660: int ix; 10661: unsigned char c; 10662: int codepoint; 10663: 10664: if ((utf == NULL) || (len <= 0)) 10665: return(0); 10666: 10667: for (ix = 0; ix < len;) { /* string is 0-terminated */ 10668: c = utf[ix]; 10669: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 10670: if (c >= 0x20) 10671: ix++; 10672: else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 10673: ix++; 10674: else 10675: return(-ix); 10676: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 10677: if (ix + 2 > len) return(ix); 10678: if ((utf[ix+1] & 0xc0 ) != 0x80) 10679: return(-ix); 10680: codepoint = (utf[ix] & 0x1f) << 6; 10681: codepoint |= utf[ix+1] & 0x3f; 10682: if (!xmlIsCharQ(codepoint)) 10683: return(-ix); 10684: ix += 2; 10685: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 10686: if (ix + 3 > len) return(ix); 10687: if (((utf[ix+1] & 0xc0) != 0x80) || 10688: ((utf[ix+2] & 0xc0) != 0x80)) 10689: return(-ix); 10690: codepoint = (utf[ix] & 0xf) << 12; 10691: codepoint |= (utf[ix+1] & 0x3f) << 6; 10692: codepoint |= utf[ix+2] & 0x3f; 10693: if (!xmlIsCharQ(codepoint)) 10694: return(-ix); 10695: ix += 3; 10696: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 10697: if (ix + 4 > len) return(ix); 10698: if (((utf[ix+1] & 0xc0) != 0x80) || 10699: ((utf[ix+2] & 0xc0) != 0x80) || 10700: ((utf[ix+3] & 0xc0) != 0x80)) 10701: return(-ix); 10702: codepoint = (utf[ix] & 0x7) << 18; 10703: codepoint |= (utf[ix+1] & 0x3f) << 12; 10704: codepoint |= (utf[ix+2] & 0x3f) << 6; 10705: codepoint |= utf[ix+3] & 0x3f; 10706: if (!xmlIsCharQ(codepoint)) 10707: return(-ix); 10708: ix += 4; 10709: } else /* unknown encoding */ 10710: return(-ix); 10711: } 10712: return(ix); 10713: } 10714: 10715: /** 10716: * xmlParseTryOrFinish: 
10717: * @ctxt: an XML parser context 10718: * @terminate: last chunk indicator 10719: * 10720: * Try to progress on parsing 10721: * 10722: * Returns zero if no parsing was possible 10723: */ 10724: static int 10725: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 10726: int ret = 0; 10727: int avail, tlen; 10728: xmlChar cur, next; 10729: const xmlChar *lastlt, *lastgt; 10730: 10731: if (ctxt->input == NULL) 10732: return(0); 10733: 10734: #ifdef DEBUG_PUSH 10735: switch (ctxt->instate) { 10736: case XML_PARSER_EOF: 10737: xmlGenericError(xmlGenericErrorContext, 10738: "PP: try EOF\n"); break; 10739: case XML_PARSER_START: 10740: xmlGenericError(xmlGenericErrorContext, 10741: "PP: try START\n"); break; 10742: case XML_PARSER_MISC: 10743: xmlGenericError(xmlGenericErrorContext, 10744: "PP: try MISC\n");break; 10745: case XML_PARSER_COMMENT: 10746: xmlGenericError(xmlGenericErrorContext, 10747: "PP: try COMMENT\n");break; 10748: case XML_PARSER_PROLOG: 10749: xmlGenericError(xmlGenericErrorContext, 10750: "PP: try PROLOG\n");break; 10751: case XML_PARSER_START_TAG: 10752: xmlGenericError(xmlGenericErrorContext, 10753: "PP: try START_TAG\n");break; 10754: case XML_PARSER_CONTENT: 10755: xmlGenericError(xmlGenericErrorContext, 10756: "PP: try CONTENT\n");break; 10757: case XML_PARSER_CDATA_SECTION: 10758: xmlGenericError(xmlGenericErrorContext, 10759: "PP: try CDATA_SECTION\n");break; 10760: case XML_PARSER_END_TAG: 10761: xmlGenericError(xmlGenericErrorContext, 10762: "PP: try END_TAG\n");break; 10763: case XML_PARSER_ENTITY_DECL: 10764: xmlGenericError(xmlGenericErrorContext, 10765: "PP: try ENTITY_DECL\n");break; 10766: case XML_PARSER_ENTITY_VALUE: 10767: xmlGenericError(xmlGenericErrorContext, 10768: "PP: try ENTITY_VALUE\n");break; 10769: case XML_PARSER_ATTRIBUTE_VALUE: 10770: xmlGenericError(xmlGenericErrorContext, 10771: "PP: try ATTRIBUTE_VALUE\n");break; 10772: case XML_PARSER_DTD: 10773: xmlGenericError(xmlGenericErrorContext, 10774: "PP: try 
DTD\n");break; 10775: case XML_PARSER_EPILOG: 10776: xmlGenericError(xmlGenericErrorContext, 10777: "PP: try EPILOG\n");break; 10778: case XML_PARSER_PI: 10779: xmlGenericError(xmlGenericErrorContext, 10780: "PP: try PI\n");break; 10781: case XML_PARSER_IGNORE: 10782: xmlGenericError(xmlGenericErrorContext, 10783: "PP: try IGNORE\n");break; 10784: } 10785: #endif 10786: 10787: if ((ctxt->input != NULL) && 10788: (ctxt->input->cur - ctxt->input->base > 4096)) { 10789: xmlSHRINK(ctxt); 10790: ctxt->checkIndex = 0; 10791: } 10792: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10793: 10794: while (1) { 10795: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10796: return(0); 10797: 10798: 10799: /* 10800: * Pop-up of finished entities. 10801: */ 10802: while ((RAW == 0) && (ctxt->inputNr > 1)) 10803: xmlPopInput(ctxt); 10804: 10805: if (ctxt->input == NULL) break; 10806: if (ctxt->input->buf == NULL) 10807: avail = ctxt->input->length - 10808: (ctxt->input->cur - ctxt->input->base); 10809: else { 10810: /* 10811: * If we are operating on converted input, try to flush 10812: * remainng chars to avoid them stalling in the non-converted 10813: * buffer. 10814: */ 10815: if ((ctxt->input->buf->raw != NULL) && 10816: (ctxt->input->buf->raw->use > 0)) { 10817: int base = ctxt->input->base - 10818: ctxt->input->buf->buffer->content; 10819: int current = ctxt->input->cur - ctxt->input->base; 10820: 10821: xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10822: ctxt->input->base = ctxt->input->buf->buffer->content + base; 10823: ctxt->input->cur = ctxt->input->base + current; 10824: ctxt->input->end = 10825: &ctxt->input->buf->buffer->content[ 10826: ctxt->input->buf->buffer->use]; 10827: } 10828: avail = ctxt->input->buf->buffer->use - 10829: (ctxt->input->cur - ctxt->input->base); 10830: } 10831: if (avail < 1) 10832: goto done; 10833: switch (ctxt->instate) { 10834: case XML_PARSER_EOF: 10835: /* 10836: * Document parsing is done ! 
10837: */ 10838: goto done; 10839: case XML_PARSER_START: 10840: if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10841: xmlChar start[4]; 10842: xmlCharEncoding enc; 10843: 10844: /* 10845: * Very first chars read from the document flow. 10846: */ 10847: if (avail < 4) 10848: goto done; 10849: 10850: /* 10851: * Get the 4 first bytes and decode the charset 10852: * if enc != XML_CHAR_ENCODING_NONE 10853: * plug some encoding conversion routines, 10854: * else xmlSwitchEncoding will set to (default) 10855: * UTF8. 10856: */ 10857: start[0] = RAW; 10858: start[1] = NXT(1); 10859: start[2] = NXT(2); 10860: start[3] = NXT(3); 10861: enc = xmlDetectCharEncoding(start, 4); 10862: xmlSwitchEncoding(ctxt, enc); 10863: break; 10864: } 10865: 10866: if (avail < 2) 10867: goto done; 10868: cur = ctxt->input->cur[0]; 10869: next = ctxt->input->cur[1]; 10870: if (cur == 0) { 10871: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10872: ctxt->sax->setDocumentLocator(ctxt->userData, 10873: &xmlDefaultSAXLocator); 10874: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10875: ctxt->instate = XML_PARSER_EOF; 10876: #ifdef DEBUG_PUSH 10877: xmlGenericError(xmlGenericErrorContext, 10878: "PP: entering EOF\n"); 10879: #endif 10880: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10881: ctxt->sax->endDocument(ctxt->userData); 10882: goto done; 10883: } 10884: if ((cur == '<') && (next == '?')) { 10885: /* PI or XML decl */ 10886: if (avail < 5) return(ret); 10887: if ((!terminate) && 10888: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10889: return(ret); 10890: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10891: ctxt->sax->setDocumentLocator(ctxt->userData, 10892: &xmlDefaultSAXLocator); 10893: if ((ctxt->input->cur[2] == 'x') && 10894: (ctxt->input->cur[3] == 'm') && 10895: (ctxt->input->cur[4] == 'l') && 10896: (IS_BLANK_CH(ctxt->input->cur[5]))) { 10897: ret += 5; 10898: #ifdef DEBUG_PUSH 10899: xmlGenericError(xmlGenericErrorContext, 10900: "PP: Parsing XML 
Decl\n"); 10901: #endif 10902: xmlParseXMLDecl(ctxt); 10903: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10904: /* 10905: * The XML REC instructs us to stop parsing right 10906: * here 10907: */ 10908: ctxt->instate = XML_PARSER_EOF; 10909: return(0); 10910: } 10911: ctxt->standalone = ctxt->input->standalone; 10912: if ((ctxt->encoding == NULL) && 10913: (ctxt->input->encoding != NULL)) 10914: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10915: if ((ctxt->sax) && (ctxt->sax->startDocument) && 10916: (!ctxt->disableSAX)) 10917: ctxt->sax->startDocument(ctxt->userData); 10918: ctxt->instate = XML_PARSER_MISC; 10919: #ifdef DEBUG_PUSH 10920: xmlGenericError(xmlGenericErrorContext, 10921: "PP: entering MISC\n"); 10922: #endif 10923: } else { 10924: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10925: if ((ctxt->sax) && (ctxt->sax->startDocument) && 10926: (!ctxt->disableSAX)) 10927: ctxt->sax->startDocument(ctxt->userData); 10928: ctxt->instate = XML_PARSER_MISC; 10929: #ifdef DEBUG_PUSH 10930: xmlGenericError(xmlGenericErrorContext, 10931: "PP: entering MISC\n"); 10932: #endif 10933: } 10934: } else { 10935: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10936: ctxt->sax->setDocumentLocator(ctxt->userData, 10937: &xmlDefaultSAXLocator); 10938: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10939: if (ctxt->version == NULL) { 10940: xmlErrMemory(ctxt, NULL); 10941: break; 10942: } 10943: if ((ctxt->sax) && (ctxt->sax->startDocument) && 10944: (!ctxt->disableSAX)) 10945: ctxt->sax->startDocument(ctxt->userData); 10946: ctxt->instate = XML_PARSER_MISC; 10947: #ifdef DEBUG_PUSH 10948: xmlGenericError(xmlGenericErrorContext, 10949: "PP: entering MISC\n"); 10950: #endif 10951: } 10952: break; 10953: case XML_PARSER_START_TAG: { 10954: const xmlChar *name; 10955: const xmlChar *prefix = NULL; 10956: const xmlChar *URI = NULL; 10957: int nsNr = ctxt->nsNr; 10958: 10959: if ((avail < 2) && (ctxt->inputNr == 1)) 10960: goto done; 10961: cur = 
ctxt->input->cur[0]; 10962: if (cur != '<') { 10963: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10964: ctxt->instate = XML_PARSER_EOF; 10965: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10966: ctxt->sax->endDocument(ctxt->userData); 10967: goto done; 10968: } 10969: if (!terminate) { 10970: if (ctxt->progressive) { 10971: /* > can be found unescaped in attribute values */ 10972: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10973: goto done; 10974: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10975: goto done; 10976: } 10977: } 10978: if (ctxt->spaceNr == 0) 10979: spacePush(ctxt, -1); 10980: else if (*ctxt->space == -2) 10981: spacePush(ctxt, -1); 10982: else 10983: spacePush(ctxt, *ctxt->space); 10984: #ifdef LIBXML_SAX1_ENABLED 10985: if (ctxt->sax2) 10986: #endif /* LIBXML_SAX1_ENABLED */ 10987: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10988: #ifdef LIBXML_SAX1_ENABLED 10989: else 10990: name = xmlParseStartTag(ctxt); 10991: #endif /* LIBXML_SAX1_ENABLED */ 10992: if (ctxt->instate == XML_PARSER_EOF) 10993: goto done; 10994: if (name == NULL) { 10995: spacePop(ctxt); 10996: ctxt->instate = XML_PARSER_EOF; 10997: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10998: ctxt->sax->endDocument(ctxt->userData); 10999: goto done; 11000: } 11001: #ifdef LIBXML_VALID_ENABLED 11002: /* 11003: * [ VC: Root Element Type ] 11004: * The Name in the document type declaration must match 11005: * the element type of the root element. 11006: */ 11007: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11008: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11009: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11010: #endif /* LIBXML_VALID_ENABLED */ 11011: 11012: /* 11013: * Check for an Empty Element. 
11014: */ 11015: if ((RAW == '/') && (NXT(1) == '>')) { 11016: SKIP(2); 11017: 11018: if (ctxt->sax2) { 11019: if ((ctxt->sax != NULL) && 11020: (ctxt->sax->endElementNs != NULL) && 11021: (!ctxt->disableSAX)) 11022: ctxt->sax->endElementNs(ctxt->userData, name, 11023: prefix, URI); 11024: if (ctxt->nsNr - nsNr > 0) 11025: nsPop(ctxt, ctxt->nsNr - nsNr); 11026: #ifdef LIBXML_SAX1_ENABLED 11027: } else { 11028: if ((ctxt->sax != NULL) && 11029: (ctxt->sax->endElement != NULL) && 11030: (!ctxt->disableSAX)) 11031: ctxt->sax->endElement(ctxt->userData, name); 11032: #endif /* LIBXML_SAX1_ENABLED */ 11033: } 11034: spacePop(ctxt); 11035: if (ctxt->nameNr == 0) { 11036: ctxt->instate = XML_PARSER_EPILOG; 11037: } else { 11038: ctxt->instate = XML_PARSER_CONTENT; 11039: } 11040: break; 11041: } 11042: if (RAW == '>') { 11043: NEXT; 11044: } else { 11045: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11046: "Couldn't find end of Start Tag %s\n", 11047: name); 11048: nodePop(ctxt); 11049: spacePop(ctxt); 11050: } 11051: if (ctxt->sax2) 11052: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11053: #ifdef LIBXML_SAX1_ENABLED 11054: else 11055: namePush(ctxt, name); 11056: #endif /* LIBXML_SAX1_ENABLED */ 11057: 11058: ctxt->instate = XML_PARSER_CONTENT; 11059: break; 11060: } 11061: case XML_PARSER_CONTENT: { 11062: const xmlChar *test; 11063: unsigned int cons; 11064: if ((avail < 2) && (ctxt->inputNr == 1)) 11065: goto done; 11066: cur = ctxt->input->cur[0]; 11067: next = ctxt->input->cur[1]; 11068: 11069: test = CUR_PTR; 11070: cons = ctxt->input->consumed; 11071: if ((cur == '<') && (next == '/')) { 11072: ctxt->instate = XML_PARSER_END_TAG; 11073: break; 11074: } else if ((cur == '<') && (next == '?')) { 11075: if ((!terminate) && 11076: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11077: goto done; 11078: xmlParsePI(ctxt); 11079: } else if ((cur == '<') && (next != '!')) { 11080: ctxt->instate = XML_PARSER_START_TAG; 11081: break; 11082: } else if ((cur == 
'<') && (next == '!') && 11083: (ctxt->input->cur[2] == '-') && 11084: (ctxt->input->cur[3] == '-')) { 11085: int term; 11086: 11087: if (avail < 4) 11088: goto done; 11089: ctxt->input->cur += 4; 11090: term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11091: ctxt->input->cur -= 4; 11092: if ((!terminate) && (term < 0)) 11093: goto done; 11094: xmlParseComment(ctxt); 11095: ctxt->instate = XML_PARSER_CONTENT; 11096: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11097: (ctxt->input->cur[2] == '[') && 11098: (ctxt->input->cur[3] == 'C') && 11099: (ctxt->input->cur[4] == 'D') && 11100: (ctxt->input->cur[5] == 'A') && 11101: (ctxt->input->cur[6] == 'T') && 11102: (ctxt->input->cur[7] == 'A') && 11103: (ctxt->input->cur[8] == '[')) { 11104: SKIP(9); 11105: ctxt->instate = XML_PARSER_CDATA_SECTION; 11106: break; 11107: } else if ((cur == '<') && (next == '!') && 11108: (avail < 9)) { 11109: goto done; 11110: } else if (cur == '&') { 11111: if ((!terminate) && 11112: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11113: goto done; 11114: xmlParseReference(ctxt); 11115: } else { 11116: /* TODO Avoid the extra copy, handle directly !!! */ 11117: /* 11118: * Goal of the following test is: 11119: * - minimize calls to the SAX 'character' callback 11120: * when they are mergeable 11121: * - handle an problem for isBlank when we only parse 11122: * a sequence of blank chars and the next one is 11123: * not available to check against '<' presence. 11124: * - tries to homogenize the differences in SAX 11125: * callbacks between the push and pull versions 11126: * of the parser. 
11127: */ 11128: if ((ctxt->inputNr == 1) && 11129: (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11130: if (!terminate) { 11131: if (ctxt->progressive) { 11132: if ((lastlt == NULL) || 11133: (ctxt->input->cur > lastlt)) 11134: goto done; 11135: } else if (xmlParseLookupSequence(ctxt, 11136: '<', 0, 0) < 0) { 11137: goto done; 11138: } 11139: } 11140: } 11141: ctxt->checkIndex = 0; 11142: xmlParseCharData(ctxt, 0); 11143: } 11144: /* 11145: * Pop-up of finished entities. 11146: */ 11147: while ((RAW == 0) && (ctxt->inputNr > 1)) 11148: xmlPopInput(ctxt); 11149: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11150: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11151: "detected an error in element content\n"); 11152: ctxt->instate = XML_PARSER_EOF; 11153: break; 11154: } 11155: break; 11156: } 11157: case XML_PARSER_END_TAG: 11158: if (avail < 2) 11159: goto done; 11160: if (!terminate) { 11161: if (ctxt->progressive) { 11162: /* > can be found unescaped in attribute values */ 11163: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11164: goto done; 11165: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11166: goto done; 11167: } 11168: } 11169: if (ctxt->sax2) { 11170: xmlParseEndTag2(ctxt, 11171: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11172: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11173: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11174: nameNsPop(ctxt); 11175: } 11176: #ifdef LIBXML_SAX1_ENABLED 11177: else 11178: xmlParseEndTag1(ctxt, 0); 11179: #endif /* LIBXML_SAX1_ENABLED */ 11180: if (ctxt->instate == XML_PARSER_EOF) { 11181: /* Nothing */ 11182: } else if (ctxt->nameNr == 0) { 11183: ctxt->instate = XML_PARSER_EPILOG; 11184: } else { 11185: ctxt->instate = XML_PARSER_CONTENT; 11186: } 11187: break; 11188: case XML_PARSER_CDATA_SECTION: { 11189: /* 11190: * The Push mode need to have the SAX callback for 11191: * cdataBlock merge back contiguous callbacks. 
11192: */ 11193: int base; 11194: 11195: base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11196: if (base < 0) { 11197: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11198: int tmp; 11199: 11200: tmp = xmlCheckCdataPush(ctxt->input->cur, 11201: XML_PARSER_BIG_BUFFER_SIZE); 11202: if (tmp < 0) { 11203: tmp = -tmp; 11204: ctxt->input->cur += tmp; 11205: goto encoding_error; 11206: } 11207: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11208: if (ctxt->sax->cdataBlock != NULL) 11209: ctxt->sax->cdataBlock(ctxt->userData, 11210: ctxt->input->cur, tmp); 11211: else if (ctxt->sax->characters != NULL) 11212: ctxt->sax->characters(ctxt->userData, 11213: ctxt->input->cur, tmp); 11214: } 11215: SKIPL(tmp); 11216: ctxt->checkIndex = 0; 11217: } 11218: goto done; 11219: } else { 11220: int tmp; 11221: 11222: tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11223: if ((tmp < 0) || (tmp != base)) { 11224: tmp = -tmp; 11225: ctxt->input->cur += tmp; 11226: goto encoding_error; 11227: } 11228: if ((ctxt->sax != NULL) && (base == 0) && 11229: (ctxt->sax->cdataBlock != NULL) && 11230: (!ctxt->disableSAX)) { 11231: /* 11232: * Special case to provide identical behaviour 11233: * between pull and push parsers on enpty CDATA 11234: * sections 11235: */ 11236: if ((ctxt->input->cur - ctxt->input->base >= 9) && 11237: (!strncmp((const char *)&ctxt->input->cur[-9], 11238: "<![CDATA[", 9))) 11239: ctxt->sax->cdataBlock(ctxt->userData, 11240: BAD_CAST "", 0); 11241: } else if ((ctxt->sax != NULL) && (base > 0) && 11242: (!ctxt->disableSAX)) { 11243: if (ctxt->sax->cdataBlock != NULL) 11244: ctxt->sax->cdataBlock(ctxt->userData, 11245: ctxt->input->cur, base); 11246: else if (ctxt->sax->characters != NULL) 11247: ctxt->sax->characters(ctxt->userData, 11248: ctxt->input->cur, base); 11249: } 11250: SKIPL(base + 3); 11251: ctxt->checkIndex = 0; 11252: ctxt->instate = XML_PARSER_CONTENT; 11253: #ifdef DEBUG_PUSH 11254: xmlGenericError(xmlGenericErrorContext, 11255: "PP: entering 
CONTENT\n"); 11256: #endif 11257: } 11258: break; 11259: } 11260: case XML_PARSER_MISC: 11261: SKIP_BLANKS; 11262: if (ctxt->input->buf == NULL) 11263: avail = ctxt->input->length - 11264: (ctxt->input->cur - ctxt->input->base); 11265: else 11266: avail = ctxt->input->buf->buffer->use - 11267: (ctxt->input->cur - ctxt->input->base); 11268: if (avail < 2) 11269: goto done; 11270: cur = ctxt->input->cur[0]; 11271: next = ctxt->input->cur[1]; 11272: if ((cur == '<') && (next == '?')) { 11273: if ((!terminate) && 11274: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11275: goto done; 11276: #ifdef DEBUG_PUSH 11277: xmlGenericError(xmlGenericErrorContext, 11278: "PP: Parsing PI\n"); 11279: #endif 11280: xmlParsePI(ctxt); 11281: ctxt->checkIndex = 0; 11282: } else if ((cur == '<') && (next == '!') && 11283: (ctxt->input->cur[2] == '-') && 11284: (ctxt->input->cur[3] == '-')) { 11285: if ((!terminate) && 11286: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11287: goto done; 11288: #ifdef DEBUG_PUSH 11289: xmlGenericError(xmlGenericErrorContext, 11290: "PP: Parsing Comment\n"); 11291: #endif 11292: xmlParseComment(ctxt); 11293: ctxt->instate = XML_PARSER_MISC; 11294: ctxt->checkIndex = 0; 11295: } else if ((cur == '<') && (next == '!') && 11296: (ctxt->input->cur[2] == 'D') && 11297: (ctxt->input->cur[3] == 'O') && 11298: (ctxt->input->cur[4] == 'C') && 11299: (ctxt->input->cur[5] == 'T') && 11300: (ctxt->input->cur[6] == 'Y') && 11301: (ctxt->input->cur[7] == 'P') && 11302: (ctxt->input->cur[8] == 'E')) { 11303: if ((!terminate) && 11304: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 11305: goto done; 11306: #ifdef DEBUG_PUSH 11307: xmlGenericError(xmlGenericErrorContext, 11308: "PP: Parsing internal subset\n"); 11309: #endif 11310: ctxt->inSubset = 1; 11311: xmlParseDocTypeDecl(ctxt); 11312: if (RAW == '[') { 11313: ctxt->instate = XML_PARSER_DTD; 11314: #ifdef DEBUG_PUSH 11315: xmlGenericError(xmlGenericErrorContext, 11316: "PP: entering DTD\n"); 11317: #endif 
11318: } else { 11319: /* 11320: * Create and update the external subset. 11321: */ 11322: ctxt->inSubset = 2; 11323: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11324: (ctxt->sax->externalSubset != NULL)) 11325: ctxt->sax->externalSubset(ctxt->userData, 11326: ctxt->intSubName, ctxt->extSubSystem, 11327: ctxt->extSubURI); 11328: ctxt->inSubset = 0; 11329: xmlCleanSpecialAttr(ctxt); 11330: ctxt->instate = XML_PARSER_PROLOG; 11331: #ifdef DEBUG_PUSH 11332: xmlGenericError(xmlGenericErrorContext, 11333: "PP: entering PROLOG\n"); 11334: #endif 11335: } 11336: } else if ((cur == '<') && (next == '!') && 11337: (avail < 9)) { 11338: goto done; 11339: } else { 11340: ctxt->instate = XML_PARSER_START_TAG; 11341: ctxt->progressive = 1; 11342: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11343: #ifdef DEBUG_PUSH 11344: xmlGenericError(xmlGenericErrorContext, 11345: "PP: entering START_TAG\n"); 11346: #endif 11347: } 11348: break; 11349: case XML_PARSER_PROLOG: 11350: SKIP_BLANKS; 11351: if (ctxt->input->buf == NULL) 11352: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11353: else 11354: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11355: if (avail < 2) 11356: goto done; 11357: cur = ctxt->input->cur[0]; 11358: next = ctxt->input->cur[1]; 11359: if ((cur == '<') && (next == '?')) { 11360: if ((!terminate) && 11361: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11362: goto done; 11363: #ifdef DEBUG_PUSH 11364: xmlGenericError(xmlGenericErrorContext, 11365: "PP: Parsing PI\n"); 11366: #endif 11367: xmlParsePI(ctxt); 11368: } else if ((cur == '<') && (next == '!') && 11369: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11370: if ((!terminate) && 11371: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11372: goto done; 11373: #ifdef DEBUG_PUSH 11374: xmlGenericError(xmlGenericErrorContext, 11375: "PP: Parsing Comment\n"); 11376: #endif 11377: xmlParseComment(ctxt); 11378: ctxt->instate = 
XML_PARSER_PROLOG; 11379: } else if ((cur == '<') && (next == '!') && 11380: (avail < 4)) { 11381: goto done; 11382: } else { 11383: ctxt->instate = XML_PARSER_START_TAG; 11384: if (ctxt->progressive == 0) 11385: ctxt->progressive = 1; 11386: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11387: #ifdef DEBUG_PUSH 11388: xmlGenericError(xmlGenericErrorContext, 11389: "PP: entering START_TAG\n"); 11390: #endif 11391: } 11392: break; 11393: case XML_PARSER_EPILOG: 11394: SKIP_BLANKS; 11395: if (ctxt->input->buf == NULL) 11396: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11397: else 11398: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11399: if (avail < 2) 11400: goto done; 11401: cur = ctxt->input->cur[0]; 11402: next = ctxt->input->cur[1]; 11403: if ((cur == '<') && (next == '?')) { 11404: if ((!terminate) && 11405: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11406: goto done; 11407: #ifdef DEBUG_PUSH 11408: xmlGenericError(xmlGenericErrorContext, 11409: "PP: Parsing PI\n"); 11410: #endif 11411: xmlParsePI(ctxt); 11412: ctxt->instate = XML_PARSER_EPILOG; 11413: } else if ((cur == '<') && (next == '!') && 11414: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11415: if ((!terminate) && 11416: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11417: goto done; 11418: #ifdef DEBUG_PUSH 11419: xmlGenericError(xmlGenericErrorContext, 11420: "PP: Parsing Comment\n"); 11421: #endif 11422: xmlParseComment(ctxt); 11423: ctxt->instate = XML_PARSER_EPILOG; 11424: } else if ((cur == '<') && (next == '!') && 11425: (avail < 4)) { 11426: goto done; 11427: } else { 11428: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11429: ctxt->instate = XML_PARSER_EOF; 11430: #ifdef DEBUG_PUSH 11431: xmlGenericError(xmlGenericErrorContext, 11432: "PP: entering EOF\n"); 11433: #endif 11434: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11435: ctxt->sax->endDocument(ctxt->userData); 11436: goto done; 11437: } 
11438: break; 11439: case XML_PARSER_DTD: { 11440: /* 11441: * Sorry but progressive parsing of the internal subset 11442: * is not expected to be supported. We first check that 11443: * the full content of the internal subset is available and 11444: * the parsing is launched only at that point. 11445: * Internal subset ends up with "']' S? '>'" in an unescaped 11446: * section and not in a ']]>' sequence which are conditional 11447: * sections (whoever argued to keep that crap in XML deserve 11448: * a place in hell !). 11449: */ 11450: int base, i; 11451: xmlChar *buf; 11452: xmlChar quote = 0; 11453: 11454: base = ctxt->input->cur - ctxt->input->base; 11455: if (base < 0) return(0); 11456: if (ctxt->checkIndex > base) 11457: base = ctxt->checkIndex; 11458: buf = ctxt->input->buf->buffer->content; 11459: for (;(unsigned int) base < ctxt->input->buf->buffer->use; 11460: base++) { 11461: if (quote != 0) { 11462: if (buf[base] == quote) 11463: quote = 0; 11464: continue; 11465: } 11466: if ((quote == 0) && (buf[base] == '<')) { 11467: int found = 0; 11468: /* special handling of comments */ 11469: if (((unsigned int) base + 4 < 11470: ctxt->input->buf->buffer->use) && 11471: (buf[base + 1] == '!') && 11472: (buf[base + 2] == '-') && 11473: (buf[base + 3] == '-')) { 11474: for (;(unsigned int) base + 3 < 11475: ctxt->input->buf->buffer->use; base++) { 11476: if ((buf[base] == '-') && 11477: (buf[base + 1] == '-') && 11478: (buf[base + 2] == '>')) { 11479: found = 1; 11480: base += 2; 11481: break; 11482: } 11483: } 11484: if (!found) { 11485: #if 0 11486: fprintf(stderr, "unfinished comment\n"); 11487: #endif 11488: break; /* for */ 11489: } 11490: continue; 11491: } 11492: } 11493: if (buf[base] == '"') { 11494: quote = '"'; 11495: continue; 11496: } 11497: if (buf[base] == '\'') { 11498: quote = '\''; 11499: continue; 11500: } 11501: if (buf[base] == ']') { 11502: #if 0 11503: fprintf(stderr, "%c%c%c%c: ", buf[base], 11504: buf[base + 1], buf[base + 2], buf[base + 
3]); 11505: #endif 11506: if ((unsigned int) base +1 >= 11507: ctxt->input->buf->buffer->use) 11508: break; 11509: if (buf[base + 1] == ']') { 11510: /* conditional crap, skip both ']' ! */ 11511: base++; 11512: continue; 11513: } 11514: for (i = 1; 11515: (unsigned int) base + i < ctxt->input->buf->buffer->use; 11516: i++) { 11517: if (buf[base + i] == '>') { 11518: #if 0 11519: fprintf(stderr, "found\n"); 11520: #endif 11521: goto found_end_int_subset; 11522: } 11523: if (!IS_BLANK_CH(buf[base + i])) { 11524: #if 0 11525: fprintf(stderr, "not found\n"); 11526: #endif 11527: goto not_end_of_int_subset; 11528: } 11529: } 11530: #if 0 11531: fprintf(stderr, "end of stream\n"); 11532: #endif 11533: break; 11534: 11535: } 11536: not_end_of_int_subset: 11537: continue; /* for */ 11538: } 11539: /* 11540: * We didn't found the end of the Internal subset 11541: */ 11542: #ifdef DEBUG_PUSH 11543: if (next == 0) 11544: xmlGenericError(xmlGenericErrorContext, 11545: "PP: lookup of int subset end filed\n"); 11546: #endif 11547: goto done; 11548: 11549: found_end_int_subset: 11550: xmlParseInternalSubset(ctxt); 11551: ctxt->inSubset = 2; 11552: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11553: (ctxt->sax->externalSubset != NULL)) 11554: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11555: ctxt->extSubSystem, ctxt->extSubURI); 11556: ctxt->inSubset = 0; 11557: xmlCleanSpecialAttr(ctxt); 11558: ctxt->instate = XML_PARSER_PROLOG; 11559: ctxt->checkIndex = 0; 11560: #ifdef DEBUG_PUSH 11561: xmlGenericError(xmlGenericErrorContext, 11562: "PP: entering PROLOG\n"); 11563: #endif 11564: break; 11565: } 11566: case XML_PARSER_COMMENT: 11567: xmlGenericError(xmlGenericErrorContext, 11568: "PP: internal error, state == COMMENT\n"); 11569: ctxt->instate = XML_PARSER_CONTENT; 11570: #ifdef DEBUG_PUSH 11571: xmlGenericError(xmlGenericErrorContext, 11572: "PP: entering CONTENT\n"); 11573: #endif 11574: break; 11575: case XML_PARSER_IGNORE: 11576: 
xmlGenericError(xmlGenericErrorContext, 11577: "PP: internal error, state == IGNORE"); 11578: ctxt->instate = XML_PARSER_DTD; 11579: #ifdef DEBUG_PUSH 11580: xmlGenericError(xmlGenericErrorContext, 11581: "PP: entering DTD\n"); 11582: #endif 11583: break; 11584: case XML_PARSER_PI: 11585: xmlGenericError(xmlGenericErrorContext, 11586: "PP: internal error, state == PI\n"); 11587: ctxt->instate = XML_PARSER_CONTENT; 11588: #ifdef DEBUG_PUSH 11589: xmlGenericError(xmlGenericErrorContext, 11590: "PP: entering CONTENT\n"); 11591: #endif 11592: break; 11593: case XML_PARSER_ENTITY_DECL: 11594: xmlGenericError(xmlGenericErrorContext, 11595: "PP: internal error, state == ENTITY_DECL\n"); 11596: ctxt->instate = XML_PARSER_DTD; 11597: #ifdef DEBUG_PUSH 11598: xmlGenericError(xmlGenericErrorContext, 11599: "PP: entering DTD\n"); 11600: #endif 11601: break; 11602: case XML_PARSER_ENTITY_VALUE: 11603: xmlGenericError(xmlGenericErrorContext, 11604: "PP: internal error, state == ENTITY_VALUE\n"); 11605: ctxt->instate = XML_PARSER_CONTENT; 11606: #ifdef DEBUG_PUSH 11607: xmlGenericError(xmlGenericErrorContext, 11608: "PP: entering DTD\n"); 11609: #endif 11610: break; 11611: case XML_PARSER_ATTRIBUTE_VALUE: 11612: xmlGenericError(xmlGenericErrorContext, 11613: "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11614: ctxt->instate = XML_PARSER_START_TAG; 11615: #ifdef DEBUG_PUSH 11616: xmlGenericError(xmlGenericErrorContext, 11617: "PP: entering START_TAG\n"); 11618: #endif 11619: break; 11620: case XML_PARSER_SYSTEM_LITERAL: 11621: xmlGenericError(xmlGenericErrorContext, 11622: "PP: internal error, state == SYSTEM_LITERAL\n"); 11623: ctxt->instate = XML_PARSER_START_TAG; 11624: #ifdef DEBUG_PUSH 11625: xmlGenericError(xmlGenericErrorContext, 11626: "PP: entering START_TAG\n"); 11627: #endif 11628: break; 11629: case XML_PARSER_PUBLIC_LITERAL: 11630: xmlGenericError(xmlGenericErrorContext, 11631: "PP: internal error, state == PUBLIC_LITERAL\n"); 11632: ctxt->instate = 
XML_PARSER_START_TAG; 11633: #ifdef DEBUG_PUSH 11634: xmlGenericError(xmlGenericErrorContext, 11635: "PP: entering START_TAG\n"); 11636: #endif 11637: break; 11638: } 11639: } 11640: done: 11641: #ifdef DEBUG_PUSH 11642: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11643: #endif 11644: return(ret); 11645: encoding_error: 11646: { 11647: char buffer[150]; 11648: 11649: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11650: ctxt->input->cur[0], ctxt->input->cur[1], 11651: ctxt->input->cur[2], ctxt->input->cur[3]); 11652: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11653: "Input is not proper UTF-8, indicate encoding !\n%s", 11654: BAD_CAST buffer, NULL); 11655: } 11656: return(0); 11657: } 11658: 11659: /** 11660: * xmlParseChunk: 11661: * @ctxt: an XML parser context 11662: * @chunk: an char array 11663: * @size: the size in byte of the chunk 11664: * @terminate: last chunk indicator 11665: * 11666: * Parse a Chunk of memory 11667: * 11668: * Returns zero if no error, the xmlParserErrors otherwise. 
11669: */ 11670: int 11671: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 11672: int terminate) { 11673: int end_in_lf = 0; 11674: int remain = 0; 11675: 11676: if (ctxt == NULL) 11677: return(XML_ERR_INTERNAL_ERROR); 11678: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11679: return(ctxt->errNo); 11680: if (ctxt->instate == XML_PARSER_START) 11681: xmlDetectSAX2(ctxt); 11682: if ((size > 0) && (chunk != NULL) && (!terminate) && 11683: (chunk[size - 1] == '\r')) { 11684: end_in_lf = 1; 11685: size--; 11686: } 11687: 11688: xmldecl_done: 11689: 11690: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11691: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 11692: int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11693: int cur = ctxt->input->cur - ctxt->input->base; 11694: int res; 11695: 11696: /* 11697: * Specific handling if we autodetected an encoding, we should not 11698: * push more than the first line ... which depend on the encoding 11699: * And only push the rest once the final encoding was detected 11700: */ 11701: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 11702: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 11703: unsigned int len = 45; 11704: 11705: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11706: BAD_CAST "UTF-16")) || 11707: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11708: BAD_CAST "UTF16"))) 11709: len = 90; 11710: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11711: BAD_CAST "UCS-4")) || 11712: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11713: BAD_CAST "UCS4"))) 11714: len = 180; 11715: 11716: if (ctxt->input->buf->rawconsumed < len) 11717: len -= ctxt->input->buf->rawconsumed; 11718: 11719: /* 11720: * Change size for reading the initial declaration only 11721: * if size is greater than len. 
Otherwise, memmove in xmlBufferAdd 11722: * will blindly copy extra bytes from memory. 11723: */ 11724: if ((unsigned int) size > len) { 11725: remain = size - len; 11726: size = len; 11727: } else { 11728: remain = 0; 11729: } 11730: } 11731: res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11732: if (res < 0) { 11733: ctxt->errNo = XML_PARSER_EOF; 11734: ctxt->disableSAX = 1; 11735: return (XML_PARSER_EOF); 11736: } 11737: ctxt->input->base = ctxt->input->buf->buffer->content + base; 11738: ctxt->input->cur = ctxt->input->base + cur; 11739: ctxt->input->end = 11740: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11741: #ifdef DEBUG_PUSH 11742: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11743: #endif 11744: 11745: } else if (ctxt->instate != XML_PARSER_EOF) { 11746: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 11747: xmlParserInputBufferPtr in = ctxt->input->buf; 11748: if ((in->encoder != NULL) && (in->buffer != NULL) && 11749: (in->raw != NULL)) { 11750: int nbchars; 11751: 11752: nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 11753: if (nbchars < 0) { 11754: /* TODO 2.6.0 */ 11755: xmlGenericError(xmlGenericErrorContext, 11756: "xmlParseChunk: encoder error\n"); 11757: return(XML_ERR_INVALID_ENCODING); 11758: } 11759: } 11760: } 11761: } 11762: if (remain != 0) 11763: xmlParseTryOrFinish(ctxt, 0); 11764: else 11765: xmlParseTryOrFinish(ctxt, terminate); 11766: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11767: return(ctxt->errNo); 11768: 11769: if (remain != 0) { 11770: chunk += size; 11771: size = remain; 11772: remain = 0; 11773: goto xmldecl_done; 11774: } 11775: if ((end_in_lf == 1) && (ctxt->input != NULL) && 11776: (ctxt->input->buf != NULL)) { 11777: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 11778: } 11779: if (terminate) { 11780: /* 11781: * Check for termination 11782: */ 11783: int avail = 0; 11784: 11785: if (ctxt->input != NULL) { 11786: if 
(ctxt->input->buf == NULL) 11787: avail = ctxt->input->length - 11788: (ctxt->input->cur - ctxt->input->base); 11789: else 11790: avail = ctxt->input->buf->buffer->use - 11791: (ctxt->input->cur - ctxt->input->base); 11792: } 11793: 11794: if ((ctxt->instate != XML_PARSER_EOF) && 11795: (ctxt->instate != XML_PARSER_EPILOG)) { 11796: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11797: } 11798: if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 11799: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11800: } 11801: if (ctxt->instate != XML_PARSER_EOF) { 11802: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11803: ctxt->sax->endDocument(ctxt->userData); 11804: } 11805: ctxt->instate = XML_PARSER_EOF; 11806: } 11807: return((xmlParserErrors) ctxt->errNo); 11808: } 11809: 11810: /************************************************************************ 11811: * * 11812: * I/O front end functions to the parser * 11813: * * 11814: ************************************************************************/ 11815: 11816: /** 11817: * xmlCreatePushParserCtxt: 11818: * @sax: a SAX handler 11819: * @user_data: The user data returned on SAX callbacks 11820: * @chunk: a pointer to an array of chars 11821: * @size: number of chars in the array 11822: * @filename: an optional file name or URI 11823: * 11824: * Create a parser context for using the XML parser in push mode. 11825: * If @buffer and @size are non-NULL, the data is used to detect 11826: * the encoding. The remaining characters will be parsed so they 11827: * don't need to be fed in again through xmlParseChunk. 11828: * To allow content encoding detection, @size should be >= 4 11829: * The value of @filename is used for fetching external entities 11830: * and error/warning reports. 
11831: * 11832: * Returns the new parser context or NULL 11833: */ 11834: 11835: xmlParserCtxtPtr 11836: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11837: const char *chunk, int size, const char *filename) { 11838: xmlParserCtxtPtr ctxt; 11839: xmlParserInputPtr inputStream; 11840: xmlParserInputBufferPtr buf; 11841: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11842: 11843: /* 11844: * plug some encoding conversion routines 11845: */ 11846: if ((chunk != NULL) && (size >= 4)) 11847: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11848: 11849: buf = xmlAllocParserInputBuffer(enc); 11850: if (buf == NULL) return(NULL); 11851: 11852: ctxt = xmlNewParserCtxt(); 11853: if (ctxt == NULL) { 11854: xmlErrMemory(NULL, "creating parser: out of memory\n"); 11855: xmlFreeParserInputBuffer(buf); 11856: return(NULL); 11857: } 11858: ctxt->dictNames = 1; 11859: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11860: if (ctxt->pushTab == NULL) { 11861: xmlErrMemory(ctxt, NULL); 11862: xmlFreeParserInputBuffer(buf); 11863: xmlFreeParserCtxt(ctxt); 11864: return(NULL); 11865: } 11866: if (sax != NULL) { 11867: #ifdef LIBXML_SAX1_ENABLED 11868: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11869: #endif /* LIBXML_SAX1_ENABLED */ 11870: xmlFree(ctxt->sax); 11871: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11872: if (ctxt->sax == NULL) { 11873: xmlErrMemory(ctxt, NULL); 11874: xmlFreeParserInputBuffer(buf); 11875: xmlFreeParserCtxt(ctxt); 11876: return(NULL); 11877: } 11878: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11879: if (sax->initialized == XML_SAX2_MAGIC) 11880: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11881: else 11882: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11883: if (user_data != NULL) 11884: ctxt->userData = user_data; 11885: } 11886: if (filename == NULL) { 11887: ctxt->directory = NULL; 11888: } else { 11889: ctxt->directory = xmlParserGetDirectory(filename); 
11890: } 11891: 11892: inputStream = xmlNewInputStream(ctxt); 11893: if (inputStream == NULL) { 11894: xmlFreeParserCtxt(ctxt); 11895: xmlFreeParserInputBuffer(buf); 11896: return(NULL); 11897: } 11898: 11899: if (filename == NULL) 11900: inputStream->filename = NULL; 11901: else { 11902: inputStream->filename = (char *) 11903: xmlCanonicPath((const xmlChar *) filename); 11904: if (inputStream->filename == NULL) { 11905: xmlFreeParserCtxt(ctxt); 11906: xmlFreeParserInputBuffer(buf); 11907: return(NULL); 11908: } 11909: } 11910: inputStream->buf = buf; 11911: inputStream->base = inputStream->buf->buffer->content; 11912: inputStream->cur = inputStream->buf->buffer->content; 11913: inputStream->end = 11914: &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11915: 11916: inputPush(ctxt, inputStream); 11917: 11918: /* 11919: * If the caller didn't provide an initial 'chunk' for determining 11920: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11921: * that it can be automatically determined later 11922: */ 11923: if ((size == 0) || (chunk == NULL)) { 11924: ctxt->charset = XML_CHAR_ENCODING_NONE; 11925: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11926: int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11927: int cur = ctxt->input->cur - ctxt->input->base; 11928: 11929: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11930: 11931: ctxt->input->base = ctxt->input->buf->buffer->content + base; 11932: ctxt->input->cur = ctxt->input->base + cur; 11933: ctxt->input->end = 11934: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11935: #ifdef DEBUG_PUSH 11936: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11937: #endif 11938: } 11939: 11940: if (enc != XML_CHAR_ENCODING_NONE) { 11941: xmlSwitchEncoding(ctxt, enc); 11942: } 11943: 11944: return(ctxt); 11945: } 11946: #endif /* LIBXML_PUSH_ENABLED */ 11947: 11948: /** 11949: * xmlStopParser: 11950: * @ctxt: an 
XML parser context 11951: * 11952: * Blocks further parser processing 11953: */ 11954: void 11955: xmlStopParser(xmlParserCtxtPtr ctxt) { 11956: if (ctxt == NULL) 11957: return; 11958: ctxt->instate = XML_PARSER_EOF; 11959: ctxt->disableSAX = 1; 11960: if (ctxt->input != NULL) { 11961: ctxt->input->cur = BAD_CAST""; 11962: ctxt->input->base = ctxt->input->cur; 11963: } 11964: } 11965: 11966: /** 11967: * xmlCreateIOParserCtxt: 11968: * @sax: a SAX handler 11969: * @user_data: The user data returned on SAX callbacks 11970: * @ioread: an I/O read function 11971: * @ioclose: an I/O close function 11972: * @ioctx: an I/O handler 11973: * @enc: the charset encoding if known 11974: * 11975: * Create a parser context for using the XML parser with an existing 11976: * I/O stream 11977: * 11978: * Returns the new parser context or NULL 11979: */ 11980: xmlParserCtxtPtr 11981: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11982: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11983: void *ioctx, xmlCharEncoding enc) { 11984: xmlParserCtxtPtr ctxt; 11985: xmlParserInputPtr inputStream; 11986: xmlParserInputBufferPtr buf; 11987: 11988: if (ioread == NULL) return(NULL); 11989: 11990: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11991: if (buf == NULL) { 11992: if (ioclose != NULL) 11993: ioclose(ioctx); 11994: return (NULL); 11995: } 11996: 11997: ctxt = xmlNewParserCtxt(); 11998: if (ctxt == NULL) { 11999: xmlFreeParserInputBuffer(buf); 12000: return(NULL); 12001: } 12002: if (sax != NULL) { 12003: #ifdef LIBXML_SAX1_ENABLED 12004: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12005: #endif /* LIBXML_SAX1_ENABLED */ 12006: xmlFree(ctxt->sax); 12007: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12008: if (ctxt->sax == NULL) { 12009: xmlErrMemory(ctxt, NULL); 12010: xmlFreeParserCtxt(ctxt); 12011: return(NULL); 12012: } 12013: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12014: if (sax->initialized 
== XML_SAX2_MAGIC) 12015: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12016: else 12017: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12018: if (user_data != NULL) 12019: ctxt->userData = user_data; 12020: } 12021: 12022: inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12023: if (inputStream == NULL) { 12024: xmlFreeParserCtxt(ctxt); 12025: return(NULL); 12026: } 12027: inputPush(ctxt, inputStream); 12028: 12029: return(ctxt); 12030: } 12031: 12032: #ifdef LIBXML_VALID_ENABLED 12033: /************************************************************************ 12034: * * 12035: * Front ends when parsing a DTD * 12036: * * 12037: ************************************************************************/ 12038: 12039: /** 12040: * xmlIOParseDTD: 12041: * @sax: the SAX handler block or NULL 12042: * @input: an Input Buffer 12043: * @enc: the charset encoding if known 12044: * 12045: * Load and parse a DTD 12046: * 12047: * Returns the resulting xmlDtdPtr or NULL in case of error. 12048: * @input will be freed by the function in any case. 
12049: */ 12050: 12051: xmlDtdPtr 12052: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12053: xmlCharEncoding enc) { 12054: xmlDtdPtr ret = NULL; 12055: xmlParserCtxtPtr ctxt; 12056: xmlParserInputPtr pinput = NULL; 12057: xmlChar start[4]; 12058: 12059: if (input == NULL) 12060: return(NULL); 12061: 12062: ctxt = xmlNewParserCtxt(); 12063: if (ctxt == NULL) { 12064: xmlFreeParserInputBuffer(input); 12065: return(NULL); 12066: } 12067: 12068: /* 12069: * Set-up the SAX context 12070: */ 12071: if (sax != NULL) { 12072: if (ctxt->sax != NULL) 12073: xmlFree(ctxt->sax); 12074: ctxt->sax = sax; 12075: ctxt->userData = ctxt; 12076: } 12077: xmlDetectSAX2(ctxt); 12078: 12079: /* 12080: * generate a parser input from the I/O handler 12081: */ 12082: 12083: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12084: if (pinput == NULL) { 12085: if (sax != NULL) ctxt->sax = NULL; 12086: xmlFreeParserInputBuffer(input); 12087: xmlFreeParserCtxt(ctxt); 12088: return(NULL); 12089: } 12090: 12091: /* 12092: * plug some encoding conversion routines here. 12093: */ 12094: if (xmlPushInput(ctxt, pinput) < 0) { 12095: if (sax != NULL) ctxt->sax = NULL; 12096: xmlFreeParserCtxt(ctxt); 12097: return(NULL); 12098: } 12099: if (enc != XML_CHAR_ENCODING_NONE) { 12100: xmlSwitchEncoding(ctxt, enc); 12101: } 12102: 12103: pinput->filename = NULL; 12104: pinput->line = 1; 12105: pinput->col = 1; 12106: pinput->base = ctxt->input->cur; 12107: pinput->cur = ctxt->input->cur; 12108: pinput->free = NULL; 12109: 12110: /* 12111: * let's parse that entity knowing it's an external subset. 
12112: */ 12113: ctxt->inSubset = 2; 12114: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12115: if (ctxt->myDoc == NULL) { 12116: xmlErrMemory(ctxt, "New Doc failed"); 12117: return(NULL); 12118: } 12119: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12120: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12121: BAD_CAST "none", BAD_CAST "none"); 12122: 12123: if ((enc == XML_CHAR_ENCODING_NONE) && 12124: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12125: /* 12126: * Get the 4 first bytes and decode the charset 12127: * if enc != XML_CHAR_ENCODING_NONE 12128: * plug some encoding conversion routines. 12129: */ 12130: start[0] = RAW; 12131: start[1] = NXT(1); 12132: start[2] = NXT(2); 12133: start[3] = NXT(3); 12134: enc = xmlDetectCharEncoding(start, 4); 12135: if (enc != XML_CHAR_ENCODING_NONE) { 12136: xmlSwitchEncoding(ctxt, enc); 12137: } 12138: } 12139: 12140: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12141: 12142: if (ctxt->myDoc != NULL) { 12143: if (ctxt->wellFormed) { 12144: ret = ctxt->myDoc->extSubset; 12145: ctxt->myDoc->extSubset = NULL; 12146: if (ret != NULL) { 12147: xmlNodePtr tmp; 12148: 12149: ret->doc = NULL; 12150: tmp = ret->children; 12151: while (tmp != NULL) { 12152: tmp->doc = NULL; 12153: tmp = tmp->next; 12154: } 12155: } 12156: } else { 12157: ret = NULL; 12158: } 12159: xmlFreeDoc(ctxt->myDoc); 12160: ctxt->myDoc = NULL; 12161: } 12162: if (sax != NULL) ctxt->sax = NULL; 12163: xmlFreeParserCtxt(ctxt); 12164: 12165: return(ret); 12166: } 12167: 12168: /** 12169: * xmlSAXParseDTD: 12170: * @sax: the SAX handler block 12171: * @ExternalID: a NAME* containing the External ID of the DTD 12172: * @SystemID: a NAME* containing the URL to the DTD 12173: * 12174: * Load and parse an external subset. 12175: * 12176: * Returns the resulting xmlDtdPtr or NULL in case of error. 
12177: */ 12178: 12179: xmlDtdPtr 12180: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12181: const xmlChar *SystemID) { 12182: xmlDtdPtr ret = NULL; 12183: xmlParserCtxtPtr ctxt; 12184: xmlParserInputPtr input = NULL; 12185: xmlCharEncoding enc; 12186: xmlChar* systemIdCanonic; 12187: 12188: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12189: 12190: ctxt = xmlNewParserCtxt(); 12191: if (ctxt == NULL) { 12192: return(NULL); 12193: } 12194: 12195: /* 12196: * Set-up the SAX context 12197: */ 12198: if (sax != NULL) { 12199: if (ctxt->sax != NULL) 12200: xmlFree(ctxt->sax); 12201: ctxt->sax = sax; 12202: ctxt->userData = ctxt; 12203: } 12204: 12205: /* 12206: * Canonicalise the system ID 12207: */ 12208: systemIdCanonic = xmlCanonicPath(SystemID); 12209: if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12210: xmlFreeParserCtxt(ctxt); 12211: return(NULL); 12212: } 12213: 12214: /* 12215: * Ask the Entity resolver to load the damn thing 12216: */ 12217: 12218: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12219: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12220: systemIdCanonic); 12221: if (input == NULL) { 12222: if (sax != NULL) ctxt->sax = NULL; 12223: xmlFreeParserCtxt(ctxt); 12224: if (systemIdCanonic != NULL) 12225: xmlFree(systemIdCanonic); 12226: return(NULL); 12227: } 12228: 12229: /* 12230: * plug some encoding conversion routines here. 
12231: */ 12232: if (xmlPushInput(ctxt, input) < 0) { 12233: if (sax != NULL) ctxt->sax = NULL; 12234: xmlFreeParserCtxt(ctxt); 12235: if (systemIdCanonic != NULL) 12236: xmlFree(systemIdCanonic); 12237: return(NULL); 12238: } 12239: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12240: enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12241: xmlSwitchEncoding(ctxt, enc); 12242: } 12243: 12244: if (input->filename == NULL) 12245: input->filename = (char *) systemIdCanonic; 12246: else 12247: xmlFree(systemIdCanonic); 12248: input->line = 1; 12249: input->col = 1; 12250: input->base = ctxt->input->cur; 12251: input->cur = ctxt->input->cur; 12252: input->free = NULL; 12253: 12254: /* 12255: * let's parse that entity knowing it's an external subset. 12256: */ 12257: ctxt->inSubset = 2; 12258: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12259: if (ctxt->myDoc == NULL) { 12260: xmlErrMemory(ctxt, "New Doc failed"); 12261: if (sax != NULL) ctxt->sax = NULL; 12262: xmlFreeParserCtxt(ctxt); 12263: return(NULL); 12264: } 12265: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12266: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12267: ExternalID, SystemID); 12268: xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12269: 12270: if (ctxt->myDoc != NULL) { 12271: if (ctxt->wellFormed) { 12272: ret = ctxt->myDoc->extSubset; 12273: ctxt->myDoc->extSubset = NULL; 12274: if (ret != NULL) { 12275: xmlNodePtr tmp; 12276: 12277: ret->doc = NULL; 12278: tmp = ret->children; 12279: while (tmp != NULL) { 12280: tmp->doc = NULL; 12281: tmp = tmp->next; 12282: } 12283: } 12284: } else { 12285: ret = NULL; 12286: } 12287: xmlFreeDoc(ctxt->myDoc); 12288: ctxt->myDoc = NULL; 12289: } 12290: if (sax != NULL) ctxt->sax = NULL; 12291: xmlFreeParserCtxt(ctxt); 12292: 12293: return(ret); 12294: } 12295: 12296: 12297: /** 12298: * xmlParseDTD: 12299: * @ExternalID: a NAME* containing the External ID of the DTD 12300: * @SystemID: a NAME* containing the URL to the DTD 12301: * 
12302: * Load and parse an external subset. 12303: * 12304: * Returns the resulting xmlDtdPtr or NULL in case of error. 12305: */ 12306: 12307: xmlDtdPtr 12308: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12309: return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12310: } 12311: #endif /* LIBXML_VALID_ENABLED */ 12312: 12313: /************************************************************************ 12314: * * 12315: * Front ends when parsing an Entity * 12316: * * 12317: ************************************************************************/ 12318: 12319: /** 12320: * xmlParseCtxtExternalEntity: 12321: * @ctx: the existing parsing context 12322: * @URL: the URL for the entity to load 12323: * @ID: the System ID for the entity to load 12324: * @lst: the return value for the set of parsed nodes 12325: * 12326: * Parse an external general entity within an existing parsing context 12327: * An external general parsed entity is well-formed if it matches the 12328: * production labeled extParsedEnt. 12329: * 12330: * [78] extParsedEnt ::= TextDecl? 
content 12331: * 12332: * Returns 0 if the entity is well formed, -1 in case of args problem and 12333: * the parser error code otherwise 12334: */ 12335: 12336: int 12337: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12338: const xmlChar *ID, xmlNodePtr *lst) { 12339: xmlParserCtxtPtr ctxt; 12340: xmlDocPtr newDoc; 12341: xmlNodePtr newRoot; 12342: xmlSAXHandlerPtr oldsax = NULL; 12343: int ret = 0; 12344: xmlChar start[4]; 12345: xmlCharEncoding enc; 12346: 12347: if (ctx == NULL) return(-1); 12348: 12349: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12350: (ctx->depth > 1024)) { 12351: return(XML_ERR_ENTITY_LOOP); 12352: } 12353: 12354: if (lst != NULL) 12355: *lst = NULL; 12356: if ((URL == NULL) && (ID == NULL)) 12357: return(-1); 12358: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12359: return(-1); 12360: 12361: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12362: if (ctxt == NULL) { 12363: return(-1); 12364: } 12365: 12366: oldsax = ctxt->sax; 12367: ctxt->sax = ctx->sax; 12368: xmlDetectSAX2(ctxt); 12369: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12370: if (newDoc == NULL) { 12371: xmlFreeParserCtxt(ctxt); 12372: return(-1); 12373: } 12374: newDoc->properties = XML_DOC_INTERNAL; 12375: if (ctx->myDoc->dict) { 12376: newDoc->dict = ctx->myDoc->dict; 12377: xmlDictReference(newDoc->dict); 12378: } 12379: if (ctx->myDoc != NULL) { 12380: newDoc->intSubset = ctx->myDoc->intSubset; 12381: newDoc->extSubset = ctx->myDoc->extSubset; 12382: } 12383: if (ctx->myDoc->URL != NULL) { 12384: newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12385: } 12386: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12387: if (newRoot == NULL) { 12388: ctxt->sax = oldsax; 12389: xmlFreeParserCtxt(ctxt); 12390: newDoc->intSubset = NULL; 12391: newDoc->extSubset = NULL; 12392: xmlFreeDoc(newDoc); 12393: return(-1); 12394: } 12395: xmlAddChild((xmlNodePtr) newDoc, newRoot); 12396: nodePush(ctxt, 
newDoc->children); 12397: if (ctx->myDoc == NULL) { 12398: ctxt->myDoc = newDoc; 12399: } else { 12400: ctxt->myDoc = ctx->myDoc; 12401: newDoc->children->doc = ctx->myDoc; 12402: } 12403: 12404: /* 12405: * Get the 4 first bytes and decode the charset 12406: * if enc != XML_CHAR_ENCODING_NONE 12407: * plug some encoding conversion routines. 12408: */ 12409: GROW 12410: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12411: start[0] = RAW; 12412: start[1] = NXT(1); 12413: start[2] = NXT(2); 12414: start[3] = NXT(3); 12415: enc = xmlDetectCharEncoding(start, 4); 12416: if (enc != XML_CHAR_ENCODING_NONE) { 12417: xmlSwitchEncoding(ctxt, enc); 12418: } 12419: } 12420: 12421: /* 12422: * Parse a possible text declaration first 12423: */ 12424: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12425: xmlParseTextDecl(ctxt); 12426: /* 12427: * An XML-1.0 document can't reference an entity not XML-1.0 12428: */ 12429: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12430: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12431: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12432: "Version mismatch between document and entity\n"); 12433: } 12434: } 12435: 12436: /* 12437: * If the user provided its own SAX callbacks then reuse the 12438: * useData callback field, otherwise the expected setup in a 12439: * DOM builder is to have userData == ctxt 12440: */ 12441: if (ctx->userData == ctx) 12442: ctxt->userData = ctxt; 12443: else 12444: ctxt->userData = ctx->userData; 12445: 12446: /* 12447: * Doing validity checking on chunk doesn't make sense 12448: */ 12449: ctxt->instate = XML_PARSER_CONTENT; 12450: ctxt->validate = ctx->validate; 12451: ctxt->valid = ctx->valid; 12452: ctxt->loadsubset = ctx->loadsubset; 12453: ctxt->depth = ctx->depth + 1; 12454: ctxt->replaceEntities = ctx->replaceEntities; 12455: if (ctxt->validate) { 12456: ctxt->vctxt.error = ctx->vctxt.error; 12457: ctxt->vctxt.warning = ctx->vctxt.warning; 12458: } else { 12459: 
ctxt->vctxt.error = NULL; 12460: ctxt->vctxt.warning = NULL; 12461: } 12462: ctxt->vctxt.nodeTab = NULL; 12463: ctxt->vctxt.nodeNr = 0; 12464: ctxt->vctxt.nodeMax = 0; 12465: ctxt->vctxt.node = NULL; 12466: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12467: ctxt->dict = ctx->dict; 12468: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12469: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12470: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12471: ctxt->dictNames = ctx->dictNames; 12472: ctxt->attsDefault = ctx->attsDefault; 12473: ctxt->attsSpecial = ctx->attsSpecial; 12474: ctxt->linenumbers = ctx->linenumbers; 12475: 12476: xmlParseContent(ctxt); 12477: 12478: ctx->validate = ctxt->validate; 12479: ctx->valid = ctxt->valid; 12480: if ((RAW == '<') && (NXT(1) == '/')) { 12481: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12482: } else if (RAW != 0) { 12483: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12484: } 12485: if (ctxt->node != newDoc->children) { 12486: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12487: } 12488: 12489: if (!ctxt->wellFormed) { 12490: if (ctxt->errNo == 0) 12491: ret = 1; 12492: else 12493: ret = ctxt->errNo; 12494: } else { 12495: if (lst != NULL) { 12496: xmlNodePtr cur; 12497: 12498: /* 12499: * Return the newly created nodeset after unlinking it from 12500: * they pseudo parent. 
12501: */ 12502: cur = newDoc->children->children; 12503: *lst = cur; 12504: while (cur != NULL) { 12505: cur->parent = NULL; 12506: cur = cur->next; 12507: } 12508: newDoc->children->children = NULL; 12509: } 12510: ret = 0; 12511: } 12512: ctxt->sax = oldsax; 12513: ctxt->dict = NULL; 12514: ctxt->attsDefault = NULL; 12515: ctxt->attsSpecial = NULL; 12516: xmlFreeParserCtxt(ctxt); 12517: newDoc->intSubset = NULL; 12518: newDoc->extSubset = NULL; 12519: xmlFreeDoc(newDoc); 12520: 12521: return(ret); 12522: } 12523: 12524: /** 12525: * xmlParseExternalEntityPrivate: 12526: * @doc: the document the chunk pertains to 12527: * @oldctxt: the previous parser context if available 12528: * @sax: the SAX handler bloc (possibly NULL) 12529: * @user_data: The user data returned on SAX callbacks (possibly NULL) 12530: * @depth: Used for loop detection, use 0 12531: * @URL: the URL for the entity to load 12532: * @ID: the System ID for the entity to load 12533: * @list: the return value for the set of parsed nodes 12534: * 12535: * Private version of xmlParseExternalEntity() 12536: * 12537: * Returns 0 if the entity is well formed, -1 in case of args problem and 12538: * the parser error code otherwise 12539: */ 12540: 12541: static xmlParserErrors 12542: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12543: xmlSAXHandlerPtr sax, 12544: void *user_data, int depth, const xmlChar *URL, 12545: const xmlChar *ID, xmlNodePtr *list) { 12546: xmlParserCtxtPtr ctxt; 12547: xmlDocPtr newDoc; 12548: xmlNodePtr newRoot; 12549: xmlSAXHandlerPtr oldsax = NULL; 12550: xmlParserErrors ret = XML_ERR_OK; 12551: xmlChar start[4]; 12552: xmlCharEncoding enc; 12553: 12554: if (((depth > 40) && 12555: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12556: (depth > 1024)) { 12557: return(XML_ERR_ENTITY_LOOP); 12558: } 12559: 12560: if (list != NULL) 12561: *list = NULL; 12562: if ((URL == NULL) && (ID == NULL)) 12563: return(XML_ERR_INTERNAL_ERROR); 
12564: if (doc == NULL) 12565: return(XML_ERR_INTERNAL_ERROR); 12566: 12567: 12568: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 12569: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12570: ctxt->userData = ctxt; 12571: if (oldctxt != NULL) { 12572: ctxt->_private = oldctxt->_private; 12573: ctxt->loadsubset = oldctxt->loadsubset; 12574: ctxt->validate = oldctxt->validate; 12575: ctxt->external = oldctxt->external; 12576: ctxt->record_info = oldctxt->record_info; 12577: ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12578: ctxt->node_seq.length = oldctxt->node_seq.length; 12579: ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12580: } else { 12581: /* 12582: * Doing validity checking on chunk without context 12583: * doesn't make sense 12584: */ 12585: ctxt->_private = NULL; 12586: ctxt->validate = 0; 12587: ctxt->external = 2; 12588: ctxt->loadsubset = 0; 12589: } 12590: if (sax != NULL) { 12591: oldsax = ctxt->sax; 12592: ctxt->sax = sax; 12593: if (user_data != NULL) 12594: ctxt->userData = user_data; 12595: } 12596: xmlDetectSAX2(ctxt); 12597: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12598: if (newDoc == NULL) { 12599: ctxt->node_seq.maximum = 0; 12600: ctxt->node_seq.length = 0; 12601: ctxt->node_seq.buffer = NULL; 12602: xmlFreeParserCtxt(ctxt); 12603: return(XML_ERR_INTERNAL_ERROR); 12604: } 12605: newDoc->properties = XML_DOC_INTERNAL; 12606: newDoc->intSubset = doc->intSubset; 12607: newDoc->extSubset = doc->extSubset; 12608: newDoc->dict = doc->dict; 12609: xmlDictReference(newDoc->dict); 12610: 12611: if (doc->URL != NULL) { 12612: newDoc->URL = xmlStrdup(doc->URL); 12613: } 12614: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12615: if (newRoot == NULL) { 12616: if (sax != NULL) 12617: ctxt->sax = oldsax; 12618: ctxt->node_seq.maximum = 0; 12619: ctxt->node_seq.length = 0; 12620: ctxt->node_seq.buffer = NULL; 12621: xmlFreeParserCtxt(ctxt); 12622: newDoc->intSubset = NULL; 12623: newDoc->extSubset = 
NULL; 12624: xmlFreeDoc(newDoc); 12625: return(XML_ERR_INTERNAL_ERROR); 12626: } 12627: xmlAddChild((xmlNodePtr) newDoc, newRoot); 12628: nodePush(ctxt, newDoc->children); 12629: ctxt->myDoc = doc; 12630: newRoot->doc = doc; 12631: 12632: /* 12633: * Get the 4 first bytes and decode the charset 12634: * if enc != XML_CHAR_ENCODING_NONE 12635: * plug some encoding conversion routines. 12636: */ 12637: GROW; 12638: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12639: start[0] = RAW; 12640: start[1] = NXT(1); 12641: start[2] = NXT(2); 12642: start[3] = NXT(3); 12643: enc = xmlDetectCharEncoding(start, 4); 12644: if (enc != XML_CHAR_ENCODING_NONE) { 12645: xmlSwitchEncoding(ctxt, enc); 12646: } 12647: } 12648: 12649: /* 12650: * Parse a possible text declaration first 12651: */ 12652: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12653: xmlParseTextDecl(ctxt); 12654: } 12655: 12656: ctxt->instate = XML_PARSER_CONTENT; 12657: ctxt->depth = depth; 12658: 12659: xmlParseContent(ctxt); 12660: 12661: if ((RAW == '<') && (NXT(1) == '/')) { 12662: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12663: } else if (RAW != 0) { 12664: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12665: } 12666: if (ctxt->node != newDoc->children) { 12667: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12668: } 12669: 12670: if (!ctxt->wellFormed) { 12671: if (ctxt->errNo == 0) 12672: ret = XML_ERR_INTERNAL_ERROR; 12673: else 12674: ret = (xmlParserErrors)ctxt->errNo; 12675: } else { 12676: if (list != NULL) { 12677: xmlNodePtr cur; 12678: 12679: /* 12680: * Return the newly created nodeset after unlinking it from 12681: * they pseudo parent. 
12682: */ 12683: cur = newDoc->children->children; 12684: *list = cur; 12685: while (cur != NULL) { 12686: cur->parent = NULL; 12687: cur = cur->next; 12688: } 12689: newDoc->children->children = NULL; 12690: } 12691: ret = XML_ERR_OK; 12692: } 12693: 12694: /* 12695: * Record in the parent context the number of entities replacement 12696: * done when parsing that reference. 12697: */ 12698: if (oldctxt != NULL) 12699: oldctxt->nbentities += ctxt->nbentities; 12700: 12701: /* 12702: * Also record the size of the entity parsed 12703: */ 12704: if (ctxt->input != NULL) { 12705: oldctxt->sizeentities += ctxt->input->consumed; 12706: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 12707: } 12708: /* 12709: * And record the last error if any 12710: */ 12711: if (ctxt->lastError.code != XML_ERR_OK) 12712: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12713: 12714: if (sax != NULL) 12715: ctxt->sax = oldsax; 12716: oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12717: oldctxt->node_seq.length = ctxt->node_seq.length; 12718: oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12719: ctxt->node_seq.maximum = 0; 12720: ctxt->node_seq.length = 0; 12721: ctxt->node_seq.buffer = NULL; 12722: xmlFreeParserCtxt(ctxt); 12723: newDoc->intSubset = NULL; 12724: newDoc->extSubset = NULL; 12725: xmlFreeDoc(newDoc); 12726: 12727: return(ret); 12728: } 12729: 12730: #ifdef LIBXML_SAX1_ENABLED 12731: /** 12732: * xmlParseExternalEntity: 12733: * @doc: the document the chunk pertains to 12734: * @sax: the SAX handler bloc (possibly NULL) 12735: * @user_data: The user data returned on SAX callbacks (possibly NULL) 12736: * @depth: Used for loop detection, use 0 12737: * @URL: the URL for the entity to load 12738: * @ID: the System ID for the entity to load 12739: * @lst: the return value for the set of parsed nodes 12740: * 12741: * Parse an external general entity 12742: * An external general parsed entity is well-formed if it matches the 12743: * production 
labeled extParsedEnt. 12744: * 12745: * [78] extParsedEnt ::= TextDecl? content 12746: * 12747: * Returns 0 if the entity is well formed, -1 in case of args problem and 12748: * the parser error code otherwise 12749: */ 12750: 12751: int 12752: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12753: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12754: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12755: ID, lst)); 12756: } 12757: 12758: /** 12759: * xmlParseBalancedChunkMemory: 12760: * @doc: the document the chunk pertains to 12761: * @sax: the SAX handler bloc (possibly NULL) 12762: * @user_data: The user data returned on SAX callbacks (possibly NULL) 12763: * @depth: Used for loop detection, use 0 12764: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12765: * @lst: the return value for the set of parsed nodes 12766: * 12767: * Parse a well-balanced chunk of an XML document 12768: * called by the parser 12769: * The allowed sequence for the Well Balanced Chunk is the one defined by 12770: * the content production in the XML grammar: 12771: * 12772: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12773: * 12774: * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12775: * the parser error code otherwise 12776: */ 12777: 12778: int 12779: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12780: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12781: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12782: depth, string, lst, 0 ); 12783: } 12784: #endif /* LIBXML_SAX1_ENABLED */ 12785: 12786: /** 12787: * xmlParseBalancedChunkMemoryInternal: 12788: * @oldctxt: the existing parsing context 12789: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12790: * @user_data: the user data field for the parser context 12791: * @lst: the return value for the set 
of parsed nodes 12792: * 12793: * 12794: * Parse a well-balanced chunk of an XML document 12795: * called by the parser 12796: * The allowed sequence for the Well Balanced Chunk is the one defined by 12797: * the content production in the XML grammar: 12798: * 12799: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12800: * 12801: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12802: * error code otherwise 12803: * 12804: * In case recover is set to 1, the nodelist will not be empty even if 12805: * the parsed chunk is not well balanced. 12806: */ 12807: static xmlParserErrors 12808: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12809: const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12810: xmlParserCtxtPtr ctxt; 12811: xmlDocPtr newDoc = NULL; 12812: xmlNodePtr newRoot; 12813: xmlSAXHandlerPtr oldsax = NULL; 12814: xmlNodePtr content = NULL; 12815: xmlNodePtr last = NULL; 12816: int size; 12817: xmlParserErrors ret = XML_ERR_OK; 12818: #ifdef SAX2 12819: int i; 12820: #endif 12821: 12822: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12823: (oldctxt->depth > 1024)) { 12824: return(XML_ERR_ENTITY_LOOP); 12825: } 12826: 12827: 12828: if (lst != NULL) 12829: *lst = NULL; 12830: if (string == NULL) 12831: return(XML_ERR_INTERNAL_ERROR); 12832: 12833: size = xmlStrlen(string); 12834: 12835: ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12836: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12837: if (user_data != NULL) 12838: ctxt->userData = user_data; 12839: else 12840: ctxt->userData = ctxt; 12841: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12842: ctxt->dict = oldctxt->dict; 12843: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12844: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12845: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12846: 12847: #ifdef SAX2 12848: /* propagate 
namespaces down the entity */ 12849: for (i = 0;i < oldctxt->nsNr;i += 2) { 12850: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 12851: } 12852: #endif 12853: 12854: oldsax = ctxt->sax; 12855: ctxt->sax = oldctxt->sax; 12856: xmlDetectSAX2(ctxt); 12857: ctxt->replaceEntities = oldctxt->replaceEntities; 12858: ctxt->options = oldctxt->options; 12859: 12860: ctxt->_private = oldctxt->_private; 12861: if (oldctxt->myDoc == NULL) { 12862: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12863: if (newDoc == NULL) { 12864: ctxt->sax = oldsax; 12865: ctxt->dict = NULL; 12866: xmlFreeParserCtxt(ctxt); 12867: return(XML_ERR_INTERNAL_ERROR); 12868: } 12869: newDoc->properties = XML_DOC_INTERNAL; 12870: newDoc->dict = ctxt->dict; 12871: xmlDictReference(newDoc->dict); 12872: ctxt->myDoc = newDoc; 12873: } else { 12874: ctxt->myDoc = oldctxt->myDoc; 12875: content = ctxt->myDoc->children; 12876: last = ctxt->myDoc->last; 12877: } 12878: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 12879: if (newRoot == NULL) { 12880: ctxt->sax = oldsax; 12881: ctxt->dict = NULL; 12882: xmlFreeParserCtxt(ctxt); 12883: if (newDoc != NULL) { 12884: xmlFreeDoc(newDoc); 12885: } 12886: return(XML_ERR_INTERNAL_ERROR); 12887: } 12888: ctxt->myDoc->children = NULL; 12889: ctxt->myDoc->last = NULL; 12890: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 12891: nodePush(ctxt, ctxt->myDoc->children); 12892: ctxt->instate = XML_PARSER_CONTENT; 12893: ctxt->depth = oldctxt->depth + 1; 12894: 12895: ctxt->validate = 0; 12896: ctxt->loadsubset = oldctxt->loadsubset; 12897: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 12898: /* 12899: * ID/IDREF registration will be done in xmlValidateElement below 12900: */ 12901: ctxt->loadsubset |= XML_SKIP_IDS; 12902: } 12903: ctxt->dictNames = oldctxt->dictNames; 12904: ctxt->attsDefault = oldctxt->attsDefault; 12905: ctxt->attsSpecial = oldctxt->attsSpecial; 12906: 12907: xmlParseContent(ctxt); 12908: if ((RAW == '<') && (NXT(1) 
== '/')) { 12909: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12910: } else if (RAW != 0) { 12911: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12912: } 12913: if (ctxt->node != ctxt->myDoc->children) { 12914: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12915: } 12916: 12917: if (!ctxt->wellFormed) { 12918: if (ctxt->errNo == 0) 12919: ret = XML_ERR_INTERNAL_ERROR; 12920: else 12921: ret = (xmlParserErrors)ctxt->errNo; 12922: } else { 12923: ret = XML_ERR_OK; 12924: } 12925: 12926: if ((lst != NULL) && (ret == XML_ERR_OK)) { 12927: xmlNodePtr cur; 12928: 12929: /* 12930: * Return the newly created nodeset after unlinking it from 12931: * they pseudo parent. 12932: */ 12933: cur = ctxt->myDoc->children->children; 12934: *lst = cur; 12935: while (cur != NULL) { 12936: #ifdef LIBXML_VALID_ENABLED 12937: if ((oldctxt->validate) && (oldctxt->wellFormed) && 12938: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12939: (cur->type == XML_ELEMENT_NODE)) { 12940: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12941: oldctxt->myDoc, cur); 12942: } 12943: #endif /* LIBXML_VALID_ENABLED */ 12944: cur->parent = NULL; 12945: cur = cur->next; 12946: } 12947: ctxt->myDoc->children->children = NULL; 12948: } 12949: if (ctxt->myDoc != NULL) { 12950: xmlFreeNode(ctxt->myDoc->children); 12951: ctxt->myDoc->children = content; 12952: ctxt->myDoc->last = last; 12953: } 12954: 12955: /* 12956: * Record in the parent context the number of entities replacement 12957: * done when parsing that reference. 
12958: */ 12959: if (oldctxt != NULL) 12960: oldctxt->nbentities += ctxt->nbentities; 12961: 12962: /* 12963: * Also record the last error if any 12964: */ 12965: if (ctxt->lastError.code != XML_ERR_OK) 12966: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12967: 12968: ctxt->sax = oldsax; 12969: ctxt->dict = NULL; 12970: ctxt->attsDefault = NULL; 12971: ctxt->attsSpecial = NULL; 12972: xmlFreeParserCtxt(ctxt); 12973: if (newDoc != NULL) { 12974: xmlFreeDoc(newDoc); 12975: } 12976: 12977: return(ret); 12978: } 12979: 12980: /** 12981: * xmlParseInNodeContext: 12982: * @node: the context node 12983: * @data: the input string 12984: * @datalen: the input string length in bytes 12985: * @options: a combination of xmlParserOption 12986: * @lst: the return value for the set of parsed nodes 12987: * 12988: * Parse a well-balanced chunk of an XML document 12989: * within the context (DTD, namespaces, etc ...) of the given node. 12990: * 12991: * The allowed sequence for the data is a Well Balanced Chunk defined by 12992: * the content production in the XML grammar: 12993: * 12994: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12995: * 12996: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12997: * error code otherwise 12998: */ 12999: xmlParserErrors 13000: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13001: int options, xmlNodePtr *lst) { 13002: #ifdef SAX2 13003: xmlParserCtxtPtr ctxt; 13004: xmlDocPtr doc = NULL; 13005: xmlNodePtr fake, cur; 13006: int nsnr = 0; 13007: 13008: xmlParserErrors ret = XML_ERR_OK; 13009: 13010: /* 13011: * check all input parameters, grab the document 13012: */ 13013: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13014: return(XML_ERR_INTERNAL_ERROR); 13015: switch (node->type) { 13016: case XML_ELEMENT_NODE: 13017: case XML_ATTRIBUTE_NODE: 13018: case XML_TEXT_NODE: 13019: case XML_CDATA_SECTION_NODE: 13020: case XML_ENTITY_REF_NODE: 
13021: case XML_PI_NODE: 13022: case XML_COMMENT_NODE: 13023: case XML_DOCUMENT_NODE: 13024: case XML_HTML_DOCUMENT_NODE: 13025: break; 13026: default: 13027: return(XML_ERR_INTERNAL_ERROR); 13028: 13029: } 13030: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13031: (node->type != XML_DOCUMENT_NODE) && 13032: (node->type != XML_HTML_DOCUMENT_NODE)) 13033: node = node->parent; 13034: if (node == NULL) 13035: return(XML_ERR_INTERNAL_ERROR); 13036: if (node->type == XML_ELEMENT_NODE) 13037: doc = node->doc; 13038: else 13039: doc = (xmlDocPtr) node; 13040: if (doc == NULL) 13041: return(XML_ERR_INTERNAL_ERROR); 13042: 13043: /* 13044: * allocate a context and set-up everything not related to the 13045: * node position in the tree 13046: */ 13047: if (doc->type == XML_DOCUMENT_NODE) 13048: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13049: #ifdef LIBXML_HTML_ENABLED 13050: else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13051: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13052: /* 13053: * When parsing in context, it makes no sense to add implied 13054: * elements like html/body/etc... 13055: */ 13056: options |= HTML_PARSE_NOIMPLIED; 13057: } 13058: #endif 13059: else 13060: return(XML_ERR_INTERNAL_ERROR); 13061: 13062: if (ctxt == NULL) 13063: return(XML_ERR_NO_MEMORY); 13064: 13065: /* 13066: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13067: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13068: * we must wait until the last moment to free the original one. 
13069: */ 13070: if (doc->dict != NULL) { 13071: if (ctxt->dict != NULL) 13072: xmlDictFree(ctxt->dict); 13073: ctxt->dict = doc->dict; 13074: } else 13075: options |= XML_PARSE_NODICT; 13076: 13077: if (doc->encoding != NULL) { 13078: xmlCharEncodingHandlerPtr hdlr; 13079: 13080: if (ctxt->encoding != NULL) 13081: xmlFree((xmlChar *) ctxt->encoding); 13082: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13083: 13084: hdlr = xmlFindCharEncodingHandler(doc->encoding); 13085: if (hdlr != NULL) { 13086: xmlSwitchToEncoding(ctxt, hdlr); 13087: } else { 13088: return(XML_ERR_UNSUPPORTED_ENCODING); 13089: } 13090: } 13091: 13092: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13093: xmlDetectSAX2(ctxt); 13094: ctxt->myDoc = doc; 13095: 13096: fake = xmlNewComment(NULL); 13097: if (fake == NULL) { 13098: xmlFreeParserCtxt(ctxt); 13099: return(XML_ERR_NO_MEMORY); 13100: } 13101: xmlAddChild(node, fake); 13102: 13103: if (node->type == XML_ELEMENT_NODE) { 13104: nodePush(ctxt, node); 13105: /* 13106: * initialize the SAX2 namespaces stack 13107: */ 13108: cur = node; 13109: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13110: xmlNsPtr ns = cur->nsDef; 13111: const xmlChar *iprefix, *ihref; 13112: 13113: while (ns != NULL) { 13114: if (ctxt->dict) { 13115: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13116: ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13117: } else { 13118: iprefix = ns->prefix; 13119: ihref = ns->href; 13120: } 13121: 13122: if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13123: nsPush(ctxt, iprefix, ihref); 13124: nsnr++; 13125: } 13126: ns = ns->next; 13127: } 13128: cur = cur->parent; 13129: } 13130: ctxt->instate = XML_PARSER_CONTENT; 13131: } 13132: 13133: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13134: /* 13135: * ID/IDREF registration will be done in xmlValidateElement below 13136: */ 13137: ctxt->loadsubset |= XML_SKIP_IDS; 13138: } 13139: 13140: #ifdef LIBXML_HTML_ENABLED 13141: if (doc->type == 
XML_HTML_DOCUMENT_NODE) 13142: __htmlParseContent(ctxt); 13143: else 13144: #endif 13145: xmlParseContent(ctxt); 13146: 13147: nsPop(ctxt, nsnr); 13148: if ((RAW == '<') && (NXT(1) == '/')) { 13149: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13150: } else if (RAW != 0) { 13151: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13152: } 13153: if ((ctxt->node != NULL) && (ctxt->node != node)) { 13154: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13155: ctxt->wellFormed = 0; 13156: } 13157: 13158: if (!ctxt->wellFormed) { 13159: if (ctxt->errNo == 0) 13160: ret = XML_ERR_INTERNAL_ERROR; 13161: else 13162: ret = (xmlParserErrors)ctxt->errNo; 13163: } else { 13164: ret = XML_ERR_OK; 13165: } 13166: 13167: /* 13168: * Return the newly created nodeset after unlinking it from 13169: * the pseudo sibling. 13170: */ 13171: 13172: cur = fake->next; 13173: fake->next = NULL; 13174: node->last = fake; 13175: 13176: if (cur != NULL) { 13177: cur->prev = NULL; 13178: } 13179: 13180: *lst = cur; 13181: 13182: while (cur != NULL) { 13183: cur->parent = NULL; 13184: cur = cur->next; 13185: } 13186: 13187: xmlUnlinkNode(fake); 13188: xmlFreeNode(fake); 13189: 13190: 13191: if (ret != XML_ERR_OK) { 13192: xmlFreeNodeList(*lst); 13193: *lst = NULL; 13194: } 13195: 13196: if (doc->dict != NULL) 13197: ctxt->dict = NULL; 13198: xmlFreeParserCtxt(ctxt); 13199: 13200: return(ret); 13201: #else /* !SAX2 */ 13202: return(XML_ERR_INTERNAL_ERROR); 13203: #endif 13204: } 13205: 13206: #ifdef LIBXML_SAX1_ENABLED 13207: /** 13208: * xmlParseBalancedChunkMemoryRecover: 13209: * @doc: the document the chunk pertains to 13210: * @sax: the SAX handler bloc (possibly NULL) 13211: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13212: * @depth: Used for loop detection, use 0 13213: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13214: * @lst: the return value for the set of parsed nodes 13215: * @recover: return nodes even if the data is broken 
(use 0) 13216: * 13217: * 13218: * Parse a well-balanced chunk of an XML document 13219: * called by the parser 13220: * The allowed sequence for the Well Balanced Chunk is the one defined by 13221: * the content production in the XML grammar: 13222: * 13223: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13224: * 13225: * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13226: * the parser error code otherwise 13227: * 13228: * In case recover is set to 1, the nodelist will not be empty even if 13229: * the parsed chunk is not well balanced, assuming the parsing succeeded to 13230: * some extent. 13231: */ 13232: int 13233: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13234: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13235: int recover) { 13236: xmlParserCtxtPtr ctxt; 13237: xmlDocPtr newDoc; 13238: xmlSAXHandlerPtr oldsax = NULL; 13239: xmlNodePtr content, newRoot; 13240: int size; 13241: int ret = 0; 13242: 13243: if (depth > 40) { 13244: return(XML_ERR_ENTITY_LOOP); 13245: } 13246: 13247: 13248: if (lst != NULL) 13249: *lst = NULL; 13250: if (string == NULL) 13251: return(-1); 13252: 13253: size = xmlStrlen(string); 13254: 13255: ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13256: if (ctxt == NULL) return(-1); 13257: ctxt->userData = ctxt; 13258: if (sax != NULL) { 13259: oldsax = ctxt->sax; 13260: ctxt->sax = sax; 13261: if (user_data != NULL) 13262: ctxt->userData = user_data; 13263: } 13264: newDoc = xmlNewDoc(BAD_CAST "1.0"); 13265: if (newDoc == NULL) { 13266: xmlFreeParserCtxt(ctxt); 13267: return(-1); 13268: } 13269: newDoc->properties = XML_DOC_INTERNAL; 13270: if ((doc != NULL) && (doc->dict != NULL)) { 13271: xmlDictFree(ctxt->dict); 13272: ctxt->dict = doc->dict; 13273: xmlDictReference(ctxt->dict); 13274: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13275: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 
5); 13276: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13277: ctxt->dictNames = 1; 13278: } else { 13279: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13280: } 13281: if (doc != NULL) { 13282: newDoc->intSubset = doc->intSubset; 13283: newDoc->extSubset = doc->extSubset; 13284: } 13285: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13286: if (newRoot == NULL) { 13287: if (sax != NULL) 13288: ctxt->sax = oldsax; 13289: xmlFreeParserCtxt(ctxt); 13290: newDoc->intSubset = NULL; 13291: newDoc->extSubset = NULL; 13292: xmlFreeDoc(newDoc); 13293: return(-1); 13294: } 13295: xmlAddChild((xmlNodePtr) newDoc, newRoot); 13296: nodePush(ctxt, newRoot); 13297: if (doc == NULL) { 13298: ctxt->myDoc = newDoc; 13299: } else { 13300: ctxt->myDoc = newDoc; 13301: newDoc->children->doc = doc; 13302: /* Ensure that doc has XML spec namespace */ 13303: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13304: newDoc->oldNs = doc->oldNs; 13305: } 13306: ctxt->instate = XML_PARSER_CONTENT; 13307: ctxt->depth = depth; 13308: 13309: /* 13310: * Doing validity checking on chunk doesn't make sense 13311: */ 13312: ctxt->validate = 0; 13313: ctxt->loadsubset = 0; 13314: xmlDetectSAX2(ctxt); 13315: 13316: if ( doc != NULL ){ 13317: content = doc->children; 13318: doc->children = NULL; 13319: xmlParseContent(ctxt); 13320: doc->children = content; 13321: } 13322: else { 13323: xmlParseContent(ctxt); 13324: } 13325: if ((RAW == '<') && (NXT(1) == '/')) { 13326: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13327: } else if (RAW != 0) { 13328: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13329: } 13330: if (ctxt->node != newDoc->children) { 13331: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13332: } 13333: 13334: if (!ctxt->wellFormed) { 13335: if (ctxt->errNo == 0) 13336: ret = 1; 13337: else 13338: ret = ctxt->errNo; 13339: } else { 13340: ret = 0; 13341: } 13342: 13343: if ((lst != NULL) && ((ret == 0) || 
(recover == 1))) { 13344: xmlNodePtr cur; 13345: 13346: /* 13347: * Return the newly created nodeset after unlinking it from 13348: * they pseudo parent. 13349: */ 13350: cur = newDoc->children->children; 13351: *lst = cur; 13352: while (cur != NULL) { 13353: xmlSetTreeDoc(cur, doc); 13354: cur->parent = NULL; 13355: cur = cur->next; 13356: } 13357: newDoc->children->children = NULL; 13358: } 13359: 13360: if (sax != NULL) 13361: ctxt->sax = oldsax; 13362: xmlFreeParserCtxt(ctxt); 13363: newDoc->intSubset = NULL; 13364: newDoc->extSubset = NULL; 13365: newDoc->oldNs = NULL; 13366: xmlFreeDoc(newDoc); 13367: 13368: return(ret); 13369: } 13370: 13371: /** 13372: * xmlSAXParseEntity: 13373: * @sax: the SAX handler block 13374: * @filename: the filename 13375: * 13376: * parse an XML external entity out of context and build a tree. 13377: * It use the given SAX function block to handle the parsing callback. 13378: * If sax is NULL, fallback to the default DOM tree building routines. 13379: * 13380: * [78] extParsedEnt ::= TextDecl? 
content 13381: * 13382: * This correspond to a "Well Balanced" chunk 13383: * 13384: * Returns the resulting document tree 13385: */ 13386: 13387: xmlDocPtr 13388: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13389: xmlDocPtr ret; 13390: xmlParserCtxtPtr ctxt; 13391: 13392: ctxt = xmlCreateFileParserCtxt(filename); 13393: if (ctxt == NULL) { 13394: return(NULL); 13395: } 13396: if (sax != NULL) { 13397: if (ctxt->sax != NULL) 13398: xmlFree(ctxt->sax); 13399: ctxt->sax = sax; 13400: ctxt->userData = NULL; 13401: } 13402: 13403: xmlParseExtParsedEnt(ctxt); 13404: 13405: if (ctxt->wellFormed) 13406: ret = ctxt->myDoc; 13407: else { 13408: ret = NULL; 13409: xmlFreeDoc(ctxt->myDoc); 13410: ctxt->myDoc = NULL; 13411: } 13412: if (sax != NULL) 13413: ctxt->sax = NULL; 13414: xmlFreeParserCtxt(ctxt); 13415: 13416: return(ret); 13417: } 13418: 13419: /** 13420: * xmlParseEntity: 13421: * @filename: the filename 13422: * 13423: * parse an XML external entity out of context and build a tree. 13424: * 13425: * [78] extParsedEnt ::= TextDecl? content 13426: * 13427: * This correspond to a "Well Balanced" chunk 13428: * 13429: * Returns the resulting document tree 13430: */ 13431: 13432: xmlDocPtr 13433: xmlParseEntity(const char *filename) { 13434: return(xmlSAXParseEntity(NULL, filename)); 13435: } 13436: #endif /* LIBXML_SAX1_ENABLED */ 13437: 13438: /** 13439: * xmlCreateEntityParserCtxtInternal: 13440: * @URL: the entity URL 13441: * @ID: the entity PUBLIC ID 13442: * @base: a possible base for the target URI 13443: * @pctx: parser context used to set options on new context 13444: * 13445: * Create a parser context for an external entity 13446: * Automatic support for ZLIB/Compress compressed document is provided 13447: * by default if found at compile-time. 
13448: * 13449: * Returns the new parser context or NULL 13450: */ 13451: static xmlParserCtxtPtr 13452: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13453: const xmlChar *base, xmlParserCtxtPtr pctx) { 13454: xmlParserCtxtPtr ctxt; 13455: xmlParserInputPtr inputStream; 13456: char *directory = NULL; 13457: xmlChar *uri; 13458: 13459: ctxt = xmlNewParserCtxt(); 13460: if (ctxt == NULL) { 13461: return(NULL); 13462: } 13463: 13464: if (pctx != NULL) { 13465: ctxt->options = pctx->options; 13466: ctxt->_private = pctx->_private; 13467: } 13468: 13469: uri = xmlBuildURI(URL, base); 13470: 13471: if (uri == NULL) { 13472: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13473: if (inputStream == NULL) { 13474: xmlFreeParserCtxt(ctxt); 13475: return(NULL); 13476: } 13477: 13478: inputPush(ctxt, inputStream); 13479: 13480: if ((ctxt->directory == NULL) && (directory == NULL)) 13481: directory = xmlParserGetDirectory((char *)URL); 13482: if ((ctxt->directory == NULL) && (directory != NULL)) 13483: ctxt->directory = directory; 13484: } else { 13485: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13486: if (inputStream == NULL) { 13487: xmlFree(uri); 13488: xmlFreeParserCtxt(ctxt); 13489: return(NULL); 13490: } 13491: 13492: inputPush(ctxt, inputStream); 13493: 13494: if ((ctxt->directory == NULL) && (directory == NULL)) 13495: directory = xmlParserGetDirectory((char *)uri); 13496: if ((ctxt->directory == NULL) && (directory != NULL)) 13497: ctxt->directory = directory; 13498: xmlFree(uri); 13499: } 13500: return(ctxt); 13501: } 13502: 13503: /** 13504: * xmlCreateEntityParserCtxt: 13505: * @URL: the entity URL 13506: * @ID: the entity PUBLIC ID 13507: * @base: a possible base for the target URI 13508: * 13509: * Create a parser context for an external entity 13510: * Automatic support for ZLIB/Compress compressed document is provided 13511: * by default if found at compile-time. 
13512: * 13513: * Returns the new parser context or NULL 13514: */ 13515: xmlParserCtxtPtr 13516: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13517: const xmlChar *base) { 13518: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 13519: 13520: } 13521: 13522: /************************************************************************ 13523: * * 13524: * Front ends when parsing from a file * 13525: * * 13526: ************************************************************************/ 13527: 13528: /** 13529: * xmlCreateURLParserCtxt: 13530: * @filename: the filename or URL 13531: * @options: a combination of xmlParserOption 13532: * 13533: * Create a parser context for a file or URL content. 13534: * Automatic support for ZLIB/Compress compressed document is provided 13535: * by default if found at compile-time and for file accesses 13536: * 13537: * Returns the new parser context or NULL 13538: */ 13539: xmlParserCtxtPtr 13540: xmlCreateURLParserCtxt(const char *filename, int options) 13541: { 13542: xmlParserCtxtPtr ctxt; 13543: xmlParserInputPtr inputStream; 13544: char *directory = NULL; 13545: 13546: ctxt = xmlNewParserCtxt(); 13547: if (ctxt == NULL) { 13548: xmlErrMemory(NULL, "cannot allocate parser context"); 13549: return(NULL); 13550: } 13551: 13552: if (options) 13553: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13554: ctxt->linenumbers = 1; 13555: 13556: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13557: if (inputStream == NULL) { 13558: xmlFreeParserCtxt(ctxt); 13559: return(NULL); 13560: } 13561: 13562: inputPush(ctxt, inputStream); 13563: if ((ctxt->directory == NULL) && (directory == NULL)) 13564: directory = xmlParserGetDirectory(filename); 13565: if ((ctxt->directory == NULL) && (directory != NULL)) 13566: ctxt->directory = directory; 13567: 13568: return(ctxt); 13569: } 13570: 13571: /** 13572: * xmlCreateFileParserCtxt: 13573: * @filename: the filename 13574: * 13575: * Create a parser context for 
a file content. 13576: * Automatic support for ZLIB/Compress compressed document is provided 13577: * by default if found at compile-time. 13578: * 13579: * Returns the new parser context or NULL 13580: */ 13581: xmlParserCtxtPtr 13582: xmlCreateFileParserCtxt(const char *filename) 13583: { 13584: return(xmlCreateURLParserCtxt(filename, 0)); 13585: } 13586: 13587: #ifdef LIBXML_SAX1_ENABLED 13588: /** 13589: * xmlSAXParseFileWithData: 13590: * @sax: the SAX handler block 13591: * @filename: the filename 13592: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13593: * documents 13594: * @data: the userdata 13595: * 13596: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13597: * compressed document is provided by default if found at compile-time. 13598: * It use the given SAX function block to handle the parsing callback. 13599: * If sax is NULL, fallback to the default DOM tree building routines. 13600: * 13601: * User data (void *) is stored within the parser context in the 13602: * context's _private member, so it is available nearly everywhere in libxml 13603: * 13604: * Returns the resulting document tree 13605: */ 13606: 13607: xmlDocPtr 13608: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13609: int recovery, void *data) { 13610: xmlDocPtr ret; 13611: xmlParserCtxtPtr ctxt; 13612: 13613: xmlInitParser(); 13614: 13615: ctxt = xmlCreateFileParserCtxt(filename); 13616: if (ctxt == NULL) { 13617: return(NULL); 13618: } 13619: if (sax != NULL) { 13620: if (ctxt->sax != NULL) 13621: xmlFree(ctxt->sax); 13622: ctxt->sax = sax; 13623: } 13624: xmlDetectSAX2(ctxt); 13625: if (data!=NULL) { 13626: ctxt->_private = data; 13627: } 13628: 13629: if (ctxt->directory == NULL) 13630: ctxt->directory = xmlParserGetDirectory(filename); 13631: 13632: ctxt->recovery = recovery; 13633: 13634: xmlParseDocument(ctxt); 13635: 13636: if ((ctxt->wellFormed) || recovery) { 13637: ret = ctxt->myDoc; 13638: if (ret != 
NULL) { 13639: if (ctxt->input->buf->compressed > 0) 13640: ret->compression = 9; 13641: else 13642: ret->compression = ctxt->input->buf->compressed; 13643: } 13644: } 13645: else { 13646: ret = NULL; 13647: xmlFreeDoc(ctxt->myDoc); 13648: ctxt->myDoc = NULL; 13649: } 13650: if (sax != NULL) 13651: ctxt->sax = NULL; 13652: xmlFreeParserCtxt(ctxt); 13653: 13654: return(ret); 13655: } 13656: 13657: /** 13658: * xmlSAXParseFile: 13659: * @sax: the SAX handler block 13660: * @filename: the filename 13661: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13662: * documents 13663: * 13664: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13665: * compressed document is provided by default if found at compile-time. 13666: * It use the given SAX function block to handle the parsing callback. 13667: * If sax is NULL, fallback to the default DOM tree building routines. 13668: * 13669: * Returns the resulting document tree 13670: */ 13671: 13672: xmlDocPtr 13673: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13674: int recovery) { 13675: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13676: } 13677: 13678: /** 13679: * xmlRecoverDoc: 13680: * @cur: a pointer to an array of xmlChar 13681: * 13682: * parse an XML in-memory document and build a tree. 13683: * In the case the document is not Well Formed, a attempt to build a 13684: * tree is tried anyway 13685: * 13686: * Returns the resulting document tree or NULL in case of failure 13687: */ 13688: 13689: xmlDocPtr 13690: xmlRecoverDoc(const xmlChar *cur) { 13691: return(xmlSAXParseDoc(NULL, cur, 1)); 13692: } 13693: 13694: /** 13695: * xmlParseFile: 13696: * @filename: the filename 13697: * 13698: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13699: * compressed document is provided by default if found at compile-time. 13700: * 13701: * Returns the resulting document tree if the file was wellformed, 13702: * NULL otherwise. 
13703: */ 13704: 13705: xmlDocPtr 13706: xmlParseFile(const char *filename) { 13707: return(xmlSAXParseFile(NULL, filename, 0)); 13708: } 13709: 13710: /** 13711: * xmlRecoverFile: 13712: * @filename: the filename 13713: * 13714: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13715: * compressed document is provided by default if found at compile-time. 13716: * In the case the document is not Well Formed, it attempts to build 13717: * a tree anyway 13718: * 13719: * Returns the resulting document tree or NULL in case of failure 13720: */ 13721: 13722: xmlDocPtr 13723: xmlRecoverFile(const char *filename) { 13724: return(xmlSAXParseFile(NULL, filename, 1)); 13725: } 13726: 13727: 13728: /** 13729: * xmlSetupParserForBuffer: 13730: * @ctxt: an XML parser context 13731: * @buffer: a xmlChar * buffer 13732: * @filename: a file name 13733: * 13734: * Setup the parser context to parse a new buffer; Clears any prior 13735: * contents from the parser context. The buffer parameter must not be 13736: * NULL, but the filename parameter can be 13737: */ 13738: void 13739: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13740: const char* filename) 13741: { 13742: xmlParserInputPtr input; 13743: 13744: if ((ctxt == NULL) || (buffer == NULL)) 13745: return; 13746: 13747: input = xmlNewInputStream(ctxt); 13748: if (input == NULL) { 13749: xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13750: xmlClearParserCtxt(ctxt); 13751: return; 13752: } 13753: 13754: xmlClearParserCtxt(ctxt); 13755: if (filename != NULL) 13756: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13757: input->base = buffer; 13758: input->cur = buffer; 13759: input->end = &buffer[xmlStrlen(buffer)]; 13760: inputPush(ctxt, input); 13761: } 13762: 13763: /** 13764: * xmlSAXUserParseFile: 13765: * @sax: a SAX handler 13766: * @user_data: The user data returned on SAX callbacks 13767: * @filename: a file name 13768: * 13769: * parse an 
XML file and call the given SAX handler routines. 13770: * Automatic support for ZLIB/Compress compressed document is provided 13771: * 13772: * Returns 0 in case of success or a error number otherwise 13773: */ 13774: int 13775: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13776: const char *filename) { 13777: int ret = 0; 13778: xmlParserCtxtPtr ctxt; 13779: 13780: ctxt = xmlCreateFileParserCtxt(filename); 13781: if (ctxt == NULL) return -1; 13782: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13783: xmlFree(ctxt->sax); 13784: ctxt->sax = sax; 13785: xmlDetectSAX2(ctxt); 13786: 13787: if (user_data != NULL) 13788: ctxt->userData = user_data; 13789: 13790: xmlParseDocument(ctxt); 13791: 13792: if (ctxt->wellFormed) 13793: ret = 0; 13794: else { 13795: if (ctxt->errNo != 0) 13796: ret = ctxt->errNo; 13797: else 13798: ret = -1; 13799: } 13800: if (sax != NULL) 13801: ctxt->sax = NULL; 13802: if (ctxt->myDoc != NULL) { 13803: xmlFreeDoc(ctxt->myDoc); 13804: ctxt->myDoc = NULL; 13805: } 13806: xmlFreeParserCtxt(ctxt); 13807: 13808: return ret; 13809: } 13810: #endif /* LIBXML_SAX1_ENABLED */ 13811: 13812: /************************************************************************ 13813: * * 13814: * Front ends when parsing from memory * 13815: * * 13816: ************************************************************************/ 13817: 13818: /** 13819: * xmlCreateMemoryParserCtxt: 13820: * @buffer: a pointer to a char array 13821: * @size: the size of the array 13822: * 13823: * Create a parser context for an XML in-memory document. 
13824: * 13825: * Returns the new parser context or NULL 13826: */ 13827: xmlParserCtxtPtr 13828: xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13829: xmlParserCtxtPtr ctxt; 13830: xmlParserInputPtr input; 13831: xmlParserInputBufferPtr buf; 13832: 13833: if (buffer == NULL) 13834: return(NULL); 13835: if (size <= 0) 13836: return(NULL); 13837: 13838: ctxt = xmlNewParserCtxt(); 13839: if (ctxt == NULL) 13840: return(NULL); 13841: 13842: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 13843: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13844: if (buf == NULL) { 13845: xmlFreeParserCtxt(ctxt); 13846: return(NULL); 13847: } 13848: 13849: input = xmlNewInputStream(ctxt); 13850: if (input == NULL) { 13851: xmlFreeParserInputBuffer(buf); 13852: xmlFreeParserCtxt(ctxt); 13853: return(NULL); 13854: } 13855: 13856: input->filename = NULL; 13857: input->buf = buf; 13858: input->base = input->buf->buffer->content; 13859: input->cur = input->buf->buffer->content; 13860: input->end = &input->buf->buffer->content[input->buf->buffer->use]; 13861: 13862: inputPush(ctxt, input); 13863: return(ctxt); 13864: } 13865: 13866: #ifdef LIBXML_SAX1_ENABLED 13867: /** 13868: * xmlSAXParseMemoryWithData: 13869: * @sax: the SAX handler block 13870: * @buffer: an pointer to a char array 13871: * @size: the size of the array 13872: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13873: * documents 13874: * @data: the userdata 13875: * 13876: * parse an XML in-memory block and use the given SAX function block 13877: * to handle the parsing callback. If sax is NULL, fallback to the default 13878: * DOM tree building routines. 
13879: * 13880: * User data (void *) is stored within the parser context in the 13881: * context's _private member, so it is available nearly everywhere in libxml 13882: * 13883: * Returns the resulting document tree 13884: */ 13885: 13886: xmlDocPtr 13887: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 13888: int size, int recovery, void *data) { 13889: xmlDocPtr ret; 13890: xmlParserCtxtPtr ctxt; 13891: 13892: xmlInitParser(); 13893: 13894: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13895: if (ctxt == NULL) return(NULL); 13896: if (sax != NULL) { 13897: if (ctxt->sax != NULL) 13898: xmlFree(ctxt->sax); 13899: ctxt->sax = sax; 13900: } 13901: xmlDetectSAX2(ctxt); 13902: if (data!=NULL) { 13903: ctxt->_private=data; 13904: } 13905: 13906: ctxt->recovery = recovery; 13907: 13908: xmlParseDocument(ctxt); 13909: 13910: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13911: else { 13912: ret = NULL; 13913: xmlFreeDoc(ctxt->myDoc); 13914: ctxt->myDoc = NULL; 13915: } 13916: if (sax != NULL) 13917: ctxt->sax = NULL; 13918: xmlFreeParserCtxt(ctxt); 13919: 13920: return(ret); 13921: } 13922: 13923: /** 13924: * xmlSAXParseMemory: 13925: * @sax: the SAX handler block 13926: * @buffer: an pointer to a char array 13927: * @size: the size of the array 13928: * @recovery: work in recovery mode, i.e. tries to read not Well Formed 13929: * documents 13930: * 13931: * parse an XML in-memory block and use the given SAX function block 13932: * to handle the parsing callback. If sax is NULL, fallback to the default 13933: * DOM tree building routines. 
13934: * 13935: * Returns the resulting document tree 13936: */ 13937: xmlDocPtr 13938: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 13939: int size, int recovery) { 13940: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 13941: } 13942: 13943: /** 13944: * xmlParseMemory: 13945: * @buffer: an pointer to a char array 13946: * @size: the size of the array 13947: * 13948: * parse an XML in-memory block and build a tree. 13949: * 13950: * Returns the resulting document tree 13951: */ 13952: 13953: xmlDocPtr xmlParseMemory(const char *buffer, int size) { 13954: return(xmlSAXParseMemory(NULL, buffer, size, 0)); 13955: } 13956: 13957: /** 13958: * xmlRecoverMemory: 13959: * @buffer: an pointer to a char array 13960: * @size: the size of the array 13961: * 13962: * parse an XML in-memory block and build a tree. 13963: * In the case the document is not Well Formed, an attempt to 13964: * build a tree is tried anyway 13965: * 13966: * Returns the resulting document tree or NULL in case of error 13967: */ 13968: 13969: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 13970: return(xmlSAXParseMemory(NULL, buffer, size, 1)); 13971: } 13972: 13973: /** 13974: * xmlSAXUserParseMemory: 13975: * @sax: a SAX handler 13976: * @user_data: The user data returned on SAX callbacks 13977: * @buffer: an in-memory XML document input 13978: * @size: the length of the XML document in bytes 13979: * 13980: * A better SAX parsing routine. 13981: * parse an XML in-memory buffer and call the given SAX handler routines. 
13982: * 13983: * Returns 0 in case of success or a error number otherwise 13984: */ 13985: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 13986: const char *buffer, int size) { 13987: int ret = 0; 13988: xmlParserCtxtPtr ctxt; 13989: 13990: xmlInitParser(); 13991: 13992: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13993: if (ctxt == NULL) return -1; 13994: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13995: xmlFree(ctxt->sax); 13996: ctxt->sax = sax; 13997: xmlDetectSAX2(ctxt); 13998: 13999: if (user_data != NULL) 14000: ctxt->userData = user_data; 14001: 14002: xmlParseDocument(ctxt); 14003: 14004: if (ctxt->wellFormed) 14005: ret = 0; 14006: else { 14007: if (ctxt->errNo != 0) 14008: ret = ctxt->errNo; 14009: else 14010: ret = -1; 14011: } 14012: if (sax != NULL) 14013: ctxt->sax = NULL; 14014: if (ctxt->myDoc != NULL) { 14015: xmlFreeDoc(ctxt->myDoc); 14016: ctxt->myDoc = NULL; 14017: } 14018: xmlFreeParserCtxt(ctxt); 14019: 14020: return ret; 14021: } 14022: #endif /* LIBXML_SAX1_ENABLED */ 14023: 14024: /** 14025: * xmlCreateDocParserCtxt: 14026: * @cur: a pointer to an array of xmlChar 14027: * 14028: * Creates a parser context for an XML in-memory document. 14029: * 14030: * Returns the new parser context or NULL 14031: */ 14032: xmlParserCtxtPtr 14033: xmlCreateDocParserCtxt(const xmlChar *cur) { 14034: int len; 14035: 14036: if (cur == NULL) 14037: return(NULL); 14038: len = xmlStrlen(cur); 14039: return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14040: } 14041: 14042: #ifdef LIBXML_SAX1_ENABLED 14043: /** 14044: * xmlSAXParseDoc: 14045: * @sax: the SAX handler block 14046: * @cur: a pointer to an array of xmlChar 14047: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14048: * documents 14049: * 14050: * parse an XML in-memory document and build a tree. 14051: * It use the given SAX function block to handle the parsing callback. 
14052: * If sax is NULL, fallback to the default DOM tree building routines. 14053: * 14054: * Returns the resulting document tree 14055: */ 14056: 14057: xmlDocPtr 14058: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14059: xmlDocPtr ret; 14060: xmlParserCtxtPtr ctxt; 14061: xmlSAXHandlerPtr oldsax = NULL; 14062: 14063: if (cur == NULL) return(NULL); 14064: 14065: 14066: ctxt = xmlCreateDocParserCtxt(cur); 14067: if (ctxt == NULL) return(NULL); 14068: if (sax != NULL) { 14069: oldsax = ctxt->sax; 14070: ctxt->sax = sax; 14071: ctxt->userData = NULL; 14072: } 14073: xmlDetectSAX2(ctxt); 14074: 14075: xmlParseDocument(ctxt); 14076: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14077: else { 14078: ret = NULL; 14079: xmlFreeDoc(ctxt->myDoc); 14080: ctxt->myDoc = NULL; 14081: } 14082: if (sax != NULL) 14083: ctxt->sax = oldsax; 14084: xmlFreeParserCtxt(ctxt); 14085: 14086: return(ret); 14087: } 14088: 14089: /** 14090: * xmlParseDoc: 14091: * @cur: a pointer to an array of xmlChar 14092: * 14093: * parse an XML in-memory document and build a tree. 
14094: * 14095: * Returns the resulting document tree 14096: */ 14097: 14098: xmlDocPtr 14099: xmlParseDoc(const xmlChar *cur) { 14100: return(xmlSAXParseDoc(NULL, cur, 0)); 14101: } 14102: #endif /* LIBXML_SAX1_ENABLED */ 14103: 14104: #ifdef LIBXML_LEGACY_ENABLED 14105: /************************************************************************ 14106: * * 14107: * Specific function to keep track of entities references * 14108: * and used by the XSLT debugger * 14109: * * 14110: ************************************************************************/ 14111: 14112: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14113: 14114: /** 14115: * xmlAddEntityReference: 14116: * @ent : A valid entity 14117: * @firstNode : A valid first node for children of entity 14118: * @lastNode : A valid last node of children entity 14119: * 14120: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14121: */ 14122: static void 14123: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14124: xmlNodePtr lastNode) 14125: { 14126: if (xmlEntityRefFunc != NULL) { 14127: (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14128: } 14129: } 14130: 14131: 14132: /** 14133: * xmlSetEntityReferenceFunc: 14134: * @func: A valid function 14135: * 14136: * Set the function to call call back when a xml reference has been made 14137: */ 14138: void 14139: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14140: { 14141: xmlEntityRefFunc = func; 14142: } 14143: #endif /* LIBXML_LEGACY_ENABLED */ 14144: 14145: /************************************************************************ 14146: * * 14147: * Miscellaneous * 14148: * * 14149: ************************************************************************/ 14150: 14151: #ifdef LIBXML_XPATH_ENABLED 14152: #include <libxml/xpath.h> 14153: #endif 14154: 14155: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14156: static int xmlParserInitialized = 0; 14157: 14158: /** 14159: 
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The function returns immediately when the xmlParserInitialized flag is
 * already set. Otherwise it runs the initialization routines of the
 * various modules in a fixed order and then sets the flag.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
	return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the initialization runs under the global init
     * mutex and the flag is checked a second time once the lock is held.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/* Install the default generic error handler if none (or the
	 * default one) is currently set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * Nothing is done if the library is not marked as initialized.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() did not run. */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() to run the initialization again. */
    xmlParserInitialized = 0;
}

/************************************************************************
 *									*
 *	New set (2.6.0) of simpler and more flexible APIs		*
 *									*
 ************************************************************************/

/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The macro expects a variable named dict to be visible
 * where it is expanded; a NULL @str is ignored.
 */
#define DICT_FREE(str)						\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));

/**
 * xmlCtxtReset:
 * @ctxt: an XML parser context
 *
 * Reset a parser context
 */
void
xmlCtxtReset(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr
input; 14277: xmlDictPtr dict; 14278: 14279: if (ctxt == NULL) 14280: return; 14281: 14282: dict = ctxt->dict; 14283: 14284: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14285: xmlFreeInputStream(input); 14286: } 14287: ctxt->inputNr = 0; 14288: ctxt->input = NULL; 14289: 14290: ctxt->spaceNr = 0; 14291: if (ctxt->spaceTab != NULL) { 14292: ctxt->spaceTab[0] = -1; 14293: ctxt->space = &ctxt->spaceTab[0]; 14294: } else { 14295: ctxt->space = NULL; 14296: } 14297: 14298: 14299: ctxt->nodeNr = 0; 14300: ctxt->node = NULL; 14301: 14302: ctxt->nameNr = 0; 14303: ctxt->name = NULL; 14304: 14305: DICT_FREE(ctxt->version); 14306: ctxt->version = NULL; 14307: DICT_FREE(ctxt->encoding); 14308: ctxt->encoding = NULL; 14309: DICT_FREE(ctxt->directory); 14310: ctxt->directory = NULL; 14311: DICT_FREE(ctxt->extSubURI); 14312: ctxt->extSubURI = NULL; 14313: DICT_FREE(ctxt->extSubSystem); 14314: ctxt->extSubSystem = NULL; 14315: if (ctxt->myDoc != NULL) 14316: xmlFreeDoc(ctxt->myDoc); 14317: ctxt->myDoc = NULL; 14318: 14319: ctxt->standalone = -1; 14320: ctxt->hasExternalSubset = 0; 14321: ctxt->hasPErefs = 0; 14322: ctxt->html = 0; 14323: ctxt->external = 0; 14324: ctxt->instate = XML_PARSER_START; 14325: ctxt->token = 0; 14326: 14327: ctxt->wellFormed = 1; 14328: ctxt->nsWellFormed = 1; 14329: ctxt->disableSAX = 0; 14330: ctxt->valid = 1; 14331: #if 0 14332: ctxt->vctxt.userData = ctxt; 14333: ctxt->vctxt.error = xmlParserValidityError; 14334: ctxt->vctxt.warning = xmlParserValidityWarning; 14335: #endif 14336: ctxt->record_info = 0; 14337: ctxt->nbChars = 0; 14338: ctxt->checkIndex = 0; 14339: ctxt->inSubset = 0; 14340: ctxt->errNo = XML_ERR_OK; 14341: ctxt->depth = 0; 14342: ctxt->charset = XML_CHAR_ENCODING_UTF8; 14343: ctxt->catalogs = NULL; 14344: ctxt->nbentities = 0; 14345: ctxt->sizeentities = 0; 14346: xmlInitNodeInfoSeq(&ctxt->node_seq); 14347: 14348: if (ctxt->attsDefault != NULL) { 14349: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 
14350: ctxt->attsDefault = NULL; 14351: } 14352: if (ctxt->attsSpecial != NULL) { 14353: xmlHashFree(ctxt->attsSpecial, NULL); 14354: ctxt->attsSpecial = NULL; 14355: } 14356: 14357: #ifdef LIBXML_CATALOG_ENABLED 14358: if (ctxt->catalogs != NULL) 14359: xmlCatalogFreeLocal(ctxt->catalogs); 14360: #endif 14361: if (ctxt->lastError.code != XML_ERR_OK) 14362: xmlResetError(&ctxt->lastError); 14363: } 14364: 14365: /** 14366: * xmlCtxtResetPush: 14367: * @ctxt: an XML parser context 14368: * @chunk: a pointer to an array of chars 14369: * @size: number of chars in the array 14370: * @filename: an optional file name or URI 14371: * @encoding: the document encoding, or NULL 14372: * 14373: * Reset a push parser context 14374: * 14375: * Returns 0 in case of success and 1 in case of error 14376: */ 14377: int 14378: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14379: int size, const char *filename, const char *encoding) 14380: { 14381: xmlParserInputPtr inputStream; 14382: xmlParserInputBufferPtr buf; 14383: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14384: 14385: if (ctxt == NULL) 14386: return(1); 14387: 14388: if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14389: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14390: 14391: buf = xmlAllocParserInputBuffer(enc); 14392: if (buf == NULL) 14393: return(1); 14394: 14395: if (ctxt == NULL) { 14396: xmlFreeParserInputBuffer(buf); 14397: return(1); 14398: } 14399: 14400: xmlCtxtReset(ctxt); 14401: 14402: if (ctxt->pushTab == NULL) { 14403: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14404: sizeof(xmlChar *)); 14405: if (ctxt->pushTab == NULL) { 14406: xmlErrMemory(ctxt, NULL); 14407: xmlFreeParserInputBuffer(buf); 14408: return(1); 14409: } 14410: } 14411: 14412: if (filename == NULL) { 14413: ctxt->directory = NULL; 14414: } else { 14415: ctxt->directory = xmlParserGetDirectory(filename); 14416: } 14417: 14418: inputStream = xmlNewInputStream(ctxt); 14419: if (inputStream 
== NULL) { 14420: xmlFreeParserInputBuffer(buf); 14421: return(1); 14422: } 14423: 14424: if (filename == NULL) 14425: inputStream->filename = NULL; 14426: else 14427: inputStream->filename = (char *) 14428: xmlCanonicPath((const xmlChar *) filename); 14429: inputStream->buf = buf; 14430: inputStream->base = inputStream->buf->buffer->content; 14431: inputStream->cur = inputStream->buf->buffer->content; 14432: inputStream->end = 14433: &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 14434: 14435: inputPush(ctxt, inputStream); 14436: 14437: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14438: (ctxt->input->buf != NULL)) { 14439: int base = ctxt->input->base - ctxt->input->buf->buffer->content; 14440: int cur = ctxt->input->cur - ctxt->input->base; 14441: 14442: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14443: 14444: ctxt->input->base = ctxt->input->buf->buffer->content + base; 14445: ctxt->input->cur = ctxt->input->base + cur; 14446: ctxt->input->end = 14447: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 14448: use]; 14449: #ifdef DEBUG_PUSH 14450: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14451: #endif 14452: } 14453: 14454: if (encoding != NULL) { 14455: xmlCharEncodingHandlerPtr hdlr; 14456: 14457: if (ctxt->encoding != NULL) 14458: xmlFree((xmlChar *) ctxt->encoding); 14459: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14460: 14461: hdlr = xmlFindCharEncodingHandler(encoding); 14462: if (hdlr != NULL) { 14463: xmlSwitchToEncoding(ctxt, hdlr); 14464: } else { 14465: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14466: "Unsupported encoding %s\n", BAD_CAST encoding); 14467: } 14468: } else if (enc != XML_CHAR_ENCODING_NONE) { 14469: xmlSwitchEncoding(ctxt, enc); 14470: } 14471: 14472: return(0); 14473: } 14474: 14475: 14476: /** 14477: * xmlCtxtUseOptionsInternal: 14478: * @ctxt: an XML parser context 14479: * @options: a combination of xmlParserOption 
14480: * @encoding: the user provided encoding to use 14481: * 14482: * Applies the options to the parser context 14483: * 14484: * Returns 0 in case of success, the set of unknown or unimplemented options 14485: * in case of error. 14486: */ 14487: static int 14488: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14489: { 14490: if (ctxt == NULL) 14491: return(-1); 14492: if (encoding != NULL) { 14493: if (ctxt->encoding != NULL) 14494: xmlFree((xmlChar *) ctxt->encoding); 14495: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14496: } 14497: if (options & XML_PARSE_RECOVER) { 14498: ctxt->recovery = 1; 14499: options -= XML_PARSE_RECOVER; 14500: ctxt->options |= XML_PARSE_RECOVER; 14501: } else 14502: ctxt->recovery = 0; 14503: if (options & XML_PARSE_DTDLOAD) { 14504: ctxt->loadsubset = XML_DETECT_IDS; 14505: options -= XML_PARSE_DTDLOAD; 14506: ctxt->options |= XML_PARSE_DTDLOAD; 14507: } else 14508: ctxt->loadsubset = 0; 14509: if (options & XML_PARSE_DTDATTR) { 14510: ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14511: options -= XML_PARSE_DTDATTR; 14512: ctxt->options |= XML_PARSE_DTDATTR; 14513: } 14514: if (options & XML_PARSE_NOENT) { 14515: ctxt->replaceEntities = 1; 14516: /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14517: options -= XML_PARSE_NOENT; 14518: ctxt->options |= XML_PARSE_NOENT; 14519: } else 14520: ctxt->replaceEntities = 0; 14521: if (options & XML_PARSE_PEDANTIC) { 14522: ctxt->pedantic = 1; 14523: options -= XML_PARSE_PEDANTIC; 14524: ctxt->options |= XML_PARSE_PEDANTIC; 14525: } else 14526: ctxt->pedantic = 0; 14527: if (options & XML_PARSE_NOBLANKS) { 14528: ctxt->keepBlanks = 0; 14529: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14530: options -= XML_PARSE_NOBLANKS; 14531: ctxt->options |= XML_PARSE_NOBLANKS; 14532: } else 14533: ctxt->keepBlanks = 1; 14534: if (options & XML_PARSE_DTDVALID) { 14535: ctxt->validate = 1; 14536: if (options & XML_PARSE_NOWARNING) 14537: 
ctxt->vctxt.warning = NULL; 14538: if (options & XML_PARSE_NOERROR) 14539: ctxt->vctxt.error = NULL; 14540: options -= XML_PARSE_DTDVALID; 14541: ctxt->options |= XML_PARSE_DTDVALID; 14542: } else 14543: ctxt->validate = 0; 14544: if (options & XML_PARSE_NOWARNING) { 14545: ctxt->sax->warning = NULL; 14546: options -= XML_PARSE_NOWARNING; 14547: } 14548: if (options & XML_PARSE_NOERROR) { 14549: ctxt->sax->error = NULL; 14550: ctxt->sax->fatalError = NULL; 14551: options -= XML_PARSE_NOERROR; 14552: } 14553: #ifdef LIBXML_SAX1_ENABLED 14554: if (options & XML_PARSE_SAX1) { 14555: ctxt->sax->startElement = xmlSAX2StartElement; 14556: ctxt->sax->endElement = xmlSAX2EndElement; 14557: ctxt->sax->startElementNs = NULL; 14558: ctxt->sax->endElementNs = NULL; 14559: ctxt->sax->initialized = 1; 14560: options -= XML_PARSE_SAX1; 14561: ctxt->options |= XML_PARSE_SAX1; 14562: } 14563: #endif /* LIBXML_SAX1_ENABLED */ 14564: if (options & XML_PARSE_NODICT) { 14565: ctxt->dictNames = 0; 14566: options -= XML_PARSE_NODICT; 14567: ctxt->options |= XML_PARSE_NODICT; 14568: } else { 14569: ctxt->dictNames = 1; 14570: } 14571: if (options & XML_PARSE_NOCDATA) { 14572: ctxt->sax->cdataBlock = NULL; 14573: options -= XML_PARSE_NOCDATA; 14574: ctxt->options |= XML_PARSE_NOCDATA; 14575: } 14576: if (options & XML_PARSE_NSCLEAN) { 14577: ctxt->options |= XML_PARSE_NSCLEAN; 14578: options -= XML_PARSE_NSCLEAN; 14579: } 14580: if (options & XML_PARSE_NONET) { 14581: ctxt->options |= XML_PARSE_NONET; 14582: options -= XML_PARSE_NONET; 14583: } 14584: if (options & XML_PARSE_COMPACT) { 14585: ctxt->options |= XML_PARSE_COMPACT; 14586: options -= XML_PARSE_COMPACT; 14587: } 14588: if (options & XML_PARSE_OLD10) { 14589: ctxt->options |= XML_PARSE_OLD10; 14590: options -= XML_PARSE_OLD10; 14591: } 14592: if (options & XML_PARSE_NOBASEFIX) { 14593: ctxt->options |= XML_PARSE_NOBASEFIX; 14594: options -= XML_PARSE_NOBASEFIX; 14595: } 14596: if (options & XML_PARSE_HUGE) { 14597: ctxt->options 
|= XML_PARSE_HUGE; 14598: options -= XML_PARSE_HUGE; 14599: } 14600: if (options & XML_PARSE_OLDSAX) { 14601: ctxt->options |= XML_PARSE_OLDSAX; 14602: options -= XML_PARSE_OLDSAX; 14603: } 14604: if (options & XML_PARSE_IGNORE_ENC) { 14605: ctxt->options |= XML_PARSE_IGNORE_ENC; 14606: options -= XML_PARSE_IGNORE_ENC; 14607: } 14608: ctxt->linenumbers = 1; 14609: return (options); 14610: } 14611: 14612: /** 14613: * xmlCtxtUseOptions: 14614: * @ctxt: an XML parser context 14615: * @options: a combination of xmlParserOption 14616: * 14617: * Applies the options to the parser context 14618: * 14619: * Returns 0 in case of success, the set of unknown or unimplemented options 14620: * in case of error. 14621: */ 14622: int 14623: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14624: { 14625: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 14626: } 14627: 14628: /** 14629: * xmlDoRead: 14630: * @ctxt: an XML parser context 14631: * @URL: the base URL to use for the document 14632: * @encoding: the document encoding, or NULL 14633: * @options: a combination of xmlParserOption 14634: * @reuse: keep the context for reuse 14635: * 14636: * Common front-end for the xmlRead functions 14637: * 14638: * Returns the resulting document tree or NULL 14639: */ 14640: static xmlDocPtr 14641: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14642: int options, int reuse) 14643: { 14644: xmlDocPtr ret; 14645: 14646: xmlCtxtUseOptionsInternal(ctxt, options, encoding); 14647: if (encoding != NULL) { 14648: xmlCharEncodingHandlerPtr hdlr; 14649: 14650: hdlr = xmlFindCharEncodingHandler(encoding); 14651: if (hdlr != NULL) 14652: xmlSwitchToEncoding(ctxt, hdlr); 14653: } 14654: if ((URL != NULL) && (ctxt->input != NULL) && 14655: (ctxt->input->filename == NULL)) 14656: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14657: xmlParseDocument(ctxt); 14658: if ((ctxt->wellFormed) || ctxt->recovery) 14659: ret = ctxt->myDoc; 14660: else 
{ 14661: ret = NULL; 14662: if (ctxt->myDoc != NULL) { 14663: xmlFreeDoc(ctxt->myDoc); 14664: } 14665: } 14666: ctxt->myDoc = NULL; 14667: if (!reuse) { 14668: xmlFreeParserCtxt(ctxt); 14669: } 14670: 14671: return (ret); 14672: } 14673: 14674: /** 14675: * xmlReadDoc: 14676: * @cur: a pointer to a zero terminated string 14677: * @URL: the base URL to use for the document 14678: * @encoding: the document encoding, or NULL 14679: * @options: a combination of xmlParserOption 14680: * 14681: * parse an XML in-memory document and build a tree. 14682: * 14683: * Returns the resulting document tree 14684: */ 14685: xmlDocPtr 14686: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14687: { 14688: xmlParserCtxtPtr ctxt; 14689: 14690: if (cur == NULL) 14691: return (NULL); 14692: 14693: ctxt = xmlCreateDocParserCtxt(cur); 14694: if (ctxt == NULL) 14695: return (NULL); 14696: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14697: } 14698: 14699: /** 14700: * xmlReadFile: 14701: * @filename: a file or URL 14702: * @encoding: the document encoding, or NULL 14703: * @options: a combination of xmlParserOption 14704: * 14705: * parse an XML file from the filesystem or the network. 14706: * 14707: * Returns the resulting document tree 14708: */ 14709: xmlDocPtr 14710: xmlReadFile(const char *filename, const char *encoding, int options) 14711: { 14712: xmlParserCtxtPtr ctxt; 14713: 14714: ctxt = xmlCreateURLParserCtxt(filename, options); 14715: if (ctxt == NULL) 14716: return (NULL); 14717: return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14718: } 14719: 14720: /** 14721: * xmlReadMemory: 14722: * @buffer: a pointer to a char array 14723: * @size: the size of the array 14724: * @URL: the base URL to use for the document 14725: * @encoding: the document encoding, or NULL 14726: * @options: a combination of xmlParserOption 14727: * 14728: * parse an XML in-memory document and build a tree. 
14729: * 14730: * Returns the resulting document tree 14731: */ 14732: xmlDocPtr 14733: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14734: { 14735: xmlParserCtxtPtr ctxt; 14736: 14737: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14738: if (ctxt == NULL) 14739: return (NULL); 14740: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14741: } 14742: 14743: /** 14744: * xmlReadFd: 14745: * @fd: an open file descriptor 14746: * @URL: the base URL to use for the document 14747: * @encoding: the document encoding, or NULL 14748: * @options: a combination of xmlParserOption 14749: * 14750: * parse an XML from a file descriptor and build a tree. 14751: * NOTE that the file descriptor will not be closed when the 14752: * reader is closed or reset. 14753: * 14754: * Returns the resulting document tree 14755: */ 14756: xmlDocPtr 14757: xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14758: { 14759: xmlParserCtxtPtr ctxt; 14760: xmlParserInputBufferPtr input; 14761: xmlParserInputPtr stream; 14762: 14763: if (fd < 0) 14764: return (NULL); 14765: 14766: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14767: if (input == NULL) 14768: return (NULL); 14769: input->closecallback = NULL; 14770: ctxt = xmlNewParserCtxt(); 14771: if (ctxt == NULL) { 14772: xmlFreeParserInputBuffer(input); 14773: return (NULL); 14774: } 14775: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14776: if (stream == NULL) { 14777: xmlFreeParserInputBuffer(input); 14778: xmlFreeParserCtxt(ctxt); 14779: return (NULL); 14780: } 14781: inputPush(ctxt, stream); 14782: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14783: } 14784: 14785: /** 14786: * xmlReadIO: 14787: * @ioread: an I/O read function 14788: * @ioclose: an I/O close function 14789: * @ioctx: an I/O handler 14790: * @URL: the base URL to use for the document 14791: * @encoding: the document encoding, or NULL 14792: * @options: a 
combination of xmlParserOption 14793: * 14794: * parse an XML document from I/O functions and source and build a tree. 14795: * 14796: * Returns the resulting document tree 14797: */ 14798: xmlDocPtr 14799: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14800: void *ioctx, const char *URL, const char *encoding, int options) 14801: { 14802: xmlParserCtxtPtr ctxt; 14803: xmlParserInputBufferPtr input; 14804: xmlParserInputPtr stream; 14805: 14806: if (ioread == NULL) 14807: return (NULL); 14808: 14809: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14810: XML_CHAR_ENCODING_NONE); 14811: if (input == NULL) { 14812: if (ioclose != NULL) 14813: ioclose(ioctx); 14814: return (NULL); 14815: } 14816: ctxt = xmlNewParserCtxt(); 14817: if (ctxt == NULL) { 14818: xmlFreeParserInputBuffer(input); 14819: return (NULL); 14820: } 14821: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14822: if (stream == NULL) { 14823: xmlFreeParserInputBuffer(input); 14824: xmlFreeParserCtxt(ctxt); 14825: return (NULL); 14826: } 14827: inputPush(ctxt, stream); 14828: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14829: } 14830: 14831: /** 14832: * xmlCtxtReadDoc: 14833: * @ctxt: an XML parser context 14834: * @cur: a pointer to a zero terminated string 14835: * @URL: the base URL to use for the document 14836: * @encoding: the document encoding, or NULL 14837: * @options: a combination of xmlParserOption 14838: * 14839: * parse an XML in-memory document and build a tree. 
14840: * This reuses the existing @ctxt parser context 14841: * 14842: * Returns the resulting document tree 14843: */ 14844: xmlDocPtr 14845: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 14846: const char *URL, const char *encoding, int options) 14847: { 14848: xmlParserInputPtr stream; 14849: 14850: if (cur == NULL) 14851: return (NULL); 14852: if (ctxt == NULL) 14853: return (NULL); 14854: 14855: xmlCtxtReset(ctxt); 14856: 14857: stream = xmlNewStringInputStream(ctxt, cur); 14858: if (stream == NULL) { 14859: return (NULL); 14860: } 14861: inputPush(ctxt, stream); 14862: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14863: } 14864: 14865: /** 14866: * xmlCtxtReadFile: 14867: * @ctxt: an XML parser context 14868: * @filename: a file or URL 14869: * @encoding: the document encoding, or NULL 14870: * @options: a combination of xmlParserOption 14871: * 14872: * parse an XML file from the filesystem or the network. 14873: * This reuses the existing @ctxt parser context 14874: * 14875: * Returns the resulting document tree 14876: */ 14877: xmlDocPtr 14878: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14879: const char *encoding, int options) 14880: { 14881: xmlParserInputPtr stream; 14882: 14883: if (filename == NULL) 14884: return (NULL); 14885: if (ctxt == NULL) 14886: return (NULL); 14887: 14888: xmlCtxtReset(ctxt); 14889: 14890: stream = xmlLoadExternalEntity(filename, NULL, ctxt); 14891: if (stream == NULL) { 14892: return (NULL); 14893: } 14894: inputPush(ctxt, stream); 14895: return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 14896: } 14897: 14898: /** 14899: * xmlCtxtReadMemory: 14900: * @ctxt: an XML parser context 14901: * @buffer: a pointer to a char array 14902: * @size: the size of the array 14903: * @URL: the base URL to use for the document 14904: * @encoding: the document encoding, or NULL 14905: * @options: a combination of xmlParserOption 14906: * 14907: * parse an XML in-memory document and build a tree. 
14908: * This reuses the existing @ctxt parser context 14909: * 14910: * Returns the resulting document tree 14911: */ 14912: xmlDocPtr 14913: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 14914: const char *URL, const char *encoding, int options) 14915: { 14916: xmlParserInputBufferPtr input; 14917: xmlParserInputPtr stream; 14918: 14919: if (ctxt == NULL) 14920: return (NULL); 14921: if (buffer == NULL) 14922: return (NULL); 14923: 14924: xmlCtxtReset(ctxt); 14925: 14926: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14927: if (input == NULL) { 14928: return(NULL); 14929: } 14930: 14931: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14932: if (stream == NULL) { 14933: xmlFreeParserInputBuffer(input); 14934: return(NULL); 14935: } 14936: 14937: inputPush(ctxt, stream); 14938: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14939: } 14940: 14941: /** 14942: * xmlCtxtReadFd: 14943: * @ctxt: an XML parser context 14944: * @fd: an open file descriptor 14945: * @URL: the base URL to use for the document 14946: * @encoding: the document encoding, or NULL 14947: * @options: a combination of xmlParserOption 14948: * 14949: * parse an XML from a file descriptor and build a tree. 14950: * This reuses the existing @ctxt parser context 14951: * NOTE that the file descriptor will not be closed when the 14952: * reader is closed or reset. 
14953: * 14954: * Returns the resulting document tree 14955: */ 14956: xmlDocPtr 14957: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 14958: const char *URL, const char *encoding, int options) 14959: { 14960: xmlParserInputBufferPtr input; 14961: xmlParserInputPtr stream; 14962: 14963: if (fd < 0) 14964: return (NULL); 14965: if (ctxt == NULL) 14966: return (NULL); 14967: 14968: xmlCtxtReset(ctxt); 14969: 14970: 14971: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14972: if (input == NULL) 14973: return (NULL); 14974: input->closecallback = NULL; 14975: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14976: if (stream == NULL) { 14977: xmlFreeParserInputBuffer(input); 14978: return (NULL); 14979: } 14980: inputPush(ctxt, stream); 14981: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14982: } 14983: 14984: /** 14985: * xmlCtxtReadIO: 14986: * @ctxt: an XML parser context 14987: * @ioread: an I/O read function 14988: * @ioclose: an I/O close function 14989: * @ioctx: an I/O handler 14990: * @URL: the base URL to use for the document 14991: * @encoding: the document encoding, or NULL 14992: * @options: a combination of xmlParserOption 14993: * 14994: * parse an XML document from I/O functions and source and build a tree. 
14995: * This reuses the existing @ctxt parser context 14996: * 14997: * Returns the resulting document tree 14998: */ 14999: xmlDocPtr 15000: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15001: xmlInputCloseCallback ioclose, void *ioctx, 15002: const char *URL, 15003: const char *encoding, int options) 15004: { 15005: xmlParserInputBufferPtr input; 15006: xmlParserInputPtr stream; 15007: 15008: if (ioread == NULL) 15009: return (NULL); 15010: if (ctxt == NULL) 15011: return (NULL); 15012: 15013: xmlCtxtReset(ctxt); 15014: 15015: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15016: XML_CHAR_ENCODING_NONE); 15017: if (input == NULL) { 15018: if (ioclose != NULL) 15019: ioclose(ioctx); 15020: return (NULL); 15021: } 15022: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15023: if (stream == NULL) { 15024: xmlFreeParserInputBuffer(input); 15025: return (NULL); 15026: } 15027: inputPush(ctxt, stream); 15028: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15029: } 15030: 15031: #define bottom_parser 15032: #include "elfgcchack.h"