embedaddon/libxml2/parser.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / parser.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:29 2014 UTC (9 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD

libxml2 2.9.1

1: /* 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3: * implemented on top of the SAX interfaces 4: * 5: * References: 6: * The XML specification: 7: * http://www.w3.org/TR/REC-xml 8: * Original 1.0 version: 9: * http://www.w3.org/TR/1998/REC-xml-19980210 10: * XML second edition working draft 11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12: * 13: * Okay this is a big file, the parser core is around 7000 lines, then it 14: * is followed by the progressive parser top routines, then the various 15: * high level APIs to call the parser and a few miscellaneous functions. 16: * A number of helper functions and deprecated ones have been moved to 17: * parserInternals.c to reduce this file size. 18: * As much as possible the functions are associated with their relative 19: * production in the XML specification. A few productions defining the 20: * different ranges of character are actually implanted either in 21: * parserInternals.h or parserInternals.c 22: * The DOM tree build is realized from the default SAX callbacks in 23: * the module SAX.c. 24: * The routines doing the validation checks are in valid.c and called either 25: * from the SAX callbacks or as standalone functions using a preparsed 26: * document. 27: * 28: * See Copyright for the status of this software. 
29: * 30: * daniel@veillard.com 31: */ 32: 33: #define IN_LIBXML 34: #include "libxml.h" 35: 36: #if defined(WIN32) && !defined (__CYGWIN__) 37: #define XML_DIR_SEP '\\' 38: #else 39: #define XML_DIR_SEP '/' 40: #endif 41: 42: #include <stdlib.h> 43: #include <limits.h> 44: #include <string.h> 45: #include <stdarg.h> 46: #include <libxml/xmlmemory.h> 47: #include <libxml/threads.h> 48: #include <libxml/globals.h> 49: #include <libxml/tree.h> 50: #include <libxml/parser.h> 51: #include <libxml/parserInternals.h> 52: #include <libxml/valid.h> 53: #include <libxml/entities.h> 54: #include <libxml/xmlerror.h> 55: #include <libxml/encoding.h> 56: #include <libxml/xmlIO.h> 57: #include <libxml/uri.h> 58: #ifdef LIBXML_CATALOG_ENABLED 59: #include <libxml/catalog.h> 60: #endif 61: #ifdef LIBXML_SCHEMAS_ENABLED 62: #include <libxml/xmlschemastypes.h> 63: #include <libxml/relaxng.h> 64: #endif 65: #ifdef HAVE_CTYPE_H 66: #include <ctype.h> 67: #endif 68: #ifdef HAVE_STDLIB_H 69: #include <stdlib.h> 70: #endif 71: #ifdef HAVE_SYS_STAT_H 72: #include <sys/stat.h> 73: #endif 74: #ifdef HAVE_FCNTL_H 75: #include <fcntl.h> 76: #endif 77: #ifdef HAVE_UNISTD_H 78: #include <unistd.h> 79: #endif 80: #ifdef HAVE_ZLIB_H 81: #include <zlib.h> 82: #endif 83: #ifdef HAVE_LZMA_H 84: #include <lzma.h> 85: #endif 86: 87: #include "buf.h" 88: #include "enc.h" 89: 90: static void 91: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92: 93: static xmlParserCtxtPtr 94: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95: const xmlChar *base, xmlParserCtxtPtr pctx); 96: 97: /************************************************************************ 98: * * 99: * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 100: * * 101: ************************************************************************/ 102: 103: #define XML_PARSER_BIG_ENTITY 1000 104: #define XML_PARSER_LOT_ENTITY 5000 105: 106: /* 107: * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 108: * replacement over the size in byte of the input indicates that you have 109: * and eponential behaviour. A value of 10 correspond to at least 3 entity 110: * replacement per byte of input. 111: */ 112: #define XML_PARSER_NON_LINEAR 10 113: 114: /* 115: * xmlParserEntityCheck 116: * 117: * Function to check non-linear entity expansion behaviour 118: * This is here to detect and stop exponential linear entity expansion 119: * This is not a limitation of the parser but a safety 120: * boundary feature. It can be disabled with the XML_PARSE_HUGE 121: * parser option. 122: */ 123: static int 124: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 125: xmlEntityPtr ent, size_t replacement) 126: { 127: size_t consumed = 0; 128: 129: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 130: return (0); 131: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 132: return (1); 133: if (replacement != 0) { 134: if (replacement < XML_MAX_TEXT_LENGTH) 135: return(0); 136: 137: /* 138: * If the volume of entity copy reaches 10 times the 139: * amount of parsed data and over the large text threshold 140: * then that's very likely to be an abuse. 
141: */ 142: if (ctxt->input != NULL) { 143: consumed = ctxt->input->consumed + 144: (ctxt->input->cur - ctxt->input->base); 145: } 146: consumed += ctxt->sizeentities; 147: 148: if (replacement < XML_PARSER_NON_LINEAR * consumed) 149: return(0); 150: } else if (size != 0) { 151: /* 152: * Do the check based on the replacement size of the entity 153: */ 154: if (size < XML_PARSER_BIG_ENTITY) 155: return(0); 156: 157: /* 158: * A limit on the amount of text data reasonably used 159: */ 160: if (ctxt->input != NULL) { 161: consumed = ctxt->input->consumed + 162: (ctxt->input->cur - ctxt->input->base); 163: } 164: consumed += ctxt->sizeentities; 165: 166: if ((size < XML_PARSER_NON_LINEAR * consumed) && 167: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 168: return (0); 169: } else if (ent != NULL) { 170: /* 171: * use the number of parsed entities in the replacement 172: */ 173: size = ent->checked / 2; 174: 175: /* 176: * The amount of data parsed counting entities size only once 177: */ 178: if (ctxt->input != NULL) { 179: consumed = ctxt->input->consumed + 180: (ctxt->input->cur - ctxt->input->base); 181: } 182: consumed += ctxt->sizeentities; 183: 184: /* 185: * Check the density of entities for the amount of data 186: * knowing an entity reference will take at least 3 bytes 187: */ 188: if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 189: return (0); 190: } else { 191: /* 192: * strange we got no data for checking just return 193: */ 194: return (0); 195: } 196: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 197: return (1); 198: } 199: 200: /** 201: * xmlParserMaxDepth: 202: * 203: * arbitrary depth limit for the XML documents that we allow to 204: * process. This is not a limitation of the parser but a safety 205: * boundary feature. It can be disabled with the XML_PARSE_HUGE 206: * parser option. 
*/
unsigned int xmlParserMaxDepth = 256;



#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names
 * It is not strictly needed as long as inputs available characters
 * are followed by 0, which should be provided by the I/O level
 */
#define XML_PARSER_CHUNK_SIZE 100

/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};


/*
 * Forward declarations of static helpers; their definitions are not
 * part of this section of the file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                                    const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);

/************************************************************************
 *									*
 *		Some factorized error routines				*
 *									*
************************************************************************/

/**
 * xmlErrAttributeDup:
 * @ctxt:  an XML parser context, may be NULL
 * @prefix:  the attribute prefix, or NULL for an unprefixed attribute
 * @localname:  the attribute localname
 *
 * Handle a redefinition of attribute error: raise a fatal
 * XML_ERR_ATTRIBUTE_REDEFINED and, when a context is given, flag the
 * document as not well formed (disabling SAX unless in recovery mode).
 * Nothing is reported when SAX is already disabled and the parser
 * state is XML_PARSER_EOF.
 */
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
                   const xmlChar * localname)
{
    if (ctxt != NULL) {
        /* parsing was already stopped, stay silent */
        if ((ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF))
            return;
        ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
    }

    if (prefix != NULL) {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) prefix, (const char *) localname,
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
                        localname);
    } else {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
                        (const char *) localname, NULL, NULL, 0, 0,
                        "Attribute %s redefined\n", localname);
    }

    if (ctxt == NULL)
        return;
    ctxt->wellFormed = 0;
    if (ctxt->recovery == 0)
        ctxt->disableSAX = 1;
}

/**
 * xmlFatalErr:
 * @ctxt:  an XML parser context
 * @error:  the error number
 * @info:  extra information string
 *
 * Handle a fatal parser error, i.e.
violating Well-Formedness constraints 314: */ 315: static void 316: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 317: { 318: const char *errmsg; 319: char errstr[129] = ""; 320: 321: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 322: (ctxt->instate == XML_PARSER_EOF)) 323: return; 324: switch (error) { 325: case XML_ERR_INVALID_HEX_CHARREF: 326: errmsg = "CharRef: invalid hexadecimal value"; 327: break; 328: case XML_ERR_INVALID_DEC_CHARREF: 329: errmsg = "CharRef: invalid decimal value"; 330: break; 331: case XML_ERR_INVALID_CHARREF: 332: errmsg = "CharRef: invalid value"; 333: break; 334: case XML_ERR_INTERNAL_ERROR: 335: errmsg = "internal error"; 336: break; 337: case XML_ERR_PEREF_AT_EOF: 338: errmsg = "PEReference at end of document"; 339: break; 340: case XML_ERR_PEREF_IN_PROLOG: 341: errmsg = "PEReference in prolog"; 342: break; 343: case XML_ERR_PEREF_IN_EPILOG: 344: errmsg = "PEReference in epilog"; 345: break; 346: case XML_ERR_PEREF_NO_NAME: 347: errmsg = "PEReference: no name"; 348: break; 349: case XML_ERR_PEREF_SEMICOL_MISSING: 350: errmsg = "PEReference: expecting ';'"; 351: break; 352: case XML_ERR_ENTITY_LOOP: 353: errmsg = "Detected an entity reference loop"; 354: break; 355: case XML_ERR_ENTITY_NOT_STARTED: 356: errmsg = "EntityValue: \" or ' expected"; 357: break; 358: case XML_ERR_ENTITY_PE_INTERNAL: 359: errmsg = "PEReferences forbidden in internal subset"; 360: break; 361: case XML_ERR_ENTITY_NOT_FINISHED: 362: errmsg = "EntityValue: \" or ' expected"; 363: break; 364: case XML_ERR_ATTRIBUTE_NOT_STARTED: 365: errmsg = "AttValue: \" or ' expected"; 366: break; 367: case XML_ERR_LT_IN_ATTRIBUTE: 368: errmsg = "Unescaped '<' not allowed in attributes values"; 369: break; 370: case XML_ERR_LITERAL_NOT_STARTED: 371: errmsg = "SystemLiteral \" or ' expected"; 372: break; 373: case XML_ERR_LITERAL_NOT_FINISHED: 374: errmsg = "Unfinished System or Public ID \" or ' expected"; 375: break; 376: case 
XML_ERR_MISPLACED_CDATA_END: 377: errmsg = "Sequence ']]>' not allowed in content"; 378: break; 379: case XML_ERR_URI_REQUIRED: 380: errmsg = "SYSTEM or PUBLIC, the URI is missing"; 381: break; 382: case XML_ERR_PUBID_REQUIRED: 383: errmsg = "PUBLIC, the Public Identifier is missing"; 384: break; 385: case XML_ERR_HYPHEN_IN_COMMENT: 386: errmsg = "Comment must not contain '--' (double-hyphen)"; 387: break; 388: case XML_ERR_PI_NOT_STARTED: 389: errmsg = "xmlParsePI : no target name"; 390: break; 391: case XML_ERR_RESERVED_XML_NAME: 392: errmsg = "Invalid PI name"; 393: break; 394: case XML_ERR_NOTATION_NOT_STARTED: 395: errmsg = "NOTATION: Name expected here"; 396: break; 397: case XML_ERR_NOTATION_NOT_FINISHED: 398: errmsg = "'>' required to close NOTATION declaration"; 399: break; 400: case XML_ERR_VALUE_REQUIRED: 401: errmsg = "Entity value required"; 402: break; 403: case XML_ERR_URI_FRAGMENT: 404: errmsg = "Fragment not allowed"; 405: break; 406: case XML_ERR_ATTLIST_NOT_STARTED: 407: errmsg = "'(' required to start ATTLIST enumeration"; 408: break; 409: case XML_ERR_NMTOKEN_REQUIRED: 410: errmsg = "NmToken expected in ATTLIST enumeration"; 411: break; 412: case XML_ERR_ATTLIST_NOT_FINISHED: 413: errmsg = "')' required to finish ATTLIST enumeration"; 414: break; 415: case XML_ERR_MIXED_NOT_STARTED: 416: errmsg = "MixedContentDecl : '|' or ')*' expected"; 417: break; 418: case XML_ERR_PCDATA_REQUIRED: 419: errmsg = "MixedContentDecl : '#PCDATA' expected"; 420: break; 421: case XML_ERR_ELEMCONTENT_NOT_STARTED: 422: errmsg = "ContentDecl : Name or '(' expected"; 423: break; 424: case XML_ERR_ELEMCONTENT_NOT_FINISHED: 425: errmsg = "ContentDecl : ',' '|' or ')' expected"; 426: break; 427: case XML_ERR_PEREF_IN_INT_SUBSET: 428: errmsg = 429: "PEReference: forbidden within markup decl in internal subset"; 430: break; 431: case XML_ERR_GT_REQUIRED: 432: errmsg = "expected '>'"; 433: break; 434: case XML_ERR_CONDSEC_INVALID: 435: errmsg = "XML conditional section '[' 
expected"; 436: break; 437: case XML_ERR_EXT_SUBSET_NOT_FINISHED: 438: errmsg = "Content error in the external subset"; 439: break; 440: case XML_ERR_CONDSEC_INVALID_KEYWORD: 441: errmsg = 442: "conditional section INCLUDE or IGNORE keyword expected"; 443: break; 444: case XML_ERR_CONDSEC_NOT_FINISHED: 445: errmsg = "XML conditional section not closed"; 446: break; 447: case XML_ERR_XMLDECL_NOT_STARTED: 448: errmsg = "Text declaration '<?xml' required"; 449: break; 450: case XML_ERR_XMLDECL_NOT_FINISHED: 451: errmsg = "parsing XML declaration: '?>' expected"; 452: break; 453: case XML_ERR_EXT_ENTITY_STANDALONE: 454: errmsg = "external parsed entities cannot be standalone"; 455: break; 456: case XML_ERR_ENTITYREF_SEMICOL_MISSING: 457: errmsg = "EntityRef: expecting ';'"; 458: break; 459: case XML_ERR_DOCTYPE_NOT_FINISHED: 460: errmsg = "DOCTYPE improperly terminated"; 461: break; 462: case XML_ERR_LTSLASH_REQUIRED: 463: errmsg = "EndTag: '</' not found"; 464: break; 465: case XML_ERR_EQUAL_REQUIRED: 466: errmsg = "expected '='"; 467: break; 468: case XML_ERR_STRING_NOT_CLOSED: 469: errmsg = "String not closed expecting \" or '"; 470: break; 471: case XML_ERR_STRING_NOT_STARTED: 472: errmsg = "String not started expecting ' or \""; 473: break; 474: case XML_ERR_ENCODING_NAME: 475: errmsg = "Invalid XML encoding name"; 476: break; 477: case XML_ERR_STANDALONE_VALUE: 478: errmsg = "standalone accepts only 'yes' or 'no'"; 479: break; 480: case XML_ERR_DOCUMENT_EMPTY: 481: errmsg = "Document is empty"; 482: break; 483: case XML_ERR_DOCUMENT_END: 484: errmsg = "Extra content at the end of the document"; 485: break; 486: case XML_ERR_NOT_WELL_BALANCED: 487: errmsg = "chunk is not well balanced"; 488: break; 489: case XML_ERR_EXTRA_CONTENT: 490: errmsg = "extra content at the end of well balanced chunk"; 491: break; 492: case XML_ERR_VERSION_MISSING: 493: errmsg = "Malformed declaration expecting version"; 494: break; 495: case XML_ERR_NAME_TOO_LONG: 496: errmsg = "Name too 
long use XML_PARSE_HUGE option"; 497: break; 498: #if 0 499: case: 500: errmsg = ""; 501: break; 502: #endif 503: default: 504: errmsg = "Unregistered error message"; 505: } 506: if (info == NULL) 507: snprintf(errstr, 128, "%s\n", errmsg); 508: else 509: snprintf(errstr, 128, "%s: %%s\n", errmsg); 510: if (ctxt != NULL) 511: ctxt->errNo = error; 512: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 513: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], 514: info); 515: if (ctxt != NULL) { 516: ctxt->wellFormed = 0; 517: if (ctxt->recovery == 0) 518: ctxt->disableSAX = 1; 519: } 520: } 521: 522: /** 523: * xmlFatalErrMsg: 524: * @ctxt: an XML parser context 525: * @error: the error number 526: * @msg: the error message 527: * 528: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 529: */ 530: static void 531: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 532: const char *msg) 533: { 534: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 535: (ctxt->instate == XML_PARSER_EOF)) 536: return; 537: if (ctxt != NULL) 538: ctxt->errNo = error; 539: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 540: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 541: if (ctxt != NULL) { 542: ctxt->wellFormed = 0; 543: if (ctxt->recovery == 0) 544: ctxt->disableSAX = 1; 545: } 546: } 547: 548: /** 549: * xmlWarningMsg: 550: * @ctxt: an XML parser context 551: * @error: the error number 552: * @msg: the error message 553: * @str1: extra data 554: * @str2: extra data 555: * 556: * Handle a warning. 
557: */ 558: static void 559: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 560: const char *msg, const xmlChar *str1, const xmlChar *str2) 561: { 562: xmlStructuredErrorFunc schannel = NULL; 563: 564: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 565: (ctxt->instate == XML_PARSER_EOF)) 566: return; 567: if ((ctxt != NULL) && (ctxt->sax != NULL) && 568: (ctxt->sax->initialized == XML_SAX2_MAGIC)) 569: schannel = ctxt->sax->serror; 570: if (ctxt != NULL) { 571: __xmlRaiseError(schannel, 572: (ctxt->sax) ? ctxt->sax->warning : NULL, 573: ctxt->userData, 574: ctxt, NULL, XML_FROM_PARSER, error, 575: XML_ERR_WARNING, NULL, 0, 576: (const char *) str1, (const char *) str2, NULL, 0, 0, 577: msg, (const char *) str1, (const char *) str2); 578: } else { 579: __xmlRaiseError(schannel, NULL, NULL, 580: ctxt, NULL, XML_FROM_PARSER, error, 581: XML_ERR_WARNING, NULL, 0, 582: (const char *) str1, (const char *) str2, NULL, 0, 0, 583: msg, (const char *) str1, (const char *) str2); 584: } 585: } 586: 587: /** 588: * xmlValidityError: 589: * @ctxt: an XML parser context 590: * @error: the error number 591: * @msg: the error message 592: * @str1: extra data 593: * 594: * Handle a validity error. 
595: */ 596: static void 597: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 598: const char *msg, const xmlChar *str1, const xmlChar *str2) 599: { 600: xmlStructuredErrorFunc schannel = NULL; 601: 602: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 603: (ctxt->instate == XML_PARSER_EOF)) 604: return; 605: if (ctxt != NULL) { 606: ctxt->errNo = error; 607: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 608: schannel = ctxt->sax->serror; 609: } 610: if (ctxt != NULL) { 611: __xmlRaiseError(schannel, 612: ctxt->vctxt.error, ctxt->vctxt.userData, 613: ctxt, NULL, XML_FROM_DTD, error, 614: XML_ERR_ERROR, NULL, 0, (const char *) str1, 615: (const char *) str2, NULL, 0, 0, 616: msg, (const char *) str1, (const char *) str2); 617: ctxt->valid = 0; 618: } else { 619: __xmlRaiseError(schannel, NULL, NULL, 620: ctxt, NULL, XML_FROM_DTD, error, 621: XML_ERR_ERROR, NULL, 0, (const char *) str1, 622: (const char *) str2, NULL, 0, 0, 623: msg, (const char *) str1, (const char *) str2); 624: } 625: } 626: 627: /** 628: * xmlFatalErrMsgInt: 629: * @ctxt: an XML parser context 630: * @error: the error number 631: * @msg: the error message 632: * @val: an integer value 633: * 634: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 635: */ 636: static void 637: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 638: const char *msg, int val) 639: { 640: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 641: (ctxt->instate == XML_PARSER_EOF)) 642: return; 643: if (ctxt != NULL) 644: ctxt->errNo = error; 645: __xmlRaiseError(NULL, NULL, NULL, 646: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 647: NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 648: if (ctxt != NULL) { 649: ctxt->wellFormed = 0; 650: if (ctxt->recovery == 0) 651: ctxt->disableSAX = 1; 652: } 653: } 654: 655: /** 656: * xmlFatalErrMsgStrIntStr: 657: * @ctxt: an XML parser context 658: * @error: the error number 659: * @msg: the error message 660: * @str1: an string info 661: * @val: an integer value 662: * @str2: an string info 663: * 664: * Handle a fatal parser error, i.e. violating Well-Formedness constraints 665: */ 666: static void 667: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 668: const char *msg, const xmlChar *str1, int val, 669: const xmlChar *str2) 670: { 671: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 672: (ctxt->instate == XML_PARSER_EOF)) 673: return; 674: if (ctxt != NULL) 675: ctxt->errNo = error; 676: __xmlRaiseError(NULL, NULL, NULL, 677: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 678: NULL, 0, (const char *) str1, (const char *) str2, 679: NULL, val, 0, msg, str1, val, str2); 680: if (ctxt != NULL) { 681: ctxt->wellFormed = 0; 682: if (ctxt->recovery == 0) 683: ctxt->disableSAX = 1; 684: } 685: } 686: 687: /** 688: * xmlFatalErrMsgStr: 689: * @ctxt: an XML parser context 690: * @error: the error number 691: * @msg: the error message 692: * @val: a string value 693: * 694: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 695: */ 696: static void 697: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 698: const char *msg, const xmlChar * val) 699: { 700: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 701: (ctxt->instate == XML_PARSER_EOF)) 702: return; 703: if (ctxt != NULL) 704: ctxt->errNo = error; 705: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 706: XML_FROM_PARSER, error, XML_ERR_FATAL, 707: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 708: val); 709: if (ctxt != NULL) { 710: ctxt->wellFormed = 0; 711: if (ctxt->recovery == 0) 712: ctxt->disableSAX = 1; 713: } 714: } 715: 716: /** 717: * xmlErrMsgStr: 718: * @ctxt: an XML parser context 719: * @error: the error number 720: * @msg: the error message 721: * @val: a string value 722: * 723: * Handle a non fatal parser error 724: */ 725: static void 726: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 727: const char *msg, const xmlChar * val) 728: { 729: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 730: (ctxt->instate == XML_PARSER_EOF)) 731: return; 732: if (ctxt != NULL) 733: ctxt->errNo = error; 734: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 735: XML_FROM_PARSER, error, XML_ERR_ERROR, 736: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 737: val); 738: } 739: 740: /** 741: * xmlNsErr: 742: * @ctxt: an XML parser context 743: * @error: the error number 744: * @msg: the message 745: * @info1: extra information string 746: * @info2: extra information string 747: * 748: * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 749: */ 750: static void 751: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 752: const char *msg, 753: const xmlChar * info1, const xmlChar * info2, 754: const xmlChar * info3) 755: { 756: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 757: (ctxt->instate == XML_PARSER_EOF)) 758: return; 759: if (ctxt != NULL) 760: ctxt->errNo = error; 761: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 762: XML_ERR_ERROR, NULL, 0, (const char *) info1, 763: (const char *) info2, (const char *) info3, 0, 0, msg, 764: info1, info2, info3); 765: if (ctxt != NULL) 766: ctxt->nsWellFormed = 0; 767: } 768: 769: /** 770: * xmlNsWarn 771: * @ctxt: an XML parser context 772: * @error: the error number 773: * @msg: the message 774: * @info1: extra information string 775: * @info2: extra information string 776: * 777: * Handle a namespace warning error 778: */ 779: static void 780: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 781: const char *msg, 782: const xmlChar * info1, const xmlChar * info2, 783: const xmlChar * info3) 784: { 785: if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 786: (ctxt->instate == XML_PARSER_EOF)) 787: return; 788: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 789: XML_ERR_WARNING, NULL, 0, (const char *) info1, 790: (const char *) info2, (const char *) info3, 0, 0, msg, 791: info1, info2, info3); 792: } 793: 794: /************************************************************************ 795: * * 796: * Library wide options * 797: * * 798: ************************************************************************/ 799: 800: /** 801: * xmlHasFeature: 802: * @feature: the feature to be examined 803: * 804: * Examines if the library has been compiled with a given feature. 805: * 806: * Returns a non-zero value if the feature exist, otherwise zero. 
807: * Returns zero (0) if the feature does not exist or an unknown 808: * unknown feature is requested, non-zero otherwise. 809: */ 810: int 811: xmlHasFeature(xmlFeature feature) 812: { 813: switch (feature) { 814: case XML_WITH_THREAD: 815: #ifdef LIBXML_THREAD_ENABLED 816: return(1); 817: #else 818: return(0); 819: #endif 820: case XML_WITH_TREE: 821: #ifdef LIBXML_TREE_ENABLED 822: return(1); 823: #else 824: return(0); 825: #endif 826: case XML_WITH_OUTPUT: 827: #ifdef LIBXML_OUTPUT_ENABLED 828: return(1); 829: #else 830: return(0); 831: #endif 832: case XML_WITH_PUSH: 833: #ifdef LIBXML_PUSH_ENABLED 834: return(1); 835: #else 836: return(0); 837: #endif 838: case XML_WITH_READER: 839: #ifdef LIBXML_READER_ENABLED 840: return(1); 841: #else 842: return(0); 843: #endif 844: case XML_WITH_PATTERN: 845: #ifdef LIBXML_PATTERN_ENABLED 846: return(1); 847: #else 848: return(0); 849: #endif 850: case XML_WITH_WRITER: 851: #ifdef LIBXML_WRITER_ENABLED 852: return(1); 853: #else 854: return(0); 855: #endif 856: case XML_WITH_SAX1: 857: #ifdef LIBXML_SAX1_ENABLED 858: return(1); 859: #else 860: return(0); 861: #endif 862: case XML_WITH_FTP: 863: #ifdef LIBXML_FTP_ENABLED 864: return(1); 865: #else 866: return(0); 867: #endif 868: case XML_WITH_HTTP: 869: #ifdef LIBXML_HTTP_ENABLED 870: return(1); 871: #else 872: return(0); 873: #endif 874: case XML_WITH_VALID: 875: #ifdef LIBXML_VALID_ENABLED 876: return(1); 877: #else 878: return(0); 879: #endif 880: case XML_WITH_HTML: 881: #ifdef LIBXML_HTML_ENABLED 882: return(1); 883: #else 884: return(0); 885: #endif 886: case XML_WITH_LEGACY: 887: #ifdef LIBXML_LEGACY_ENABLED 888: return(1); 889: #else 890: return(0); 891: #endif 892: case XML_WITH_C14N: 893: #ifdef LIBXML_C14N_ENABLED 894: return(1); 895: #else 896: return(0); 897: #endif 898: case XML_WITH_CATALOG: 899: #ifdef LIBXML_CATALOG_ENABLED 900: return(1); 901: #else 902: return(0); 903: #endif 904: case XML_WITH_XPATH: 905: #ifdef LIBXML_XPATH_ENABLED 906: return(1); 
907: #else 908: return(0); 909: #endif 910: case XML_WITH_XPTR: 911: #ifdef LIBXML_XPTR_ENABLED 912: return(1); 913: #else 914: return(0); 915: #endif 916: case XML_WITH_XINCLUDE: 917: #ifdef LIBXML_XINCLUDE_ENABLED 918: return(1); 919: #else 920: return(0); 921: #endif 922: case XML_WITH_ICONV: 923: #ifdef LIBXML_ICONV_ENABLED 924: return(1); 925: #else 926: return(0); 927: #endif 928: case XML_WITH_ISO8859X: 929: #ifdef LIBXML_ISO8859X_ENABLED 930: return(1); 931: #else 932: return(0); 933: #endif 934: case XML_WITH_UNICODE: 935: #ifdef LIBXML_UNICODE_ENABLED 936: return(1); 937: #else 938: return(0); 939: #endif 940: case XML_WITH_REGEXP: 941: #ifdef LIBXML_REGEXP_ENABLED 942: return(1); 943: #else 944: return(0); 945: #endif 946: case XML_WITH_AUTOMATA: 947: #ifdef LIBXML_AUTOMATA_ENABLED 948: return(1); 949: #else 950: return(0); 951: #endif 952: case XML_WITH_EXPR: 953: #ifdef LIBXML_EXPR_ENABLED 954: return(1); 955: #else 956: return(0); 957: #endif 958: case XML_WITH_SCHEMAS: 959: #ifdef LIBXML_SCHEMAS_ENABLED 960: return(1); 961: #else 962: return(0); 963: #endif 964: case XML_WITH_SCHEMATRON: 965: #ifdef LIBXML_SCHEMATRON_ENABLED 966: return(1); 967: #else 968: return(0); 969: #endif 970: case XML_WITH_MODULES: 971: #ifdef LIBXML_MODULES_ENABLED 972: return(1); 973: #else 974: return(0); 975: #endif 976: case XML_WITH_DEBUG: 977: #ifdef LIBXML_DEBUG_ENABLED 978: return(1); 979: #else 980: return(0); 981: #endif 982: case XML_WITH_DEBUG_MEM: 983: #ifdef DEBUG_MEMORY_LOCATION 984: return(1); 985: #else 986: return(0); 987: #endif 988: case XML_WITH_DEBUG_RUN: 989: #ifdef LIBXML_DEBUG_RUNTIME 990: return(1); 991: #else 992: return(0); 993: #endif 994: case XML_WITH_ZLIB: 995: #ifdef LIBXML_ZLIB_ENABLED 996: return(1); 997: #else 998: return(0); 999: #endif 1000: case XML_WITH_LZMA: 1001: #ifdef LIBXML_LZMA_ENABLED 1002: return(1); 1003: #else 1004: return(0); 1005: #endif 1006: case XML_WITH_ICU: 1007: #ifdef LIBXML_ICU_ENABLED 1008: return(1); 1009: #else 
1010: return(0); 1011: #endif 1012: default: 1013: break; 1014: } 1015: return(0); 1016: } 1017: 1018: /************************************************************************ 1019: * * 1020: * SAX2 defaulted attributes handling * 1021: * * 1022: ************************************************************************/ 1023: 1024: /** 1025: * xmlDetectSAX2: 1026: * @ctxt: an XML parser context 1027: * 1028: * Do the SAX2 detection and specific intialization 1029: */ 1030: static void 1031: xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1032: if (ctxt == NULL) return; 1033: #ifdef LIBXML_SAX1_ENABLED 1034: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1035: ((ctxt->sax->startElementNs != NULL) || 1036: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1037: #else 1038: ctxt->sax2 = 1; 1039: #endif /* LIBXML_SAX1_ENABLED */ 1040: 1041: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1042: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1043: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1044: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1045: (ctxt->str_xml_ns == NULL)) { 1046: xmlErrMemory(ctxt, NULL); 1047: } 1048: } 1049: 1050: typedef struct _xmlDefAttrs xmlDefAttrs; 1051: typedef xmlDefAttrs *xmlDefAttrsPtr; 1052: struct _xmlDefAttrs { 1053: int nbAttrs; /* number of defaulted attributes on that element */ 1054: int maxAttrs; /* the size of the array */ 1055: const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1056: }; 1057: 1058: /** 1059: * xmlAttrNormalizeSpace: 1060: * @src: the source string 1061: * @dst: the target string 1062: * 1063: * Normalize the space in non CDATA attribute values: 1064: * If the attribute type is not CDATA, then the XML processor MUST further 1065: * process the normalized attribute value by discarding any leading and 1066: * trailing space (#x20) characters, and by replacing sequences of space 1067: * (#x20) characters by a single 
space (#x20) character. 1068: * Note that the size of dst need to be at least src, and if one doesn't need 1069: * to preserve dst (and it doesn't come from a dictionary or read-only) then 1070: * passing src as dst is just fine. 1071: * 1072: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1073: * is needed. 1074: */ 1075: static xmlChar * 1076: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1077: { 1078: if ((src == NULL) || (dst == NULL)) 1079: return(NULL); 1080: 1081: while (*src == 0x20) src++; 1082: while (*src != 0) { 1083: if (*src == 0x20) { 1084: while (*src == 0x20) src++; 1085: if (*src != 0) 1086: *dst++ = 0x20; 1087: } else { 1088: *dst++ = *src++; 1089: } 1090: } 1091: *dst = 0; 1092: if (dst == src) 1093: return(NULL); 1094: return(dst); 1095: } 1096: 1097: /** 1098: * xmlAttrNormalizeSpace2: 1099: * @src: the source string 1100: * 1101: * Normalize the space in non CDATA attribute values, a slightly more complex 1102: * front end to avoid allocation problems when running on attribute values 1103: * coming from the input. 1104: * 1105: * Returns a pointer to the normalized value (dst) or NULL if no conversion 1106: * is needed. 
1107: */ 1108: static const xmlChar * 1109: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1110: { 1111: int i; 1112: int remove_head = 0; 1113: int need_realloc = 0; 1114: const xmlChar *cur; 1115: 1116: if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1117: return(NULL); 1118: i = *len; 1119: if (i <= 0) 1120: return(NULL); 1121: 1122: cur = src; 1123: while (*cur == 0x20) { 1124: cur++; 1125: remove_head++; 1126: } 1127: while (*cur != 0) { 1128: if (*cur == 0x20) { 1129: cur++; 1130: if ((*cur == 0x20) || (*cur == 0)) { 1131: need_realloc = 1; 1132: break; 1133: } 1134: } else 1135: cur++; 1136: } 1137: if (need_realloc) { 1138: xmlChar *ret; 1139: 1140: ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1141: if (ret == NULL) { 1142: xmlErrMemory(ctxt, NULL); 1143: return(NULL); 1144: } 1145: xmlAttrNormalizeSpace(ret, ret); 1146: *len = (int) strlen((const char *)ret); 1147: return(ret); 1148: } else if (remove_head) { 1149: *len -= remove_head; 1150: memmove(src, src + remove_head, 1 + *len); 1151: return(src); 1152: } 1153: return(NULL); 1154: } 1155: 1156: /** 1157: * xmlAddDefAttrs: 1158: * @ctxt: an XML parser context 1159: * @fullname: the element fullname 1160: * @fullattr: the attribute fullname 1161: * @value: the attribute value 1162: * 1163: * Add a defaulted attribute for an element 1164: */ 1165: static void 1166: xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1167: const xmlChar *fullname, 1168: const xmlChar *fullattr, 1169: const xmlChar *value) { 1170: xmlDefAttrsPtr defaults; 1171: int len; 1172: const xmlChar *name; 1173: const xmlChar *prefix; 1174: 1175: /* 1176: * Allows to detect attribute redefinitions 1177: */ 1178: if (ctxt->attsSpecial != NULL) { 1179: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1180: return; 1181: } 1182: 1183: if (ctxt->attsDefault == NULL) { 1184: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1185: if (ctxt->attsDefault == NULL) 1186: goto mem_error; 1187: 
} 1188: 1189: /* 1190: * split the element name into prefix:localname , the string found 1191: * are within the DTD and then not associated to namespace names. 1192: */ 1193: name = xmlSplitQName3(fullname, &len); 1194: if (name == NULL) { 1195: name = xmlDictLookup(ctxt->dict, fullname, -1); 1196: prefix = NULL; 1197: } else { 1198: name = xmlDictLookup(ctxt->dict, name, -1); 1199: prefix = xmlDictLookup(ctxt->dict, fullname, len); 1200: } 1201: 1202: /* 1203: * make sure there is some storage 1204: */ 1205: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1206: if (defaults == NULL) { 1207: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1208: (4 * 5) * sizeof(const xmlChar *)); 1209: if (defaults == NULL) 1210: goto mem_error; 1211: defaults->nbAttrs = 0; 1212: defaults->maxAttrs = 4; 1213: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1214: defaults, NULL) < 0) { 1215: xmlFree(defaults); 1216: goto mem_error; 1217: } 1218: } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1219: xmlDefAttrsPtr temp; 1220: 1221: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1222: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1223: if (temp == NULL) 1224: goto mem_error; 1225: defaults = temp; 1226: defaults->maxAttrs *= 2; 1227: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1228: defaults, NULL) < 0) { 1229: xmlFree(defaults); 1230: goto mem_error; 1231: } 1232: } 1233: 1234: /* 1235: * Split the element name into prefix:localname , the string found 1236: * are within the DTD and hen not associated to namespace names. 
1237: */ 1238: name = xmlSplitQName3(fullattr, &len); 1239: if (name == NULL) { 1240: name = xmlDictLookup(ctxt->dict, fullattr, -1); 1241: prefix = NULL; 1242: } else { 1243: name = xmlDictLookup(ctxt->dict, name, -1); 1244: prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1245: } 1246: 1247: defaults->values[5 * defaults->nbAttrs] = name; 1248: defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1249: /* intern the string and precompute the end */ 1250: len = xmlStrlen(value); 1251: value = xmlDictLookup(ctxt->dict, value, len); 1252: defaults->values[5 * defaults->nbAttrs + 2] = value; 1253: defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1254: if (ctxt->external) 1255: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1256: else 1257: defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1258: defaults->nbAttrs++; 1259: 1260: return; 1261: 1262: mem_error: 1263: xmlErrMemory(ctxt, NULL); 1264: return; 1265: } 1266: 1267: /** 1268: * xmlAddSpecialAttr: 1269: * @ctxt: an XML parser context 1270: * @fullname: the element fullname 1271: * @fullattr: the attribute fullname 1272: * @type: the attribute type 1273: * 1274: * Register this attribute type 1275: */ 1276: static void 1277: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1278: const xmlChar *fullname, 1279: const xmlChar *fullattr, 1280: int type) 1281: { 1282: if (ctxt->attsSpecial == NULL) { 1283: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1284: if (ctxt->attsSpecial == NULL) 1285: goto mem_error; 1286: } 1287: 1288: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1289: return; 1290: 1291: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1292: (void *) (long) type); 1293: return; 1294: 1295: mem_error: 1296: xmlErrMemory(ctxt, NULL); 1297: return; 1298: } 1299: 1300: /** 1301: * xmlCleanSpecialAttrCallback: 1302: * 1303: * Removes CDATA attributes from the special attribute table 1304: */ 1305: static void 1306: xmlCleanSpecialAttrCallback(void 
*payload, void *data, 1307: const xmlChar *fullname, const xmlChar *fullattr, 1308: const xmlChar *unused ATTRIBUTE_UNUSED) { 1309: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1310: 1311: if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1312: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1313: } 1314: } 1315: 1316: /** 1317: * xmlCleanSpecialAttr: 1318: * @ctxt: an XML parser context 1319: * 1320: * Trim the list of attributes defined to remove all those of type 1321: * CDATA as they are not special. This call should be done when finishing 1322: * to parse the DTD and before starting to parse the document root. 1323: */ 1324: static void 1325: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1326: { 1327: if (ctxt->attsSpecial == NULL) 1328: return; 1329: 1330: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1331: 1332: if (xmlHashSize(ctxt->attsSpecial) == 0) { 1333: xmlHashFree(ctxt->attsSpecial, NULL); 1334: ctxt->attsSpecial = NULL; 1335: } 1336: return; 1337: } 1338: 1339: /** 1340: * xmlCheckLanguageID: 1341: * @lang: pointer to the string value 1342: * 1343: * Checks that the value conforms to the LanguageID production: 1344: * 1345: * NOTE: this is somewhat deprecated, those productions were removed from 1346: * the XML Second edition. 
*
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * The subtags are recognized purely by their length and by whether they
 * start with a digit; at most one extlang, one script, one region and one
 * variant are examined, and whatever follows the '-' after a variant is
 * accepted unchecked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /* cur marks the start of the current subtag, nxt scans to its end */
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        /* only letters may follow the "i-" / "x-" prefix */
        return(cur[0] == 0);
    }
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved
         */
        /* a primary subtag of 4 to 8 letters must be the whole value */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    /* the number of letters alone tells the subtag kinds apart */
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /*
     * A subtag starting with a digit is only accepted as exactly three
     * digits (UN M.49 region) followed by the end of the value or '-'.
     * NOTE(review): a variant that starts with a digit, allowed by the
     * grammar quoted above (DIGIT 3alphanum), is therefore rejected.
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}

/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/

static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);

#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *         and the number of entries in the stack after the push otherwise.
1553: */ 1554: static int 1555: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1556: { 1557: if (ctxt->options & XML_PARSE_NSCLEAN) { 1558: int i; 1559: for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1560: if (ctxt->nsTab[i] == prefix) { 1561: /* in scope */ 1562: if (ctxt->nsTab[i + 1] == URL) 1563: return(-2); 1564: /* out of scope keep it */ 1565: break; 1566: } 1567: } 1568: } 1569: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1570: ctxt->nsMax = 10; 1571: ctxt->nsNr = 0; 1572: ctxt->nsTab = (const xmlChar **) 1573: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1574: if (ctxt->nsTab == NULL) { 1575: xmlErrMemory(ctxt, NULL); 1576: ctxt->nsMax = 0; 1577: return (-1); 1578: } 1579: } else if (ctxt->nsNr >= ctxt->nsMax) { 1580: const xmlChar ** tmp; 1581: ctxt->nsMax *= 2; 1582: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1583: ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1584: if (tmp == NULL) { 1585: xmlErrMemory(ctxt, NULL); 1586: ctxt->nsMax /= 2; 1587: return (-1); 1588: } 1589: ctxt->nsTab = tmp; 1590: } 1591: ctxt->nsTab[ctxt->nsNr++] = prefix; 1592: ctxt->nsTab[ctxt->nsNr++] = URL; 1593: return (ctxt->nsNr); 1594: } 1595: /** 1596: * nsPop: 1597: * @ctxt: an XML parser context 1598: * @nr: the number to pop 1599: * 1600: * Pops the top @nr parser prefix/namespace from the ns stack 1601: * 1602: * Returns the number of namespaces removed 1603: */ 1604: static int 1605: nsPop(xmlParserCtxtPtr ctxt, int nr) 1606: { 1607: int i; 1608: 1609: if (ctxt->nsTab == NULL) return(0); 1610: if (ctxt->nsNr < nr) { 1611: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1612: nr = ctxt->nsNr; 1613: } 1614: if (ctxt->nsNr <= 0) 1615: return (0); 1616: 1617: for (i = 0;i < nr;i++) { 1618: ctxt->nsNr--; 1619: ctxt->nsTab[ctxt->nsNr] = NULL; 1620: } 1621: return(nr); 1622: } 1623: #endif 1624: 1625: static int 1626: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1627: const xmlChar **atts; 1628: int *attallocs; 1629: int 
maxatts; 1630: 1631: if (ctxt->atts == NULL) { 1632: maxatts = 55; /* allow for 10 attrs by default */ 1633: atts = (const xmlChar **) 1634: xmlMalloc(maxatts * sizeof(xmlChar *)); 1635: if (atts == NULL) goto mem_error; 1636: ctxt->atts = atts; 1637: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1638: if (attallocs == NULL) goto mem_error; 1639: ctxt->attallocs = attallocs; 1640: ctxt->maxatts = maxatts; 1641: } else if (nr + 5 > ctxt->maxatts) { 1642: maxatts = (nr + 5) * 2; 1643: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1644: maxatts * sizeof(const xmlChar *)); 1645: if (atts == NULL) goto mem_error; 1646: ctxt->atts = atts; 1647: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1648: (maxatts / 5) * sizeof(int)); 1649: if (attallocs == NULL) goto mem_error; 1650: ctxt->attallocs = attallocs; 1651: ctxt->maxatts = maxatts; 1652: } 1653: return(ctxt->maxatts); 1654: mem_error: 1655: xmlErrMemory(ctxt, NULL); 1656: return(-1); 1657: } 1658: 1659: /** 1660: * inputPush: 1661: * @ctxt: an XML parser context 1662: * @value: the parser input 1663: * 1664: * Pushes a new parser input on top of the input stack 1665: * 1666: * Returns -1 in case of error, the index in the stack otherwise 1667: */ 1668: int 1669: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1670: { 1671: if ((ctxt == NULL) || (value == NULL)) 1672: return(-1); 1673: if (ctxt->inputNr >= ctxt->inputMax) { 1674: ctxt->inputMax *= 2; 1675: ctxt->inputTab = 1676: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1677: ctxt->inputMax * 1678: sizeof(ctxt->inputTab[0])); 1679: if (ctxt->inputTab == NULL) { 1680: xmlErrMemory(ctxt, NULL); 1681: xmlFreeInputStream(value); 1682: ctxt->inputMax /= 2; 1683: value = NULL; 1684: return (-1); 1685: } 1686: } 1687: ctxt->inputTab[ctxt->inputNr] = value; 1688: ctxt->input = value; 1689: return (ctxt->inputNr++); 1690: } 1691: /** 1692: * inputPop: 1693: * @ctxt: an XML parser context 1694: * 1695: * Pops the top parser 
input from the input stack 1696: * 1697: * Returns the input just removed 1698: */ 1699: xmlParserInputPtr 1700: inputPop(xmlParserCtxtPtr ctxt) 1701: { 1702: xmlParserInputPtr ret; 1703: 1704: if (ctxt == NULL) 1705: return(NULL); 1706: if (ctxt->inputNr <= 0) 1707: return (NULL); 1708: ctxt->inputNr--; 1709: if (ctxt->inputNr > 0) 1710: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1711: else 1712: ctxt->input = NULL; 1713: ret = ctxt->inputTab[ctxt->inputNr]; 1714: ctxt->inputTab[ctxt->inputNr] = NULL; 1715: return (ret); 1716: } 1717: /** 1718: * nodePush: 1719: * @ctxt: an XML parser context 1720: * @value: the element node 1721: * 1722: * Pushes a new element node on top of the node stack 1723: * 1724: * Returns -1 in case of error, the index in the stack otherwise 1725: */ 1726: int 1727: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1728: { 1729: if (ctxt == NULL) return(0); 1730: if (ctxt->nodeNr >= ctxt->nodeMax) { 1731: xmlNodePtr *tmp; 1732: 1733: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1734: ctxt->nodeMax * 2 * 1735: sizeof(ctxt->nodeTab[0])); 1736: if (tmp == NULL) { 1737: xmlErrMemory(ctxt, NULL); 1738: return (-1); 1739: } 1740: ctxt->nodeTab = tmp; 1741: ctxt->nodeMax *= 2; 1742: } 1743: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1744: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1745: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1746: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1747: xmlParserMaxDepth); 1748: ctxt->instate = XML_PARSER_EOF; 1749: return(-1); 1750: } 1751: ctxt->nodeTab[ctxt->nodeNr] = value; 1752: ctxt->node = value; 1753: return (ctxt->nodeNr++); 1754: } 1755: 1756: /** 1757: * nodePop: 1758: * @ctxt: an XML parser context 1759: * 1760: * Pops the top element node from the node stack 1761: * 1762: * Returns the node just removed 1763: */ 1764: xmlNodePtr 1765: nodePop(xmlParserCtxtPtr ctxt) 1766: { 1767: xmlNodePtr ret; 1768: 1769: if (ctxt == NULL) return(NULL); 1770: if 
(ctxt->nodeNr <= 0) 1771: return (NULL); 1772: ctxt->nodeNr--; 1773: if (ctxt->nodeNr > 0) 1774: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1775: else 1776: ctxt->node = NULL; 1777: ret = ctxt->nodeTab[ctxt->nodeNr]; 1778: ctxt->nodeTab[ctxt->nodeNr] = NULL; 1779: return (ret); 1780: } 1781: 1782: #ifdef LIBXML_PUSH_ENABLED 1783: /** 1784: * nameNsPush: 1785: * @ctxt: an XML parser context 1786: * @value: the element name 1787: * @prefix: the element prefix 1788: * @URI: the element namespace name 1789: * 1790: * Pushes a new element name/prefix/URL on top of the name stack 1791: * 1792: * Returns -1 in case of error, the index in the stack otherwise 1793: */ 1794: static int 1795: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1796: const xmlChar *prefix, const xmlChar *URI, int nsNr) 1797: { 1798: if (ctxt->nameNr >= ctxt->nameMax) { 1799: const xmlChar * *tmp; 1800: void **tmp2; 1801: ctxt->nameMax *= 2; 1802: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1803: ctxt->nameMax * 1804: sizeof(ctxt->nameTab[0])); 1805: if (tmp == NULL) { 1806: ctxt->nameMax /= 2; 1807: goto mem_error; 1808: } 1809: ctxt->nameTab = tmp; 1810: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1811: ctxt->nameMax * 3 * 1812: sizeof(ctxt->pushTab[0])); 1813: if (tmp2 == NULL) { 1814: ctxt->nameMax /= 2; 1815: goto mem_error; 1816: } 1817: ctxt->pushTab = tmp2; 1818: } 1819: ctxt->nameTab[ctxt->nameNr] = value; 1820: ctxt->name = value; 1821: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1822: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1823: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1824: return (ctxt->nameNr++); 1825: mem_error: 1826: xmlErrMemory(ctxt, NULL); 1827: return (-1); 1828: } 1829: /** 1830: * nameNsPop: 1831: * @ctxt: an XML parser context 1832: * 1833: * Pops the top element/prefix/URI name from the name stack 1834: * 1835: * Returns the name just removed 1836: */ 1837: static const xmlChar * 1838: 
nameNsPop(xmlParserCtxtPtr ctxt) 1839: { 1840: const xmlChar *ret; 1841: 1842: if (ctxt->nameNr <= 0) 1843: return (NULL); 1844: ctxt->nameNr--; 1845: if (ctxt->nameNr > 0) 1846: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1847: else 1848: ctxt->name = NULL; 1849: ret = ctxt->nameTab[ctxt->nameNr]; 1850: ctxt->nameTab[ctxt->nameNr] = NULL; 1851: return (ret); 1852: } 1853: #endif /* LIBXML_PUSH_ENABLED */ 1854: 1855: /** 1856: * namePush: 1857: * @ctxt: an XML parser context 1858: * @value: the element name 1859: * 1860: * Pushes a new element name on top of the name stack 1861: * 1862: * Returns -1 in case of error, the index in the stack otherwise 1863: */ 1864: int 1865: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1866: { 1867: if (ctxt == NULL) return (-1); 1868: 1869: if (ctxt->nameNr >= ctxt->nameMax) { 1870: const xmlChar * *tmp; 1871: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1872: ctxt->nameMax * 2 * 1873: sizeof(ctxt->nameTab[0])); 1874: if (tmp == NULL) { 1875: goto mem_error; 1876: } 1877: ctxt->nameTab = tmp; 1878: ctxt->nameMax *= 2; 1879: } 1880: ctxt->nameTab[ctxt->nameNr] = value; 1881: ctxt->name = value; 1882: return (ctxt->nameNr++); 1883: mem_error: 1884: xmlErrMemory(ctxt, NULL); 1885: return (-1); 1886: } 1887: /** 1888: * namePop: 1889: * @ctxt: an XML parser context 1890: * 1891: * Pops the top element name from the name stack 1892: * 1893: * Returns the name just removed 1894: */ 1895: const xmlChar * 1896: namePop(xmlParserCtxtPtr ctxt) 1897: { 1898: const xmlChar *ret; 1899: 1900: if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1901: return (NULL); 1902: ctxt->nameNr--; 1903: if (ctxt->nameNr > 0) 1904: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1905: else 1906: ctxt->name = NULL; 1907: ret = ctxt->nameTab[ctxt->nameNr]; 1908: ctxt->nameTab[ctxt->nameNr] = NULL; 1909: return (ret); 1910: } 1911: 1912: static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1913: if (ctxt->spaceNr >= ctxt->spaceMax) { 
1914: int *tmp; 1915: 1916: ctxt->spaceMax *= 2; 1917: tmp = (int *) xmlRealloc(ctxt->spaceTab, 1918: ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1919: if (tmp == NULL) { 1920: xmlErrMemory(ctxt, NULL); 1921: ctxt->spaceMax /=2; 1922: return(-1); 1923: } 1924: ctxt->spaceTab = tmp; 1925: } 1926: ctxt->spaceTab[ctxt->spaceNr] = val; 1927: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1928: return(ctxt->spaceNr++); 1929: } 1930: 1931: static int spacePop(xmlParserCtxtPtr ctxt) { 1932: int ret; 1933: if (ctxt->spaceNr <= 0) return(0); 1934: ctxt->spaceNr--; 1935: if (ctxt->spaceNr > 0) 1936: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1937: else 1938: ctxt->space = &ctxt->spaceTab[0]; 1939: ret = ctxt->spaceTab[ctxt->spaceNr]; 1940: ctxt->spaceTab[ctxt->spaceNr] = -1; 1941: return(ret); 1942: } 1943: 1944: /* 1945: * Macros for accessing the content. Those should be used only by the parser, 1946: * and not exported. 1947: * 1948: * Dirty macros, i.e. one often need to make assumption on the context to 1949: * use them 1950: * 1951: * CUR_PTR return the current pointer to the xmlChar to be parsed. 1952: * To be used with extreme caution since operations consuming 1953: * characters may move the input buffer to a different location ! 1954: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1955: * This should be used internally by the parser 1956: * only to compare to ASCII values otherwise it would break when 1957: * running with UTF-8 encoding. 1958: * RAW same as CUR but in the input buffer, bypass any token 1959: * extraction that may have been done 1960: * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1961: * to compare on ASCII based substring. 1962: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1963: * strings without newlines within the parser. 1964: * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1965: * defined char within the parser. 
1966: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1967: * 1968: * NEXT Skip to the next character, this does the proper decoding 1969: * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1970: * NEXTL(l) Skip the current unicode character of l xmlChars long. 1971: * CUR_CHAR(l) returns the current unicode character (int), set l 1972: * to the number of xmlChars used for the encoding [0-5]. 1973: * CUR_SCHAR same but operate on a string instead of the context 1974: * COPY_BUF copy the current unicode char to the target buffer, increment 1975: * the index 1976: * GROW, SHRINK handling of input buffers 1977: */ 1978: 1979: #define RAW (*ctxt->input->cur) 1980: #define CUR (*ctxt->input->cur) 1981: #define NXT(val) ctxt->input->cur[(val)] 1982: #define CUR_PTR ctxt->input->cur 1983: 1984: #define CMP4( s, c1, c2, c3, c4 ) \ 1985: ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1986: ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1987: #define CMP5( s, c1, c2, c3, c4, c5 ) \ 1988: ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1989: #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1990: ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1991: #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1992: ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1993: #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1994: ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1995: #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1996: ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1997: ((unsigned char *) s)[ 8 ] == c9 ) 1998: #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1999: ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2000: ((unsigned char *) s)[ 9 ] == c10 ) 2001: 2002: #define SKIP(val) do { \ 2003: ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 
2004: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2005: if ((*ctxt->input->cur == 0) && \ 2006: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2007: xmlPopInput(ctxt); \ 2008: } while (0) 2009: 2010: #define SKIPL(val) do { \ 2011: int skipl; \ 2012: for(skipl=0; skipl<val; skipl++) { \ 2013: if (*(ctxt->input->cur) == '\n') { \ 2014: ctxt->input->line++; ctxt->input->col = 1; \ 2015: } else ctxt->input->col++; \ 2016: ctxt->nbChars++; \ 2017: ctxt->input->cur++; \ 2018: } \ 2019: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2020: if ((*ctxt->input->cur == 0) && \ 2021: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2022: xmlPopInput(ctxt); \ 2023: } while (0) 2024: 2025: #define SHRINK if ((ctxt->progressive == 0) && \ 2026: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2027: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2028: xmlSHRINK (ctxt); 2029: 2030: static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2031: xmlParserInputShrink(ctxt->input); 2032: if ((*ctxt->input->cur == 0) && 2033: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2034: xmlPopInput(ctxt); 2035: } 2036: 2037: #define GROW if ((ctxt->progressive == 0) && \ 2038: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2039: xmlGROW (ctxt); 2040: 2041: static void xmlGROW (xmlParserCtxtPtr ctxt) { 2042: if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 2043: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 2044: ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && 2045: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2046: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2047: ctxt->instate = XML_PARSER_EOF; 2048: } 2049: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2050: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2051: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2052: xmlPopInput(ctxt); 
2053: } 2054: 2055: #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2056: 2057: #define NEXT xmlNextChar(ctxt) 2058: 2059: #define NEXT1 { \ 2060: ctxt->input->col++; \ 2061: ctxt->input->cur++; \ 2062: ctxt->nbChars++; \ 2063: if (*ctxt->input->cur == 0) \ 2064: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2065: } 2066: 2067: #define NEXTL(l) do { \ 2068: if (*(ctxt->input->cur) == '\n') { \ 2069: ctxt->input->line++; ctxt->input->col = 1; \ 2070: } else ctxt->input->col++; \ 2071: ctxt->input->cur += l; \ 2072: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2073: } while (0) 2074: 2075: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2076: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2077: 2078: #define COPY_BUF(l,b,i,v) \ 2079: if (l == 1) b[i++] = (xmlChar) v; \ 2080: else i += xmlCopyCharMultiByte(&b[i],v) 2081: 2082: /** 2083: * xmlSkipBlankChars: 2084: * @ctxt: the XML parser context 2085: * 2086: * skip all blanks character found at that point in the input streams. 2087: * It pops up finished entities in the process if allowable at that point. 2088: * 2089: * Returns the number of space chars skipped 2090: */ 2091: 2092: int 2093: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2094: int res = 0; 2095: 2096: /* 2097: * It's Okay to use CUR/NEXT here since all the blanks are on 2098: * the ASCII range. 
2099: */ 2100: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2101: const xmlChar *cur; 2102: /* 2103: * if we are in the document content, go really fast 2104: */ 2105: cur = ctxt->input->cur; 2106: while (IS_BLANK_CH(*cur)) { 2107: if (*cur == '\n') { 2108: ctxt->input->line++; ctxt->input->col = 1; 2109: } 2110: cur++; 2111: res++; 2112: if (*cur == 0) { 2113: ctxt->input->cur = cur; 2114: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2115: cur = ctxt->input->cur; 2116: } 2117: } 2118: ctxt->input->cur = cur; 2119: } else { 2120: int cur; 2121: do { 2122: cur = CUR; 2123: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 2124: NEXT; 2125: cur = CUR; 2126: res++; 2127: } 2128: while ((cur == 0) && (ctxt->inputNr > 1) && 2129: (ctxt->instate != XML_PARSER_COMMENT)) { 2130: xmlPopInput(ctxt); 2131: cur = CUR; 2132: } 2133: /* 2134: * Need to handle support of entities branching here 2135: */ 2136: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2137: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 2138: } 2139: return(res); 2140: } 2141: 2142: /************************************************************************ 2143: * * 2144: * Commodity functions to handle entities * 2145: * * 2146: ************************************************************************/ 2147: 2148: /** 2149: * xmlPopInput: 2150: * @ctxt: an XML parser context 2151: * 2152: * xmlPopInput: the current input pointed by ctxt->input came to an end 2153: * pop it and return the next char. 
2154: * 2155: * Returns the current xmlChar in the parser context 2156: */ 2157: xmlChar 2158: xmlPopInput(xmlParserCtxtPtr ctxt) { 2159: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2160: if (xmlParserDebugEntities) 2161: xmlGenericError(xmlGenericErrorContext, 2162: "Popping input %d\n", ctxt->inputNr); 2163: xmlFreeInputStream(inputPop(ctxt)); 2164: if ((*ctxt->input->cur == 0) && 2165: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2166: return(xmlPopInput(ctxt)); 2167: return(CUR); 2168: } 2169: 2170: /** 2171: * xmlPushInput: 2172: * @ctxt: an XML parser context 2173: * @input: an XML parser input fragment (entity, XML fragment ...). 2174: * 2175: * xmlPushInput: switch to a new input stream which is stacked on top 2176: * of the previous one(s). 2177: * Returns -1 in case of error or the index in the input stack 2178: */ 2179: int 2180: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2181: int ret; 2182: if (input == NULL) return(-1); 2183: 2184: if (xmlParserDebugEntities) { 2185: if ((ctxt->input != NULL) && (ctxt->input->filename)) 2186: xmlGenericError(xmlGenericErrorContext, 2187: "%s(%d): ", ctxt->input->filename, 2188: ctxt->input->line); 2189: xmlGenericError(xmlGenericErrorContext, 2190: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2191: } 2192: ret = inputPush(ctxt, input); 2193: if (ctxt->instate == XML_PARSER_EOF) 2194: return(-1); 2195: GROW; 2196: return(ret); 2197: } 2198: 2199: /** 2200: * xmlParseCharRef: 2201: * @ctxt: an XML parser context 2202: * 2203: * parse Reference declarations 2204: * 2205: * [66] CharRef ::= '&#' [0-9]+ ';' | 2206: * '&#x' [0-9a-fA-F]+ ';' 2207: * 2208: * [ WFC: Legal Character ] 2209: * Characters referred to using character references must match the 2210: * production for Char. 
2211: * 2212: * Returns the value parsed (as an int), 0 in case of error 2213: */ 2214: int 2215: xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2216: unsigned int val = 0; 2217: int count = 0; 2218: unsigned int outofrange = 0; 2219: 2220: /* 2221: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2222: */ 2223: if ((RAW == '&') && (NXT(1) == '#') && 2224: (NXT(2) == 'x')) { 2225: SKIP(3); 2226: GROW; 2227: while (RAW != ';') { /* loop blocked by count */ 2228: if (count++ > 20) { 2229: count = 0; 2230: GROW; 2231: if (ctxt->instate == XML_PARSER_EOF) 2232: return(0); 2233: } 2234: if ((RAW >= '0') && (RAW <= '9')) 2235: val = val * 16 + (CUR - '0'); 2236: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2237: val = val * 16 + (CUR - 'a') + 10; 2238: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2239: val = val * 16 + (CUR - 'A') + 10; 2240: else { 2241: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2242: val = 0; 2243: break; 2244: } 2245: if (val > 0x10FFFF) 2246: outofrange = val; 2247: 2248: NEXT; 2249: count++; 2250: } 2251: if (RAW == ';') { 2252: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2253: ctxt->input->col++; 2254: ctxt->nbChars ++; 2255: ctxt->input->cur++; 2256: } 2257: } else if ((RAW == '&') && (NXT(1) == '#')) { 2258: SKIP(2); 2259: GROW; 2260: while (RAW != ';') { /* loop blocked by count */ 2261: if (count++ > 20) { 2262: count = 0; 2263: GROW; 2264: if (ctxt->instate == XML_PARSER_EOF) 2265: return(0); 2266: } 2267: if ((RAW >= '0') && (RAW <= '9')) 2268: val = val * 10 + (CUR - '0'); 2269: else { 2270: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2271: val = 0; 2272: break; 2273: } 2274: if (val > 0x10FFFF) 2275: outofrange = val; 2276: 2277: NEXT; 2278: count++; 2279: } 2280: if (RAW == ';') { 2281: /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2282: ctxt->input->col++; 2283: ctxt->nbChars ++; 2284: ctxt->input->cur++; 2285: } 2286: } else { 2287: 
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2288: } 2289: 2290: /* 2291: * [ WFC: Legal Character ] 2292: * Characters referred to using character references must match the 2293: * production for Char. 2294: */ 2295: if ((IS_CHAR(val) && (outofrange == 0))) { 2296: return(val); 2297: } else { 2298: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2299: "xmlParseCharRef: invalid xmlChar value %d\n", 2300: val); 2301: } 2302: return(0); 2303: } 2304: 2305: /** 2306: * xmlParseStringCharRef: 2307: * @ctxt: an XML parser context 2308: * @str: a pointer to an index in the string 2309: * 2310: * parse Reference declarations, variant parsing from a string rather 2311: * than an an input flow. 2312: * 2313: * [66] CharRef ::= '&#' [0-9]+ ';' | 2314: * '&#x' [0-9a-fA-F]+ ';' 2315: * 2316: * [ WFC: Legal Character ] 2317: * Characters referred to using character references must match the 2318: * production for Char. 2319: * 2320: * Returns the value parsed (as an int), 0 in case of error, str will be 2321: * updated to the current value of the index 2322: */ 2323: static int 2324: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2325: const xmlChar *ptr; 2326: xmlChar cur; 2327: unsigned int val = 0; 2328: unsigned int outofrange = 0; 2329: 2330: if ((str == NULL) || (*str == NULL)) return(0); 2331: ptr = *str; 2332: cur = *ptr; 2333: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2334: ptr += 3; 2335: cur = *ptr; 2336: while (cur != ';') { /* Non input consuming loop */ 2337: if ((cur >= '0') && (cur <= '9')) 2338: val = val * 16 + (cur - '0'); 2339: else if ((cur >= 'a') && (cur <= 'f')) 2340: val = val * 16 + (cur - 'a') + 10; 2341: else if ((cur >= 'A') && (cur <= 'F')) 2342: val = val * 16 + (cur - 'A') + 10; 2343: else { 2344: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2345: val = 0; 2346: break; 2347: } 2348: if (val > 0x10FFFF) 2349: outofrange = val; 2350: 2351: ptr++; 2352: cur = *ptr; 2353: } 2354: if (cur == ';') 2355: 
ptr++; 2356: } else if ((cur == '&') && (ptr[1] == '#')){ 2357: ptr += 2; 2358: cur = *ptr; 2359: while (cur != ';') { /* Non input consuming loops */ 2360: if ((cur >= '0') && (cur <= '9')) 2361: val = val * 10 + (cur - '0'); 2362: else { 2363: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2364: val = 0; 2365: break; 2366: } 2367: if (val > 0x10FFFF) 2368: outofrange = val; 2369: 2370: ptr++; 2371: cur = *ptr; 2372: } 2373: if (cur == ';') 2374: ptr++; 2375: } else { 2376: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2377: return(0); 2378: } 2379: *str = ptr; 2380: 2381: /* 2382: * [ WFC: Legal Character ] 2383: * Characters referred to using character references must match the 2384: * production for Char. 2385: */ 2386: if ((IS_CHAR(val) && (outofrange == 0))) { 2387: return(val); 2388: } else { 2389: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2390: "xmlParseStringCharRef: invalid xmlChar value %d\n", 2391: val); 2392: } 2393: return(0); 2394: } 2395: 2396: /** 2397: * xmlNewBlanksWrapperInputStream: 2398: * @ctxt: an XML parser context 2399: * @entity: an Entity pointer 2400: * 2401: * Create a new input stream for wrapping 2402: * blanks around a PEReference 2403: * 2404: * Returns the new input stream or NULL 2405: */ 2406: 2407: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2408: 2409: static xmlParserInputPtr 2410: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2411: xmlParserInputPtr input; 2412: xmlChar *buffer; 2413: size_t length; 2414: if (entity == NULL) { 2415: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2416: "xmlNewBlanksWrapperInputStream entity\n"); 2417: return(NULL); 2418: } 2419: if (xmlParserDebugEntities) 2420: xmlGenericError(xmlGenericErrorContext, 2421: "new blanks wrapper for entity: %s\n", entity->name); 2422: input = xmlNewInputStream(ctxt); 2423: if (input == NULL) { 2424: return(NULL); 2425: } 2426: length = xmlStrlen(entity->name) + 5; 2427: buffer = 
xmlMallocAtomic(length); 2428: if (buffer == NULL) { 2429: xmlErrMemory(ctxt, NULL); 2430: xmlFree(input); 2431: return(NULL); 2432: } 2433: buffer [0] = ' '; 2434: buffer [1] = '%'; 2435: buffer [length-3] = ';'; 2436: buffer [length-2] = ' '; 2437: buffer [length-1] = 0; 2438: memcpy(buffer + 2, entity->name, length - 5); 2439: input->free = deallocblankswrapper; 2440: input->base = buffer; 2441: input->cur = buffer; 2442: input->length = length; 2443: input->end = &buffer[length]; 2444: return(input); 2445: } 2446: 2447: /** 2448: * xmlParserHandlePEReference: 2449: * @ctxt: the parser context 2450: * 2451: * [69] PEReference ::= '%' Name ';' 2452: * 2453: * [ WFC: No Recursion ] 2454: * A parsed entity must not contain a recursive 2455: * reference to itself, either directly or indirectly. 2456: * 2457: * [ WFC: Entity Declared ] 2458: * In a document without any DTD, a document with only an internal DTD 2459: * subset which contains no parameter entity references, or a document 2460: * with "standalone='yes'", ... ... The declaration of a parameter 2461: * entity must precede any reference to it... 2462: * 2463: * [ VC: Entity Declared ] 2464: * In a document with an external subset or external parameter entities 2465: * with "standalone='no'", ... ... The declaration of a parameter entity 2466: * must precede any reference to it... 2467: * 2468: * [ WFC: In DTD ] 2469: * Parameter-entity references may only appear in the DTD. 2470: * NOTE: misleading but this is handled. 2471: * 2472: * A PEReference may have been detected in the current input stream 2473: * the handling is done accordingly to 2474: * http://www.w3.org/TR/REC-xml#entproc 2475: * i.e. 
2476: * - Included in literal in entity values 2477: * - Included as Parameter Entity reference within DTDs 2478: */ 2479: void 2480: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2481: const xmlChar *name; 2482: xmlEntityPtr entity = NULL; 2483: xmlParserInputPtr input; 2484: 2485: if (RAW != '%') return; 2486: switch(ctxt->instate) { 2487: case XML_PARSER_CDATA_SECTION: 2488: return; 2489: case XML_PARSER_COMMENT: 2490: return; 2491: case XML_PARSER_START_TAG: 2492: return; 2493: case XML_PARSER_END_TAG: 2494: return; 2495: case XML_PARSER_EOF: 2496: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2497: return; 2498: case XML_PARSER_PROLOG: 2499: case XML_PARSER_START: 2500: case XML_PARSER_MISC: 2501: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2502: return; 2503: case XML_PARSER_ENTITY_DECL: 2504: case XML_PARSER_CONTENT: 2505: case XML_PARSER_ATTRIBUTE_VALUE: 2506: case XML_PARSER_PI: 2507: case XML_PARSER_SYSTEM_LITERAL: 2508: case XML_PARSER_PUBLIC_LITERAL: 2509: /* we just ignore it there */ 2510: return; 2511: case XML_PARSER_EPILOG: 2512: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2513: return; 2514: case XML_PARSER_ENTITY_VALUE: 2515: /* 2516: * NOTE: in the case of entity values, we don't do the 2517: * substitution here since we need the literal 2518: * entity value to be able to save the internal 2519: * subset of the document. 2520: * This will be handled by xmlStringDecodeEntities 2521: */ 2522: return; 2523: case XML_PARSER_DTD: 2524: /* 2525: * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2526: * In the internal DTD subset, parameter-entity references 2527: * can occur only where markup declarations can occur, not 2528: * within markup declarations. 
2529: * In that case this is handled in xmlParseMarkupDecl 2530: */ 2531: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2532: return; 2533: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2534: return; 2535: break; 2536: case XML_PARSER_IGNORE: 2537: return; 2538: } 2539: 2540: NEXT; 2541: name = xmlParseName(ctxt); 2542: if (xmlParserDebugEntities) 2543: xmlGenericError(xmlGenericErrorContext, 2544: "PEReference: %s\n", name); 2545: if (name == NULL) { 2546: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2547: } else { 2548: if (RAW == ';') { 2549: NEXT; 2550: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2551: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2552: if (ctxt->instate == XML_PARSER_EOF) 2553: return; 2554: if (entity == NULL) { 2555: 2556: /* 2557: * [ WFC: Entity Declared ] 2558: * In a document without any DTD, a document with only an 2559: * internal DTD subset which contains no parameter entity 2560: * references, or a document with "standalone='yes'", ... 2561: * ... The declaration of a parameter entity must precede 2562: * any reference to it... 2563: */ 2564: if ((ctxt->standalone == 1) || 2565: ((ctxt->hasExternalSubset == 0) && 2566: (ctxt->hasPErefs == 0))) { 2567: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2568: "PEReference: %%%s; not found\n", name); 2569: } else { 2570: /* 2571: * [ VC: Entity Declared ] 2572: * In a document with an external subset or external 2573: * parameter entities with "standalone='no'", ... 2574: * ... The declaration of a parameter entity must precede 2575: * any reference to it... 
2576: */ 2577: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2578: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2579: "PEReference: %%%s; not found\n", 2580: name, NULL); 2581: } else 2582: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2583: "PEReference: %%%s; not found\n", 2584: name, NULL); 2585: ctxt->valid = 0; 2586: } 2587: } else if (ctxt->input->free != deallocblankswrapper) { 2588: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2589: if (xmlPushInput(ctxt, input) < 0) 2590: return; 2591: } else { 2592: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2593: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2594: xmlChar start[4]; 2595: xmlCharEncoding enc; 2596: 2597: /* 2598: * handle the extra spaces added before and after 2599: * c.f. http://www.w3.org/TR/REC-xml#as-PE 2600: * this is done independently. 2601: */ 2602: input = xmlNewEntityInputStream(ctxt, entity); 2603: if (xmlPushInput(ctxt, input) < 0) 2604: return; 2605: 2606: /* 2607: * Get the 4 first bytes and decode the charset 2608: * if enc != XML_CHAR_ENCODING_NONE 2609: * plug some encoding conversion routines. 2610: * Note that, since we may have some non-UTF8 2611: * encoding (like UTF16, bug 135229), the 'length' 2612: * is not known, but we can calculate based upon 2613: * the amount of data in the buffer. 
2614: */ 2615: GROW 2616: if (ctxt->instate == XML_PARSER_EOF) 2617: return; 2618: if ((ctxt->input->end - ctxt->input->cur)>=4) { 2619: start[0] = RAW; 2620: start[1] = NXT(1); 2621: start[2] = NXT(2); 2622: start[3] = NXT(3); 2623: enc = xmlDetectCharEncoding(start, 4); 2624: if (enc != XML_CHAR_ENCODING_NONE) { 2625: xmlSwitchEncoding(ctxt, enc); 2626: } 2627: } 2628: 2629: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2630: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2631: (IS_BLANK_CH(NXT(5)))) { 2632: xmlParseTextDecl(ctxt); 2633: } 2634: } else { 2635: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2636: "PEReference: %s is not a parameter entity\n", 2637: name); 2638: } 2639: } 2640: } else { 2641: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2642: } 2643: } 2644: } 2645: 2646: /* 2647: * Macro used to grow the current buffer. 2648: * buffer##_size is expected to be a size_t 2649: * mem_error: is expected to handle memory allocation failures 2650: */ 2651: #define growBuffer(buffer, n) { \ 2652: xmlChar *tmp; \ 2653: size_t new_size = buffer##_size * 2 + n; \ 2654: if (new_size < buffer##_size) goto mem_error; \ 2655: tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2656: if (tmp == NULL) goto mem_error; \ 2657: buffer = tmp; \ 2658: buffer##_size = new_size; \ 2659: } 2660: 2661: /** 2662: * xmlStringLenDecodeEntities: 2663: * @ctxt: the parser context 2664: * @str: the input string 2665: * @len: the string length 2666: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2667: * @end: an end marker xmlChar, 0 if none 2668: * @end2: an end marker xmlChar, 0 if none 2669: * @end3: an end marker xmlChar, 0 if none 2670: * 2671: * Takes a entity string content and process to do the adequate substitutions. 2672: * 2673: * [67] Reference ::= EntityRef | CharRef 2674: * 2675: * [69] PEReference ::= '%' Name ';' 2676: * 2677: * Returns A newly allocated string with the substitution done. 
The caller 2678: * must deallocate it ! 2679: */ 2680: xmlChar * 2681: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2682: int what, xmlChar end, xmlChar end2, xmlChar end3) { 2683: xmlChar *buffer = NULL; 2684: size_t buffer_size = 0; 2685: size_t nbchars = 0; 2686: 2687: xmlChar *current = NULL; 2688: xmlChar *rep = NULL; 2689: const xmlChar *last; 2690: xmlEntityPtr ent; 2691: int c,l; 2692: 2693: if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2694: return(NULL); 2695: last = str + len; 2696: 2697: if (((ctxt->depth > 40) && 2698: ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2699: (ctxt->depth > 1024)) { 2700: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2701: return(NULL); 2702: } 2703: 2704: /* 2705: * allocate a translation buffer. 2706: */ 2707: buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2708: buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2709: if (buffer == NULL) goto mem_error; 2710: 2711: /* 2712: * OK loop until we reach one of the ending char or a size limit. 2713: * we are operating on already parsed values. 
2714: */ 2715: if (str < last) 2716: c = CUR_SCHAR(str, l); 2717: else 2718: c = 0; 2719: while ((c != 0) && (c != end) && /* non input consuming loop */ 2720: (c != end2) && (c != end3)) { 2721: 2722: if (c == 0) break; 2723: if ((c == '&') && (str[1] == '#')) { 2724: int val = xmlParseStringCharRef(ctxt, &str); 2725: if (val != 0) { 2726: COPY_BUF(0,buffer,nbchars,val); 2727: } 2728: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2729: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2730: } 2731: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2732: if (xmlParserDebugEntities) 2733: xmlGenericError(xmlGenericErrorContext, 2734: "String decoding Entity Reference: %.30s\n", 2735: str); 2736: ent = xmlParseStringEntityRef(ctxt, &str); 2737: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2738: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2739: goto int_error; 2740: if (ent != NULL) 2741: ctxt->nbentities += ent->checked / 2; 2742: if ((ent != NULL) && 2743: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2744: if (ent->content != NULL) { 2745: COPY_BUF(0,buffer,nbchars,ent->content[0]); 2746: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2747: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2748: } 2749: } else { 2750: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2751: "predefined entity has no content\n"); 2752: } 2753: } else if ((ent != NULL) && (ent->content != NULL)) { 2754: ctxt->depth++; 2755: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2756: 0, 0, 0); 2757: ctxt->depth--; 2758: 2759: if (rep != NULL) { 2760: current = rep; 2761: while (*current != 0) { /* non input consuming loop */ 2762: buffer[nbchars++] = *current++; 2763: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2764: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2765: goto int_error; 2766: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2767: } 2768: } 2769: xmlFree(rep); 2770: rep = NULL; 2771: } 2772: } else if (ent != NULL) { 2773: int i = 
xmlStrlen(ent->name); 2774: const xmlChar *cur = ent->name; 2775: 2776: buffer[nbchars++] = '&'; 2777: if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2778: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2779: } 2780: for (;i > 0;i--) 2781: buffer[nbchars++] = *cur++; 2782: buffer[nbchars++] = ';'; 2783: } 2784: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2785: if (xmlParserDebugEntities) 2786: xmlGenericError(xmlGenericErrorContext, 2787: "String decoding PE Reference: %.30s\n", str); 2788: ent = xmlParseStringPEReference(ctxt, &str); 2789: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2790: goto int_error; 2791: if (ent != NULL) 2792: ctxt->nbentities += ent->checked / 2; 2793: if (ent != NULL) { 2794: if (ent->content == NULL) { 2795: xmlLoadEntityContent(ctxt, ent); 2796: } 2797: ctxt->depth++; 2798: rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2799: 0, 0, 0); 2800: ctxt->depth--; 2801: if (rep != NULL) { 2802: current = rep; 2803: while (*current != 0) { /* non input consuming loop */ 2804: buffer[nbchars++] = *current++; 2805: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2806: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2807: goto int_error; 2808: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2809: } 2810: } 2811: xmlFree(rep); 2812: rep = NULL; 2813: } 2814: } 2815: } else { 2816: COPY_BUF(l,buffer,nbchars,c); 2817: str += l; 2818: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2819: growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2820: } 2821: } 2822: if (str < last) 2823: c = CUR_SCHAR(str, l); 2824: else 2825: c = 0; 2826: } 2827: buffer[nbchars] = 0; 2828: return(buffer); 2829: 2830: mem_error: 2831: xmlErrMemory(ctxt, NULL); 2832: int_error: 2833: if (rep != NULL) 2834: xmlFree(rep); 2835: if (buffer != NULL) 2836: xmlFree(buffer); 2837: return(NULL); 2838: } 2839: 2840: /** 2841: * xmlStringDecodeEntities: 2842: * @ctxt: the parser context 2843: * @str: the input string 2844: * @what: combination of 
XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2845: * @end: an end marker xmlChar, 0 if none 2846: * @end2: an end marker xmlChar, 0 if none 2847: * @end3: an end marker xmlChar, 0 if none 2848: * 2849: * Takes a entity string content and process to do the adequate substitutions. 2850: * 2851: * [67] Reference ::= EntityRef | CharRef 2852: * 2853: * [69] PEReference ::= '%' Name ';' 2854: * 2855: * Returns A newly allocated string with the substitution done. The caller 2856: * must deallocate it ! 2857: */ 2858: xmlChar * 2859: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2860: xmlChar end, xmlChar end2, xmlChar end3) { 2861: if ((ctxt == NULL) || (str == NULL)) return(NULL); 2862: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2863: end, end2, end3)); 2864: } 2865: 2866: /************************************************************************ 2867: * * 2868: * Commodity functions, cleanup needed ? * 2869: * * 2870: ************************************************************************/ 2871: 2872: /** 2873: * areBlanks: 2874: * @ctxt: an XML parser context 2875: * @str: a xmlChar * 2876: * @len: the size of @str 2877: * @blank_chars: we know the chars are blanks 2878: * 2879: * Is this a sequence of blank chars that one can ignore ? 2880: * 2881: * Returns 1 if ignorable 0 otherwise. 2882: */ 2883: 2884: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2885: int blank_chars) { 2886: int i, ret; 2887: xmlNodePtr lastChild; 2888: 2889: /* 2890: * Don't spend time trying to differentiate them, the same callback is 2891: * used ! 2892: */ 2893: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2894: return(0); 2895: 2896: /* 2897: * Check for xml:space value. 
2898: */ 2899: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2900: (*(ctxt->space) == -2)) 2901: return(0); 2902: 2903: /* 2904: * Check that the string is made of blanks 2905: */ 2906: if (blank_chars == 0) { 2907: for (i = 0;i < len;i++) 2908: if (!(IS_BLANK_CH(str[i]))) return(0); 2909: } 2910: 2911: /* 2912: * Look if the element is mixed content in the DTD if available 2913: */ 2914: if (ctxt->node == NULL) return(0); 2915: if (ctxt->myDoc != NULL) { 2916: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2917: if (ret == 0) return(1); 2918: if (ret == 1) return(0); 2919: } 2920: 2921: /* 2922: * Otherwise, heuristic :-\ 2923: */ 2924: if ((RAW != '<') && (RAW != 0xD)) return(0); 2925: if ((ctxt->node->children == NULL) && 2926: (RAW == '<') && (NXT(1) == '/')) return(0); 2927: 2928: lastChild = xmlGetLastChild(ctxt->node); 2929: if (lastChild == NULL) { 2930: if ((ctxt->node->type != XML_ELEMENT_NODE) && 2931: (ctxt->node->content != NULL)) return(0); 2932: } else if (xmlNodeIsText(lastChild)) 2933: return(0); 2934: else if ((ctxt->node->children != NULL) && 2935: (xmlNodeIsText(ctxt->node->children))) 2936: return(0); 2937: return(1); 2938: } 2939: 2940: /************************************************************************ 2941: * * 2942: * Extra stuff for namespace support * 2943: * Relates to http://www.w3.org/TR/WD-xml-names * 2944: * * 2945: ************************************************************************/ 2946: 2947: /** 2948: * xmlSplitQName: 2949: * @ctxt: an XML parser context 2950: * @name: an XML parser context 2951: * @prefix: a xmlChar ** 2952: * 2953: * parse an UTF8 encoded XML qualified name string 2954: * 2955: * [NS 5] QName ::= (Prefix ':')? LocalPart 2956: * 2957: * [NS 6] Prefix ::= NCName 2958: * 2959: * [NS 7] LocalPart ::= NCName 2960: * 2961: * Returns the local part, and prefix is updated 2962: * to get the Prefix if any. 
2963: */ 2964: 2965: xmlChar * 2966: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2967: xmlChar buf[XML_MAX_NAMELEN + 5]; 2968: xmlChar *buffer = NULL; 2969: int len = 0; 2970: int max = XML_MAX_NAMELEN; 2971: xmlChar *ret = NULL; 2972: const xmlChar *cur = name; 2973: int c; 2974: 2975: if (prefix == NULL) return(NULL); 2976: *prefix = NULL; 2977: 2978: if (cur == NULL) return(NULL); 2979: 2980: #ifndef XML_XML_NAMESPACE 2981: /* xml: prefix is not really a namespace */ 2982: if ((cur[0] == 'x') && (cur[1] == 'm') && 2983: (cur[2] == 'l') && (cur[3] == ':')) 2984: return(xmlStrdup(name)); 2985: #endif 2986: 2987: /* nasty but well=formed */ 2988: if (cur[0] == ':') 2989: return(xmlStrdup(name)); 2990: 2991: c = *cur++; 2992: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2993: buf[len++] = c; 2994: c = *cur++; 2995: } 2996: if (len >= max) { 2997: /* 2998: * Okay someone managed to make a huge name, so he's ready to pay 2999: * for the processing speed. 
3000: */ 3001: max = len * 2; 3002: 3003: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3004: if (buffer == NULL) { 3005: xmlErrMemory(ctxt, NULL); 3006: return(NULL); 3007: } 3008: memcpy(buffer, buf, len); 3009: while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3010: if (len + 10 > max) { 3011: xmlChar *tmp; 3012: 3013: max *= 2; 3014: tmp = (xmlChar *) xmlRealloc(buffer, 3015: max * sizeof(xmlChar)); 3016: if (tmp == NULL) { 3017: xmlFree(buffer); 3018: xmlErrMemory(ctxt, NULL); 3019: return(NULL); 3020: } 3021: buffer = tmp; 3022: } 3023: buffer[len++] = c; 3024: c = *cur++; 3025: } 3026: buffer[len] = 0; 3027: } 3028: 3029: if ((c == ':') && (*cur == 0)) { 3030: if (buffer != NULL) 3031: xmlFree(buffer); 3032: *prefix = NULL; 3033: return(xmlStrdup(name)); 3034: } 3035: 3036: if (buffer == NULL) 3037: ret = xmlStrndup(buf, len); 3038: else { 3039: ret = buffer; 3040: buffer = NULL; 3041: max = XML_MAX_NAMELEN; 3042: } 3043: 3044: 3045: if (c == ':') { 3046: c = *cur; 3047: *prefix = ret; 3048: if (c == 0) { 3049: return(xmlStrndup(BAD_CAST "", 0)); 3050: } 3051: len = 0; 3052: 3053: /* 3054: * Check that the first character is proper to start 3055: * a new name 3056: */ 3057: if (!(((c >= 0x61) && (c <= 0x7A)) || 3058: ((c >= 0x41) && (c <= 0x5A)) || 3059: (c == '_') || (c == ':'))) { 3060: int l; 3061: int first = CUR_SCHAR(cur, l); 3062: 3063: if (!IS_LETTER(first) && (first != '_')) { 3064: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3065: "Name %s is not XML Namespace compliant\n", 3066: name); 3067: } 3068: } 3069: cur++; 3070: 3071: while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3072: buf[len++] = c; 3073: c = *cur++; 3074: } 3075: if (len >= max) { 3076: /* 3077: * Okay someone managed to make a huge name, so he's ready to pay 3078: * for the processing speed. 
3079: */ 3080: max = len * 2; 3081: 3082: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3083: if (buffer == NULL) { 3084: xmlErrMemory(ctxt, NULL); 3085: return(NULL); 3086: } 3087: memcpy(buffer, buf, len); 3088: while (c != 0) { /* tested bigname2.xml */ 3089: if (len + 10 > max) { 3090: xmlChar *tmp; 3091: 3092: max *= 2; 3093: tmp = (xmlChar *) xmlRealloc(buffer, 3094: max * sizeof(xmlChar)); 3095: if (tmp == NULL) { 3096: xmlErrMemory(ctxt, NULL); 3097: xmlFree(buffer); 3098: return(NULL); 3099: } 3100: buffer = tmp; 3101: } 3102: buffer[len++] = c; 3103: c = *cur++; 3104: } 3105: buffer[len] = 0; 3106: } 3107: 3108: if (buffer == NULL) 3109: ret = xmlStrndup(buf, len); 3110: else { 3111: ret = buffer; 3112: } 3113: } 3114: 3115: return(ret); 3116: } 3117: 3118: /************************************************************************ 3119: * * 3120: * The parser itself * 3121: * Relates to http://www.w3.org/TR/REC-xml * 3122: * * 3123: ************************************************************************/ 3124: 3125: /************************************************************************ 3126: * * 3127: * Routines to parse Name, NCName and NmToken * 3128: * * 3129: ************************************************************************/ 3130: #ifdef DEBUG 3131: static unsigned long nbParseName = 0; 3132: static unsigned long nbParseNmToken = 0; 3133: static unsigned long nbParseNCName = 0; 3134: static unsigned long nbParseNCNameComplex = 0; 3135: static unsigned long nbParseNameComplex = 0; 3136: static unsigned long nbParseStringName = 0; 3137: #endif 3138: 3139: /* 3140: * The two following functions are related to the change of accepted 3141: * characters for Name and NmToken in the Revision 5 of XML-1.0 3142: * They correspond to the modified production [4] and the new production [4a] 3143: * changes in that revision. 
Also note that the macros used for the 3144: * productions Letter, Digit, CombiningChar and Extender are not needed 3145: * anymore. 3146: * We still keep compatibility to pre-revision5 parsing semantic if the 3147: * new XML_PARSE_OLD10 option is given to the parser. 3148: */ 3149: static int 3150: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3151: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3152: /* 3153: * Use the new checks of production [4] [4a] amd [5] of the 3154: * Update 5 of XML-1.0 3155: */ 3156: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3157: (((c >= 'a') && (c <= 'z')) || 3158: ((c >= 'A') && (c <= 'Z')) || 3159: (c == '_') || (c == ':') || 3160: ((c >= 0xC0) && (c <= 0xD6)) || 3161: ((c >= 0xD8) && (c <= 0xF6)) || 3162: ((c >= 0xF8) && (c <= 0x2FF)) || 3163: ((c >= 0x370) && (c <= 0x37D)) || 3164: ((c >= 0x37F) && (c <= 0x1FFF)) || 3165: ((c >= 0x200C) && (c <= 0x200D)) || 3166: ((c >= 0x2070) && (c <= 0x218F)) || 3167: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3168: ((c >= 0x3001) && (c <= 0xD7FF)) || 3169: ((c >= 0xF900) && (c <= 0xFDCF)) || 3170: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3171: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3172: return(1); 3173: } else { 3174: if (IS_LETTER(c) || (c == '_') || (c == ':')) 3175: return(1); 3176: } 3177: return(0); 3178: } 3179: 3180: static int 3181: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3182: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3183: /* 3184: * Use the new checks of production [4] [4a] amd [5] of the 3185: * Update 5 of XML-1.0 3186: */ 3187: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3188: (((c >= 'a') && (c <= 'z')) || 3189: ((c >= 'A') && (c <= 'Z')) || 3190: ((c >= '0') && (c <= '9')) || /* !start */ 3191: (c == '_') || (c == ':') || 3192: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3193: ((c >= 0xC0) && (c <= 0xD6)) || 3194: ((c >= 0xD8) && (c <= 0xF6)) || 3195: ((c >= 0xF8) && (c <= 0x2FF)) || 3196: ((c >= 0x300) && (c <= 0x36F)) || 
/* !start */ 3197: ((c >= 0x370) && (c <= 0x37D)) || 3198: ((c >= 0x37F) && (c <= 0x1FFF)) || 3199: ((c >= 0x200C) && (c <= 0x200D)) || 3200: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3201: ((c >= 0x2070) && (c <= 0x218F)) || 3202: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3203: ((c >= 0x3001) && (c <= 0xD7FF)) || 3204: ((c >= 0xF900) && (c <= 0xFDCF)) || 3205: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3206: ((c >= 0x10000) && (c <= 0xEFFFF)))) 3207: return(1); 3208: } else { 3209: if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3210: (c == '.') || (c == '-') || 3211: (c == '_') || (c == ':') || 3212: (IS_COMBINING(c)) || 3213: (IS_EXTENDER(c))) 3214: return(1); 3215: } 3216: return(0); 3217: } 3218: 3219: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3220: int *len, int *alloc, int normalize); 3221: 3222: static const xmlChar * 3223: xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3224: int len = 0, l; 3225: int c; 3226: int count = 0; 3227: 3228: #ifdef DEBUG 3229: nbParseNameComplex++; 3230: #endif 3231: 3232: /* 3233: * Handler for more complex cases 3234: */ 3235: GROW; 3236: if (ctxt->instate == XML_PARSER_EOF) 3237: return(NULL); 3238: c = CUR_CHAR(l); 3239: if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3240: /* 3241: * Use the new checks of production [4] [4a] amd [5] of the 3242: * Update 5 of XML-1.0 3243: */ 3244: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3245: (!(((c >= 'a') && (c <= 'z')) || 3246: ((c >= 'A') && (c <= 'Z')) || 3247: (c == '_') || (c == ':') || 3248: ((c >= 0xC0) && (c <= 0xD6)) || 3249: ((c >= 0xD8) && (c <= 0xF6)) || 3250: ((c >= 0xF8) && (c <= 0x2FF)) || 3251: ((c >= 0x370) && (c <= 0x37D)) || 3252: ((c >= 0x37F) && (c <= 0x1FFF)) || 3253: ((c >= 0x200C) && (c <= 0x200D)) || 3254: ((c >= 0x2070) && (c <= 0x218F)) || 3255: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3256: ((c >= 0x3001) && (c <= 0xD7FF)) || 3257: ((c >= 0xF900) && (c <= 0xFDCF)) || 3258: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3259: ((c >= 0x10000) && (c 
<= 0xEFFFF))))) { 3260: return(NULL); 3261: } 3262: len += l; 3263: NEXTL(l); 3264: c = CUR_CHAR(l); 3265: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3266: (((c >= 'a') && (c <= 'z')) || 3267: ((c >= 'A') && (c <= 'Z')) || 3268: ((c >= '0') && (c <= '9')) || /* !start */ 3269: (c == '_') || (c == ':') || 3270: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3271: ((c >= 0xC0) && (c <= 0xD6)) || 3272: ((c >= 0xD8) && (c <= 0xF6)) || 3273: ((c >= 0xF8) && (c <= 0x2FF)) || 3274: ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3275: ((c >= 0x370) && (c <= 0x37D)) || 3276: ((c >= 0x37F) && (c <= 0x1FFF)) || 3277: ((c >= 0x200C) && (c <= 0x200D)) || 3278: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3279: ((c >= 0x2070) && (c <= 0x218F)) || 3280: ((c >= 0x2C00) && (c <= 0x2FEF)) || 3281: ((c >= 0x3001) && (c <= 0xD7FF)) || 3282: ((c >= 0xF900) && (c <= 0xFDCF)) || 3283: ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3284: ((c >= 0x10000) && (c <= 0xEFFFF)) 3285: )) { 3286: if (count++ > XML_PARSER_CHUNK_SIZE) { 3287: count = 0; 3288: GROW; 3289: if (ctxt->instate == XML_PARSER_EOF) 3290: return(NULL); 3291: } 3292: len += l; 3293: NEXTL(l); 3294: c = CUR_CHAR(l); 3295: } 3296: } else { 3297: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3298: (!IS_LETTER(c) && (c != '_') && 3299: (c != ':'))) { 3300: return(NULL); 3301: } 3302: len += l; 3303: NEXTL(l); 3304: c = CUR_CHAR(l); 3305: 3306: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3307: ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3308: (c == '.') || (c == '-') || 3309: (c == '_') || (c == ':') || 3310: (IS_COMBINING(c)) || 3311: (IS_EXTENDER(c)))) { 3312: if (count++ > XML_PARSER_CHUNK_SIZE) { 3313: count = 0; 3314: GROW; 3315: if (ctxt->instate == XML_PARSER_EOF) 3316: return(NULL); 3317: } 3318: len += l; 3319: NEXTL(l); 3320: c = CUR_CHAR(l); 3321: if (c == 0) { 3322: count = 0; 3323: GROW; 3324: if (ctxt->instate == XML_PARSER_EOF) 3325: return(NULL); 
3326: c = CUR_CHAR(l); 3327: } 3328: } 3329: } 3330: if ((len > XML_MAX_NAME_LENGTH) && 3331: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3332: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3333: return(NULL); 3334: } 3335: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3336: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3337: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3338: } 3339: 3340: /** 3341: * xmlParseName: 3342: * @ctxt: an XML parser context 3343: * 3344: * parse an XML name. 3345: * 3346: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3347: * CombiningChar | Extender 3348: * 3349: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3350: * 3351: * [6] Names ::= Name (#x20 Name)* 3352: * 3353: * Returns the Name parsed or NULL 3354: */ 3355: 3356: const xmlChar * 3357: xmlParseName(xmlParserCtxtPtr ctxt) { 3358: const xmlChar *in; 3359: const xmlChar *ret; 3360: int count = 0; 3361: 3362: GROW; 3363: 3364: #ifdef DEBUG 3365: nbParseName++; 3366: #endif 3367: 3368: /* 3369: * Accelerator for simple ASCII names 3370: */ 3371: in = ctxt->input->cur; 3372: if (((*in >= 0x61) && (*in <= 0x7A)) || 3373: ((*in >= 0x41) && (*in <= 0x5A)) || 3374: (*in == '_') || (*in == ':')) { 3375: in++; 3376: while (((*in >= 0x61) && (*in <= 0x7A)) || 3377: ((*in >= 0x41) && (*in <= 0x5A)) || 3378: ((*in >= 0x30) && (*in <= 0x39)) || 3379: (*in == '_') || (*in == '-') || 3380: (*in == ':') || (*in == '.')) 3381: in++; 3382: if ((*in > 0) && (*in < 0x80)) { 3383: count = in - ctxt->input->cur; 3384: if ((count > XML_MAX_NAME_LENGTH) && 3385: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3386: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3387: return(NULL); 3388: } 3389: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3390: ctxt->input->cur = in; 3391: ctxt->nbChars += count; 3392: ctxt->input->col += count; 3393: if (ret == NULL) 3394: xmlErrMemory(ctxt, NULL); 3395: return(ret); 3396: } 
3397: } 3398: /* accelerator for special cases */ 3399: return(xmlParseNameComplex(ctxt)); 3400: } 3401: 3402: static const xmlChar * 3403: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3404: int len = 0, l; 3405: int c; 3406: int count = 0; 3407: 3408: #ifdef DEBUG 3409: nbParseNCNameComplex++; 3410: #endif 3411: 3412: /* 3413: * Handler for more complex cases 3414: */ 3415: GROW; 3416: c = CUR_CHAR(l); 3417: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3418: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3419: return(NULL); 3420: } 3421: 3422: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3423: (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3424: if (count++ > XML_PARSER_CHUNK_SIZE) { 3425: if ((len > XML_MAX_NAME_LENGTH) && 3426: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3427: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3428: return(NULL); 3429: } 3430: count = 0; 3431: GROW; 3432: if (ctxt->instate == XML_PARSER_EOF) 3433: return(NULL); 3434: } 3435: len += l; 3436: NEXTL(l); 3437: c = CUR_CHAR(l); 3438: if (c == 0) { 3439: count = 0; 3440: GROW; 3441: if (ctxt->instate == XML_PARSER_EOF) 3442: return(NULL); 3443: c = CUR_CHAR(l); 3444: } 3445: } 3446: if ((len > XML_MAX_NAME_LENGTH) && 3447: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3448: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3449: return(NULL); 3450: } 3451: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3452: } 3453: 3454: /** 3455: * xmlParseNCName: 3456: * @ctxt: an XML parser context 3457: * @len: length of the string parsed 3458: * 3459: * parse an XML name. 3460: * 3461: * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3462: * CombiningChar | Extender 3463: * 3464: * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3465: * 3466: * Returns the Name parsed or NULL 3467: */ 3468: 3469: static const xmlChar * 3470: xmlParseNCName(xmlParserCtxtPtr ctxt) { 3471: const xmlChar *in; 3472: const xmlChar *ret; 3473: int count = 0; 3474: 3475: #ifdef DEBUG 3476: nbParseNCName++; 3477: #endif 3478: 3479: /* 3480: * Accelerator for simple ASCII names 3481: */ 3482: in = ctxt->input->cur; 3483: if (((*in >= 0x61) && (*in <= 0x7A)) || 3484: ((*in >= 0x41) && (*in <= 0x5A)) || 3485: (*in == '_')) { 3486: in++; 3487: while (((*in >= 0x61) && (*in <= 0x7A)) || 3488: ((*in >= 0x41) && (*in <= 0x5A)) || 3489: ((*in >= 0x30) && (*in <= 0x39)) || 3490: (*in == '_') || (*in == '-') || 3491: (*in == '.')) 3492: in++; 3493: if ((*in > 0) && (*in < 0x80)) { 3494: count = in - ctxt->input->cur; 3495: if ((count > XML_MAX_NAME_LENGTH) && 3496: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3497: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3498: return(NULL); 3499: } 3500: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3501: ctxt->input->cur = in; 3502: ctxt->nbChars += count; 3503: ctxt->input->col += count; 3504: if (ret == NULL) { 3505: xmlErrMemory(ctxt, NULL); 3506: } 3507: return(ret); 3508: } 3509: } 3510: return(xmlParseNCNameComplex(ctxt)); 3511: } 3512: 3513: /** 3514: * xmlParseNameAndCompare: 3515: * @ctxt: an XML parser context 3516: * 3517: * parse an XML name and compares for match 3518: * (specialized for endtag parsing) 3519: * 3520: * Returns NULL for an illegal name, (xmlChar*) 1 for success 3521: * and the name for mismatch 3522: */ 3523: 3524: static const xmlChar * 3525: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3526: register const xmlChar *cmp = other; 3527: register const xmlChar *in; 3528: const xmlChar *ret; 3529: 3530: GROW; 3531: if (ctxt->instate == XML_PARSER_EOF) 3532: return(NULL); 3533: 3534: in = ctxt->input->cur; 3535: 
while (*in != 0 && *in == *cmp) { 3536: ++in; 3537: ++cmp; 3538: ctxt->input->col++; 3539: } 3540: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3541: /* success */ 3542: ctxt->input->cur = in; 3543: return (const xmlChar*) 1; 3544: } 3545: /* failure (or end of input buffer), check with full function */ 3546: ret = xmlParseName (ctxt); 3547: /* strings coming from the dictionnary direct compare possible */ 3548: if (ret == other) { 3549: return (const xmlChar*) 1; 3550: } 3551: return ret; 3552: } 3553: 3554: /** 3555: * xmlParseStringName: 3556: * @ctxt: an XML parser context 3557: * @str: a pointer to the string pointer (IN/OUT) 3558: * 3559: * parse an XML name. 3560: * 3561: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3562: * CombiningChar | Extender 3563: * 3564: * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3565: * 3566: * [6] Names ::= Name (#x20 Name)* 3567: * 3568: * Returns the Name parsed or NULL. The @str pointer 3569: * is updated to the current location in the string. 3570: */ 3571: 3572: static xmlChar * 3573: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3574: xmlChar buf[XML_MAX_NAMELEN + 5]; 3575: const xmlChar *cur = *str; 3576: int len = 0, l; 3577: int c; 3578: 3579: #ifdef DEBUG 3580: nbParseStringName++; 3581: #endif 3582: 3583: c = CUR_SCHAR(cur, l); 3584: if (!xmlIsNameStartChar(ctxt, c)) { 3585: return(NULL); 3586: } 3587: 3588: COPY_BUF(l,buf,len,c); 3589: cur += l; 3590: c = CUR_SCHAR(cur, l); 3591: while (xmlIsNameChar(ctxt, c)) { 3592: COPY_BUF(l,buf,len,c); 3593: cur += l; 3594: c = CUR_SCHAR(cur, l); 3595: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3596: /* 3597: * Okay someone managed to make a huge name, so he's ready to pay 3598: * for the processing speed. 
3599: */ 3600: xmlChar *buffer; 3601: int max = len * 2; 3602: 3603: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3604: if (buffer == NULL) { 3605: xmlErrMemory(ctxt, NULL); 3606: return(NULL); 3607: } 3608: memcpy(buffer, buf, len); 3609: while (xmlIsNameChar(ctxt, c)) { 3610: if (len + 10 > max) { 3611: xmlChar *tmp; 3612: 3613: if ((len > XML_MAX_NAME_LENGTH) && 3614: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3615: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3616: xmlFree(buffer); 3617: return(NULL); 3618: } 3619: max *= 2; 3620: tmp = (xmlChar *) xmlRealloc(buffer, 3621: max * sizeof(xmlChar)); 3622: if (tmp == NULL) { 3623: xmlErrMemory(ctxt, NULL); 3624: xmlFree(buffer); 3625: return(NULL); 3626: } 3627: buffer = tmp; 3628: } 3629: COPY_BUF(l,buffer,len,c); 3630: cur += l; 3631: c = CUR_SCHAR(cur, l); 3632: } 3633: buffer[len] = 0; 3634: *str = cur; 3635: return(buffer); 3636: } 3637: } 3638: if ((len > XML_MAX_NAME_LENGTH) && 3639: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3640: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3641: return(NULL); 3642: } 3643: *str = cur; 3644: return(xmlStrndup(buf, len)); 3645: } 3646: 3647: /** 3648: * xmlParseNmtoken: 3649: * @ctxt: an XML parser context 3650: * 3651: * parse an XML Nmtoken. 
3652: * 3653: * [7] Nmtoken ::= (NameChar)+ 3654: * 3655: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3656: * 3657: * Returns the Nmtoken parsed or NULL 3658: */ 3659: 3660: xmlChar * 3661: xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3662: xmlChar buf[XML_MAX_NAMELEN + 5]; 3663: int len = 0, l; 3664: int c; 3665: int count = 0; 3666: 3667: #ifdef DEBUG 3668: nbParseNmToken++; 3669: #endif 3670: 3671: GROW; 3672: if (ctxt->instate == XML_PARSER_EOF) 3673: return(NULL); 3674: c = CUR_CHAR(l); 3675: 3676: while (xmlIsNameChar(ctxt, c)) { 3677: if (count++ > XML_PARSER_CHUNK_SIZE) { 3678: count = 0; 3679: GROW; 3680: } 3681: COPY_BUF(l,buf,len,c); 3682: NEXTL(l); 3683: c = CUR_CHAR(l); 3684: if (c == 0) { 3685: count = 0; 3686: GROW; 3687: if (ctxt->instate == XML_PARSER_EOF) 3688: return(NULL); 3689: c = CUR_CHAR(l); 3690: } 3691: if (len >= XML_MAX_NAMELEN) { 3692: /* 3693: * Okay someone managed to make a huge token, so he's ready to pay 3694: * for the processing speed. 3695: */ 3696: xmlChar *buffer; 3697: int max = len * 2; 3698: 3699: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3700: if (buffer == NULL) { 3701: xmlErrMemory(ctxt, NULL); 3702: return(NULL); 3703: } 3704: memcpy(buffer, buf, len); 3705: while (xmlIsNameChar(ctxt, c)) { 3706: if (count++ > XML_PARSER_CHUNK_SIZE) { 3707: count = 0; 3708: GROW; 3709: if (ctxt->instate == XML_PARSER_EOF) { 3710: xmlFree(buffer); 3711: return(NULL); 3712: } 3713: } 3714: if (len + 10 > max) { 3715: xmlChar *tmp; 3716: 3717: if ((max > XML_MAX_NAME_LENGTH) && 3718: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3719: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3720: xmlFree(buffer); 3721: return(NULL); 3722: } 3723: max *= 2; 3724: tmp = (xmlChar *) xmlRealloc(buffer, 3725: max * sizeof(xmlChar)); 3726: if (tmp == NULL) { 3727: xmlErrMemory(ctxt, NULL); 3728: xmlFree(buffer); 3729: return(NULL); 3730: } 3731: buffer = tmp; 3732: } 3733: COPY_BUF(l,buffer,len,c); 3734: NEXTL(l); 3735: c = CUR_CHAR(l); 
3736: } 3737: buffer[len] = 0; 3738: return(buffer); 3739: } 3740: } 3741: if (len == 0) 3742: return(NULL); 3743: if ((len > XML_MAX_NAME_LENGTH) && 3744: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3745: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3746: return(NULL); 3747: } 3748: return(xmlStrndup(buf, len)); 3749: } 3750: 3751: /** 3752: * xmlParseEntityValue: 3753: * @ctxt: an XML parser context 3754: * @orig: if non-NULL store a copy of the original entity value 3755: * 3756: * parse a value for ENTITY declarations 3757: * 3758: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3759: * "'" ([^%&'] | PEReference | Reference)* "'" 3760: * 3761: * Returns the EntityValue parsed with reference substituted or NULL 3762: */ 3763: 3764: xmlChar * 3765: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3766: xmlChar *buf = NULL; 3767: int len = 0; 3768: int size = XML_PARSER_BUFFER_SIZE; 3769: int c, l; 3770: xmlChar stop; 3771: xmlChar *ret = NULL; 3772: const xmlChar *cur = NULL; 3773: xmlParserInputPtr input; 3774: 3775: if (RAW == '"') stop = '"'; 3776: else if (RAW == '\'') stop = '\''; 3777: else { 3778: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3779: return(NULL); 3780: } 3781: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3782: if (buf == NULL) { 3783: xmlErrMemory(ctxt, NULL); 3784: return(NULL); 3785: } 3786: 3787: /* 3788: * The content of the entity definition is copied in a buffer. 3789: */ 3790: 3791: ctxt->instate = XML_PARSER_ENTITY_VALUE; 3792: input = ctxt->input; 3793: GROW; 3794: if (ctxt->instate == XML_PARSER_EOF) { 3795: xmlFree(buf); 3796: return(NULL); 3797: } 3798: NEXT; 3799: c = CUR_CHAR(l); 3800: /* 3801: * NOTE: 4.4.5 Included in Literal 3802: * When a parameter entity reference appears in a literal entity 3803: * value, ... a single or double quote character in the replacement 3804: * text is always treated as a normal data character and will not 3805: * terminate the literal. 
3806: * In practice it means we stop the loop only when back at parsing 3807: * the initial entity and the quote is found 3808: */ 3809: while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3810: (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3811: if (len + 5 >= size) { 3812: xmlChar *tmp; 3813: 3814: size *= 2; 3815: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3816: if (tmp == NULL) { 3817: xmlErrMemory(ctxt, NULL); 3818: xmlFree(buf); 3819: return(NULL); 3820: } 3821: buf = tmp; 3822: } 3823: COPY_BUF(l,buf,len,c); 3824: NEXTL(l); 3825: /* 3826: * Pop-up of finished entities. 3827: */ 3828: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3829: xmlPopInput(ctxt); 3830: 3831: GROW; 3832: c = CUR_CHAR(l); 3833: if (c == 0) { 3834: GROW; 3835: c = CUR_CHAR(l); 3836: } 3837: } 3838: buf[len] = 0; 3839: if (ctxt->instate == XML_PARSER_EOF) { 3840: xmlFree(buf); 3841: return(NULL); 3842: } 3843: 3844: /* 3845: * Raise problem w.r.t. '&' and '%' being used in non-entities 3846: * reference constructs. Note Charref will be handled in 3847: * xmlStringDecodeEntities() 3848: */ 3849: cur = buf; 3850: while (*cur != 0) { /* non input consuming */ 3851: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3852: xmlChar *name; 3853: xmlChar tmp = *cur; 3854: 3855: cur++; 3856: name = xmlParseStringName(ctxt, &cur); 3857: if ((name == NULL) || (*cur != ';')) { 3858: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3859: "EntityValue: '%c' forbidden except for entities references\n", 3860: tmp); 3861: } 3862: if ((tmp == '%') && (ctxt->inSubset == 1) && 3863: (ctxt->inputNr == 1)) { 3864: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3865: } 3866: if (name != NULL) 3867: xmlFree(name); 3868: if (*cur == 0) 3869: break; 3870: } 3871: cur++; 3872: } 3873: 3874: /* 3875: * Then PEReference entities are substituted. 
3876: */ 3877: if (c != stop) { 3878: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3879: xmlFree(buf); 3880: } else { 3881: NEXT; 3882: /* 3883: * NOTE: 4.4.7 Bypassed 3884: * When a general entity reference appears in the EntityValue in 3885: * an entity declaration, it is bypassed and left as is. 3886: * so XML_SUBSTITUTE_REF is not set here. 3887: */ 3888: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3889: 0, 0, 0); 3890: if (orig != NULL) 3891: *orig = buf; 3892: else 3893: xmlFree(buf); 3894: } 3895: 3896: return(ret); 3897: } 3898: 3899: /** 3900: * xmlParseAttValueComplex: 3901: * @ctxt: an XML parser context 3902: * @len: the resulting attribute len 3903: * @normalize: wether to apply the inner normalization 3904: * 3905: * parse a value for an attribute, this is the fallback function 3906: * of xmlParseAttValue() when the attribute parsing requires handling 3907: * of non-ASCII characters, or normalization compaction. 3908: * 3909: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3910: */ 3911: static xmlChar * 3912: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3913: xmlChar limit = 0; 3914: xmlChar *buf = NULL; 3915: xmlChar *rep = NULL; 3916: size_t len = 0; 3917: size_t buf_size = 0; 3918: int c, l, in_space = 0; 3919: xmlChar *current = NULL; 3920: xmlEntityPtr ent; 3921: 3922: if (NXT(0) == '"') { 3923: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3924: limit = '"'; 3925: NEXT; 3926: } else if (NXT(0) == '\'') { 3927: limit = '\''; 3928: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3929: NEXT; 3930: } else { 3931: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3932: return(NULL); 3933: } 3934: 3935: /* 3936: * allocate a translation buffer. 
3937: */ 3938: buf_size = XML_PARSER_BUFFER_SIZE; 3939: buf = (xmlChar *) xmlMallocAtomic(buf_size); 3940: if (buf == NULL) goto mem_error; 3941: 3942: /* 3943: * OK loop until we reach one of the ending char or a size limit. 3944: */ 3945: c = CUR_CHAR(l); 3946: while (((NXT(0) != limit) && /* checked */ 3947: (IS_CHAR(c)) && (c != '<')) && 3948: (ctxt->instate != XML_PARSER_EOF)) { 3949: /* 3950: * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 3951: * special option is given 3952: */ 3953: if ((len > XML_MAX_TEXT_LENGTH) && 3954: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3955: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3956: "AttValue length too long\n"); 3957: goto mem_error; 3958: } 3959: if (c == 0) break; 3960: if (c == '&') { 3961: in_space = 0; 3962: if (NXT(1) == '#') { 3963: int val = xmlParseCharRef(ctxt); 3964: 3965: if (val == '&') { 3966: if (ctxt->replaceEntities) { 3967: if (len + 10 > buf_size) { 3968: growBuffer(buf, 10); 3969: } 3970: buf[len++] = '&'; 3971: } else { 3972: /* 3973: * The reparsing will be done in xmlStringGetNodeList() 3974: * called by the attribute() function in SAX.c 3975: */ 3976: if (len + 10 > buf_size) { 3977: growBuffer(buf, 10); 3978: } 3979: buf[len++] = '&'; 3980: buf[len++] = '#'; 3981: buf[len++] = '3'; 3982: buf[len++] = '8'; 3983: buf[len++] = ';'; 3984: } 3985: } else if (val != 0) { 3986: if (len + 10 > buf_size) { 3987: growBuffer(buf, 10); 3988: } 3989: len += xmlCopyChar(0, &buf[len], val); 3990: } 3991: } else { 3992: ent = xmlParseEntityRef(ctxt); 3993: ctxt->nbentities++; 3994: if (ent != NULL) 3995: ctxt->nbentities += ent->owner; 3996: if ((ent != NULL) && 3997: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3998: if (len + 10 > buf_size) { 3999: growBuffer(buf, 10); 4000: } 4001: if ((ctxt->replaceEntities == 0) && 4002: (ent->content[0] == '&')) { 4003: buf[len++] = '&'; 4004: buf[len++] = '#'; 4005: buf[len++] = '3'; 4006: buf[len++] = '8'; 4007: buf[len++] = ';'; 
4008: } else { 4009: buf[len++] = ent->content[0]; 4010: } 4011: } else if ((ent != NULL) && 4012: (ctxt->replaceEntities != 0)) { 4013: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4014: rep = xmlStringDecodeEntities(ctxt, ent->content, 4015: XML_SUBSTITUTE_REF, 4016: 0, 0, 0); 4017: if (rep != NULL) { 4018: current = rep; 4019: while (*current != 0) { /* non input consuming */ 4020: if ((*current == 0xD) || (*current == 0xA) || 4021: (*current == 0x9)) { 4022: buf[len++] = 0x20; 4023: current++; 4024: } else 4025: buf[len++] = *current++; 4026: if (len + 10 > buf_size) { 4027: growBuffer(buf, 10); 4028: } 4029: } 4030: xmlFree(rep); 4031: rep = NULL; 4032: } 4033: } else { 4034: if (len + 10 > buf_size) { 4035: growBuffer(buf, 10); 4036: } 4037: if (ent->content != NULL) 4038: buf[len++] = ent->content[0]; 4039: } 4040: } else if (ent != NULL) { 4041: int i = xmlStrlen(ent->name); 4042: const xmlChar *cur = ent->name; 4043: 4044: /* 4045: * This may look absurd but is needed to detect 4046: * entities problems 4047: */ 4048: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4049: (ent->content != NULL) && (ent->checked == 0)) { 4050: unsigned long oldnbent = ctxt->nbentities; 4051: 4052: rep = xmlStringDecodeEntities(ctxt, ent->content, 4053: XML_SUBSTITUTE_REF, 0, 0, 0); 4054: 4055: ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4056: if (rep != NULL) { 4057: if (xmlStrchr(rep, '<')) 4058: ent->checked |= 1; 4059: xmlFree(rep); 4060: rep = NULL; 4061: } 4062: } 4063: 4064: /* 4065: * Just output the reference 4066: */ 4067: buf[len++] = '&'; 4068: while (len + i + 10 > buf_size) { 4069: growBuffer(buf, i + 10); 4070: } 4071: for (;i > 0;i--) 4072: buf[len++] = *cur++; 4073: buf[len++] = ';'; 4074: } 4075: } 4076: } else { 4077: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4078: if ((len != 0) || (!normalize)) { 4079: if ((!normalize) || (!in_space)) { 4080: COPY_BUF(l,buf,len,0x20); 4081: while (len + 10 > buf_size) { 4082: 
growBuffer(buf, 10); 4083: } 4084: } 4085: in_space = 1; 4086: } 4087: } else { 4088: in_space = 0; 4089: COPY_BUF(l,buf,len,c); 4090: if (len + 10 > buf_size) { 4091: growBuffer(buf, 10); 4092: } 4093: } 4094: NEXTL(l); 4095: } 4096: GROW; 4097: c = CUR_CHAR(l); 4098: } 4099: if (ctxt->instate == XML_PARSER_EOF) 4100: goto error; 4101: 4102: if ((in_space) && (normalize)) { 4103: while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4104: } 4105: buf[len] = 0; 4106: if (RAW == '<') { 4107: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4108: } else if (RAW != limit) { 4109: if ((c != 0) && (!IS_CHAR(c))) { 4110: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4111: "invalid character in attribute value\n"); 4112: } else { 4113: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4114: "AttValue: ' expected\n"); 4115: } 4116: } else 4117: NEXT; 4118: 4119: /* 4120: * There we potentially risk an overflow, don't allow attribute value of 4121: * length more than INT_MAX it is a very reasonnable assumption ! 
4122: */ 4123: if (len >= INT_MAX) { 4124: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4125: "AttValue length too long\n"); 4126: goto mem_error; 4127: } 4128: 4129: if (attlen != NULL) *attlen = (int) len; 4130: return(buf); 4131: 4132: mem_error: 4133: xmlErrMemory(ctxt, NULL); 4134: error: 4135: if (buf != NULL) 4136: xmlFree(buf); 4137: if (rep != NULL) 4138: xmlFree(rep); 4139: return(NULL); 4140: } 4141: 4142: /** 4143: * xmlParseAttValue: 4144: * @ctxt: an XML parser context 4145: * 4146: * parse a value for an attribute 4147: * Note: the parser won't do substitution of entities here, this 4148: * will be handled later in xmlStringGetNodeList 4149: * 4150: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4151: * "'" ([^<&'] | Reference)* "'" 4152: * 4153: * 3.3.3 Attribute-Value Normalization: 4154: * Before the value of an attribute is passed to the application or 4155: * checked for validity, the XML processor must normalize it as follows: 4156: * - a character reference is processed by appending the referenced 4157: * character to the attribute value 4158: * - an entity reference is processed by recursively processing the 4159: * replacement text of the entity 4160: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4161: * appending #x20 to the normalized value, except that only a single 4162: * #x20 is appended for a "#xD#xA" sequence that is part of an external 4163: * parsed entity or the literal entity value of an internal parsed entity 4164: * - other characters are processed by appending them to the normalized value 4165: * If the declared value is not CDATA, then the XML processor must further 4166: * process the normalized attribute value by discarding any leading and 4167: * trailing space (#x20) characters, and by replacing sequences of space 4168: * (#x20) characters by a single space (#x20) character. 
4169: * All attributes for which no declaration has been read should be treated 4170: * by a non-validating parser as if declared CDATA. 4171: * 4172: * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4173: */ 4174: 4175: 4176: xmlChar * 4177: xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4178: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4179: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4180: } 4181: 4182: /** 4183: * xmlParseSystemLiteral: 4184: * @ctxt: an XML parser context 4185: * 4186: * parse an XML Literal 4187: * 4188: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4189: * 4190: * Returns the SystemLiteral parsed or NULL 4191: */ 4192: 4193: xmlChar * 4194: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4195: xmlChar *buf = NULL; 4196: int len = 0; 4197: int size = XML_PARSER_BUFFER_SIZE; 4198: int cur, l; 4199: xmlChar stop; 4200: int state = ctxt->instate; 4201: int count = 0; 4202: 4203: SHRINK; 4204: if (RAW == '"') { 4205: NEXT; 4206: stop = '"'; 4207: } else if (RAW == '\'') { 4208: NEXT; 4209: stop = '\''; 4210: } else { 4211: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4212: return(NULL); 4213: } 4214: 4215: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4216: if (buf == NULL) { 4217: xmlErrMemory(ctxt, NULL); 4218: return(NULL); 4219: } 4220: ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4221: cur = CUR_CHAR(l); 4222: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4223: if (len + 5 >= size) { 4224: xmlChar *tmp; 4225: 4226: if ((size > XML_MAX_NAME_LENGTH) && 4227: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4228: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4229: xmlFree(buf); 4230: ctxt->instate = (xmlParserInputState) state; 4231: return(NULL); 4232: } 4233: size *= 2; 4234: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4235: if (tmp == NULL) { 4236: xmlFree(buf); 4237: xmlErrMemory(ctxt, NULL); 4238: ctxt->instate = 
(xmlParserInputState) state; 4239: return(NULL); 4240: } 4241: buf = tmp; 4242: } 4243: count++; 4244: if (count > 50) { 4245: GROW; 4246: count = 0; 4247: if (ctxt->instate == XML_PARSER_EOF) { 4248: xmlFree(buf); 4249: return(NULL); 4250: } 4251: } 4252: COPY_BUF(l,buf,len,cur); 4253: NEXTL(l); 4254: cur = CUR_CHAR(l); 4255: if (cur == 0) { 4256: GROW; 4257: SHRINK; 4258: cur = CUR_CHAR(l); 4259: } 4260: } 4261: buf[len] = 0; 4262: ctxt->instate = (xmlParserInputState) state; 4263: if (!IS_CHAR(cur)) { 4264: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4265: } else { 4266: NEXT; 4267: } 4268: return(buf); 4269: } 4270: 4271: /** 4272: * xmlParsePubidLiteral: 4273: * @ctxt: an XML parser context 4274: * 4275: * parse an XML public literal 4276: * 4277: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4278: * 4279: * Returns the PubidLiteral parsed or NULL. 4280: */ 4281: 4282: xmlChar * 4283: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4284: xmlChar *buf = NULL; 4285: int len = 0; 4286: int size = XML_PARSER_BUFFER_SIZE; 4287: xmlChar cur; 4288: xmlChar stop; 4289: int count = 0; 4290: xmlParserInputState oldstate = ctxt->instate; 4291: 4292: SHRINK; 4293: if (RAW == '"') { 4294: NEXT; 4295: stop = '"'; 4296: } else if (RAW == '\'') { 4297: NEXT; 4298: stop = '\''; 4299: } else { 4300: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4301: return(NULL); 4302: } 4303: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4304: if (buf == NULL) { 4305: xmlErrMemory(ctxt, NULL); 4306: return(NULL); 4307: } 4308: ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4309: cur = CUR; 4310: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4311: if (len + 1 >= size) { 4312: xmlChar *tmp; 4313: 4314: if ((size > XML_MAX_NAME_LENGTH) && 4315: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4316: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4317: xmlFree(buf); 4318: return(NULL); 4319: } 4320: size *= 2; 4321: tmp = 
(xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4322: if (tmp == NULL) { 4323: xmlErrMemory(ctxt, NULL); 4324: xmlFree(buf); 4325: return(NULL); 4326: } 4327: buf = tmp; 4328: } 4329: buf[len++] = cur; 4330: count++; 4331: if (count > 50) { 4332: GROW; 4333: count = 0; 4334: if (ctxt->instate == XML_PARSER_EOF) { 4335: xmlFree(buf); 4336: return(NULL); 4337: } 4338: } 4339: NEXT; 4340: cur = CUR; 4341: if (cur == 0) { 4342: GROW; 4343: SHRINK; 4344: cur = CUR; 4345: } 4346: } 4347: buf[len] = 0; 4348: if (cur != stop) { 4349: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4350: } else { 4351: NEXT; 4352: } 4353: ctxt->instate = oldstate; 4354: return(buf); 4355: } 4356: 4357: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4358: 4359: /* 4360: * used for the test in the inner loop of the char data testing 4361: */ 4362: static const unsigned char test_char_data[256] = { 4363: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4364: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4365: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4366: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4367: 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4368: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4369: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4370: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4371: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4372: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4373: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4374: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4375: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4376: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4377: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4378: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4379: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4380: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4381: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 4382: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4383: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4384: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4385: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4386: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4387: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4388: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4389: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4390: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4391: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4392: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4393: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4394: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4395: }; 4396: 4397: /** 4398: * xmlParseCharData: 4399: * @ctxt: an XML parser context 4400: * @cdata: int indicating whether we are within a CDATA section 4401: * 4402: * parse a CharData section. 4403: * if we are within a CDATA section ']]>' marks an end of section. 4404: * 4405: * The right angle bracket (>) may be represented using the string ">", 4406: * and must, for compatibility, be escaped using ">" or a character 4407: * reference when it appears in the string "]]>" in content, when that 4408: * string is not marking the end of a CDATA section. 4409: * 4410: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4411: */ 4412: 4413: void 4414: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4415: const xmlChar *in; 4416: int nbchar = 0; 4417: int line = ctxt->input->line; 4418: int col = ctxt->input->col; 4419: int ccol; 4420: 4421: SHRINK; 4422: GROW; 4423: /* 4424: * Accelerated common case where input don't need to be 4425: * modified before passing it to the handler. 
4426: */ 4427: if (!cdata) { 4428: in = ctxt->input->cur; 4429: do { 4430: get_more_space: 4431: while (*in == 0x20) { in++; ctxt->input->col++; } 4432: if (*in == 0xA) { 4433: do { 4434: ctxt->input->line++; ctxt->input->col = 1; 4435: in++; 4436: } while (*in == 0xA); 4437: goto get_more_space; 4438: } 4439: if (*in == '<') { 4440: nbchar = in - ctxt->input->cur; 4441: if (nbchar > 0) { 4442: const xmlChar *tmp = ctxt->input->cur; 4443: ctxt->input->cur = in; 4444: 4445: if ((ctxt->sax != NULL) && 4446: (ctxt->sax->ignorableWhitespace != 4447: ctxt->sax->characters)) { 4448: if (areBlanks(ctxt, tmp, nbchar, 1)) { 4449: if (ctxt->sax->ignorableWhitespace != NULL) 4450: ctxt->sax->ignorableWhitespace(ctxt->userData, 4451: tmp, nbchar); 4452: } else { 4453: if (ctxt->sax->characters != NULL) 4454: ctxt->sax->characters(ctxt->userData, 4455: tmp, nbchar); 4456: if (*ctxt->space == -1) 4457: *ctxt->space = -2; 4458: } 4459: } else if ((ctxt->sax != NULL) && 4460: (ctxt->sax->characters != NULL)) { 4461: ctxt->sax->characters(ctxt->userData, 4462: tmp, nbchar); 4463: } 4464: } 4465: return; 4466: } 4467: 4468: get_more: 4469: ccol = ctxt->input->col; 4470: while (test_char_data[*in]) { 4471: in++; 4472: ccol++; 4473: } 4474: ctxt->input->col = ccol; 4475: if (*in == 0xA) { 4476: do { 4477: ctxt->input->line++; ctxt->input->col = 1; 4478: in++; 4479: } while (*in == 0xA); 4480: goto get_more; 4481: } 4482: if (*in == ']') { 4483: if ((in[1] == ']') && (in[2] == '>')) { 4484: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4485: ctxt->input->cur = in; 4486: return; 4487: } 4488: in++; 4489: ctxt->input->col++; 4490: goto get_more; 4491: } 4492: nbchar = in - ctxt->input->cur; 4493: if (nbchar > 0) { 4494: if ((ctxt->sax != NULL) && 4495: (ctxt->sax->ignorableWhitespace != 4496: ctxt->sax->characters) && 4497: (IS_BLANK_CH(*ctxt->input->cur))) { 4498: const xmlChar *tmp = ctxt->input->cur; 4499: ctxt->input->cur = in; 4500: 4501: if (areBlanks(ctxt, tmp, nbchar, 0)) 
{ 4502: if (ctxt->sax->ignorableWhitespace != NULL) 4503: ctxt->sax->ignorableWhitespace(ctxt->userData, 4504: tmp, nbchar); 4505: } else { 4506: if (ctxt->sax->characters != NULL) 4507: ctxt->sax->characters(ctxt->userData, 4508: tmp, nbchar); 4509: if (*ctxt->space == -1) 4510: *ctxt->space = -2; 4511: } 4512: line = ctxt->input->line; 4513: col = ctxt->input->col; 4514: } else if (ctxt->sax != NULL) { 4515: if (ctxt->sax->characters != NULL) 4516: ctxt->sax->characters(ctxt->userData, 4517: ctxt->input->cur, nbchar); 4518: line = ctxt->input->line; 4519: col = ctxt->input->col; 4520: } 4521: /* something really bad happened in the SAX callback */ 4522: if (ctxt->instate != XML_PARSER_CONTENT) 4523: return; 4524: } 4525: ctxt->input->cur = in; 4526: if (*in == 0xD) { 4527: in++; 4528: if (*in == 0xA) { 4529: ctxt->input->cur = in; 4530: in++; 4531: ctxt->input->line++; ctxt->input->col = 1; 4532: continue; /* while */ 4533: } 4534: in--; 4535: } 4536: if (*in == '<') { 4537: return; 4538: } 4539: if (*in == '&') { 4540: return; 4541: } 4542: SHRINK; 4543: GROW; 4544: if (ctxt->instate == XML_PARSER_EOF) 4545: return; 4546: in = ctxt->input->cur; 4547: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4548: nbchar = 0; 4549: } 4550: ctxt->input->line = line; 4551: ctxt->input->col = col; 4552: xmlParseCharDataComplex(ctxt, cdata); 4553: } 4554: 4555: /** 4556: * xmlParseCharDataComplex: 4557: * @ctxt: an XML parser context 4558: * @cdata: int indicating whether we are within a CDATA section 4559: * 4560: * parse a CharData section.this is the fallback function 4561: * of xmlParseCharData() when the parsing requires handling 4562: * of non-ASCII characters. 
4563: */ 4564: static void 4565: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4566: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4567: int nbchar = 0; 4568: int cur, l; 4569: int count = 0; 4570: 4571: SHRINK; 4572: GROW; 4573: cur = CUR_CHAR(l); 4574: while ((cur != '<') && /* checked */ 4575: (cur != '&') && 4576: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4577: if ((cur == ']') && (NXT(1) == ']') && 4578: (NXT(2) == '>')) { 4579: if (cdata) break; 4580: else { 4581: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4582: } 4583: } 4584: COPY_BUF(l,buf,nbchar,cur); 4585: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4586: buf[nbchar] = 0; 4587: 4588: /* 4589: * OK the segment is to be consumed as chars. 4590: */ 4591: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4592: if (areBlanks(ctxt, buf, nbchar, 0)) { 4593: if (ctxt->sax->ignorableWhitespace != NULL) 4594: ctxt->sax->ignorableWhitespace(ctxt->userData, 4595: buf, nbchar); 4596: } else { 4597: if (ctxt->sax->characters != NULL) 4598: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4599: if ((ctxt->sax->characters != 4600: ctxt->sax->ignorableWhitespace) && 4601: (*ctxt->space == -1)) 4602: *ctxt->space = -2; 4603: } 4604: } 4605: nbchar = 0; 4606: /* something really bad happened in the SAX callback */ 4607: if (ctxt->instate != XML_PARSER_CONTENT) 4608: return; 4609: } 4610: count++; 4611: if (count > 50) { 4612: GROW; 4613: count = 0; 4614: if (ctxt->instate == XML_PARSER_EOF) 4615: return; 4616: } 4617: NEXTL(l); 4618: cur = CUR_CHAR(l); 4619: } 4620: if (nbchar != 0) { 4621: buf[nbchar] = 0; 4622: /* 4623: * OK the segment is to be consumed as chars. 
4624: */ 4625: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4626: if (areBlanks(ctxt, buf, nbchar, 0)) { 4627: if (ctxt->sax->ignorableWhitespace != NULL) 4628: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4629: } else { 4630: if (ctxt->sax->characters != NULL) 4631: ctxt->sax->characters(ctxt->userData, buf, nbchar); 4632: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4633: (*ctxt->space == -1)) 4634: *ctxt->space = -2; 4635: } 4636: } 4637: } 4638: if ((cur != 0) && (!IS_CHAR(cur))) { 4639: /* Generate the error and skip the offending character */ 4640: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4641: "PCDATA invalid Char value %d\n", 4642: cur); 4643: NEXTL(l); 4644: } 4645: } 4646: 4647: /** 4648: * xmlParseExternalID: 4649: * @ctxt: an XML parser context 4650: * @publicID: a xmlChar** receiving PubidLiteral 4651: * @strict: indicate whether we should restrict parsing to only 4652: * production [75], see NOTE below 4653: * 4654: * Parse an External ID or a Public ID 4655: * 4656: * NOTE: Productions [75] and [83] interact badly since [75] can generate 4657: * 'PUBLIC' S PubidLiteral S SystemLiteral 4658: * 4659: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4660: * | 'PUBLIC' S PubidLiteral S SystemLiteral 4661: * 4662: * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4663: * 4664: * Returns the function returns SystemLiteral and in the second 4665: * case publicID receives PubidLiteral, is strict is off 4666: * it is possible to return NULL and have publicID set. 
4667: */ 4668: 4669: xmlChar * 4670: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4671: xmlChar *URI = NULL; 4672: 4673: SHRINK; 4674: 4675: *publicID = NULL; 4676: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4677: SKIP(6); 4678: if (!IS_BLANK_CH(CUR)) { 4679: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4680: "Space required after 'SYSTEM'\n"); 4681: } 4682: SKIP_BLANKS; 4683: URI = xmlParseSystemLiteral(ctxt); 4684: if (URI == NULL) { 4685: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4686: } 4687: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4688: SKIP(6); 4689: if (!IS_BLANK_CH(CUR)) { 4690: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4691: "Space required after 'PUBLIC'\n"); 4692: } 4693: SKIP_BLANKS; 4694: *publicID = xmlParsePubidLiteral(ctxt); 4695: if (*publicID == NULL) { 4696: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4697: } 4698: if (strict) { 4699: /* 4700: * We don't handle [83] so "S SystemLiteral" is required. 4701: */ 4702: if (!IS_BLANK_CH(CUR)) { 4703: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4704: "Space required after the Public Identifier\n"); 4705: } 4706: } else { 4707: /* 4708: * We handle [83] so we return immediately, if 4709: * "S SystemLiteral" is not detected. From a purely parsing 4710: * point of view that's a nice mess. 4711: */ 4712: const xmlChar *ptr; 4713: GROW; 4714: 4715: ptr = CUR_PTR; 4716: if (!IS_BLANK_CH(*ptr)) return(NULL); 4717: 4718: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! 
*/ 4719: if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4720: } 4721: SKIP_BLANKS; 4722: URI = xmlParseSystemLiteral(ctxt); 4723: if (URI == NULL) { 4724: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4725: } 4726: } 4727: return(URI); 4728: } 4729: 4730: /** 4731: * xmlParseCommentComplex: 4732: * @ctxt: an XML parser context 4733: * @buf: the already parsed part of the buffer 4734: * @len: number of bytes filles in the buffer 4735: * @size: allocated size of the buffer 4736: * 4737: * Skip an XML (SGML) comment  4738: * The spec says that "For compatibility, the string "--" (double-hyphen) 4739: * must not occur within comments. " 4740: * This is the slow routine in case the accelerator for ascii didn't work 4741: * 4742: * [15] Comment ::= '' 4743: */ 4744: static void 4745: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4746: size_t len, size_t size) { 4747: int q, ql; 4748: int r, rl; 4749: int cur, l; 4750: size_t count = 0; 4751: int inputid; 4752: 4753: inputid = ctxt->input->id; 4754: 4755: if (buf == NULL) { 4756: len = 0; 4757: size = XML_PARSER_BUFFER_SIZE; 4758: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4759: if (buf == NULL) { 4760: xmlErrMemory(ctxt, NULL); 4761: return; 4762: } 4763: } 4764: GROW; /* Assure there's enough input data */ 4765: q = CUR_CHAR(ql); 4766: if (q == 0) 4767: goto not_terminated; 4768: if (!IS_CHAR(q)) { 4769: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4770: "xmlParseComment: invalid xmlChar value %d\n", 4771: q); 4772: xmlFree (buf); 4773: return; 4774: } 4775: NEXTL(ql); 4776: r = CUR_CHAR(rl); 4777: if (r == 0) 4778: goto not_terminated; 4779: if (!IS_CHAR(r)) { 4780: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4781: "xmlParseComment: invalid xmlChar value %d\n", 4782: q); 4783: xmlFree (buf); 4784: return; 4785: } 4786: NEXTL(rl); 4787: cur = CUR_CHAR(l); 4788: if (cur == 0) 4789: goto not_terminated; 4790: while (IS_CHAR(cur) && /* checked */ 4791: ((cur != '>') || 4792: (r != '-') 
|| (q != '-'))) { 4793: if ((r == '-') && (q == '-')) { 4794: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4795: } 4796: if ((len > XML_MAX_TEXT_LENGTH) && 4797: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4798: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4799: "Comment too big found", NULL); 4800: xmlFree (buf); 4801: return; 4802: } 4803: if (len + 5 >= size) { 4804: xmlChar *new_buf; 4805: size_t new_size; 4806: 4807: new_size = size * 2; 4808: new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4809: if (new_buf == NULL) { 4810: xmlFree (buf); 4811: xmlErrMemory(ctxt, NULL); 4812: return; 4813: } 4814: buf = new_buf; 4815: size = new_size; 4816: } 4817: COPY_BUF(ql,buf,len,q); 4818: q = r; 4819: ql = rl; 4820: r = cur; 4821: rl = l; 4822: 4823: count++; 4824: if (count > 50) { 4825: GROW; 4826: count = 0; 4827: if (ctxt->instate == XML_PARSER_EOF) { 4828: xmlFree(buf); 4829: return; 4830: } 4831: } 4832: NEXTL(l); 4833: cur = CUR_CHAR(l); 4834: if (cur == 0) { 4835: SHRINK; 4836: GROW; 4837: cur = CUR_CHAR(l); 4838: } 4839: } 4840: buf[len] = 0; 4841: if (cur == 0) { 4842: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4843: "Comment not terminated \n 4872: * The spec says that "For compatibility, the string "--" (double-hyphen) 4873: * must not occur within comments. " 4874: * 4875: * [15] Comment ::= '' 4876: */ 4877: void 4878: xmlParseComment(xmlParserCtxtPtr ctxt) { 4879: xmlChar *buf = NULL; 4880: size_t size = XML_PARSER_BUFFER_SIZE; 4881: size_t len = 0; 4882: xmlParserInputState state; 4883: const xmlChar *in; 4884: size_t nbchar = 0; 4885: int ccol; 4886: int inputid; 4887: 4888: /* 4889: * Check that there is a comment right here. 
4890: */ 4891: if ((RAW != '<') || (NXT(1) != '!') || 4892: (NXT(2) != '-') || (NXT(3) != '-')) return; 4893: state = ctxt->instate; 4894: ctxt->instate = XML_PARSER_COMMENT; 4895: inputid = ctxt->input->id; 4896: SKIP(4); 4897: SHRINK; 4898: GROW; 4899: 4900: /* 4901: * Accelerated common case where input don't need to be 4902: * modified before passing it to the handler. 4903: */ 4904: in = ctxt->input->cur; 4905: do { 4906: if (*in == 0xA) { 4907: do { 4908: ctxt->input->line++; ctxt->input->col = 1; 4909: in++; 4910: } while (*in == 0xA); 4911: } 4912: get_more: 4913: ccol = ctxt->input->col; 4914: while (((*in > '-') && (*in <= 0x7F)) || 4915: ((*in >= 0x20) && (*in < '-')) || 4916: (*in == 0x09)) { 4917: in++; 4918: ccol++; 4919: } 4920: ctxt->input->col = ccol; 4921: if (*in == 0xA) { 4922: do { 4923: ctxt->input->line++; ctxt->input->col = 1; 4924: in++; 4925: } while (*in == 0xA); 4926: goto get_more; 4927: } 4928: nbchar = in - ctxt->input->cur; 4929: /* 4930: * save current set of data 4931: */ 4932: if (nbchar > 0) { 4933: if ((ctxt->sax != NULL) && 4934: (ctxt->sax->comment != NULL)) { 4935: if (buf == NULL) { 4936: if ((*in == '-') && (in[1] == '-')) 4937: size = nbchar + 1; 4938: else 4939: size = XML_PARSER_BUFFER_SIZE + nbchar; 4940: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4941: if (buf == NULL) { 4942: xmlErrMemory(ctxt, NULL); 4943: ctxt->instate = state; 4944: return; 4945: } 4946: len = 0; 4947: } else if (len + nbchar + 1 >= size) { 4948: xmlChar *new_buf; 4949: size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4950: new_buf = (xmlChar *) xmlRealloc(buf, 4951: size * sizeof(xmlChar)); 4952: if (new_buf == NULL) { 4953: xmlFree (buf); 4954: xmlErrMemory(ctxt, NULL); 4955: ctxt->instate = state; 4956: return; 4957: } 4958: buf = new_buf; 4959: } 4960: memcpy(&buf[len], ctxt->input->cur, nbchar); 4961: len += nbchar; 4962: buf[len] = 0; 4963: } 4964: } 4965: if ((len > XML_MAX_TEXT_LENGTH) && 4966: ((ctxt->options & 
XML_PARSE_HUGE) == 0)) { 4967: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4968: "Comment too big found", NULL); 4969: xmlFree (buf); 4970: return; 4971: } 4972: ctxt->input->cur = in; 4973: if (*in == 0xA) { 4974: in++; 4975: ctxt->input->line++; ctxt->input->col = 1; 4976: } 4977: if (*in == 0xD) { 4978: in++; 4979: if (*in == 0xA) { 4980: ctxt->input->cur = in; 4981: in++; 4982: ctxt->input->line++; ctxt->input->col = 1; 4983: continue; /* while */ 4984: } 4985: in--; 4986: } 4987: SHRINK; 4988: GROW; 4989: if (ctxt->instate == XML_PARSER_EOF) { 4990: xmlFree(buf); 4991: return; 4992: } 4993: in = ctxt->input->cur; 4994: if (*in == '-') { 4995: if (in[1] == '-') { 4996: if (in[2] == '>') { 4997: if (ctxt->input->id != inputid) { 4998: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4999: "comment doesn't start and stop in the same entity\n"); 5000: } 5001: SKIP(3); 5002: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5003: (!ctxt->disableSAX)) { 5004: if (buf != NULL) 5005: ctxt->sax->comment(ctxt->userData, buf); 5006: else 5007: ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5008: } 5009: if (buf != NULL) 5010: xmlFree(buf); 5011: if (ctxt->instate != XML_PARSER_EOF) 5012: ctxt->instate = state; 5013: return; 5014: } 5015: if (buf != NULL) { 5016: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5017: "Double hyphen within comment: " 5018: "<!--%.50s\n", 5019: buf); 5020: } else 5021: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5022: "Double hyphen within comment\n", NULL); 5023: in++; 5024: ctxt->input->col++; 5025: } 5026: in++; 5027: ctxt->input->col++; 5028: goto get_more; 5029: } 5030: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5031: xmlParseCommentComplex(ctxt, buf, len, size); 5032: ctxt->instate = state; 5033: return; 5034: } 5035: 5036: 5037: /** 5038: * xmlParsePITarget: 5039: * @ctxt: an XML parser context 5040: * 5041: * parse the name of a PI 5042: * 5043: * [17] PITarget ::= Name - (('X' | 'x') 
('M' | 'm') ('L' | 'l')) 5044: * 5045: * Returns the PITarget name or NULL 5046: */ 5047: 5048: const xmlChar * 5049: xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5050: const xmlChar *name; 5051: 5052: name = xmlParseName(ctxt); 5053: if ((name != NULL) && 5054: ((name[0] == 'x') || (name[0] == 'X')) && 5055: ((name[1] == 'm') || (name[1] == 'M')) && 5056: ((name[2] == 'l') || (name[2] == 'L'))) { 5057: int i; 5058: if ((name[0] == 'x') && (name[1] == 'm') && 5059: (name[2] == 'l') && (name[3] == 0)) { 5060: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5061: "XML declaration allowed only at the start of the document\n"); 5062: return(name); 5063: } else if (name[3] == 0) { 5064: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5065: return(name); 5066: } 5067: for (i = 0;;i++) { 5068: if (xmlW3CPIs[i] == NULL) break; 5069: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5070: return(name); 5071: } 5072: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5073: "xmlParsePITarget: invalid name prefix 'xml'\n", 5074: NULL, NULL); 5075: } 5076: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5077: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5078: "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 5079: } 5080: return(name); 5081: } 5082: 5083: #ifdef LIBXML_CATALOG_ENABLED 5084: /** 5085: * xmlParseCatalogPI: 5086: * @ctxt: an XML parser context 5087: * @catalog: the PI value string 5088: * 5089: * parse an XML Catalog Processing Instruction. 
5090: * 5091: * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5092: * 5093: * Occurs only if allowed by the user and if happening in the Misc 5094: * part of the document before any doctype informations 5095: * This will add the given catalog to the parsing context in order 5096: * to be used if there is a resolution need further down in the document 5097: */ 5098: 5099: static void 5100: xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5101: xmlChar *URL = NULL; 5102: const xmlChar *tmp, *base; 5103: xmlChar marker; 5104: 5105: tmp = catalog; 5106: while (IS_BLANK_CH(*tmp)) tmp++; 5107: if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5108: goto error; 5109: tmp += 7; 5110: while (IS_BLANK_CH(*tmp)) tmp++; 5111: if (*tmp != '=') { 5112: return; 5113: } 5114: tmp++; 5115: while (IS_BLANK_CH(*tmp)) tmp++; 5116: marker = *tmp; 5117: if ((marker != '\'') && (marker != '"')) 5118: goto error; 5119: tmp++; 5120: base = tmp; 5121: while ((*tmp != 0) && (*tmp != marker)) tmp++; 5122: if (*tmp == 0) 5123: goto error; 5124: URL = xmlStrndup(base, tmp - base); 5125: tmp++; 5126: while (IS_BLANK_CH(*tmp)) tmp++; 5127: if (*tmp != 0) 5128: goto error; 5129: 5130: if (URL != NULL) { 5131: ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5132: xmlFree(URL); 5133: } 5134: return; 5135: 5136: error: 5137: xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5138: "Catalog PI syntax error: %s\n", 5139: catalog, NULL); 5140: if (URL != NULL) 5141: xmlFree(URL); 5142: } 5143: #endif 5144: 5145: /** 5146: * xmlParsePI: 5147: * @ctxt: an XML parser context 5148: * 5149: * parse an XML Processing Instruction. 5150: * 5151: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5152: * 5153: * The processing is transfered to SAX once parsed. 
5154: */ 5155: 5156: void 5157: xmlParsePI(xmlParserCtxtPtr ctxt) { 5158: xmlChar *buf = NULL; 5159: size_t len = 0; 5160: size_t size = XML_PARSER_BUFFER_SIZE; 5161: int cur, l; 5162: const xmlChar *target; 5163: xmlParserInputState state; 5164: int count = 0; 5165: 5166: if ((RAW == '<') && (NXT(1) == '?')) { 5167: xmlParserInputPtr input = ctxt->input; 5168: state = ctxt->instate; 5169: ctxt->instate = XML_PARSER_PI; 5170: /* 5171: * this is a Processing Instruction. 5172: */ 5173: SKIP(2); 5174: SHRINK; 5175: 5176: /* 5177: * Parse the target name and check for special support like 5178: * namespace. 5179: */ 5180: target = xmlParsePITarget(ctxt); 5181: if (target != NULL) { 5182: if ((RAW == '?') && (NXT(1) == '>')) { 5183: if (input != ctxt->input) { 5184: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5185: "PI declaration doesn't start and stop in the same entity\n"); 5186: } 5187: SKIP(2); 5188: 5189: /* 5190: * SAX: PI detected. 5191: */ 5192: if ((ctxt->sax) && (!ctxt->disableSAX) && 5193: (ctxt->sax->processingInstruction != NULL)) 5194: ctxt->sax->processingInstruction(ctxt->userData, 5195: target, NULL); 5196: if (ctxt->instate != XML_PARSER_EOF) 5197: ctxt->instate = state; 5198: return; 5199: } 5200: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5201: if (buf == NULL) { 5202: xmlErrMemory(ctxt, NULL); 5203: ctxt->instate = state; 5204: return; 5205: } 5206: cur = CUR; 5207: if (!IS_BLANK(cur)) { 5208: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5209: "ParsePI: PI %s space expected\n", target); 5210: } 5211: SKIP_BLANKS; 5212: cur = CUR_CHAR(l); 5213: while (IS_CHAR(cur) && /* checked */ 5214: ((cur != '?') || (NXT(1) != '>'))) { 5215: if (len + 5 >= size) { 5216: xmlChar *tmp; 5217: size_t new_size = size * 2; 5218: tmp = (xmlChar *) xmlRealloc(buf, new_size); 5219: if (tmp == NULL) { 5220: xmlErrMemory(ctxt, NULL); 5221: xmlFree(buf); 5222: ctxt->instate = state; 5223: return; 5224: } 5225: buf = tmp; 5226: size = new_size; 5227: } 
5228: count++; 5229: if (count > 50) { 5230: GROW; 5231: if (ctxt->instate == XML_PARSER_EOF) { 5232: xmlFree(buf); 5233: return; 5234: } 5235: count = 0; 5236: if ((len > XML_MAX_TEXT_LENGTH) && 5237: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5238: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5239: "PI %s too big found", target); 5240: xmlFree(buf); 5241: ctxt->instate = state; 5242: return; 5243: } 5244: } 5245: COPY_BUF(l,buf,len,cur); 5246: NEXTL(l); 5247: cur = CUR_CHAR(l); 5248: if (cur == 0) { 5249: SHRINK; 5250: GROW; 5251: cur = CUR_CHAR(l); 5252: } 5253: } 5254: if ((len > XML_MAX_TEXT_LENGTH) && 5255: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5256: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5257: "PI %s too big found", target); 5258: xmlFree(buf); 5259: ctxt->instate = state; 5260: return; 5261: } 5262: buf[len] = 0; 5263: if (cur != '?') { 5264: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5265: "ParsePI: PI %s never end ...\n", target); 5266: } else { 5267: if (input != ctxt->input) { 5268: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5269: "PI declaration doesn't start and stop in the same entity\n"); 5270: } 5271: SKIP(2); 5272: 5273: #ifdef LIBXML_CATALOG_ENABLED 5274: if (((state == XML_PARSER_MISC) || 5275: (state == XML_PARSER_START)) && 5276: (xmlStrEqual(target, XML_CATALOG_PI))) { 5277: xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5278: if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5279: (allow == XML_CATA_ALLOW_ALL)) 5280: xmlParseCatalogPI(ctxt, buf); 5281: } 5282: #endif 5283: 5284: 5285: /* 5286: * SAX: PI detected. 
5287: */ 5288: if ((ctxt->sax) && (!ctxt->disableSAX) && 5289: (ctxt->sax->processingInstruction != NULL)) 5290: ctxt->sax->processingInstruction(ctxt->userData, 5291: target, buf); 5292: } 5293: xmlFree(buf); 5294: } else { 5295: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5296: } 5297: if (ctxt->instate != XML_PARSER_EOF) 5298: ctxt->instate = state; 5299: } 5300: } 5301: 5302: /** 5303: * xmlParseNotationDecl: 5304: * @ctxt: an XML parser context 5305: * 5306: * parse a notation declaration 5307: * 5308: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5309: * 5310: * Hence there is actually 3 choices: 5311: * 'PUBLIC' S PubidLiteral 5312: * 'PUBLIC' S PubidLiteral S SystemLiteral 5313: * and 'SYSTEM' S SystemLiteral 5314: * 5315: * See the NOTE on xmlParseExternalID(). 5316: */ 5317: 5318: void 5319: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5320: const xmlChar *name; 5321: xmlChar *Pubid; 5322: xmlChar *Systemid; 5323: 5324: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5325: xmlParserInputPtr input = ctxt->input; 5326: SHRINK; 5327: SKIP(10); 5328: if (!IS_BLANK_CH(CUR)) { 5329: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5330: "Space required after '<!NOTATION'\n"); 5331: return; 5332: } 5333: SKIP_BLANKS; 5334: 5335: name = xmlParseName(ctxt); 5336: if (name == NULL) { 5337: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5338: return; 5339: } 5340: if (!IS_BLANK_CH(CUR)) { 5341: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5342: "Space required after the NOTATION name'\n"); 5343: return; 5344: } 5345: if (xmlStrchr(name, ':') != NULL) { 5346: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5347: "colon are forbidden from notation names '%s'\n", 5348: name, NULL, NULL); 5349: } 5350: SKIP_BLANKS; 5351: 5352: /* 5353: * Parse the IDs. 
5354: */ 5355: Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5356: SKIP_BLANKS; 5357: 5358: if (RAW == '>') { 5359: if (input != ctxt->input) { 5360: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5361: "Notation declaration doesn't start and stop in the same entity\n"); 5362: } 5363: NEXT; 5364: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5365: (ctxt->sax->notationDecl != NULL)) 5366: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5367: } else { 5368: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5369: } 5370: if (Systemid != NULL) xmlFree(Systemid); 5371: if (Pubid != NULL) xmlFree(Pubid); 5372: } 5373: } 5374: 5375: /** 5376: * xmlParseEntityDecl: 5377: * @ctxt: an XML parser context 5378: * 5379: * parse <!ENTITY declarations 5380: * 5381: * [70] EntityDecl ::= GEDecl | PEDecl 5382: * 5383: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5384: * 5385: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5386: * 5387: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5388: * 5389: * [74] PEDef ::= EntityValue | ExternalID 5390: * 5391: * [76] NDataDecl ::= S 'NDATA' S Name 5392: * 5393: * [ VC: Notation Declared ] 5394: * The Name must match the declared name of a notation. 
5395: */ 5396: 5397: void 5398: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5399: const xmlChar *name = NULL; 5400: xmlChar *value = NULL; 5401: xmlChar *URI = NULL, *literal = NULL; 5402: const xmlChar *ndata = NULL; 5403: int isParameter = 0; 5404: xmlChar *orig = NULL; 5405: int skipped; 5406: 5407: /* GROW; done in the caller */ 5408: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5409: xmlParserInputPtr input = ctxt->input; 5410: SHRINK; 5411: SKIP(8); 5412: skipped = SKIP_BLANKS; 5413: if (skipped == 0) { 5414: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5415: "Space required after '<!ENTITY'\n"); 5416: } 5417: 5418: if (RAW == '%') { 5419: NEXT; 5420: skipped = SKIP_BLANKS; 5421: if (skipped == 0) { 5422: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5423: "Space required after '%'\n"); 5424: } 5425: isParameter = 1; 5426: } 5427: 5428: name = xmlParseName(ctxt); 5429: if (name == NULL) { 5430: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5431: "xmlParseEntityDecl: no name\n"); 5432: return; 5433: } 5434: if (xmlStrchr(name, ':') != NULL) { 5435: xmlNsErr(ctxt, XML_NS_ERR_COLON, 5436: "colon are forbidden from entities names '%s'\n", 5437: name, NULL, NULL); 5438: } 5439: skipped = SKIP_BLANKS; 5440: if (skipped == 0) { 5441: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5442: "Space required after the entity name\n"); 5443: } 5444: 5445: ctxt->instate = XML_PARSER_ENTITY_DECL; 5446: /* 5447: * handle the various case of definitions... 
5448: */ 5449: if (isParameter) { 5450: if ((RAW == '"') || (RAW == '\'')) { 5451: value = xmlParseEntityValue(ctxt, &orig); 5452: if (value) { 5453: if ((ctxt->sax != NULL) && 5454: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5455: ctxt->sax->entityDecl(ctxt->userData, name, 5456: XML_INTERNAL_PARAMETER_ENTITY, 5457: NULL, NULL, value); 5458: } 5459: } else { 5460: URI = xmlParseExternalID(ctxt, &literal, 1); 5461: if ((URI == NULL) && (literal == NULL)) { 5462: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5463: } 5464: if (URI) { 5465: xmlURIPtr uri; 5466: 5467: uri = xmlParseURI((const char *) URI); 5468: if (uri == NULL) { 5469: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5470: "Invalid URI: %s\n", URI); 5471: /* 5472: * This really ought to be a well formedness error 5473: * but the XML Core WG decided otherwise c.f. issue 5474: * E26 of the XML erratas. 5475: */ 5476: } else { 5477: if (uri->fragment != NULL) { 5478: /* 5479: * Okay this is foolish to block those but not 5480: * invalid URIs. 5481: */ 5482: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5483: } else { 5484: if ((ctxt->sax != NULL) && 5485: (!ctxt->disableSAX) && 5486: (ctxt->sax->entityDecl != NULL)) 5487: ctxt->sax->entityDecl(ctxt->userData, name, 5488: XML_EXTERNAL_PARAMETER_ENTITY, 5489: literal, URI, NULL); 5490: } 5491: xmlFreeURI(uri); 5492: } 5493: } 5494: } 5495: } else { 5496: if ((RAW == '"') || (RAW == '\'')) { 5497: value = xmlParseEntityValue(ctxt, &orig); 5498: if ((ctxt->sax != NULL) && 5499: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5500: ctxt->sax->entityDecl(ctxt->userData, name, 5501: XML_INTERNAL_GENERAL_ENTITY, 5502: NULL, NULL, value); 5503: /* 5504: * For expat compatibility in SAX mode. 
5505: */ 5506: if ((ctxt->myDoc == NULL) || 5507: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5508: if (ctxt->myDoc == NULL) { 5509: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5510: if (ctxt->myDoc == NULL) { 5511: xmlErrMemory(ctxt, "New Doc failed"); 5512: return; 5513: } 5514: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5515: } 5516: if (ctxt->myDoc->intSubset == NULL) 5517: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5518: BAD_CAST "fake", NULL, NULL); 5519: 5520: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5521: NULL, NULL, value); 5522: } 5523: } else { 5524: URI = xmlParseExternalID(ctxt, &literal, 1); 5525: if ((URI == NULL) && (literal == NULL)) { 5526: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5527: } 5528: if (URI) { 5529: xmlURIPtr uri; 5530: 5531: uri = xmlParseURI((const char *)URI); 5532: if (uri == NULL) { 5533: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5534: "Invalid URI: %s\n", URI); 5535: /* 5536: * This really ought to be a well formedness error 5537: * but the XML Core WG decided otherwise c.f. issue 5538: * E26 of the XML erratas. 5539: */ 5540: } else { 5541: if (uri->fragment != NULL) { 5542: /* 5543: * Okay this is foolish to block those but not 5544: * invalid URIs. 
5545: */ 5546: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5547: } 5548: xmlFreeURI(uri); 5549: } 5550: } 5551: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5552: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5553: "Space required before 'NDATA'\n"); 5554: } 5555: SKIP_BLANKS; 5556: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5557: SKIP(5); 5558: if (!IS_BLANK_CH(CUR)) { 5559: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5560: "Space required after 'NDATA'\n"); 5561: } 5562: SKIP_BLANKS; 5563: ndata = xmlParseName(ctxt); 5564: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5565: (ctxt->sax->unparsedEntityDecl != NULL)) 5566: ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5567: literal, URI, ndata); 5568: } else { 5569: if ((ctxt->sax != NULL) && 5570: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5571: ctxt->sax->entityDecl(ctxt->userData, name, 5572: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5573: literal, URI, NULL); 5574: /* 5575: * For expat compatibility in SAX mode. 5576: * assuming the entity repalcement was asked for 5577: */ 5578: if ((ctxt->replaceEntities != 0) && 5579: ((ctxt->myDoc == NULL) || 5580: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5581: if (ctxt->myDoc == NULL) { 5582: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5583: if (ctxt->myDoc == NULL) { 5584: xmlErrMemory(ctxt, "New Doc failed"); 5585: return; 5586: } 5587: ctxt->myDoc->properties = XML_DOC_INTERNAL; 5588: } 5589: 5590: if (ctxt->myDoc->intSubset == NULL) 5591: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5592: BAD_CAST "fake", NULL, NULL); 5593: xmlSAX2EntityDecl(ctxt, name, 5594: XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5595: literal, URI, NULL); 5596: } 5597: } 5598: } 5599: } 5600: if (ctxt->instate == XML_PARSER_EOF) 5601: return; 5602: SKIP_BLANKS; 5603: if (RAW != '>') { 5604: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5605: "xmlParseEntityDecl: entity %s not terminated\n", name); 5606: } else { 5607: if (input != ctxt->input) { 5608: 
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5609: "Entity declaration doesn't start and stop in the same entity\n"); 5610: } 5611: NEXT; 5612: } 5613: if (orig != NULL) { 5614: /* 5615: * Ugly mechanism to save the raw entity value. 5616: */ 5617: xmlEntityPtr cur = NULL; 5618: 5619: if (isParameter) { 5620: if ((ctxt->sax != NULL) && 5621: (ctxt->sax->getParameterEntity != NULL)) 5622: cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5623: } else { 5624: if ((ctxt->sax != NULL) && 5625: (ctxt->sax->getEntity != NULL)) 5626: cur = ctxt->sax->getEntity(ctxt->userData, name); 5627: if ((cur == NULL) && (ctxt->userData==ctxt)) { 5628: cur = xmlSAX2GetEntity(ctxt, name); 5629: } 5630: } 5631: if (cur != NULL) { 5632: if (cur->orig != NULL) 5633: xmlFree(orig); 5634: else 5635: cur->orig = orig; 5636: } else 5637: xmlFree(orig); 5638: } 5639: if (value != NULL) xmlFree(value); 5640: if (URI != NULL) xmlFree(URI); 5641: if (literal != NULL) xmlFree(literal); 5642: } 5643: } 5644: 5645: /** 5646: * xmlParseDefaultDecl: 5647: * @ctxt: an XML parser context 5648: * @value: Receive a possible fixed default value for the attribute 5649: * 5650: * Parse an attribute default declaration 5651: * 5652: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5653: * 5654: * [ VC: Required Attribute ] 5655: * if the default declaration is the keyword #REQUIRED, then the 5656: * attribute must be specified for all elements of the type in the 5657: * attribute-list declaration. 5658: * 5659: * [ VC: Attribute Default Legal ] 5660: * The declared default value must meet the lexical constraints of 5661: * the declared attribute type c.f. xmlValidateAttributeDecl() 5662: * 5663: * [ VC: Fixed Attribute Default ] 5664: * if an attribute has a default value declared with the #FIXED 5665: * keyword, instances of that attribute must match the default value. 
5666: * 5667: * [ WFC: No < in Attribute Values ] 5668: * handled in xmlParseAttValue() 5669: * 5670: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5671: * or XML_ATTRIBUTE_FIXED. 5672: */ 5673: 5674: int 5675: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5676: int val; 5677: xmlChar *ret; 5678: 5679: *value = NULL; 5680: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5681: SKIP(9); 5682: return(XML_ATTRIBUTE_REQUIRED); 5683: } 5684: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5685: SKIP(8); 5686: return(XML_ATTRIBUTE_IMPLIED); 5687: } 5688: val = XML_ATTRIBUTE_NONE; 5689: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5690: SKIP(6); 5691: val = XML_ATTRIBUTE_FIXED; 5692: if (!IS_BLANK_CH(CUR)) { 5693: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5694: "Space required after '#FIXED'\n"); 5695: } 5696: SKIP_BLANKS; 5697: } 5698: ret = xmlParseAttValue(ctxt); 5699: ctxt->instate = XML_PARSER_DTD; 5700: if (ret == NULL) { 5701: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5702: "Attribute default value declaration error\n"); 5703: } else 5704: *value = ret; 5705: return(val); 5706: } 5707: 5708: /** 5709: * xmlParseNotationType: 5710: * @ctxt: an XML parser context 5711: * 5712: * parse an Notation attribute type. 5713: * 5714: * Note: the leading 'NOTATION' S part has already being parsed... 5715: * 5716: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5717: * 5718: * [ VC: Notation Attributes ] 5719: * Values of this type must match one of the notation names included 5720: * in the declaration; all notation names in the declaration must be declared. 
5721: * 5722: * Returns: the notation attribute tree built while parsing 5723: */ 5724: 5725: xmlEnumerationPtr 5726: xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5727: const xmlChar *name; 5728: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5729: 5730: if (RAW != '(') { 5731: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5732: return(NULL); 5733: } 5734: SHRINK; 5735: do { 5736: NEXT; 5737: SKIP_BLANKS; 5738: name = xmlParseName(ctxt); 5739: if (name == NULL) { 5740: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5741: "Name expected in NOTATION declaration\n"); 5742: xmlFreeEnumeration(ret); 5743: return(NULL); 5744: } 5745: tmp = ret; 5746: while (tmp != NULL) { 5747: if (xmlStrEqual(name, tmp->name)) { 5748: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5749: "standalone: attribute notation value token %s duplicated\n", 5750: name, NULL); 5751: if (!xmlDictOwns(ctxt->dict, name)) 5752: xmlFree((xmlChar *) name); 5753: break; 5754: } 5755: tmp = tmp->next; 5756: } 5757: if (tmp == NULL) { 5758: cur = xmlCreateEnumeration(name); 5759: if (cur == NULL) { 5760: xmlFreeEnumeration(ret); 5761: return(NULL); 5762: } 5763: if (last == NULL) ret = last = cur; 5764: else { 5765: last->next = cur; 5766: last = cur; 5767: } 5768: } 5769: SKIP_BLANKS; 5770: } while (RAW == '|'); 5771: if (RAW != ')') { 5772: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5773: xmlFreeEnumeration(ret); 5774: return(NULL); 5775: } 5776: NEXT; 5777: return(ret); 5778: } 5779: 5780: /** 5781: * xmlParseEnumerationType: 5782: * @ctxt: an XML parser context 5783: * 5784: * parse an Enumeration attribute type. 5785: * 5786: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * Returns: the enumeration attribute tree built while parsing. When a
 *          token is missing or the closing ')' is not found, the items
 *          collected so far are returned (possibly NULL).
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlEnumerationPtr head = NULL;    /* list handed back to the caller */
    xmlEnumerationPtr tail = NULL;    /* its last item, where we append */

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;
    do {
        xmlChar *token;
        xmlEnumerationPtr seen;
        xmlEnumerationPtr item = NULL;

        /* Step over the '(' on the first pass, over a '|' afterwards. */
        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }

        /* Look for an earlier item carrying the same token. */
        for (seen = head; seen != NULL; seen = seen->next) {
            if (xmlStrEqual(token, seen->name))
                break;
        }

        if (seen != NULL) {
            xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
          "standalone: attribute enumeration value token %s duplicated\n",
                             token, NULL);
        } else {
            item = xmlCreateEnumeration(token);
        }

        /* The token string is released here unless the dictionary owns it. */
        if (!xmlDictOwns(ctxt->dict, token))
            xmlFree(token);

        if (seen == NULL) {
            if (item == NULL) {
                xmlFreeEnumeration(head);
                return(NULL);
            }
            if (tail == NULL)
                head = item;
            else
                tail->next = item;
            tail = item;
        }
        SKIP_BLANKS;
    } while (RAW == '|');

    if (RAW == ')') {
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
    }
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 when
 *          no list could be built or the blank after 'NOTATION' is missing
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    int kind;

    if (!CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        /* Plain enumeration: '(' Nmtoken ... ')' */
        *tree = xmlParseEnumerationType(ctxt);
        kind = XML_ATTRIBUTE_ENUMERATION;
    } else {
        SKIP(8);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'NOTATION'\n");
            return(0);
        }
        SKIP_BLANKS;
        *tree = xmlParseNotationType(ctxt);
        kind = XML_ATTRIBUTE_NOTATION;
    }

    /* An empty result from either sub-parser is reported as 0. */
    if (*tree == NULL)
        return(0);
    return(kind);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5910: * 5911: * [ VC: IDREF ] 5912: * Values of type IDREF must match the Name production, and values 5913: * of type IDREFS must match Names; each IDREF Name must match the value 5914: * of an ID attribute on some element in the XML document; i.e. IDREF 5915: * values must match the value of some ID attribute. 5916: * 5917: * [ VC: Entity Name ] 5918: * Values of type ENTITY must match the Name production, values 5919: * of type ENTITIES must match Names; each Entity Name must match the 5920: * name of an unparsed entity declared in the DTD. 5921: * 5922: * [ VC: Name Token ] 5923: * Values of type NMTOKEN must match the Nmtoken production; values 5924: * of type NMTOKENS must match Nmtokens. 5925: * 5926: * Returns the attribute type 5927: */ 5928: int 5929: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5930: SHRINK; 5931: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5932: SKIP(5); 5933: return(XML_ATTRIBUTE_CDATA); 5934: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5935: SKIP(6); 5936: return(XML_ATTRIBUTE_IDREFS); 5937: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5938: SKIP(5); 5939: return(XML_ATTRIBUTE_IDREF); 5940: } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5941: SKIP(2); 5942: return(XML_ATTRIBUTE_ID); 5943: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5944: SKIP(6); 5945: return(XML_ATTRIBUTE_ENTITY); 5946: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5947: SKIP(8); 5948: return(XML_ATTRIBUTE_ENTITIES); 5949: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5950: SKIP(8); 5951: return(XML_ATTRIBUTE_NMTOKENS); 5952: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5953: SKIP(7); 5954: return(XML_ATTRIBUTE_NMTOKEN); 5955: } 5956: return(xmlParseEnumeratedType(ctxt, tree)); 5957: } 5958: 5959: /** 5960: * xmlParseAttributeListDecl: 5961: * @ctxt: an XML parser context 5962: * 5963: * : parse the Attribute list def for an element 5964: * 
5965: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 5966: * 5967: * [53] AttDef ::= S Name S AttType S DefaultDecl 5968: * 5969: */ 5970: void 5971: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5972: const xmlChar *elemName; 5973: const xmlChar *attrName; 5974: xmlEnumerationPtr tree; 5975: 5976: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5977: xmlParserInputPtr input = ctxt->input; 5978: 5979: SKIP(9); 5980: if (!IS_BLANK_CH(CUR)) { 5981: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5982: "Space required after '<!ATTLIST'\n"); 5983: } 5984: SKIP_BLANKS; 5985: elemName = xmlParseName(ctxt); 5986: if (elemName == NULL) { 5987: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5988: "ATTLIST: no name for Element\n"); 5989: return; 5990: } 5991: SKIP_BLANKS; 5992: GROW; 5993: while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 5994: const xmlChar *check = CUR_PTR; 5995: int type; 5996: int def; 5997: xmlChar *defaultValue = NULL; 5998: 5999: GROW; 6000: tree = NULL; 6001: attrName = xmlParseName(ctxt); 6002: if (attrName == NULL) { 6003: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6004: "ATTLIST: no name for Attribute\n"); 6005: break; 6006: } 6007: GROW; 6008: if (!IS_BLANK_CH(CUR)) { 6009: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6010: "Space required after the attribute name\n"); 6011: break; 6012: } 6013: SKIP_BLANKS; 6014: 6015: type = xmlParseAttributeType(ctxt, &tree); 6016: if (type <= 0) { 6017: break; 6018: } 6019: 6020: GROW; 6021: if (!IS_BLANK_CH(CUR)) { 6022: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6023: "Space required after the attribute type\n"); 6024: if (tree != NULL) 6025: xmlFreeEnumeration(tree); 6026: break; 6027: } 6028: SKIP_BLANKS; 6029: 6030: def = xmlParseDefaultDecl(ctxt, &defaultValue); 6031: if (def <= 0) { 6032: if (defaultValue != NULL) 6033: xmlFree(defaultValue); 6034: if (tree != NULL) 6035: xmlFreeEnumeration(tree); 6036: break; 6037: } 6038: if ((type != XML_ATTRIBUTE_CDATA) && 
(defaultValue != NULL)) 6039: xmlAttrNormalizeSpace(defaultValue, defaultValue); 6040: 6041: GROW; 6042: if (RAW != '>') { 6043: if (!IS_BLANK_CH(CUR)) { 6044: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6045: "Space required after the attribute default value\n"); 6046: if (defaultValue != NULL) 6047: xmlFree(defaultValue); 6048: if (tree != NULL) 6049: xmlFreeEnumeration(tree); 6050: break; 6051: } 6052: SKIP_BLANKS; 6053: } 6054: if (check == CUR_PTR) { 6055: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6056: "in xmlParseAttributeListDecl\n"); 6057: if (defaultValue != NULL) 6058: xmlFree(defaultValue); 6059: if (tree != NULL) 6060: xmlFreeEnumeration(tree); 6061: break; 6062: } 6063: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6064: (ctxt->sax->attributeDecl != NULL)) 6065: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6066: type, def, defaultValue, tree); 6067: else if (tree != NULL) 6068: xmlFreeEnumeration(tree); 6069: 6070: if ((ctxt->sax2) && (defaultValue != NULL) && 6071: (def != XML_ATTRIBUTE_IMPLIED) && 6072: (def != XML_ATTRIBUTE_REQUIRED)) { 6073: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6074: } 6075: if (ctxt->sax2) { 6076: xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6077: } 6078: if (defaultValue != NULL) 6079: xmlFree(defaultValue); 6080: GROW; 6081: } 6082: if (RAW == '>') { 6083: if (input != ctxt->input) { 6084: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6085: "Attribute list declaration doesn't start and stop in the same entity\n", 6086: NULL, NULL); 6087: } 6088: NEXT; 6089: } 6090: } 6091: } 6092: 6093: /** 6094: * xmlParseElementMixedContentDecl: 6095: * @ctxt: an XML parser context 6096: * @inputchk: the input used for the current entity, needed for boundary checks 6097: * 6098: * parse the declaration for a Mixed Element content 6099: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6100: * 6101: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? 
')*' |
 *                '(' S? '#PCDATA' S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * On every error path the partially built tree is released before
 * returning NULL.
 *
 * returns: the list of the xmlElementContentPtr describing the element choices
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (!CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
        return(NULL);
    }
    SKIP(7);
    SKIP_BLANKS;
    SHRINK;

    /* '(' S? '#PCDATA' S? ')' optionally followed by '*' */
    if (RAW == ')') {
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        NEXT;
        ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                      XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
        if (RAW == '*') {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        }
        return(ret);
    }

    if ((RAW == '(') || (RAW == '|')) {
        ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                            XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
    }

    /*
     * Each '|' adds an OR node. The name read at the end of one pass is
     * only attached on the next pass (or after the loop), once it is
     * known whether another alternative follows.
     */
    while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
        NEXT;
        if (elem == NULL) {
            /* First '|': the PCDATA leaf becomes c1 of a new OR root. */
            ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                          XML_ELEMENT_CONTENT_OR);
            if (ret == NULL) {
                /* cur is not linked anywhere yet: release it here. */
                xmlFreeDocElementContent(ctxt->myDoc, cur);
                return(NULL);
            }
            ret->c1 = cur;
            if (cur != NULL)
                cur->parent = ret;
            cur = ret;
        } else {
            n = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                        XML_ELEMENT_CONTENT_OR);
            if (n == NULL) {
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                            XML_ELEMENT_CONTENT_ELEMENT);
            if (n->c1 != NULL)
                n->c1->parent = n;
            cur->c2 = n;
            n->parent = cur;
            cur = n;
        }
        SKIP_BLANKS;
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                    "xmlParseElementMixedContentDecl : Name expected\n");
            /*
             * Release from the root: cur may be a node linked below ret,
             * and freeing only cur would leave the upper part allocated
             * with a stale c2 pointer.
             */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        SKIP_BLANKS;
        GROW;
    }

    if ((RAW == ')') && (NXT(1) == '*')) {
        if (elem != NULL) {
            /* Attach the last name read by the loop. */
            cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                              XML_ELEMENT_CONTENT_ELEMENT);
            if (cur->c2 != NULL)
                cur->c2->parent = cur;
        }
        if (ret != NULL)
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        SKIP(2);
    } else {
        xmlFreeDocElementContent(ctxt->myDoc, ret);
        xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
        return(NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for a children Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *      with parenthesized groups.
That is to say, if either of the
 *      opening or closing parentheses in a choice, seq, or Mixed
 *      construct is contained in the replacement text for a parameter
 *      entity, both must be contained in the same replacement text. For
 *      interoperability, if a parameter-entity reference appears in a
 *      choice, seq, or Mixed construct, its replacement text should not
 *      be empty, and neither the first nor last non-blank character of
 *      the replacement text should be a connector (| or ,).
 *
 * The recursion is refused beyond depth 128 (2048 when XML_PARSE_HUGE is
 * set). When a nested group fails to parse, the tree built so far is
 * released and NULL is returned.
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy, or NULL in case of error.
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    xmlChar type = 0;    /* separator used by this group: ',' or '|' */

    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth > 2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
        return(NULL);
    }
    SKIP_BLANKS;
    GROW;
    if (RAW == '(') {
        int inputid = ctxt->input->id;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        /* Do not build operators on top of a missing first operand. */
        if (cur == NULL)
            return(NULL);
        SKIP_BLANKS;
        GROW;
    } else {
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
            return(NULL);
        }
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem,
                                            XML_ELEMENT_CONTENT_ELEMENT);
        if (cur == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        GROW;
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Each loop we parse one separator and one element.
         */
        if ((RAW != ',') && (RAW != '|')) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
            goto error;
        }
        if (type == 0) {
            type = CUR;
        } else if (type != CUR) {
            /*
             * Detect "Name | Name , Name" and "Name , Name | Name" errors
             */
            xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                "xmlParseElementChildrenContentDecl : '%c' expected\n",
                              type);
            goto error;
        }
        NEXT;

        /* type now equals the separator just consumed. */
        op = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                     (type == ',') ? XML_ELEMENT_CONTENT_SEQ
                                                   : XML_ELEMENT_CONTENT_OR);
        if (op == NULL)
            goto error;
        if (last == NULL) {
            /* First separator: the operator becomes the new root. */
            op->c1 = ret;
            if (ret != NULL)
                ret->parent = op;
            ret = cur = op;
        } else {
            /* Chain on the right: the pending operand moves below op. */
            cur->c2 = op;
            op->parent = cur;
            op->c1 = last;
            last->parent = op;
            cur = op;
            last = NULL;
        }

        GROW;
        SKIP_BLANKS;
        GROW;
        if (RAW == '(') {
            int inputid = ctxt->input->id;

            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            /* A failed nested group would leave op without its operand. */
            if (last == NULL)
                goto error;
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
                goto error;
            }
            last = xmlNewDocElementContent(ctxt->myDoc, elem,
                                           XML_ELEMENT_CONTENT_ELEMENT);
            if (last == NULL)
                goto error;
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }

    /* Attach the operand still pending when the group ended. */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        last->parent = cur;
    }
    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                         NULL, NULL);
    }
    NEXT;

    /* Occurrence indicator applying to the whole group. */
    if (RAW == '?') {
        if (ret != NULL) {
            if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_OPT;
        }
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             *
             * NOTE(review): unlike the '*' case above, this walk starts
             * from cur as left by the parsing loop rather than from ret;
             * confirm that this is intended.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);

error:
    /*
     * last is only linked into ret when the next separator is read or when
     * the group ends, so a pending operand has to be released separately.
     */
    if ((last != NULL) && (last != ret))
        xmlFreeDocElementContent(ctxt->myDoc, last);
    if (ret != NULL)
        xmlFreeDocElementContent(ctxt->myDoc, ret);
    return(NULL);
}

/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a children Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *      with parenthesized groups. That is to say, if either of the
 *      opening or closing parentheses in a choice, seq, or Mixed
 *      construct is contained in the replacement text for a parameter
 *      entity, both must be contained in the same replacement text. For
 *      interoperability, if a parameter-entity reference appears in a
 *      choice, seq, or Mixed construct, its replacement text should not
 *      be empty, and neither the first nor last non-blank character of
 *      the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
6527: */ 6528: xmlElementContentPtr 6529: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6530: /* stub left for API/ABI compat */ 6531: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6532: } 6533: 6534: /** 6535: * xmlParseElementContentDecl: 6536: * @ctxt: an XML parser context 6537: * @name: the name of the element being defined. 6538: * @result: the Element Content pointer will be stored here if any 6539: * 6540: * parse the declaration for an Element content either Mixed or Children, 6541: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6542: * 6543: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6544: * 6545: * returns: the type of element content XML_ELEMENT_TYPE_xxx 6546: */ 6547: 6548: int 6549: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6550: xmlElementContentPtr *result) { 6551: 6552: xmlElementContentPtr tree = NULL; 6553: int inputid = ctxt->input->id; 6554: int res; 6555: 6556: *result = NULL; 6557: 6558: if (RAW != '(') { 6559: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6560: "xmlParseElementContentDecl : %s '(' expected\n", name); 6561: return(-1); 6562: } 6563: NEXT; 6564: GROW; 6565: if (ctxt->instate == XML_PARSER_EOF) 6566: return(-1); 6567: SKIP_BLANKS; 6568: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6569: tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6570: res = XML_ELEMENT_TYPE_MIXED; 6571: } else { 6572: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6573: res = XML_ELEMENT_TYPE_ELEMENT; 6574: } 6575: SKIP_BLANKS; 6576: *result = tree; 6577: return(res); 6578: } 6579: 6580: /** 6581: * xmlParseElementDecl: 6582: * @ctxt: an XML parser context 6583: * 6584: * parse an Element declaration. 6585: * 6586: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlParserInputPtr start;
    xmlElementContentPtr content = NULL;
    int ret = -1;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(ret);

    /* Remembered to check that the declaration ends in the same input. */
    start = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    /* contentspec */
    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        ret = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        ret = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        ret = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
      "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW == '>') {
        int plugged = 0;    /* set when the callback kept the content tree */

        if (start != ctxt->input) {
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
        }

        NEXT;
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
            (ctxt->sax->elementDecl != NULL)) {
            if (content != NULL)
                content->parent = NULL;
            ctxt->sax->elementDecl(ctxt->userData, name, ret, content);
            /*
             * this is a trick: if xmlAddElementDecl is called,
             * instead of copying the full tree it is plugged directly
             * if called from the parser. Avoid duplicating the
             * interfaces or change the API/ABI
             */
            if ((content != NULL) && (content->parent != NULL))
                plugged = 1;
        }
        if ((content != NULL) && (!plugged))
            xmlFreeDocElementContent(ctxt->myDoc, content);
    } else {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL)
            xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(ret);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6702: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6703: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6704: */ 6705: 6706: static void 6707: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6708: int id = ctxt->input->id; 6709: 6710: SKIP(3); 6711: SKIP_BLANKS; 6712: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6713: SKIP(7); 6714: SKIP_BLANKS; 6715: if (RAW != '[') { 6716: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6717: } else { 6718: if (ctxt->input->id != id) { 6719: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6720: "All markup of the conditional section is not in the same entity\n", 6721: NULL, NULL); 6722: } 6723: NEXT; 6724: } 6725: if (xmlParserDebugEntities) { 6726: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6727: xmlGenericError(xmlGenericErrorContext, 6728: "%s(%d): ", ctxt->input->filename, 6729: ctxt->input->line); 6730: xmlGenericError(xmlGenericErrorContext, 6731: "Entering INCLUDE Conditional Section\n"); 6732: } 6733: 6734: while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6735: (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6736: const xmlChar *check = CUR_PTR; 6737: unsigned int cons = ctxt->input->consumed; 6738: 6739: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6740: xmlParseConditionalSections(ctxt); 6741: } else if (IS_BLANK_CH(CUR)) { 6742: NEXT; 6743: } else if (RAW == '%') { 6744: xmlParsePEReference(ctxt); 6745: } else 6746: xmlParseMarkupDecl(ctxt); 6747: 6748: /* 6749: * Pop-up of finished entities. 
6750: */ 6751: while ((RAW == 0) && (ctxt->inputNr > 1)) 6752: xmlPopInput(ctxt); 6753: 6754: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6755: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6756: break; 6757: } 6758: } 6759: if (xmlParserDebugEntities) { 6760: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6761: xmlGenericError(xmlGenericErrorContext, 6762: "%s(%d): ", ctxt->input->filename, 6763: ctxt->input->line); 6764: xmlGenericError(xmlGenericErrorContext, 6765: "Leaving INCLUDE Conditional Section\n"); 6766: } 6767: 6768: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6769: int state; 6770: xmlParserInputState instate; 6771: int depth = 0; 6772: 6773: SKIP(6); 6774: SKIP_BLANKS; 6775: if (RAW != '[') { 6776: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6777: } else { 6778: if (ctxt->input->id != id) { 6779: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6780: "All markup of the conditional section is not in the same entity\n", 6781: NULL, NULL); 6782: } 6783: NEXT; 6784: } 6785: if (xmlParserDebugEntities) { 6786: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6787: xmlGenericError(xmlGenericErrorContext, 6788: "%s(%d): ", ctxt->input->filename, 6789: ctxt->input->line); 6790: xmlGenericError(xmlGenericErrorContext, 6791: "Entering IGNORE Conditional Section\n"); 6792: } 6793: 6794: /* 6795: * Parse up to the end of the conditional section 6796: * But disable SAX event generating DTD building in the meantime 6797: */ 6798: state = ctxt->disableSAX; 6799: instate = ctxt->instate; 6800: if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6801: ctxt->instate = XML_PARSER_IGNORE; 6802: 6803: while (((depth >= 0) && (RAW != 0)) && 6804: (ctxt->instate != XML_PARSER_EOF)) { 6805: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6806: depth++; 6807: SKIP(3); 6808: continue; 6809: } 6810: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6811: if (--depth >= 0) SKIP(3); 6812: continue; 6813: } 
6814: NEXT; 6815: continue; 6816: } 6817: 6818: ctxt->disableSAX = state; 6819: ctxt->instate = instate; 6820: 6821: if (xmlParserDebugEntities) { 6822: if ((ctxt->input != NULL) && (ctxt->input->filename)) 6823: xmlGenericError(xmlGenericErrorContext, 6824: "%s(%d): ", ctxt->input->filename, 6825: ctxt->input->line); 6826: xmlGenericError(xmlGenericErrorContext, 6827: "Leaving IGNORE Conditional Section\n"); 6828: } 6829: 6830: } else { 6831: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6832: } 6833: 6834: if (RAW == 0) 6835: SHRINK; 6836: 6837: if (RAW == 0) { 6838: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6839: } else { 6840: if (ctxt->input->id != id) { 6841: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6842: "All markup of the conditional section is not in the same entity\n", 6843: NULL, NULL); 6844: } 6845: SKIP(3); 6846: } 6847: } 6848: 6849: /** 6850: * xmlParseMarkupDecl: 6851: * @ctxt: an XML parser context 6852: * 6853: * parse Markup declarations 6854: * 6855: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6856: * NotationDecl | PI | Comment 6857: * 6858: * [ VC: Proper Declaration/PE Nesting ] 6859: * Parameter-entity replacement text must be properly nested with 6860: * markup declarations. That is to say, if either the first character 6861: * or the last character of a markup declaration (markupdecl above) is 6862: * contained in the replacement text for a parameter-entity reference, 6863: * both must be contained in the same replacement text. 6864: * 6865: * [ WFC: PEs in Internal Subset ] 6866: * In the internal DTD subset, parameter-entity references can occur 6867: * only where markup declarations can occur, not within markup declarations. 6868: * (This does not apply to references that occur in external parameter 6869: * entities or to the external subset.) 
6870: */ 6871: void 6872: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6873: GROW; 6874: if (CUR == '<') { 6875: if (NXT(1) == '!') { 6876: switch (NXT(2)) { 6877: case 'E': 6878: if (NXT(3) == 'L') 6879: xmlParseElementDecl(ctxt); 6880: else if (NXT(3) == 'N') 6881: xmlParseEntityDecl(ctxt); 6882: break; 6883: case 'A': 6884: xmlParseAttributeListDecl(ctxt); 6885: break; 6886: case 'N': 6887: xmlParseNotationDecl(ctxt); 6888: break; 6889: case '-': 6890: xmlParseComment(ctxt); 6891: break; 6892: default: 6893: /* there is an error but it will be detected later */ 6894: break; 6895: } 6896: } else if (NXT(1) == '?') { 6897: xmlParsePI(ctxt); 6898: } 6899: } 6900: /* 6901: * This is only for internal subset. On external entities, 6902: * the replacement is done before parsing stage 6903: */ 6904: if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6905: xmlParsePEReference(ctxt); 6906: 6907: /* 6908: * Conditional sections are allowed from entities included 6909: * by PE References in the internal subset. 6910: */ 6911: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6912: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6913: xmlParseConditionalSections(ctxt); 6914: } 6915: } 6916: 6917: ctxt->instate = XML_PARSER_DTD; 6918: } 6919: 6920: /** 6921: * xmlParseTextDecl: 6922: * @ctxt: an XML parser context 6923: * 6924: * parse an XML declaration header for external entities 6925: * 6926: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6927: */ 6928: 6929: void 6930: xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6931: xmlChar *version; 6932: const xmlChar *encoding; 6933: 6934: /* 6935: * We know that '<?xml' is here. 
6936: */ 6937: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6938: SKIP(5); 6939: } else { 6940: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6941: return; 6942: } 6943: 6944: if (!IS_BLANK_CH(CUR)) { 6945: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6946: "Space needed after '<?xml'\n"); 6947: } 6948: SKIP_BLANKS; 6949: 6950: /* 6951: * We may have the VersionInfo here. 6952: */ 6953: version = xmlParseVersionInfo(ctxt); 6954: if (version == NULL) 6955: version = xmlCharStrdup(XML_DEFAULT_VERSION); 6956: else { 6957: if (!IS_BLANK_CH(CUR)) { 6958: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6959: "Space needed here\n"); 6960: } 6961: } 6962: ctxt->input->version = version; 6963: 6964: /* 6965: * We must have the encoding declaration 6966: */ 6967: encoding = xmlParseEncodingDecl(ctxt); 6968: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6969: /* 6970: * The XML REC instructs us to stop parsing right here 6971: */ 6972: return; 6973: } 6974: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6975: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6976: "Missing encoding in text declaration\n"); 6977: } 6978: 6979: SKIP_BLANKS; 6980: if ((RAW == '?') && (NXT(1) == '>')) { 6981: SKIP(2); 6982: } else if (RAW == '>') { 6983: /* Deprecated old WD ... */ 6984: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6985: NEXT; 6986: } else { 6987: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6988: MOVETO_ENDTAG(CUR_PTR); 6989: NEXT; 6990: } 6991: } 6992: 6993: /** 6994: * xmlParseExternalSubset: 6995: * @ctxt: an XML parser context 6996: * @ExternalID: the external identifier 6997: * @SystemID: the system identifier (or URL) 6998: * 6999: * parse Markup declarations from an external subset 7000: * 7001: * [30] extSubset ::= textDecl? 
extSubsetDecl
 *
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
 */
void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
                       const xmlChar *SystemID) {
    xmlDetectSAX2(ctxt);
    GROW;

    /*
     * No encoding known yet: try to detect one from the first four
     * bytes of the input, when that many are available.
     */
    if ((ctxt->encoding == NULL) &&
        (ctxt->input->end - ctxt->input->cur >= 4)) {
        xmlChar start[4];
        xmlCharEncoding enc;

        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE)
            xmlSwitchEncoding(ctxt, enc);
    }

    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
        xmlParseTextDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            ctxt->instate = XML_PARSER_EOF;
            return;
        }
    }
    /* Make sure there is a document, and an internal subset node in it. */
    if (ctxt->myDoc == NULL) {
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
        if (ctxt->myDoc == NULL) {
            xmlErrMemory(ctxt, "New Doc failed");
            return;
        }
        ctxt->myDoc->properties = XML_DOC_INTERNAL;
    }
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);

    ctxt->instate = XML_PARSER_DTD;
    ctxt->external = 1;
    /*
     * Main loop: one item of extSubsetDecl per iteration, for as long
     * as the input looks like a PI, a '<!' declaration, a PE reference
     * or a blank.
     */
    while (((RAW == '<') && (NXT(1) == '?')) ||
           ((RAW == '<') && (NXT(1) == '!')) ||
           (RAW == '%') || IS_BLANK_CH(CUR)) {
        /* position before this iteration, to detect lack of progress */
        const xmlChar *check = CUR_PTR;
        unsigned int cons = ctxt->input->consumed;

        GROW;
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
            xmlParseConditionalSections(ctxt);
        } else if (IS_BLANK_CH(CUR)) {
            NEXT;
        } else if (RAW == '%') {
            xmlParsePEReference(ctxt);
        } else
            xmlParseMarkupDecl(ctxt);

        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);

        /* nothing was consumed: report and stop instead of looping forever */
        if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
            break;
        }
    }

    /* anything left over is not valid external subset content */
    if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
    }

}

/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content, depending on the SAX
 * interface, this may end-up in a call to character() if this is a
 * CharRef, a predefined entity, if there is no reference() callback.
 * or if the parser was asked to switch to that mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * Does nothing when the cursor is not on '&'.  For a general entity the
 * content is parsed on first reference and, depending on the parser
 * settings, either a reference() callback is issued or the entity
 * children are added (directly or as copies) under ctxt->node.
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[10];
        /* character after "&#", i.e. 'x'/'X' for a hexadecimal CharRef */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* keep the notation (hex or decimal) used in the input */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
        return;
    /* remember whether the entity had been checked before this reference */
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity triggers a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if ((ent->checked == 0) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        /* entity counter before parsing, to measure what this entity adds */
        unsigned long oldnbent = ctxt->nbentities;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings
         */
        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
        /*
         * The low bit records that the content holds a '<'; it is the
         * bit xmlParseEntityRef tests for references in attribute values.
         */
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
             (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                if (ctxt->replaceEntities) {
                    /*
                     * Prune it directly in the generated document
                     * except for single text nodes.
                     */
                    if (((list->type == XML_TEXT_NODE) &&
                         (list->next == NULL)) ||
                        (ctxt->parseMode == XML_PARSE_READER)) {
                        list->parent = (xmlNodePtr) ent;
                        list = NULL;
                        ent->owner = 1;
                    } else {
                        ent->owner = 0;
                        /* reparent every node of the list to ctxt->node */
                        while (list != NULL) {
                            list->parent = (xmlNodePtr) ctxt->node;
                            list->doc = ctxt->myDoc;
                            if (list->next == NULL)
                                ent->last = list;
                            list = list->next;
                        }
                        list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                        if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                          xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                    }
                } else {
                    /* no substitution: the entity keeps its own children */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        if (ent->checked == 0)
            ent->checked = 2;
    } else if (ent->checked != 1) {
        /*
         * Not parsed this time: add the entity count stored in
         * ent->checked (value / 2) to the running total.
         */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * through parsing for first checking go through the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * The entity has children here.  When entities are not substituted,
     * just report the reference.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /* add a copy of each entity child under ctxt->node */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* detach the original node before moving it */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /* the copy goes to the entity, the original to the tree */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}

/**
 * xmlParseEntityRef:
 * @ctxt:  an XML parser context
 *
 * parse ENTITY references declarations
 *
 * [68] EntityRef ::= '&' Name ';'
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", the Name given in the entity reference
7522: * must match that in an entity declaration, except that well-formed 7523: * documents need not declare any of the following entities: amp, lt, 7524: * gt, apos, quot. The declaration of a parameter entity must precede 7525: * any reference to it. Similarly, the declaration of a general entity 7526: * must precede any reference to it which appears in a default value in an 7527: * attribute-list declaration. Note that if entities are declared in the 7528: * external subset or in external parameter entities, a non-validating 7529: * processor is not obligated to read and process their declarations; 7530: * for such documents, the rule that an entity must be declared is a 7531: * well-formedness constraint only if standalone='yes'. 7532: * 7533: * [ WFC: Parsed Entity ] 7534: * An entity reference must not contain the name of an unparsed entity 7535: * 7536: * Returns the xmlEntityPtr if found, or NULL otherwise. 7537: */ 7538: xmlEntityPtr 7539: xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7540: const xmlChar *name; 7541: xmlEntityPtr ent = NULL; 7542: 7543: GROW; 7544: if (ctxt->instate == XML_PARSER_EOF) 7545: return(NULL); 7546: 7547: if (RAW != '&') 7548: return(NULL); 7549: NEXT; 7550: name = xmlParseName(ctxt); 7551: if (name == NULL) { 7552: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7553: "xmlParseEntityRef: no name\n"); 7554: return(NULL); 7555: } 7556: if (RAW != ';') { 7557: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7558: return(NULL); 7559: } 7560: NEXT; 7561: 7562: /* 7563: * Predefined entities override any extra definition 7564: */ 7565: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7566: ent = xmlGetPredefinedEntity(name); 7567: if (ent != NULL) 7568: return(ent); 7569: } 7570: 7571: /* 7572: * Increase the number of entity references parsed 7573: */ 7574: ctxt->nbentities++; 7575: 7576: /* 7577: * Ask first SAX for entity resolution, otherwise try the 7578: * entities which may have stored in the parser context. 
7579: */ 7580: if (ctxt->sax != NULL) { 7581: if (ctxt->sax->getEntity != NULL) 7582: ent = ctxt->sax->getEntity(ctxt->userData, name); 7583: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7584: (ctxt->options & XML_PARSE_OLDSAX)) 7585: ent = xmlGetPredefinedEntity(name); 7586: if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7587: (ctxt->userData==ctxt)) { 7588: ent = xmlSAX2GetEntity(ctxt, name); 7589: } 7590: } 7591: if (ctxt->instate == XML_PARSER_EOF) 7592: return(NULL); 7593: /* 7594: * [ WFC: Entity Declared ] 7595: * In a document without any DTD, a document with only an 7596: * internal DTD subset which contains no parameter entity 7597: * references, or a document with "standalone='yes'", the 7598: * Name given in the entity reference must match that in an 7599: * entity declaration, except that well-formed documents 7600: * need not declare any of the following entities: amp, lt, 7601: * gt, apos, quot. 7602: * The declaration of a parameter entity must precede any 7603: * reference to it. 7604: * Similarly, the declaration of a general entity must 7605: * precede any reference to it which appears in a default 7606: * value in an attribute-list declaration. Note that if 7607: * entities are declared in the external subset or in 7608: * external parameter entities, a non-validating processor 7609: * is not obligated to read and process their declarations; 7610: * for such documents, the rule that an entity must be 7611: * declared is a well-formedness constraint only if 7612: * standalone='yes'. 
7613: */ 7614: if (ent == NULL) { 7615: if ((ctxt->standalone == 1) || 7616: ((ctxt->hasExternalSubset == 0) && 7617: (ctxt->hasPErefs == 0))) { 7618: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7619: "Entity '%s' not defined\n", name); 7620: } else { 7621: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7622: "Entity '%s' not defined\n", name); 7623: if ((ctxt->inSubset == 0) && 7624: (ctxt->sax != NULL) && 7625: (ctxt->sax->reference != NULL)) { 7626: ctxt->sax->reference(ctxt->userData, name); 7627: } 7628: } 7629: ctxt->valid = 0; 7630: } 7631: 7632: /* 7633: * [ WFC: Parsed Entity ] 7634: * An entity reference must not contain the name of an 7635: * unparsed entity 7636: */ 7637: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7638: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7639: "Entity reference to unparsed entity %s\n", name); 7640: } 7641: 7642: /* 7643: * [ WFC: No External Entity References ] 7644: * Attribute values cannot contain direct or indirect 7645: * entity references to external entities. 7646: */ 7647: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7648: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7649: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7650: "Attribute references external entity '%s'\n", name); 7651: } 7652: /* 7653: * [ WFC: No < in Attribute Values ] 7654: * The replacement text of any entity referred to directly or 7655: * indirectly in an attribute value (other than "<") must 7656: * not contain a <. 
7657: */ 7658: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7659: (ent != NULL) && 7660: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7661: if ((ent->checked & 1) || ((ent->checked == 0) && 7662: (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) { 7663: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7664: "'<' in entity '%s' is not allowed in attributes values\n", name); 7665: } 7666: } 7667: 7668: /* 7669: * Internal check, no parameter entities here ... 7670: */ 7671: else { 7672: switch (ent->etype) { 7673: case XML_INTERNAL_PARAMETER_ENTITY: 7674: case XML_EXTERNAL_PARAMETER_ENTITY: 7675: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7676: "Attempt to reference the parameter entity '%s'\n", 7677: name); 7678: break; 7679: default: 7680: break; 7681: } 7682: } 7683: 7684: /* 7685: * [ WFC: No Recursion ] 7686: * A parsed entity must not contain a recursive reference 7687: * to itself, either directly or indirectly. 7688: * Done somewhere else 7689: */ 7690: return(ent); 7691: } 7692: 7693: /** 7694: * xmlParseStringEntityRef: 7695: * @ctxt: an XML parser context 7696: * @str: a pointer to an index in the string 7697: * 7698: * parse ENTITY references declarations, but this version parses it from 7699: * a string value. 7700: * 7701: * [68] EntityRef ::= '&' Name ';' 7702: * 7703: * [ WFC: Entity Declared ] 7704: * In a document without any DTD, a document with only an internal DTD 7705: * subset which contains no parameter entity references, or a document 7706: * with "standalone='yes'", the Name given in the entity reference 7707: * must match that in an entity declaration, except that well-formed 7708: * documents need not declare any of the following entities: amp, lt, 7709: * gt, apos, quot. The declaration of a parameter entity must precede 7710: * any reference to it. 
Similarly, the declaration of a general entity 7711: * must precede any reference to it which appears in a default value in an 7712: * attribute-list declaration. Note that if entities are declared in the 7713: * external subset or in external parameter entities, a non-validating 7714: * processor is not obligated to read and process their declarations; 7715: * for such documents, the rule that an entity must be declared is a 7716: * well-formedness constraint only if standalone='yes'. 7717: * 7718: * [ WFC: Parsed Entity ] 7719: * An entity reference must not contain the name of an unparsed entity 7720: * 7721: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 7722: * is updated to the current location in the string. 7723: */ 7724: static xmlEntityPtr 7725: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7726: xmlChar *name; 7727: const xmlChar *ptr; 7728: xmlChar cur; 7729: xmlEntityPtr ent = NULL; 7730: 7731: if ((str == NULL) || (*str == NULL)) 7732: return(NULL); 7733: ptr = *str; 7734: cur = *ptr; 7735: if (cur != '&') 7736: return(NULL); 7737: 7738: ptr++; 7739: name = xmlParseStringName(ctxt, &ptr); 7740: if (name == NULL) { 7741: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7742: "xmlParseStringEntityRef: no name\n"); 7743: *str = ptr; 7744: return(NULL); 7745: } 7746: if (*ptr != ';') { 7747: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7748: xmlFree(name); 7749: *str = ptr; 7750: return(NULL); 7751: } 7752: ptr++; 7753: 7754: 7755: /* 7756: * Predefined entites override any extra definition 7757: */ 7758: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7759: ent = xmlGetPredefinedEntity(name); 7760: if (ent != NULL) { 7761: xmlFree(name); 7762: *str = ptr; 7763: return(ent); 7764: } 7765: } 7766: 7767: /* 7768: * Increate the number of entity references parsed 7769: */ 7770: ctxt->nbentities++; 7771: 7772: /* 7773: * Ask first SAX for entity resolution, otherwise try the 7774: * entities which may 
have stored in the parser context. 7775: */ 7776: if (ctxt->sax != NULL) { 7777: if (ctxt->sax->getEntity != NULL) 7778: ent = ctxt->sax->getEntity(ctxt->userData, name); 7779: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7780: ent = xmlGetPredefinedEntity(name); 7781: if ((ent == NULL) && (ctxt->userData==ctxt)) { 7782: ent = xmlSAX2GetEntity(ctxt, name); 7783: } 7784: } 7785: if (ctxt->instate == XML_PARSER_EOF) { 7786: xmlFree(name); 7787: return(NULL); 7788: } 7789: 7790: /* 7791: * [ WFC: Entity Declared ] 7792: * In a document without any DTD, a document with only an 7793: * internal DTD subset which contains no parameter entity 7794: * references, or a document with "standalone='yes'", the 7795: * Name given in the entity reference must match that in an 7796: * entity declaration, except that well-formed documents 7797: * need not declare any of the following entities: amp, lt, 7798: * gt, apos, quot. 7799: * The declaration of a parameter entity must precede any 7800: * reference to it. 7801: * Similarly, the declaration of a general entity must 7802: * precede any reference to it which appears in a default 7803: * value in an attribute-list declaration. Note that if 7804: * entities are declared in the external subset or in 7805: * external parameter entities, a non-validating processor 7806: * is not obligated to read and process their declarations; 7807: * for such documents, the rule that an entity must be 7808: * declared is a well-formedness constraint only if 7809: * standalone='yes'. 7810: */ 7811: if (ent == NULL) { 7812: if ((ctxt->standalone == 1) || 7813: ((ctxt->hasExternalSubset == 0) && 7814: (ctxt->hasPErefs == 0))) { 7815: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7816: "Entity '%s' not defined\n", name); 7817: } else { 7818: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7819: "Entity '%s' not defined\n", 7820: name); 7821: } 7822: /* TODO ? 
check regressions ctxt->valid = 0; */ 7823: } 7824: 7825: /* 7826: * [ WFC: Parsed Entity ] 7827: * An entity reference must not contain the name of an 7828: * unparsed entity 7829: */ 7830: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7831: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7832: "Entity reference to unparsed entity %s\n", name); 7833: } 7834: 7835: /* 7836: * [ WFC: No External Entity References ] 7837: * Attribute values cannot contain direct or indirect 7838: * entity references to external entities. 7839: */ 7840: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7841: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7842: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7843: "Attribute references external entity '%s'\n", name); 7844: } 7845: /* 7846: * [ WFC: No < in Attribute Values ] 7847: * The replacement text of any entity referred to directly or 7848: * indirectly in an attribute value (other than "<") must 7849: * not contain a <. 7850: */ 7851: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7852: (ent != NULL) && (ent->content != NULL) && 7853: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7854: (xmlStrchr(ent->content, '<'))) { 7855: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7856: "'<' in entity '%s' is not allowed in attributes values\n", 7857: name); 7858: } 7859: 7860: /* 7861: * Internal check, no parameter entities here ... 7862: */ 7863: else { 7864: switch (ent->etype) { 7865: case XML_INTERNAL_PARAMETER_ENTITY: 7866: case XML_EXTERNAL_PARAMETER_ENTITY: 7867: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7868: "Attempt to reference the parameter entity '%s'\n", 7869: name); 7870: break; 7871: default: 7872: break; 7873: } 7874: } 7875: 7876: /* 7877: * [ WFC: No Recursion ] 7878: * A parsed entity must not contain a recursive reference 7879: * to itself, either directly or indirectly. 
7880: * Done somewhere else 7881: */ 7882: 7883: xmlFree(name); 7884: *str = ptr; 7885: return(ent); 7886: } 7887: 7888: /** 7889: * xmlParsePEReference: 7890: * @ctxt: an XML parser context 7891: * 7892: * parse PEReference declarations 7893: * The entity content is handled directly by pushing it's content as 7894: * a new input stream. 7895: * 7896: * [69] PEReference ::= '%' Name ';' 7897: * 7898: * [ WFC: No Recursion ] 7899: * A parsed entity must not contain a recursive 7900: * reference to itself, either directly or indirectly. 7901: * 7902: * [ WFC: Entity Declared ] 7903: * In a document without any DTD, a document with only an internal DTD 7904: * subset which contains no parameter entity references, or a document 7905: * with "standalone='yes'", ... ... The declaration of a parameter 7906: * entity must precede any reference to it... 7907: * 7908: * [ VC: Entity Declared ] 7909: * In a document with an external subset or external parameter entities 7910: * with "standalone='no'", ... ... The declaration of a parameter entity 7911: * must precede any reference to it... 7912: * 7913: * [ WFC: In DTD ] 7914: * Parameter-entity references may only appear in the DTD. 7915: * NOTE: misleading but this is handled. 
7916: */ 7917: void 7918: xmlParsePEReference(xmlParserCtxtPtr ctxt) 7919: { 7920: const xmlChar *name; 7921: xmlEntityPtr entity = NULL; 7922: xmlParserInputPtr input; 7923: 7924: if (RAW != '%') 7925: return; 7926: NEXT; 7927: name = xmlParseName(ctxt); 7928: if (name == NULL) { 7929: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7930: "xmlParsePEReference: no name\n"); 7931: return; 7932: } 7933: if (RAW != ';') { 7934: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7935: return; 7936: } 7937: 7938: NEXT; 7939: 7940: /* 7941: * Increate the number of entity references parsed 7942: */ 7943: ctxt->nbentities++; 7944: 7945: /* 7946: * Request the entity from SAX 7947: */ 7948: if ((ctxt->sax != NULL) && 7949: (ctxt->sax->getParameterEntity != NULL)) 7950: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 7951: if (ctxt->instate == XML_PARSER_EOF) 7952: return; 7953: if (entity == NULL) { 7954: /* 7955: * [ WFC: Entity Declared ] 7956: * In a document without any DTD, a document with only an 7957: * internal DTD subset which contains no parameter entity 7958: * references, or a document with "standalone='yes'", ... 7959: * ... The declaration of a parameter entity must precede 7960: * any reference to it... 7961: */ 7962: if ((ctxt->standalone == 1) || 7963: ((ctxt->hasExternalSubset == 0) && 7964: (ctxt->hasPErefs == 0))) { 7965: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7966: "PEReference: %%%s; not found\n", 7967: name); 7968: } else { 7969: /* 7970: * [ VC: Entity Declared ] 7971: * In a document with an external subset or external 7972: * parameter entities with "standalone='no'", ... 7973: * ... The declaration of a parameter entity must 7974: * precede any reference to it... 
7975: */ 7976: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7977: "PEReference: %%%s; not found\n", 7978: name, NULL); 7979: ctxt->valid = 0; 7980: } 7981: } else { 7982: /* 7983: * Internal checking in case the entity quest barfed 7984: */ 7985: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7986: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7987: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7988: "Internal: %%%s; is not a parameter entity\n", 7989: name, NULL); 7990: } else if (ctxt->input->free != deallocblankswrapper) { 7991: input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7992: if (xmlPushInput(ctxt, input) < 0) 7993: return; 7994: } else { 7995: /* 7996: * TODO !!! 7997: * handle the extra spaces added before and after 7998: * c.f. http://www.w3.org/TR/REC-xml#as-PE 7999: */ 8000: input = xmlNewEntityInputStream(ctxt, entity); 8001: if (xmlPushInput(ctxt, input) < 0) 8002: return; 8003: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8004: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8005: (IS_BLANK_CH(NXT(5)))) { 8006: xmlParseTextDecl(ctxt); 8007: if (ctxt->errNo == 8008: XML_ERR_UNSUPPORTED_ENCODING) { 8009: /* 8010: * The XML REC instructs us to stop parsing 8011: * right here 8012: */ 8013: ctxt->instate = XML_PARSER_EOF; 8014: return; 8015: } 8016: } 8017: } 8018: } 8019: ctxt->hasPErefs = 1; 8020: } 8021: 8022: /** 8023: * xmlLoadEntityContent: 8024: * @ctxt: an XML parser context 8025: * @entity: an unloaded system entity 8026: * 8027: * Load the original content of the given system entity from the 8028: * ExternalID/SystemID given. 
This is to be used for Included in Literal 8029: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8030: * 8031: * Returns 0 in case of success and -1 in case of failure 8032: */ 8033: static int 8034: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8035: xmlParserInputPtr input; 8036: xmlBufferPtr buf; 8037: int l, c; 8038: int count = 0; 8039: 8040: if ((ctxt == NULL) || (entity == NULL) || 8041: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8042: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8043: (entity->content != NULL)) { 8044: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8045: "xmlLoadEntityContent parameter error"); 8046: return(-1); 8047: } 8048: 8049: if (xmlParserDebugEntities) 8050: xmlGenericError(xmlGenericErrorContext, 8051: "Reading %s entity content input\n", entity->name); 8052: 8053: buf = xmlBufferCreate(); 8054: if (buf == NULL) { 8055: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8056: "xmlLoadEntityContent parameter error"); 8057: return(-1); 8058: } 8059: 8060: input = xmlNewEntityInputStream(ctxt, entity); 8061: if (input == NULL) { 8062: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8063: "xmlLoadEntityContent input error"); 8064: xmlBufferFree(buf); 8065: return(-1); 8066: } 8067: 8068: /* 8069: * Push the entity as the current input, read char by char 8070: * saving to the buffer until the end of the entity or an error 8071: */ 8072: if (xmlPushInput(ctxt, input) < 0) { 8073: xmlBufferFree(buf); 8074: return(-1); 8075: } 8076: 8077: GROW; 8078: c = CUR_CHAR(l); 8079: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8080: (IS_CHAR(c))) { 8081: xmlBufferAdd(buf, ctxt->input->cur, l); 8082: if (count++ > XML_PARSER_CHUNK_SIZE) { 8083: count = 0; 8084: GROW; 8085: if (ctxt->instate == XML_PARSER_EOF) { 8086: xmlBufferFree(buf); 8087: return(-1); 8088: } 8089: } 8090: NEXTL(l); 8091: c = CUR_CHAR(l); 8092: if (c == 0) { 8093: count = 0; 8094: GROW; 8095: if 
(ctxt->instate == XML_PARSER_EOF) { 8096: xmlBufferFree(buf); 8097: return(-1); 8098: } 8099: c = CUR_CHAR(l); 8100: } 8101: } 8102: 8103: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8104: xmlPopInput(ctxt); 8105: } else if (!IS_CHAR(c)) { 8106: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8107: "xmlLoadEntityContent: invalid char value %d\n", 8108: c); 8109: xmlBufferFree(buf); 8110: return(-1); 8111: } 8112: entity->content = buf->content; 8113: buf->content = NULL; 8114: xmlBufferFree(buf); 8115: 8116: return(0); 8117: } 8118: 8119: /** 8120: * xmlParseStringPEReference: 8121: * @ctxt: an XML parser context 8122: * @str: a pointer to an index in the string 8123: * 8124: * parse PEReference declarations 8125: * 8126: * [69] PEReference ::= '%' Name ';' 8127: * 8128: * [ WFC: No Recursion ] 8129: * A parsed entity must not contain a recursive 8130: * reference to itself, either directly or indirectly. 8131: * 8132: * [ WFC: Entity Declared ] 8133: * In a document without any DTD, a document with only an internal DTD 8134: * subset which contains no parameter entity references, or a document 8135: * with "standalone='yes'", ... ... The declaration of a parameter 8136: * entity must precede any reference to it... 8137: * 8138: * [ VC: Entity Declared ] 8139: * In a document with an external subset or external parameter entities 8140: * with "standalone='no'", ... ... The declaration of a parameter entity 8141: * must precede any reference to it... 8142: * 8143: * [ WFC: In DTD ] 8144: * Parameter-entity references may only appear in the DTD. 8145: * NOTE: misleading but this is handled. 8146: * 8147: * Returns the string of the entity content. 
8148: * str is updated to the current value of the index 8149: */ 8150: static xmlEntityPtr 8151: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8152: const xmlChar *ptr; 8153: xmlChar cur; 8154: xmlChar *name; 8155: xmlEntityPtr entity = NULL; 8156: 8157: if ((str == NULL) || (*str == NULL)) return(NULL); 8158: ptr = *str; 8159: cur = *ptr; 8160: if (cur != '%') 8161: return(NULL); 8162: ptr++; 8163: name = xmlParseStringName(ctxt, &ptr); 8164: if (name == NULL) { 8165: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8166: "xmlParseStringPEReference: no name\n"); 8167: *str = ptr; 8168: return(NULL); 8169: } 8170: cur = *ptr; 8171: if (cur != ';') { 8172: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8173: xmlFree(name); 8174: *str = ptr; 8175: return(NULL); 8176: } 8177: ptr++; 8178: 8179: /* 8180: * Increate the number of entity references parsed 8181: */ 8182: ctxt->nbentities++; 8183: 8184: /* 8185: * Request the entity from SAX 8186: */ 8187: if ((ctxt->sax != NULL) && 8188: (ctxt->sax->getParameterEntity != NULL)) 8189: entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8190: if (ctxt->instate == XML_PARSER_EOF) { 8191: xmlFree(name); 8192: return(NULL); 8193: } 8194: if (entity == NULL) { 8195: /* 8196: * [ WFC: Entity Declared ] 8197: * In a document without any DTD, a document with only an 8198: * internal DTD subset which contains no parameter entity 8199: * references, or a document with "standalone='yes'", ... 8200: * ... The declaration of a parameter entity must precede 8201: * any reference to it... 8202: */ 8203: if ((ctxt->standalone == 1) || 8204: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8205: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8206: "PEReference: %%%s; not found\n", name); 8207: } else { 8208: /* 8209: * [ VC: Entity Declared ] 8210: * In a document with an external subset or external 8211: * parameter entities with "standalone='no'", ... 8212: * ... 
The declaration of a parameter entity must 8213: * precede any reference to it... 8214: */ 8215: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8216: "PEReference: %%%s; not found\n", 8217: name, NULL); 8218: ctxt->valid = 0; 8219: } 8220: } else { 8221: /* 8222: * Internal checking in case the entity quest barfed 8223: */ 8224: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8225: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8226: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8227: "%%%s; is not a parameter entity\n", 8228: name, NULL); 8229: } 8230: } 8231: ctxt->hasPErefs = 1; 8232: xmlFree(name); 8233: *str = ptr; 8234: return(entity); 8235: } 8236: 8237: /** 8238: * xmlParseDocTypeDecl: 8239: * @ctxt: an XML parser context 8240: * 8241: * parse a DOCTYPE declaration 8242: * 8243: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8244: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8245: * 8246: * [ VC: Root Element Type ] 8247: * The Name in the document type declaration must match the element 8248: * type of the root element. 8249: */ 8250: 8251: void 8252: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8253: const xmlChar *name = NULL; 8254: xmlChar *ExternalID = NULL; 8255: xmlChar *URI = NULL; 8256: 8257: /* 8258: * We know that '<!DOCTYPE' has been detected. 8259: */ 8260: SKIP(9); 8261: 8262: SKIP_BLANKS; 8263: 8264: /* 8265: * Parse the DOCTYPE name. 
8266: */ 8267: name = xmlParseName(ctxt); 8268: if (name == NULL) { 8269: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8270: "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8271: } 8272: ctxt->intSubName = name; 8273: 8274: SKIP_BLANKS; 8275: 8276: /* 8277: * Check for SystemID and ExternalID 8278: */ 8279: URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8280: 8281: if ((URI != NULL) || (ExternalID != NULL)) { 8282: ctxt->hasExternalSubset = 1; 8283: } 8284: ctxt->extSubURI = URI; 8285: ctxt->extSubSystem = ExternalID; 8286: 8287: SKIP_BLANKS; 8288: 8289: /* 8290: * Create and update the internal subset. 8291: */ 8292: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8293: (!ctxt->disableSAX)) 8294: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8295: if (ctxt->instate == XML_PARSER_EOF) 8296: return; 8297: 8298: /* 8299: * Is there any internal subset declarations ? 8300: * they are handled separately in xmlParseInternalSubset() 8301: */ 8302: if (RAW == '[') 8303: return; 8304: 8305: /* 8306: * We should be at the end of the DOCTYPE declaration. 8307: */ 8308: if (RAW != '>') { 8309: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8310: } 8311: NEXT; 8312: } 8313: 8314: /** 8315: * xmlParseInternalSubset: 8316: * @ctxt: an XML parser context 8317: * 8318: * parse the internal subset declaration 8319: * 8320: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8321: */ 8322: 8323: static void 8324: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8325: /* 8326: * Is there any DTD definition ? 8327: */ 8328: if (RAW == '[') { 8329: ctxt->instate = XML_PARSER_DTD; 8330: NEXT; 8331: /* 8332: * Parse the succession of Markup declarations and 8333: * PEReferences. 
8334: * Subsequence (markupdecl | PEReference | S)* 8335: */ 8336: while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8337: const xmlChar *check = CUR_PTR; 8338: unsigned int cons = ctxt->input->consumed; 8339: 8340: SKIP_BLANKS; 8341: xmlParseMarkupDecl(ctxt); 8342: xmlParsePEReference(ctxt); 8343: 8344: /* 8345: * Pop-up of finished entities. 8346: */ 8347: while ((RAW == 0) && (ctxt->inputNr > 1)) 8348: xmlPopInput(ctxt); 8349: 8350: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8351: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8352: "xmlParseInternalSubset: error detected in Markup declaration\n"); 8353: break; 8354: } 8355: } 8356: if (RAW == ']') { 8357: NEXT; 8358: SKIP_BLANKS; 8359: } 8360: } 8361: 8362: /* 8363: * We should be at the end of the DOCTYPE declaration. 8364: */ 8365: if (RAW != '>') { 8366: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8367: } 8368: NEXT; 8369: } 8370: 8371: #ifdef LIBXML_SAX1_ENABLED 8372: /** 8373: * xmlParseAttribute: 8374: * @ctxt: an XML parser context 8375: * @value: a xmlChar ** used to store the value of the attribute 8376: * 8377: * parse an attribute 8378: * 8379: * [41] Attribute ::= Name Eq AttValue 8380: * 8381: * [ WFC: No External Entity References ] 8382: * Attribute values cannot contain direct or indirect entity references 8383: * to external entities. 8384: * 8385: * [ WFC: No < in Attribute Values ] 8386: * The replacement text of any entity referred to directly or indirectly in 8387: * an attribute value (other than "<") must not contain a <. 8388: * 8389: * [ VC: Attribute Value Type ] 8390: * The attribute must have been declared; the value must be of the type 8391: * declared for it. 8392: * 8393: * [25] Eq ::= S? '=' S? 8394: * 8395: * With namespace: 8396: * 8397: * [NS 11] Attribute ::= QName Eq AttValue 8398: * 8399: * Also the case QName == xmlns:??? is handled independently as a namespace 8400: * definition. 
8401: * 8402: * Returns the attribute name, and the value in *value. 8403: */ 8404: 8405: const xmlChar * 8406: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8407: const xmlChar *name; 8408: xmlChar *val; 8409: 8410: *value = NULL; 8411: GROW; 8412: name = xmlParseName(ctxt); 8413: if (name == NULL) { 8414: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8415: "error parsing attribute name\n"); 8416: return(NULL); 8417: } 8418: 8419: /* 8420: * read the value 8421: */ 8422: SKIP_BLANKS; 8423: if (RAW == '=') { 8424: NEXT; 8425: SKIP_BLANKS; 8426: val = xmlParseAttValue(ctxt); 8427: ctxt->instate = XML_PARSER_CONTENT; 8428: } else { 8429: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8430: "Specification mandate value for attribute %s\n", name); 8431: return(NULL); 8432: } 8433: 8434: /* 8435: * Check that xml:lang conforms to the specification 8436: * No more registered as an error, just generate a warning now 8437: * since this was deprecated in XML second edition 8438: */ 8439: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8440: if (!xmlCheckLanguageID(val)) { 8441: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8442: "Malformed value for xml:lang : %s\n", 8443: val, NULL); 8444: } 8445: } 8446: 8447: /* 8448: * Check that xml:space conforms to the specification 8449: */ 8450: if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8451: if (xmlStrEqual(val, BAD_CAST "default")) 8452: *(ctxt->space) = 0; 8453: else if (xmlStrEqual(val, BAD_CAST "preserve")) 8454: *(ctxt->space) = 1; 8455: else { 8456: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8457: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8458: val, NULL); 8459: } 8460: } 8461: 8462: *value = val; 8463: return(name); 8464: } 8465: 8466: /** 8467: * xmlParseStartTag: 8468: * @ctxt: an XML parser context 8469: * 8470: * parse a start of tag either for rule element or 8471: * EmptyElement. In both case we don't parse the tag closing chars. 
8472: * 8473: * [40] STag ::= '<' Name (S Attribute)* S? '>' 8474: * 8475: * [ WFC: Unique Att Spec ] 8476: * No attribute name may appear more than once in the same start-tag or 8477: * empty-element tag. 8478: * 8479: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8480: * 8481: * [ WFC: Unique Att Spec ] 8482: * No attribute name may appear more than once in the same start-tag or 8483: * empty-element tag. 8484: * 8485: * With namespace: 8486: * 8487: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8488: * 8489: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8490: * 8491: * Returns the element name parsed 8492: */ 8493: 8494: const xmlChar * 8495: xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8496: const xmlChar *name; 8497: const xmlChar *attname; 8498: xmlChar *attvalue; 8499: const xmlChar **atts = ctxt->atts; 8500: int nbatts = 0; 8501: int maxatts = ctxt->maxatts; 8502: int i; 8503: 8504: if (RAW != '<') return(NULL); 8505: NEXT1; 8506: 8507: name = xmlParseName(ctxt); 8508: if (name == NULL) { 8509: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8510: "xmlParseStartTag: invalid element name\n"); 8511: return(NULL); 8512: } 8513: 8514: /* 8515: * Now parse the attributes, it ends up with the ending 8516: * 8517: * (S Attribute)* S? 8518: */ 8519: SKIP_BLANKS; 8520: GROW; 8521: 8522: while (((RAW != '>') && 8523: ((RAW != '/') || (NXT(1) != '>')) && 8524: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8525: const xmlChar *q = CUR_PTR; 8526: unsigned int cons = ctxt->input->consumed; 8527: 8528: attname = xmlParseAttribute(ctxt, &attvalue); 8529: if ((attname != NULL) && (attvalue != NULL)) { 8530: /* 8531: * [ WFC: Unique Att Spec ] 8532: * No attribute name may appear more than once in the same 8533: * start-tag or empty-element tag. 
8534: */ 8535: for (i = 0; i < nbatts;i += 2) { 8536: if (xmlStrEqual(atts[i], attname)) { 8537: xmlErrAttributeDup(ctxt, NULL, attname); 8538: xmlFree(attvalue); 8539: goto failed; 8540: } 8541: } 8542: /* 8543: * Add the pair to atts 8544: */ 8545: if (atts == NULL) { 8546: maxatts = 22; /* allow for 10 attrs by default */ 8547: atts = (const xmlChar **) 8548: xmlMalloc(maxatts * sizeof(xmlChar *)); 8549: if (atts == NULL) { 8550: xmlErrMemory(ctxt, NULL); 8551: if (attvalue != NULL) 8552: xmlFree(attvalue); 8553: goto failed; 8554: } 8555: ctxt->atts = atts; 8556: ctxt->maxatts = maxatts; 8557: } else if (nbatts + 4 > maxatts) { 8558: const xmlChar **n; 8559: 8560: maxatts *= 2; 8561: n = (const xmlChar **) xmlRealloc((void *) atts, 8562: maxatts * sizeof(const xmlChar *)); 8563: if (n == NULL) { 8564: xmlErrMemory(ctxt, NULL); 8565: if (attvalue != NULL) 8566: xmlFree(attvalue); 8567: goto failed; 8568: } 8569: atts = n; 8570: ctxt->atts = atts; 8571: ctxt->maxatts = maxatts; 8572: } 8573: atts[nbatts++] = attname; 8574: atts[nbatts++] = attvalue; 8575: atts[nbatts] = NULL; 8576: atts[nbatts + 1] = NULL; 8577: } else { 8578: if (attvalue != NULL) 8579: xmlFree(attvalue); 8580: } 8581: 8582: failed: 8583: 8584: GROW 8585: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8586: break; 8587: if (!IS_BLANK_CH(RAW)) { 8588: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8589: "attributes construct error\n"); 8590: } 8591: SKIP_BLANKS; 8592: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8593: (attname == NULL) && (attvalue == NULL)) { 8594: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8595: "xmlParseStartTag: problem parsing attributes\n"); 8596: break; 8597: } 8598: SHRINK; 8599: GROW; 8600: } 8601: 8602: /* 8603: * SAX: Start of Element ! 
8604: */ 8605: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8606: (!ctxt->disableSAX)) { 8607: if (nbatts > 0) 8608: ctxt->sax->startElement(ctxt->userData, name, atts); 8609: else 8610: ctxt->sax->startElement(ctxt->userData, name, NULL); 8611: } 8612: 8613: if (atts != NULL) { 8614: /* Free only the content strings */ 8615: for (i = 1;i < nbatts;i+=2) 8616: if (atts[i] != NULL) 8617: xmlFree((xmlChar *) atts[i]); 8618: } 8619: return(name); 8620: } 8621: 8622: /** 8623: * xmlParseEndTag1: 8624: * @ctxt: an XML parser context 8625: * @line: line of the start tag 8626: * @nsNr: number of namespaces on the start tag 8627: * 8628: * parse an end of tag 8629: * 8630: * [42] ETag ::= '</' Name S? '>' 8631: * 8632: * With namespace 8633: * 8634: * [NS 9] ETag ::= '</' QName S? '>' 8635: */ 8636: 8637: static void 8638: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8639: const xmlChar *name; 8640: 8641: GROW; 8642: if ((RAW != '<') || (NXT(1) != '/')) { 8643: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8644: "xmlParseEndTag: '</' not found\n"); 8645: return; 8646: } 8647: SKIP(2); 8648: 8649: name = xmlParseNameAndCompare(ctxt,ctxt->name); 8650: 8651: /* 8652: * We should definitely be at the ending "S? '>'" part 8653: */ 8654: GROW; 8655: SKIP_BLANKS; 8656: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8657: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8658: } else 8659: NEXT1; 8660: 8661: /* 8662: * [ WFC: Element Type Match ] 8663: * The Name in an element's end-tag must match the element type in the 8664: * start-tag. 
8665: * 8666: */ 8667: if (name != (xmlChar*)1) { 8668: if (name == NULL) name = BAD_CAST "unparseable"; 8669: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8670: "Opening and ending tag mismatch: %s line %d and %s\n", 8671: ctxt->name, line, name); 8672: } 8673: 8674: /* 8675: * SAX: End of Tag 8676: */ 8677: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8678: (!ctxt->disableSAX)) 8679: ctxt->sax->endElement(ctxt->userData, ctxt->name); 8680: 8681: namePop(ctxt); 8682: spacePop(ctxt); 8683: return; 8684: } 8685: 8686: /** 8687: * xmlParseEndTag: 8688: * @ctxt: an XML parser context 8689: * 8690: * parse an end of tag 8691: * 8692: * [42] ETag ::= '</' Name S? '>' 8693: * 8694: * With namespace 8695: * 8696: * [NS 9] ETag ::= '</' QName S? '>' 8697: */ 8698: 8699: void 8700: xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8701: xmlParseEndTag1(ctxt, 0); 8702: } 8703: #endif /* LIBXML_SAX1_ENABLED */ 8704: 8705: /************************************************************************ 8706: * * 8707: * SAX 2 specific operations * 8708: * * 8709: ************************************************************************/ 8710: 8711: /* 8712: * xmlGetNamespace: 8713: * @ctxt: an XML parser context 8714: * @prefix: the prefix to lookup 8715: * 8716: * Lookup the namespace name for the @prefix (which ca be NULL) 8717: * The prefix must come from the @ctxt->dict dictionnary 8718: * 8719: * Returns the namespace name or NULL if not bound 8720: */ 8721: static const xmlChar * 8722: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8723: int i; 8724: 8725: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8726: for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8727: if (ctxt->nsTab[i] == prefix) { 8728: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8729: return(NULL); 8730: return(ctxt->nsTab[i + 1]); 8731: } 8732: return(NULL); 8733: } 8734: 8735: /** 8736: * xmlParseQName: 8737: * @ctxt: an XML parser context 8738: * @prefix: pointer to store 
the prefix part 8739: * 8740: * parse an XML Namespace QName 8741: * 8742: * [6] QName ::= (Prefix ':')? LocalPart 8743: * [7] Prefix ::= NCName 8744: * [8] LocalPart ::= NCName 8745: * 8746: * Returns the Name parsed or NULL 8747: */ 8748: 8749: static const xmlChar * 8750: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8751: const xmlChar *l, *p; 8752: 8753: GROW; 8754: 8755: l = xmlParseNCName(ctxt); 8756: if (l == NULL) { 8757: if (CUR == ':') { 8758: l = xmlParseName(ctxt); 8759: if (l != NULL) { 8760: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8761: "Failed to parse QName '%s'\n", l, NULL, NULL); 8762: *prefix = NULL; 8763: return(l); 8764: } 8765: } 8766: return(NULL); 8767: } 8768: if (CUR == ':') { 8769: NEXT; 8770: p = l; 8771: l = xmlParseNCName(ctxt); 8772: if (l == NULL) { 8773: xmlChar *tmp; 8774: 8775: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8776: "Failed to parse QName '%s:'\n", p, NULL, NULL); 8777: l = xmlParseNmtoken(ctxt); 8778: if (l == NULL) 8779: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8780: else { 8781: tmp = xmlBuildQName(l, p, NULL, 0); 8782: xmlFree((char *)l); 8783: } 8784: p = xmlDictLookup(ctxt->dict, tmp, -1); 8785: if (tmp != NULL) xmlFree(tmp); 8786: *prefix = NULL; 8787: return(p); 8788: } 8789: if (CUR == ':') { 8790: xmlChar *tmp; 8791: 8792: xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8793: "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8794: NEXT; 8795: tmp = (xmlChar *) xmlParseName(ctxt); 8796: if (tmp != NULL) { 8797: tmp = xmlBuildQName(tmp, l, NULL, 0); 8798: l = xmlDictLookup(ctxt->dict, tmp, -1); 8799: if (tmp != NULL) xmlFree(tmp); 8800: *prefix = p; 8801: return(l); 8802: } 8803: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8804: l = xmlDictLookup(ctxt->dict, tmp, -1); 8805: if (tmp != NULL) xmlFree(tmp); 8806: *prefix = p; 8807: return(l); 8808: } 8809: *prefix = p; 8810: } else 8811: *prefix = NULL; 8812: return(l); 8813: } 8814: 8815: /** 8816: * xmlParseQNameAndCompare: 8817: * @ctxt: an XML parser context 8818: * 
@name: the localname 8819: * @prefix: the prefix, if any. 8820: * 8821: * parse an XML name and compares for match 8822: * (specialized for endtag parsing) 8823: * 8824: * Returns NULL for an illegal name, (xmlChar*) 1 for success 8825: * and the name for mismatch 8826: */ 8827: 8828: static const xmlChar * 8829: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8830: xmlChar const *prefix) { 8831: const xmlChar *cmp; 8832: const xmlChar *in; 8833: const xmlChar *ret; 8834: const xmlChar *prefix2; 8835: 8836: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8837: 8838: GROW; 8839: in = ctxt->input->cur; 8840: 8841: cmp = prefix; 8842: while (*in != 0 && *in == *cmp) { 8843: ++in; 8844: ++cmp; 8845: } 8846: if ((*cmp == 0) && (*in == ':')) { 8847: in++; 8848: cmp = name; 8849: while (*in != 0 && *in == *cmp) { 8850: ++in; 8851: ++cmp; 8852: } 8853: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8854: /* success */ 8855: ctxt->input->cur = in; 8856: return((const xmlChar*) 1); 8857: } 8858: } 8859: /* 8860: * all strings coms from the dictionary, equality can be done directly 8861: */ 8862: ret = xmlParseQName (ctxt, &prefix2); 8863: if ((ret == name) && (prefix == prefix2)) 8864: return((const xmlChar*) 1); 8865: return ret; 8866: } 8867: 8868: /** 8869: * xmlParseAttValueInternal: 8870: * @ctxt: an XML parser context 8871: * @len: attribute len result 8872: * @alloc: whether the attribute was reallocated as a new string 8873: * @normalize: if 1 then further non-CDATA normalization must be done 8874: * 8875: * parse a value for an attribute. 8876: * NOTE: if no normalization is needed, the routine will return pointers 8877: * directly from the data buffer. 
8878: * 8879: * 3.3.3 Attribute-Value Normalization: 8880: * Before the value of an attribute is passed to the application or 8881: * checked for validity, the XML processor must normalize it as follows: 8882: * - a character reference is processed by appending the referenced 8883: * character to the attribute value 8884: * - an entity reference is processed by recursively processing the 8885: * replacement text of the entity 8886: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8887: * appending #x20 to the normalized value, except that only a single 8888: * #x20 is appended for a "#xD#xA" sequence that is part of an external 8889: * parsed entity or the literal entity value of an internal parsed entity 8890: * - other characters are processed by appending them to the normalized value 8891: * If the declared value is not CDATA, then the XML processor must further 8892: * process the normalized attribute value by discarding any leading and 8893: * trailing space (#x20) characters, and by replacing sequences of space 8894: * (#x20) characters by a single space (#x20) character. 8895: * All attributes for which no declaration has been read should be treated 8896: * by a non-validating parser as if declared CDATA. 8897: * 8898: * Returns the AttValue parsed or NULL. The value has to be freed by the 8899: * caller if it was copied, this can be detected by val[*len] == 0. 
8900: */ 8901: 8902: static xmlChar * 8903: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8904: int normalize) 8905: { 8906: xmlChar limit = 0; 8907: const xmlChar *in = NULL, *start, *end, *last; 8908: xmlChar *ret = NULL; 8909: 8910: GROW; 8911: in = (xmlChar *) CUR_PTR; 8912: if (*in != '"' && *in != '\'') { 8913: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8914: return (NULL); 8915: } 8916: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8917: 8918: /* 8919: * try to handle in this routine the most common case where no 8920: * allocation of a new string is required and where content is 8921: * pure ASCII. 8922: */ 8923: limit = *in++; 8924: end = ctxt->input->end; 8925: start = in; 8926: if (in >= end) { 8927: const xmlChar *oldbase = ctxt->input->base; 8928: GROW; 8929: if (oldbase != ctxt->input->base) { 8930: long delta = ctxt->input->base - oldbase; 8931: start = start + delta; 8932: in = in + delta; 8933: } 8934: end = ctxt->input->end; 8935: } 8936: if (normalize) { 8937: /* 8938: * Skip any leading spaces 8939: */ 8940: while ((in < end) && (*in != limit) && 8941: ((*in == 0x20) || (*in == 0x9) || 8942: (*in == 0xA) || (*in == 0xD))) { 8943: in++; 8944: start = in; 8945: if (in >= end) { 8946: const xmlChar *oldbase = ctxt->input->base; 8947: GROW; 8948: if (ctxt->instate == XML_PARSER_EOF) 8949: return(NULL); 8950: if (oldbase != ctxt->input->base) { 8951: long delta = ctxt->input->base - oldbase; 8952: start = start + delta; 8953: in = in + delta; 8954: } 8955: end = ctxt->input->end; 8956: if (((in - start) > XML_MAX_TEXT_LENGTH) && 8957: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8958: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8959: "AttValue length too long\n"); 8960: return(NULL); 8961: } 8962: } 8963: } 8964: while ((in < end) && (*in != limit) && (*in >= 0x20) && 8965: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8966: if ((*in++ == 0x20) && (*in == 0x20)) break; 8967: if (in >= end) { 8968: const 
xmlChar *oldbase = ctxt->input->base; 8969: GROW; 8970: if (ctxt->instate == XML_PARSER_EOF) 8971: return(NULL); 8972: if (oldbase != ctxt->input->base) { 8973: long delta = ctxt->input->base - oldbase; 8974: start = start + delta; 8975: in = in + delta; 8976: } 8977: end = ctxt->input->end; 8978: if (((in - start) > XML_MAX_TEXT_LENGTH) && 8979: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 8980: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 8981: "AttValue length too long\n"); 8982: return(NULL); 8983: } 8984: } 8985: } 8986: last = in; 8987: /* 8988: * skip the trailing blanks 8989: */ 8990: while ((last[-1] == 0x20) && (last > start)) last--; 8991: while ((in < end) && (*in != limit) && 8992: ((*in == 0x20) || (*in == 0x9) || 8993: (*in == 0xA) || (*in == 0xD))) { 8994: in++; 8995: if (in >= end) { 8996: const xmlChar *oldbase = ctxt->input->base; 8997: GROW; 8998: if (ctxt->instate == XML_PARSER_EOF) 8999: return(NULL); 9000: if (oldbase != ctxt->input->base) { 9001: long delta = ctxt->input->base - oldbase; 9002: start = start + delta; 9003: in = in + delta; 9004: last = last + delta; 9005: } 9006: end = ctxt->input->end; 9007: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9008: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9009: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9010: "AttValue length too long\n"); 9011: return(NULL); 9012: } 9013: } 9014: } 9015: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9016: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9017: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9018: "AttValue length too long\n"); 9019: return(NULL); 9020: } 9021: if (*in != limit) goto need_complex; 9022: } else { 9023: while ((in < end) && (*in != limit) && (*in >= 0x20) && 9024: (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9025: in++; 9026: if (in >= end) { 9027: const xmlChar *oldbase = ctxt->input->base; 9028: GROW; 9029: if (ctxt->instate == XML_PARSER_EOF) 9030: return(NULL); 9031: if (oldbase != ctxt->input->base) { 9032: long 
delta = ctxt->input->base - oldbase; 9033: start = start + delta; 9034: in = in + delta; 9035: } 9036: end = ctxt->input->end; 9037: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9038: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9039: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9040: "AttValue length too long\n"); 9041: return(NULL); 9042: } 9043: } 9044: } 9045: last = in; 9046: if (((in - start) > XML_MAX_TEXT_LENGTH) && 9047: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9048: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9049: "AttValue length too long\n"); 9050: return(NULL); 9051: } 9052: if (*in != limit) goto need_complex; 9053: } 9054: in++; 9055: if (len != NULL) { 9056: *len = last - start; 9057: ret = (xmlChar *) start; 9058: } else { 9059: if (alloc) *alloc = 1; 9060: ret = xmlStrndup(start, last - start); 9061: } 9062: CUR_PTR = in; 9063: if (alloc) *alloc = 0; 9064: return ret; 9065: need_complex: 9066: if (alloc) *alloc = 1; 9067: return xmlParseAttValueComplex(ctxt, len, normalize); 9068: } 9069: 9070: /** 9071: * xmlParseAttribute2: 9072: * @ctxt: an XML parser context 9073: * @pref: the element prefix 9074: * @elem: the element name 9075: * @prefix: a xmlChar ** used to store the value of the attribute prefix 9076: * @value: a xmlChar ** used to store the value of the attribute 9077: * @len: an int * to save the length of the attribute 9078: * @alloc: an int * to indicate if the attribute was allocated 9079: * 9080: * parse an attribute in the new SAX2 framework. 9081: * 9082: * Returns the attribute name, and the value in *value, . 
9083: */ 9084: 9085: static const xmlChar * 9086: xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9087: const xmlChar * pref, const xmlChar * elem, 9088: const xmlChar ** prefix, xmlChar ** value, 9089: int *len, int *alloc) 9090: { 9091: const xmlChar *name; 9092: xmlChar *val, *internal_val = NULL; 9093: int normalize = 0; 9094: 9095: *value = NULL; 9096: GROW; 9097: name = xmlParseQName(ctxt, prefix); 9098: if (name == NULL) { 9099: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9100: "error parsing attribute name\n"); 9101: return (NULL); 9102: } 9103: 9104: /* 9105: * get the type if needed 9106: */ 9107: if (ctxt->attsSpecial != NULL) { 9108: int type; 9109: 9110: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9111: pref, elem, *prefix, name); 9112: if (type != 0) 9113: normalize = 1; 9114: } 9115: 9116: /* 9117: * read the value 9118: */ 9119: SKIP_BLANKS; 9120: if (RAW == '=') { 9121: NEXT; 9122: SKIP_BLANKS; 9123: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9124: if (normalize) { 9125: /* 9126: * Sometimes a second normalisation pass for spaces is needed 9127: * but that only happens if charrefs or entities refernces 9128: * have been used in the attribute value, i.e. the attribute 9129: * value have been extracted in an allocated string already. 
9130: */ 9131: if (*alloc) { 9132: const xmlChar *val2; 9133: 9134: val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9135: if ((val2 != NULL) && (val2 != val)) { 9136: xmlFree(val); 9137: val = (xmlChar *) val2; 9138: } 9139: } 9140: } 9141: ctxt->instate = XML_PARSER_CONTENT; 9142: } else { 9143: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9144: "Specification mandate value for attribute %s\n", 9145: name); 9146: return (NULL); 9147: } 9148: 9149: if (*prefix == ctxt->str_xml) { 9150: /* 9151: * Check that xml:lang conforms to the specification 9152: * No more registered as an error, just generate a warning now 9153: * since this was deprecated in XML second edition 9154: */ 9155: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9156: internal_val = xmlStrndup(val, *len); 9157: if (!xmlCheckLanguageID(internal_val)) { 9158: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9159: "Malformed value for xml:lang : %s\n", 9160: internal_val, NULL); 9161: } 9162: } 9163: 9164: /* 9165: * Check that xml:space conforms to the specification 9166: */ 9167: if (xmlStrEqual(name, BAD_CAST "space")) { 9168: internal_val = xmlStrndup(val, *len); 9169: if (xmlStrEqual(internal_val, BAD_CAST "default")) 9170: *(ctxt->space) = 0; 9171: else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9172: *(ctxt->space) = 1; 9173: else { 9174: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9175: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9176: internal_val, NULL); 9177: } 9178: } 9179: if (internal_val) { 9180: xmlFree(internal_val); 9181: } 9182: } 9183: 9184: *value = val; 9185: return (name); 9186: } 9187: /** 9188: * xmlParseStartTag2: 9189: * @ctxt: an XML parser context 9190: * 9191: * parse a start of tag either for rule element or 9192: * EmptyElement. In both case we don't parse the tag closing chars. 9193: * This routine is called when running SAX2 parsing 9194: * 9195: * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9196: * 9197: * [ WFC: Unique Att Spec ] 9198: * No attribute name may appear more than once in the same start-tag or 9199: * empty-element tag. 9200: * 9201: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9202: * 9203: * [ WFC: Unique Att Spec ] 9204: * No attribute name may appear more than once in the same start-tag or 9205: * empty-element tag. 9206: * 9207: * With namespace: 9208: * 9209: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9210: * 9211: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9212: * 9213: * Returns the element name parsed 9214: */ 9215: 9216: static const xmlChar * 9217: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9218: const xmlChar **URI, int *tlen) { 9219: const xmlChar *localname; 9220: const xmlChar *prefix; 9221: const xmlChar *attname; 9222: const xmlChar *aprefix; 9223: const xmlChar *nsname; 9224: xmlChar *attvalue; 9225: const xmlChar **atts = ctxt->atts; 9226: int maxatts = ctxt->maxatts; 9227: int nratts, nbatts, nbdef; 9228: int i, j, nbNs, attval, oldline, oldcol; 9229: const xmlChar *base; 9230: unsigned long cur; 9231: int nsNr = ctxt->nsNr; 9232: 9233: if (RAW != '<') return(NULL); 9234: NEXT1; 9235: 9236: /* 9237: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9238: * point since the attribute values may be stored as pointers to 9239: * the buffer and calling SHRINK would destroy them ! 9240: * The Shrinking is only possible once the full set of attribute 9241: * callbacks have been done. 9242: */ 9243: reparse: 9244: SHRINK; 9245: base = ctxt->input->base; 9246: cur = ctxt->input->cur - ctxt->input->base; 9247: oldline = ctxt->input->line; 9248: oldcol = ctxt->input->col; 9249: nbatts = 0; 9250: nratts = 0; 9251: nbdef = 0; 9252: nbNs = 0; 9253: attval = 0; 9254: /* Forget any namespaces added during an earlier parse of this element. 
*/ 9255: ctxt->nsNr = nsNr; 9256: 9257: localname = xmlParseQName(ctxt, &prefix); 9258: if (localname == NULL) { 9259: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9260: "StartTag: invalid element name\n"); 9261: return(NULL); 9262: } 9263: *tlen = ctxt->input->cur - ctxt->input->base - cur; 9264: 9265: /* 9266: * Now parse the attributes, it ends up with the ending 9267: * 9268: * (S Attribute)* S? 9269: */ 9270: SKIP_BLANKS; 9271: GROW; 9272: if (ctxt->input->base != base) goto base_changed; 9273: 9274: while (((RAW != '>') && 9275: ((RAW != '/') || (NXT(1) != '>')) && 9276: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9277: const xmlChar *q = CUR_PTR; 9278: unsigned int cons = ctxt->input->consumed; 9279: int len = -1, alloc = 0; 9280: 9281: attname = xmlParseAttribute2(ctxt, prefix, localname, 9282: &aprefix, &attvalue, &len, &alloc); 9283: if (ctxt->input->base != base) { 9284: if ((attvalue != NULL) && (alloc != 0)) 9285: xmlFree(attvalue); 9286: attvalue = NULL; 9287: goto base_changed; 9288: } 9289: if ((attname != NULL) && (attvalue != NULL)) { 9290: if (len < 0) len = xmlStrlen(attvalue); 9291: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9292: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9293: xmlURIPtr uri; 9294: 9295: if (*URL != 0) { 9296: uri = xmlParseURI((const char *) URL); 9297: if (uri == NULL) { 9298: xmlNsErr(ctxt, XML_WAR_NS_URI, 9299: "xmlns: '%s' is not a valid URI\n", 9300: URL, NULL, NULL); 9301: } else { 9302: if (uri->scheme == NULL) { 9303: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9304: "xmlns: URI %s is not absolute\n", 9305: URL, NULL, NULL); 9306: } 9307: xmlFreeURI(uri); 9308: } 9309: if (URL == ctxt->str_xml_ns) { 9310: if (attname != ctxt->str_xml) { 9311: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9312: "xml namespace URI cannot be the default namespace\n", 9313: NULL, NULL, NULL); 9314: } 9315: goto skip_default_ns; 9316: } 9317: if ((len == 29) && 9318: (xmlStrEqual(URL, 9319: 
BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9320: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9321: "reuse of the xmlns namespace name is forbidden\n", 9322: NULL, NULL, NULL); 9323: goto skip_default_ns; 9324: } 9325: } 9326: /* 9327: * check that it's not a defined namespace 9328: */ 9329: for (j = 1;j <= nbNs;j++) 9330: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9331: break; 9332: if (j <= nbNs) 9333: xmlErrAttributeDup(ctxt, NULL, attname); 9334: else 9335: if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9336: skip_default_ns: 9337: if (alloc != 0) xmlFree(attvalue); 9338: SKIP_BLANKS; 9339: continue; 9340: } 9341: if (aprefix == ctxt->str_xmlns) { 9342: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9343: xmlURIPtr uri; 9344: 9345: if (attname == ctxt->str_xml) { 9346: if (URL != ctxt->str_xml_ns) { 9347: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9348: "xml namespace prefix mapped to wrong URI\n", 9349: NULL, NULL, NULL); 9350: } 9351: /* 9352: * Do not keep a namespace definition node 9353: */ 9354: goto skip_ns; 9355: } 9356: if (URL == ctxt->str_xml_ns) { 9357: if (attname != ctxt->str_xml) { 9358: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9359: "xml namespace URI mapped to wrong prefix\n", 9360: NULL, NULL, NULL); 9361: } 9362: goto skip_ns; 9363: } 9364: if (attname == ctxt->str_xmlns) { 9365: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9366: "redefinition of the xmlns prefix is forbidden\n", 9367: NULL, NULL, NULL); 9368: goto skip_ns; 9369: } 9370: if ((len == 29) && 9371: (xmlStrEqual(URL, 9372: BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9373: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9374: "reuse of the xmlns namespace name is forbidden\n", 9375: NULL, NULL, NULL); 9376: goto skip_ns; 9377: } 9378: if ((URL == NULL) || (URL[0] == 0)) { 9379: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9380: "xmlns:%s: Empty XML namespace is not allowed\n", 9381: attname, NULL, NULL); 9382: goto skip_ns; 9383: } else { 9384: uri = xmlParseURI((const char *) URL); 
9385: if (uri == NULL) { 9386: xmlNsErr(ctxt, XML_WAR_NS_URI, 9387: "xmlns:%s: '%s' is not a valid URI\n", 9388: attname, URL, NULL); 9389: } else { 9390: if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9391: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9392: "xmlns:%s: URI %s is not absolute\n", 9393: attname, URL, NULL); 9394: } 9395: xmlFreeURI(uri); 9396: } 9397: } 9398: 9399: /* 9400: * check that it's not a defined namespace 9401: */ 9402: for (j = 1;j <= nbNs;j++) 9403: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9404: break; 9405: if (j <= nbNs) 9406: xmlErrAttributeDup(ctxt, aprefix, attname); 9407: else 9408: if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9409: skip_ns: 9410: if (alloc != 0) xmlFree(attvalue); 9411: SKIP_BLANKS; 9412: if (ctxt->input->base != base) goto base_changed; 9413: continue; 9414: } 9415: 9416: /* 9417: * Add the pair to atts 9418: */ 9419: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9420: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9421: if (attvalue[len] == 0) 9422: xmlFree(attvalue); 9423: goto failed; 9424: } 9425: maxatts = ctxt->maxatts; 9426: atts = ctxt->atts; 9427: } 9428: ctxt->attallocs[nratts++] = alloc; 9429: atts[nbatts++] = attname; 9430: atts[nbatts++] = aprefix; 9431: atts[nbatts++] = NULL; /* the URI will be fetched later */ 9432: atts[nbatts++] = attvalue; 9433: attvalue += len; 9434: atts[nbatts++] = attvalue; 9435: /* 9436: * tag if some deallocation is needed 9437: */ 9438: if (alloc != 0) attval = 1; 9439: } else { 9440: if ((attvalue != NULL) && (attvalue[len] == 0)) 9441: xmlFree(attvalue); 9442: } 9443: 9444: failed: 9445: 9446: GROW 9447: if (ctxt->instate == XML_PARSER_EOF) 9448: break; 9449: if (ctxt->input->base != base) goto base_changed; 9450: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9451: break; 9452: if (!IS_BLANK_CH(RAW)) { 9453: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9454: "attributes construct error\n"); 9455: break; 9456: } 9457: SKIP_BLANKS; 9458: if ((cons == 
ctxt->input->consumed) && (q == CUR_PTR) && 9459: (attname == NULL) && (attvalue == NULL)) { 9460: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9461: "xmlParseStartTag: problem parsing attributes\n"); 9462: break; 9463: } 9464: GROW; 9465: if (ctxt->input->base != base) goto base_changed; 9466: } 9467: 9468: /* 9469: * The attributes defaulting 9470: */ 9471: if (ctxt->attsDefault != NULL) { 9472: xmlDefAttrsPtr defaults; 9473: 9474: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9475: if (defaults != NULL) { 9476: for (i = 0;i < defaults->nbAttrs;i++) { 9477: attname = defaults->values[5 * i]; 9478: aprefix = defaults->values[5 * i + 1]; 9479: 9480: /* 9481: * special work for namespaces defaulted defs 9482: */ 9483: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9484: /* 9485: * check that it's not a defined namespace 9486: */ 9487: for (j = 1;j <= nbNs;j++) 9488: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9489: break; 9490: if (j <= nbNs) continue; 9491: 9492: nsname = xmlGetNamespace(ctxt, NULL); 9493: if (nsname != defaults->values[5 * i + 2]) { 9494: if (nsPush(ctxt, NULL, 9495: defaults->values[5 * i + 2]) > 0) 9496: nbNs++; 9497: } 9498: } else if (aprefix == ctxt->str_xmlns) { 9499: /* 9500: * check that it's not a defined namespace 9501: */ 9502: for (j = 1;j <= nbNs;j++) 9503: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9504: break; 9505: if (j <= nbNs) continue; 9506: 9507: nsname = xmlGetNamespace(ctxt, attname); 9508: if (nsname != defaults->values[2]) { 9509: if (nsPush(ctxt, attname, 9510: defaults->values[5 * i + 2]) > 0) 9511: nbNs++; 9512: } 9513: } else { 9514: /* 9515: * check that it's not a defined attribute 9516: */ 9517: for (j = 0;j < nbatts;j+=5) { 9518: if ((attname == atts[j]) && (aprefix == atts[j+1])) 9519: break; 9520: } 9521: if (j < nbatts) continue; 9522: 9523: if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9524: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9525: return(NULL); 9526: } 9527: maxatts 
= ctxt->maxatts; 9528: atts = ctxt->atts; 9529: } 9530: atts[nbatts++] = attname; 9531: atts[nbatts++] = aprefix; 9532: if (aprefix == NULL) 9533: atts[nbatts++] = NULL; 9534: else 9535: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9536: atts[nbatts++] = defaults->values[5 * i + 2]; 9537: atts[nbatts++] = defaults->values[5 * i + 3]; 9538: if ((ctxt->standalone == 1) && 9539: (defaults->values[5 * i + 4] != NULL)) { 9540: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9541: "standalone: attribute %s on %s defaulted from external subset\n", 9542: attname, localname); 9543: } 9544: nbdef++; 9545: } 9546: } 9547: } 9548: } 9549: 9550: /* 9551: * The attributes checkings 9552: */ 9553: for (i = 0; i < nbatts;i += 5) { 9554: /* 9555: * The default namespace does not apply to attribute names. 9556: */ 9557: if (atts[i + 1] != NULL) { 9558: nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9559: if (nsname == NULL) { 9560: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9561: "Namespace prefix %s for %s on %s is not defined\n", 9562: atts[i + 1], atts[i], localname); 9563: } 9564: atts[i + 2] = nsname; 9565: } else 9566: nsname = NULL; 9567: /* 9568: * [ WFC: Unique Att Spec ] 9569: * No attribute name may appear more than once in the same 9570: * start-tag or empty-element tag. 9571: * As extended by the Namespace in XML REC. 
9572: */ 9573: for (j = 0; j < i;j += 5) { 9574: if (atts[i] == atts[j]) { 9575: if (atts[i+1] == atts[j+1]) { 9576: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9577: break; 9578: } 9579: if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9580: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9581: "Namespaced Attribute %s in '%s' redefined\n", 9582: atts[i], nsname, NULL); 9583: break; 9584: } 9585: } 9586: } 9587: } 9588: 9589: nsname = xmlGetNamespace(ctxt, prefix); 9590: if ((prefix != NULL) && (nsname == NULL)) { 9591: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9592: "Namespace prefix %s on %s is not defined\n", 9593: prefix, localname, NULL); 9594: } 9595: *pref = prefix; 9596: *URI = nsname; 9597: 9598: /* 9599: * SAX: Start of Element ! 9600: */ 9601: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9602: (!ctxt->disableSAX)) { 9603: if (nbNs > 0) 9604: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9605: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9606: nbatts / 5, nbdef, atts); 9607: else 9608: ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9609: nsname, 0, NULL, nbatts / 5, nbdef, atts); 9610: } 9611: 9612: /* 9613: * Free up attribute allocated strings if needed 9614: */ 9615: if (attval != 0) { 9616: for (i = 3,j = 0; j < nratts;i += 5,j++) 9617: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9618: xmlFree((xmlChar *) atts[i]); 9619: } 9620: 9621: return(localname); 9622: 9623: base_changed: 9624: /* 9625: * the attribute strings are valid iif the base didn't changed 9626: */ 9627: if (attval != 0) { 9628: for (i = 3,j = 0; j < nratts;i += 5,j++) 9629: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9630: xmlFree((xmlChar *) atts[i]); 9631: } 9632: ctxt->input->cur = ctxt->input->base + cur; 9633: ctxt->input->line = oldline; 9634: ctxt->input->col = oldcol; 9635: if (ctxt->wellFormed == 1) { 9636: goto reparse; 9637: } 9638: return(NULL); 9639: } 9640: 9641: /** 9642: * xmlParseEndTag2: 
9643: * @ctxt: an XML parser context 9644: * @line: line of the start tag 9645: * @nsNr: number of namespaces on the start tag 9646: * 9647: * parse an end of tag 9648: * 9649: * [42] ETag ::= '</' Name S? '>' 9650: * 9651: * With namespace 9652: * 9653: * [NS 9] ETag ::= '</' QName S? '>' 9654: */ 9655: 9656: static void 9657: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9658: const xmlChar *URI, int line, int nsNr, int tlen) { 9659: const xmlChar *name; 9660: 9661: GROW; 9662: if ((RAW != '<') || (NXT(1) != '/')) { 9663: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9664: return; 9665: } 9666: SKIP(2); 9667: 9668: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9669: if (ctxt->input->cur[tlen] == '>') { 9670: ctxt->input->cur += tlen + 1; 9671: goto done; 9672: } 9673: ctxt->input->cur += tlen; 9674: name = (xmlChar*)1; 9675: } else { 9676: if (prefix == NULL) 9677: name = xmlParseNameAndCompare(ctxt, ctxt->name); 9678: else 9679: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9680: } 9681: 9682: /* 9683: * We should definitely be at the ending "S? '>'" part 9684: */ 9685: GROW; 9686: if (ctxt->instate == XML_PARSER_EOF) 9687: return; 9688: SKIP_BLANKS; 9689: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9690: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9691: } else 9692: NEXT1; 9693: 9694: /* 9695: * [ WFC: Element Type Match ] 9696: * The Name in an element's end-tag must match the element type in the 9697: * start-tag. 
9698: * 9699: */ 9700: if (name != (xmlChar*)1) { 9701: if (name == NULL) name = BAD_CAST "unparseable"; 9702: if ((line == 0) && (ctxt->node != NULL)) 9703: line = ctxt->node->line; 9704: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9705: "Opening and ending tag mismatch: %s line %d and %s\n", 9706: ctxt->name, line, name); 9707: } 9708: 9709: /* 9710: * SAX: End of Tag 9711: */ 9712: done: 9713: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9714: (!ctxt->disableSAX)) 9715: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9716: 9717: spacePop(ctxt); 9718: if (nsNr != 0) 9719: nsPop(ctxt, nsNr); 9720: return; 9721: } 9722: 9723: /** 9724: * xmlParseCDSect: 9725: * @ctxt: an XML parser context 9726: * 9727: * Parse escaped pure raw content. 9728: * 9729: * [18] CDSect ::= CDStart CData CDEnd 9730: * 9731: * [19] CDStart ::= '<![CDATA[' 9732: * 9733: * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9734: * 9735: * [21] CDEnd ::= ']]>' 9736: */ 9737: void 9738: xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9739: xmlChar *buf = NULL; 9740: int len = 0; 9741: int size = XML_PARSER_BUFFER_SIZE; 9742: int r, rl; 9743: int s, sl; 9744: int cur, l; 9745: int count = 0; 9746: 9747: /* Check 2.6.0 was NXT(0) not RAW */ 9748: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9749: SKIP(9); 9750: } else 9751: return; 9752: 9753: ctxt->instate = XML_PARSER_CDATA_SECTION; 9754: r = CUR_CHAR(rl); 9755: if (!IS_CHAR(r)) { 9756: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9757: ctxt->instate = XML_PARSER_CONTENT; 9758: return; 9759: } 9760: NEXTL(rl); 9761: s = CUR_CHAR(sl); 9762: if (!IS_CHAR(s)) { 9763: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9764: ctxt->instate = XML_PARSER_CONTENT; 9765: return; 9766: } 9767: NEXTL(sl); 9768: cur = CUR_CHAR(l); 9769: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9770: if (buf == NULL) { 9771: xmlErrMemory(ctxt, NULL); 9772: return; 9773: } 9774: while 
(IS_CHAR(cur) && 9775: ((r != ']') || (s != ']') || (cur != '>'))) { 9776: if (len + 5 >= size) { 9777: xmlChar *tmp; 9778: 9779: if ((size > XML_MAX_TEXT_LENGTH) && 9780: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9781: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9782: "CData section too big found", NULL); 9783: xmlFree (buf); 9784: return; 9785: } 9786: tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9787: if (tmp == NULL) { 9788: xmlFree(buf); 9789: xmlErrMemory(ctxt, NULL); 9790: return; 9791: } 9792: buf = tmp; 9793: size *= 2; 9794: } 9795: COPY_BUF(rl,buf,len,r); 9796: r = s; 9797: rl = sl; 9798: s = cur; 9799: sl = l; 9800: count++; 9801: if (count > 50) { 9802: GROW; 9803: if (ctxt->instate == XML_PARSER_EOF) { 9804: xmlFree(buf); 9805: return; 9806: } 9807: count = 0; 9808: } 9809: NEXTL(l); 9810: cur = CUR_CHAR(l); 9811: } 9812: buf[len] = 0; 9813: ctxt->instate = XML_PARSER_CONTENT; 9814: if (cur != '>') { 9815: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9816: "CData section not finished\n%.50s\n", buf); 9817: xmlFree(buf); 9818: return; 9819: } 9820: NEXTL(l); 9821: 9822: /* 9823: * OK the buffer is to be consumed as cdata. 
9824: */ 9825: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9826: if (ctxt->sax->cdataBlock != NULL) 9827: ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9828: else if (ctxt->sax->characters != NULL) 9829: ctxt->sax->characters(ctxt->userData, buf, len); 9830: } 9831: xmlFree(buf); 9832: } 9833: 9834: /** 9835: * xmlParseContent: 9836: * @ctxt: an XML parser context 9837: * 9838: * Parse a content: 9839: * 9840: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9841: */ 9842: 9843: void 9844: xmlParseContent(xmlParserCtxtPtr ctxt) { 9845: GROW; 9846: while ((RAW != 0) && 9847: ((RAW != '<') || (NXT(1) != '/')) && 9848: (ctxt->instate != XML_PARSER_EOF)) { 9849: const xmlChar *test = CUR_PTR; 9850: unsigned int cons = ctxt->input->consumed; 9851: const xmlChar *cur = ctxt->input->cur; 9852: 9853: /* 9854: * First case : a Processing Instruction. 9855: */ 9856: if ((*cur == '<') && (cur[1] == '?')) { 9857: xmlParsePI(ctxt); 9858: } 9859: 9860: /* 9861: * Second case : a CDSection 9862: */ 9863: /* 2.6.0 test was *cur not RAW */ 9864: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9865: xmlParseCDSect(ctxt); 9866: } 9867: 9868: /* 9869: * Third case : a comment 9870: */ 9871: else if ((*cur == '<') && (NXT(1) == '!') && 9872: (NXT(2) == '-') && (NXT(3) == '-')) { 9873: xmlParseComment(ctxt); 9874: ctxt->instate = XML_PARSER_CONTENT; 9875: } 9876: 9877: /* 9878: * Fourth case : a sub-element. 9879: */ 9880: else if (*cur == '<') { 9881: xmlParseElement(ctxt); 9882: } 9883: 9884: /* 9885: * Fifth case : a reference. If if has not been resolved, 9886: * parsing returns it's Name, create the node 9887: */ 9888: 9889: else if (*cur == '&') { 9890: xmlParseReference(ctxt); 9891: } 9892: 9893: /* 9894: * Last case, text. Note that References are handled directly. 9895: */ 9896: else { 9897: xmlParseCharData(ctxt, 0); 9898: } 9899: 9900: GROW; 9901: /* 9902: * Pop-up of finished entities. 
9903: */ 9904: while ((RAW == 0) && (ctxt->inputNr > 1)) 9905: xmlPopInput(ctxt); 9906: SHRINK; 9907: 9908: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9909: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9910: "detected an error in element content\n"); 9911: ctxt->instate = XML_PARSER_EOF; 9912: break; 9913: } 9914: } 9915: } 9916: 9917: /** 9918: * xmlParseElement: 9919: * @ctxt: an XML parser context 9920: * 9921: * parse an XML element, this is highly recursive 9922: * 9923: * [39] element ::= EmptyElemTag | STag content ETag 9924: * 9925: * [ WFC: Element Type Match ] 9926: * The Name in an element's end-tag must match the element type in the 9927: * start-tag. 9928: * 9929: */ 9930: 9931: void 9932: xmlParseElement(xmlParserCtxtPtr ctxt) { 9933: const xmlChar *name; 9934: const xmlChar *prefix = NULL; 9935: const xmlChar *URI = NULL; 9936: xmlParserNodeInfo node_info; 9937: int line, tlen = 0; 9938: xmlNodePtr ret; 9939: int nsNr = ctxt->nsNr; 9940: 9941: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9942: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9943: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9944: "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9945: xmlParserMaxDepth); 9946: ctxt->instate = XML_PARSER_EOF; 9947: return; 9948: } 9949: 9950: /* Capture start position */ 9951: if (ctxt->record_info) { 9952: node_info.begin_pos = ctxt->input->consumed + 9953: (CUR_PTR - ctxt->input->base); 9954: node_info.begin_line = ctxt->input->line; 9955: } 9956: 9957: if (ctxt->spaceNr == 0) 9958: spacePush(ctxt, -1); 9959: else if (*ctxt->space == -2) 9960: spacePush(ctxt, -1); 9961: else 9962: spacePush(ctxt, *ctxt->space); 9963: 9964: line = ctxt->input->line; 9965: #ifdef LIBXML_SAX1_ENABLED 9966: if (ctxt->sax2) 9967: #endif /* LIBXML_SAX1_ENABLED */ 9968: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9969: #ifdef LIBXML_SAX1_ENABLED 9970: else 9971: name = xmlParseStartTag(ctxt); 9972: #endif /* 
LIBXML_SAX1_ENABLED */ 9973: if (ctxt->instate == XML_PARSER_EOF) 9974: return; 9975: if (name == NULL) { 9976: spacePop(ctxt); 9977: return; 9978: } 9979: namePush(ctxt, name); 9980: ret = ctxt->node; 9981: 9982: #ifdef LIBXML_VALID_ENABLED 9983: /* 9984: * [ VC: Root Element Type ] 9985: * The Name in the document type declaration must match the element 9986: * type of the root element. 9987: */ 9988: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9989: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9990: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9991: #endif /* LIBXML_VALID_ENABLED */ 9992: 9993: /* 9994: * Check for an Empty Element. 9995: */ 9996: if ((RAW == '/') && (NXT(1) == '>')) { 9997: SKIP(2); 9998: if (ctxt->sax2) { 9999: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10000: (!ctxt->disableSAX)) 10001: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10002: #ifdef LIBXML_SAX1_ENABLED 10003: } else { 10004: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10005: (!ctxt->disableSAX)) 10006: ctxt->sax->endElement(ctxt->userData, name); 10007: #endif /* LIBXML_SAX1_ENABLED */ 10008: } 10009: namePop(ctxt); 10010: spacePop(ctxt); 10011: if (nsNr != ctxt->nsNr) 10012: nsPop(ctxt, ctxt->nsNr - nsNr); 10013: if ( ret != NULL && ctxt->record_info ) { 10014: node_info.end_pos = ctxt->input->consumed + 10015: (CUR_PTR - ctxt->input->base); 10016: node_info.end_line = ctxt->input->line; 10017: node_info.node = ret; 10018: xmlParserAddNodeInfo(ctxt, &node_info); 10019: } 10020: return; 10021: } 10022: if (RAW == '>') { 10023: NEXT1; 10024: } else { 10025: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10026: "Couldn't find end of Start Tag %s line %d\n", 10027: name, line, NULL); 10028: 10029: /* 10030: * end of parsing of this node. 
10031: */ 10032: nodePop(ctxt); 10033: namePop(ctxt); 10034: spacePop(ctxt); 10035: if (nsNr != ctxt->nsNr) 10036: nsPop(ctxt, ctxt->nsNr - nsNr); 10037: 10038: /* 10039: * Capture end position and add node 10040: */ 10041: if ( ret != NULL && ctxt->record_info ) { 10042: node_info.end_pos = ctxt->input->consumed + 10043: (CUR_PTR - ctxt->input->base); 10044: node_info.end_line = ctxt->input->line; 10045: node_info.node = ret; 10046: xmlParserAddNodeInfo(ctxt, &node_info); 10047: } 10048: return; 10049: } 10050: 10051: /* 10052: * Parse the content of the element: 10053: */ 10054: xmlParseContent(ctxt); 10055: if (ctxt->instate == XML_PARSER_EOF) 10056: return; 10057: if (!IS_BYTE_CHAR(RAW)) { 10058: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10059: "Premature end of data in tag %s line %d\n", 10060: name, line, NULL); 10061: 10062: /* 10063: * end of parsing of this node. 10064: */ 10065: nodePop(ctxt); 10066: namePop(ctxt); 10067: spacePop(ctxt); 10068: if (nsNr != ctxt->nsNr) 10069: nsPop(ctxt, ctxt->nsNr - nsNr); 10070: return; 10071: } 10072: 10073: /* 10074: * parse the end of tag: '</' should be here. 10075: */ 10076: if (ctxt->sax2) { 10077: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10078: namePop(ctxt); 10079: } 10080: #ifdef LIBXML_SAX1_ENABLED 10081: else 10082: xmlParseEndTag1(ctxt, line); 10083: #endif /* LIBXML_SAX1_ENABLED */ 10084: 10085: /* 10086: * Capture end position and add node 10087: */ 10088: if ( ret != NULL && ctxt->record_info ) { 10089: node_info.end_pos = ctxt->input->consumed + 10090: (CUR_PTR - ctxt->input->base); 10091: node_info.end_line = ctxt->input->line; 10092: node_info.node = ret; 10093: xmlParserAddNodeInfo(ctxt, &node_info); 10094: } 10095: } 10096: 10097: /** 10098: * xmlParseVersionNum: 10099: * @ctxt: an XML parser context 10100: * 10101: * parse the XML version value. 10102: * 10103: * [26] VersionNum ::= '1.' 
[0-9]+ 10104: * 10105: * In practice allow [0-9].[0-9]+ at that level 10106: * 10107: * Returns the string giving the XML version number, or NULL 10108: */ 10109: xmlChar * 10110: xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10111: xmlChar *buf = NULL; 10112: int len = 0; 10113: int size = 10; 10114: xmlChar cur; 10115: 10116: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10117: if (buf == NULL) { 10118: xmlErrMemory(ctxt, NULL); 10119: return(NULL); 10120: } 10121: cur = CUR; 10122: if (!((cur >= '0') && (cur <= '9'))) { 10123: xmlFree(buf); 10124: return(NULL); 10125: } 10126: buf[len++] = cur; 10127: NEXT; 10128: cur=CUR; 10129: if (cur != '.') { 10130: xmlFree(buf); 10131: return(NULL); 10132: } 10133: buf[len++] = cur; 10134: NEXT; 10135: cur=CUR; 10136: while ((cur >= '0') && (cur <= '9')) { 10137: if (len + 1 >= size) { 10138: xmlChar *tmp; 10139: 10140: size *= 2; 10141: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10142: if (tmp == NULL) { 10143: xmlFree(buf); 10144: xmlErrMemory(ctxt, NULL); 10145: return(NULL); 10146: } 10147: buf = tmp; 10148: } 10149: buf[len++] = cur; 10150: NEXT; 10151: cur=CUR; 10152: } 10153: buf[len] = 0; 10154: return(buf); 10155: } 10156: 10157: /** 10158: * xmlParseVersionInfo: 10159: * @ctxt: an XML parser context 10160: * 10161: * parse the XML version. 10162: * 10163: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10164: * 10165: * [25] Eq ::= S? '=' S? 10166: * 10167: * Returns the version string, e.g. 
"1.0" 10168: */ 10169: 10170: xmlChar * 10171: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10172: xmlChar *version = NULL; 10173: 10174: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10175: SKIP(7); 10176: SKIP_BLANKS; 10177: if (RAW != '=') { 10178: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10179: return(NULL); 10180: } 10181: NEXT; 10182: SKIP_BLANKS; 10183: if (RAW == '"') { 10184: NEXT; 10185: version = xmlParseVersionNum(ctxt); 10186: if (RAW != '"') { 10187: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10188: } else 10189: NEXT; 10190: } else if (RAW == '\''){ 10191: NEXT; 10192: version = xmlParseVersionNum(ctxt); 10193: if (RAW != '\'') { 10194: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10195: } else 10196: NEXT; 10197: } else { 10198: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10199: } 10200: } 10201: return(version); 10202: } 10203: 10204: /** 10205: * xmlParseEncName: 10206: * @ctxt: an XML parser context 10207: * 10208: * parse the XML encoding name 10209: * 10210: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10211: * 10212: * Returns the encoding name value or NULL 10213: */ 10214: xmlChar * 10215: xmlParseEncName(xmlParserCtxtPtr ctxt) { 10216: xmlChar *buf = NULL; 10217: int len = 0; 10218: int size = 10; 10219: xmlChar cur; 10220: 10221: cur = CUR; 10222: if (((cur >= 'a') && (cur <= 'z')) || 10223: ((cur >= 'A') && (cur <= 'Z'))) { 10224: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10225: if (buf == NULL) { 10226: xmlErrMemory(ctxt, NULL); 10227: return(NULL); 10228: } 10229: 10230: buf[len++] = cur; 10231: NEXT; 10232: cur = CUR; 10233: while (((cur >= 'a') && (cur <= 'z')) || 10234: ((cur >= 'A') && (cur <= 'Z')) || 10235: ((cur >= '0') && (cur <= '9')) || 10236: (cur == '.') || (cur == '_') || 10237: (cur == '-')) { 10238: if (len + 1 >= size) { 10239: xmlChar *tmp; 10240: 10241: size *= 2; 10242: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10243: if (tmp == NULL) { 
10244: xmlErrMemory(ctxt, NULL); 10245: xmlFree(buf); 10246: return(NULL); 10247: } 10248: buf = tmp; 10249: } 10250: buf[len++] = cur; 10251: NEXT; 10252: cur = CUR; 10253: if (cur == 0) { 10254: SHRINK; 10255: GROW; 10256: cur = CUR; 10257: } 10258: } 10259: buf[len] = 0; 10260: } else { 10261: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10262: } 10263: return(buf); 10264: } 10265: 10266: /** 10267: * xmlParseEncodingDecl: 10268: * @ctxt: an XML parser context 10269: * 10270: * parse the XML encoding declaration 10271: * 10272: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10273: * 10274: * this setups the conversion filters. 10275: * 10276: * Returns the encoding value or NULL 10277: */ 10278: 10279: const xmlChar * 10280: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10281: xmlChar *encoding = NULL; 10282: 10283: SKIP_BLANKS; 10284: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10285: SKIP(8); 10286: SKIP_BLANKS; 10287: if (RAW != '=') { 10288: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10289: return(NULL); 10290: } 10291: NEXT; 10292: SKIP_BLANKS; 10293: if (RAW == '"') { 10294: NEXT; 10295: encoding = xmlParseEncName(ctxt); 10296: if (RAW != '"') { 10297: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10298: } else 10299: NEXT; 10300: } else if (RAW == '\''){ 10301: NEXT; 10302: encoding = xmlParseEncName(ctxt); 10303: if (RAW != '\'') { 10304: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10305: } else 10306: NEXT; 10307: } else { 10308: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10309: } 10310: 10311: /* 10312: * Non standard parsing, allowing the user to ignore encoding 10313: */ 10314: if (ctxt->options & XML_PARSE_IGNORE_ENC) 10315: return(encoding); 10316: 10317: /* 10318: * UTF-16 encoding stwich has already taken place at this stage, 10319: * more over the little-endian/big-endian selection is already done 10320: */ 10321: if ((encoding != NULL) && 10322: 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10323: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10324: /* 10325: * If no encoding was passed to the parser, that we are 10326: * using UTF-16 and no decoder is present i.e. the 10327: * document is apparently UTF-8 compatible, then raise an 10328: * encoding mismatch fatal error 10329: */ 10330: if ((ctxt->encoding == NULL) && 10331: (ctxt->input->buf != NULL) && 10332: (ctxt->input->buf->encoder == NULL)) { 10333: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10334: "Document labelled UTF-16 but has UTF-8 content\n"); 10335: } 10336: if (ctxt->encoding != NULL) 10337: xmlFree((xmlChar *) ctxt->encoding); 10338: ctxt->encoding = encoding; 10339: } 10340: /* 10341: * UTF-8 encoding is handled natively 10342: */ 10343: else if ((encoding != NULL) && 10344: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10345: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10346: if (ctxt->encoding != NULL) 10347: xmlFree((xmlChar *) ctxt->encoding); 10348: ctxt->encoding = encoding; 10349: } 10350: else if (encoding != NULL) { 10351: xmlCharEncodingHandlerPtr handler; 10352: 10353: if (ctxt->input->encoding != NULL) 10354: xmlFree((xmlChar *) ctxt->input->encoding); 10355: ctxt->input->encoding = encoding; 10356: 10357: handler = xmlFindCharEncodingHandler((const char *) encoding); 10358: if (handler != NULL) { 10359: xmlSwitchToEncoding(ctxt, handler); 10360: } else { 10361: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10362: "Unsupported encoding %s\n", encoding); 10363: return(NULL); 10364: } 10365: } 10366: } 10367: return(encoding); 10368: } 10369: 10370: /** 10371: * xmlParseSDDecl: 10372: * @ctxt: an XML parser context 10373: * 10374: * parse the XML standalone declaration 10375: * 10376: * [32] SDDecl ::= S 'standalone' Eq 10377: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10378: * 10379: * [ VC: Standalone Document Declaration ] 10380: * TODO The standalone document declaration must have the 
value "no" 10381: * if any external markup declarations contain declarations of: 10382: * - attributes with default values, if elements to which these 10383: * attributes apply appear in the document without specifications 10384: * of values for these attributes, or 10385: * - entities (other than amp, lt, gt, apos, quot), if references 10386: * to those entities appear in the document, or 10387: * - attributes with values subject to normalization, where the 10388: * attribute appears in the document with a value which will change 10389: * as a result of normalization, or 10390: * - element types with element content, if white space occurs directly 10391: * within any instance of those types. 10392: * 10393: * Returns: 10394: * 1 if standalone="yes" 10395: * 0 if standalone="no" 10396: * -2 if standalone attribute is missing or invalid 10397: * (A standalone value of -2 means that the XML declaration was found, 10398: * but no value was specified for the standalone attribute). 10399: */ 10400: 10401: int 10402: xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10403: int standalone = -2; 10404: 10405: SKIP_BLANKS; 10406: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10407: SKIP(10); 10408: SKIP_BLANKS; 10409: if (RAW != '=') { 10410: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10411: return(standalone); 10412: } 10413: NEXT; 10414: SKIP_BLANKS; 10415: if (RAW == '\''){ 10416: NEXT; 10417: if ((RAW == 'n') && (NXT(1) == 'o')) { 10418: standalone = 0; 10419: SKIP(2); 10420: } else if ((RAW == 'y') && (NXT(1) == 'e') && 10421: (NXT(2) == 's')) { 10422: standalone = 1; 10423: SKIP(3); 10424: } else { 10425: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10426: } 10427: if (RAW != '\'') { 10428: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10429: } else 10430: NEXT; 10431: } else if (RAW == '"'){ 10432: NEXT; 10433: if ((RAW == 'n') && (NXT(1) == 'o')) { 10434: standalone = 0; 10435: SKIP(2); 10436: } else if ((RAW == 'y') && (NXT(1) == 
'e') && 10437: (NXT(2) == 's')) { 10438: standalone = 1; 10439: SKIP(3); 10440: } else { 10441: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10442: } 10443: if (RAW != '"') { 10444: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10445: } else 10446: NEXT; 10447: } else { 10448: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10449: } 10450: } 10451: return(standalone); 10452: } 10453: 10454: /** 10455: * xmlParseXMLDecl: 10456: * @ctxt: an XML parser context 10457: * 10458: * parse an XML declaration header 10459: * 10460: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10461: */ 10462: 10463: void 10464: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10465: xmlChar *version; 10466: 10467: /* 10468: * This value for standalone indicates that the document has an 10469: * XML declaration but it does not have a standalone attribute. 10470: * It will be overwritten later if a standalone attribute is found. 10471: */ 10472: ctxt->input->standalone = -2; 10473: 10474: /* 10475: * We know that '<?xml' is here. 10476: */ 10477: SKIP(5); 10478: 10479: if (!IS_BLANK_CH(RAW)) { 10480: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10481: "Blank needed after '<?xml'\n"); 10482: } 10483: SKIP_BLANKS; 10484: 10485: /* 10486: * We must have the VersionInfo here. 
10487: */ 10488: version = xmlParseVersionInfo(ctxt); 10489: if (version == NULL) { 10490: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10491: } else { 10492: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10493: /* 10494: * Changed here for XML-1.0 5th edition 10495: */ 10496: if (ctxt->options & XML_PARSE_OLD10) { 10497: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10498: "Unsupported version '%s'\n", 10499: version); 10500: } else { 10501: if ((version[0] == '1') && ((version[1] == '.'))) { 10502: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10503: "Unsupported version '%s'\n", 10504: version, NULL); 10505: } else { 10506: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10507: "Unsupported version '%s'\n", 10508: version); 10509: } 10510: } 10511: } 10512: if (ctxt->version != NULL) 10513: xmlFree((void *) ctxt->version); 10514: ctxt->version = version; 10515: } 10516: 10517: /* 10518: * We may have the encoding declaration 10519: */ 10520: if (!IS_BLANK_CH(RAW)) { 10521: if ((RAW == '?') && (NXT(1) == '>')) { 10522: SKIP(2); 10523: return; 10524: } 10525: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10526: } 10527: xmlParseEncodingDecl(ctxt); 10528: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10529: /* 10530: * The XML REC instructs us to stop parsing right here 10531: */ 10532: return; 10533: } 10534: 10535: /* 10536: * We may have the standalone status. 
10537: */ 10538: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10539: if ((RAW == '?') && (NXT(1) == '>')) { 10540: SKIP(2); 10541: return; 10542: } 10543: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10544: } 10545: 10546: /* 10547: * We can grow the input buffer freely at that point 10548: */ 10549: GROW; 10550: 10551: SKIP_BLANKS; 10552: ctxt->input->standalone = xmlParseSDDecl(ctxt); 10553: 10554: SKIP_BLANKS; 10555: if ((RAW == '?') && (NXT(1) == '>')) { 10556: SKIP(2); 10557: } else if (RAW == '>') { 10558: /* Deprecated old WD ... */ 10559: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10560: NEXT; 10561: } else { 10562: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10563: MOVETO_ENDTAG(CUR_PTR); 10564: NEXT; 10565: } 10566: } 10567: 10568: /** 10569: * xmlParseMisc: 10570: * @ctxt: an XML parser context 10571: * 10572: * parse an XML Misc* optional field. 10573: * 10574: * [27] Misc ::= Comment | PI | S 10575: */ 10576: 10577: void 10578: xmlParseMisc(xmlParserCtxtPtr ctxt) { 10579: while ((ctxt->instate != XML_PARSER_EOF) && 10580: (((RAW == '<') && (NXT(1) == '?')) || 10581: (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10582: IS_BLANK_CH(CUR))) { 10583: if ((RAW == '<') && (NXT(1) == '?')) { 10584: xmlParsePI(ctxt); 10585: } else if (IS_BLANK_CH(CUR)) { 10586: NEXT; 10587: } else 10588: xmlParseComment(ctxt); 10589: } 10590: } 10591: 10592: /** 10593: * xmlParseDocument: 10594: * @ctxt: an XML parser context 10595: * 10596: * parse an XML document (and build a tree if using the standard SAX 10597: * interface). 10598: * 10599: * [1] document ::= prolog element Misc* 10600: * 10601: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10602: * 10603: * Returns 0, -1 in case of error. the parser context is augmented 10604: * as a result of the parsing. 
10605: */ 10606: 10607: int 10608: xmlParseDocument(xmlParserCtxtPtr ctxt) { 10609: xmlChar start[4]; 10610: xmlCharEncoding enc; 10611: 10612: xmlInitParser(); 10613: 10614: if ((ctxt == NULL) || (ctxt->input == NULL)) 10615: return(-1); 10616: 10617: GROW; 10618: 10619: /* 10620: * SAX: detecting the level. 10621: */ 10622: xmlDetectSAX2(ctxt); 10623: 10624: /* 10625: * SAX: beginning of the document processing. 10626: */ 10627: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10628: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10629: if (ctxt->instate == XML_PARSER_EOF) 10630: return(-1); 10631: 10632: if ((ctxt->encoding == NULL) && 10633: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10634: /* 10635: * Get the 4 first bytes and decode the charset 10636: * if enc != XML_CHAR_ENCODING_NONE 10637: * plug some encoding conversion routines. 10638: */ 10639: start[0] = RAW; 10640: start[1] = NXT(1); 10641: start[2] = NXT(2); 10642: start[3] = NXT(3); 10643: enc = xmlDetectCharEncoding(&start[0], 4); 10644: if (enc != XML_CHAR_ENCODING_NONE) { 10645: xmlSwitchEncoding(ctxt, enc); 10646: } 10647: } 10648: 10649: 10650: if (CUR == 0) { 10651: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10652: } 10653: 10654: /* 10655: * Check for the XMLDecl in the Prolog. 10656: * do not GROW here to avoid the detected encoder to decode more 10657: * than just the first line, unless the amount of data is really 10658: * too small to hold "<?xml version="1.0" encoding="foo" 10659: */ 10660: if ((ctxt->input->end - ctxt->input->cur) < 35) { 10661: GROW; 10662: } 10663: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10664: 10665: /* 10666: * Note that we will switch encoding on the fly. 
10667: */ 10668: xmlParseXMLDecl(ctxt); 10669: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10670: /* 10671: * The XML REC instructs us to stop parsing right here 10672: */ 10673: return(-1); 10674: } 10675: ctxt->standalone = ctxt->input->standalone; 10676: SKIP_BLANKS; 10677: } else { 10678: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10679: } 10680: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10681: ctxt->sax->startDocument(ctxt->userData); 10682: if (ctxt->instate == XML_PARSER_EOF) 10683: return(-1); 10684: 10685: /* 10686: * The Misc part of the Prolog 10687: */ 10688: GROW; 10689: xmlParseMisc(ctxt); 10690: 10691: /* 10692: * Then possibly doc type declaration(s) and more Misc 10693: * (doctypedecl Misc*)? 10694: */ 10695: GROW; 10696: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10697: 10698: ctxt->inSubset = 1; 10699: xmlParseDocTypeDecl(ctxt); 10700: if (RAW == '[') { 10701: ctxt->instate = XML_PARSER_DTD; 10702: xmlParseInternalSubset(ctxt); 10703: if (ctxt->instate == XML_PARSER_EOF) 10704: return(-1); 10705: } 10706: 10707: /* 10708: * Create and update the external subset. 
10709: */ 10710: ctxt->inSubset = 2; 10711: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10712: (!ctxt->disableSAX)) 10713: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10714: ctxt->extSubSystem, ctxt->extSubURI); 10715: if (ctxt->instate == XML_PARSER_EOF) 10716: return(-1); 10717: ctxt->inSubset = 0; 10718: 10719: xmlCleanSpecialAttr(ctxt); 10720: 10721: ctxt->instate = XML_PARSER_PROLOG; 10722: xmlParseMisc(ctxt); 10723: } 10724: 10725: /* 10726: * Time to start parsing the tree itself 10727: */ 10728: GROW; 10729: if (RAW != '<') { 10730: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10731: "Start tag expected, '<' not found\n"); 10732: } else { 10733: ctxt->instate = XML_PARSER_CONTENT; 10734: xmlParseElement(ctxt); 10735: ctxt->instate = XML_PARSER_EPILOG; 10736: 10737: 10738: /* 10739: * The Misc part at the end 10740: */ 10741: xmlParseMisc(ctxt); 10742: 10743: if (RAW != 0) { 10744: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10745: } 10746: ctxt->instate = XML_PARSER_EOF; 10747: } 10748: 10749: /* 10750: * SAX: end of the document processing. 10751: */ 10752: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10753: ctxt->sax->endDocument(ctxt->userData); 10754: 10755: /* 10756: * Remove locally kept entity definitions if the tree was not built 10757: */ 10758: if ((ctxt->myDoc != NULL) && 10759: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10760: xmlFreeDoc(ctxt->myDoc); 10761: ctxt->myDoc = NULL; 10762: } 10763: 10764: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10765: ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10766: if (ctxt->valid) 10767: ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10768: if (ctxt->nsWellFormed) 10769: ctxt->myDoc->properties |= XML_DOC_NSVALID; 10770: if (ctxt->options & XML_PARSE_OLD10) 10771: ctxt->myDoc->properties |= XML_DOC_OLD10; 10772: } 10773: if (! 
ctxt->wellFormed) { 10774: ctxt->valid = 0; 10775: return(-1); 10776: } 10777: return(0); 10778: } 10779: 10780: /** 10781: * xmlParseExtParsedEnt: 10782: * @ctxt: an XML parser context 10783: * 10784: * parse a general parsed entity 10785: * An external general parsed entity is well-formed if it matches the 10786: * production labeled extParsedEnt. 10787: * 10788: * [78] extParsedEnt ::= TextDecl? content 10789: * 10790: * Returns 0, -1 in case of error. the parser context is augmented 10791: * as a result of the parsing. 10792: */ 10793: 10794: int 10795: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 10796: xmlChar start[4]; 10797: xmlCharEncoding enc; 10798: 10799: if ((ctxt == NULL) || (ctxt->input == NULL)) 10800: return(-1); 10801: 10802: xmlDefaultSAXHandlerInit(); 10803: 10804: xmlDetectSAX2(ctxt); 10805: 10806: GROW; 10807: 10808: /* 10809: * SAX: beginning of the document processing. 10810: */ 10811: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10812: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10813: 10814: /* 10815: * Get the 4 first bytes and decode the charset 10816: * if enc != XML_CHAR_ENCODING_NONE 10817: * plug some encoding conversion routines. 10818: */ 10819: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 10820: start[0] = RAW; 10821: start[1] = NXT(1); 10822: start[2] = NXT(2); 10823: start[3] = NXT(3); 10824: enc = xmlDetectCharEncoding(start, 4); 10825: if (enc != XML_CHAR_ENCODING_NONE) { 10826: xmlSwitchEncoding(ctxt, enc); 10827: } 10828: } 10829: 10830: 10831: if (CUR == 0) { 10832: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10833: } 10834: 10835: /* 10836: * Check for the XMLDecl in the Prolog. 10837: */ 10838: GROW; 10839: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10840: 10841: /* 10842: * Note that we will switch encoding on the fly. 
10843: */ 10844: xmlParseXMLDecl(ctxt); 10845: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10846: /* 10847: * The XML REC instructs us to stop parsing right here 10848: */ 10849: return(-1); 10850: } 10851: SKIP_BLANKS; 10852: } else { 10853: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10854: } 10855: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10856: ctxt->sax->startDocument(ctxt->userData); 10857: if (ctxt->instate == XML_PARSER_EOF) 10858: return(-1); 10859: 10860: /* 10861: * Doing validity checking on chunk doesn't make sense 10862: */ 10863: ctxt->instate = XML_PARSER_CONTENT; 10864: ctxt->validate = 0; 10865: ctxt->loadsubset = 0; 10866: ctxt->depth = 0; 10867: 10868: xmlParseContent(ctxt); 10869: if (ctxt->instate == XML_PARSER_EOF) 10870: return(-1); 10871: 10872: if ((RAW == '<') && (NXT(1) == '/')) { 10873: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 10874: } else if (RAW != 0) { 10875: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 10876: } 10877: 10878: /* 10879: * SAX: end of the document processing. 10880: */ 10881: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10882: ctxt->sax->endDocument(ctxt->userData); 10883: 10884: if (! ctxt->wellFormed) return(-1); 10885: return(0); 10886: } 10887: 10888: #ifdef LIBXML_PUSH_ENABLED 10889: /************************************************************************ 10890: * * 10891: * Progressive parsing interfaces * 10892: * * 10893: ************************************************************************/ 10894: 10895: /** 10896: * xmlParseLookupSequence: 10897: * @ctxt: an XML parser context 10898: * @first: the first char to lookup 10899: * @next: the next char to lookup or zero 10900: * @third: the next char to lookup or zero 10901: * 10902: * Try to find if a sequence (first, next, third) or just (first next) or 10903: * (first) is available in the input stream. 
10904: * This function has a side effect of (possibly) incrementing ctxt->checkIndex 10905: * to avoid rescanning sequences of bytes, it DOES change the state of the 10906: * parser, do not use liberally. 10907: * 10908: * Returns the index to the current parsing point if the full sequence 10909: * is available, -1 otherwise. 10910: */ 10911: static int 10912: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 10913: xmlChar next, xmlChar third) { 10914: int base, len; 10915: xmlParserInputPtr in; 10916: const xmlChar *buf; 10917: 10918: in = ctxt->input; 10919: if (in == NULL) return(-1); 10920: base = in->cur - in->base; 10921: if (base < 0) return(-1); 10922: if (ctxt->checkIndex > base) 10923: base = ctxt->checkIndex; 10924: if (in->buf == NULL) { 10925: buf = in->base; 10926: len = in->length; 10927: } else { 10928: buf = xmlBufContent(in->buf->buffer); 10929: len = xmlBufUse(in->buf->buffer); 10930: } 10931: /* take into account the sequence length */ 10932: if (third) len -= 2; 10933: else if (next) len --; 10934: for (;base < len;base++) { 10935: if (buf[base] == first) { 10936: if (third != 0) { 10937: if ((buf[base + 1] != next) || 10938: (buf[base + 2] != third)) continue; 10939: } else if (next != 0) { 10940: if (buf[base + 1] != next) continue; 10941: } 10942: ctxt->checkIndex = 0; 10943: #ifdef DEBUG_PUSH 10944: if (next == 0) 10945: xmlGenericError(xmlGenericErrorContext, 10946: "PP: lookup '%c' found at %d\n", 10947: first, base); 10948: else if (third == 0) 10949: xmlGenericError(xmlGenericErrorContext, 10950: "PP: lookup '%c%c' found at %d\n", 10951: first, next, base); 10952: else 10953: xmlGenericError(xmlGenericErrorContext, 10954: "PP: lookup '%c%c%c' found at %d\n", 10955: first, next, third, base); 10956: #endif 10957: return(base - (in->cur - in->base)); 10958: } 10959: } 10960: ctxt->checkIndex = base; 10961: #ifdef DEBUG_PUSH 10962: if (next == 0) 10963: xmlGenericError(xmlGenericErrorContext, 10964: "PP: lookup '%c' failed\n", 
first); 10965: else if (third == 0) 10966: xmlGenericError(xmlGenericErrorContext, 10967: "PP: lookup '%c%c' failed\n", first, next); 10968: else 10969: xmlGenericError(xmlGenericErrorContext, 10970: "PP: lookup '%c%c%c' failed\n", first, next, third); 10971: #endif 10972: return(-1); 10973: } 10974: 10975: /** 10976: * xmlParseGetLasts: 10977: * @ctxt: an XML parser context 10978: * @lastlt: pointer to store the last '<' from the input 10979: * @lastgt: pointer to store the last '>' from the input 10980: * 10981: * Lookup the last < and > in the current chunk 10982: */ 10983: static void 10984: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 10985: const xmlChar **lastgt) { 10986: const xmlChar *tmp; 10987: 10988: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 10989: xmlGenericError(xmlGenericErrorContext, 10990: "Internal error: xmlParseGetLasts\n"); 10991: return; 10992: } 10993: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 10994: tmp = ctxt->input->end; 10995: tmp--; 10996: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 10997: if (tmp < ctxt->input->base) { 10998: *lastlt = NULL; 10999: *lastgt = NULL; 11000: } else { 11001: *lastlt = tmp; 11002: tmp++; 11003: while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11004: if (*tmp == '\'') { 11005: tmp++; 11006: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11007: if (tmp < ctxt->input->end) tmp++; 11008: } else if (*tmp == '"') { 11009: tmp++; 11010: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11011: if (tmp < ctxt->input->end) tmp++; 11012: } else 11013: tmp++; 11014: } 11015: if (tmp < ctxt->input->end) 11016: *lastgt = tmp; 11017: else { 11018: tmp = *lastlt; 11019: tmp--; 11020: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11021: if (tmp >= ctxt->input->base) 11022: *lastgt = tmp; 11023: else 11024: *lastgt = NULL; 11025: } 11026: } 11027: } else { 11028: *lastlt = NULL; 11029: *lastgt = NULL; 11030: } 11031: } 11032: 
/** 11033: * xmlCheckCdataPush: 11034: * @cur: pointer to the bock of characters 11035: * @len: length of the block in bytes 11036: * 11037: * Check that the block of characters is okay as SCdata content [20] 11038: * 11039: * Returns the number of bytes to pass if okay, a negative index where an 11040: * UTF-8 error occured otherwise 11041: */ 11042: static int 11043: xmlCheckCdataPush(const xmlChar *utf, int len) { 11044: int ix; 11045: unsigned char c; 11046: int codepoint; 11047: 11048: if ((utf == NULL) || (len <= 0)) 11049: return(0); 11050: 11051: for (ix = 0; ix < len;) { /* string is 0-terminated */ 11052: c = utf[ix]; 11053: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11054: if (c >= 0x20) 11055: ix++; 11056: else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11057: ix++; 11058: else 11059: return(-ix); 11060: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11061: if (ix + 2 > len) return(ix); 11062: if ((utf[ix+1] & 0xc0 ) != 0x80) 11063: return(-ix); 11064: codepoint = (utf[ix] & 0x1f) << 6; 11065: codepoint |= utf[ix+1] & 0x3f; 11066: if (!xmlIsCharQ(codepoint)) 11067: return(-ix); 11068: ix += 2; 11069: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11070: if (ix + 3 > len) return(ix); 11071: if (((utf[ix+1] & 0xc0) != 0x80) || 11072: ((utf[ix+2] & 0xc0) != 0x80)) 11073: return(-ix); 11074: codepoint = (utf[ix] & 0xf) << 12; 11075: codepoint |= (utf[ix+1] & 0x3f) << 6; 11076: codepoint |= utf[ix+2] & 0x3f; 11077: if (!xmlIsCharQ(codepoint)) 11078: return(-ix); 11079: ix += 3; 11080: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11081: if (ix + 4 > len) return(ix); 11082: if (((utf[ix+1] & 0xc0) != 0x80) || 11083: ((utf[ix+2] & 0xc0) != 0x80) || 11084: ((utf[ix+3] & 0xc0) != 0x80)) 11085: return(-ix); 11086: codepoint = (utf[ix] & 0x7) << 18; 11087: codepoint |= (utf[ix+1] & 0x3f) << 12; 11088: codepoint |= (utf[ix+2] & 0x3f) << 6; 11089: codepoint |= utf[ix+3] & 0x3f; 11090: 
if (!xmlIsCharQ(codepoint)) 11091: return(-ix); 11092: ix += 4; 11093: } else /* unknown encoding */ 11094: return(-ix); 11095: } 11096: return(ix); 11097: } 11098: 11099: /** 11100: * xmlParseTryOrFinish: 11101: * @ctxt: an XML parser context 11102: * @terminate: last chunk indicator 11103: * 11104: * Try to progress on parsing 11105: * 11106: * Returns zero if no parsing was possible 11107: */ 11108: static int 11109: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11110: int ret = 0; 11111: int avail, tlen; 11112: xmlChar cur, next; 11113: const xmlChar *lastlt, *lastgt; 11114: 11115: if (ctxt->input == NULL) 11116: return(0); 11117: 11118: #ifdef DEBUG_PUSH 11119: switch (ctxt->instate) { 11120: case XML_PARSER_EOF: 11121: xmlGenericError(xmlGenericErrorContext, 11122: "PP: try EOF\n"); break; 11123: case XML_PARSER_START: 11124: xmlGenericError(xmlGenericErrorContext, 11125: "PP: try START\n"); break; 11126: case XML_PARSER_MISC: 11127: xmlGenericError(xmlGenericErrorContext, 11128: "PP: try MISC\n");break; 11129: case XML_PARSER_COMMENT: 11130: xmlGenericError(xmlGenericErrorContext, 11131: "PP: try COMMENT\n");break; 11132: case XML_PARSER_PROLOG: 11133: xmlGenericError(xmlGenericErrorContext, 11134: "PP: try PROLOG\n");break; 11135: case XML_PARSER_START_TAG: 11136: xmlGenericError(xmlGenericErrorContext, 11137: "PP: try START_TAG\n");break; 11138: case XML_PARSER_CONTENT: 11139: xmlGenericError(xmlGenericErrorContext, 11140: "PP: try CONTENT\n");break; 11141: case XML_PARSER_CDATA_SECTION: 11142: xmlGenericError(xmlGenericErrorContext, 11143: "PP: try CDATA_SECTION\n");break; 11144: case XML_PARSER_END_TAG: 11145: xmlGenericError(xmlGenericErrorContext, 11146: "PP: try END_TAG\n");break; 11147: case XML_PARSER_ENTITY_DECL: 11148: xmlGenericError(xmlGenericErrorContext, 11149: "PP: try ENTITY_DECL\n");break; 11150: case XML_PARSER_ENTITY_VALUE: 11151: xmlGenericError(xmlGenericErrorContext, 11152: "PP: try ENTITY_VALUE\n");break; 11153: case 
XML_PARSER_ATTRIBUTE_VALUE: 11154: xmlGenericError(xmlGenericErrorContext, 11155: "PP: try ATTRIBUTE_VALUE\n");break; 11156: case XML_PARSER_DTD: 11157: xmlGenericError(xmlGenericErrorContext, 11158: "PP: try DTD\n");break; 11159: case XML_PARSER_EPILOG: 11160: xmlGenericError(xmlGenericErrorContext, 11161: "PP: try EPILOG\n");break; 11162: case XML_PARSER_PI: 11163: xmlGenericError(xmlGenericErrorContext, 11164: "PP: try PI\n");break; 11165: case XML_PARSER_IGNORE: 11166: xmlGenericError(xmlGenericErrorContext, 11167: "PP: try IGNORE\n");break; 11168: } 11169: #endif 11170: 11171: if ((ctxt->input != NULL) && 11172: (ctxt->input->cur - ctxt->input->base > 4096)) { 11173: xmlSHRINK(ctxt); 11174: ctxt->checkIndex = 0; 11175: } 11176: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11177: 11178: while (ctxt->instate != XML_PARSER_EOF) { 11179: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11180: return(0); 11181: 11182: 11183: /* 11184: * Pop-up of finished entities. 11185: */ 11186: while ((RAW == 0) && (ctxt->inputNr > 1)) 11187: xmlPopInput(ctxt); 11188: 11189: if (ctxt->input == NULL) break; 11190: if (ctxt->input->buf == NULL) 11191: avail = ctxt->input->length - 11192: (ctxt->input->cur - ctxt->input->base); 11193: else { 11194: /* 11195: * If we are operating on converted input, try to flush 11196: * remainng chars to avoid them stalling in the non-converted 11197: * buffer. But do not do this in document start where 11198: * encoding="..." may not have been read and we work on a 11199: * guessed encoding. 
11200: */ 11201: if ((ctxt->instate != XML_PARSER_START) && 11202: (ctxt->input->buf->raw != NULL) && 11203: (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11204: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11205: ctxt->input); 11206: size_t current = ctxt->input->cur - ctxt->input->base; 11207: 11208: xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11209: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11210: base, current); 11211: } 11212: avail = xmlBufUse(ctxt->input->buf->buffer) - 11213: (ctxt->input->cur - ctxt->input->base); 11214: } 11215: if (avail < 1) 11216: goto done; 11217: switch (ctxt->instate) { 11218: case XML_PARSER_EOF: 11219: /* 11220: * Document parsing is done ! 11221: */ 11222: goto done; 11223: case XML_PARSER_START: 11224: if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11225: xmlChar start[4]; 11226: xmlCharEncoding enc; 11227: 11228: /* 11229: * Very first chars read from the document flow. 11230: */ 11231: if (avail < 4) 11232: goto done; 11233: 11234: /* 11235: * Get the 4 first bytes and decode the charset 11236: * if enc != XML_CHAR_ENCODING_NONE 11237: * plug some encoding conversion routines, 11238: * else xmlSwitchEncoding will set to (default) 11239: * UTF8. 
11240: */ 11241: start[0] = RAW; 11242: start[1] = NXT(1); 11243: start[2] = NXT(2); 11244: start[3] = NXT(3); 11245: enc = xmlDetectCharEncoding(start, 4); 11246: xmlSwitchEncoding(ctxt, enc); 11247: break; 11248: } 11249: 11250: if (avail < 2) 11251: goto done; 11252: cur = ctxt->input->cur[0]; 11253: next = ctxt->input->cur[1]; 11254: if (cur == 0) { 11255: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11256: ctxt->sax->setDocumentLocator(ctxt->userData, 11257: &xmlDefaultSAXLocator); 11258: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11259: ctxt->instate = XML_PARSER_EOF; 11260: #ifdef DEBUG_PUSH 11261: xmlGenericError(xmlGenericErrorContext, 11262: "PP: entering EOF\n"); 11263: #endif 11264: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11265: ctxt->sax->endDocument(ctxt->userData); 11266: goto done; 11267: } 11268: if ((cur == '<') && (next == '?')) { 11269: /* PI or XML decl */ 11270: if (avail < 5) return(ret); 11271: if ((!terminate) && 11272: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11273: return(ret); 11274: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11275: ctxt->sax->setDocumentLocator(ctxt->userData, 11276: &xmlDefaultSAXLocator); 11277: if ((ctxt->input->cur[2] == 'x') && 11278: (ctxt->input->cur[3] == 'm') && 11279: (ctxt->input->cur[4] == 'l') && 11280: (IS_BLANK_CH(ctxt->input->cur[5]))) { 11281: ret += 5; 11282: #ifdef DEBUG_PUSH 11283: xmlGenericError(xmlGenericErrorContext, 11284: "PP: Parsing XML Decl\n"); 11285: #endif 11286: xmlParseXMLDecl(ctxt); 11287: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11288: /* 11289: * The XML REC instructs us to stop parsing right 11290: * here 11291: */ 11292: ctxt->instate = XML_PARSER_EOF; 11293: return(0); 11294: } 11295: ctxt->standalone = ctxt->input->standalone; 11296: if ((ctxt->encoding == NULL) && 11297: (ctxt->input->encoding != NULL)) 11298: ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11299: if ((ctxt->sax) && (ctxt->sax->startDocument) && 11300: 
(!ctxt->disableSAX)) 11301: ctxt->sax->startDocument(ctxt->userData); 11302: ctxt->instate = XML_PARSER_MISC; 11303: #ifdef DEBUG_PUSH 11304: xmlGenericError(xmlGenericErrorContext, 11305: "PP: entering MISC\n"); 11306: #endif 11307: } else { 11308: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11309: if ((ctxt->sax) && (ctxt->sax->startDocument) && 11310: (!ctxt->disableSAX)) 11311: ctxt->sax->startDocument(ctxt->userData); 11312: ctxt->instate = XML_PARSER_MISC; 11313: #ifdef DEBUG_PUSH 11314: xmlGenericError(xmlGenericErrorContext, 11315: "PP: entering MISC\n"); 11316: #endif 11317: } 11318: } else { 11319: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11320: ctxt->sax->setDocumentLocator(ctxt->userData, 11321: &xmlDefaultSAXLocator); 11322: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11323: if (ctxt->version == NULL) { 11324: xmlErrMemory(ctxt, NULL); 11325: break; 11326: } 11327: if ((ctxt->sax) && (ctxt->sax->startDocument) && 11328: (!ctxt->disableSAX)) 11329: ctxt->sax->startDocument(ctxt->userData); 11330: ctxt->instate = XML_PARSER_MISC; 11331: #ifdef DEBUG_PUSH 11332: xmlGenericError(xmlGenericErrorContext, 11333: "PP: entering MISC\n"); 11334: #endif 11335: } 11336: break; 11337: case XML_PARSER_START_TAG: { 11338: const xmlChar *name; 11339: const xmlChar *prefix = NULL; 11340: const xmlChar *URI = NULL; 11341: int nsNr = ctxt->nsNr; 11342: 11343: if ((avail < 2) && (ctxt->inputNr == 1)) 11344: goto done; 11345: cur = ctxt->input->cur[0]; 11346: if (cur != '<') { 11347: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11348: ctxt->instate = XML_PARSER_EOF; 11349: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11350: ctxt->sax->endDocument(ctxt->userData); 11351: goto done; 11352: } 11353: if (!terminate) { 11354: if (ctxt->progressive) { 11355: /* > can be found unescaped in attribute values */ 11356: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11357: goto done; 11358: } else if (xmlParseLookupSequence(ctxt, '>', 0, 
0) < 0) { 11359: goto done; 11360: } 11361: } 11362: if (ctxt->spaceNr == 0) 11363: spacePush(ctxt, -1); 11364: else if (*ctxt->space == -2) 11365: spacePush(ctxt, -1); 11366: else 11367: spacePush(ctxt, *ctxt->space); 11368: #ifdef LIBXML_SAX1_ENABLED 11369: if (ctxt->sax2) 11370: #endif /* LIBXML_SAX1_ENABLED */ 11371: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11372: #ifdef LIBXML_SAX1_ENABLED 11373: else 11374: name = xmlParseStartTag(ctxt); 11375: #endif /* LIBXML_SAX1_ENABLED */ 11376: if (ctxt->instate == XML_PARSER_EOF) 11377: goto done; 11378: if (name == NULL) { 11379: spacePop(ctxt); 11380: ctxt->instate = XML_PARSER_EOF; 11381: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11382: ctxt->sax->endDocument(ctxt->userData); 11383: goto done; 11384: } 11385: #ifdef LIBXML_VALID_ENABLED 11386: /* 11387: * [ VC: Root Element Type ] 11388: * The Name in the document type declaration must match 11389: * the element type of the root element. 11390: */ 11391: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11392: ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11393: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11394: #endif /* LIBXML_VALID_ENABLED */ 11395: 11396: /* 11397: * Check for an Empty Element. 
11398: */ 11399: if ((RAW == '/') && (NXT(1) == '>')) { 11400: SKIP(2); 11401: 11402: if (ctxt->sax2) { 11403: if ((ctxt->sax != NULL) && 11404: (ctxt->sax->endElementNs != NULL) && 11405: (!ctxt->disableSAX)) 11406: ctxt->sax->endElementNs(ctxt->userData, name, 11407: prefix, URI); 11408: if (ctxt->nsNr - nsNr > 0) 11409: nsPop(ctxt, ctxt->nsNr - nsNr); 11410: #ifdef LIBXML_SAX1_ENABLED 11411: } else { 11412: if ((ctxt->sax != NULL) && 11413: (ctxt->sax->endElement != NULL) && 11414: (!ctxt->disableSAX)) 11415: ctxt->sax->endElement(ctxt->userData, name); 11416: #endif /* LIBXML_SAX1_ENABLED */ 11417: } 11418: if (ctxt->instate == XML_PARSER_EOF) 11419: goto done; 11420: spacePop(ctxt); 11421: if (ctxt->nameNr == 0) { 11422: ctxt->instate = XML_PARSER_EPILOG; 11423: } else { 11424: ctxt->instate = XML_PARSER_CONTENT; 11425: } 11426: ctxt->progressive = 1; 11427: break; 11428: } 11429: if (RAW == '>') { 11430: NEXT; 11431: } else { 11432: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11433: "Couldn't find end of Start Tag %s\n", 11434: name); 11435: nodePop(ctxt); 11436: spacePop(ctxt); 11437: } 11438: if (ctxt->sax2) 11439: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11440: #ifdef LIBXML_SAX1_ENABLED 11441: else 11442: namePush(ctxt, name); 11443: #endif /* LIBXML_SAX1_ENABLED */ 11444: 11445: ctxt->instate = XML_PARSER_CONTENT; 11446: ctxt->progressive = 1; 11447: break; 11448: } 11449: case XML_PARSER_CONTENT: { 11450: const xmlChar *test; 11451: unsigned int cons; 11452: if ((avail < 2) && (ctxt->inputNr == 1)) 11453: goto done; 11454: cur = ctxt->input->cur[0]; 11455: next = ctxt->input->cur[1]; 11456: 11457: test = CUR_PTR; 11458: cons = ctxt->input->consumed; 11459: if ((cur == '<') && (next == '/')) { 11460: ctxt->instate = XML_PARSER_END_TAG; 11461: break; 11462: } else if ((cur == '<') && (next == '?')) { 11463: if ((!terminate) && 11464: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11465: ctxt->progressive = XML_PARSER_PI; 11466: goto 
done; 11467: } 11468: xmlParsePI(ctxt); 11469: ctxt->instate = XML_PARSER_CONTENT; 11470: ctxt->progressive = 1; 11471: } else if ((cur == '<') && (next != '!')) { 11472: ctxt->instate = XML_PARSER_START_TAG; 11473: break; 11474: } else if ((cur == '<') && (next == '!') && 11475: (ctxt->input->cur[2] == '-') && 11476: (ctxt->input->cur[3] == '-')) { 11477: int term; 11478: 11479: if (avail < 4) 11480: goto done; 11481: ctxt->input->cur += 4; 11482: term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11483: ctxt->input->cur -= 4; 11484: if ((!terminate) && (term < 0)) { 11485: ctxt->progressive = XML_PARSER_COMMENT; 11486: goto done; 11487: } 11488: xmlParseComment(ctxt); 11489: ctxt->instate = XML_PARSER_CONTENT; 11490: ctxt->progressive = 1; 11491: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11492: (ctxt->input->cur[2] == '[') && 11493: (ctxt->input->cur[3] == 'C') && 11494: (ctxt->input->cur[4] == 'D') && 11495: (ctxt->input->cur[5] == 'A') && 11496: (ctxt->input->cur[6] == 'T') && 11497: (ctxt->input->cur[7] == 'A') && 11498: (ctxt->input->cur[8] == '[')) { 11499: SKIP(9); 11500: ctxt->instate = XML_PARSER_CDATA_SECTION; 11501: break; 11502: } else if ((cur == '<') && (next == '!') && 11503: (avail < 9)) { 11504: goto done; 11505: } else if (cur == '&') { 11506: if ((!terminate) && 11507: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11508: goto done; 11509: xmlParseReference(ctxt); 11510: } else { 11511: /* TODO Avoid the extra copy, handle directly !!! */ 11512: /* 11513: * Goal of the following test is: 11514: * - minimize calls to the SAX 'character' callback 11515: * when they are mergeable 11516: * - handle an problem for isBlank when we only parse 11517: * a sequence of blank chars and the next one is 11518: * not available to check against '<' presence. 11519: * - tries to homogenize the differences in SAX 11520: * callbacks between the push and pull versions 11521: * of the parser. 
11522: */ 11523: if ((ctxt->inputNr == 1) && 11524: (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11525: if (!terminate) { 11526: if (ctxt->progressive) { 11527: if ((lastlt == NULL) || 11528: (ctxt->input->cur > lastlt)) 11529: goto done; 11530: } else if (xmlParseLookupSequence(ctxt, 11531: '<', 0, 0) < 0) { 11532: goto done; 11533: } 11534: } 11535: } 11536: ctxt->checkIndex = 0; 11537: xmlParseCharData(ctxt, 0); 11538: } 11539: /* 11540: * Pop-up of finished entities. 11541: */ 11542: while ((RAW == 0) && (ctxt->inputNr > 1)) 11543: xmlPopInput(ctxt); 11544: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11545: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11546: "detected an error in element content\n"); 11547: ctxt->instate = XML_PARSER_EOF; 11548: break; 11549: } 11550: break; 11551: } 11552: case XML_PARSER_END_TAG: 11553: if (avail < 2) 11554: goto done; 11555: if (!terminate) { 11556: if (ctxt->progressive) { 11557: /* > can be found unescaped in attribute values */ 11558: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11559: goto done; 11560: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11561: goto done; 11562: } 11563: } 11564: if (ctxt->sax2) { 11565: xmlParseEndTag2(ctxt, 11566: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11567: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11568: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11569: nameNsPop(ctxt); 11570: } 11571: #ifdef LIBXML_SAX1_ENABLED 11572: else 11573: xmlParseEndTag1(ctxt, 0); 11574: #endif /* LIBXML_SAX1_ENABLED */ 11575: if (ctxt->instate == XML_PARSER_EOF) { 11576: /* Nothing */ 11577: } else if (ctxt->nameNr == 0) { 11578: ctxt->instate = XML_PARSER_EPILOG; 11579: } else { 11580: ctxt->instate = XML_PARSER_CONTENT; 11581: } 11582: break; 11583: case XML_PARSER_CDATA_SECTION: { 11584: /* 11585: * The Push mode need to have the SAX callback for 11586: * cdataBlock merge back contiguous callbacks. 
11587: */ 11588: int base; 11589: 11590: base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11591: if (base < 0) { 11592: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11593: int tmp; 11594: 11595: tmp = xmlCheckCdataPush(ctxt->input->cur, 11596: XML_PARSER_BIG_BUFFER_SIZE); 11597: if (tmp < 0) { 11598: tmp = -tmp; 11599: ctxt->input->cur += tmp; 11600: goto encoding_error; 11601: } 11602: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11603: if (ctxt->sax->cdataBlock != NULL) 11604: ctxt->sax->cdataBlock(ctxt->userData, 11605: ctxt->input->cur, tmp); 11606: else if (ctxt->sax->characters != NULL) 11607: ctxt->sax->characters(ctxt->userData, 11608: ctxt->input->cur, tmp); 11609: } 11610: if (ctxt->instate == XML_PARSER_EOF) 11611: goto done; 11612: SKIPL(tmp); 11613: ctxt->checkIndex = 0; 11614: } 11615: goto done; 11616: } else { 11617: int tmp; 11618: 11619: tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11620: if ((tmp < 0) || (tmp != base)) { 11621: tmp = -tmp; 11622: ctxt->input->cur += tmp; 11623: goto encoding_error; 11624: } 11625: if ((ctxt->sax != NULL) && (base == 0) && 11626: (ctxt->sax->cdataBlock != NULL) && 11627: (!ctxt->disableSAX)) { 11628: /* 11629: * Special case to provide identical behaviour 11630: * between pull and push parsers on enpty CDATA 11631: * sections 11632: */ 11633: if ((ctxt->input->cur - ctxt->input->base >= 9) && 11634: (!strncmp((const char *)&ctxt->input->cur[-9], 11635: "<![CDATA[", 9))) 11636: ctxt->sax->cdataBlock(ctxt->userData, 11637: BAD_CAST "", 0); 11638: } else if ((ctxt->sax != NULL) && (base > 0) && 11639: (!ctxt->disableSAX)) { 11640: if (ctxt->sax->cdataBlock != NULL) 11641: ctxt->sax->cdataBlock(ctxt->userData, 11642: ctxt->input->cur, base); 11643: else if (ctxt->sax->characters != NULL) 11644: ctxt->sax->characters(ctxt->userData, 11645: ctxt->input->cur, base); 11646: } 11647: if (ctxt->instate == XML_PARSER_EOF) 11648: goto done; 11649: SKIPL(base + 3); 11650: ctxt->checkIndex = 0; 11651: 
ctxt->instate = XML_PARSER_CONTENT; 11652: #ifdef DEBUG_PUSH 11653: xmlGenericError(xmlGenericErrorContext, 11654: "PP: entering CONTENT\n"); 11655: #endif 11656: } 11657: break; 11658: } 11659: case XML_PARSER_MISC: 11660: SKIP_BLANKS; 11661: if (ctxt->input->buf == NULL) 11662: avail = ctxt->input->length - 11663: (ctxt->input->cur - ctxt->input->base); 11664: else 11665: avail = xmlBufUse(ctxt->input->buf->buffer) - 11666: (ctxt->input->cur - ctxt->input->base); 11667: if (avail < 2) 11668: goto done; 11669: cur = ctxt->input->cur[0]; 11670: next = ctxt->input->cur[1]; 11671: if ((cur == '<') && (next == '?')) { 11672: if ((!terminate) && 11673: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11674: ctxt->progressive = XML_PARSER_PI; 11675: goto done; 11676: } 11677: #ifdef DEBUG_PUSH 11678: xmlGenericError(xmlGenericErrorContext, 11679: "PP: Parsing PI\n"); 11680: #endif 11681: xmlParsePI(ctxt); 11682: if (ctxt->instate == XML_PARSER_EOF) 11683: goto done; 11684: ctxt->instate = XML_PARSER_MISC; 11685: ctxt->progressive = 1; 11686: ctxt->checkIndex = 0; 11687: } else if ((cur == '<') && (next == '!') && 11688: (ctxt->input->cur[2] == '-') && 11689: (ctxt->input->cur[3] == '-')) { 11690: if ((!terminate) && 11691: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11692: ctxt->progressive = XML_PARSER_COMMENT; 11693: goto done; 11694: } 11695: #ifdef DEBUG_PUSH 11696: xmlGenericError(xmlGenericErrorContext, 11697: "PP: Parsing Comment\n"); 11698: #endif 11699: xmlParseComment(ctxt); 11700: if (ctxt->instate == XML_PARSER_EOF) 11701: goto done; 11702: ctxt->instate = XML_PARSER_MISC; 11703: ctxt->progressive = 1; 11704: ctxt->checkIndex = 0; 11705: } else if ((cur == '<') && (next == '!') && 11706: (ctxt->input->cur[2] == 'D') && 11707: (ctxt->input->cur[3] == 'O') && 11708: (ctxt->input->cur[4] == 'C') && 11709: (ctxt->input->cur[5] == 'T') && 11710: (ctxt->input->cur[6] == 'Y') && 11711: (ctxt->input->cur[7] == 'P') && 11712: (ctxt->input->cur[8] == 'E')) 
{ 11713: if ((!terminate) && 11714: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11715: ctxt->progressive = XML_PARSER_DTD; 11716: goto done; 11717: } 11718: #ifdef DEBUG_PUSH 11719: xmlGenericError(xmlGenericErrorContext, 11720: "PP: Parsing internal subset\n"); 11721: #endif 11722: ctxt->inSubset = 1; 11723: ctxt->progressive = 0; 11724: ctxt->checkIndex = 0; 11725: xmlParseDocTypeDecl(ctxt); 11726: if (ctxt->instate == XML_PARSER_EOF) 11727: goto done; 11728: if (RAW == '[') { 11729: ctxt->instate = XML_PARSER_DTD; 11730: #ifdef DEBUG_PUSH 11731: xmlGenericError(xmlGenericErrorContext, 11732: "PP: entering DTD\n"); 11733: #endif 11734: } else { 11735: /* 11736: * Create and update the external subset. 11737: */ 11738: ctxt->inSubset = 2; 11739: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11740: (ctxt->sax->externalSubset != NULL)) 11741: ctxt->sax->externalSubset(ctxt->userData, 11742: ctxt->intSubName, ctxt->extSubSystem, 11743: ctxt->extSubURI); 11744: ctxt->inSubset = 0; 11745: xmlCleanSpecialAttr(ctxt); 11746: ctxt->instate = XML_PARSER_PROLOG; 11747: #ifdef DEBUG_PUSH 11748: xmlGenericError(xmlGenericErrorContext, 11749: "PP: entering PROLOG\n"); 11750: #endif 11751: } 11752: } else if ((cur == '<') && (next == '!') && 11753: (avail < 9)) { 11754: goto done; 11755: } else { 11756: ctxt->instate = XML_PARSER_START_TAG; 11757: ctxt->progressive = XML_PARSER_START_TAG; 11758: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11759: #ifdef DEBUG_PUSH 11760: xmlGenericError(xmlGenericErrorContext, 11761: "PP: entering START_TAG\n"); 11762: #endif 11763: } 11764: break; 11765: case XML_PARSER_PROLOG: 11766: SKIP_BLANKS; 11767: if (ctxt->input->buf == NULL) 11768: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11769: else 11770: avail = xmlBufUse(ctxt->input->buf->buffer) - 11771: (ctxt->input->cur - ctxt->input->base); 11772: if (avail < 2) 11773: goto done; 11774: cur = ctxt->input->cur[0]; 11775: next = ctxt->input->cur[1]; 11776: if 
((cur == '<') && (next == '?')) { 11777: if ((!terminate) && 11778: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11779: ctxt->progressive = XML_PARSER_PI; 11780: goto done; 11781: } 11782: #ifdef DEBUG_PUSH 11783: xmlGenericError(xmlGenericErrorContext, 11784: "PP: Parsing PI\n"); 11785: #endif 11786: xmlParsePI(ctxt); 11787: if (ctxt->instate == XML_PARSER_EOF) 11788: goto done; 11789: ctxt->instate = XML_PARSER_PROLOG; 11790: ctxt->progressive = 1; 11791: } else if ((cur == '<') && (next == '!') && 11792: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11793: if ((!terminate) && 11794: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11795: ctxt->progressive = XML_PARSER_COMMENT; 11796: goto done; 11797: } 11798: #ifdef DEBUG_PUSH 11799: xmlGenericError(xmlGenericErrorContext, 11800: "PP: Parsing Comment\n"); 11801: #endif 11802: xmlParseComment(ctxt); 11803: if (ctxt->instate == XML_PARSER_EOF) 11804: goto done; 11805: ctxt->instate = XML_PARSER_PROLOG; 11806: ctxt->progressive = 1; 11807: } else if ((cur == '<') && (next == '!') && 11808: (avail < 4)) { 11809: goto done; 11810: } else { 11811: ctxt->instate = XML_PARSER_START_TAG; 11812: if (ctxt->progressive == 0) 11813: ctxt->progressive = XML_PARSER_START_TAG; 11814: xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11815: #ifdef DEBUG_PUSH 11816: xmlGenericError(xmlGenericErrorContext, 11817: "PP: entering START_TAG\n"); 11818: #endif 11819: } 11820: break; 11821: case XML_PARSER_EPILOG: 11822: SKIP_BLANKS; 11823: if (ctxt->input->buf == NULL) 11824: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11825: else 11826: avail = xmlBufUse(ctxt->input->buf->buffer) - 11827: (ctxt->input->cur - ctxt->input->base); 11828: if (avail < 2) 11829: goto done; 11830: cur = ctxt->input->cur[0]; 11831: next = ctxt->input->cur[1]; 11832: if ((cur == '<') && (next == '?')) { 11833: if ((!terminate) && 11834: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11835: ctxt->progressive = 
XML_PARSER_PI; 11836: goto done; 11837: } 11838: #ifdef DEBUG_PUSH 11839: xmlGenericError(xmlGenericErrorContext, 11840: "PP: Parsing PI\n"); 11841: #endif 11842: xmlParsePI(ctxt); 11843: if (ctxt->instate == XML_PARSER_EOF) 11844: goto done; 11845: ctxt->instate = XML_PARSER_EPILOG; 11846: ctxt->progressive = 1; 11847: } else if ((cur == '<') && (next == '!') && 11848: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11849: if ((!terminate) && 11850: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11851: ctxt->progressive = XML_PARSER_COMMENT; 11852: goto done; 11853: } 11854: #ifdef DEBUG_PUSH 11855: xmlGenericError(xmlGenericErrorContext, 11856: "PP: Parsing Comment\n"); 11857: #endif 11858: xmlParseComment(ctxt); 11859: if (ctxt->instate == XML_PARSER_EOF) 11860: goto done; 11861: ctxt->instate = XML_PARSER_EPILOG; 11862: ctxt->progressive = 1; 11863: } else if ((cur == '<') && (next == '!') && 11864: (avail < 4)) { 11865: goto done; 11866: } else { 11867: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11868: ctxt->instate = XML_PARSER_EOF; 11869: #ifdef DEBUG_PUSH 11870: xmlGenericError(xmlGenericErrorContext, 11871: "PP: entering EOF\n"); 11872: #endif 11873: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11874: ctxt->sax->endDocument(ctxt->userData); 11875: goto done; 11876: } 11877: break; 11878: case XML_PARSER_DTD: { 11879: /* 11880: * Sorry but progressive parsing of the internal subset 11881: * is not expected to be supported. We first check that 11882: * the full content of the internal subset is available and 11883: * the parsing is launched only at that point. 11884: * Internal subset ends up with "']' S? '>'" in an unescaped 11885: * section and not in a ']]>' sequence which are conditional 11886: * sections (whoever argued to keep that crap in XML deserve 11887: * a place in hell !). 
11888: */ 11889: int base, i; 11890: xmlChar *buf; 11891: xmlChar quote = 0; 11892: size_t use; 11893: 11894: base = ctxt->input->cur - ctxt->input->base; 11895: if (base < 0) return(0); 11896: if (ctxt->checkIndex > base) 11897: base = ctxt->checkIndex; 11898: buf = xmlBufContent(ctxt->input->buf->buffer); 11899: use = xmlBufUse(ctxt->input->buf->buffer); 11900: for (;(unsigned int) base < use; base++) { 11901: if (quote != 0) { 11902: if (buf[base] == quote) 11903: quote = 0; 11904: continue; 11905: } 11906: if ((quote == 0) && (buf[base] == '<')) { 11907: int found = 0; 11908: /* special handling of comments */ 11909: if (((unsigned int) base + 4 < use) && 11910: (buf[base + 1] == '!') && 11911: (buf[base + 2] == '-') && 11912: (buf[base + 3] == '-')) { 11913: for (;(unsigned int) base + 3 < use; base++) { 11914: if ((buf[base] == '-') && 11915: (buf[base + 1] == '-') && 11916: (buf[base + 2] == '>')) { 11917: found = 1; 11918: base += 2; 11919: break; 11920: } 11921: } 11922: if (!found) { 11923: #if 0 11924: fprintf(stderr, "unfinished comment\n"); 11925: #endif 11926: break; /* for */ 11927: } 11928: continue; 11929: } 11930: } 11931: if (buf[base] == '"') { 11932: quote = '"'; 11933: continue; 11934: } 11935: if (buf[base] == '\'') { 11936: quote = '\''; 11937: continue; 11938: } 11939: if (buf[base] == ']') { 11940: #if 0 11941: fprintf(stderr, "%c%c%c%c: ", buf[base], 11942: buf[base + 1], buf[base + 2], buf[base + 3]); 11943: #endif 11944: if ((unsigned int) base +1 >= use) 11945: break; 11946: if (buf[base + 1] == ']') { 11947: /* conditional crap, skip both ']' ! 
*/ 11948: base++; 11949: continue; 11950: } 11951: for (i = 1; (unsigned int) base + i < use; i++) { 11952: if (buf[base + i] == '>') { 11953: #if 0 11954: fprintf(stderr, "found\n"); 11955: #endif 11956: goto found_end_int_subset; 11957: } 11958: if (!IS_BLANK_CH(buf[base + i])) { 11959: #if 0 11960: fprintf(stderr, "not found\n"); 11961: #endif 11962: goto not_end_of_int_subset; 11963: } 11964: } 11965: #if 0 11966: fprintf(stderr, "end of stream\n"); 11967: #endif 11968: break; 11969: 11970: } 11971: not_end_of_int_subset: 11972: continue; /* for */ 11973: } 11974: /* 11975: * We didn't found the end of the Internal subset 11976: */ 11977: if (quote == 0) 11978: ctxt->checkIndex = base; 11979: else 11980: ctxt->checkIndex = 0; 11981: #ifdef DEBUG_PUSH 11982: if (next == 0) 11983: xmlGenericError(xmlGenericErrorContext, 11984: "PP: lookup of int subset end filed\n"); 11985: #endif 11986: goto done; 11987: 11988: found_end_int_subset: 11989: ctxt->checkIndex = 0; 11990: xmlParseInternalSubset(ctxt); 11991: if (ctxt->instate == XML_PARSER_EOF) 11992: goto done; 11993: ctxt->inSubset = 2; 11994: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11995: (ctxt->sax->externalSubset != NULL)) 11996: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11997: ctxt->extSubSystem, ctxt->extSubURI); 11998: ctxt->inSubset = 0; 11999: xmlCleanSpecialAttr(ctxt); 12000: if (ctxt->instate == XML_PARSER_EOF) 12001: goto done; 12002: ctxt->instate = XML_PARSER_PROLOG; 12003: ctxt->checkIndex = 0; 12004: #ifdef DEBUG_PUSH 12005: xmlGenericError(xmlGenericErrorContext, 12006: "PP: entering PROLOG\n"); 12007: #endif 12008: break; 12009: } 12010: case XML_PARSER_COMMENT: 12011: xmlGenericError(xmlGenericErrorContext, 12012: "PP: internal error, state == COMMENT\n"); 12013: ctxt->instate = XML_PARSER_CONTENT; 12014: #ifdef DEBUG_PUSH 12015: xmlGenericError(xmlGenericErrorContext, 12016: "PP: entering CONTENT\n"); 12017: #endif 12018: break; 12019: case XML_PARSER_IGNORE: 12020: 
xmlGenericError(xmlGenericErrorContext, 12021: "PP: internal error, state == IGNORE"); 12022: ctxt->instate = XML_PARSER_DTD; 12023: #ifdef DEBUG_PUSH 12024: xmlGenericError(xmlGenericErrorContext, 12025: "PP: entering DTD\n"); 12026: #endif 12027: break; 12028: case XML_PARSER_PI: 12029: xmlGenericError(xmlGenericErrorContext, 12030: "PP: internal error, state == PI\n"); 12031: ctxt->instate = XML_PARSER_CONTENT; 12032: #ifdef DEBUG_PUSH 12033: xmlGenericError(xmlGenericErrorContext, 12034: "PP: entering CONTENT\n"); 12035: #endif 12036: break; 12037: case XML_PARSER_ENTITY_DECL: 12038: xmlGenericError(xmlGenericErrorContext, 12039: "PP: internal error, state == ENTITY_DECL\n"); 12040: ctxt->instate = XML_PARSER_DTD; 12041: #ifdef DEBUG_PUSH 12042: xmlGenericError(xmlGenericErrorContext, 12043: "PP: entering DTD\n"); 12044: #endif 12045: break; 12046: case XML_PARSER_ENTITY_VALUE: 12047: xmlGenericError(xmlGenericErrorContext, 12048: "PP: internal error, state == ENTITY_VALUE\n"); 12049: ctxt->instate = XML_PARSER_CONTENT; 12050: #ifdef DEBUG_PUSH 12051: xmlGenericError(xmlGenericErrorContext, 12052: "PP: entering DTD\n"); 12053: #endif 12054: break; 12055: case XML_PARSER_ATTRIBUTE_VALUE: 12056: xmlGenericError(xmlGenericErrorContext, 12057: "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12058: ctxt->instate = XML_PARSER_START_TAG; 12059: #ifdef DEBUG_PUSH 12060: xmlGenericError(xmlGenericErrorContext, 12061: "PP: entering START_TAG\n"); 12062: #endif 12063: break; 12064: case XML_PARSER_SYSTEM_LITERAL: 12065: xmlGenericError(xmlGenericErrorContext, 12066: "PP: internal error, state == SYSTEM_LITERAL\n"); 12067: ctxt->instate = XML_PARSER_START_TAG; 12068: #ifdef DEBUG_PUSH 12069: xmlGenericError(xmlGenericErrorContext, 12070: "PP: entering START_TAG\n"); 12071: #endif 12072: break; 12073: case XML_PARSER_PUBLIC_LITERAL: 12074: xmlGenericError(xmlGenericErrorContext, 12075: "PP: internal error, state == PUBLIC_LITERAL\n"); 12076: ctxt->instate = 
XML_PARSER_START_TAG; 12077: #ifdef DEBUG_PUSH 12078: xmlGenericError(xmlGenericErrorContext, 12079: "PP: entering START_TAG\n"); 12080: #endif 12081: break; 12082: } 12083: } 12084: done: 12085: #ifdef DEBUG_PUSH 12086: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12087: #endif 12088: return(ret); 12089: encoding_error: 12090: { 12091: char buffer[150]; 12092: 12093: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12094: ctxt->input->cur[0], ctxt->input->cur[1], 12095: ctxt->input->cur[2], ctxt->input->cur[3]); 12096: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12097: "Input is not proper UTF-8, indicate encoding !\n%s", 12098: BAD_CAST buffer, NULL); 12099: } 12100: return(0); 12101: } 12102: 12103: /** 12104: * xmlParseCheckTransition: 12105: * @ctxt: an XML parser context 12106: * @chunk: a char array 12107: * @size: the size in byte of the chunk 12108: * 12109: * Check depending on the current parser state if the chunk given must be 12110: * processed immediately or one need more data to advance on parsing. 
12111: * 12112: * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12113: */ 12114: static int 12115: xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12116: if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12117: return(-1); 12118: if (ctxt->instate == XML_PARSER_START_TAG) { 12119: if (memchr(chunk, '>', size) != NULL) 12120: return(1); 12121: return(0); 12122: } 12123: if (ctxt->progressive == XML_PARSER_COMMENT) { 12124: if (memchr(chunk, '>', size) != NULL) 12125: return(1); 12126: return(0); 12127: } 12128: if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12129: if (memchr(chunk, '>', size) != NULL) 12130: return(1); 12131: return(0); 12132: } 12133: if (ctxt->progressive == XML_PARSER_PI) { 12134: if (memchr(chunk, '>', size) != NULL) 12135: return(1); 12136: return(0); 12137: } 12138: if (ctxt->instate == XML_PARSER_END_TAG) { 12139: if (memchr(chunk, '>', size) != NULL) 12140: return(1); 12141: return(0); 12142: } 12143: if ((ctxt->progressive == XML_PARSER_DTD) || 12144: (ctxt->instate == XML_PARSER_DTD)) { 12145: if (memchr(chunk, '>', size) != NULL) 12146: return(1); 12147: return(0); 12148: } 12149: return(1); 12150: } 12151: 12152: /** 12153: * xmlParseChunk: 12154: * @ctxt: an XML parser context 12155: * @chunk: an char array 12156: * @size: the size in byte of the chunk 12157: * @terminate: last chunk indicator 12158: * 12159: * Parse a Chunk of memory 12160: * 12161: * Returns zero if no error, the xmlParserErrors otherwise. 
12162: */ 12163: int 12164: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12165: int terminate) { 12166: int end_in_lf = 0; 12167: int remain = 0; 12168: size_t old_avail = 0; 12169: size_t avail = 0; 12170: 12171: if (ctxt == NULL) 12172: return(XML_ERR_INTERNAL_ERROR); 12173: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12174: return(ctxt->errNo); 12175: if (ctxt->instate == XML_PARSER_EOF) 12176: return(-1); 12177: if (ctxt->instate == XML_PARSER_START) 12178: xmlDetectSAX2(ctxt); 12179: if ((size > 0) && (chunk != NULL) && (!terminate) && 12180: (chunk[size - 1] == '\r')) { 12181: end_in_lf = 1; 12182: size--; 12183: } 12184: 12185: xmldecl_done: 12186: 12187: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12188: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12189: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12190: size_t cur = ctxt->input->cur - ctxt->input->base; 12191: int res; 12192: 12193: old_avail = xmlBufUse(ctxt->input->buf->buffer); 12194: /* 12195: * Specific handling if we autodetected an encoding, we should not 12196: * push more than the first line ... 
which depend on the encoding 12197: * And only push the rest once the final encoding was detected 12198: */ 12199: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12200: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12201: unsigned int len = 45; 12202: 12203: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12204: BAD_CAST "UTF-16")) || 12205: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12206: BAD_CAST "UTF16"))) 12207: len = 90; 12208: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12209: BAD_CAST "UCS-4")) || 12210: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12211: BAD_CAST "UCS4"))) 12212: len = 180; 12213: 12214: if (ctxt->input->buf->rawconsumed < len) 12215: len -= ctxt->input->buf->rawconsumed; 12216: 12217: /* 12218: * Change size for reading the initial declaration only 12219: * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12220: * will blindly copy extra bytes from memory. 
12221: */ 12222: if ((unsigned int) size > len) { 12223: remain = size - len; 12224: size = len; 12225: } else { 12226: remain = 0; 12227: } 12228: } 12229: res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12230: if (res < 0) { 12231: ctxt->errNo = XML_PARSER_EOF; 12232: ctxt->disableSAX = 1; 12233: return (XML_PARSER_EOF); 12234: } 12235: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12236: #ifdef DEBUG_PUSH 12237: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12238: #endif 12239: 12240: } else if (ctxt->instate != XML_PARSER_EOF) { 12241: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12242: xmlParserInputBufferPtr in = ctxt->input->buf; 12243: if ((in->encoder != NULL) && (in->buffer != NULL) && 12244: (in->raw != NULL)) { 12245: int nbchars; 12246: size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12247: size_t current = ctxt->input->cur - ctxt->input->base; 12248: 12249: nbchars = xmlCharEncInput(in, terminate); 12250: if (nbchars < 0) { 12251: /* TODO 2.6.0 */ 12252: xmlGenericError(xmlGenericErrorContext, 12253: "xmlParseChunk: encoder error\n"); 12254: return(XML_ERR_INVALID_ENCODING); 12255: } 12256: xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12257: } 12258: } 12259: } 12260: if (remain != 0) { 12261: xmlParseTryOrFinish(ctxt, 0); 12262: } else { 12263: if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12264: avail = xmlBufUse(ctxt->input->buf->buffer); 12265: /* 12266: * Depending on the current state it may not be such 12267: * a good idea to try parsing if there is nothing in the chunk 12268: * which would be worth doing a parser state transition and we 12269: * need to wait for more data 12270: */ 12271: if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12272: (old_avail == 0) || (avail == 0) || 12273: (xmlParseCheckTransition(ctxt, 12274: (const char *)&ctxt->input->base[old_avail], 12275: avail - old_avail))) 12276: xmlParseTryOrFinish(ctxt, 
terminate); 12277: } 12278: if (ctxt->instate == XML_PARSER_EOF) 12279: return(ctxt->errNo); 12280: 12281: if ((ctxt->input != NULL) && 12282: (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12283: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12284: ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12285: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12286: ctxt->instate = XML_PARSER_EOF; 12287: } 12288: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 12289: return(ctxt->errNo); 12290: 12291: if (remain != 0) { 12292: chunk += size; 12293: size = remain; 12294: remain = 0; 12295: goto xmldecl_done; 12296: } 12297: if ((end_in_lf == 1) && (ctxt->input != NULL) && 12298: (ctxt->input->buf != NULL)) { 12299: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12300: ctxt->input); 12301: size_t current = ctxt->input->cur - ctxt->input->base; 12302: 12303: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12304: 12305: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12306: base, current); 12307: } 12308: if (terminate) { 12309: /* 12310: * Check for termination 12311: */ 12312: int cur_avail = 0; 12313: 12314: if (ctxt->input != NULL) { 12315: if (ctxt->input->buf == NULL) 12316: cur_avail = ctxt->input->length - 12317: (ctxt->input->cur - ctxt->input->base); 12318: else 12319: cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12320: (ctxt->input->cur - ctxt->input->base); 12321: } 12322: 12323: if ((ctxt->instate != XML_PARSER_EOF) && 12324: (ctxt->instate != XML_PARSER_EPILOG)) { 12325: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12326: } 12327: if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12328: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12329: } 12330: if (ctxt->instate != XML_PARSER_EOF) { 12331: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12332: ctxt->sax->endDocument(ctxt->userData); 12333: } 12334: ctxt->instate = XML_PARSER_EOF; 12335: } 
12336: if (ctxt->wellFormed == 0) 12337: return((xmlParserErrors) ctxt->errNo); 12338: else 12339: return(0); 12340: } 12341: 12342: /************************************************************************ 12343: * * 12344: * I/O front end functions to the parser * 12345: * * 12346: ************************************************************************/ 12347: 12348: /** 12349: * xmlCreatePushParserCtxt: 12350: * @sax: a SAX handler 12351: * @user_data: The user data returned on SAX callbacks 12352: * @chunk: a pointer to an array of chars 12353: * @size: number of chars in the array 12354: * @filename: an optional file name or URI 12355: * 12356: * Create a parser context for using the XML parser in push mode. 12357: * If @buffer and @size are non-NULL, the data is used to detect 12358: * the encoding. The remaining characters will be parsed so they 12359: * don't need to be fed in again through xmlParseChunk. 12360: * To allow content encoding detection, @size should be >= 4 12361: * The value of @filename is used for fetching external entities 12362: * and error/warning reports. 
12363: * 12364: * Returns the new parser context or NULL 12365: */ 12366: 12367: xmlParserCtxtPtr 12368: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12369: const char *chunk, int size, const char *filename) { 12370: xmlParserCtxtPtr ctxt; 12371: xmlParserInputPtr inputStream; 12372: xmlParserInputBufferPtr buf; 12373: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12374: 12375: /* 12376: * plug some encoding conversion routines 12377: */ 12378: if ((chunk != NULL) && (size >= 4)) 12379: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12380: 12381: buf = xmlAllocParserInputBuffer(enc); 12382: if (buf == NULL) return(NULL); 12383: 12384: ctxt = xmlNewParserCtxt(); 12385: if (ctxt == NULL) { 12386: xmlErrMemory(NULL, "creating parser: out of memory\n"); 12387: xmlFreeParserInputBuffer(buf); 12388: return(NULL); 12389: } 12390: ctxt->dictNames = 1; 12391: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12392: if (ctxt->pushTab == NULL) { 12393: xmlErrMemory(ctxt, NULL); 12394: xmlFreeParserInputBuffer(buf); 12395: xmlFreeParserCtxt(ctxt); 12396: return(NULL); 12397: } 12398: if (sax != NULL) { 12399: #ifdef LIBXML_SAX1_ENABLED 12400: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12401: #endif /* LIBXML_SAX1_ENABLED */ 12402: xmlFree(ctxt->sax); 12403: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12404: if (ctxt->sax == NULL) { 12405: xmlErrMemory(ctxt, NULL); 12406: xmlFreeParserInputBuffer(buf); 12407: xmlFreeParserCtxt(ctxt); 12408: return(NULL); 12409: } 12410: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12411: if (sax->initialized == XML_SAX2_MAGIC) 12412: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12413: else 12414: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12415: if (user_data != NULL) 12416: ctxt->userData = user_data; 12417: } 12418: if (filename == NULL) { 12419: ctxt->directory = NULL; 12420: } else { 12421: ctxt->directory = xmlParserGetDirectory(filename); 
12422: } 12423: 12424: inputStream = xmlNewInputStream(ctxt); 12425: if (inputStream == NULL) { 12426: xmlFreeParserCtxt(ctxt); 12427: xmlFreeParserInputBuffer(buf); 12428: return(NULL); 12429: } 12430: 12431: if (filename == NULL) 12432: inputStream->filename = NULL; 12433: else { 12434: inputStream->filename = (char *) 12435: xmlCanonicPath((const xmlChar *) filename); 12436: if (inputStream->filename == NULL) { 12437: xmlFreeParserCtxt(ctxt); 12438: xmlFreeParserInputBuffer(buf); 12439: return(NULL); 12440: } 12441: } 12442: inputStream->buf = buf; 12443: xmlBufResetInput(inputStream->buf->buffer, inputStream); 12444: inputPush(ctxt, inputStream); 12445: 12446: /* 12447: * If the caller didn't provide an initial 'chunk' for determining 12448: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12449: * that it can be automatically determined later 12450: */ 12451: if ((size == 0) || (chunk == NULL)) { 12452: ctxt->charset = XML_CHAR_ENCODING_NONE; 12453: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12454: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12455: size_t cur = ctxt->input->cur - ctxt->input->base; 12456: 12457: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12458: 12459: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12460: #ifdef DEBUG_PUSH 12461: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12462: #endif 12463: } 12464: 12465: if (enc != XML_CHAR_ENCODING_NONE) { 12466: xmlSwitchEncoding(ctxt, enc); 12467: } 12468: 12469: return(ctxt); 12470: } 12471: #endif /* LIBXML_PUSH_ENABLED */ 12472: 12473: /** 12474: * xmlStopParser: 12475: * @ctxt: an XML parser context 12476: * 12477: * Blocks further parser processing 12478: */ 12479: void 12480: xmlStopParser(xmlParserCtxtPtr ctxt) { 12481: if (ctxt == NULL) 12482: return; 12483: ctxt->instate = XML_PARSER_EOF; 12484: ctxt->errNo = XML_ERR_USER_STOP; 12485: ctxt->disableSAX = 1; 12486: if 
(ctxt->input != NULL) { 12487: ctxt->input->cur = BAD_CAST""; 12488: ctxt->input->base = ctxt->input->cur; 12489: } 12490: } 12491: 12492: /** 12493: * xmlCreateIOParserCtxt: 12494: * @sax: a SAX handler 12495: * @user_data: The user data returned on SAX callbacks 12496: * @ioread: an I/O read function 12497: * @ioclose: an I/O close function 12498: * @ioctx: an I/O handler 12499: * @enc: the charset encoding if known 12500: * 12501: * Create a parser context for using the XML parser with an existing 12502: * I/O stream 12503: * 12504: * Returns the new parser context or NULL 12505: */ 12506: xmlParserCtxtPtr 12507: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12508: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12509: void *ioctx, xmlCharEncoding enc) { 12510: xmlParserCtxtPtr ctxt; 12511: xmlParserInputPtr inputStream; 12512: xmlParserInputBufferPtr buf; 12513: 12514: if (ioread == NULL) return(NULL); 12515: 12516: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12517: if (buf == NULL) { 12518: if (ioclose != NULL) 12519: ioclose(ioctx); 12520: return (NULL); 12521: } 12522: 12523: ctxt = xmlNewParserCtxt(); 12524: if (ctxt == NULL) { 12525: xmlFreeParserInputBuffer(buf); 12526: return(NULL); 12527: } 12528: if (sax != NULL) { 12529: #ifdef LIBXML_SAX1_ENABLED 12530: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12531: #endif /* LIBXML_SAX1_ENABLED */ 12532: xmlFree(ctxt->sax); 12533: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12534: if (ctxt->sax == NULL) { 12535: xmlErrMemory(ctxt, NULL); 12536: xmlFreeParserCtxt(ctxt); 12537: return(NULL); 12538: } 12539: memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12540: if (sax->initialized == XML_SAX2_MAGIC) 12541: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12542: else 12543: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12544: if (user_data != NULL) 12545: ctxt->userData = user_data; 12546: } 12547: 12548: inputStream = 
xmlNewIOInputStream(ctxt, buf, enc); 12549: if (inputStream == NULL) { 12550: xmlFreeParserCtxt(ctxt); 12551: return(NULL); 12552: } 12553: inputPush(ctxt, inputStream); 12554: 12555: return(ctxt); 12556: } 12557: 12558: #ifdef LIBXML_VALID_ENABLED 12559: /************************************************************************ 12560: * * 12561: * Front ends when parsing a DTD * 12562: * * 12563: ************************************************************************/ 12564: 12565: /** 12566: * xmlIOParseDTD: 12567: * @sax: the SAX handler block or NULL 12568: * @input: an Input Buffer 12569: * @enc: the charset encoding if known 12570: * 12571: * Load and parse a DTD 12572: * 12573: * Returns the resulting xmlDtdPtr or NULL in case of error. 12574: * @input will be freed by the function in any case. 12575: */ 12576: 12577: xmlDtdPtr 12578: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12579: xmlCharEncoding enc) { 12580: xmlDtdPtr ret = NULL; 12581: xmlParserCtxtPtr ctxt; 12582: xmlParserInputPtr pinput = NULL; 12583: xmlChar start[4]; 12584: 12585: if (input == NULL) 12586: return(NULL); 12587: 12588: ctxt = xmlNewParserCtxt(); 12589: if (ctxt == NULL) { 12590: xmlFreeParserInputBuffer(input); 12591: return(NULL); 12592: } 12593: 12594: /* 12595: * Set-up the SAX context 12596: */ 12597: if (sax != NULL) { 12598: if (ctxt->sax != NULL) 12599: xmlFree(ctxt->sax); 12600: ctxt->sax = sax; 12601: ctxt->userData = ctxt; 12602: } 12603: xmlDetectSAX2(ctxt); 12604: 12605: /* 12606: * generate a parser input from the I/O handler 12607: */ 12608: 12609: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12610: if (pinput == NULL) { 12611: if (sax != NULL) ctxt->sax = NULL; 12612: xmlFreeParserInputBuffer(input); 12613: xmlFreeParserCtxt(ctxt); 12614: return(NULL); 12615: } 12616: 12617: /* 12618: * plug some encoding conversion routines here. 
12619: */ 12620: if (xmlPushInput(ctxt, pinput) < 0) { 12621: if (sax != NULL) ctxt->sax = NULL; 12622: xmlFreeParserCtxt(ctxt); 12623: return(NULL); 12624: } 12625: if (enc != XML_CHAR_ENCODING_NONE) { 12626: xmlSwitchEncoding(ctxt, enc); 12627: } 12628: 12629: pinput->filename = NULL; 12630: pinput->line = 1; 12631: pinput->col = 1; 12632: pinput->base = ctxt->input->cur; 12633: pinput->cur = ctxt->input->cur; 12634: pinput->free = NULL; 12635: 12636: /* 12637: * let's parse that entity knowing it's an external subset. 12638: */ 12639: ctxt->inSubset = 2; 12640: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12641: if (ctxt->myDoc == NULL) { 12642: xmlErrMemory(ctxt, "New Doc failed"); 12643: return(NULL); 12644: } 12645: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12646: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12647: BAD_CAST "none", BAD_CAST "none"); 12648: 12649: if ((enc == XML_CHAR_ENCODING_NONE) && 12650: ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12651: /* 12652: * Get the 4 first bytes and decode the charset 12653: * if enc != XML_CHAR_ENCODING_NONE 12654: * plug some encoding conversion routines. 
12655: */ 12656: start[0] = RAW; 12657: start[1] = NXT(1); 12658: start[2] = NXT(2); 12659: start[3] = NXT(3); 12660: enc = xmlDetectCharEncoding(start, 4); 12661: if (enc != XML_CHAR_ENCODING_NONE) { 12662: xmlSwitchEncoding(ctxt, enc); 12663: } 12664: } 12665: 12666: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12667: 12668: if (ctxt->myDoc != NULL) { 12669: if (ctxt->wellFormed) { 12670: ret = ctxt->myDoc->extSubset; 12671: ctxt->myDoc->extSubset = NULL; 12672: if (ret != NULL) { 12673: xmlNodePtr tmp; 12674: 12675: ret->doc = NULL; 12676: tmp = ret->children; 12677: while (tmp != NULL) { 12678: tmp->doc = NULL; 12679: tmp = tmp->next; 12680: } 12681: } 12682: } else { 12683: ret = NULL; 12684: } 12685: xmlFreeDoc(ctxt->myDoc); 12686: ctxt->myDoc = NULL; 12687: } 12688: if (sax != NULL) ctxt->sax = NULL; 12689: xmlFreeParserCtxt(ctxt); 12690: 12691: return(ret); 12692: } 12693: 12694: /** 12695: * xmlSAXParseDTD: 12696: * @sax: the SAX handler block 12697: * @ExternalID: a NAME* containing the External ID of the DTD 12698: * @SystemID: a NAME* containing the URL to the DTD 12699: * 12700: * Load and parse an external subset. 12701: * 12702: * Returns the resulting xmlDtdPtr or NULL in case of error. 
12703: */ 12704: 12705: xmlDtdPtr 12706: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12707: const xmlChar *SystemID) { 12708: xmlDtdPtr ret = NULL; 12709: xmlParserCtxtPtr ctxt; 12710: xmlParserInputPtr input = NULL; 12711: xmlCharEncoding enc; 12712: xmlChar* systemIdCanonic; 12713: 12714: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12715: 12716: ctxt = xmlNewParserCtxt(); 12717: if (ctxt == NULL) { 12718: return(NULL); 12719: } 12720: 12721: /* 12722: * Set-up the SAX context 12723: */ 12724: if (sax != NULL) { 12725: if (ctxt->sax != NULL) 12726: xmlFree(ctxt->sax); 12727: ctxt->sax = sax; 12728: ctxt->userData = ctxt; 12729: } 12730: 12731: /* 12732: * Canonicalise the system ID 12733: */ 12734: systemIdCanonic = xmlCanonicPath(SystemID); 12735: if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12736: xmlFreeParserCtxt(ctxt); 12737: return(NULL); 12738: } 12739: 12740: /* 12741: * Ask the Entity resolver to load the damn thing 12742: */ 12743: 12744: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12745: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12746: systemIdCanonic); 12747: if (input == NULL) { 12748: if (sax != NULL) ctxt->sax = NULL; 12749: xmlFreeParserCtxt(ctxt); 12750: if (systemIdCanonic != NULL) 12751: xmlFree(systemIdCanonic); 12752: return(NULL); 12753: } 12754: 12755: /* 12756: * plug some encoding conversion routines here. 
12757: */ 12758: if (xmlPushInput(ctxt, input) < 0) { 12759: if (sax != NULL) ctxt->sax = NULL; 12760: xmlFreeParserCtxt(ctxt); 12761: if (systemIdCanonic != NULL) 12762: xmlFree(systemIdCanonic); 12763: return(NULL); 12764: } 12765: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12766: enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12767: xmlSwitchEncoding(ctxt, enc); 12768: } 12769: 12770: if (input->filename == NULL) 12771: input->filename = (char *) systemIdCanonic; 12772: else 12773: xmlFree(systemIdCanonic); 12774: input->line = 1; 12775: input->col = 1; 12776: input->base = ctxt->input->cur; 12777: input->cur = ctxt->input->cur; 12778: input->free = NULL; 12779: 12780: /* 12781: * let's parse that entity knowing it's an external subset. 12782: */ 12783: ctxt->inSubset = 2; 12784: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12785: if (ctxt->myDoc == NULL) { 12786: xmlErrMemory(ctxt, "New Doc failed"); 12787: if (sax != NULL) ctxt->sax = NULL; 12788: xmlFreeParserCtxt(ctxt); 12789: return(NULL); 12790: } 12791: ctxt->myDoc->properties = XML_DOC_INTERNAL; 12792: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12793: ExternalID, SystemID); 12794: xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12795: 12796: if (ctxt->myDoc != NULL) { 12797: if (ctxt->wellFormed) { 12798: ret = ctxt->myDoc->extSubset; 12799: ctxt->myDoc->extSubset = NULL; 12800: if (ret != NULL) { 12801: xmlNodePtr tmp; 12802: 12803: ret->doc = NULL; 12804: tmp = ret->children; 12805: while (tmp != NULL) { 12806: tmp->doc = NULL; 12807: tmp = tmp->next; 12808: } 12809: } 12810: } else { 12811: ret = NULL; 12812: } 12813: xmlFreeDoc(ctxt->myDoc); 12814: ctxt->myDoc = NULL; 12815: } 12816: if (sax != NULL) ctxt->sax = NULL; 12817: xmlFreeParserCtxt(ctxt); 12818: 12819: return(ret); 12820: } 12821: 12822: 12823: /** 12824: * xmlParseDTD: 12825: * @ExternalID: a NAME* containing the External ID of the DTD 12826: * @SystemID: a NAME* containing the URL to the DTD 12827: * 
12828: * Load and parse an external subset. 12829: * 12830: * Returns the resulting xmlDtdPtr or NULL in case of error. 12831: */ 12832: 12833: xmlDtdPtr 12834: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12835: return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12836: } 12837: #endif /* LIBXML_VALID_ENABLED */ 12838: 12839: /************************************************************************ 12840: * * 12841: * Front ends when parsing an Entity * 12842: * * 12843: ************************************************************************/ 12844: 12845: /** 12846: * xmlParseCtxtExternalEntity: 12847: * @ctx: the existing parsing context 12848: * @URL: the URL for the entity to load 12849: * @ID: the System ID for the entity to load 12850: * @lst: the return value for the set of parsed nodes 12851: * 12852: * Parse an external general entity within an existing parsing context 12853: * An external general parsed entity is well-formed if it matches the 12854: * production labeled extParsedEnt. 12855: * 12856: * [78] extParsedEnt ::= TextDecl? 
content 12857: * 12858: * Returns 0 if the entity is well formed, -1 in case of args problem and 12859: * the parser error code otherwise 12860: */ 12861: 12862: int 12863: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12864: const xmlChar *ID, xmlNodePtr *lst) { 12865: xmlParserCtxtPtr ctxt; 12866: xmlDocPtr newDoc; 12867: xmlNodePtr newRoot; 12868: xmlSAXHandlerPtr oldsax = NULL; 12869: int ret = 0; 12870: xmlChar start[4]; 12871: xmlCharEncoding enc; 12872: 12873: if (ctx == NULL) return(-1); 12874: 12875: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12876: (ctx->depth > 1024)) { 12877: return(XML_ERR_ENTITY_LOOP); 12878: } 12879: 12880: if (lst != NULL) 12881: *lst = NULL; 12882: if ((URL == NULL) && (ID == NULL)) 12883: return(-1); 12884: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12885: return(-1); 12886: 12887: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12888: if (ctxt == NULL) { 12889: return(-1); 12890: } 12891: 12892: oldsax = ctxt->sax; 12893: ctxt->sax = ctx->sax; 12894: xmlDetectSAX2(ctxt); 12895: newDoc = xmlNewDoc(BAD_CAST "1.0"); 12896: if (newDoc == NULL) { 12897: xmlFreeParserCtxt(ctxt); 12898: return(-1); 12899: } 12900: newDoc->properties = XML_DOC_INTERNAL; 12901: if (ctx->myDoc->dict) { 12902: newDoc->dict = ctx->myDoc->dict; 12903: xmlDictReference(newDoc->dict); 12904: } 12905: if (ctx->myDoc != NULL) { 12906: newDoc->intSubset = ctx->myDoc->intSubset; 12907: newDoc->extSubset = ctx->myDoc->extSubset; 12908: } 12909: if (ctx->myDoc->URL != NULL) { 12910: newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12911: } 12912: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12913: if (newRoot == NULL) { 12914: ctxt->sax = oldsax; 12915: xmlFreeParserCtxt(ctxt); 12916: newDoc->intSubset = NULL; 12917: newDoc->extSubset = NULL; 12918: xmlFreeDoc(newDoc); 12919: return(-1); 12920: } 12921: xmlAddChild((xmlNodePtr) newDoc, newRoot); 12922: nodePush(ctxt, 
newDoc->children); 12923: if (ctx->myDoc == NULL) { 12924: ctxt->myDoc = newDoc; 12925: } else { 12926: ctxt->myDoc = ctx->myDoc; 12927: newDoc->children->doc = ctx->myDoc; 12928: } 12929: 12930: /* 12931: * Get the 4 first bytes and decode the charset 12932: * if enc != XML_CHAR_ENCODING_NONE 12933: * plug some encoding conversion routines. 12934: */ 12935: GROW 12936: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12937: start[0] = RAW; 12938: start[1] = NXT(1); 12939: start[2] = NXT(2); 12940: start[3] = NXT(3); 12941: enc = xmlDetectCharEncoding(start, 4); 12942: if (enc != XML_CHAR_ENCODING_NONE) { 12943: xmlSwitchEncoding(ctxt, enc); 12944: } 12945: } 12946: 12947: /* 12948: * Parse a possible text declaration first 12949: */ 12950: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12951: xmlParseTextDecl(ctxt); 12952: /* 12953: * An XML-1.0 document can't reference an entity not XML-1.0 12954: */ 12955: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12956: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12957: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12958: "Version mismatch between document and entity\n"); 12959: } 12960: } 12961: 12962: /* 12963: * If the user provided its own SAX callbacks then reuse the 12964: * useData callback field, otherwise the expected setup in a 12965: * DOM builder is to have userData == ctxt 12966: */ 12967: if (ctx->userData == ctx) 12968: ctxt->userData = ctxt; 12969: else 12970: ctxt->userData = ctx->userData; 12971: 12972: /* 12973: * Doing validity checking on chunk doesn't make sense 12974: */ 12975: ctxt->instate = XML_PARSER_CONTENT; 12976: ctxt->validate = ctx->validate; 12977: ctxt->valid = ctx->valid; 12978: ctxt->loadsubset = ctx->loadsubset; 12979: ctxt->depth = ctx->depth + 1; 12980: ctxt->replaceEntities = ctx->replaceEntities; 12981: if (ctxt->validate) { 12982: ctxt->vctxt.error = ctx->vctxt.error; 12983: ctxt->vctxt.warning = ctx->vctxt.warning; 12984: } else { 12985: 
ctxt->vctxt.error = NULL; 12986: ctxt->vctxt.warning = NULL; 12987: } 12988: ctxt->vctxt.nodeTab = NULL; 12989: ctxt->vctxt.nodeNr = 0; 12990: ctxt->vctxt.nodeMax = 0; 12991: ctxt->vctxt.node = NULL; 12992: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12993: ctxt->dict = ctx->dict; 12994: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12995: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12996: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12997: ctxt->dictNames = ctx->dictNames; 12998: ctxt->attsDefault = ctx->attsDefault; 12999: ctxt->attsSpecial = ctx->attsSpecial; 13000: ctxt->linenumbers = ctx->linenumbers; 13001: 13002: xmlParseContent(ctxt); 13003: 13004: ctx->validate = ctxt->validate; 13005: ctx->valid = ctxt->valid; 13006: if ((RAW == '<') && (NXT(1) == '/')) { 13007: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13008: } else if (RAW != 0) { 13009: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13010: } 13011: if (ctxt->node != newDoc->children) { 13012: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13013: } 13014: 13015: if (!ctxt->wellFormed) { 13016: if (ctxt->errNo == 0) 13017: ret = 1; 13018: else 13019: ret = ctxt->errNo; 13020: } else { 13021: if (lst != NULL) { 13022: xmlNodePtr cur; 13023: 13024: /* 13025: * Return the newly created nodeset after unlinking it from 13026: * they pseudo parent. 
13027: */ 13028: cur = newDoc->children->children; 13029: *lst = cur; 13030: while (cur != NULL) { 13031: cur->parent = NULL; 13032: cur = cur->next; 13033: } 13034: newDoc->children->children = NULL; 13035: } 13036: ret = 0; 13037: } 13038: ctxt->sax = oldsax; 13039: ctxt->dict = NULL; 13040: ctxt->attsDefault = NULL; 13041: ctxt->attsSpecial = NULL; 13042: xmlFreeParserCtxt(ctxt); 13043: newDoc->intSubset = NULL; 13044: newDoc->extSubset = NULL; 13045: xmlFreeDoc(newDoc); 13046: 13047: return(ret); 13048: } 13049: 13050: /** 13051: * xmlParseExternalEntityPrivate: 13052: * @doc: the document the chunk pertains to 13053: * @oldctxt: the previous parser context if available 13054: * @sax: the SAX handler bloc (possibly NULL) 13055: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13056: * @depth: Used for loop detection, use 0 13057: * @URL: the URL for the entity to load 13058: * @ID: the System ID for the entity to load 13059: * @list: the return value for the set of parsed nodes 13060: * 13061: * Private version of xmlParseExternalEntity() 13062: * 13063: * Returns 0 if the entity is well formed, -1 in case of args problem and 13064: * the parser error code otherwise 13065: */ 13066: 13067: static xmlParserErrors 13068: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13069: xmlSAXHandlerPtr sax, 13070: void *user_data, int depth, const xmlChar *URL, 13071: const xmlChar *ID, xmlNodePtr *list) { 13072: xmlParserCtxtPtr ctxt; 13073: xmlDocPtr newDoc; 13074: xmlNodePtr newRoot; 13075: xmlSAXHandlerPtr oldsax = NULL; 13076: xmlParserErrors ret = XML_ERR_OK; 13077: xmlChar start[4]; 13078: xmlCharEncoding enc; 13079: 13080: if (((depth > 40) && 13081: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13082: (depth > 1024)) { 13083: return(XML_ERR_ENTITY_LOOP); 13084: } 13085: 13086: if (list != NULL) 13087: *list = NULL; 13088: if ((URL == NULL) && (ID == NULL)) 13089: return(XML_ERR_INTERNAL_ERROR); 
13090: if (doc == NULL) 13091: return(XML_ERR_INTERNAL_ERROR); 13092: 13093: 13094: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13095: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13096: ctxt->userData = ctxt; 13097: if (oldctxt != NULL) { 13098: ctxt->_private = oldctxt->_private; 13099: ctxt->loadsubset = oldctxt->loadsubset; 13100: ctxt->validate = oldctxt->validate; 13101: ctxt->external = oldctxt->external; 13102: ctxt->record_info = oldctxt->record_info; 13103: ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13104: ctxt->node_seq.length = oldctxt->node_seq.length; 13105: ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13106: } else { 13107: /* 13108: * Doing validity checking on chunk without context 13109: * doesn't make sense 13110: */ 13111: ctxt->_private = NULL; 13112: ctxt->validate = 0; 13113: ctxt->external = 2; 13114: ctxt->loadsubset = 0; 13115: } 13116: if (sax != NULL) { 13117: oldsax = ctxt->sax; 13118: ctxt->sax = sax; 13119: if (user_data != NULL) 13120: ctxt->userData = user_data; 13121: } 13122: xmlDetectSAX2(ctxt); 13123: newDoc = xmlNewDoc(BAD_CAST "1.0"); 13124: if (newDoc == NULL) { 13125: ctxt->node_seq.maximum = 0; 13126: ctxt->node_seq.length = 0; 13127: ctxt->node_seq.buffer = NULL; 13128: xmlFreeParserCtxt(ctxt); 13129: return(XML_ERR_INTERNAL_ERROR); 13130: } 13131: newDoc->properties = XML_DOC_INTERNAL; 13132: newDoc->intSubset = doc->intSubset; 13133: newDoc->extSubset = doc->extSubset; 13134: newDoc->dict = doc->dict; 13135: xmlDictReference(newDoc->dict); 13136: 13137: if (doc->URL != NULL) { 13138: newDoc->URL = xmlStrdup(doc->URL); 13139: } 13140: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13141: if (newRoot == NULL) { 13142: if (sax != NULL) 13143: ctxt->sax = oldsax; 13144: ctxt->node_seq.maximum = 0; 13145: ctxt->node_seq.length = 0; 13146: ctxt->node_seq.buffer = NULL; 13147: xmlFreeParserCtxt(ctxt); 13148: newDoc->intSubset = NULL; 13149: newDoc->extSubset = 
NULL; 13150: xmlFreeDoc(newDoc); 13151: return(XML_ERR_INTERNAL_ERROR); 13152: } 13153: xmlAddChild((xmlNodePtr) newDoc, newRoot); 13154: nodePush(ctxt, newDoc->children); 13155: ctxt->myDoc = doc; 13156: newRoot->doc = doc; 13157: 13158: /* 13159: * Get the 4 first bytes and decode the charset 13160: * if enc != XML_CHAR_ENCODING_NONE 13161: * plug some encoding conversion routines. 13162: */ 13163: GROW; 13164: if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13165: start[0] = RAW; 13166: start[1] = NXT(1); 13167: start[2] = NXT(2); 13168: start[3] = NXT(3); 13169: enc = xmlDetectCharEncoding(start, 4); 13170: if (enc != XML_CHAR_ENCODING_NONE) { 13171: xmlSwitchEncoding(ctxt, enc); 13172: } 13173: } 13174: 13175: /* 13176: * Parse a possible text declaration first 13177: */ 13178: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13179: xmlParseTextDecl(ctxt); 13180: } 13181: 13182: ctxt->instate = XML_PARSER_CONTENT; 13183: ctxt->depth = depth; 13184: 13185: xmlParseContent(ctxt); 13186: 13187: if ((RAW == '<') && (NXT(1) == '/')) { 13188: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13189: } else if (RAW != 0) { 13190: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13191: } 13192: if (ctxt->node != newDoc->children) { 13193: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13194: } 13195: 13196: if (!ctxt->wellFormed) { 13197: if (ctxt->errNo == 0) 13198: ret = XML_ERR_INTERNAL_ERROR; 13199: else 13200: ret = (xmlParserErrors)ctxt->errNo; 13201: } else { 13202: if (list != NULL) { 13203: xmlNodePtr cur; 13204: 13205: /* 13206: * Return the newly created nodeset after unlinking it from 13207: * they pseudo parent. 
13208: */ 13209: cur = newDoc->children->children; 13210: *list = cur; 13211: while (cur != NULL) { 13212: cur->parent = NULL; 13213: cur = cur->next; 13214: } 13215: newDoc->children->children = NULL; 13216: } 13217: ret = XML_ERR_OK; 13218: } 13219: 13220: /* 13221: * Record in the parent context the number of entities replacement 13222: * done when parsing that reference. 13223: */ 13224: if (oldctxt != NULL) 13225: oldctxt->nbentities += ctxt->nbentities; 13226: 13227: /* 13228: * Also record the size of the entity parsed 13229: */ 13230: if (ctxt->input != NULL) { 13231: oldctxt->sizeentities += ctxt->input->consumed; 13232: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13233: } 13234: /* 13235: * And record the last error if any 13236: */ 13237: if (ctxt->lastError.code != XML_ERR_OK) 13238: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13239: 13240: if (sax != NULL) 13241: ctxt->sax = oldsax; 13242: oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13243: oldctxt->node_seq.length = ctxt->node_seq.length; 13244: oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13245: ctxt->node_seq.maximum = 0; 13246: ctxt->node_seq.length = 0; 13247: ctxt->node_seq.buffer = NULL; 13248: xmlFreeParserCtxt(ctxt); 13249: newDoc->intSubset = NULL; 13250: newDoc->extSubset = NULL; 13251: xmlFreeDoc(newDoc); 13252: 13253: return(ret); 13254: } 13255: 13256: #ifdef LIBXML_SAX1_ENABLED 13257: /** 13258: * xmlParseExternalEntity: 13259: * @doc: the document the chunk pertains to 13260: * @sax: the SAX handler bloc (possibly NULL) 13261: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13262: * @depth: Used for loop detection, use 0 13263: * @URL: the URL for the entity to load 13264: * @ID: the System ID for the entity to load 13265: * @lst: the return value for the set of parsed nodes 13266: * 13267: * Parse an external general entity 13268: * An external general parsed entity is well-formed if it matches the 13269: * production 
labeled extParsedEnt. 13270: * 13271: * [78] extParsedEnt ::= TextDecl? content 13272: * 13273: * Returns 0 if the entity is well formed, -1 in case of args problem and 13274: * the parser error code otherwise 13275: */ 13276: 13277: int 13278: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13279: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13280: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13281: ID, lst)); 13282: } 13283: 13284: /** 13285: * xmlParseBalancedChunkMemory: 13286: * @doc: the document the chunk pertains to 13287: * @sax: the SAX handler bloc (possibly NULL) 13288: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13289: * @depth: Used for loop detection, use 0 13290: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13291: * @lst: the return value for the set of parsed nodes 13292: * 13293: * Parse a well-balanced chunk of an XML document 13294: * called by the parser 13295: * The allowed sequence for the Well Balanced Chunk is the one defined by 13296: * the content production in the XML grammar: 13297: * 13298: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13299: * 13300: * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13301: * the parser error code otherwise 13302: */ 13303: 13304: int 13305: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13306: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13307: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13308: depth, string, lst, 0 ); 13309: } 13310: #endif /* LIBXML_SAX1_ENABLED */ 13311: 13312: /** 13313: * xmlParseBalancedChunkMemoryInternal: 13314: * @oldctxt: the existing parsing context 13315: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13316: * @user_data: the user data field for the parser context 13317: * @lst: the return value for the set 
of parsed nodes 13318: * 13319: * 13320: * Parse a well-balanced chunk of an XML document 13321: * called by the parser 13322: * The allowed sequence for the Well Balanced Chunk is the one defined by 13323: * the content production in the XML grammar: 13324: * 13325: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13326: * 13327: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13328: * error code otherwise 13329: * 13330: * In case recover is set to 1, the nodelist will not be empty even if 13331: * the parsed chunk is not well balanced. 13332: */ 13333: static xmlParserErrors 13334: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13335: const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13336: xmlParserCtxtPtr ctxt; 13337: xmlDocPtr newDoc = NULL; 13338: xmlNodePtr newRoot; 13339: xmlSAXHandlerPtr oldsax = NULL; 13340: xmlNodePtr content = NULL; 13341: xmlNodePtr last = NULL; 13342: int size; 13343: xmlParserErrors ret = XML_ERR_OK; 13344: #ifdef SAX2 13345: int i; 13346: #endif 13347: 13348: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13349: (oldctxt->depth > 1024)) { 13350: return(XML_ERR_ENTITY_LOOP); 13351: } 13352: 13353: 13354: if (lst != NULL) 13355: *lst = NULL; 13356: if (string == NULL) 13357: return(XML_ERR_INTERNAL_ERROR); 13358: 13359: size = xmlStrlen(string); 13360: 13361: ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13362: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13363: if (user_data != NULL) 13364: ctxt->userData = user_data; 13365: else 13366: ctxt->userData = ctxt; 13367: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13368: ctxt->dict = oldctxt->dict; 13369: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13370: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13371: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13372: 13373: #ifdef SAX2 13374: /* propagate 
namespaces down the entity */ 13375: for (i = 0;i < oldctxt->nsNr;i += 2) { 13376: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13377: } 13378: #endif 13379: 13380: oldsax = ctxt->sax; 13381: ctxt->sax = oldctxt->sax; 13382: xmlDetectSAX2(ctxt); 13383: ctxt->replaceEntities = oldctxt->replaceEntities; 13384: ctxt->options = oldctxt->options; 13385: 13386: ctxt->_private = oldctxt->_private; 13387: if (oldctxt->myDoc == NULL) { 13388: newDoc = xmlNewDoc(BAD_CAST "1.0"); 13389: if (newDoc == NULL) { 13390: ctxt->sax = oldsax; 13391: ctxt->dict = NULL; 13392: xmlFreeParserCtxt(ctxt); 13393: return(XML_ERR_INTERNAL_ERROR); 13394: } 13395: newDoc->properties = XML_DOC_INTERNAL; 13396: newDoc->dict = ctxt->dict; 13397: xmlDictReference(newDoc->dict); 13398: ctxt->myDoc = newDoc; 13399: } else { 13400: ctxt->myDoc = oldctxt->myDoc; 13401: content = ctxt->myDoc->children; 13402: last = ctxt->myDoc->last; 13403: } 13404: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13405: if (newRoot == NULL) { 13406: ctxt->sax = oldsax; 13407: ctxt->dict = NULL; 13408: xmlFreeParserCtxt(ctxt); 13409: if (newDoc != NULL) { 13410: xmlFreeDoc(newDoc); 13411: } 13412: return(XML_ERR_INTERNAL_ERROR); 13413: } 13414: ctxt->myDoc->children = NULL; 13415: ctxt->myDoc->last = NULL; 13416: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13417: nodePush(ctxt, ctxt->myDoc->children); 13418: ctxt->instate = XML_PARSER_CONTENT; 13419: ctxt->depth = oldctxt->depth + 1; 13420: 13421: ctxt->validate = 0; 13422: ctxt->loadsubset = oldctxt->loadsubset; 13423: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13424: /* 13425: * ID/IDREF registration will be done in xmlValidateElement below 13426: */ 13427: ctxt->loadsubset |= XML_SKIP_IDS; 13428: } 13429: ctxt->dictNames = oldctxt->dictNames; 13430: ctxt->attsDefault = oldctxt->attsDefault; 13431: ctxt->attsSpecial = oldctxt->attsSpecial; 13432: 13433: xmlParseContent(ctxt); 13434: if ((RAW == '<') && (NXT(1) 
== '/')) { 13435: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13436: } else if (RAW != 0) { 13437: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13438: } 13439: if (ctxt->node != ctxt->myDoc->children) { 13440: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13441: } 13442: 13443: if (!ctxt->wellFormed) { 13444: if (ctxt->errNo == 0) 13445: ret = XML_ERR_INTERNAL_ERROR; 13446: else 13447: ret = (xmlParserErrors)ctxt->errNo; 13448: } else { 13449: ret = XML_ERR_OK; 13450: } 13451: 13452: if ((lst != NULL) && (ret == XML_ERR_OK)) { 13453: xmlNodePtr cur; 13454: 13455: /* 13456: * Return the newly created nodeset after unlinking it from 13457: * they pseudo parent. 13458: */ 13459: cur = ctxt->myDoc->children->children; 13460: *lst = cur; 13461: while (cur != NULL) { 13462: #ifdef LIBXML_VALID_ENABLED 13463: if ((oldctxt->validate) && (oldctxt->wellFormed) && 13464: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13465: (cur->type == XML_ELEMENT_NODE)) { 13466: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13467: oldctxt->myDoc, cur); 13468: } 13469: #endif /* LIBXML_VALID_ENABLED */ 13470: cur->parent = NULL; 13471: cur = cur->next; 13472: } 13473: ctxt->myDoc->children->children = NULL; 13474: } 13475: if (ctxt->myDoc != NULL) { 13476: xmlFreeNode(ctxt->myDoc->children); 13477: ctxt->myDoc->children = content; 13478: ctxt->myDoc->last = last; 13479: } 13480: 13481: /* 13482: * Record in the parent context the number of entities replacement 13483: * done when parsing that reference. 
13484: */ 13485: if (oldctxt != NULL) 13486: oldctxt->nbentities += ctxt->nbentities; 13487: 13488: /* 13489: * Also record the last error if any 13490: */ 13491: if (ctxt->lastError.code != XML_ERR_OK) 13492: xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13493: 13494: ctxt->sax = oldsax; 13495: ctxt->dict = NULL; 13496: ctxt->attsDefault = NULL; 13497: ctxt->attsSpecial = NULL; 13498: xmlFreeParserCtxt(ctxt); 13499: if (newDoc != NULL) { 13500: xmlFreeDoc(newDoc); 13501: } 13502: 13503: return(ret); 13504: } 13505: 13506: /** 13507: * xmlParseInNodeContext: 13508: * @node: the context node 13509: * @data: the input string 13510: * @datalen: the input string length in bytes 13511: * @options: a combination of xmlParserOption 13512: * @lst: the return value for the set of parsed nodes 13513: * 13514: * Parse a well-balanced chunk of an XML document 13515: * within the context (DTD, namespaces, etc ...) of the given node. 13516: * 13517: * The allowed sequence for the data is a Well Balanced Chunk defined by 13518: * the content production in the XML grammar: 13519: * 13520: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13521: * 13522: * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13523: * error code otherwise 13524: */ 13525: xmlParserErrors 13526: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13527: int options, xmlNodePtr *lst) { 13528: #ifdef SAX2 13529: xmlParserCtxtPtr ctxt; 13530: xmlDocPtr doc = NULL; 13531: xmlNodePtr fake, cur; 13532: int nsnr = 0; 13533: 13534: xmlParserErrors ret = XML_ERR_OK; 13535: 13536: /* 13537: * check all input parameters, grab the document 13538: */ 13539: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13540: return(XML_ERR_INTERNAL_ERROR); 13541: switch (node->type) { 13542: case XML_ELEMENT_NODE: 13543: case XML_ATTRIBUTE_NODE: 13544: case XML_TEXT_NODE: 13545: case XML_CDATA_SECTION_NODE: 13546: case XML_ENTITY_REF_NODE: 
13547: case XML_PI_NODE: 13548: case XML_COMMENT_NODE: 13549: case XML_DOCUMENT_NODE: 13550: case XML_HTML_DOCUMENT_NODE: 13551: break; 13552: default: 13553: return(XML_ERR_INTERNAL_ERROR); 13554: 13555: } 13556: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13557: (node->type != XML_DOCUMENT_NODE) && 13558: (node->type != XML_HTML_DOCUMENT_NODE)) 13559: node = node->parent; 13560: if (node == NULL) 13561: return(XML_ERR_INTERNAL_ERROR); 13562: if (node->type == XML_ELEMENT_NODE) 13563: doc = node->doc; 13564: else 13565: doc = (xmlDocPtr) node; 13566: if (doc == NULL) 13567: return(XML_ERR_INTERNAL_ERROR); 13568: 13569: /* 13570: * allocate a context and set-up everything not related to the 13571: * node position in the tree 13572: */ 13573: if (doc->type == XML_DOCUMENT_NODE) 13574: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13575: #ifdef LIBXML_HTML_ENABLED 13576: else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13577: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13578: /* 13579: * When parsing in context, it makes no sense to add implied 13580: * elements like html/body/etc... 13581: */ 13582: options |= HTML_PARSE_NOIMPLIED; 13583: } 13584: #endif 13585: else 13586: return(XML_ERR_INTERNAL_ERROR); 13587: 13588: if (ctxt == NULL) 13589: return(XML_ERR_NO_MEMORY); 13590: 13591: /* 13592: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13593: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13594: * we must wait until the last moment to free the original one. 
13595: */ 13596: if (doc->dict != NULL) { 13597: if (ctxt->dict != NULL) 13598: xmlDictFree(ctxt->dict); 13599: ctxt->dict = doc->dict; 13600: } else 13601: options |= XML_PARSE_NODICT; 13602: 13603: if (doc->encoding != NULL) { 13604: xmlCharEncodingHandlerPtr hdlr; 13605: 13606: if (ctxt->encoding != NULL) 13607: xmlFree((xmlChar *) ctxt->encoding); 13608: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13609: 13610: hdlr = xmlFindCharEncodingHandler(doc->encoding); 13611: if (hdlr != NULL) { 13612: xmlSwitchToEncoding(ctxt, hdlr); 13613: } else { 13614: return(XML_ERR_UNSUPPORTED_ENCODING); 13615: } 13616: } 13617: 13618: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13619: xmlDetectSAX2(ctxt); 13620: ctxt->myDoc = doc; 13621: 13622: fake = xmlNewComment(NULL); 13623: if (fake == NULL) { 13624: xmlFreeParserCtxt(ctxt); 13625: return(XML_ERR_NO_MEMORY); 13626: } 13627: xmlAddChild(node, fake); 13628: 13629: if (node->type == XML_ELEMENT_NODE) { 13630: nodePush(ctxt, node); 13631: /* 13632: * initialize the SAX2 namespaces stack 13633: */ 13634: cur = node; 13635: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13636: xmlNsPtr ns = cur->nsDef; 13637: const xmlChar *iprefix, *ihref; 13638: 13639: while (ns != NULL) { 13640: if (ctxt->dict) { 13641: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13642: ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13643: } else { 13644: iprefix = ns->prefix; 13645: ihref = ns->href; 13646: } 13647: 13648: if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13649: nsPush(ctxt, iprefix, ihref); 13650: nsnr++; 13651: } 13652: ns = ns->next; 13653: } 13654: cur = cur->parent; 13655: } 13656: ctxt->instate = XML_PARSER_CONTENT; 13657: } 13658: 13659: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13660: /* 13661: * ID/IDREF registration will be done in xmlValidateElement below 13662: */ 13663: ctxt->loadsubset |= XML_SKIP_IDS; 13664: } 13665: 13666: #ifdef LIBXML_HTML_ENABLED 13667: if (doc->type == 
XML_HTML_DOCUMENT_NODE) 13668: __htmlParseContent(ctxt); 13669: else 13670: #endif 13671: xmlParseContent(ctxt); 13672: 13673: nsPop(ctxt, nsnr); 13674: if ((RAW == '<') && (NXT(1) == '/')) { 13675: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13676: } else if (RAW != 0) { 13677: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13678: } 13679: if ((ctxt->node != NULL) && (ctxt->node != node)) { 13680: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13681: ctxt->wellFormed = 0; 13682: } 13683: 13684: if (!ctxt->wellFormed) { 13685: if (ctxt->errNo == 0) 13686: ret = XML_ERR_INTERNAL_ERROR; 13687: else 13688: ret = (xmlParserErrors)ctxt->errNo; 13689: } else { 13690: ret = XML_ERR_OK; 13691: } 13692: 13693: /* 13694: * Return the newly created nodeset after unlinking it from 13695: * the pseudo sibling. 13696: */ 13697: 13698: cur = fake->next; 13699: fake->next = NULL; 13700: node->last = fake; 13701: 13702: if (cur != NULL) { 13703: cur->prev = NULL; 13704: } 13705: 13706: *lst = cur; 13707: 13708: while (cur != NULL) { 13709: cur->parent = NULL; 13710: cur = cur->next; 13711: } 13712: 13713: xmlUnlinkNode(fake); 13714: xmlFreeNode(fake); 13715: 13716: 13717: if (ret != XML_ERR_OK) { 13718: xmlFreeNodeList(*lst); 13719: *lst = NULL; 13720: } 13721: 13722: if (doc->dict != NULL) 13723: ctxt->dict = NULL; 13724: xmlFreeParserCtxt(ctxt); 13725: 13726: return(ret); 13727: #else /* !SAX2 */ 13728: return(XML_ERR_INTERNAL_ERROR); 13729: #endif 13730: } 13731: 13732: #ifdef LIBXML_SAX1_ENABLED 13733: /** 13734: * xmlParseBalancedChunkMemoryRecover: 13735: * @doc: the document the chunk pertains to 13736: * @sax: the SAX handler bloc (possibly NULL) 13737: * @user_data: The user data returned on SAX callbacks (possibly NULL) 13738: * @depth: Used for loop detection, use 0 13739: * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13740: * @lst: the return value for the set of parsed nodes 13741: * @recover: return nodes even if the data is broken 
(use 0)
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 *
 * In case recover is set to 1, the nodelist will not be empty even if
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
 * some extent.
 */
int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
     int recover) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content, newRoot;
    int size;
    int ret = 0;

    if (depth > 40) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(-1);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(-1);
    ctxt->userData = ctxt;
    if (sax != NULL) {
        /* oldsax is reinstalled before the context is destroyed */
        oldsax = ctxt->sax;
        ctxt->sax = sax;
        if (user_data != NULL)
            ctxt->userData = user_data;
    }
    newDoc = xmlNewDoc(BAD_CAST "1.0");
    if (newDoc == NULL) {
        /*
         * Same clean-up as the other exits of this function: never
         * destroy the context with the caller supplied @sax installed.
         */
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        return(-1);
    }
    newDoc->properties = XML_DOC_INTERNAL;
    if ((doc != NULL) && (doc->dict != NULL)) {
        /* switch to the document dictionary, holding a reference on it */
        xmlDictFree(ctxt->dict);
        ctxt->dict = doc->dict;
        xmlDictReference(ctxt->dict);
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
        ctxt->dictNames = 1;
    } else {
        xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
    }
    if (doc != NULL) {
        /* borrowed, detached again before newDoc is freed */
        newDoc->intSubset = doc->intSubset;
        newDoc->extSubset = doc->extSubset;
    }
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        if (sax != NULL)
            ctxt->sax = oldsax;
        xmlFreeParserCtxt(ctxt);
        newDoc->intSubset = NULL;
        newDoc->extSubset = NULL;
        xmlFreeDoc(newDoc);
        return(-1);
    }
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
    nodePush(ctxt, newRoot);
    ctxt->myDoc = newDoc;
    if (doc != NULL) {
        newDoc->children->doc = doc;
        /* Ensure that doc has XML spec namespace */
        xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
        newDoc->oldNs = doc->oldNs;
    }
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = depth;

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    xmlDetectSAX2(ctxt);

    if ( doc != NULL ){
        /* hide the document's own children for the time of the parse */
        content = doc->children;
        doc->children = NULL;
        xmlParseContent(ctxt);
        doc->children = content;
    }
    else {
        xmlParseContent(ctxt);
    }
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != newDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = 1;
        else
            ret = ctxt->errNo;
    } else {
        ret = 0;
    }

    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = newDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
            xmlSetTreeDoc(cur, doc);
            cur->parent = NULL;
            cur = cur->next;
        }
        newDoc->children->children = NULL;
    }

    if (sax != NULL)
        ctxt->sax = oldsax;
    xmlFreeParserCtxt(ctxt);
    /* detach everything borrowed from @doc before freeing newDoc */
    newDoc->intSubset = NULL;
    newDoc->extSubset = NULL;
    newDoc->oldNs = NULL;
    xmlFreeDoc(newDoc);

    return(ret);
}

/**
 * xmlSAXParseEntity:
 * @sax:  the SAX handler block
 * @filename:  the filename
 *
 * parse an XML external entity out of context and build a tree.
 * It use the given SAX function block to handle the parsing callback.
 * If sax is NULL, fallback to the default DOM tree building routines.
 *
 * [78] extParsedEnt ::= TextDecl?
content 13907: * 13908: * This correspond to a "Well Balanced" chunk 13909: * 13910: * Returns the resulting document tree 13911: */ 13912: 13913: xmlDocPtr 13914: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13915: xmlDocPtr ret; 13916: xmlParserCtxtPtr ctxt; 13917: 13918: ctxt = xmlCreateFileParserCtxt(filename); 13919: if (ctxt == NULL) { 13920: return(NULL); 13921: } 13922: if (sax != NULL) { 13923: if (ctxt->sax != NULL) 13924: xmlFree(ctxt->sax); 13925: ctxt->sax = sax; 13926: ctxt->userData = NULL; 13927: } 13928: 13929: xmlParseExtParsedEnt(ctxt); 13930: 13931: if (ctxt->wellFormed) 13932: ret = ctxt->myDoc; 13933: else { 13934: ret = NULL; 13935: xmlFreeDoc(ctxt->myDoc); 13936: ctxt->myDoc = NULL; 13937: } 13938: if (sax != NULL) 13939: ctxt->sax = NULL; 13940: xmlFreeParserCtxt(ctxt); 13941: 13942: return(ret); 13943: } 13944: 13945: /** 13946: * xmlParseEntity: 13947: * @filename: the filename 13948: * 13949: * parse an XML external entity out of context and build a tree. 13950: * 13951: * [78] extParsedEnt ::= TextDecl? content 13952: * 13953: * This correspond to a "Well Balanced" chunk 13954: * 13955: * Returns the resulting document tree 13956: */ 13957: 13958: xmlDocPtr 13959: xmlParseEntity(const char *filename) { 13960: return(xmlSAXParseEntity(NULL, filename)); 13961: } 13962: #endif /* LIBXML_SAX1_ENABLED */ 13963: 13964: /** 13965: * xmlCreateEntityParserCtxtInternal: 13966: * @URL: the entity URL 13967: * @ID: the entity PUBLIC ID 13968: * @base: a possible base for the target URI 13969: * @pctx: parser context used to set options on new context 13970: * 13971: * Create a parser context for an external entity 13972: * Automatic support for ZLIB/Compress compressed document is provided 13973: * by default if found at compile-time. 
13974: * 13975: * Returns the new parser context or NULL 13976: */ 13977: static xmlParserCtxtPtr 13978: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13979: const xmlChar *base, xmlParserCtxtPtr pctx) { 13980: xmlParserCtxtPtr ctxt; 13981: xmlParserInputPtr inputStream; 13982: char *directory = NULL; 13983: xmlChar *uri; 13984: 13985: ctxt = xmlNewParserCtxt(); 13986: if (ctxt == NULL) { 13987: return(NULL); 13988: } 13989: 13990: if (pctx != NULL) { 13991: ctxt->options = pctx->options; 13992: ctxt->_private = pctx->_private; 13993: } 13994: 13995: uri = xmlBuildURI(URL, base); 13996: 13997: if (uri == NULL) { 13998: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13999: if (inputStream == NULL) { 14000: xmlFreeParserCtxt(ctxt); 14001: return(NULL); 14002: } 14003: 14004: inputPush(ctxt, inputStream); 14005: 14006: if ((ctxt->directory == NULL) && (directory == NULL)) 14007: directory = xmlParserGetDirectory((char *)URL); 14008: if ((ctxt->directory == NULL) && (directory != NULL)) 14009: ctxt->directory = directory; 14010: } else { 14011: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14012: if (inputStream == NULL) { 14013: xmlFree(uri); 14014: xmlFreeParserCtxt(ctxt); 14015: return(NULL); 14016: } 14017: 14018: inputPush(ctxt, inputStream); 14019: 14020: if ((ctxt->directory == NULL) && (directory == NULL)) 14021: directory = xmlParserGetDirectory((char *)uri); 14022: if ((ctxt->directory == NULL) && (directory != NULL)) 14023: ctxt->directory = directory; 14024: xmlFree(uri); 14025: } 14026: return(ctxt); 14027: } 14028: 14029: /** 14030: * xmlCreateEntityParserCtxt: 14031: * @URL: the entity URL 14032: * @ID: the entity PUBLIC ID 14033: * @base: a possible base for the target URI 14034: * 14035: * Create a parser context for an external entity 14036: * Automatic support for ZLIB/Compress compressed document is provided 14037: * by default if found at compile-time. 
14038: * 14039: * Returns the new parser context or NULL 14040: */ 14041: xmlParserCtxtPtr 14042: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14043: const xmlChar *base) { 14044: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14045: 14046: } 14047: 14048: /************************************************************************ 14049: * * 14050: * Front ends when parsing from a file * 14051: * * 14052: ************************************************************************/ 14053: 14054: /** 14055: * xmlCreateURLParserCtxt: 14056: * @filename: the filename or URL 14057: * @options: a combination of xmlParserOption 14058: * 14059: * Create a parser context for a file or URL content. 14060: * Automatic support for ZLIB/Compress compressed document is provided 14061: * by default if found at compile-time and for file accesses 14062: * 14063: * Returns the new parser context or NULL 14064: */ 14065: xmlParserCtxtPtr 14066: xmlCreateURLParserCtxt(const char *filename, int options) 14067: { 14068: xmlParserCtxtPtr ctxt; 14069: xmlParserInputPtr inputStream; 14070: char *directory = NULL; 14071: 14072: ctxt = xmlNewParserCtxt(); 14073: if (ctxt == NULL) { 14074: xmlErrMemory(NULL, "cannot allocate parser context"); 14075: return(NULL); 14076: } 14077: 14078: if (options) 14079: xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14080: ctxt->linenumbers = 1; 14081: 14082: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14083: if (inputStream == NULL) { 14084: xmlFreeParserCtxt(ctxt); 14085: return(NULL); 14086: } 14087: 14088: inputPush(ctxt, inputStream); 14089: if ((ctxt->directory == NULL) && (directory == NULL)) 14090: directory = xmlParserGetDirectory(filename); 14091: if ((ctxt->directory == NULL) && (directory != NULL)) 14092: ctxt->directory = directory; 14093: 14094: return(ctxt); 14095: } 14096: 14097: /** 14098: * xmlCreateFileParserCtxt: 14099: * @filename: the filename 14100: * 14101: * Create a parser context for 
a file content. 14102: * Automatic support for ZLIB/Compress compressed document is provided 14103: * by default if found at compile-time. 14104: * 14105: * Returns the new parser context or NULL 14106: */ 14107: xmlParserCtxtPtr 14108: xmlCreateFileParserCtxt(const char *filename) 14109: { 14110: return(xmlCreateURLParserCtxt(filename, 0)); 14111: } 14112: 14113: #ifdef LIBXML_SAX1_ENABLED 14114: /** 14115: * xmlSAXParseFileWithData: 14116: * @sax: the SAX handler block 14117: * @filename: the filename 14118: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14119: * documents 14120: * @data: the userdata 14121: * 14122: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14123: * compressed document is provided by default if found at compile-time. 14124: * It use the given SAX function block to handle the parsing callback. 14125: * If sax is NULL, fallback to the default DOM tree building routines. 14126: * 14127: * User data (void *) is stored within the parser context in the 14128: * context's _private member, so it is available nearly everywhere in libxml 14129: * 14130: * Returns the resulting document tree 14131: */ 14132: 14133: xmlDocPtr 14134: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14135: int recovery, void *data) { 14136: xmlDocPtr ret; 14137: xmlParserCtxtPtr ctxt; 14138: 14139: xmlInitParser(); 14140: 14141: ctxt = xmlCreateFileParserCtxt(filename); 14142: if (ctxt == NULL) { 14143: return(NULL); 14144: } 14145: if (sax != NULL) { 14146: if (ctxt->sax != NULL) 14147: xmlFree(ctxt->sax); 14148: ctxt->sax = sax; 14149: } 14150: xmlDetectSAX2(ctxt); 14151: if (data!=NULL) { 14152: ctxt->_private = data; 14153: } 14154: 14155: if (ctxt->directory == NULL) 14156: ctxt->directory = xmlParserGetDirectory(filename); 14157: 14158: ctxt->recovery = recovery; 14159: 14160: xmlParseDocument(ctxt); 14161: 14162: if ((ctxt->wellFormed) || recovery) { 14163: ret = ctxt->myDoc; 14164: if (ret != 
NULL) { 14165: if (ctxt->input->buf->compressed > 0) 14166: ret->compression = 9; 14167: else 14168: ret->compression = ctxt->input->buf->compressed; 14169: } 14170: } 14171: else { 14172: ret = NULL; 14173: xmlFreeDoc(ctxt->myDoc); 14174: ctxt->myDoc = NULL; 14175: } 14176: if (sax != NULL) 14177: ctxt->sax = NULL; 14178: xmlFreeParserCtxt(ctxt); 14179: 14180: return(ret); 14181: } 14182: 14183: /** 14184: * xmlSAXParseFile: 14185: * @sax: the SAX handler block 14186: * @filename: the filename 14187: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14188: * documents 14189: * 14190: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14191: * compressed document is provided by default if found at compile-time. 14192: * It use the given SAX function block to handle the parsing callback. 14193: * If sax is NULL, fallback to the default DOM tree building routines. 14194: * 14195: * Returns the resulting document tree 14196: */ 14197: 14198: xmlDocPtr 14199: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14200: int recovery) { 14201: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14202: } 14203: 14204: /** 14205: * xmlRecoverDoc: 14206: * @cur: a pointer to an array of xmlChar 14207: * 14208: * parse an XML in-memory document and build a tree. 14209: * In the case the document is not Well Formed, a attempt to build a 14210: * tree is tried anyway 14211: * 14212: * Returns the resulting document tree or NULL in case of failure 14213: */ 14214: 14215: xmlDocPtr 14216: xmlRecoverDoc(const xmlChar *cur) { 14217: return(xmlSAXParseDoc(NULL, cur, 1)); 14218: } 14219: 14220: /** 14221: * xmlParseFile: 14222: * @filename: the filename 14223: * 14224: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14225: * compressed document is provided by default if found at compile-time. 14226: * 14227: * Returns the resulting document tree if the file was wellformed, 14228: * NULL otherwise. 
14229: */ 14230: 14231: xmlDocPtr 14232: xmlParseFile(const char *filename) { 14233: return(xmlSAXParseFile(NULL, filename, 0)); 14234: } 14235: 14236: /** 14237: * xmlRecoverFile: 14238: * @filename: the filename 14239: * 14240: * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14241: * compressed document is provided by default if found at compile-time. 14242: * In the case the document is not Well Formed, it attempts to build 14243: * a tree anyway 14244: * 14245: * Returns the resulting document tree or NULL in case of failure 14246: */ 14247: 14248: xmlDocPtr 14249: xmlRecoverFile(const char *filename) { 14250: return(xmlSAXParseFile(NULL, filename, 1)); 14251: } 14252: 14253: 14254: /** 14255: * xmlSetupParserForBuffer: 14256: * @ctxt: an XML parser context 14257: * @buffer: a xmlChar * buffer 14258: * @filename: a file name 14259: * 14260: * Setup the parser context to parse a new buffer; Clears any prior 14261: * contents from the parser context. The buffer parameter must not be 14262: * NULL, but the filename parameter can be 14263: */ 14264: void 14265: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14266: const char* filename) 14267: { 14268: xmlParserInputPtr input; 14269: 14270: if ((ctxt == NULL) || (buffer == NULL)) 14271: return; 14272: 14273: input = xmlNewInputStream(ctxt); 14274: if (input == NULL) { 14275: xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14276: xmlClearParserCtxt(ctxt); 14277: return; 14278: } 14279: 14280: xmlClearParserCtxt(ctxt); 14281: if (filename != NULL) 14282: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14283: input->base = buffer; 14284: input->cur = buffer; 14285: input->end = &buffer[xmlStrlen(buffer)]; 14286: inputPush(ctxt, input); 14287: } 14288: 14289: /** 14290: * xmlSAXUserParseFile: 14291: * @sax: a SAX handler 14292: * @user_data: The user data returned on SAX callbacks 14293: * @filename: a file name 14294: * 14295: * parse an 
XML file and call the given SAX handler routines. 14296: * Automatic support for ZLIB/Compress compressed document is provided 14297: * 14298: * Returns 0 in case of success or a error number otherwise 14299: */ 14300: int 14301: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14302: const char *filename) { 14303: int ret = 0; 14304: xmlParserCtxtPtr ctxt; 14305: 14306: ctxt = xmlCreateFileParserCtxt(filename); 14307: if (ctxt == NULL) return -1; 14308: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14309: xmlFree(ctxt->sax); 14310: ctxt->sax = sax; 14311: xmlDetectSAX2(ctxt); 14312: 14313: if (user_data != NULL) 14314: ctxt->userData = user_data; 14315: 14316: xmlParseDocument(ctxt); 14317: 14318: if (ctxt->wellFormed) 14319: ret = 0; 14320: else { 14321: if (ctxt->errNo != 0) 14322: ret = ctxt->errNo; 14323: else 14324: ret = -1; 14325: } 14326: if (sax != NULL) 14327: ctxt->sax = NULL; 14328: if (ctxt->myDoc != NULL) { 14329: xmlFreeDoc(ctxt->myDoc); 14330: ctxt->myDoc = NULL; 14331: } 14332: xmlFreeParserCtxt(ctxt); 14333: 14334: return ret; 14335: } 14336: #endif /* LIBXML_SAX1_ENABLED */ 14337: 14338: /************************************************************************ 14339: * * 14340: * Front ends when parsing from memory * 14341: * * 14342: ************************************************************************/ 14343: 14344: /** 14345: * xmlCreateMemoryParserCtxt: 14346: * @buffer: a pointer to a char array 14347: * @size: the size of the array 14348: * 14349: * Create a parser context for an XML in-memory document. 
14350: * 14351: * Returns the new parser context or NULL 14352: */ 14353: xmlParserCtxtPtr 14354: xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14355: xmlParserCtxtPtr ctxt; 14356: xmlParserInputPtr input; 14357: xmlParserInputBufferPtr buf; 14358: 14359: if (buffer == NULL) 14360: return(NULL); 14361: if (size <= 0) 14362: return(NULL); 14363: 14364: ctxt = xmlNewParserCtxt(); 14365: if (ctxt == NULL) 14366: return(NULL); 14367: 14368: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14369: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14370: if (buf == NULL) { 14371: xmlFreeParserCtxt(ctxt); 14372: return(NULL); 14373: } 14374: 14375: input = xmlNewInputStream(ctxt); 14376: if (input == NULL) { 14377: xmlFreeParserInputBuffer(buf); 14378: xmlFreeParserCtxt(ctxt); 14379: return(NULL); 14380: } 14381: 14382: input->filename = NULL; 14383: input->buf = buf; 14384: xmlBufResetInput(input->buf->buffer, input); 14385: 14386: inputPush(ctxt, input); 14387: return(ctxt); 14388: } 14389: 14390: #ifdef LIBXML_SAX1_ENABLED 14391: /** 14392: * xmlSAXParseMemoryWithData: 14393: * @sax: the SAX handler block 14394: * @buffer: an pointer to a char array 14395: * @size: the size of the array 14396: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14397: * documents 14398: * @data: the userdata 14399: * 14400: * parse an XML in-memory block and use the given SAX function block 14401: * to handle the parsing callback. If sax is NULL, fallback to the default 14402: * DOM tree building routines. 
14403: * 14404: * User data (void *) is stored within the parser context in the 14405: * context's _private member, so it is available nearly everywhere in libxml 14406: * 14407: * Returns the resulting document tree 14408: */ 14409: 14410: xmlDocPtr 14411: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14412: int size, int recovery, void *data) { 14413: xmlDocPtr ret; 14414: xmlParserCtxtPtr ctxt; 14415: 14416: xmlInitParser(); 14417: 14418: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14419: if (ctxt == NULL) return(NULL); 14420: if (sax != NULL) { 14421: if (ctxt->sax != NULL) 14422: xmlFree(ctxt->sax); 14423: ctxt->sax = sax; 14424: } 14425: xmlDetectSAX2(ctxt); 14426: if (data!=NULL) { 14427: ctxt->_private=data; 14428: } 14429: 14430: ctxt->recovery = recovery; 14431: 14432: xmlParseDocument(ctxt); 14433: 14434: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14435: else { 14436: ret = NULL; 14437: xmlFreeDoc(ctxt->myDoc); 14438: ctxt->myDoc = NULL; 14439: } 14440: if (sax != NULL) 14441: ctxt->sax = NULL; 14442: xmlFreeParserCtxt(ctxt); 14443: 14444: return(ret); 14445: } 14446: 14447: /** 14448: * xmlSAXParseMemory: 14449: * @sax: the SAX handler block 14450: * @buffer: an pointer to a char array 14451: * @size: the size of the array 14452: * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14453: * documents 14454: * 14455: * parse an XML in-memory block and use the given SAX function block 14456: * to handle the parsing callback. If sax is NULL, fallback to the default 14457: * DOM tree building routines. 
14458: * 14459: * Returns the resulting document tree 14460: */ 14461: xmlDocPtr 14462: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14463: int size, int recovery) { 14464: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14465: } 14466: 14467: /** 14468: * xmlParseMemory: 14469: * @buffer: an pointer to a char array 14470: * @size: the size of the array 14471: * 14472: * parse an XML in-memory block and build a tree. 14473: * 14474: * Returns the resulting document tree 14475: */ 14476: 14477: xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14478: return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14479: } 14480: 14481: /** 14482: * xmlRecoverMemory: 14483: * @buffer: an pointer to a char array 14484: * @size: the size of the array 14485: * 14486: * parse an XML in-memory block and build a tree. 14487: * In the case the document is not Well Formed, an attempt to 14488: * build a tree is tried anyway 14489: * 14490: * Returns the resulting document tree or NULL in case of error 14491: */ 14492: 14493: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14494: return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14495: } 14496: 14497: /** 14498: * xmlSAXUserParseMemory: 14499: * @sax: a SAX handler 14500: * @user_data: The user data returned on SAX callbacks 14501: * @buffer: an in-memory XML document input 14502: * @size: the length of the XML document in bytes 14503: * 14504: * A better SAX parsing routine. 14505: * parse an XML in-memory buffer and call the given SAX handler routines. 
14506: * 14507: * Returns 0 in case of success or a error number otherwise 14508: */ 14509: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14510: const char *buffer, int size) { 14511: int ret = 0; 14512: xmlParserCtxtPtr ctxt; 14513: 14514: xmlInitParser(); 14515: 14516: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14517: if (ctxt == NULL) return -1; 14518: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14519: xmlFree(ctxt->sax); 14520: ctxt->sax = sax; 14521: xmlDetectSAX2(ctxt); 14522: 14523: if (user_data != NULL) 14524: ctxt->userData = user_data; 14525: 14526: xmlParseDocument(ctxt); 14527: 14528: if (ctxt->wellFormed) 14529: ret = 0; 14530: else { 14531: if (ctxt->errNo != 0) 14532: ret = ctxt->errNo; 14533: else 14534: ret = -1; 14535: } 14536: if (sax != NULL) 14537: ctxt->sax = NULL; 14538: if (ctxt->myDoc != NULL) { 14539: xmlFreeDoc(ctxt->myDoc); 14540: ctxt->myDoc = NULL; 14541: } 14542: xmlFreeParserCtxt(ctxt); 14543: 14544: return ret; 14545: } 14546: #endif /* LIBXML_SAX1_ENABLED */ 14547: 14548: /** 14549: * xmlCreateDocParserCtxt: 14550: * @cur: a pointer to an array of xmlChar 14551: * 14552: * Creates a parser context for an XML in-memory document. 14553: * 14554: * Returns the new parser context or NULL 14555: */ 14556: xmlParserCtxtPtr 14557: xmlCreateDocParserCtxt(const xmlChar *cur) { 14558: int len; 14559: 14560: if (cur == NULL) 14561: return(NULL); 14562: len = xmlStrlen(cur); 14563: return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14564: } 14565: 14566: #ifdef LIBXML_SAX1_ENABLED 14567: /** 14568: * xmlSAXParseDoc: 14569: * @sax: the SAX handler block 14570: * @cur: a pointer to an array of xmlChar 14571: * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14572: * documents 14573: * 14574: * parse an XML in-memory document and build a tree. 14575: * It use the given SAX function block to handle the parsing callback. 
14576: * If sax is NULL, fallback to the default DOM tree building routines. 14577: * 14578: * Returns the resulting document tree 14579: */ 14580: 14581: xmlDocPtr 14582: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14583: xmlDocPtr ret; 14584: xmlParserCtxtPtr ctxt; 14585: xmlSAXHandlerPtr oldsax = NULL; 14586: 14587: if (cur == NULL) return(NULL); 14588: 14589: 14590: ctxt = xmlCreateDocParserCtxt(cur); 14591: if (ctxt == NULL) return(NULL); 14592: if (sax != NULL) { 14593: oldsax = ctxt->sax; 14594: ctxt->sax = sax; 14595: ctxt->userData = NULL; 14596: } 14597: xmlDetectSAX2(ctxt); 14598: 14599: xmlParseDocument(ctxt); 14600: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14601: else { 14602: ret = NULL; 14603: xmlFreeDoc(ctxt->myDoc); 14604: ctxt->myDoc = NULL; 14605: } 14606: if (sax != NULL) 14607: ctxt->sax = oldsax; 14608: xmlFreeParserCtxt(ctxt); 14609: 14610: return(ret); 14611: } 14612: 14613: /** 14614: * xmlParseDoc: 14615: * @cur: a pointer to an array of xmlChar 14616: * 14617: * parse an XML in-memory document and build a tree. 
14618: * 14619: * Returns the resulting document tree 14620: */ 14621: 14622: xmlDocPtr 14623: xmlParseDoc(const xmlChar *cur) { 14624: return(xmlSAXParseDoc(NULL, cur, 0)); 14625: } 14626: #endif /* LIBXML_SAX1_ENABLED */ 14627: 14628: #ifdef LIBXML_LEGACY_ENABLED 14629: /************************************************************************ 14630: * * 14631: * Specific function to keep track of entities references * 14632: * and used by the XSLT debugger * 14633: * * 14634: ************************************************************************/ 14635: 14636: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14637: 14638: /** 14639: * xmlAddEntityReference: 14640: * @ent : A valid entity 14641: * @firstNode : A valid first node for children of entity 14642: * @lastNode : A valid last node of children entity 14643: * 14644: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14645: */ 14646: static void 14647: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14648: xmlNodePtr lastNode) 14649: { 14650: if (xmlEntityRefFunc != NULL) { 14651: (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14652: } 14653: } 14654: 14655: 14656: /** 14657: * xmlSetEntityReferenceFunc: 14658: * @func: A valid function 14659: * 14660: * Set the function to call call back when a xml reference has been made 14661: */ 14662: void 14663: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14664: { 14665: xmlEntityRefFunc = func; 14666: } 14667: #endif /* LIBXML_LEGACY_ENABLED */ 14668: 14669: /************************************************************************ 14670: * * 14671: * Miscellaneous * 14672: * * 14673: ************************************************************************/ 14674: 14675: #ifdef LIBXML_XPATH_ENABLED 14676: #include <libxml/xpath.h> 14677: #endif 14678: 14679: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14680: static int xmlParserInitialized = 0; 14681: 14682: /** 14683: 
* xmlInitParser: 14684: * 14685: * Initialization function for the XML parser. 14686: * This is not reentrant. Call once before processing in case of 14687: * use in multithreaded programs. 14688: */ 14689: 14690: void 14691: xmlInitParser(void) { 14692: if (xmlParserInitialized != 0) 14693: return; 14694: 14695: #ifdef LIBXML_THREAD_ENABLED 14696: __xmlGlobalInitMutexLock(); 14697: if (xmlParserInitialized == 0) { 14698: #endif 14699: xmlInitThreads(); 14700: xmlInitGlobals(); 14701: if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14702: (xmlGenericError == NULL)) 14703: initGenericErrorDefaultFunc(NULL); 14704: xmlInitMemory(); 14705: xmlInitializeDict(); 14706: xmlInitCharEncodingHandlers(); 14707: xmlDefaultSAXHandlerInit(); 14708: xmlRegisterDefaultInputCallbacks(); 14709: #ifdef LIBXML_OUTPUT_ENABLED 14710: xmlRegisterDefaultOutputCallbacks(); 14711: #endif /* LIBXML_OUTPUT_ENABLED */ 14712: #ifdef LIBXML_HTML_ENABLED 14713: htmlInitAutoClose(); 14714: htmlDefaultSAXHandlerInit(); 14715: #endif 14716: #ifdef LIBXML_XPATH_ENABLED 14717: xmlXPathInit(); 14718: #endif 14719: xmlParserInitialized = 1; 14720: #ifdef LIBXML_THREAD_ENABLED 14721: } 14722: __xmlGlobalInitMutexUnlock(); 14723: #endif 14724: } 14725: 14726: /** 14727: * xmlCleanupParser: 14728: * 14729: * This function name is somewhat misleading. It does not clean up 14730: * parser state, it cleans up memory allocated by the library itself. 14731: * It is a cleanup function for the XML library. It tries to reclaim all 14732: * related global memory allocated for the library processing. 14733: * It doesn't deallocate any document related memory. One should 14734: * call xmlCleanupParser() only when the process has finished using 14735: * the library and all XML/HTML documents built with it. 14736: * See also xmlInitParser() which has the opposite function of preparing 14737: * the library for operations. 
14738: * 14739: * WARNING: if your application is multithreaded or has plugin support 14740: * calling this may crash the application if another thread or 14741: * a plugin is still using libxml2. It's sometimes very hard to 14742: * guess if libxml2 is in use in the application, some libraries 14743: * or plugins may use it without notice. In case of doubt abstain 14744: * from calling this function or do it just before calling exit() 14745: * to avoid leak reports from valgrind ! 14746: */ 14747: 14748: void 14749: xmlCleanupParser(void) { 14750: if (!xmlParserInitialized) 14751: return; 14752: 14753: xmlCleanupCharEncodingHandlers(); 14754: #ifdef LIBXML_CATALOG_ENABLED 14755: xmlCatalogCleanup(); 14756: #endif 14757: xmlDictCleanup(); 14758: xmlCleanupInputCallbacks(); 14759: #ifdef LIBXML_OUTPUT_ENABLED 14760: xmlCleanupOutputCallbacks(); 14761: #endif 14762: #ifdef LIBXML_SCHEMAS_ENABLED 14763: xmlSchemaCleanupTypes(); 14764: xmlRelaxNGCleanupTypes(); 14765: #endif 14766: xmlCleanupGlobals(); 14767: xmlResetLastError(); 14768: xmlCleanupThreads(); /* must be last if called not from the main thread */ 14769: xmlCleanupMemory(); 14770: xmlParserInitialized = 0; 14771: } 14772: 14773: /************************************************************************ 14774: * * 14775: * New set (2.6.0) of simpler and more flexible APIs * 14776: * * 14777: ************************************************************************/ 14778: 14779: /** 14780: * DICT_FREE: 14781: * @str: a string 14782: * 14783: * Free a string if it is not owned by the "dict" dictionnary in the 14784: * current scope 14785: */ 14786: #define DICT_FREE(str) \ 14787: if ((str) && ((!dict) || \ 14788: (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14789: xmlFree((char *)(str)); 14790: 14791: /** 14792: * xmlCtxtReset: 14793: * @ctxt: an XML parser context 14794: * 14795: * Reset a parser context 14796: */ 14797: void 14798: xmlCtxtReset(xmlParserCtxtPtr ctxt) 14799: { 14800: xmlParserInputPtr 
input; 14801: xmlDictPtr dict; 14802: 14803: if (ctxt == NULL) 14804: return; 14805: 14806: dict = ctxt->dict; 14807: 14808: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14809: xmlFreeInputStream(input); 14810: } 14811: ctxt->inputNr = 0; 14812: ctxt->input = NULL; 14813: 14814: ctxt->spaceNr = 0; 14815: if (ctxt->spaceTab != NULL) { 14816: ctxt->spaceTab[0] = -1; 14817: ctxt->space = &ctxt->spaceTab[0]; 14818: } else { 14819: ctxt->space = NULL; 14820: } 14821: 14822: 14823: ctxt->nodeNr = 0; 14824: ctxt->node = NULL; 14825: 14826: ctxt->nameNr = 0; 14827: ctxt->name = NULL; 14828: 14829: DICT_FREE(ctxt->version); 14830: ctxt->version = NULL; 14831: DICT_FREE(ctxt->encoding); 14832: ctxt->encoding = NULL; 14833: DICT_FREE(ctxt->directory); 14834: ctxt->directory = NULL; 14835: DICT_FREE(ctxt->extSubURI); 14836: ctxt->extSubURI = NULL; 14837: DICT_FREE(ctxt->extSubSystem); 14838: ctxt->extSubSystem = NULL; 14839: if (ctxt->myDoc != NULL) 14840: xmlFreeDoc(ctxt->myDoc); 14841: ctxt->myDoc = NULL; 14842: 14843: ctxt->standalone = -1; 14844: ctxt->hasExternalSubset = 0; 14845: ctxt->hasPErefs = 0; 14846: ctxt->html = 0; 14847: ctxt->external = 0; 14848: ctxt->instate = XML_PARSER_START; 14849: ctxt->token = 0; 14850: 14851: ctxt->wellFormed = 1; 14852: ctxt->nsWellFormed = 1; 14853: ctxt->disableSAX = 0; 14854: ctxt->valid = 1; 14855: #if 0 14856: ctxt->vctxt.userData = ctxt; 14857: ctxt->vctxt.error = xmlParserValidityError; 14858: ctxt->vctxt.warning = xmlParserValidityWarning; 14859: #endif 14860: ctxt->record_info = 0; 14861: ctxt->nbChars = 0; 14862: ctxt->checkIndex = 0; 14863: ctxt->inSubset = 0; 14864: ctxt->errNo = XML_ERR_OK; 14865: ctxt->depth = 0; 14866: ctxt->charset = XML_CHAR_ENCODING_UTF8; 14867: ctxt->catalogs = NULL; 14868: ctxt->nbentities = 0; 14869: ctxt->sizeentities = 0; 14870: ctxt->sizeentcopy = 0; 14871: xmlInitNodeInfoSeq(&ctxt->node_seq); 14872: 14873: if (ctxt->attsDefault != NULL) { 14874: xmlHashFree(ctxt->attsDefault, 
(xmlHashDeallocator) xmlFree); 14875: ctxt->attsDefault = NULL; 14876: } 14877: if (ctxt->attsSpecial != NULL) { 14878: xmlHashFree(ctxt->attsSpecial, NULL); 14879: ctxt->attsSpecial = NULL; 14880: } 14881: 14882: #ifdef LIBXML_CATALOG_ENABLED 14883: if (ctxt->catalogs != NULL) 14884: xmlCatalogFreeLocal(ctxt->catalogs); 14885: #endif 14886: if (ctxt->lastError.code != XML_ERR_OK) 14887: xmlResetError(&ctxt->lastError); 14888: } 14889: 14890: /** 14891: * xmlCtxtResetPush: 14892: * @ctxt: an XML parser context 14893: * @chunk: a pointer to an array of chars 14894: * @size: number of chars in the array 14895: * @filename: an optional file name or URI 14896: * @encoding: the document encoding, or NULL 14897: * 14898: * Reset a push parser context 14899: * 14900: * Returns 0 in case of success and 1 in case of error 14901: */ 14902: int 14903: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14904: int size, const char *filename, const char *encoding) 14905: { 14906: xmlParserInputPtr inputStream; 14907: xmlParserInputBufferPtr buf; 14908: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14909: 14910: if (ctxt == NULL) 14911: return(1); 14912: 14913: if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14914: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14915: 14916: buf = xmlAllocParserInputBuffer(enc); 14917: if (buf == NULL) 14918: return(1); 14919: 14920: if (ctxt == NULL) { 14921: xmlFreeParserInputBuffer(buf); 14922: return(1); 14923: } 14924: 14925: xmlCtxtReset(ctxt); 14926: 14927: if (ctxt->pushTab == NULL) { 14928: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14929: sizeof(xmlChar *)); 14930: if (ctxt->pushTab == NULL) { 14931: xmlErrMemory(ctxt, NULL); 14932: xmlFreeParserInputBuffer(buf); 14933: return(1); 14934: } 14935: } 14936: 14937: if (filename == NULL) { 14938: ctxt->directory = NULL; 14939: } else { 14940: ctxt->directory = xmlParserGetDirectory(filename); 14941: } 14942: 14943: inputStream = 
xmlNewInputStream(ctxt); 14944: if (inputStream == NULL) { 14945: xmlFreeParserInputBuffer(buf); 14946: return(1); 14947: } 14948: 14949: if (filename == NULL) 14950: inputStream->filename = NULL; 14951: else 14952: inputStream->filename = (char *) 14953: xmlCanonicPath((const xmlChar *) filename); 14954: inputStream->buf = buf; 14955: xmlBufResetInput(buf->buffer, inputStream); 14956: 14957: inputPush(ctxt, inputStream); 14958: 14959: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14960: (ctxt->input->buf != NULL)) { 14961: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 14962: size_t cur = ctxt->input->cur - ctxt->input->base; 14963: 14964: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14965: 14966: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 14967: #ifdef DEBUG_PUSH 14968: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14969: #endif 14970: } 14971: 14972: if (encoding != NULL) { 14973: xmlCharEncodingHandlerPtr hdlr; 14974: 14975: if (ctxt->encoding != NULL) 14976: xmlFree((xmlChar *) ctxt->encoding); 14977: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14978: 14979: hdlr = xmlFindCharEncodingHandler(encoding); 14980: if (hdlr != NULL) { 14981: xmlSwitchToEncoding(ctxt, hdlr); 14982: } else { 14983: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14984: "Unsupported encoding %s\n", BAD_CAST encoding); 14985: } 14986: } else if (enc != XML_CHAR_ENCODING_NONE) { 14987: xmlSwitchEncoding(ctxt, enc); 14988: } 14989: 14990: return(0); 14991: } 14992: 14993: 14994: /** 14995: * xmlCtxtUseOptionsInternal: 14996: * @ctxt: an XML parser context 14997: * @options: a combination of xmlParserOption 14998: * @encoding: the user provided encoding to use 14999: * 15000: * Applies the options to the parser context 15001: * 15002: * Returns 0 in case of success, the set of unknown or unimplemented options 15003: * in case of error. 
15004: */ 15005: static int 15006: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15007: { 15008: if (ctxt == NULL) 15009: return(-1); 15010: if (encoding != NULL) { 15011: if (ctxt->encoding != NULL) 15012: xmlFree((xmlChar *) ctxt->encoding); 15013: ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15014: } 15015: if (options & XML_PARSE_RECOVER) { 15016: ctxt->recovery = 1; 15017: options -= XML_PARSE_RECOVER; 15018: ctxt->options |= XML_PARSE_RECOVER; 15019: } else 15020: ctxt->recovery = 0; 15021: if (options & XML_PARSE_DTDLOAD) { 15022: ctxt->loadsubset = XML_DETECT_IDS; 15023: options -= XML_PARSE_DTDLOAD; 15024: ctxt->options |= XML_PARSE_DTDLOAD; 15025: } else 15026: ctxt->loadsubset = 0; 15027: if (options & XML_PARSE_DTDATTR) { 15028: ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15029: options -= XML_PARSE_DTDATTR; 15030: ctxt->options |= XML_PARSE_DTDATTR; 15031: } 15032: if (options & XML_PARSE_NOENT) { 15033: ctxt->replaceEntities = 1; 15034: /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15035: options -= XML_PARSE_NOENT; 15036: ctxt->options |= XML_PARSE_NOENT; 15037: } else 15038: ctxt->replaceEntities = 0; 15039: if (options & XML_PARSE_PEDANTIC) { 15040: ctxt->pedantic = 1; 15041: options -= XML_PARSE_PEDANTIC; 15042: ctxt->options |= XML_PARSE_PEDANTIC; 15043: } else 15044: ctxt->pedantic = 0; 15045: if (options & XML_PARSE_NOBLANKS) { 15046: ctxt->keepBlanks = 0; 15047: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15048: options -= XML_PARSE_NOBLANKS; 15049: ctxt->options |= XML_PARSE_NOBLANKS; 15050: } else 15051: ctxt->keepBlanks = 1; 15052: if (options & XML_PARSE_DTDVALID) { 15053: ctxt->validate = 1; 15054: if (options & XML_PARSE_NOWARNING) 15055: ctxt->vctxt.warning = NULL; 15056: if (options & XML_PARSE_NOERROR) 15057: ctxt->vctxt.error = NULL; 15058: options -= XML_PARSE_DTDVALID; 15059: ctxt->options |= XML_PARSE_DTDVALID; 15060: } else 15061: ctxt->validate = 0; 15062: if (options & 
XML_PARSE_NOWARNING) { 15063: ctxt->sax->warning = NULL; 15064: options -= XML_PARSE_NOWARNING; 15065: } 15066: if (options & XML_PARSE_NOERROR) { 15067: ctxt->sax->error = NULL; 15068: ctxt->sax->fatalError = NULL; 15069: options -= XML_PARSE_NOERROR; 15070: } 15071: #ifdef LIBXML_SAX1_ENABLED 15072: if (options & XML_PARSE_SAX1) { 15073: ctxt->sax->startElement = xmlSAX2StartElement; 15074: ctxt->sax->endElement = xmlSAX2EndElement; 15075: ctxt->sax->startElementNs = NULL; 15076: ctxt->sax->endElementNs = NULL; 15077: ctxt->sax->initialized = 1; 15078: options -= XML_PARSE_SAX1; 15079: ctxt->options |= XML_PARSE_SAX1; 15080: } 15081: #endif /* LIBXML_SAX1_ENABLED */ 15082: if (options & XML_PARSE_NODICT) { 15083: ctxt->dictNames = 0; 15084: options -= XML_PARSE_NODICT; 15085: ctxt->options |= XML_PARSE_NODICT; 15086: } else { 15087: ctxt->dictNames = 1; 15088: } 15089: if (options & XML_PARSE_NOCDATA) { 15090: ctxt->sax->cdataBlock = NULL; 15091: options -= XML_PARSE_NOCDATA; 15092: ctxt->options |= XML_PARSE_NOCDATA; 15093: } 15094: if (options & XML_PARSE_NSCLEAN) { 15095: ctxt->options |= XML_PARSE_NSCLEAN; 15096: options -= XML_PARSE_NSCLEAN; 15097: } 15098: if (options & XML_PARSE_NONET) { 15099: ctxt->options |= XML_PARSE_NONET; 15100: options -= XML_PARSE_NONET; 15101: } 15102: if (options & XML_PARSE_COMPACT) { 15103: ctxt->options |= XML_PARSE_COMPACT; 15104: options -= XML_PARSE_COMPACT; 15105: } 15106: if (options & XML_PARSE_OLD10) { 15107: ctxt->options |= XML_PARSE_OLD10; 15108: options -= XML_PARSE_OLD10; 15109: } 15110: if (options & XML_PARSE_NOBASEFIX) { 15111: ctxt->options |= XML_PARSE_NOBASEFIX; 15112: options -= XML_PARSE_NOBASEFIX; 15113: } 15114: if (options & XML_PARSE_HUGE) { 15115: ctxt->options |= XML_PARSE_HUGE; 15116: options -= XML_PARSE_HUGE; 15117: if (ctxt->dict != NULL) 15118: xmlDictSetLimit(ctxt->dict, 0); 15119: } 15120: if (options & XML_PARSE_OLDSAX) { 15121: ctxt->options |= XML_PARSE_OLDSAX; 15122: options -= 
XML_PARSE_OLDSAX; 15123: } 15124: if (options & XML_PARSE_IGNORE_ENC) { 15125: ctxt->options |= XML_PARSE_IGNORE_ENC; 15126: options -= XML_PARSE_IGNORE_ENC; 15127: } 15128: if (options & XML_PARSE_BIG_LINES) { 15129: ctxt->options |= XML_PARSE_BIG_LINES; 15130: options -= XML_PARSE_BIG_LINES; 15131: } 15132: ctxt->linenumbers = 1; 15133: return (options); 15134: } 15135: 15136: /** 15137: * xmlCtxtUseOptions: 15138: * @ctxt: an XML parser context 15139: * @options: a combination of xmlParserOption 15140: * 15141: * Applies the options to the parser context 15142: * 15143: * Returns 0 in case of success, the set of unknown or unimplemented options 15144: * in case of error. 15145: */ 15146: int 15147: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15148: { 15149: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15150: } 15151: 15152: /** 15153: * xmlDoRead: 15154: * @ctxt: an XML parser context 15155: * @URL: the base URL to use for the document 15156: * @encoding: the document encoding, or NULL 15157: * @options: a combination of xmlParserOption 15158: * @reuse: keep the context for reuse 15159: * 15160: * Common front-end for the xmlRead functions 15161: * 15162: * Returns the resulting document tree or NULL 15163: */ 15164: static xmlDocPtr 15165: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15166: int options, int reuse) 15167: { 15168: xmlDocPtr ret; 15169: 15170: xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15171: if (encoding != NULL) { 15172: xmlCharEncodingHandlerPtr hdlr; 15173: 15174: hdlr = xmlFindCharEncodingHandler(encoding); 15175: if (hdlr != NULL) 15176: xmlSwitchToEncoding(ctxt, hdlr); 15177: } 15178: if ((URL != NULL) && (ctxt->input != NULL) && 15179: (ctxt->input->filename == NULL)) 15180: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15181: xmlParseDocument(ctxt); 15182: if ((ctxt->wellFormed) || ctxt->recovery) 15183: ret = ctxt->myDoc; 15184: else { 15185: ret = NULL; 
15186: if (ctxt->myDoc != NULL) { 15187: xmlFreeDoc(ctxt->myDoc); 15188: } 15189: } 15190: ctxt->myDoc = NULL; 15191: if (!reuse) { 15192: xmlFreeParserCtxt(ctxt); 15193: } 15194: 15195: return (ret); 15196: } 15197: 15198: /** 15199: * xmlReadDoc: 15200: * @cur: a pointer to a zero terminated string 15201: * @URL: the base URL to use for the document 15202: * @encoding: the document encoding, or NULL 15203: * @options: a combination of xmlParserOption 15204: * 15205: * parse an XML in-memory document and build a tree. 15206: * 15207: * Returns the resulting document tree 15208: */ 15209: xmlDocPtr 15210: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15211: { 15212: xmlParserCtxtPtr ctxt; 15213: 15214: if (cur == NULL) 15215: return (NULL); 15216: 15217: ctxt = xmlCreateDocParserCtxt(cur); 15218: if (ctxt == NULL) 15219: return (NULL); 15220: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15221: } 15222: 15223: /** 15224: * xmlReadFile: 15225: * @filename: a file or URL 15226: * @encoding: the document encoding, or NULL 15227: * @options: a combination of xmlParserOption 15228: * 15229: * parse an XML file from the filesystem or the network. 15230: * 15231: * Returns the resulting document tree 15232: */ 15233: xmlDocPtr 15234: xmlReadFile(const char *filename, const char *encoding, int options) 15235: { 15236: xmlParserCtxtPtr ctxt; 15237: 15238: ctxt = xmlCreateURLParserCtxt(filename, options); 15239: if (ctxt == NULL) 15240: return (NULL); 15241: return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15242: } 15243: 15244: /** 15245: * xmlReadMemory: 15246: * @buffer: a pointer to a char array 15247: * @size: the size of the array 15248: * @URL: the base URL to use for the document 15249: * @encoding: the document encoding, or NULL 15250: * @options: a combination of xmlParserOption 15251: * 15252: * parse an XML in-memory document and build a tree. 
15253: * 15254: * Returns the resulting document tree 15255: */ 15256: xmlDocPtr 15257: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15258: { 15259: xmlParserCtxtPtr ctxt; 15260: 15261: ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15262: if (ctxt == NULL) 15263: return (NULL); 15264: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15265: } 15266: 15267: /** 15268: * xmlReadFd: 15269: * @fd: an open file descriptor 15270: * @URL: the base URL to use for the document 15271: * @encoding: the document encoding, or NULL 15272: * @options: a combination of xmlParserOption 15273: * 15274: * parse an XML from a file descriptor and build a tree. 15275: * NOTE that the file descriptor will not be closed when the 15276: * reader is closed or reset. 15277: * 15278: * Returns the resulting document tree 15279: */ 15280: xmlDocPtr 15281: xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15282: { 15283: xmlParserCtxtPtr ctxt; 15284: xmlParserInputBufferPtr input; 15285: xmlParserInputPtr stream; 15286: 15287: if (fd < 0) 15288: return (NULL); 15289: 15290: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15291: if (input == NULL) 15292: return (NULL); 15293: input->closecallback = NULL; 15294: ctxt = xmlNewParserCtxt(); 15295: if (ctxt == NULL) { 15296: xmlFreeParserInputBuffer(input); 15297: return (NULL); 15298: } 15299: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15300: if (stream == NULL) { 15301: xmlFreeParserInputBuffer(input); 15302: xmlFreeParserCtxt(ctxt); 15303: return (NULL); 15304: } 15305: inputPush(ctxt, stream); 15306: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15307: } 15308: 15309: /** 15310: * xmlReadIO: 15311: * @ioread: an I/O read function 15312: * @ioclose: an I/O close function 15313: * @ioctx: an I/O handler 15314: * @URL: the base URL to use for the document 15315: * @encoding: the document encoding, or NULL 15316: * @options: a 
combination of xmlParserOption 15317: * 15318: * parse an XML document from I/O functions and source and build a tree. 15319: * 15320: * Returns the resulting document tree 15321: */ 15322: xmlDocPtr 15323: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15324: void *ioctx, const char *URL, const char *encoding, int options) 15325: { 15326: xmlParserCtxtPtr ctxt; 15327: xmlParserInputBufferPtr input; 15328: xmlParserInputPtr stream; 15329: 15330: if (ioread == NULL) 15331: return (NULL); 15332: 15333: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15334: XML_CHAR_ENCODING_NONE); 15335: if (input == NULL) { 15336: if (ioclose != NULL) 15337: ioclose(ioctx); 15338: return (NULL); 15339: } 15340: ctxt = xmlNewParserCtxt(); 15341: if (ctxt == NULL) { 15342: xmlFreeParserInputBuffer(input); 15343: return (NULL); 15344: } 15345: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15346: if (stream == NULL) { 15347: xmlFreeParserInputBuffer(input); 15348: xmlFreeParserCtxt(ctxt); 15349: return (NULL); 15350: } 15351: inputPush(ctxt, stream); 15352: return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15353: } 15354: 15355: /** 15356: * xmlCtxtReadDoc: 15357: * @ctxt: an XML parser context 15358: * @cur: a pointer to a zero terminated string 15359: * @URL: the base URL to use for the document 15360: * @encoding: the document encoding, or NULL 15361: * @options: a combination of xmlParserOption 15362: * 15363: * parse an XML in-memory document and build a tree. 
15364: * This reuses the existing @ctxt parser context 15365: * 15366: * Returns the resulting document tree 15367: */ 15368: xmlDocPtr 15369: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15370: const char *URL, const char *encoding, int options) 15371: { 15372: xmlParserInputPtr stream; 15373: 15374: if (cur == NULL) 15375: return (NULL); 15376: if (ctxt == NULL) 15377: return (NULL); 15378: 15379: xmlCtxtReset(ctxt); 15380: 15381: stream = xmlNewStringInputStream(ctxt, cur); 15382: if (stream == NULL) { 15383: return (NULL); 15384: } 15385: inputPush(ctxt, stream); 15386: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15387: } 15388: 15389: /** 15390: * xmlCtxtReadFile: 15391: * @ctxt: an XML parser context 15392: * @filename: a file or URL 15393: * @encoding: the document encoding, or NULL 15394: * @options: a combination of xmlParserOption 15395: * 15396: * parse an XML file from the filesystem or the network. 15397: * This reuses the existing @ctxt parser context 15398: * 15399: * Returns the resulting document tree 15400: */ 15401: xmlDocPtr 15402: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15403: const char *encoding, int options) 15404: { 15405: xmlParserInputPtr stream; 15406: 15407: if (filename == NULL) 15408: return (NULL); 15409: if (ctxt == NULL) 15410: return (NULL); 15411: 15412: xmlCtxtReset(ctxt); 15413: 15414: stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15415: if (stream == NULL) { 15416: return (NULL); 15417: } 15418: inputPush(ctxt, stream); 15419: return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15420: } 15421: 15422: /** 15423: * xmlCtxtReadMemory: 15424: * @ctxt: an XML parser context 15425: * @buffer: a pointer to a char array 15426: * @size: the size of the array 15427: * @URL: the base URL to use for the document 15428: * @encoding: the document encoding, or NULL 15429: * @options: a combination of xmlParserOption 15430: * 15431: * parse an XML in-memory document and build a tree. 
15432: * This reuses the existing @ctxt parser context 15433: * 15434: * Returns the resulting document tree 15435: */ 15436: xmlDocPtr 15437: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15438: const char *URL, const char *encoding, int options) 15439: { 15440: xmlParserInputBufferPtr input; 15441: xmlParserInputPtr stream; 15442: 15443: if (ctxt == NULL) 15444: return (NULL); 15445: if (buffer == NULL) 15446: return (NULL); 15447: 15448: xmlCtxtReset(ctxt); 15449: 15450: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15451: if (input == NULL) { 15452: return(NULL); 15453: } 15454: 15455: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15456: if (stream == NULL) { 15457: xmlFreeParserInputBuffer(input); 15458: return(NULL); 15459: } 15460: 15461: inputPush(ctxt, stream); 15462: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15463: } 15464: 15465: /** 15466: * xmlCtxtReadFd: 15467: * @ctxt: an XML parser context 15468: * @fd: an open file descriptor 15469: * @URL: the base URL to use for the document 15470: * @encoding: the document encoding, or NULL 15471: * @options: a combination of xmlParserOption 15472: * 15473: * parse an XML from a file descriptor and build a tree. 15474: * This reuses the existing @ctxt parser context 15475: * NOTE that the file descriptor will not be closed when the 15476: * reader is closed or reset. 
15477: * 15478: * Returns the resulting document tree 15479: */ 15480: xmlDocPtr 15481: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15482: const char *URL, const char *encoding, int options) 15483: { 15484: xmlParserInputBufferPtr input; 15485: xmlParserInputPtr stream; 15486: 15487: if (fd < 0) 15488: return (NULL); 15489: if (ctxt == NULL) 15490: return (NULL); 15491: 15492: xmlCtxtReset(ctxt); 15493: 15494: 15495: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15496: if (input == NULL) 15497: return (NULL); 15498: input->closecallback = NULL; 15499: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15500: if (stream == NULL) { 15501: xmlFreeParserInputBuffer(input); 15502: return (NULL); 15503: } 15504: inputPush(ctxt, stream); 15505: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15506: } 15507: 15508: /** 15509: * xmlCtxtReadIO: 15510: * @ctxt: an XML parser context 15511: * @ioread: an I/O read function 15512: * @ioclose: an I/O close function 15513: * @ioctx: an I/O handler 15514: * @URL: the base URL to use for the document 15515: * @encoding: the document encoding, or NULL 15516: * @options: a combination of xmlParserOption 15517: * 15518: * parse an XML document from I/O functions and source and build a tree. 
15519: * This reuses the existing @ctxt parser context 15520: * 15521: * Returns the resulting document tree 15522: */ 15523: xmlDocPtr 15524: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15525: xmlInputCloseCallback ioclose, void *ioctx, 15526: const char *URL, 15527: const char *encoding, int options) 15528: { 15529: xmlParserInputBufferPtr input; 15530: xmlParserInputPtr stream; 15531: 15532: if (ioread == NULL) 15533: return (NULL); 15534: if (ctxt == NULL) 15535: return (NULL); 15536: 15537: xmlCtxtReset(ctxt); 15538: 15539: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15540: XML_CHAR_ENCODING_NONE); 15541: if (input == NULL) { 15542: if (ioclose != NULL) 15543: ioclose(ioctx); 15544: return (NULL); 15545: } 15546: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15547: if (stream == NULL) { 15548: xmlFreeParserInputBuffer(input); 15549: return (NULL); 15550: } 15551: inputPush(ctxt, stream); 15552: return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15553: } 15554: 15555: #define bottom_parser 15556: #include "elfgcchack.h"