Annotation of embedaddon/libxml2/parser.c, revision 1.1.1.3
1.1 misho 1: /*
2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
4: *
5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
1.1.1.3 ! misho 20: * different ranges of character are actually implemented either in
1.1 misho 21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
28: * See Copyright for the status of this software.
29: *
30: * daniel@veillard.com
31: */
32:
33: #define IN_LIBXML
34: #include "libxml.h"
35:
36: #if defined(WIN32) && !defined (__CYGWIN__)
37: #define XML_DIR_SEP '\\'
38: #else
39: #define XML_DIR_SEP '/'
40: #endif
41:
42: #include <stdlib.h>
1.1.1.3 ! misho 43: #include <limits.h>
1.1 misho 44: #include <string.h>
45: #include <stdarg.h>
46: #include <libxml/xmlmemory.h>
47: #include <libxml/threads.h>
48: #include <libxml/globals.h>
49: #include <libxml/tree.h>
50: #include <libxml/parser.h>
51: #include <libxml/parserInternals.h>
52: #include <libxml/valid.h>
53: #include <libxml/entities.h>
54: #include <libxml/xmlerror.h>
55: #include <libxml/encoding.h>
56: #include <libxml/xmlIO.h>
57: #include <libxml/uri.h>
58: #ifdef LIBXML_CATALOG_ENABLED
59: #include <libxml/catalog.h>
60: #endif
61: #ifdef LIBXML_SCHEMAS_ENABLED
62: #include <libxml/xmlschemastypes.h>
63: #include <libxml/relaxng.h>
64: #endif
65: #ifdef HAVE_CTYPE_H
66: #include <ctype.h>
67: #endif
68: #ifdef HAVE_STDLIB_H
69: #include <stdlib.h>
70: #endif
71: #ifdef HAVE_SYS_STAT_H
72: #include <sys/stat.h>
73: #endif
74: #ifdef HAVE_FCNTL_H
75: #include <fcntl.h>
76: #endif
77: #ifdef HAVE_UNISTD_H
78: #include <unistd.h>
79: #endif
80: #ifdef HAVE_ZLIB_H
81: #include <zlib.h>
82: #endif
1.1.1.2 misho 83: #ifdef HAVE_LZMA_H
84: #include <lzma.h>
85: #endif
1.1 misho 86:
1.1.1.3 ! misho 87: #include "buf.h"
! 88: #include "enc.h"
! 89:
1.1 misho 90: static void
91: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92:
93: static xmlParserCtxtPtr
94: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95: const xmlChar *base, xmlParserCtxtPtr pctx);
96:
97: /************************************************************************
98: * *
99: * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100: * *
101: ************************************************************************/
102:
#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
 * entity replacement to the size in bytes of the input indicates an
 * exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10
113:
114: /*
115: * xmlParserEntityCheck
116: *
117: * Function to check non-linear entity expansion behaviour
118: * This is here to detect and stop exponential linear entity expansion
119: * This is not a limitation of the parser but a safety
120: * boundary feature. It can be disabled with the XML_PARSE_HUGE
121: * parser option.
122: */
123: static int
1.1.1.3 ! misho 124: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
! 125: xmlEntityPtr ent, size_t replacement)
1.1 misho 126: {
1.1.1.3 ! misho 127: size_t consumed = 0;
1.1 misho 128:
129: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130: return (0);
131: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132: return (1);
1.1.1.3 ! misho 133: if (replacement != 0) {
! 134: if (replacement < XML_MAX_TEXT_LENGTH)
! 135: return(0);
! 136:
! 137: /*
! 138: * If the volume of entity copy reaches 10 times the
! 139: * amount of parsed data and over the large text threshold
! 140: * then that's very likely to be an abuse.
! 141: */
! 142: if (ctxt->input != NULL) {
! 143: consumed = ctxt->input->consumed +
! 144: (ctxt->input->cur - ctxt->input->base);
! 145: }
! 146: consumed += ctxt->sizeentities;
! 147:
! 148: if (replacement < XML_PARSER_NON_LINEAR * consumed)
! 149: return(0);
! 150: } else if (size != 0) {
1.1 misho 151: /*
152: * Do the check based on the replacement size of the entity
153: */
154: if (size < XML_PARSER_BIG_ENTITY)
155: return(0);
156:
157: /*
158: * A limit on the amount of text data reasonably used
159: */
160: if (ctxt->input != NULL) {
161: consumed = ctxt->input->consumed +
162: (ctxt->input->cur - ctxt->input->base);
163: }
164: consumed += ctxt->sizeentities;
165:
166: if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168: return (0);
169: } else if (ent != NULL) {
170: /*
171: * use the number of parsed entities in the replacement
172: */
1.1.1.3 ! misho 173: size = ent->checked / 2;
1.1 misho 174:
175: /*
176: * The amount of data parsed counting entities size only once
177: */
178: if (ctxt->input != NULL) {
179: consumed = ctxt->input->consumed +
180: (ctxt->input->cur - ctxt->input->base);
181: }
182: consumed += ctxt->sizeentities;
183:
184: /*
185: * Check the density of entities for the amount of data
186: * knowing an entity reference will take at least 3 bytes
187: */
188: if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189: return (0);
190: } else {
191: /*
192: * strange we got no data for checking just return
193: */
194: return (0);
195: }
196: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197: return (1);
198: }
199:
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to process.
 * It is a safety boundary rather than a limitation of the parser, and
 * can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
209:
210:
211:
#define SAX2                        1
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE       100
! 227:
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
237:
238:
239: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240: static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241: const xmlChar **str);
242:
243: static xmlParserErrors
244: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245: xmlSAXHandlerPtr sax,
246: void *user_data, int depth, const xmlChar *URL,
247: const xmlChar *ID, xmlNodePtr *list);
248:
249: static int
250: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251: const char *encoding);
252: #ifdef LIBXML_LEGACY_ENABLED
253: static void
254: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255: xmlNodePtr lastNode);
256: #endif /* LIBXML_LEGACY_ENABLED */
257:
258: static xmlParserErrors
259: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260: const xmlChar *string, void *user_data, xmlNodePtr *lst);
261:
262: static int
263: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264:
265: /************************************************************************
266: * *
1.1.1.3 ! misho 267: * Some factorized error routines *
1.1 misho 268: * *
269: ************************************************************************/
270:
271: /**
272: * xmlErrAttributeDup:
273: * @ctxt: an XML parser context
274: * @prefix: the attribute prefix
275: * @localname: the attribute localname
276: *
277: * Handle a redefinition of attribute error
278: */
279: static void
280: xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281: const xmlChar * localname)
282: {
283: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284: (ctxt->instate == XML_PARSER_EOF))
285: return;
286: if (ctxt != NULL)
287: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
288:
289: if (prefix == NULL)
290: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292: (const char *) localname, NULL, NULL, 0, 0,
293: "Attribute %s redefined\n", localname);
294: else
295: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297: (const char *) prefix, (const char *) localname,
298: NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299: localname);
300: if (ctxt != NULL) {
301: ctxt->wellFormed = 0;
302: if (ctxt->recovery == 0)
303: ctxt->disableSAX = 1;
304: }
305: }
306:
307: /**
308: * xmlFatalErr:
309: * @ctxt: an XML parser context
310: * @error: the error number
311: * @extra: extra information string
312: *
313: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314: */
315: static void
316: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
317: {
318: const char *errmsg;
1.1.1.3 ! misho 319: char errstr[129] = "";
1.1 misho 320:
321: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322: (ctxt->instate == XML_PARSER_EOF))
323: return;
324: switch (error) {
325: case XML_ERR_INVALID_HEX_CHARREF:
1.1.1.3 ! misho 326: errmsg = "CharRef: invalid hexadecimal value";
1.1 misho 327: break;
328: case XML_ERR_INVALID_DEC_CHARREF:
1.1.1.3 ! misho 329: errmsg = "CharRef: invalid decimal value";
1.1 misho 330: break;
331: case XML_ERR_INVALID_CHARREF:
1.1.1.3 ! misho 332: errmsg = "CharRef: invalid value";
1.1 misho 333: break;
334: case XML_ERR_INTERNAL_ERROR:
335: errmsg = "internal error";
336: break;
337: case XML_ERR_PEREF_AT_EOF:
1.1.1.3 ! misho 338: errmsg = "PEReference at end of document";
1.1 misho 339: break;
340: case XML_ERR_PEREF_IN_PROLOG:
1.1.1.3 ! misho 341: errmsg = "PEReference in prolog";
1.1 misho 342: break;
343: case XML_ERR_PEREF_IN_EPILOG:
1.1.1.3 ! misho 344: errmsg = "PEReference in epilog";
1.1 misho 345: break;
346: case XML_ERR_PEREF_NO_NAME:
1.1.1.3 ! misho 347: errmsg = "PEReference: no name";
1.1 misho 348: break;
349: case XML_ERR_PEREF_SEMICOL_MISSING:
1.1.1.3 ! misho 350: errmsg = "PEReference: expecting ';'";
1.1 misho 351: break;
352: case XML_ERR_ENTITY_LOOP:
1.1.1.3 ! misho 353: errmsg = "Detected an entity reference loop";
1.1 misho 354: break;
355: case XML_ERR_ENTITY_NOT_STARTED:
1.1.1.3 ! misho 356: errmsg = "EntityValue: \" or ' expected";
1.1 misho 357: break;
358: case XML_ERR_ENTITY_PE_INTERNAL:
1.1.1.3 ! misho 359: errmsg = "PEReferences forbidden in internal subset";
1.1 misho 360: break;
361: case XML_ERR_ENTITY_NOT_FINISHED:
1.1.1.3 ! misho 362: errmsg = "EntityValue: \" or ' expected";
1.1 misho 363: break;
364: case XML_ERR_ATTRIBUTE_NOT_STARTED:
1.1.1.3 ! misho 365: errmsg = "AttValue: \" or ' expected";
1.1 misho 366: break;
367: case XML_ERR_LT_IN_ATTRIBUTE:
1.1.1.3 ! misho 368: errmsg = "Unescaped '<' not allowed in attributes values";
1.1 misho 369: break;
370: case XML_ERR_LITERAL_NOT_STARTED:
1.1.1.3 ! misho 371: errmsg = "SystemLiteral \" or ' expected";
1.1 misho 372: break;
373: case XML_ERR_LITERAL_NOT_FINISHED:
1.1.1.3 ! misho 374: errmsg = "Unfinished System or Public ID \" or ' expected";
1.1 misho 375: break;
376: case XML_ERR_MISPLACED_CDATA_END:
1.1.1.3 ! misho 377: errmsg = "Sequence ']]>' not allowed in content";
1.1 misho 378: break;
379: case XML_ERR_URI_REQUIRED:
1.1.1.3 ! misho 380: errmsg = "SYSTEM or PUBLIC, the URI is missing";
1.1 misho 381: break;
382: case XML_ERR_PUBID_REQUIRED:
1.1.1.3 ! misho 383: errmsg = "PUBLIC, the Public Identifier is missing";
1.1 misho 384: break;
385: case XML_ERR_HYPHEN_IN_COMMENT:
1.1.1.3 ! misho 386: errmsg = "Comment must not contain '--' (double-hyphen)";
1.1 misho 387: break;
388: case XML_ERR_PI_NOT_STARTED:
1.1.1.3 ! misho 389: errmsg = "xmlParsePI : no target name";
1.1 misho 390: break;
391: case XML_ERR_RESERVED_XML_NAME:
1.1.1.3 ! misho 392: errmsg = "Invalid PI name";
1.1 misho 393: break;
394: case XML_ERR_NOTATION_NOT_STARTED:
1.1.1.3 ! misho 395: errmsg = "NOTATION: Name expected here";
1.1 misho 396: break;
397: case XML_ERR_NOTATION_NOT_FINISHED:
1.1.1.3 ! misho 398: errmsg = "'>' required to close NOTATION declaration";
1.1 misho 399: break;
400: case XML_ERR_VALUE_REQUIRED:
1.1.1.3 ! misho 401: errmsg = "Entity value required";
1.1 misho 402: break;
403: case XML_ERR_URI_FRAGMENT:
404: errmsg = "Fragment not allowed";
405: break;
406: case XML_ERR_ATTLIST_NOT_STARTED:
1.1.1.3 ! misho 407: errmsg = "'(' required to start ATTLIST enumeration";
1.1 misho 408: break;
409: case XML_ERR_NMTOKEN_REQUIRED:
1.1.1.3 ! misho 410: errmsg = "NmToken expected in ATTLIST enumeration";
1.1 misho 411: break;
412: case XML_ERR_ATTLIST_NOT_FINISHED:
1.1.1.3 ! misho 413: errmsg = "')' required to finish ATTLIST enumeration";
1.1 misho 414: break;
415: case XML_ERR_MIXED_NOT_STARTED:
1.1.1.3 ! misho 416: errmsg = "MixedContentDecl : '|' or ')*' expected";
1.1 misho 417: break;
418: case XML_ERR_PCDATA_REQUIRED:
1.1.1.3 ! misho 419: errmsg = "MixedContentDecl : '#PCDATA' expected";
1.1 misho 420: break;
421: case XML_ERR_ELEMCONTENT_NOT_STARTED:
1.1.1.3 ! misho 422: errmsg = "ContentDecl : Name or '(' expected";
1.1 misho 423: break;
424: case XML_ERR_ELEMCONTENT_NOT_FINISHED:
1.1.1.3 ! misho 425: errmsg = "ContentDecl : ',' '|' or ')' expected";
1.1 misho 426: break;
427: case XML_ERR_PEREF_IN_INT_SUBSET:
428: errmsg =
1.1.1.3 ! misho 429: "PEReference: forbidden within markup decl in internal subset";
1.1 misho 430: break;
431: case XML_ERR_GT_REQUIRED:
1.1.1.3 ! misho 432: errmsg = "expected '>'";
1.1 misho 433: break;
434: case XML_ERR_CONDSEC_INVALID:
1.1.1.3 ! misho 435: errmsg = "XML conditional section '[' expected";
1.1 misho 436: break;
437: case XML_ERR_EXT_SUBSET_NOT_FINISHED:
1.1.1.3 ! misho 438: errmsg = "Content error in the external subset";
1.1 misho 439: break;
440: case XML_ERR_CONDSEC_INVALID_KEYWORD:
441: errmsg =
1.1.1.3 ! misho 442: "conditional section INCLUDE or IGNORE keyword expected";
1.1 misho 443: break;
444: case XML_ERR_CONDSEC_NOT_FINISHED:
1.1.1.3 ! misho 445: errmsg = "XML conditional section not closed";
1.1 misho 446: break;
447: case XML_ERR_XMLDECL_NOT_STARTED:
1.1.1.3 ! misho 448: errmsg = "Text declaration '<?xml' required";
1.1 misho 449: break;
450: case XML_ERR_XMLDECL_NOT_FINISHED:
1.1.1.3 ! misho 451: errmsg = "parsing XML declaration: '?>' expected";
1.1 misho 452: break;
453: case XML_ERR_EXT_ENTITY_STANDALONE:
1.1.1.3 ! misho 454: errmsg = "external parsed entities cannot be standalone";
1.1 misho 455: break;
456: case XML_ERR_ENTITYREF_SEMICOL_MISSING:
1.1.1.3 ! misho 457: errmsg = "EntityRef: expecting ';'";
1.1 misho 458: break;
459: case XML_ERR_DOCTYPE_NOT_FINISHED:
1.1.1.3 ! misho 460: errmsg = "DOCTYPE improperly terminated";
1.1 misho 461: break;
462: case XML_ERR_LTSLASH_REQUIRED:
1.1.1.3 ! misho 463: errmsg = "EndTag: '</' not found";
1.1 misho 464: break;
465: case XML_ERR_EQUAL_REQUIRED:
1.1.1.3 ! misho 466: errmsg = "expected '='";
1.1 misho 467: break;
468: case XML_ERR_STRING_NOT_CLOSED:
1.1.1.3 ! misho 469: errmsg = "String not closed expecting \" or '";
1.1 misho 470: break;
471: case XML_ERR_STRING_NOT_STARTED:
1.1.1.3 ! misho 472: errmsg = "String not started expecting ' or \"";
1.1 misho 473: break;
474: case XML_ERR_ENCODING_NAME:
1.1.1.3 ! misho 475: errmsg = "Invalid XML encoding name";
1.1 misho 476: break;
477: case XML_ERR_STANDALONE_VALUE:
1.1.1.3 ! misho 478: errmsg = "standalone accepts only 'yes' or 'no'";
1.1 misho 479: break;
480: case XML_ERR_DOCUMENT_EMPTY:
1.1.1.3 ! misho 481: errmsg = "Document is empty";
1.1 misho 482: break;
483: case XML_ERR_DOCUMENT_END:
1.1.1.3 ! misho 484: errmsg = "Extra content at the end of the document";
1.1 misho 485: break;
486: case XML_ERR_NOT_WELL_BALANCED:
1.1.1.3 ! misho 487: errmsg = "chunk is not well balanced";
1.1 misho 488: break;
489: case XML_ERR_EXTRA_CONTENT:
1.1.1.3 ! misho 490: errmsg = "extra content at the end of well balanced chunk";
1.1 misho 491: break;
492: case XML_ERR_VERSION_MISSING:
1.1.1.3 ! misho 493: errmsg = "Malformed declaration expecting version";
! 494: break;
! 495: case XML_ERR_NAME_TOO_LONG:
! 496: errmsg = "Name too long use XML_PARSE_HUGE option";
1.1 misho 497: break;
498: #if 0
499: case:
1.1.1.3 ! misho 500: errmsg = "";
1.1 misho 501: break;
502: #endif
503: default:
1.1.1.3 ! misho 504: errmsg = "Unregistered error message";
1.1 misho 505: }
1.1.1.3 ! misho 506: if (info == NULL)
! 507: snprintf(errstr, 128, "%s\n", errmsg);
! 508: else
! 509: snprintf(errstr, 128, "%s: %%s\n", errmsg);
1.1 misho 510: if (ctxt != NULL)
511: ctxt->errNo = error;
512: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
1.1.1.3 ! misho 513: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
1.1 misho 514: info);
515: if (ctxt != NULL) {
516: ctxt->wellFormed = 0;
517: if (ctxt->recovery == 0)
518: ctxt->disableSAX = 1;
519: }
520: }
521:
522: /**
523: * xmlFatalErrMsg:
524: * @ctxt: an XML parser context
525: * @error: the error number
526: * @msg: the error message
527: *
528: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529: */
530: static void
531: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532: const char *msg)
533: {
534: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535: (ctxt->instate == XML_PARSER_EOF))
536: return;
537: if (ctxt != NULL)
538: ctxt->errNo = error;
539: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
541: if (ctxt != NULL) {
542: ctxt->wellFormed = 0;
543: if (ctxt->recovery == 0)
544: ctxt->disableSAX = 1;
545: }
546: }
547:
548: /**
549: * xmlWarningMsg:
550: * @ctxt: an XML parser context
551: * @error: the error number
552: * @msg: the error message
553: * @str1: extra data
554: * @str2: extra data
555: *
556: * Handle a warning.
557: */
558: static void
559: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560: const char *msg, const xmlChar *str1, const xmlChar *str2)
561: {
562: xmlStructuredErrorFunc schannel = NULL;
563:
564: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565: (ctxt->instate == XML_PARSER_EOF))
566: return;
567: if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568: (ctxt->sax->initialized == XML_SAX2_MAGIC))
569: schannel = ctxt->sax->serror;
570: if (ctxt != NULL) {
571: __xmlRaiseError(schannel,
572: (ctxt->sax) ? ctxt->sax->warning : NULL,
573: ctxt->userData,
574: ctxt, NULL, XML_FROM_PARSER, error,
575: XML_ERR_WARNING, NULL, 0,
576: (const char *) str1, (const char *) str2, NULL, 0, 0,
577: msg, (const char *) str1, (const char *) str2);
578: } else {
579: __xmlRaiseError(schannel, NULL, NULL,
580: ctxt, NULL, XML_FROM_PARSER, error,
581: XML_ERR_WARNING, NULL, 0,
582: (const char *) str1, (const char *) str2, NULL, 0, 0,
583: msg, (const char *) str1, (const char *) str2);
584: }
585: }
586:
587: /**
588: * xmlValidityError:
589: * @ctxt: an XML parser context
590: * @error: the error number
591: * @msg: the error message
592: * @str1: extra data
593: *
594: * Handle a validity error.
595: */
596: static void
597: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598: const char *msg, const xmlChar *str1, const xmlChar *str2)
599: {
600: xmlStructuredErrorFunc schannel = NULL;
601:
602: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603: (ctxt->instate == XML_PARSER_EOF))
604: return;
605: if (ctxt != NULL) {
606: ctxt->errNo = error;
607: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608: schannel = ctxt->sax->serror;
609: }
610: if (ctxt != NULL) {
611: __xmlRaiseError(schannel,
612: ctxt->vctxt.error, ctxt->vctxt.userData,
613: ctxt, NULL, XML_FROM_DTD, error,
614: XML_ERR_ERROR, NULL, 0, (const char *) str1,
615: (const char *) str2, NULL, 0, 0,
616: msg, (const char *) str1, (const char *) str2);
617: ctxt->valid = 0;
618: } else {
619: __xmlRaiseError(schannel, NULL, NULL,
620: ctxt, NULL, XML_FROM_DTD, error,
621: XML_ERR_ERROR, NULL, 0, (const char *) str1,
622: (const char *) str2, NULL, 0, 0,
623: msg, (const char *) str1, (const char *) str2);
624: }
625: }
626:
627: /**
628: * xmlFatalErrMsgInt:
629: * @ctxt: an XML parser context
630: * @error: the error number
631: * @msg: the error message
632: * @val: an integer value
633: *
634: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635: */
636: static void
637: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638: const char *msg, int val)
639: {
640: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641: (ctxt->instate == XML_PARSER_EOF))
642: return;
643: if (ctxt != NULL)
644: ctxt->errNo = error;
645: __xmlRaiseError(NULL, NULL, NULL,
646: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647: NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
648: if (ctxt != NULL) {
649: ctxt->wellFormed = 0;
650: if (ctxt->recovery == 0)
651: ctxt->disableSAX = 1;
652: }
653: }
654:
655: /**
656: * xmlFatalErrMsgStrIntStr:
657: * @ctxt: an XML parser context
658: * @error: the error number
659: * @msg: the error message
660: * @str1: an string info
661: * @val: an integer value
662: * @str2: an string info
663: *
664: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665: */
666: static void
667: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
1.1.1.3 ! misho 668: const char *msg, const xmlChar *str1, int val,
1.1 misho 669: const xmlChar *str2)
670: {
671: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672: (ctxt->instate == XML_PARSER_EOF))
673: return;
674: if (ctxt != NULL)
675: ctxt->errNo = error;
676: __xmlRaiseError(NULL, NULL, NULL,
677: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678: NULL, 0, (const char *) str1, (const char *) str2,
679: NULL, val, 0, msg, str1, val, str2);
680: if (ctxt != NULL) {
681: ctxt->wellFormed = 0;
682: if (ctxt->recovery == 0)
683: ctxt->disableSAX = 1;
684: }
685: }
686:
687: /**
688: * xmlFatalErrMsgStr:
689: * @ctxt: an XML parser context
690: * @error: the error number
691: * @msg: the error message
692: * @val: a string value
693: *
694: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695: */
696: static void
697: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698: const char *msg, const xmlChar * val)
699: {
700: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701: (ctxt->instate == XML_PARSER_EOF))
702: return;
703: if (ctxt != NULL)
704: ctxt->errNo = error;
705: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706: XML_FROM_PARSER, error, XML_ERR_FATAL,
707: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708: val);
709: if (ctxt != NULL) {
710: ctxt->wellFormed = 0;
711: if (ctxt->recovery == 0)
712: ctxt->disableSAX = 1;
713: }
714: }
715:
716: /**
717: * xmlErrMsgStr:
718: * @ctxt: an XML parser context
719: * @error: the error number
720: * @msg: the error message
721: * @val: a string value
722: *
723: * Handle a non fatal parser error
724: */
725: static void
726: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727: const char *msg, const xmlChar * val)
728: {
729: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730: (ctxt->instate == XML_PARSER_EOF))
731: return;
732: if (ctxt != NULL)
733: ctxt->errNo = error;
734: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735: XML_FROM_PARSER, error, XML_ERR_ERROR,
736: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737: val);
738: }
739:
740: /**
741: * xmlNsErr:
742: * @ctxt: an XML parser context
743: * @error: the error number
744: * @msg: the message
745: * @info1: extra information string
746: * @info2: extra information string
747: *
748: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749: */
750: static void
751: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752: const char *msg,
753: const xmlChar * info1, const xmlChar * info2,
754: const xmlChar * info3)
755: {
756: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757: (ctxt->instate == XML_PARSER_EOF))
758: return;
759: if (ctxt != NULL)
760: ctxt->errNo = error;
761: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762: XML_ERR_ERROR, NULL, 0, (const char *) info1,
763: (const char *) info2, (const char *) info3, 0, 0, msg,
764: info1, info2, info3);
765: if (ctxt != NULL)
766: ctxt->nsWellFormed = 0;
767: }
768:
769: /**
770: * xmlNsWarn
771: * @ctxt: an XML parser context
772: * @error: the error number
773: * @msg: the message
774: * @info1: extra information string
775: * @info2: extra information string
776: *
1.1.1.2 misho 777: * Handle a namespace warning error
1.1 misho 778: */
779: static void
780: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781: const char *msg,
782: const xmlChar * info1, const xmlChar * info2,
783: const xmlChar * info3)
784: {
785: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786: (ctxt->instate == XML_PARSER_EOF))
787: return;
788: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789: XML_ERR_WARNING, NULL, 0, (const char *) info1,
790: (const char *) info2, (const char *) info3, 0, 0, msg,
791: info1, info2, info3);
792: }
793:
794: /************************************************************************
795: * *
1.1.1.3 ! misho 796: * Library wide options *
1.1 misho 797: * *
798: ************************************************************************/
799:
800: /**
801: * xmlHasFeature:
802: * @feature: the feature to be examined
803: *
804: * Examines if the library has been compiled with a given feature.
805: *
806: * Returns a non-zero value if the feature exist, otherwise zero.
807: * Returns zero (0) if the feature does not exist or an unknown
808: * unknown feature is requested, non-zero otherwise.
809: */
810: int
811: xmlHasFeature(xmlFeature feature)
812: {
813: switch (feature) {
814: case XML_WITH_THREAD:
815: #ifdef LIBXML_THREAD_ENABLED
816: return(1);
817: #else
818: return(0);
819: #endif
820: case XML_WITH_TREE:
821: #ifdef LIBXML_TREE_ENABLED
822: return(1);
823: #else
824: return(0);
825: #endif
826: case XML_WITH_OUTPUT:
827: #ifdef LIBXML_OUTPUT_ENABLED
828: return(1);
829: #else
830: return(0);
831: #endif
832: case XML_WITH_PUSH:
833: #ifdef LIBXML_PUSH_ENABLED
834: return(1);
835: #else
836: return(0);
837: #endif
838: case XML_WITH_READER:
839: #ifdef LIBXML_READER_ENABLED
840: return(1);
841: #else
842: return(0);
843: #endif
844: case XML_WITH_PATTERN:
845: #ifdef LIBXML_PATTERN_ENABLED
846: return(1);
847: #else
848: return(0);
849: #endif
850: case XML_WITH_WRITER:
851: #ifdef LIBXML_WRITER_ENABLED
852: return(1);
853: #else
854: return(0);
855: #endif
856: case XML_WITH_SAX1:
857: #ifdef LIBXML_SAX1_ENABLED
858: return(1);
859: #else
860: return(0);
861: #endif
862: case XML_WITH_FTP:
863: #ifdef LIBXML_FTP_ENABLED
864: return(1);
865: #else
866: return(0);
867: #endif
868: case XML_WITH_HTTP:
869: #ifdef LIBXML_HTTP_ENABLED
870: return(1);
871: #else
872: return(0);
873: #endif
874: case XML_WITH_VALID:
875: #ifdef LIBXML_VALID_ENABLED
876: return(1);
877: #else
878: return(0);
879: #endif
880: case XML_WITH_HTML:
881: #ifdef LIBXML_HTML_ENABLED
882: return(1);
883: #else
884: return(0);
885: #endif
886: case XML_WITH_LEGACY:
887: #ifdef LIBXML_LEGACY_ENABLED
888: return(1);
889: #else
890: return(0);
891: #endif
892: case XML_WITH_C14N:
893: #ifdef LIBXML_C14N_ENABLED
894: return(1);
895: #else
896: return(0);
897: #endif
898: case XML_WITH_CATALOG:
899: #ifdef LIBXML_CATALOG_ENABLED
900: return(1);
901: #else
902: return(0);
903: #endif
904: case XML_WITH_XPATH:
905: #ifdef LIBXML_XPATH_ENABLED
906: return(1);
907: #else
908: return(0);
909: #endif
910: case XML_WITH_XPTR:
911: #ifdef LIBXML_XPTR_ENABLED
912: return(1);
913: #else
914: return(0);
915: #endif
916: case XML_WITH_XINCLUDE:
917: #ifdef LIBXML_XINCLUDE_ENABLED
918: return(1);
919: #else
920: return(0);
921: #endif
922: case XML_WITH_ICONV:
923: #ifdef LIBXML_ICONV_ENABLED
924: return(1);
925: #else
926: return(0);
927: #endif
928: case XML_WITH_ISO8859X:
929: #ifdef LIBXML_ISO8859X_ENABLED
930: return(1);
931: #else
932: return(0);
933: #endif
934: case XML_WITH_UNICODE:
935: #ifdef LIBXML_UNICODE_ENABLED
936: return(1);
937: #else
938: return(0);
939: #endif
940: case XML_WITH_REGEXP:
941: #ifdef LIBXML_REGEXP_ENABLED
942: return(1);
943: #else
944: return(0);
945: #endif
946: case XML_WITH_AUTOMATA:
947: #ifdef LIBXML_AUTOMATA_ENABLED
948: return(1);
949: #else
950: return(0);
951: #endif
952: case XML_WITH_EXPR:
953: #ifdef LIBXML_EXPR_ENABLED
954: return(1);
955: #else
956: return(0);
957: #endif
958: case XML_WITH_SCHEMAS:
959: #ifdef LIBXML_SCHEMAS_ENABLED
960: return(1);
961: #else
962: return(0);
963: #endif
964: case XML_WITH_SCHEMATRON:
965: #ifdef LIBXML_SCHEMATRON_ENABLED
966: return(1);
967: #else
968: return(0);
969: #endif
970: case XML_WITH_MODULES:
971: #ifdef LIBXML_MODULES_ENABLED
972: return(1);
973: #else
974: return(0);
975: #endif
976: case XML_WITH_DEBUG:
977: #ifdef LIBXML_DEBUG_ENABLED
978: return(1);
979: #else
980: return(0);
981: #endif
982: case XML_WITH_DEBUG_MEM:
983: #ifdef DEBUG_MEMORY_LOCATION
984: return(1);
985: #else
986: return(0);
987: #endif
988: case XML_WITH_DEBUG_RUN:
989: #ifdef LIBXML_DEBUG_RUNTIME
990: return(1);
991: #else
992: return(0);
993: #endif
994: case XML_WITH_ZLIB:
995: #ifdef LIBXML_ZLIB_ENABLED
996: return(1);
997: #else
998: return(0);
999: #endif
1.1.1.2 misho 1000: case XML_WITH_LZMA:
1001: #ifdef LIBXML_LZMA_ENABLED
1002: return(1);
1003: #else
1004: return(0);
1005: #endif
1.1 misho 1006: case XML_WITH_ICU:
1007: #ifdef LIBXML_ICU_ENABLED
1008: return(1);
1009: #else
1010: return(0);
1011: #endif
1012: default:
1013: break;
1014: }
1015: return(0);
1016: }
1017:
1018: /************************************************************************
1019: * *
1.1.1.3 ! misho 1020: * SAX2 defaulted attributes handling *
1.1 misho 1021: * *
1022: ************************************************************************/
1023:
1024: /**
1025: * xmlDetectSAX2:
1026: * @ctxt: an XML parser context
1027: *
1028: * Do the SAX2 detection and specific intialization
1029: */
1030: static void
1031: xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032: if (ctxt == NULL) return;
1033: #ifdef LIBXML_SAX1_ENABLED
1034: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035: ((ctxt->sax->startElementNs != NULL) ||
1036: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1037: #else
1038: ctxt->sax2 = 1;
1039: #endif /* LIBXML_SAX1_ENABLED */
1040:
1041: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1.1.1.3 ! misho 1044: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
! 1045: (ctxt->str_xml_ns == NULL)) {
1.1 misho 1046: xmlErrMemory(ctxt, NULL);
1047: }
1048: }
1049:
1050: typedef struct _xmlDefAttrs xmlDefAttrs;
1051: typedef xmlDefAttrs *xmlDefAttrsPtr;
1052: struct _xmlDefAttrs {
1053: int nbAttrs; /* number of defaulted attributes on that element */
1054: int maxAttrs; /* the size of the array */
1055: const xmlChar *values[5]; /* array of localname/prefix/values/external */
1056: };
1057:
1058: /**
1059: * xmlAttrNormalizeSpace:
1060: * @src: the source string
1061: * @dst: the target string
1062: *
1063: * Normalize the space in non CDATA attribute values:
1064: * If the attribute type is not CDATA, then the XML processor MUST further
1065: * process the normalized attribute value by discarding any leading and
1066: * trailing space (#x20) characters, and by replacing sequences of space
1067: * (#x20) characters by a single space (#x20) character.
1068: * Note that the size of dst need to be at least src, and if one doesn't need
1069: * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070: * passing src as dst is just fine.
1071: *
1072: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073: * is needed.
1074: */
1075: static xmlChar *
1076: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077: {
1078: if ((src == NULL) || (dst == NULL))
1079: return(NULL);
1080:
1081: while (*src == 0x20) src++;
1082: while (*src != 0) {
1083: if (*src == 0x20) {
1084: while (*src == 0x20) src++;
1085: if (*src != 0)
1086: *dst++ = 0x20;
1087: } else {
1088: *dst++ = *src++;
1089: }
1090: }
1091: *dst = 0;
1092: if (dst == src)
1093: return(NULL);
1094: return(dst);
1095: }
1096:
1097: /**
1098: * xmlAttrNormalizeSpace2:
1099: * @src: the source string
1100: *
1101: * Normalize the space in non CDATA attribute values, a slightly more complex
1102: * front end to avoid allocation problems when running on attribute values
1103: * coming from the input.
1104: *
1105: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106: * is needed.
1107: */
1108: static const xmlChar *
1109: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1110: {
1111: int i;
1112: int remove_head = 0;
1113: int need_realloc = 0;
1114: const xmlChar *cur;
1115:
1116: if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117: return(NULL);
1118: i = *len;
1119: if (i <= 0)
1120: return(NULL);
1121:
1122: cur = src;
1123: while (*cur == 0x20) {
1124: cur++;
1125: remove_head++;
1126: }
1127: while (*cur != 0) {
1128: if (*cur == 0x20) {
1129: cur++;
1130: if ((*cur == 0x20) || (*cur == 0)) {
1131: need_realloc = 1;
1132: break;
1133: }
1134: } else
1135: cur++;
1136: }
1137: if (need_realloc) {
1138: xmlChar *ret;
1139:
1140: ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141: if (ret == NULL) {
1142: xmlErrMemory(ctxt, NULL);
1143: return(NULL);
1144: }
1145: xmlAttrNormalizeSpace(ret, ret);
1146: *len = (int) strlen((const char *)ret);
1147: return(ret);
1148: } else if (remove_head) {
1149: *len -= remove_head;
1150: memmove(src, src + remove_head, 1 + *len);
1151: return(src);
1152: }
1153: return(NULL);
1154: }
1155:
1156: /**
1157: * xmlAddDefAttrs:
1158: * @ctxt: an XML parser context
1159: * @fullname: the element fullname
1160: * @fullattr: the attribute fullname
1161: * @value: the attribute value
1162: *
1163: * Add a defaulted attribute for an element
1164: */
1165: static void
1166: xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167: const xmlChar *fullname,
1168: const xmlChar *fullattr,
1169: const xmlChar *value) {
1170: xmlDefAttrsPtr defaults;
1171: int len;
1172: const xmlChar *name;
1173: const xmlChar *prefix;
1174:
1175: /*
1176: * Allows to detect attribute redefinitions
1177: */
1178: if (ctxt->attsSpecial != NULL) {
1179: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180: return;
1181: }
1182:
1183: if (ctxt->attsDefault == NULL) {
1184: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185: if (ctxt->attsDefault == NULL)
1186: goto mem_error;
1187: }
1188:
1189: /*
1190: * split the element name into prefix:localname , the string found
1191: * are within the DTD and then not associated to namespace names.
1192: */
1193: name = xmlSplitQName3(fullname, &len);
1194: if (name == NULL) {
1195: name = xmlDictLookup(ctxt->dict, fullname, -1);
1196: prefix = NULL;
1197: } else {
1198: name = xmlDictLookup(ctxt->dict, name, -1);
1199: prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200: }
1201:
1202: /*
1203: * make sure there is some storage
1204: */
1205: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206: if (defaults == NULL) {
1207: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208: (4 * 5) * sizeof(const xmlChar *));
1209: if (defaults == NULL)
1210: goto mem_error;
1211: defaults->nbAttrs = 0;
1212: defaults->maxAttrs = 4;
1213: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214: defaults, NULL) < 0) {
1215: xmlFree(defaults);
1216: goto mem_error;
1217: }
1218: } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219: xmlDefAttrsPtr temp;
1220:
1221: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1223: if (temp == NULL)
1224: goto mem_error;
1225: defaults = temp;
1226: defaults->maxAttrs *= 2;
1227: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228: defaults, NULL) < 0) {
1229: xmlFree(defaults);
1230: goto mem_error;
1231: }
1232: }
1233:
1234: /*
1235: * Split the element name into prefix:localname , the string found
1236: * are within the DTD and hen not associated to namespace names.
1237: */
1238: name = xmlSplitQName3(fullattr, &len);
1239: if (name == NULL) {
1240: name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241: prefix = NULL;
1242: } else {
1243: name = xmlDictLookup(ctxt->dict, name, -1);
1244: prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245: }
1246:
1247: defaults->values[5 * defaults->nbAttrs] = name;
1248: defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249: /* intern the string and precompute the end */
1250: len = xmlStrlen(value);
1251: value = xmlDictLookup(ctxt->dict, value, len);
1252: defaults->values[5 * defaults->nbAttrs + 2] = value;
1253: defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254: if (ctxt->external)
1255: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256: else
1257: defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258: defaults->nbAttrs++;
1259:
1260: return;
1261:
1262: mem_error:
1263: xmlErrMemory(ctxt, NULL);
1264: return;
1265: }
1266:
1267: /**
1268: * xmlAddSpecialAttr:
1269: * @ctxt: an XML parser context
1270: * @fullname: the element fullname
1271: * @fullattr: the attribute fullname
1272: * @type: the attribute type
1273: *
1274: * Register this attribute type
1275: */
1276: static void
1277: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278: const xmlChar *fullname,
1279: const xmlChar *fullattr,
1280: int type)
1281: {
1282: if (ctxt->attsSpecial == NULL) {
1283: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284: if (ctxt->attsSpecial == NULL)
1285: goto mem_error;
1286: }
1287:
1288: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289: return;
1290:
1291: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292: (void *) (long) type);
1293: return;
1294:
1295: mem_error:
1296: xmlErrMemory(ctxt, NULL);
1297: return;
1298: }
1299:
1300: /**
1301: * xmlCleanSpecialAttrCallback:
1302: *
1303: * Removes CDATA attributes from the special attribute table
1304: */
1305: static void
1306: xmlCleanSpecialAttrCallback(void *payload, void *data,
1307: const xmlChar *fullname, const xmlChar *fullattr,
1308: const xmlChar *unused ATTRIBUTE_UNUSED) {
1309: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310:
1311: if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313: }
1314: }
1315:
1316: /**
1317: * xmlCleanSpecialAttr:
1318: * @ctxt: an XML parser context
1319: *
1320: * Trim the list of attributes defined to remove all those of type
1321: * CDATA as they are not special. This call should be done when finishing
1322: * to parse the DTD and before starting to parse the document root.
1323: */
1324: static void
1325: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326: {
1327: if (ctxt->attsSpecial == NULL)
1328: return;
1329:
1330: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331:
1332: if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333: xmlHashFree(ctxt->attsSpecial, NULL);
1334: ctxt->attsSpecial = NULL;
1335: }
1336: return;
1337: }
1338:
1339: /**
1340: * xmlCheckLanguageID:
1341: * @lang: pointer to the string value
1342: *
1343: * Checks that the value conforms to the LanguageID production:
1344: *
1345: * NOTE: this is somewhat deprecated, those productions were removed from
1346: * the XML Second edition.
1347: *
1348: * [33] LanguageID ::= Langcode ('-' Subcode)*
1349: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353: * [38] Subcode ::= ([a-z] | [A-Z])+
1354: *
1355: * The current REC reference the sucessors of RFC 1766, currently 5646
1356: *
1357: * http://www.rfc-editor.org/rfc/rfc5646.txt
1358: * langtag = language
1359: * ["-" script]
1360: * ["-" region]
1361: * *("-" variant)
1362: * *("-" extension)
1363: * ["-" privateuse]
1364: * language = 2*3ALPHA ; shortest ISO 639 code
1365: * ["-" extlang] ; sometimes followed by
1366: * ; extended language subtags
1367: * / 4ALPHA ; or reserved for future use
1368: * / 5*8ALPHA ; or registered language subtag
1369: *
1370: * extlang = 3ALPHA ; selected ISO 639 codes
1371: * *2("-" 3ALPHA) ; permanently reserved
1372: *
1373: * script = 4ALPHA ; ISO 15924 code
1374: *
1375: * region = 2ALPHA ; ISO 3166-1 code
1376: * / 3DIGIT ; UN M.49 code
1377: *
1378: * variant = 5*8alphanum ; registered variants
1379: * / (DIGIT 3alphanum)
1380: *
1381: * extension = singleton 1*("-" (2*8alphanum))
1382: *
1383: * ; Single alphanumerics
1384: * ; "x" reserved for private use
1385: * singleton = DIGIT ; 0 - 9
1386: * / %x41-57 ; A - W
1387: * / %x59-5A ; Y - Z
1388: * / %x61-77 ; a - w
1389: * / %x79-7A ; y - z
1390: *
1391: * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392: * The parser below doesn't try to cope with extension or privateuse
1393: * that could be added but that's not interoperable anyway
1394: *
1395: * Returns 1 if correct 0 otherwise
1396: **/
1397: int
1398: xmlCheckLanguageID(const xmlChar * lang)
1399: {
1400: const xmlChar *cur = lang, *nxt;
1401:
1402: if (cur == NULL)
1403: return (0);
1404: if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405: ((cur[0] == 'I') && (cur[1] == '-')) ||
1406: ((cur[0] == 'x') && (cur[1] == '-')) ||
1407: ((cur[0] == 'X') && (cur[1] == '-'))) {
1408: /*
1409: * Still allow IANA code and user code which were coming
1410: * from the previous version of the XML-1.0 specification
1411: * it's deprecated but we should not fail
1412: */
1413: cur += 2;
1414: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415: ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416: cur++;
1417: return(cur[0] == 0);
1418: }
1419: nxt = cur;
1420: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422: nxt++;
1423: if (nxt - cur >= 4) {
1424: /*
1425: * Reserved
1426: */
1427: if ((nxt - cur > 8) || (nxt[0] != 0))
1428: return(0);
1429: return(1);
1430: }
1431: if (nxt - cur < 2)
1432: return(0);
1433: /* we got an ISO 639 code */
1434: if (nxt[0] == 0)
1435: return(1);
1436: if (nxt[0] != '-')
1437: return(0);
1438:
1439: nxt++;
1440: cur = nxt;
1441: /* now we can have extlang or script or region or variant */
1442: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443: goto region_m49;
1444:
1445: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447: nxt++;
1448: if (nxt - cur == 4)
1449: goto script;
1450: if (nxt - cur == 2)
1451: goto region;
1452: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453: goto variant;
1454: if (nxt - cur != 3)
1455: return(0);
1456: /* we parsed an extlang */
1457: if (nxt[0] == 0)
1458: return(1);
1459: if (nxt[0] != '-')
1460: return(0);
1461:
1462: nxt++;
1463: cur = nxt;
1464: /* now we can have script or region or variant */
1465: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466: goto region_m49;
1467:
1468: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470: nxt++;
1471: if (nxt - cur == 2)
1472: goto region;
1473: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474: goto variant;
1475: if (nxt - cur != 4)
1476: return(0);
1477: /* we parsed a script */
1478: script:
1479: if (nxt[0] == 0)
1480: return(1);
1481: if (nxt[0] != '-')
1482: return(0);
1483:
1484: nxt++;
1485: cur = nxt;
1486: /* now we can have region or variant */
1487: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488: goto region_m49;
1489:
1490: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492: nxt++;
1493:
1494: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495: goto variant;
1496: if (nxt - cur != 2)
1497: return(0);
1498: /* we parsed a region */
1499: region:
1500: if (nxt[0] == 0)
1501: return(1);
1502: if (nxt[0] != '-')
1503: return(0);
1504:
1505: nxt++;
1506: cur = nxt;
1507: /* now we can just have a variant */
1508: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510: nxt++;
1511:
1512: if ((nxt - cur < 5) || (nxt - cur > 8))
1513: return(0);
1514:
1515: /* we parsed a variant */
1516: variant:
1517: if (nxt[0] == 0)
1518: return(1);
1519: if (nxt[0] != '-')
1520: return(0);
1521: /* extensions and private use subtags not checked */
1522: return (1);
1523:
1524: region_m49:
1525: if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526: ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527: nxt += 3;
1528: goto region;
1529: }
1530: return(0);
1531: }
1532:
1533: /************************************************************************
1534: * *
1535: * Parser stacks related functions and macros *
1536: * *
1537: ************************************************************************/
1538:
1539: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540: const xmlChar ** str);
1541:
1542: #ifdef SAX2
1543: /**
1544: * nsPush:
1545: * @ctxt: an XML parser context
1546: * @prefix: the namespace prefix or NULL
1547: * @URL: the namespace name
1548: *
1549: * Pushes a new parser namespace on top of the ns stack
1550: *
1551: * Returns -1 in case of error, -2 if the namespace should be discarded
1552: * and the index in the stack otherwise.
1553: */
1554: static int
1555: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556: {
1557: if (ctxt->options & XML_PARSE_NSCLEAN) {
1558: int i;
1.1.1.3 ! misho 1559: for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1.1 misho 1560: if (ctxt->nsTab[i] == prefix) {
1561: /* in scope */
1562: if (ctxt->nsTab[i + 1] == URL)
1563: return(-2);
1564: /* out of scope keep it */
1565: break;
1566: }
1567: }
1568: }
1569: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570: ctxt->nsMax = 10;
1571: ctxt->nsNr = 0;
1572: ctxt->nsTab = (const xmlChar **)
1573: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574: if (ctxt->nsTab == NULL) {
1575: xmlErrMemory(ctxt, NULL);
1576: ctxt->nsMax = 0;
1577: return (-1);
1578: }
1579: } else if (ctxt->nsNr >= ctxt->nsMax) {
1580: const xmlChar ** tmp;
1581: ctxt->nsMax *= 2;
1582: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583: ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584: if (tmp == NULL) {
1585: xmlErrMemory(ctxt, NULL);
1586: ctxt->nsMax /= 2;
1587: return (-1);
1588: }
1589: ctxt->nsTab = tmp;
1590: }
1591: ctxt->nsTab[ctxt->nsNr++] = prefix;
1592: ctxt->nsTab[ctxt->nsNr++] = URL;
1593: return (ctxt->nsNr);
1594: }
1595: /**
1596: * nsPop:
1597: * @ctxt: an XML parser context
1598: * @nr: the number to pop
1599: *
1600: * Pops the top @nr parser prefix/namespace from the ns stack
1601: *
1602: * Returns the number of namespaces removed
1603: */
1604: static int
1605: nsPop(xmlParserCtxtPtr ctxt, int nr)
1606: {
1607: int i;
1608:
1609: if (ctxt->nsTab == NULL) return(0);
1610: if (ctxt->nsNr < nr) {
1611: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612: nr = ctxt->nsNr;
1613: }
1614: if (ctxt->nsNr <= 0)
1615: return (0);
1616:
1617: for (i = 0;i < nr;i++) {
1618: ctxt->nsNr--;
1619: ctxt->nsTab[ctxt->nsNr] = NULL;
1620: }
1621: return(nr);
1622: }
1623: #endif
1624:
1625: static int
1626: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627: const xmlChar **atts;
1628: int *attallocs;
1629: int maxatts;
1630:
1631: if (ctxt->atts == NULL) {
1632: maxatts = 55; /* allow for 10 attrs by default */
1633: atts = (const xmlChar **)
1634: xmlMalloc(maxatts * sizeof(xmlChar *));
1635: if (atts == NULL) goto mem_error;
1636: ctxt->atts = atts;
1637: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638: if (attallocs == NULL) goto mem_error;
1639: ctxt->attallocs = attallocs;
1640: ctxt->maxatts = maxatts;
1641: } else if (nr + 5 > ctxt->maxatts) {
1642: maxatts = (nr + 5) * 2;
1643: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644: maxatts * sizeof(const xmlChar *));
1645: if (atts == NULL) goto mem_error;
1646: ctxt->atts = atts;
1647: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648: (maxatts / 5) * sizeof(int));
1649: if (attallocs == NULL) goto mem_error;
1650: ctxt->attallocs = attallocs;
1651: ctxt->maxatts = maxatts;
1652: }
1653: return(ctxt->maxatts);
1654: mem_error:
1655: xmlErrMemory(ctxt, NULL);
1656: return(-1);
1657: }
1658:
1659: /**
1660: * inputPush:
1661: * @ctxt: an XML parser context
1662: * @value: the parser input
1663: *
1664: * Pushes a new parser input on top of the input stack
1665: *
1666: * Returns -1 in case of error, the index in the stack otherwise
1667: */
1668: int
1669: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670: {
1671: if ((ctxt == NULL) || (value == NULL))
1672: return(-1);
1673: if (ctxt->inputNr >= ctxt->inputMax) {
1674: ctxt->inputMax *= 2;
1675: ctxt->inputTab =
1676: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677: ctxt->inputMax *
1678: sizeof(ctxt->inputTab[0]));
1679: if (ctxt->inputTab == NULL) {
1680: xmlErrMemory(ctxt, NULL);
1681: xmlFreeInputStream(value);
1682: ctxt->inputMax /= 2;
1683: value = NULL;
1684: return (-1);
1685: }
1686: }
1687: ctxt->inputTab[ctxt->inputNr] = value;
1688: ctxt->input = value;
1689: return (ctxt->inputNr++);
1690: }
1691: /**
1692: * inputPop:
1693: * @ctxt: an XML parser context
1694: *
1695: * Pops the top parser input from the input stack
1696: *
1697: * Returns the input just removed
1698: */
1699: xmlParserInputPtr
1700: inputPop(xmlParserCtxtPtr ctxt)
1701: {
1702: xmlParserInputPtr ret;
1703:
1704: if (ctxt == NULL)
1705: return(NULL);
1706: if (ctxt->inputNr <= 0)
1707: return (NULL);
1708: ctxt->inputNr--;
1709: if (ctxt->inputNr > 0)
1710: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711: else
1712: ctxt->input = NULL;
1713: ret = ctxt->inputTab[ctxt->inputNr];
1714: ctxt->inputTab[ctxt->inputNr] = NULL;
1715: return (ret);
1716: }
1717: /**
1718: * nodePush:
1719: * @ctxt: an XML parser context
1720: * @value: the element node
1721: *
1722: * Pushes a new element node on top of the node stack
1723: *
1724: * Returns -1 in case of error, the index in the stack otherwise
1725: */
1726: int
1727: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728: {
1729: if (ctxt == NULL) return(0);
1730: if (ctxt->nodeNr >= ctxt->nodeMax) {
1731: xmlNodePtr *tmp;
1732:
1733: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734: ctxt->nodeMax * 2 *
1735: sizeof(ctxt->nodeTab[0]));
1736: if (tmp == NULL) {
1737: xmlErrMemory(ctxt, NULL);
1738: return (-1);
1739: }
1740: ctxt->nodeTab = tmp;
1741: ctxt->nodeMax *= 2;
1742: }
1743: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1747: xmlParserMaxDepth);
1748: ctxt->instate = XML_PARSER_EOF;
1749: return(-1);
1750: }
1751: ctxt->nodeTab[ctxt->nodeNr] = value;
1752: ctxt->node = value;
1753: return (ctxt->nodeNr++);
1754: }
1755:
1756: /**
1757: * nodePop:
1758: * @ctxt: an XML parser context
1759: *
1760: * Pops the top element node from the node stack
1761: *
1762: * Returns the node just removed
1763: */
1764: xmlNodePtr
1765: nodePop(xmlParserCtxtPtr ctxt)
1766: {
1767: xmlNodePtr ret;
1768:
1769: if (ctxt == NULL) return(NULL);
1770: if (ctxt->nodeNr <= 0)
1771: return (NULL);
1772: ctxt->nodeNr--;
1773: if (ctxt->nodeNr > 0)
1774: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775: else
1776: ctxt->node = NULL;
1777: ret = ctxt->nodeTab[ctxt->nodeNr];
1778: ctxt->nodeTab[ctxt->nodeNr] = NULL;
1779: return (ret);
1780: }
1781:
1782: #ifdef LIBXML_PUSH_ENABLED
1783: /**
1784: * nameNsPush:
1785: * @ctxt: an XML parser context
1786: * @value: the element name
1787: * @prefix: the element prefix
1788: * @URI: the element namespace name
1789: *
1790: * Pushes a new element name/prefix/URL on top of the name stack
1791: *
1792: * Returns -1 in case of error, the index in the stack otherwise
1793: */
1794: static int
1795: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796: const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797: {
1798: if (ctxt->nameNr >= ctxt->nameMax) {
1799: const xmlChar * *tmp;
1800: void **tmp2;
1801: ctxt->nameMax *= 2;
1802: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803: ctxt->nameMax *
1804: sizeof(ctxt->nameTab[0]));
1805: if (tmp == NULL) {
1806: ctxt->nameMax /= 2;
1807: goto mem_error;
1808: }
1809: ctxt->nameTab = tmp;
1810: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811: ctxt->nameMax * 3 *
1812: sizeof(ctxt->pushTab[0]));
1813: if (tmp2 == NULL) {
1814: ctxt->nameMax /= 2;
1815: goto mem_error;
1816: }
1817: ctxt->pushTab = tmp2;
1818: }
1819: ctxt->nameTab[ctxt->nameNr] = value;
1820: ctxt->name = value;
1821: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824: return (ctxt->nameNr++);
1825: mem_error:
1826: xmlErrMemory(ctxt, NULL);
1827: return (-1);
1828: }
1829: /**
1830: * nameNsPop:
1831: * @ctxt: an XML parser context
1832: *
1833: * Pops the top element/prefix/URI name from the name stack
1834: *
1835: * Returns the name just removed
1836: */
1837: static const xmlChar *
1838: nameNsPop(xmlParserCtxtPtr ctxt)
1839: {
1840: const xmlChar *ret;
1841:
1842: if (ctxt->nameNr <= 0)
1843: return (NULL);
1844: ctxt->nameNr--;
1845: if (ctxt->nameNr > 0)
1846: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847: else
1848: ctxt->name = NULL;
1849: ret = ctxt->nameTab[ctxt->nameNr];
1850: ctxt->nameTab[ctxt->nameNr] = NULL;
1851: return (ret);
1852: }
1853: #endif /* LIBXML_PUSH_ENABLED */
1854:
1855: /**
1856: * namePush:
1857: * @ctxt: an XML parser context
1858: * @value: the element name
1859: *
1860: * Pushes a new element name on top of the name stack
1861: *
1862: * Returns -1 in case of error, the index in the stack otherwise
1863: */
1864: int
1865: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1866: {
1867: if (ctxt == NULL) return (-1);
1868:
1869: if (ctxt->nameNr >= ctxt->nameMax) {
1870: const xmlChar * *tmp;
1871: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1.1.1.2 misho 1872: ctxt->nameMax * 2 *
1.1 misho 1873: sizeof(ctxt->nameTab[0]));
1874: if (tmp == NULL) {
1875: goto mem_error;
1876: }
1877: ctxt->nameTab = tmp;
1.1.1.2 misho 1878: ctxt->nameMax *= 2;
1.1 misho 1879: }
1880: ctxt->nameTab[ctxt->nameNr] = value;
1881: ctxt->name = value;
1882: return (ctxt->nameNr++);
1883: mem_error:
1884: xmlErrMemory(ctxt, NULL);
1885: return (-1);
1886: }
1887: /**
1888: * namePop:
1889: * @ctxt: an XML parser context
1890: *
1891: * Pops the top element name from the name stack
1892: *
1893: * Returns the name just removed
1894: */
1895: const xmlChar *
1896: namePop(xmlParserCtxtPtr ctxt)
1897: {
1898: const xmlChar *ret;
1899:
1900: if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901: return (NULL);
1902: ctxt->nameNr--;
1903: if (ctxt->nameNr > 0)
1904: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905: else
1906: ctxt->name = NULL;
1907: ret = ctxt->nameTab[ctxt->nameNr];
1908: ctxt->nameTab[ctxt->nameNr] = NULL;
1909: return (ret);
1910: }
1911:
1912: static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913: if (ctxt->spaceNr >= ctxt->spaceMax) {
1914: int *tmp;
1915:
1916: ctxt->spaceMax *= 2;
1917: tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919: if (tmp == NULL) {
1920: xmlErrMemory(ctxt, NULL);
1921: ctxt->spaceMax /=2;
1922: return(-1);
1923: }
1924: ctxt->spaceTab = tmp;
1925: }
1926: ctxt->spaceTab[ctxt->spaceNr] = val;
1927: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928: return(ctxt->spaceNr++);
1929: }
1930:
1931: static int spacePop(xmlParserCtxtPtr ctxt) {
1932: int ret;
1933: if (ctxt->spaceNr <= 0) return(0);
1934: ctxt->spaceNr--;
1935: if (ctxt->spaceNr > 0)
1936: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937: else
1938: ctxt->space = &ctxt->spaceTab[0];
1939: ret = ctxt->spaceTab[ctxt->spaceNr];
1940: ctxt->spaceTab[ctxt->spaceNr] = -1;
1941: return(ret);
1942: }
1943:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character, which is l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1978:
/*
 * Direct access to the current input. All these macros expect a parser
 * context variable named ctxt to be in scope. RAW and CUR have the same
 * expansion.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes of s, read as
 * unsigned chars, are c1 ... cn. The bytes are compared left to right and
 * the comparison stops at the first mismatch. Every argument is
 * parenthesized so that any expression can be passed; s may be evaluated
 * up to n times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2001:
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * consumed chars count and the column by the same amount (the line number
 * is not touched). Then handle a PE reference if the new current char is
 * '%', and pop the input if it is exhausted and cannot grow.
 * val is evaluated several times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * line and column stay right when newlines are skipped.
 * val is evaluated at each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2024:
2025: #define SHRINK if ((ctxt->progressive == 0) && \
2026: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2028: xmlSHRINK (ctxt);
2029:
2030: static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031: xmlParserInputShrink(ctxt->input);
2032: if ((*ctxt->input->cur == 0) &&
2033: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034: xmlPopInput(ctxt);
2035: }
2036:
2037: #define GROW if ((ctxt->progressive == 0) && \
2038: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2039: xmlGROW (ctxt);
2040:
2041: static void xmlGROW (xmlParserCtxtPtr ctxt) {
1.1.1.3 ! misho 2042: if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
! 2043: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
! 2044: ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
! 2045: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 2046: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
! 2047: ctxt->instate = XML_PARSER_EOF;
! 2048: }
1.1 misho 2049: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2052: xmlPopInput(ctxt);
2053: }
2054:
/* skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* skip to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, bumping the column and the consumed
 * chars count, and try to grow the input if the new current char is 0.
 * The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character which is l xmlChars long, keeping
 * line and column up to date, then handle a PE reference if the new
 * current char is '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context input, l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* current character of the string s, l receives its length */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a character of length l == 1 is stored as a single xmlChar,
 * anything else goes through xmlCopyCharMultiByte().
 * The expansion is a bare if/else statement without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2081:
2082: /**
2083: * xmlSkipBlankChars:
2084: * @ctxt: the XML parser context
2085: *
2086: * skip all blanks character found at that point in the input streams.
2087: * It pops up finished entities in the process if allowable at that point.
2088: *
2089: * Returns the number of space chars skipped
2090: */
2091:
2092: int
2093: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2094: int res = 0;
2095:
2096: /*
2097: * It's Okay to use CUR/NEXT here since all the blanks are on
2098: * the ASCII range.
2099: */
2100: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2101: const xmlChar *cur;
2102: /*
2103: * if we are in the document content, go really fast
2104: */
2105: cur = ctxt->input->cur;
2106: while (IS_BLANK_CH(*cur)) {
2107: if (*cur == '\n') {
2108: ctxt->input->line++; ctxt->input->col = 1;
2109: }
2110: cur++;
2111: res++;
2112: if (*cur == 0) {
2113: ctxt->input->cur = cur;
2114: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115: cur = ctxt->input->cur;
2116: }
2117: }
2118: ctxt->input->cur = cur;
2119: } else {
2120: int cur;
2121: do {
2122: cur = CUR;
2123: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2124: NEXT;
2125: cur = CUR;
2126: res++;
2127: }
2128: while ((cur == 0) && (ctxt->inputNr > 1) &&
2129: (ctxt->instate != XML_PARSER_COMMENT)) {
2130: xmlPopInput(ctxt);
2131: cur = CUR;
2132: }
2133: /*
2134: * Need to handle support of entities branching here
2135: */
2136: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2138: }
2139: return(res);
2140: }
2141:
2142: /************************************************************************
2143: * *
2144: * Commodity functions to handle entities *
2145: * *
2146: ************************************************************************/
2147:
2148: /**
2149: * xmlPopInput:
2150: * @ctxt: an XML parser context
2151: *
2152: * xmlPopInput: the current input pointed by ctxt->input came to an end
2153: * pop it and return the next char.
2154: *
2155: * Returns the current xmlChar in the parser context
2156: */
2157: xmlChar
2158: xmlPopInput(xmlParserCtxtPtr ctxt) {
2159: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160: if (xmlParserDebugEntities)
2161: xmlGenericError(xmlGenericErrorContext,
2162: "Popping input %d\n", ctxt->inputNr);
2163: xmlFreeInputStream(inputPop(ctxt));
2164: if ((*ctxt->input->cur == 0) &&
2165: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166: return(xmlPopInput(ctxt));
2167: return(CUR);
2168: }
2169:
2170: /**
2171: * xmlPushInput:
2172: * @ctxt: an XML parser context
2173: * @input: an XML parser input fragment (entity, XML fragment ...).
2174: *
2175: * xmlPushInput: switch to a new input stream which is stacked on top
2176: * of the previous one(s).
2177: * Returns -1 in case of error or the index in the input stack
2178: */
2179: int
2180: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2181: int ret;
2182: if (input == NULL) return(-1);
2183:
2184: if (xmlParserDebugEntities) {
2185: if ((ctxt->input != NULL) && (ctxt->input->filename))
2186: xmlGenericError(xmlGenericErrorContext,
2187: "%s(%d): ", ctxt->input->filename,
2188: ctxt->input->line);
2189: xmlGenericError(xmlGenericErrorContext,
2190: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2191: }
2192: ret = inputPush(ctxt, input);
1.1.1.3 ! misho 2193: if (ctxt->instate == XML_PARSER_EOF)
! 2194: return(-1);
1.1 misho 2195: GROW;
2196: return(ret);
2197: }
2198:
2199: /**
2200: * xmlParseCharRef:
2201: * @ctxt: an XML parser context
2202: *
2203: * parse Reference declarations
2204: *
2205: * [66] CharRef ::= '&#' [0-9]+ ';' |
2206: * '&#x' [0-9a-fA-F]+ ';'
2207: *
2208: * [ WFC: Legal Character ]
2209: * Characters referred to using character references must match the
1.1.1.3 ! misho 2210: * production for Char.
1.1 misho 2211: *
2212: * Returns the value parsed (as an int), 0 in case of error
2213: */
2214: int
2215: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2216: unsigned int val = 0;
2217: int count = 0;
2218: unsigned int outofrange = 0;
2219:
2220: /*
2221: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2222: */
2223: if ((RAW == '&') && (NXT(1) == '#') &&
2224: (NXT(2) == 'x')) {
2225: SKIP(3);
2226: GROW;
2227: while (RAW != ';') { /* loop blocked by count */
2228: if (count++ > 20) {
2229: count = 0;
2230: GROW;
1.1.1.3 ! misho 2231: if (ctxt->instate == XML_PARSER_EOF)
! 2232: return(0);
1.1 misho 2233: }
1.1.1.3 ! misho 2234: if ((RAW >= '0') && (RAW <= '9'))
1.1 misho 2235: val = val * 16 + (CUR - '0');
2236: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237: val = val * 16 + (CUR - 'a') + 10;
2238: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239: val = val * 16 + (CUR - 'A') + 10;
2240: else {
2241: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2242: val = 0;
2243: break;
2244: }
2245: if (val > 0x10FFFF)
2246: outofrange = val;
2247:
2248: NEXT;
2249: count++;
2250: }
2251: if (RAW == ';') {
2252: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2253: ctxt->input->col++;
2254: ctxt->nbChars ++;
2255: ctxt->input->cur++;
2256: }
2257: } else if ((RAW == '&') && (NXT(1) == '#')) {
2258: SKIP(2);
2259: GROW;
2260: while (RAW != ';') { /* loop blocked by count */
2261: if (count++ > 20) {
2262: count = 0;
2263: GROW;
1.1.1.3 ! misho 2264: if (ctxt->instate == XML_PARSER_EOF)
! 2265: return(0);
1.1 misho 2266: }
1.1.1.3 ! misho 2267: if ((RAW >= '0') && (RAW <= '9'))
1.1 misho 2268: val = val * 10 + (CUR - '0');
2269: else {
2270: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2271: val = 0;
2272: break;
2273: }
2274: if (val > 0x10FFFF)
2275: outofrange = val;
2276:
2277: NEXT;
2278: count++;
2279: }
2280: if (RAW == ';') {
2281: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2282: ctxt->input->col++;
2283: ctxt->nbChars ++;
2284: ctxt->input->cur++;
2285: }
2286: } else {
2287: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2288: }
2289:
2290: /*
2291: * [ WFC: Legal Character ]
2292: * Characters referred to using character references must match the
1.1.1.3 ! misho 2293: * production for Char.
1.1 misho 2294: */
2295: if ((IS_CHAR(val) && (outofrange == 0))) {
2296: return(val);
2297: } else {
2298: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299: "xmlParseCharRef: invalid xmlChar value %d\n",
2300: val);
2301: }
2302: return(0);
2303: }
2304:
2305: /**
2306: * xmlParseStringCharRef:
2307: * @ctxt: an XML parser context
2308: * @str: a pointer to an index in the string
2309: *
2310: * parse Reference declarations, variant parsing from a string rather
2311: * than an an input flow.
2312: *
2313: * [66] CharRef ::= '&#' [0-9]+ ';' |
2314: * '&#x' [0-9a-fA-F]+ ';'
2315: *
2316: * [ WFC: Legal Character ]
2317: * Characters referred to using character references must match the
1.1.1.3 ! misho 2318: * production for Char.
1.1 misho 2319: *
2320: * Returns the value parsed (as an int), 0 in case of error, str will be
2321: * updated to the current value of the index
2322: */
2323: static int
2324: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2325: const xmlChar *ptr;
2326: xmlChar cur;
2327: unsigned int val = 0;
2328: unsigned int outofrange = 0;
2329:
2330: if ((str == NULL) || (*str == NULL)) return(0);
2331: ptr = *str;
2332: cur = *ptr;
2333: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2334: ptr += 3;
2335: cur = *ptr;
2336: while (cur != ';') { /* Non input consuming loop */
1.1.1.3 ! misho 2337: if ((cur >= '0') && (cur <= '9'))
1.1 misho 2338: val = val * 16 + (cur - '0');
2339: else if ((cur >= 'a') && (cur <= 'f'))
2340: val = val * 16 + (cur - 'a') + 10;
2341: else if ((cur >= 'A') && (cur <= 'F'))
2342: val = val * 16 + (cur - 'A') + 10;
2343: else {
2344: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2345: val = 0;
2346: break;
2347: }
2348: if (val > 0x10FFFF)
2349: outofrange = val;
2350:
2351: ptr++;
2352: cur = *ptr;
2353: }
2354: if (cur == ';')
2355: ptr++;
2356: } else if ((cur == '&') && (ptr[1] == '#')){
2357: ptr += 2;
2358: cur = *ptr;
2359: while (cur != ';') { /* Non input consuming loops */
1.1.1.3 ! misho 2360: if ((cur >= '0') && (cur <= '9'))
1.1 misho 2361: val = val * 10 + (cur - '0');
2362: else {
2363: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2364: val = 0;
2365: break;
2366: }
2367: if (val > 0x10FFFF)
2368: outofrange = val;
2369:
2370: ptr++;
2371: cur = *ptr;
2372: }
2373: if (cur == ';')
2374: ptr++;
2375: } else {
2376: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2377: return(0);
2378: }
2379: *str = ptr;
2380:
2381: /*
2382: * [ WFC: Legal Character ]
2383: * Characters referred to using character references must match the
1.1.1.3 ! misho 2384: * production for Char.
1.1 misho 2385: */
2386: if ((IS_CHAR(val) && (outofrange == 0))) {
2387: return(val);
2388: } else {
2389: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390: "xmlParseStringCharRef: invalid xmlChar value %d\n",
2391: val);
2392: }
2393: return(0);
2394: }
2395:
2396: /**
2397: * xmlNewBlanksWrapperInputStream:
2398: * @ctxt: an XML parser context
2399: * @entity: an Entity pointer
2400: *
2401: * Create a new input stream for wrapping
2402: * blanks around a PEReference
2403: *
2404: * Returns the new input stream or NULL
2405: */
1.1.1.3 ! misho 2406:
1.1 misho 2407: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1.1.1.3 ! misho 2408:
1.1 misho 2409: static xmlParserInputPtr
2410: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411: xmlParserInputPtr input;
2412: xmlChar *buffer;
2413: size_t length;
2414: if (entity == NULL) {
2415: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416: "xmlNewBlanksWrapperInputStream entity\n");
2417: return(NULL);
2418: }
2419: if (xmlParserDebugEntities)
2420: xmlGenericError(xmlGenericErrorContext,
2421: "new blanks wrapper for entity: %s\n", entity->name);
2422: input = xmlNewInputStream(ctxt);
2423: if (input == NULL) {
2424: return(NULL);
2425: }
2426: length = xmlStrlen(entity->name) + 5;
2427: buffer = xmlMallocAtomic(length);
2428: if (buffer == NULL) {
2429: xmlErrMemory(ctxt, NULL);
2430: xmlFree(input);
1.1.1.3 ! misho 2431: return(NULL);
1.1 misho 2432: }
2433: buffer [0] = ' ';
2434: buffer [1] = '%';
2435: buffer [length-3] = ';';
2436: buffer [length-2] = ' ';
2437: buffer [length-1] = 0;
2438: memcpy(buffer + 2, entity->name, length - 5);
2439: input->free = deallocblankswrapper;
2440: input->base = buffer;
2441: input->cur = buffer;
2442: input->length = length;
2443: input->end = &buffer[length];
2444: return(input);
2445: }
2446:
2447: /**
2448: * xmlParserHandlePEReference:
2449: * @ctxt: the parser context
1.1.1.3 ! misho 2450: *
1.1 misho 2451: * [69] PEReference ::= '%' Name ';'
2452: *
2453: * [ WFC: No Recursion ]
2454: * A parsed entity must not contain a recursive
1.1.1.3 ! misho 2455: * reference to itself, either directly or indirectly.
1.1 misho 2456: *
2457: * [ WFC: Entity Declared ]
2458: * In a document without any DTD, a document with only an internal DTD
2459: * subset which contains no parameter entity references, or a document
2460: * with "standalone='yes'", ... ... The declaration of a parameter
2461: * entity must precede any reference to it...
2462: *
2463: * [ VC: Entity Declared ]
2464: * In a document with an external subset or external parameter entities
2465: * with "standalone='no'", ... ... The declaration of a parameter entity
2466: * must precede any reference to it...
2467: *
2468: * [ WFC: In DTD ]
2469: * Parameter-entity references may only appear in the DTD.
2470: * NOTE: misleading but this is handled.
2471: *
2472: * A PEReference may have been detected in the current input stream
1.1.1.3 ! misho 2473: * the handling is done accordingly to
1.1 misho 2474: * http://www.w3.org/TR/REC-xml#entproc
1.1.1.3 ! misho 2475: * i.e.
1.1 misho 2476: * - Included in literal in entity values
2477: * - Included as Parameter Entity reference within DTDs
2478: */
2479: void
2480: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2481: const xmlChar *name;
2482: xmlEntityPtr entity = NULL;
2483: xmlParserInputPtr input;
2484:
2485: if (RAW != '%') return;
2486: switch(ctxt->instate) {
2487: case XML_PARSER_CDATA_SECTION:
2488: return;
2489: case XML_PARSER_COMMENT:
2490: return;
2491: case XML_PARSER_START_TAG:
2492: return;
2493: case XML_PARSER_END_TAG:
2494: return;
2495: case XML_PARSER_EOF:
2496: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2497: return;
2498: case XML_PARSER_PROLOG:
2499: case XML_PARSER_START:
2500: case XML_PARSER_MISC:
2501: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2502: return;
2503: case XML_PARSER_ENTITY_DECL:
2504: case XML_PARSER_CONTENT:
2505: case XML_PARSER_ATTRIBUTE_VALUE:
2506: case XML_PARSER_PI:
2507: case XML_PARSER_SYSTEM_LITERAL:
2508: case XML_PARSER_PUBLIC_LITERAL:
2509: /* we just ignore it there */
2510: return;
2511: case XML_PARSER_EPILOG:
2512: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2513: return;
2514: case XML_PARSER_ENTITY_VALUE:
2515: /*
2516: * NOTE: in the case of entity values, we don't do the
2517: * substitution here since we need the literal
2518: * entity value to be able to save the internal
2519: * subset of the document.
2520: * This will be handled by xmlStringDecodeEntities
2521: */
2522: return;
2523: case XML_PARSER_DTD:
2524: /*
2525: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526: * In the internal DTD subset, parameter-entity references
2527: * can occur only where markup declarations can occur, not
2528: * within markup declarations.
2529: * In that case this is handled in xmlParseMarkupDecl
2530: */
2531: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2532: return;
2533: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2534: return;
2535: break;
2536: case XML_PARSER_IGNORE:
2537: return;
2538: }
2539:
2540: NEXT;
2541: name = xmlParseName(ctxt);
2542: if (xmlParserDebugEntities)
2543: xmlGenericError(xmlGenericErrorContext,
2544: "PEReference: %s\n", name);
2545: if (name == NULL) {
2546: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2547: } else {
2548: if (RAW == ';') {
2549: NEXT;
2550: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.1.1.3 ! misho 2552: if (ctxt->instate == XML_PARSER_EOF)
! 2553: return;
1.1 misho 2554: if (entity == NULL) {
1.1.1.3 ! misho 2555:
1.1 misho 2556: /*
2557: * [ WFC: Entity Declared ]
2558: * In a document without any DTD, a document with only an
2559: * internal DTD subset which contains no parameter entity
2560: * references, or a document with "standalone='yes'", ...
2561: * ... The declaration of a parameter entity must precede
2562: * any reference to it...
2563: */
2564: if ((ctxt->standalone == 1) ||
2565: ((ctxt->hasExternalSubset == 0) &&
2566: (ctxt->hasPErefs == 0))) {
2567: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2568: "PEReference: %%%s; not found\n", name);
2569: } else {
2570: /*
2571: * [ VC: Entity Declared ]
2572: * In a document with an external subset or external
2573: * parameter entities with "standalone='no'", ...
2574: * ... The declaration of a parameter entity must precede
2575: * any reference to it...
2576: */
2577: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579: "PEReference: %%%s; not found\n",
2580: name, NULL);
1.1.1.3 ! misho 2581: } else
1.1 misho 2582: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583: "PEReference: %%%s; not found\n",
2584: name, NULL);
2585: ctxt->valid = 0;
2586: }
2587: } else if (ctxt->input->free != deallocblankswrapper) {
2588: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2589: if (xmlPushInput(ctxt, input) < 0)
2590: return;
2591: } else {
2592: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2594: xmlChar start[4];
2595: xmlCharEncoding enc;
2596:
2597: /*
2598: * handle the extra spaces added before and after
2599: * c.f. http://www.w3.org/TR/REC-xml#as-PE
2600: * this is done independently.
2601: */
2602: input = xmlNewEntityInputStream(ctxt, entity);
2603: if (xmlPushInput(ctxt, input) < 0)
2604: return;
2605:
1.1.1.3 ! misho 2606: /*
1.1 misho 2607: * Get the 4 first bytes and decode the charset
2608: * if enc != XML_CHAR_ENCODING_NONE
2609: * plug some encoding conversion routines.
2610: * Note that, since we may have some non-UTF8
2611: * encoding (like UTF16, bug 135229), the 'length'
2612: * is not known, but we can calculate based upon
2613: * the amount of data in the buffer.
2614: */
2615: GROW
1.1.1.3 ! misho 2616: if (ctxt->instate == XML_PARSER_EOF)
! 2617: return;
1.1 misho 2618: if ((ctxt->input->end - ctxt->input->cur)>=4) {
2619: start[0] = RAW;
2620: start[1] = NXT(1);
2621: start[2] = NXT(2);
2622: start[3] = NXT(3);
2623: enc = xmlDetectCharEncoding(start, 4);
2624: if (enc != XML_CHAR_ENCODING_NONE) {
2625: xmlSwitchEncoding(ctxt, enc);
2626: }
2627: }
2628:
2629: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2630: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2631: (IS_BLANK_CH(NXT(5)))) {
2632: xmlParseTextDecl(ctxt);
2633: }
2634: } else {
2635: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2636: "PEReference: %s is not a parameter entity\n",
2637: name);
2638: }
2639: }
2640: } else {
2641: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2642: }
2643: }
2644: }
2645:
/*
 * growBuffer: enlarge the heap buffer @buffer to twice its size plus @n.
 *
 * Requirements on the expansion site:
 *   - a variable named <buffer>_size of type size_t holds the current
 *     size and receives the new one;
 *   - a label mem_error exists; it is the target on size wrap-around
 *     and on reallocation failure (the old buffer is left untouched).
 * Expands to a brace block, not to a do/while statement.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2660:
2661: /**
2662: * xmlStringLenDecodeEntities:
2663: * @ctxt: the parser context
2664: * @str: the input string
2665: * @len: the string length
2666: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2667: * @end: an end marker xmlChar, 0 if none
2668: * @end2: an end marker xmlChar, 0 if none
2669: * @end3: an end marker xmlChar, 0 if none
1.1.1.3 ! misho 2670: *
1.1 misho 2671: * Takes a entity string content and process to do the adequate substitutions.
2672: *
2673: * [67] Reference ::= EntityRef | CharRef
2674: *
2675: * [69] PEReference ::= '%' Name ';'
2676: *
2677: * Returns A newly allocated string with the substitution done. The caller
2678: * must deallocate it !
2679: */
2680: xmlChar *
2681: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2682: int what, xmlChar end, xmlChar end2, xmlChar end3) {
2683: xmlChar *buffer = NULL;
1.1.1.3 ! misho 2684: size_t buffer_size = 0;
! 2685: size_t nbchars = 0;
1.1 misho 2686:
2687: xmlChar *current = NULL;
2688: xmlChar *rep = NULL;
2689: const xmlChar *last;
2690: xmlEntityPtr ent;
2691: int c,l;
2692:
2693: if ((ctxt == NULL) || (str == NULL) || (len < 0))
2694: return(NULL);
2695: last = str + len;
2696:
2697: if (((ctxt->depth > 40) &&
2698: ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2699: (ctxt->depth > 1024)) {
2700: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2701: return(NULL);
2702: }
2703:
2704: /*
2705: * allocate a translation buffer.
2706: */
2707: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.1.1.3 ! misho 2708: buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
1.1 misho 2709: if (buffer == NULL) goto mem_error;
2710:
2711: /*
2712: * OK loop until we reach one of the ending char or a size limit.
2713: * we are operating on already parsed values.
2714: */
2715: if (str < last)
2716: c = CUR_SCHAR(str, l);
2717: else
2718: c = 0;
2719: while ((c != 0) && (c != end) && /* non input consuming loop */
2720: (c != end2) && (c != end3)) {
2721:
2722: if (c == 0) break;
2723: if ((c == '&') && (str[1] == '#')) {
2724: int val = xmlParseStringCharRef(ctxt, &str);
2725: if (val != 0) {
2726: COPY_BUF(0,buffer,nbchars,val);
2727: }
1.1.1.3 ! misho 2728: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
1.1 misho 2729: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2730: }
2731: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2732: if (xmlParserDebugEntities)
2733: xmlGenericError(xmlGenericErrorContext,
2734: "String decoding Entity Reference: %.30s\n",
2735: str);
2736: ent = xmlParseStringEntityRef(ctxt, &str);
2737: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2738: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2739: goto int_error;
2740: if (ent != NULL)
1.1.1.3 ! misho 2741: ctxt->nbentities += ent->checked / 2;
1.1 misho 2742: if ((ent != NULL) &&
2743: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2744: if (ent->content != NULL) {
2745: COPY_BUF(0,buffer,nbchars,ent->content[0]);
1.1.1.3 ! misho 2746: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
1.1 misho 2747: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2748: }
2749: } else {
2750: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2751: "predefined entity has no content\n");
2752: }
2753: } else if ((ent != NULL) && (ent->content != NULL)) {
2754: ctxt->depth++;
2755: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2756: 0, 0, 0);
2757: ctxt->depth--;
2758:
2759: if (rep != NULL) {
2760: current = rep;
2761: while (*current != 0) { /* non input consuming loop */
2762: buffer[nbchars++] = *current++;
1.1.1.3 ! misho 2763: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
! 2764: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
1.1 misho 2765: goto int_error;
2766: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2767: }
2768: }
2769: xmlFree(rep);
2770: rep = NULL;
2771: }
2772: } else if (ent != NULL) {
2773: int i = xmlStrlen(ent->name);
2774: const xmlChar *cur = ent->name;
2775:
2776: buffer[nbchars++] = '&';
1.1.1.3 ! misho 2777: if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
1.1.1.2 misho 2778: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
1.1 misho 2779: }
2780: for (;i > 0;i--)
2781: buffer[nbchars++] = *cur++;
2782: buffer[nbchars++] = ';';
2783: }
2784: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2785: if (xmlParserDebugEntities)
2786: xmlGenericError(xmlGenericErrorContext,
2787: "String decoding PE Reference: %.30s\n", str);
2788: ent = xmlParseStringPEReference(ctxt, &str);
2789: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2790: goto int_error;
2791: if (ent != NULL)
1.1.1.3 ! misho 2792: ctxt->nbentities += ent->checked / 2;
1.1 misho 2793: if (ent != NULL) {
2794: if (ent->content == NULL) {
2795: xmlLoadEntityContent(ctxt, ent);
2796: }
2797: ctxt->depth++;
2798: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2799: 0, 0, 0);
2800: ctxt->depth--;
2801: if (rep != NULL) {
2802: current = rep;
2803: while (*current != 0) { /* non input consuming loop */
2804: buffer[nbchars++] = *current++;
1.1.1.3 ! misho 2805: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
! 2806: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
1.1 misho 2807: goto int_error;
2808: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2809: }
2810: }
2811: xmlFree(rep);
2812: rep = NULL;
2813: }
2814: }
2815: } else {
2816: COPY_BUF(l,buffer,nbchars,c);
2817: str += l;
1.1.1.3 ! misho 2818: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
! 2819: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
1.1 misho 2820: }
2821: }
2822: if (str < last)
2823: c = CUR_SCHAR(str, l);
2824: else
2825: c = 0;
2826: }
2827: buffer[nbchars] = 0;
2828: return(buffer);
2829:
2830: mem_error:
2831: xmlErrMemory(ctxt, NULL);
2832: int_error:
2833: if (rep != NULL)
2834: xmlFree(rep);
2835: if (buffer != NULL)
2836: xmlFree(buffer);
2837: return(NULL);
2838: }
2839:
2840: /**
2841: * xmlStringDecodeEntities:
2842: * @ctxt: the parser context
2843: * @str: the input string
2844: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2845: * @end: an end marker xmlChar, 0 if none
2846: * @end2: an end marker xmlChar, 0 if none
2847: * @end3: an end marker xmlChar, 0 if none
1.1.1.3 ! misho 2848: *
1.1 misho 2849: * Takes a entity string content and process to do the adequate substitutions.
2850: *
2851: * [67] Reference ::= EntityRef | CharRef
2852: *
2853: * [69] PEReference ::= '%' Name ';'
2854: *
2855: * Returns A newly allocated string with the substitution done. The caller
2856: * must deallocate it !
2857: */
2858: xmlChar *
2859: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2860: xmlChar end, xmlChar end2, xmlChar end3) {
2861: if ((ctxt == NULL) || (str == NULL)) return(NULL);
2862: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2863: end, end2, end3));
2864: }
2865:
2866: /************************************************************************
2867: * *
2868: * Commodity functions, cleanup needed ? *
2869: * *
2870: ************************************************************************/
2871:
2872: /**
2873: * areBlanks:
2874: * @ctxt: an XML parser context
2875: * @str: a xmlChar *
2876: * @len: the size of @str
2877: * @blank_chars: we know the chars are blanks
2878: *
2879: * Is this a sequence of blank chars that one can ignore ?
2880: *
2881: * Returns 1 if ignorable 0 otherwise.
2882: */
2883:
2884: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2885: int blank_chars) {
2886: int i, ret;
2887: xmlNodePtr lastChild;
2888:
2889: /*
2890: * Don't spend time trying to differentiate them, the same callback is
2891: * used !
2892: */
2893: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2894: return(0);
2895:
2896: /*
2897: * Check for xml:space value.
2898: */
2899: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2900: (*(ctxt->space) == -2))
2901: return(0);
2902:
2903: /*
2904: * Check that the string is made of blanks
2905: */
2906: if (blank_chars == 0) {
2907: for (i = 0;i < len;i++)
2908: if (!(IS_BLANK_CH(str[i]))) return(0);
2909: }
2910:
2911: /*
2912: * Look if the element is mixed content in the DTD if available
2913: */
2914: if (ctxt->node == NULL) return(0);
2915: if (ctxt->myDoc != NULL) {
2916: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2917: if (ret == 0) return(1);
2918: if (ret == 1) return(0);
2919: }
2920:
2921: /*
2922: * Otherwise, heuristic :-\
2923: */
2924: if ((RAW != '<') && (RAW != 0xD)) return(0);
2925: if ((ctxt->node->children == NULL) &&
2926: (RAW == '<') && (NXT(1) == '/')) return(0);
2927:
2928: lastChild = xmlGetLastChild(ctxt->node);
2929: if (lastChild == NULL) {
2930: if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2931: (ctxt->node->content != NULL)) return(0);
2932: } else if (xmlNodeIsText(lastChild))
2933: return(0);
2934: else if ((ctxt->node->children != NULL) &&
2935: (xmlNodeIsText(ctxt->node->children)))
2936: return(0);
2937: return(1);
2938: }
2939:
2940: /************************************************************************
2941: * *
2942: * Extra stuff for namespace support *
2943: * Relates to http://www.w3.org/TR/WD-xml-names *
2944: * *
2945: ************************************************************************/
2946:
2947: /**
2948: * xmlSplitQName:
2949: * @ctxt: an XML parser context
2950: * @name: an XML parser context
2951: * @prefix: a xmlChar **
2952: *
2953: * parse an UTF8 encoded XML qualified name string
2954: *
2955: * [NS 5] QName ::= (Prefix ':')? LocalPart
2956: *
2957: * [NS 6] Prefix ::= NCName
2958: *
2959: * [NS 7] LocalPart ::= NCName
2960: *
2961: * Returns the local part, and prefix is updated
2962: * to get the Prefix if any.
2963: */
2964:
2965: xmlChar *
2966: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2967: xmlChar buf[XML_MAX_NAMELEN + 5];
2968: xmlChar *buffer = NULL;
2969: int len = 0;
2970: int max = XML_MAX_NAMELEN;
2971: xmlChar *ret = NULL;
2972: const xmlChar *cur = name;
2973: int c;
2974:
2975: if (prefix == NULL) return(NULL);
2976: *prefix = NULL;
2977:
2978: if (cur == NULL) return(NULL);
2979:
2980: #ifndef XML_XML_NAMESPACE
2981: /* xml: prefix is not really a namespace */
2982: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2983: (cur[2] == 'l') && (cur[3] == ':'))
2984: return(xmlStrdup(name));
2985: #endif
2986:
2987: /* nasty but well=formed */
2988: if (cur[0] == ':')
2989: return(xmlStrdup(name));
2990:
2991: c = *cur++;
2992: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2993: buf[len++] = c;
2994: c = *cur++;
2995: }
2996: if (len >= max) {
2997: /*
2998: * Okay someone managed to make a huge name, so he's ready to pay
2999: * for the processing speed.
3000: */
3001: max = len * 2;
3002:
3003: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004: if (buffer == NULL) {
3005: xmlErrMemory(ctxt, NULL);
3006: return(NULL);
3007: }
3008: memcpy(buffer, buf, len);
3009: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3010: if (len + 10 > max) {
3011: xmlChar *tmp;
3012:
3013: max *= 2;
3014: tmp = (xmlChar *) xmlRealloc(buffer,
3015: max * sizeof(xmlChar));
3016: if (tmp == NULL) {
3017: xmlFree(buffer);
3018: xmlErrMemory(ctxt, NULL);
3019: return(NULL);
3020: }
3021: buffer = tmp;
3022: }
3023: buffer[len++] = c;
3024: c = *cur++;
3025: }
3026: buffer[len] = 0;
3027: }
3028:
3029: if ((c == ':') && (*cur == 0)) {
3030: if (buffer != NULL)
3031: xmlFree(buffer);
3032: *prefix = NULL;
3033: return(xmlStrdup(name));
3034: }
3035:
3036: if (buffer == NULL)
3037: ret = xmlStrndup(buf, len);
3038: else {
3039: ret = buffer;
3040: buffer = NULL;
3041: max = XML_MAX_NAMELEN;
3042: }
3043:
3044:
3045: if (c == ':') {
3046: c = *cur;
3047: *prefix = ret;
3048: if (c == 0) {
3049: return(xmlStrndup(BAD_CAST "", 0));
3050: }
3051: len = 0;
3052:
3053: /*
3054: * Check that the first character is proper to start
3055: * a new name
3056: */
3057: if (!(((c >= 0x61) && (c <= 0x7A)) ||
3058: ((c >= 0x41) && (c <= 0x5A)) ||
3059: (c == '_') || (c == ':'))) {
3060: int l;
3061: int first = CUR_SCHAR(cur, l);
3062:
3063: if (!IS_LETTER(first) && (first != '_')) {
3064: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3065: "Name %s is not XML Namespace compliant\n",
3066: name);
3067: }
3068: }
3069: cur++;
3070:
3071: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3072: buf[len++] = c;
3073: c = *cur++;
3074: }
3075: if (len >= max) {
3076: /*
3077: * Okay someone managed to make a huge name, so he's ready to pay
3078: * for the processing speed.
3079: */
3080: max = len * 2;
3081:
3082: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3083: if (buffer == NULL) {
3084: xmlErrMemory(ctxt, NULL);
3085: return(NULL);
3086: }
3087: memcpy(buffer, buf, len);
3088: while (c != 0) { /* tested bigname2.xml */
3089: if (len + 10 > max) {
3090: xmlChar *tmp;
3091:
3092: max *= 2;
3093: tmp = (xmlChar *) xmlRealloc(buffer,
3094: max * sizeof(xmlChar));
3095: if (tmp == NULL) {
3096: xmlErrMemory(ctxt, NULL);
3097: xmlFree(buffer);
3098: return(NULL);
3099: }
3100: buffer = tmp;
3101: }
3102: buffer[len++] = c;
3103: c = *cur++;
3104: }
3105: buffer[len] = 0;
3106: }
3107:
3108: if (buffer == NULL)
3109: ret = xmlStrndup(buf, len);
3110: else {
3111: ret = buffer;
3112: }
3113: }
3114:
3115: return(ret);
3116: }
3117:
3118: /************************************************************************
3119: * *
3120: * The parser itself *
3121: * Relates to http://www.w3.org/TR/REC-xml *
3122: * *
3123: ************************************************************************/
3124:
3125: /************************************************************************
3126: * *
3127: * Routines to parse Name, NCName and NmToken *
3128: * *
3129: ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, compiled in DEBUG builds only. Each one is incremented
 * at the top of the parsing routine whose name it carries
 * (xmlParseName, xmlParseNmtoken, xmlParseNCName, xmlParseNCNameComplex,
 * xmlParseNameComplex and xmlParseStringName respectively).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3138:
3139: /*
3140: * The two following functions are related to the change of accepted
3141: * characters for Name and NmToken in the Revision 5 of XML-1.0
3142: * They correspond to the modified production [4] and the new production [4a]
3143: * changes in that revision. Also note that the macros used for the
3144: * productions Letter, Digit, CombiningChar and Extender are not needed
3145: * anymore.
3146: * We still keep compatibility to pre-revision5 parsing semantic if the
3147: * new XML_PARSE_OLD10 option is given to the parser.
3148: */
3149: static int
3150: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3151: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3152: /*
3153: * Use the new checks of production [4] [4a] amd [5] of the
3154: * Update 5 of XML-1.0
3155: */
3156: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3157: (((c >= 'a') && (c <= 'z')) ||
3158: ((c >= 'A') && (c <= 'Z')) ||
3159: (c == '_') || (c == ':') ||
3160: ((c >= 0xC0) && (c <= 0xD6)) ||
3161: ((c >= 0xD8) && (c <= 0xF6)) ||
3162: ((c >= 0xF8) && (c <= 0x2FF)) ||
3163: ((c >= 0x370) && (c <= 0x37D)) ||
3164: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3165: ((c >= 0x200C) && (c <= 0x200D)) ||
3166: ((c >= 0x2070) && (c <= 0x218F)) ||
3167: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3168: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3169: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3170: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3171: ((c >= 0x10000) && (c <= 0xEFFFF))))
3172: return(1);
3173: } else {
3174: if (IS_LETTER(c) || (c == '_') || (c == ':'))
3175: return(1);
3176: }
3177: return(0);
3178: }
3179:
3180: static int
3181: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3182: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3183: /*
3184: * Use the new checks of production [4] [4a] amd [5] of the
3185: * Update 5 of XML-1.0
3186: */
3187: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188: (((c >= 'a') && (c <= 'z')) ||
3189: ((c >= 'A') && (c <= 'Z')) ||
3190: ((c >= '0') && (c <= '9')) || /* !start */
3191: (c == '_') || (c == ':') ||
3192: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193: ((c >= 0xC0) && (c <= 0xD6)) ||
3194: ((c >= 0xD8) && (c <= 0xF6)) ||
3195: ((c >= 0xF8) && (c <= 0x2FF)) ||
3196: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197: ((c >= 0x370) && (c <= 0x37D)) ||
3198: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199: ((c >= 0x200C) && (c <= 0x200D)) ||
3200: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201: ((c >= 0x2070) && (c <= 0x218F)) ||
3202: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206: ((c >= 0x10000) && (c <= 0xEFFFF))))
3207: return(1);
3208: } else {
3209: if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3210: (c == '.') || (c == '-') ||
1.1.1.3 ! misho 3211: (c == '_') || (c == ':') ||
1.1 misho 3212: (IS_COMBINING(c)) ||
3213: (IS_EXTENDER(c)))
3214: return(1);
3215: }
3216: return(0);
3217: }
3218:
3219: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3220: int *len, int *alloc, int normalize);
3221:
3222: static const xmlChar *
3223: xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3224: int len = 0, l;
3225: int c;
3226: int count = 0;
3227:
3228: #ifdef DEBUG
3229: nbParseNameComplex++;
3230: #endif
3231:
3232: /*
3233: * Handler for more complex cases
3234: */
3235: GROW;
1.1.1.3 ! misho 3236: if (ctxt->instate == XML_PARSER_EOF)
! 3237: return(NULL);
1.1 misho 3238: c = CUR_CHAR(l);
3239: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240: /*
3241: * Use the new checks of production [4] [4a] amd [5] of the
3242: * Update 5 of XML-1.0
3243: */
3244: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3245: (!(((c >= 'a') && (c <= 'z')) ||
3246: ((c >= 'A') && (c <= 'Z')) ||
3247: (c == '_') || (c == ':') ||
3248: ((c >= 0xC0) && (c <= 0xD6)) ||
3249: ((c >= 0xD8) && (c <= 0xF6)) ||
3250: ((c >= 0xF8) && (c <= 0x2FF)) ||
3251: ((c >= 0x370) && (c <= 0x37D)) ||
3252: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253: ((c >= 0x200C) && (c <= 0x200D)) ||
3254: ((c >= 0x2070) && (c <= 0x218F)) ||
3255: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259: ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3260: return(NULL);
3261: }
3262: len += l;
3263: NEXTL(l);
3264: c = CUR_CHAR(l);
3265: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3266: (((c >= 'a') && (c <= 'z')) ||
3267: ((c >= 'A') && (c <= 'Z')) ||
3268: ((c >= '0') && (c <= '9')) || /* !start */
3269: (c == '_') || (c == ':') ||
3270: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3271: ((c >= 0xC0) && (c <= 0xD6)) ||
3272: ((c >= 0xD8) && (c <= 0xF6)) ||
3273: ((c >= 0xF8) && (c <= 0x2FF)) ||
3274: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3275: ((c >= 0x370) && (c <= 0x37D)) ||
3276: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3277: ((c >= 0x200C) && (c <= 0x200D)) ||
3278: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3279: ((c >= 0x2070) && (c <= 0x218F)) ||
3280: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3281: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3282: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3283: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3284: ((c >= 0x10000) && (c <= 0xEFFFF))
3285: )) {
1.1.1.3 ! misho 3286: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3287: count = 0;
3288: GROW;
1.1.1.3 ! misho 3289: if (ctxt->instate == XML_PARSER_EOF)
! 3290: return(NULL);
1.1 misho 3291: }
3292: len += l;
3293: NEXTL(l);
3294: c = CUR_CHAR(l);
3295: }
3296: } else {
3297: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3298: (!IS_LETTER(c) && (c != '_') &&
3299: (c != ':'))) {
3300: return(NULL);
3301: }
3302: len += l;
3303: NEXTL(l);
3304: c = CUR_CHAR(l);
3305:
3306: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3307: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3308: (c == '.') || (c == '-') ||
1.1.1.3 ! misho 3309: (c == '_') || (c == ':') ||
1.1 misho 3310: (IS_COMBINING(c)) ||
3311: (IS_EXTENDER(c)))) {
1.1.1.3 ! misho 3312: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3313: count = 0;
3314: GROW;
1.1.1.3 ! misho 3315: if (ctxt->instate == XML_PARSER_EOF)
! 3316: return(NULL);
1.1 misho 3317: }
3318: len += l;
3319: NEXTL(l);
3320: c = CUR_CHAR(l);
1.1.1.3 ! misho 3321: if (c == 0) {
! 3322: count = 0;
! 3323: GROW;
! 3324: if (ctxt->instate == XML_PARSER_EOF)
! 3325: return(NULL);
! 3326: c = CUR_CHAR(l);
! 3327: }
1.1 misho 3328: }
3329: }
1.1.1.3 ! misho 3330: if ((len > XML_MAX_NAME_LENGTH) &&
! 3331: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3332: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
! 3333: return(NULL);
! 3334: }
1.1 misho 3335: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3336: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3337: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3338: }
3339:
3340: /**
3341: * xmlParseName:
3342: * @ctxt: an XML parser context
3343: *
3344: * parse an XML name.
3345: *
3346: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3347: * CombiningChar | Extender
3348: *
3349: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3350: *
3351: * [6] Names ::= Name (#x20 Name)*
3352: *
3353: * Returns the Name parsed or NULL
3354: */
3355:
3356: const xmlChar *
3357: xmlParseName(xmlParserCtxtPtr ctxt) {
3358: const xmlChar *in;
3359: const xmlChar *ret;
3360: int count = 0;
3361:
3362: GROW;
3363:
3364: #ifdef DEBUG
3365: nbParseName++;
3366: #endif
3367:
3368: /*
3369: * Accelerator for simple ASCII names
3370: */
3371: in = ctxt->input->cur;
3372: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373: ((*in >= 0x41) && (*in <= 0x5A)) ||
3374: (*in == '_') || (*in == ':')) {
3375: in++;
3376: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377: ((*in >= 0x41) && (*in <= 0x5A)) ||
3378: ((*in >= 0x30) && (*in <= 0x39)) ||
3379: (*in == '_') || (*in == '-') ||
3380: (*in == ':') || (*in == '.'))
3381: in++;
3382: if ((*in > 0) && (*in < 0x80)) {
3383: count = in - ctxt->input->cur;
1.1.1.3 ! misho 3384: if ((count > XML_MAX_NAME_LENGTH) &&
! 3385: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3386: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
! 3387: return(NULL);
! 3388: }
1.1 misho 3389: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3390: ctxt->input->cur = in;
3391: ctxt->nbChars += count;
3392: ctxt->input->col += count;
3393: if (ret == NULL)
3394: xmlErrMemory(ctxt, NULL);
3395: return(ret);
3396: }
3397: }
3398: /* accelerator for special cases */
3399: return(xmlParseNameComplex(ctxt));
3400: }
3401:
3402: static const xmlChar *
3403: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3404: int len = 0, l;
3405: int c;
3406: int count = 0;
3407:
3408: #ifdef DEBUG
3409: nbParseNCNameComplex++;
3410: #endif
3411:
3412: /*
3413: * Handler for more complex cases
3414: */
3415: GROW;
3416: c = CUR_CHAR(l);
3417: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3418: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3419: return(NULL);
3420: }
3421:
3422: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3423: (xmlIsNameChar(ctxt, c) && (c != ':'))) {
1.1.1.3 ! misho 3424: if (count++ > XML_PARSER_CHUNK_SIZE) {
! 3425: if ((len > XML_MAX_NAME_LENGTH) &&
! 3426: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3427: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
! 3428: return(NULL);
! 3429: }
1.1 misho 3430: count = 0;
3431: GROW;
1.1.1.3 ! misho 3432: if (ctxt->instate == XML_PARSER_EOF)
! 3433: return(NULL);
1.1 misho 3434: }
3435: len += l;
3436: NEXTL(l);
3437: c = CUR_CHAR(l);
1.1.1.3 ! misho 3438: if (c == 0) {
! 3439: count = 0;
! 3440: GROW;
! 3441: if (ctxt->instate == XML_PARSER_EOF)
! 3442: return(NULL);
! 3443: c = CUR_CHAR(l);
! 3444: }
! 3445: }
! 3446: if ((len > XML_MAX_NAME_LENGTH) &&
! 3447: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3448: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
! 3449: return(NULL);
1.1 misho 3450: }
3451: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3452: }
3453:
3454: /**
3455: * xmlParseNCName:
3456: * @ctxt: an XML parser context
1.1.1.3 ! misho 3457: * @len: length of the string parsed
1.1 misho 3458: *
3459: * parse an XML name.
3460: *
3461: * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3462: * CombiningChar | Extender
3463: *
3464: * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3465: *
3466: * Returns the Name parsed or NULL
3467: */
3468:
3469: static const xmlChar *
3470: xmlParseNCName(xmlParserCtxtPtr ctxt) {
3471: const xmlChar *in;
3472: const xmlChar *ret;
3473: int count = 0;
3474:
3475: #ifdef DEBUG
3476: nbParseNCName++;
3477: #endif
3478:
3479: /*
3480: * Accelerator for simple ASCII names
3481: */
3482: in = ctxt->input->cur;
3483: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3484: ((*in >= 0x41) && (*in <= 0x5A)) ||
3485: (*in == '_')) {
3486: in++;
3487: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3488: ((*in >= 0x41) && (*in <= 0x5A)) ||
3489: ((*in >= 0x30) && (*in <= 0x39)) ||
3490: (*in == '_') || (*in == '-') ||
3491: (*in == '.'))
3492: in++;
3493: if ((*in > 0) && (*in < 0x80)) {
3494: count = in - ctxt->input->cur;
1.1.1.3 ! misho 3495: if ((count > XML_MAX_NAME_LENGTH) &&
! 3496: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3497: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
! 3498: return(NULL);
! 3499: }
1.1 misho 3500: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501: ctxt->input->cur = in;
3502: ctxt->nbChars += count;
3503: ctxt->input->col += count;
3504: if (ret == NULL) {
3505: xmlErrMemory(ctxt, NULL);
3506: }
3507: return(ret);
3508: }
3509: }
3510: return(xmlParseNCNameComplex(ctxt));
3511: }
3512:
3513: /**
3514: * xmlParseNameAndCompare:
3515: * @ctxt: an XML parser context
3516: *
3517: * parse an XML name and compares for match
3518: * (specialized for endtag parsing)
3519: *
3520: * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521: * and the name for mismatch
3522: */
3523:
3524: static const xmlChar *
3525: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526: register const xmlChar *cmp = other;
3527: register const xmlChar *in;
3528: const xmlChar *ret;
3529:
3530: GROW;
1.1.1.3 ! misho 3531: if (ctxt->instate == XML_PARSER_EOF)
! 3532: return(NULL);
1.1 misho 3533:
3534: in = ctxt->input->cur;
3535: while (*in != 0 && *in == *cmp) {
3536: ++in;
3537: ++cmp;
3538: ctxt->input->col++;
3539: }
3540: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3541: /* success */
3542: ctxt->input->cur = in;
3543: return (const xmlChar*) 1;
3544: }
3545: /* failure (or end of input buffer), check with full function */
3546: ret = xmlParseName (ctxt);
3547: /* strings coming from the dictionnary direct compare possible */
3548: if (ret == other) {
3549: return (const xmlChar*) 1;
3550: }
3551: return ret;
3552: }
3553:
3554: /**
3555: * xmlParseStringName:
3556: * @ctxt: an XML parser context
3557: * @str: a pointer to the string pointer (IN/OUT)
3558: *
3559: * parse an XML name.
3560: *
3561: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562: * CombiningChar | Extender
3563: *
3564: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3565: *
3566: * [6] Names ::= Name (#x20 Name)*
3567: *
1.1.1.3 ! misho 3568: * Returns the Name parsed or NULL. The @str pointer
1.1 misho 3569: * is updated to the current location in the string.
3570: */
3571:
3572: static xmlChar *
3573: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574: xmlChar buf[XML_MAX_NAMELEN + 5];
3575: const xmlChar *cur = *str;
3576: int len = 0, l;
3577: int c;
3578:
3579: #ifdef DEBUG
3580: nbParseStringName++;
3581: #endif
3582:
3583: c = CUR_SCHAR(cur, l);
3584: if (!xmlIsNameStartChar(ctxt, c)) {
3585: return(NULL);
3586: }
3587:
3588: COPY_BUF(l,buf,len,c);
3589: cur += l;
3590: c = CUR_SCHAR(cur, l);
3591: while (xmlIsNameChar(ctxt, c)) {
3592: COPY_BUF(l,buf,len,c);
3593: cur += l;
3594: c = CUR_SCHAR(cur, l);
3595: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3596: /*
3597: * Okay someone managed to make a huge name, so he's ready to pay
3598: * for the processing speed.
3599: */
3600: xmlChar *buffer;
3601: int max = len * 2;
3602:
3603: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3604: if (buffer == NULL) {
3605: xmlErrMemory(ctxt, NULL);
3606: return(NULL);
3607: }
3608: memcpy(buffer, buf, len);
3609: while (xmlIsNameChar(ctxt, c)) {
3610: if (len + 10 > max) {
3611: xmlChar *tmp;
1.1.1.3 ! misho 3612:
! 3613: if ((len > XML_MAX_NAME_LENGTH) &&
! 3614: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3615: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
! 3616: xmlFree(buffer);
! 3617: return(NULL);
! 3618: }
1.1 misho 3619: max *= 2;
3620: tmp = (xmlChar *) xmlRealloc(buffer,
3621: max * sizeof(xmlChar));
3622: if (tmp == NULL) {
3623: xmlErrMemory(ctxt, NULL);
3624: xmlFree(buffer);
3625: return(NULL);
3626: }
3627: buffer = tmp;
3628: }
3629: COPY_BUF(l,buffer,len,c);
3630: cur += l;
3631: c = CUR_SCHAR(cur, l);
3632: }
3633: buffer[len] = 0;
3634: *str = cur;
3635: return(buffer);
3636: }
3637: }
1.1.1.3 ! misho 3638: if ((len > XML_MAX_NAME_LENGTH) &&
! 3639: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3640: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
! 3641: return(NULL);
! 3642: }
1.1 misho 3643: *str = cur;
3644: return(xmlStrndup(buf, len));
3645: }
3646:
3647: /**
3648: * xmlParseNmtoken:
3649: * @ctxt: an XML parser context
3650: *
3651: * parse an XML Nmtoken.
3652: *
3653: * [7] Nmtoken ::= (NameChar)+
3654: *
3655: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3656: *
3657: * Returns the Nmtoken parsed or NULL
3658: */
3659:
3660: xmlChar *
3661: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3662: xmlChar buf[XML_MAX_NAMELEN + 5];
3663: int len = 0, l;
3664: int c;
3665: int count = 0;
3666:
3667: #ifdef DEBUG
3668: nbParseNmToken++;
3669: #endif
3670:
3671: GROW;
1.1.1.3 ! misho 3672: if (ctxt->instate == XML_PARSER_EOF)
! 3673: return(NULL);
1.1 misho 3674: c = CUR_CHAR(l);
3675:
3676: while (xmlIsNameChar(ctxt, c)) {
1.1.1.3 ! misho 3677: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3678: count = 0;
3679: GROW;
3680: }
3681: COPY_BUF(l,buf,len,c);
3682: NEXTL(l);
3683: c = CUR_CHAR(l);
1.1.1.3 ! misho 3684: if (c == 0) {
! 3685: count = 0;
! 3686: GROW;
! 3687: if (ctxt->instate == XML_PARSER_EOF)
! 3688: return(NULL);
! 3689: c = CUR_CHAR(l);
! 3690: }
1.1 misho 3691: if (len >= XML_MAX_NAMELEN) {
3692: /*
3693: * Okay someone managed to make a huge token, so he's ready to pay
3694: * for the processing speed.
3695: */
3696: xmlChar *buffer;
3697: int max = len * 2;
3698:
3699: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3700: if (buffer == NULL) {
3701: xmlErrMemory(ctxt, NULL);
3702: return(NULL);
3703: }
3704: memcpy(buffer, buf, len);
3705: while (xmlIsNameChar(ctxt, c)) {
1.1.1.3 ! misho 3706: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3707: count = 0;
3708: GROW;
1.1.1.3 ! misho 3709: if (ctxt->instate == XML_PARSER_EOF) {
! 3710: xmlFree(buffer);
! 3711: return(NULL);
! 3712: }
1.1 misho 3713: }
3714: if (len + 10 > max) {
3715: xmlChar *tmp;
3716:
1.1.1.3 ! misho 3717: if ((max > XML_MAX_NAME_LENGTH) &&
! 3718: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3719: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
! 3720: xmlFree(buffer);
! 3721: return(NULL);
! 3722: }
1.1 misho 3723: max *= 2;
3724: tmp = (xmlChar *) xmlRealloc(buffer,
3725: max * sizeof(xmlChar));
3726: if (tmp == NULL) {
3727: xmlErrMemory(ctxt, NULL);
3728: xmlFree(buffer);
3729: return(NULL);
3730: }
3731: buffer = tmp;
3732: }
3733: COPY_BUF(l,buffer,len,c);
3734: NEXTL(l);
3735: c = CUR_CHAR(l);
3736: }
3737: buffer[len] = 0;
3738: return(buffer);
3739: }
3740: }
3741: if (len == 0)
3742: return(NULL);
1.1.1.3 ! misho 3743: if ((len > XML_MAX_NAME_LENGTH) &&
! 3744: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 3745: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
! 3746: return(NULL);
! 3747: }
1.1 misho 3748: return(xmlStrndup(buf, len));
3749: }
3750:
3751: /**
3752: * xmlParseEntityValue:
3753: * @ctxt: an XML parser context
3754: * @orig: if non-NULL store a copy of the original entity value
3755: *
3756: * parse a value for ENTITY declarations
3757: *
3758: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3759: * "'" ([^%&'] | PEReference | Reference)* "'"
3760: *
3761: * Returns the EntityValue parsed with reference substituted or NULL
3762: */
3763:
3764: xmlChar *
3765: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3766: xmlChar *buf = NULL;
3767: int len = 0;
3768: int size = XML_PARSER_BUFFER_SIZE;
3769: int c, l;
3770: xmlChar stop;
3771: xmlChar *ret = NULL;
3772: const xmlChar *cur = NULL;
3773: xmlParserInputPtr input;
3774:
3775: if (RAW == '"') stop = '"';
3776: else if (RAW == '\'') stop = '\'';
3777: else {
3778: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3779: return(NULL);
3780: }
3781: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3782: if (buf == NULL) {
3783: xmlErrMemory(ctxt, NULL);
3784: return(NULL);
3785: }
3786:
3787: /*
3788: * The content of the entity definition is copied in a buffer.
3789: */
3790:
3791: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3792: input = ctxt->input;
3793: GROW;
1.1.1.3 ! misho 3794: if (ctxt->instate == XML_PARSER_EOF) {
! 3795: xmlFree(buf);
! 3796: return(NULL);
! 3797: }
1.1 misho 3798: NEXT;
3799: c = CUR_CHAR(l);
3800: /*
3801: * NOTE: 4.4.5 Included in Literal
3802: * When a parameter entity reference appears in a literal entity
3803: * value, ... a single or double quote character in the replacement
3804: * text is always treated as a normal data character and will not
1.1.1.3 ! misho 3805: * terminate the literal.
1.1 misho 3806: * In practice it means we stop the loop only when back at parsing
3807: * the initial entity and the quote is found
3808: */
1.1.1.3 ! misho 3809: while (((IS_CHAR(c)) && ((c != stop) || /* checked */
! 3810: (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 3811: if (len + 5 >= size) {
3812: xmlChar *tmp;
3813:
3814: size *= 2;
3815: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3816: if (tmp == NULL) {
3817: xmlErrMemory(ctxt, NULL);
3818: xmlFree(buf);
3819: return(NULL);
3820: }
3821: buf = tmp;
3822: }
3823: COPY_BUF(l,buf,len,c);
3824: NEXTL(l);
3825: /*
3826: * Pop-up of finished entities.
3827: */
3828: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3829: xmlPopInput(ctxt);
3830:
3831: GROW;
3832: c = CUR_CHAR(l);
3833: if (c == 0) {
3834: GROW;
3835: c = CUR_CHAR(l);
3836: }
3837: }
3838: buf[len] = 0;
1.1.1.3 ! misho 3839: if (ctxt->instate == XML_PARSER_EOF) {
! 3840: xmlFree(buf);
! 3841: return(NULL);
! 3842: }
1.1 misho 3843:
3844: /*
3845: * Raise problem w.r.t. '&' and '%' being used in non-entities
3846: * reference constructs. Note Charref will be handled in
3847: * xmlStringDecodeEntities()
3848: */
3849: cur = buf;
3850: while (*cur != 0) { /* non input consuming */
3851: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3852: xmlChar *name;
3853: xmlChar tmp = *cur;
3854:
3855: cur++;
3856: name = xmlParseStringName(ctxt, &cur);
3857: if ((name == NULL) || (*cur != ';')) {
3858: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3859: "EntityValue: '%c' forbidden except for entities references\n",
3860: tmp);
3861: }
3862: if ((tmp == '%') && (ctxt->inSubset == 1) &&
3863: (ctxt->inputNr == 1)) {
3864: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3865: }
3866: if (name != NULL)
3867: xmlFree(name);
3868: if (*cur == 0)
3869: break;
3870: }
3871: cur++;
3872: }
3873:
3874: /*
3875: * Then PEReference entities are substituted.
3876: */
3877: if (c != stop) {
3878: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3879: xmlFree(buf);
3880: } else {
3881: NEXT;
3882: /*
3883: * NOTE: 4.4.7 Bypassed
3884: * When a general entity reference appears in the EntityValue in
3885: * an entity declaration, it is bypassed and left as is.
3886: * so XML_SUBSTITUTE_REF is not set here.
3887: */
3888: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3889: 0, 0, 0);
1.1.1.3 ! misho 3890: if (orig != NULL)
1.1 misho 3891: *orig = buf;
3892: else
3893: xmlFree(buf);
3894: }
1.1.1.3 ! misho 3895:
1.1 misho 3896: return(ret);
3897: }
3898:
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the returned value
 * @normalize:  whether to apply the inner normalization (leading, trailing
 *              and repeated spaces are dropped)
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * The value is built in a buffer which is enlarged with growBuffer().
 * That macro is defined outside of this function; NOTE(review): it
 * presumably relies on the local names buf and buf_size and on the
 * mem_error label, confirm before renaming any of them.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* The value must open with a quote, remembered as closing delimiter. */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         *
         * NOTE(review): leaving through mem_error also calls
         * xmlErrMemory() although no allocation failed here.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* Character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        /* keep the ampersand escaped as "&#38;" */
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* General entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /* Predefined entity: only its first content byte is used */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /* Substitution requested: append the decoded content */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                /* CR, LF and TAB are turned into a space */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    /* No substitution: the reference itself is written back */
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities;

                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);

                        /*
                         * checked is set to twice (entities counted during
                         * the decoding + 1), which keeps bit 0 free; bit 0
                         * is then set if the expansion contains a '<'
                         */
                        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * Any white space becomes a single 0x20; when normalizing,
                 * leading spaces and repeated spaces are not copied
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* When normalizing, drop the trailing spaces */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /* Report why the loop stopped, or consume the closing quote */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     *
     * NOTE(review): as for the limit checked in the loop, this path
     * reports a memory error in addition to the length error.
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

mem_error:
    /* report the failure, then share the cleanup of the error label */
    xmlErrMemory(ctxt, NULL);
error:
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4141:
4142: /**
4143: * xmlParseAttValue:
4144: * @ctxt: an XML parser context
4145: *
4146: * parse a value for an attribute
4147: * Note: the parser won't do substitution of entities here, this
4148: * will be handled later in xmlStringGetNodeList
4149: *
4150: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4151: * "'" ([^<&'] | Reference)* "'"
4152: *
4153: * 3.3.3 Attribute-Value Normalization:
4154: * Before the value of an attribute is passed to the application or
1.1.1.3 ! misho 4155: * checked for validity, the XML processor must normalize it as follows:
1.1 misho 4156: * - a character reference is processed by appending the referenced
4157: * character to the attribute value
4158: * - an entity reference is processed by recursively processing the
1.1.1.3 ! misho 4159: * replacement text of the entity
1.1 misho 4160: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4161: * appending #x20 to the normalized value, except that only a single
4162: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1.1.1.3 ! misho 4163: * parsed entity or the literal entity value of an internal parsed entity
! 4164: * - other characters are processed by appending them to the normalized value
1.1 misho 4165: * If the declared value is not CDATA, then the XML processor must further
4166: * process the normalized attribute value by discarding any leading and
4167: * trailing space (#x20) characters, and by replacing sequences of space
1.1.1.3 ! misho 4168: * (#x20) characters by a single space (#x20) character.
1.1 misho 4169: * All attributes for which no declaration has been read should be treated
4170: * by a non-validating parser as if declared CDATA.
4171: *
4172: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4173: */
4174:
4175:
4176: xmlChar *
4177: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4178: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4179: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4180: }
4181:
4182: /**
4183: * xmlParseSystemLiteral:
4184: * @ctxt: an XML parser context
1.1.1.3 ! misho 4185: *
1.1 misho 4186: * parse an XML Literal
4187: *
4188: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4189: *
4190: * Returns the SystemLiteral parsed or NULL
4191: */
4192:
4193: xmlChar *
4194: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4195: xmlChar *buf = NULL;
4196: int len = 0;
4197: int size = XML_PARSER_BUFFER_SIZE;
4198: int cur, l;
4199: xmlChar stop;
4200: int state = ctxt->instate;
4201: int count = 0;
4202:
4203: SHRINK;
4204: if (RAW == '"') {
4205: NEXT;
4206: stop = '"';
4207: } else if (RAW == '\'') {
4208: NEXT;
4209: stop = '\'';
4210: } else {
4211: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4212: return(NULL);
4213: }
1.1.1.3 ! misho 4214:
1.1 misho 4215: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4216: if (buf == NULL) {
4217: xmlErrMemory(ctxt, NULL);
4218: return(NULL);
4219: }
4220: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4221: cur = CUR_CHAR(l);
4222: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4223: if (len + 5 >= size) {
4224: xmlChar *tmp;
4225:
1.1.1.3 ! misho 4226: if ((size > XML_MAX_NAME_LENGTH) &&
! 4227: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 4228: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
! 4229: xmlFree(buf);
! 4230: ctxt->instate = (xmlParserInputState) state;
! 4231: return(NULL);
! 4232: }
1.1 misho 4233: size *= 2;
4234: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4235: if (tmp == NULL) {
4236: xmlFree(buf);
4237: xmlErrMemory(ctxt, NULL);
4238: ctxt->instate = (xmlParserInputState) state;
4239: return(NULL);
4240: }
4241: buf = tmp;
4242: }
4243: count++;
4244: if (count > 50) {
4245: GROW;
4246: count = 0;
1.1.1.3 ! misho 4247: if (ctxt->instate == XML_PARSER_EOF) {
! 4248: xmlFree(buf);
! 4249: return(NULL);
! 4250: }
1.1 misho 4251: }
4252: COPY_BUF(l,buf,len,cur);
4253: NEXTL(l);
4254: cur = CUR_CHAR(l);
4255: if (cur == 0) {
4256: GROW;
4257: SHRINK;
4258: cur = CUR_CHAR(l);
4259: }
4260: }
4261: buf[len] = 0;
4262: ctxt->instate = (xmlParserInputState) state;
4263: if (!IS_CHAR(cur)) {
4264: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4265: } else {
4266: NEXT;
4267: }
4268: return(buf);
4269: }
4270:
4271: /**
4272: * xmlParsePubidLiteral:
4273: * @ctxt: an XML parser context
4274: *
4275: * parse an XML public literal
4276: *
4277: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4278: *
4279: * Returns the PubidLiteral parsed or NULL.
4280: */
4281:
4282: xmlChar *
4283: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4284: xmlChar *buf = NULL;
4285: int len = 0;
4286: int size = XML_PARSER_BUFFER_SIZE;
4287: xmlChar cur;
4288: xmlChar stop;
4289: int count = 0;
4290: xmlParserInputState oldstate = ctxt->instate;
4291:
4292: SHRINK;
4293: if (RAW == '"') {
4294: NEXT;
4295: stop = '"';
4296: } else if (RAW == '\'') {
4297: NEXT;
4298: stop = '\'';
4299: } else {
4300: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4301: return(NULL);
4302: }
4303: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4304: if (buf == NULL) {
4305: xmlErrMemory(ctxt, NULL);
4306: return(NULL);
4307: }
4308: ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4309: cur = CUR;
4310: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4311: if (len + 1 >= size) {
4312: xmlChar *tmp;
4313:
1.1.1.3 ! misho 4314: if ((size > XML_MAX_NAME_LENGTH) &&
! 4315: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 4316: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
! 4317: xmlFree(buf);
! 4318: return(NULL);
! 4319: }
1.1 misho 4320: size *= 2;
4321: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4322: if (tmp == NULL) {
4323: xmlErrMemory(ctxt, NULL);
4324: xmlFree(buf);
4325: return(NULL);
4326: }
4327: buf = tmp;
4328: }
4329: buf[len++] = cur;
4330: count++;
4331: if (count > 50) {
4332: GROW;
4333: count = 0;
1.1.1.3 ! misho 4334: if (ctxt->instate == XML_PARSER_EOF) {
! 4335: xmlFree(buf);
! 4336: return(NULL);
! 4337: }
1.1 misho 4338: }
4339: NEXT;
4340: cur = CUR;
4341: if (cur == 0) {
4342: GROW;
4343: SHRINK;
4344: cur = CUR;
4345: }
4346: }
4347: buf[len] = 0;
4348: if (cur != stop) {
4349: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4350: } else {
4351: NEXT;
4352: }
4353: ctxt->instate = oldstate;
4354: return(buf);
4355: }
4356:
4357: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4358:
/*
 * Lookup table used by the inner loop of the character data fast path.
 * A non-zero entry (equal to its own index) marks a byte the fast path
 * may step over; a zero entry makes the scan stop.  Zero entries are:
 * everything below 0x20 except 0x09, '&' (0x26), '<' (0x3C),
 * ']' (0x5D) and every byte >= 0x80 (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4396:
4397: /**
4398: * xmlParseCharData:
4399: * @ctxt: an XML parser context
4400: * @cdata: int indicating whether we are within a CDATA section
4401: *
4402: * parse a CharData section.
4403: * if we are within a CDATA section ']]>' marks an end of section.
4404: *
4405: * The right angle bracket (>) may be represented using the string ">",
4406: * and must, for compatibility, be escaped using ">" or a character
4407: * reference when it appears in the string "]]>" in content, when that
1.1.1.3 ! misho 4408: * string is not marking the end of a CDATA section.
1.1 misho 4409: *
4410: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4411: */
4412:
4413: void
4414: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4415: const xmlChar *in;
4416: int nbchar = 0;
4417: int line = ctxt->input->line;
4418: int col = ctxt->input->col;
4419: int ccol;
4420:
4421: SHRINK;
4422: GROW;
4423: /*
4424: * Accelerated common case where input don't need to be
4425: * modified before passing it to the handler.
4426: */
4427: if (!cdata) {
4428: in = ctxt->input->cur;
4429: do {
4430: get_more_space:
4431: while (*in == 0x20) { in++; ctxt->input->col++; }
4432: if (*in == 0xA) {
4433: do {
4434: ctxt->input->line++; ctxt->input->col = 1;
4435: in++;
4436: } while (*in == 0xA);
4437: goto get_more_space;
4438: }
4439: if (*in == '<') {
4440: nbchar = in - ctxt->input->cur;
4441: if (nbchar > 0) {
4442: const xmlChar *tmp = ctxt->input->cur;
4443: ctxt->input->cur = in;
4444:
4445: if ((ctxt->sax != NULL) &&
4446: (ctxt->sax->ignorableWhitespace !=
4447: ctxt->sax->characters)) {
4448: if (areBlanks(ctxt, tmp, nbchar, 1)) {
4449: if (ctxt->sax->ignorableWhitespace != NULL)
4450: ctxt->sax->ignorableWhitespace(ctxt->userData,
4451: tmp, nbchar);
4452: } else {
4453: if (ctxt->sax->characters != NULL)
4454: ctxt->sax->characters(ctxt->userData,
4455: tmp, nbchar);
4456: if (*ctxt->space == -1)
4457: *ctxt->space = -2;
4458: }
4459: } else if ((ctxt->sax != NULL) &&
4460: (ctxt->sax->characters != NULL)) {
4461: ctxt->sax->characters(ctxt->userData,
4462: tmp, nbchar);
4463: }
4464: }
4465: return;
4466: }
4467:
4468: get_more:
4469: ccol = ctxt->input->col;
4470: while (test_char_data[*in]) {
4471: in++;
4472: ccol++;
4473: }
4474: ctxt->input->col = ccol;
4475: if (*in == 0xA) {
4476: do {
4477: ctxt->input->line++; ctxt->input->col = 1;
4478: in++;
4479: } while (*in == 0xA);
4480: goto get_more;
4481: }
4482: if (*in == ']') {
4483: if ((in[1] == ']') && (in[2] == '>')) {
4484: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4485: ctxt->input->cur = in;
4486: return;
4487: }
4488: in++;
4489: ctxt->input->col++;
4490: goto get_more;
4491: }
4492: nbchar = in - ctxt->input->cur;
4493: if (nbchar > 0) {
4494: if ((ctxt->sax != NULL) &&
4495: (ctxt->sax->ignorableWhitespace !=
4496: ctxt->sax->characters) &&
4497: (IS_BLANK_CH(*ctxt->input->cur))) {
4498: const xmlChar *tmp = ctxt->input->cur;
4499: ctxt->input->cur = in;
4500:
4501: if (areBlanks(ctxt, tmp, nbchar, 0)) {
4502: if (ctxt->sax->ignorableWhitespace != NULL)
4503: ctxt->sax->ignorableWhitespace(ctxt->userData,
4504: tmp, nbchar);
4505: } else {
4506: if (ctxt->sax->characters != NULL)
4507: ctxt->sax->characters(ctxt->userData,
4508: tmp, nbchar);
4509: if (*ctxt->space == -1)
4510: *ctxt->space = -2;
4511: }
4512: line = ctxt->input->line;
4513: col = ctxt->input->col;
4514: } else if (ctxt->sax != NULL) {
4515: if (ctxt->sax->characters != NULL)
4516: ctxt->sax->characters(ctxt->userData,
4517: ctxt->input->cur, nbchar);
4518: line = ctxt->input->line;
4519: col = ctxt->input->col;
4520: }
4521: /* something really bad happened in the SAX callback */
4522: if (ctxt->instate != XML_PARSER_CONTENT)
4523: return;
4524: }
4525: ctxt->input->cur = in;
4526: if (*in == 0xD) {
4527: in++;
4528: if (*in == 0xA) {
4529: ctxt->input->cur = in;
4530: in++;
4531: ctxt->input->line++; ctxt->input->col = 1;
4532: continue; /* while */
4533: }
4534: in--;
4535: }
4536: if (*in == '<') {
4537: return;
4538: }
4539: if (*in == '&') {
4540: return;
4541: }
4542: SHRINK;
4543: GROW;
1.1.1.3 ! misho 4544: if (ctxt->instate == XML_PARSER_EOF)
! 4545: return;
1.1 misho 4546: in = ctxt->input->cur;
4547: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4548: nbchar = 0;
4549: }
4550: ctxt->input->line = line;
4551: ctxt->input->col = col;
4552: xmlParseCharDataComplex(ctxt, cdata);
4553: }
4554:
4555: /**
4556: * xmlParseCharDataComplex:
4557: * @ctxt: an XML parser context
4558: * @cdata: int indicating whether we are within a CDATA section
4559: *
4560: * parse a CharData section.this is the fallback function
4561: * of xmlParseCharData() when the parsing requires handling
4562: * of non-ASCII characters.
4563: */
4564: static void
4565: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4566: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4567: int nbchar = 0;
4568: int cur, l;
4569: int count = 0;
4570:
4571: SHRINK;
4572: GROW;
4573: cur = CUR_CHAR(l);
4574: while ((cur != '<') && /* checked */
1.1.1.3 ! misho 4575: (cur != '&') &&
1.1 misho 4576: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4577: if ((cur == ']') && (NXT(1) == ']') &&
4578: (NXT(2) == '>')) {
4579: if (cdata) break;
4580: else {
4581: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4582: }
4583: }
4584: COPY_BUF(l,buf,nbchar,cur);
4585: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4586: buf[nbchar] = 0;
4587:
4588: /*
4589: * OK the segment is to be consumed as chars.
4590: */
4591: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4592: if (areBlanks(ctxt, buf, nbchar, 0)) {
4593: if (ctxt->sax->ignorableWhitespace != NULL)
4594: ctxt->sax->ignorableWhitespace(ctxt->userData,
4595: buf, nbchar);
4596: } else {
4597: if (ctxt->sax->characters != NULL)
4598: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4599: if ((ctxt->sax->characters !=
4600: ctxt->sax->ignorableWhitespace) &&
4601: (*ctxt->space == -1))
4602: *ctxt->space = -2;
4603: }
4604: }
4605: nbchar = 0;
4606: /* something really bad happened in the SAX callback */
4607: if (ctxt->instate != XML_PARSER_CONTENT)
4608: return;
4609: }
4610: count++;
4611: if (count > 50) {
4612: GROW;
4613: count = 0;
1.1.1.3 ! misho 4614: if (ctxt->instate == XML_PARSER_EOF)
! 4615: return;
1.1 misho 4616: }
4617: NEXTL(l);
4618: cur = CUR_CHAR(l);
4619: }
4620: if (nbchar != 0) {
4621: buf[nbchar] = 0;
4622: /*
4623: * OK the segment is to be consumed as chars.
4624: */
4625: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4626: if (areBlanks(ctxt, buf, nbchar, 0)) {
4627: if (ctxt->sax->ignorableWhitespace != NULL)
4628: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4629: } else {
4630: if (ctxt->sax->characters != NULL)
4631: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4632: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4633: (*ctxt->space == -1))
4634: *ctxt->space = -2;
4635: }
4636: }
4637: }
4638: if ((cur != 0) && (!IS_CHAR(cur))) {
4639: /* Generate the error and skip the offending character */
4640: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4641: "PCDATA invalid Char value %d\n",
4642: cur);
4643: NEXTL(l);
4644: }
4645: }
4646:
4647: /**
4648: * xmlParseExternalID:
4649: * @ctxt: an XML parser context
4650: * @publicID: a xmlChar** receiving PubidLiteral
4651: * @strict: indicate whether we should restrict parsing to only
4652: * production [75], see NOTE below
4653: *
4654: * Parse an External ID or a Public ID
4655: *
4656: * NOTE: Productions [75] and [83] interact badly since [75] can generate
4657: * 'PUBLIC' S PubidLiteral S SystemLiteral
4658: *
4659: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4660: * | 'PUBLIC' S PubidLiteral S SystemLiteral
4661: *
4662: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4663: *
4664: * Returns the function returns SystemLiteral and in the second
4665: * case publicID receives PubidLiteral, is strict is off
4666: * it is possible to return NULL and have publicID set.
4667: */
4668:
4669: xmlChar *
4670: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4671: xmlChar *URI = NULL;
4672:
4673: SHRINK;
4674:
4675: *publicID = NULL;
4676: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4677: SKIP(6);
4678: if (!IS_BLANK_CH(CUR)) {
4679: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4680: "Space required after 'SYSTEM'\n");
4681: }
4682: SKIP_BLANKS;
4683: URI = xmlParseSystemLiteral(ctxt);
4684: if (URI == NULL) {
4685: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4686: }
4687: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4688: SKIP(6);
4689: if (!IS_BLANK_CH(CUR)) {
4690: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4691: "Space required after 'PUBLIC'\n");
4692: }
4693: SKIP_BLANKS;
4694: *publicID = xmlParsePubidLiteral(ctxt);
4695: if (*publicID == NULL) {
4696: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4697: }
4698: if (strict) {
4699: /*
4700: * We don't handle [83] so "S SystemLiteral" is required.
4701: */
4702: if (!IS_BLANK_CH(CUR)) {
4703: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4704: "Space required after the Public Identifier\n");
4705: }
4706: } else {
4707: /*
1.1.1.3 ! misho 4708: * We handle [83] so we return immediately, if
1.1 misho 4709: * "S SystemLiteral" is not detected. From a purely parsing
4710: * point of view that's a nice mess.
4711: */
4712: const xmlChar *ptr;
4713: GROW;
4714:
4715: ptr = CUR_PTR;
4716: if (!IS_BLANK_CH(*ptr)) return(NULL);
1.1.1.3 ! misho 4717:
1.1 misho 4718: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4719: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4720: }
4721: SKIP_BLANKS;
4722: URI = xmlParseSystemLiteral(ctxt);
4723: if (URI == NULL) {
4724: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4725: }
4726: }
4727: return(URI);
4728: }
4729:
4730: /**
4731: * xmlParseCommentComplex:
4732: * @ctxt: an XML parser context
4733: * @buf: the already parsed part of the buffer
4734: * @len: number of bytes filles in the buffer
4735: * @size: allocated size of the buffer
4736: *
4737: * Skip an XML (SGML) comment <!-- .... -->
4738: * The spec says that "For compatibility, the string "--" (double-hyphen)
4739: * must not occur within comments. "
4740: * This is the slow routine in case the accelerator for ascii didn't work
4741: *
4742: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4743: */
4744: static void
1.1.1.3 ! misho 4745: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
! 4746: size_t len, size_t size) {
1.1 misho 4747: int q, ql;
4748: int r, rl;
4749: int cur, l;
1.1.1.3 ! misho 4750: size_t count = 0;
1.1 misho 4751: int inputid;
4752:
4753: inputid = ctxt->input->id;
4754:
4755: if (buf == NULL) {
4756: len = 0;
4757: size = XML_PARSER_BUFFER_SIZE;
4758: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4759: if (buf == NULL) {
4760: xmlErrMemory(ctxt, NULL);
4761: return;
4762: }
4763: }
4764: GROW; /* Assure there's enough input data */
4765: q = CUR_CHAR(ql);
4766: if (q == 0)
4767: goto not_terminated;
4768: if (!IS_CHAR(q)) {
4769: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4770: "xmlParseComment: invalid xmlChar value %d\n",
4771: q);
4772: xmlFree (buf);
4773: return;
4774: }
4775: NEXTL(ql);
4776: r = CUR_CHAR(rl);
4777: if (r == 0)
4778: goto not_terminated;
4779: if (!IS_CHAR(r)) {
4780: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4781: "xmlParseComment: invalid xmlChar value %d\n",
4782: q);
4783: xmlFree (buf);
4784: return;
4785: }
4786: NEXTL(rl);
4787: cur = CUR_CHAR(l);
4788: if (cur == 0)
4789: goto not_terminated;
4790: while (IS_CHAR(cur) && /* checked */
4791: ((cur != '>') ||
4792: (r != '-') || (q != '-'))) {
4793: if ((r == '-') && (q == '-')) {
4794: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4795: }
1.1.1.3 ! misho 4796: if ((len > XML_MAX_TEXT_LENGTH) &&
! 4797: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 4798: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
! 4799: "Comment too big found", NULL);
! 4800: xmlFree (buf);
! 4801: return;
! 4802: }
1.1 misho 4803: if (len + 5 >= size) {
4804: xmlChar *new_buf;
1.1.1.3 ! misho 4805: size_t new_size;
! 4806:
! 4807: new_size = size * 2;
! 4808: new_buf = (xmlChar *) xmlRealloc(buf, new_size);
1.1 misho 4809: if (new_buf == NULL) {
4810: xmlFree (buf);
4811: xmlErrMemory(ctxt, NULL);
4812: return;
4813: }
4814: buf = new_buf;
1.1.1.3 ! misho 4815: size = new_size;
1.1 misho 4816: }
4817: COPY_BUF(ql,buf,len,q);
4818: q = r;
4819: ql = rl;
4820: r = cur;
4821: rl = l;
4822:
4823: count++;
4824: if (count > 50) {
4825: GROW;
4826: count = 0;
1.1.1.3 ! misho 4827: if (ctxt->instate == XML_PARSER_EOF) {
! 4828: xmlFree(buf);
! 4829: return;
! 4830: }
1.1 misho 4831: }
4832: NEXTL(l);
4833: cur = CUR_CHAR(l);
4834: if (cur == 0) {
4835: SHRINK;
4836: GROW;
4837: cur = CUR_CHAR(l);
4838: }
4839: }
4840: buf[len] = 0;
4841: if (cur == 0) {
4842: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4843: "Comment not terminated \n<!--%.50s\n", buf);
4844: } else if (!IS_CHAR(cur)) {
4845: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4846: "xmlParseComment: invalid xmlChar value %d\n",
4847: cur);
4848: } else {
4849: if (inputid != ctxt->input->id) {
4850: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4851: "Comment doesn't start and stop in the same entity\n");
4852: }
4853: NEXT;
4854: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855: (!ctxt->disableSAX))
4856: ctxt->sax->comment(ctxt->userData, buf);
4857: }
4858: xmlFree(buf);
4859: return;
4860: not_terminated:
4861: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862: "Comment not terminated\n", NULL);
4863: xmlFree(buf);
4864: return;
4865: }
4866:
4867: /**
4868: * xmlParseComment:
4869: * @ctxt: an XML parser context
4870: *
4871: * Skip an XML (SGML) comment <!-- .... -->
4872: * The spec says that "For compatibility, the string "--" (double-hyphen)
4873: * must not occur within comments. "
4874: *
4875: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4876: */
4877: void
4878: xmlParseComment(xmlParserCtxtPtr ctxt) {
4879: xmlChar *buf = NULL;
1.1.1.3 ! misho 4880: size_t size = XML_PARSER_BUFFER_SIZE;
! 4881: size_t len = 0;
1.1 misho 4882: xmlParserInputState state;
4883: const xmlChar *in;
1.1.1.3 ! misho 4884: size_t nbchar = 0;
! 4885: int ccol;
1.1 misho 4886: int inputid;
4887:
4888: /*
4889: * Check that there is a comment right here.
4890: */
4891: if ((RAW != '<') || (NXT(1) != '!') ||
4892: (NXT(2) != '-') || (NXT(3) != '-')) return;
4893: state = ctxt->instate;
4894: ctxt->instate = XML_PARSER_COMMENT;
4895: inputid = ctxt->input->id;
4896: SKIP(4);
4897: SHRINK;
4898: GROW;
4899:
4900: /*
4901: * Accelerated common case where input don't need to be
4902: * modified before passing it to the handler.
4903: */
4904: in = ctxt->input->cur;
4905: do {
4906: if (*in == 0xA) {
4907: do {
4908: ctxt->input->line++; ctxt->input->col = 1;
4909: in++;
4910: } while (*in == 0xA);
4911: }
4912: get_more:
4913: ccol = ctxt->input->col;
4914: while (((*in > '-') && (*in <= 0x7F)) ||
4915: ((*in >= 0x20) && (*in < '-')) ||
4916: (*in == 0x09)) {
4917: in++;
4918: ccol++;
4919: }
4920: ctxt->input->col = ccol;
4921: if (*in == 0xA) {
4922: do {
4923: ctxt->input->line++; ctxt->input->col = 1;
4924: in++;
4925: } while (*in == 0xA);
4926: goto get_more;
4927: }
4928: nbchar = in - ctxt->input->cur;
4929: /*
4930: * save current set of data
4931: */
4932: if (nbchar > 0) {
4933: if ((ctxt->sax != NULL) &&
4934: (ctxt->sax->comment != NULL)) {
4935: if (buf == NULL) {
4936: if ((*in == '-') && (in[1] == '-'))
4937: size = nbchar + 1;
4938: else
4939: size = XML_PARSER_BUFFER_SIZE + nbchar;
4940: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4941: if (buf == NULL) {
4942: xmlErrMemory(ctxt, NULL);
4943: ctxt->instate = state;
4944: return;
4945: }
4946: len = 0;
4947: } else if (len + nbchar + 1 >= size) {
4948: xmlChar *new_buf;
4949: size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950: new_buf = (xmlChar *) xmlRealloc(buf,
4951: size * sizeof(xmlChar));
4952: if (new_buf == NULL) {
4953: xmlFree (buf);
4954: xmlErrMemory(ctxt, NULL);
4955: ctxt->instate = state;
4956: return;
4957: }
4958: buf = new_buf;
4959: }
4960: memcpy(&buf[len], ctxt->input->cur, nbchar);
4961: len += nbchar;
4962: buf[len] = 0;
4963: }
4964: }
1.1.1.3 ! misho 4965: if ((len > XML_MAX_TEXT_LENGTH) &&
! 4966: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 4967: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
! 4968: "Comment too big found", NULL);
! 4969: xmlFree (buf);
! 4970: return;
! 4971: }
1.1 misho 4972: ctxt->input->cur = in;
4973: if (*in == 0xA) {
4974: in++;
4975: ctxt->input->line++; ctxt->input->col = 1;
4976: }
4977: if (*in == 0xD) {
4978: in++;
4979: if (*in == 0xA) {
4980: ctxt->input->cur = in;
4981: in++;
4982: ctxt->input->line++; ctxt->input->col = 1;
4983: continue; /* while */
4984: }
4985: in--;
4986: }
4987: SHRINK;
4988: GROW;
1.1.1.3 ! misho 4989: if (ctxt->instate == XML_PARSER_EOF) {
! 4990: xmlFree(buf);
! 4991: return;
! 4992: }
1.1 misho 4993: in = ctxt->input->cur;
4994: if (*in == '-') {
4995: if (in[1] == '-') {
4996: if (in[2] == '>') {
4997: if (ctxt->input->id != inputid) {
4998: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999: "comment doesn't start and stop in the same entity\n");
5000: }
5001: SKIP(3);
5002: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5003: (!ctxt->disableSAX)) {
5004: if (buf != NULL)
5005: ctxt->sax->comment(ctxt->userData, buf);
5006: else
5007: ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5008: }
5009: if (buf != NULL)
5010: xmlFree(buf);
1.1.1.3 ! misho 5011: if (ctxt->instate != XML_PARSER_EOF)
! 5012: ctxt->instate = state;
1.1 misho 5013: return;
5014: }
1.1.1.2 misho 5015: if (buf != NULL) {
5016: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5017: "Double hyphen within comment: "
5018: "<!--%.50s\n",
1.1 misho 5019: buf);
1.1.1.2 misho 5020: } else
5021: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5022: "Double hyphen within comment\n", NULL);
1.1 misho 5023: in++;
5024: ctxt->input->col++;
5025: }
5026: in++;
5027: ctxt->input->col++;
5028: goto get_more;
5029: }
5030: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5031: xmlParseCommentComplex(ctxt, buf, len, size);
5032: ctxt->instate = state;
5033: return;
5034: }
5035:
5036:
5037: /**
5038: * xmlParsePITarget:
5039: * @ctxt: an XML parser context
1.1.1.3 ! misho 5040: *
1.1 misho 5041: * parse the name of a PI
5042: *
5043: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5044: *
5045: * Returns the PITarget name or NULL
5046: */
5047:
5048: const xmlChar *
5049: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5050: const xmlChar *name;
5051:
5052: name = xmlParseName(ctxt);
5053: if ((name != NULL) &&
5054: ((name[0] == 'x') || (name[0] == 'X')) &&
5055: ((name[1] == 'm') || (name[1] == 'M')) &&
5056: ((name[2] == 'l') || (name[2] == 'L'))) {
5057: int i;
5058: if ((name[0] == 'x') && (name[1] == 'm') &&
5059: (name[2] == 'l') && (name[3] == 0)) {
5060: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5061: "XML declaration allowed only at the start of the document\n");
5062: return(name);
5063: } else if (name[3] == 0) {
5064: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5065: return(name);
5066: }
5067: for (i = 0;;i++) {
5068: if (xmlW3CPIs[i] == NULL) break;
5069: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5070: return(name);
5071: }
5072: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5073: "xmlParsePITarget: invalid name prefix 'xml'\n",
5074: NULL, NULL);
5075: }
5076: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
1.1.1.3 ! misho 5077: xmlNsErr(ctxt, XML_NS_ERR_COLON,
1.1 misho 5078: "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5079: }
5080: return(name);
5081: }
5082:
5083: #ifdef LIBXML_CATALOG_ENABLED
5084: /**
5085: * xmlParseCatalogPI:
5086: * @ctxt: an XML parser context
5087: * @catalog: the PI value string
1.1.1.3 ! misho 5088: *
1.1 misho 5089: * parse an XML Catalog Processing Instruction.
5090: *
5091: * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5092: *
5093: * Occurs only if allowed by the user and if happening in the Misc
5094: * part of the document before any doctype informations
5095: * This will add the given catalog to the parsing context in order
5096: * to be used if there is a resolution need further down in the document
5097: */
5098:
5099: static void
5100: xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5101: xmlChar *URL = NULL;
5102: const xmlChar *tmp, *base;
5103: xmlChar marker;
5104:
5105: tmp = catalog;
5106: while (IS_BLANK_CH(*tmp)) tmp++;
5107: if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5108: goto error;
5109: tmp += 7;
5110: while (IS_BLANK_CH(*tmp)) tmp++;
5111: if (*tmp != '=') {
5112: return;
5113: }
5114: tmp++;
5115: while (IS_BLANK_CH(*tmp)) tmp++;
5116: marker = *tmp;
5117: if ((marker != '\'') && (marker != '"'))
5118: goto error;
5119: tmp++;
5120: base = tmp;
5121: while ((*tmp != 0) && (*tmp != marker)) tmp++;
5122: if (*tmp == 0)
5123: goto error;
5124: URL = xmlStrndup(base, tmp - base);
5125: tmp++;
5126: while (IS_BLANK_CH(*tmp)) tmp++;
5127: if (*tmp != 0)
5128: goto error;
5129:
5130: if (URL != NULL) {
5131: ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5132: xmlFree(URL);
5133: }
5134: return;
5135:
5136: error:
5137: xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5138: "Catalog PI syntax error: %s\n",
5139: catalog, NULL);
5140: if (URL != NULL)
5141: xmlFree(URL);
5142: }
5143: #endif
5144:
5145: /**
5146: * xmlParsePI:
5147: * @ctxt: an XML parser context
1.1.1.3 ! misho 5148: *
1.1 misho 5149: * parse an XML Processing Instruction.
5150: *
5151: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5152: *
5153: * The processing is transfered to SAX once parsed.
5154: */
5155:
5156: void
5157: xmlParsePI(xmlParserCtxtPtr ctxt) {
5158: xmlChar *buf = NULL;
1.1.1.3 ! misho 5159: size_t len = 0;
! 5160: size_t size = XML_PARSER_BUFFER_SIZE;
1.1 misho 5161: int cur, l;
5162: const xmlChar *target;
5163: xmlParserInputState state;
5164: int count = 0;
5165:
5166: if ((RAW == '<') && (NXT(1) == '?')) {
5167: xmlParserInputPtr input = ctxt->input;
5168: state = ctxt->instate;
5169: ctxt->instate = XML_PARSER_PI;
5170: /*
5171: * this is a Processing Instruction.
5172: */
5173: SKIP(2);
5174: SHRINK;
5175:
5176: /*
5177: * Parse the target name and check for special support like
5178: * namespace.
5179: */
5180: target = xmlParsePITarget(ctxt);
5181: if (target != NULL) {
5182: if ((RAW == '?') && (NXT(1) == '>')) {
5183: if (input != ctxt->input) {
5184: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5185: "PI declaration doesn't start and stop in the same entity\n");
5186: }
5187: SKIP(2);
5188:
5189: /*
5190: * SAX: PI detected.
5191: */
5192: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5193: (ctxt->sax->processingInstruction != NULL))
5194: ctxt->sax->processingInstruction(ctxt->userData,
5195: target, NULL);
1.1.1.2 misho 5196: if (ctxt->instate != XML_PARSER_EOF)
5197: ctxt->instate = state;
1.1 misho 5198: return;
5199: }
5200: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5201: if (buf == NULL) {
5202: xmlErrMemory(ctxt, NULL);
5203: ctxt->instate = state;
5204: return;
5205: }
5206: cur = CUR;
5207: if (!IS_BLANK(cur)) {
5208: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5209: "ParsePI: PI %s space expected\n", target);
5210: }
5211: SKIP_BLANKS;
5212: cur = CUR_CHAR(l);
5213: while (IS_CHAR(cur) && /* checked */
5214: ((cur != '?') || (NXT(1) != '>'))) {
5215: if (len + 5 >= size) {
5216: xmlChar *tmp;
1.1.1.3 ! misho 5217: size_t new_size = size * 2;
! 5218: tmp = (xmlChar *) xmlRealloc(buf, new_size);
1.1 misho 5219: if (tmp == NULL) {
5220: xmlErrMemory(ctxt, NULL);
5221: xmlFree(buf);
5222: ctxt->instate = state;
5223: return;
5224: }
5225: buf = tmp;
1.1.1.3 ! misho 5226: size = new_size;
1.1 misho 5227: }
5228: count++;
5229: if (count > 50) {
5230: GROW;
1.1.1.3 ! misho 5231: if (ctxt->instate == XML_PARSER_EOF) {
! 5232: xmlFree(buf);
! 5233: return;
! 5234: }
1.1 misho 5235: count = 0;
1.1.1.3 ! misho 5236: if ((len > XML_MAX_TEXT_LENGTH) &&
! 5237: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 5238: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
! 5239: "PI %s too big found", target);
! 5240: xmlFree(buf);
! 5241: ctxt->instate = state;
! 5242: return;
! 5243: }
1.1 misho 5244: }
5245: COPY_BUF(l,buf,len,cur);
5246: NEXTL(l);
5247: cur = CUR_CHAR(l);
5248: if (cur == 0) {
5249: SHRINK;
5250: GROW;
5251: cur = CUR_CHAR(l);
5252: }
5253: }
1.1.1.3 ! misho 5254: if ((len > XML_MAX_TEXT_LENGTH) &&
! 5255: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 5256: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
! 5257: "PI %s too big found", target);
! 5258: xmlFree(buf);
! 5259: ctxt->instate = state;
! 5260: return;
! 5261: }
1.1 misho 5262: buf[len] = 0;
5263: if (cur != '?') {
5264: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5265: "ParsePI: PI %s never end ...\n", target);
5266: } else {
5267: if (input != ctxt->input) {
5268: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5269: "PI declaration doesn't start and stop in the same entity\n");
5270: }
5271: SKIP(2);
5272:
5273: #ifdef LIBXML_CATALOG_ENABLED
5274: if (((state == XML_PARSER_MISC) ||
5275: (state == XML_PARSER_START)) &&
5276: (xmlStrEqual(target, XML_CATALOG_PI))) {
5277: xmlCatalogAllow allow = xmlCatalogGetDefaults();
5278: if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5279: (allow == XML_CATA_ALLOW_ALL))
5280: xmlParseCatalogPI(ctxt, buf);
5281: }
5282: #endif
5283:
5284:
5285: /*
5286: * SAX: PI detected.
5287: */
5288: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5289: (ctxt->sax->processingInstruction != NULL))
5290: ctxt->sax->processingInstruction(ctxt->userData,
5291: target, buf);
5292: }
5293: xmlFree(buf);
5294: } else {
5295: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5296: }
1.1.1.2 misho 5297: if (ctxt->instate != XML_PARSER_EOF)
5298: ctxt->instate = state;
1.1 misho 5299: }
5300: }
5301:
5302: /**
5303: * xmlParseNotationDecl:
5304: * @ctxt: an XML parser context
5305: *
5306: * parse a notation declaration
5307: *
5308: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5309: *
5310: * Hence there is actually 3 choices:
5311: * 'PUBLIC' S PubidLiteral
5312: * 'PUBLIC' S PubidLiteral S SystemLiteral
5313: * and 'SYSTEM' S SystemLiteral
5314: *
5315: * See the NOTE on xmlParseExternalID().
5316: */
5317:
5318: void
5319: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5320: const xmlChar *name;
5321: xmlChar *Pubid;
5322: xmlChar *Systemid;
1.1.1.3 ! misho 5323:
1.1 misho 5324: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5325: xmlParserInputPtr input = ctxt->input;
5326: SHRINK;
5327: SKIP(10);
5328: if (!IS_BLANK_CH(CUR)) {
5329: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5330: "Space required after '<!NOTATION'\n");
5331: return;
5332: }
5333: SKIP_BLANKS;
5334:
5335: name = xmlParseName(ctxt);
5336: if (name == NULL) {
5337: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5338: return;
5339: }
5340: if (!IS_BLANK_CH(CUR)) {
5341: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342: "Space required after the NOTATION name'\n");
5343: return;
5344: }
5345: if (xmlStrchr(name, ':') != NULL) {
1.1.1.3 ! misho 5346: xmlNsErr(ctxt, XML_NS_ERR_COLON,
1.1 misho 5347: "colon are forbidden from notation names '%s'\n",
5348: name, NULL, NULL);
5349: }
5350: SKIP_BLANKS;
5351:
5352: /*
5353: * Parse the IDs.
5354: */
5355: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5356: SKIP_BLANKS;
5357:
5358: if (RAW == '>') {
5359: if (input != ctxt->input) {
5360: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5361: "Notation declaration doesn't start and stop in the same entity\n");
5362: }
5363: NEXT;
5364: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5365: (ctxt->sax->notationDecl != NULL))
5366: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5367: } else {
5368: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5369: }
5370: if (Systemid != NULL) xmlFree(Systemid);
5371: if (Pubid != NULL) xmlFree(Pubid);
5372: }
5373: }
5374:
5375: /**
5376: * xmlParseEntityDecl:
5377: * @ctxt: an XML parser context
5378: *
5379: * parse <!ENTITY declarations
5380: *
5381: * [70] EntityDecl ::= GEDecl | PEDecl
5382: *
5383: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5384: *
5385: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5386: *
5387: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5388: *
5389: * [74] PEDef ::= EntityValue | ExternalID
5390: *
5391: * [76] NDataDecl ::= S 'NDATA' S Name
5392: *
5393: * [ VC: Notation Declared ]
5394: * The Name must match the declared name of a notation.
5395: */
5396:
5397: void
5398: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5399: const xmlChar *name = NULL;
5400: xmlChar *value = NULL;
5401: xmlChar *URI = NULL, *literal = NULL;
5402: const xmlChar *ndata = NULL;
5403: int isParameter = 0;
5404: xmlChar *orig = NULL;
5405: int skipped;
1.1.1.3 ! misho 5406:
1.1 misho 5407: /* GROW; done in the caller */
5408: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5409: xmlParserInputPtr input = ctxt->input;
5410: SHRINK;
5411: SKIP(8);
5412: skipped = SKIP_BLANKS;
5413: if (skipped == 0) {
5414: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5415: "Space required after '<!ENTITY'\n");
5416: }
5417:
5418: if (RAW == '%') {
5419: NEXT;
5420: skipped = SKIP_BLANKS;
5421: if (skipped == 0) {
5422: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423: "Space required after '%'\n");
5424: }
5425: isParameter = 1;
5426: }
5427:
5428: name = xmlParseName(ctxt);
5429: if (name == NULL) {
5430: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5431: "xmlParseEntityDecl: no name\n");
5432: return;
5433: }
5434: if (xmlStrchr(name, ':') != NULL) {
1.1.1.3 ! misho 5435: xmlNsErr(ctxt, XML_NS_ERR_COLON,
1.1 misho 5436: "colon are forbidden from entities names '%s'\n",
5437: name, NULL, NULL);
5438: }
5439: skipped = SKIP_BLANKS;
5440: if (skipped == 0) {
5441: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442: "Space required after the entity name\n");
5443: }
5444:
5445: ctxt->instate = XML_PARSER_ENTITY_DECL;
5446: /*
5447: * handle the various case of definitions...
5448: */
5449: if (isParameter) {
5450: if ((RAW == '"') || (RAW == '\'')) {
5451: value = xmlParseEntityValue(ctxt, &orig);
5452: if (value) {
5453: if ((ctxt->sax != NULL) &&
5454: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5455: ctxt->sax->entityDecl(ctxt->userData, name,
5456: XML_INTERNAL_PARAMETER_ENTITY,
5457: NULL, NULL, value);
5458: }
5459: } else {
5460: URI = xmlParseExternalID(ctxt, &literal, 1);
5461: if ((URI == NULL) && (literal == NULL)) {
5462: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5463: }
5464: if (URI) {
5465: xmlURIPtr uri;
5466:
5467: uri = xmlParseURI((const char *) URI);
5468: if (uri == NULL) {
5469: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5470: "Invalid URI: %s\n", URI);
5471: /*
5472: * This really ought to be a well formedness error
5473: * but the XML Core WG decided otherwise c.f. issue
5474: * E26 of the XML erratas.
5475: */
5476: } else {
5477: if (uri->fragment != NULL) {
5478: /*
5479: * Okay this is foolish to block those but not
5480: * invalid URIs.
5481: */
5482: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5483: } else {
5484: if ((ctxt->sax != NULL) &&
5485: (!ctxt->disableSAX) &&
5486: (ctxt->sax->entityDecl != NULL))
5487: ctxt->sax->entityDecl(ctxt->userData, name,
5488: XML_EXTERNAL_PARAMETER_ENTITY,
5489: literal, URI, NULL);
5490: }
5491: xmlFreeURI(uri);
5492: }
5493: }
5494: }
5495: } else {
5496: if ((RAW == '"') || (RAW == '\'')) {
5497: value = xmlParseEntityValue(ctxt, &orig);
5498: if ((ctxt->sax != NULL) &&
5499: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5500: ctxt->sax->entityDecl(ctxt->userData, name,
5501: XML_INTERNAL_GENERAL_ENTITY,
5502: NULL, NULL, value);
5503: /*
5504: * For expat compatibility in SAX mode.
5505: */
5506: if ((ctxt->myDoc == NULL) ||
5507: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5508: if (ctxt->myDoc == NULL) {
5509: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5510: if (ctxt->myDoc == NULL) {
5511: xmlErrMemory(ctxt, "New Doc failed");
5512: return;
5513: }
5514: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5515: }
5516: if (ctxt->myDoc->intSubset == NULL)
5517: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5518: BAD_CAST "fake", NULL, NULL);
5519:
5520: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5521: NULL, NULL, value);
5522: }
5523: } else {
5524: URI = xmlParseExternalID(ctxt, &literal, 1);
5525: if ((URI == NULL) && (literal == NULL)) {
5526: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5527: }
5528: if (URI) {
5529: xmlURIPtr uri;
5530:
5531: uri = xmlParseURI((const char *)URI);
5532: if (uri == NULL) {
5533: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5534: "Invalid URI: %s\n", URI);
5535: /*
5536: * This really ought to be a well formedness error
5537: * but the XML Core WG decided otherwise c.f. issue
5538: * E26 of the XML erratas.
5539: */
5540: } else {
5541: if (uri->fragment != NULL) {
5542: /*
5543: * Okay this is foolish to block those but not
5544: * invalid URIs.
5545: */
5546: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5547: }
5548: xmlFreeURI(uri);
5549: }
5550: }
5551: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5552: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5553: "Space required before 'NDATA'\n");
5554: }
5555: SKIP_BLANKS;
5556: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5557: SKIP(5);
5558: if (!IS_BLANK_CH(CUR)) {
5559: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560: "Space required after 'NDATA'\n");
5561: }
5562: SKIP_BLANKS;
5563: ndata = xmlParseName(ctxt);
5564: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5565: (ctxt->sax->unparsedEntityDecl != NULL))
5566: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5567: literal, URI, ndata);
5568: } else {
5569: if ((ctxt->sax != NULL) &&
5570: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5571: ctxt->sax->entityDecl(ctxt->userData, name,
5572: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5573: literal, URI, NULL);
5574: /*
5575: * For expat compatibility in SAX mode.
5576: * assuming the entity repalcement was asked for
5577: */
5578: if ((ctxt->replaceEntities != 0) &&
5579: ((ctxt->myDoc == NULL) ||
5580: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5581: if (ctxt->myDoc == NULL) {
5582: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5583: if (ctxt->myDoc == NULL) {
5584: xmlErrMemory(ctxt, "New Doc failed");
5585: return;
5586: }
5587: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5588: }
5589:
5590: if (ctxt->myDoc->intSubset == NULL)
5591: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5592: BAD_CAST "fake", NULL, NULL);
5593: xmlSAX2EntityDecl(ctxt, name,
5594: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5595: literal, URI, NULL);
5596: }
5597: }
5598: }
5599: }
1.1.1.3 ! misho 5600: if (ctxt->instate == XML_PARSER_EOF)
! 5601: return;
1.1 misho 5602: SKIP_BLANKS;
5603: if (RAW != '>') {
5604: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5605: "xmlParseEntityDecl: entity %s not terminated\n", name);
5606: } else {
5607: if (input != ctxt->input) {
5608: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5609: "Entity declaration doesn't start and stop in the same entity\n");
5610: }
5611: NEXT;
5612: }
5613: if (orig != NULL) {
5614: /*
5615: * Ugly mechanism to save the raw entity value.
5616: */
5617: xmlEntityPtr cur = NULL;
5618:
5619: if (isParameter) {
5620: if ((ctxt->sax != NULL) &&
5621: (ctxt->sax->getParameterEntity != NULL))
5622: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5623: } else {
5624: if ((ctxt->sax != NULL) &&
5625: (ctxt->sax->getEntity != NULL))
5626: cur = ctxt->sax->getEntity(ctxt->userData, name);
5627: if ((cur == NULL) && (ctxt->userData==ctxt)) {
5628: cur = xmlSAX2GetEntity(ctxt, name);
5629: }
5630: }
5631: if (cur != NULL) {
5632: if (cur->orig != NULL)
5633: xmlFree(orig);
5634: else
5635: cur->orig = orig;
5636: } else
5637: xmlFree(orig);
5638: }
5639: if (value != NULL) xmlFree(value);
5640: if (URI != NULL) xmlFree(URI);
5641: if (literal != NULL) xmlFree(literal);
5642: }
5643: }
5644:
5645: /**
5646: * xmlParseDefaultDecl:
5647: * @ctxt: an XML parser context
5648: * @value: Receive a possible fixed default value for the attribute
5649: *
5650: * Parse an attribute default declaration
5651: *
5652: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5653: *
5654: * [ VC: Required Attribute ]
5655: * if the default declaration is the keyword #REQUIRED, then the
5656: * attribute must be specified for all elements of the type in the
5657: * attribute-list declaration.
5658: *
5659: * [ VC: Attribute Default Legal ]
5660: * The declared default value must meet the lexical constraints of
5661: * the declared attribute type c.f. xmlValidateAttributeDecl()
5662: *
5663: * [ VC: Fixed Attribute Default ]
5664: * if an attribute has a default value declared with the #FIXED
1.1.1.3 ! misho 5665: * keyword, instances of that attribute must match the default value.
1.1 misho 5666: *
5667: * [ WFC: No < in Attribute Values ]
5668: * handled in xmlParseAttValue()
5669: *
5670: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
1.1.1.3 ! misho 5671: * or XML_ATTRIBUTE_FIXED.
1.1 misho 5672: */
5673:
5674: int
5675: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5676: int val;
5677: xmlChar *ret;
5678:
5679: *value = NULL;
5680: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5681: SKIP(9);
5682: return(XML_ATTRIBUTE_REQUIRED);
5683: }
5684: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5685: SKIP(8);
5686: return(XML_ATTRIBUTE_IMPLIED);
5687: }
5688: val = XML_ATTRIBUTE_NONE;
5689: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5690: SKIP(6);
5691: val = XML_ATTRIBUTE_FIXED;
5692: if (!IS_BLANK_CH(CUR)) {
5693: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5694: "Space required after '#FIXED'\n");
5695: }
5696: SKIP_BLANKS;
5697: }
5698: ret = xmlParseAttValue(ctxt);
5699: ctxt->instate = XML_PARSER_DTD;
5700: if (ret == NULL) {
5701: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5702: "Attribute default value declaration error\n");
5703: } else
5704: *value = ret;
5705: return(val);
5706: }
5707:
5708: /**
5709: * xmlParseNotationType:
5710: * @ctxt: an XML parser context
5711: *
5712: * parse an Notation attribute type.
5713: *
5714: * Note: the leading 'NOTATION' S part has already being parsed...
5715: *
5716: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5717: *
5718: * [ VC: Notation Attributes ]
5719: * Values of this type must match one of the notation names included
1.1.1.3 ! misho 5720: * in the declaration; all notation names in the declaration must be declared.
1.1 misho 5721: *
5722: * Returns: the notation attribute tree built while parsing
5723: */
5724:
5725: xmlEnumerationPtr
5726: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5727: const xmlChar *name;
5728: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5729:
5730: if (RAW != '(') {
5731: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5732: return(NULL);
5733: }
5734: SHRINK;
5735: do {
5736: NEXT;
5737: SKIP_BLANKS;
5738: name = xmlParseName(ctxt);
5739: if (name == NULL) {
5740: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741: "Name expected in NOTATION declaration\n");
5742: xmlFreeEnumeration(ret);
5743: return(NULL);
5744: }
5745: tmp = ret;
5746: while (tmp != NULL) {
5747: if (xmlStrEqual(name, tmp->name)) {
5748: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749: "standalone: attribute notation value token %s duplicated\n",
5750: name, NULL);
5751: if (!xmlDictOwns(ctxt->dict, name))
5752: xmlFree((xmlChar *) name);
5753: break;
5754: }
5755: tmp = tmp->next;
5756: }
5757: if (tmp == NULL) {
5758: cur = xmlCreateEnumeration(name);
5759: if (cur == NULL) {
5760: xmlFreeEnumeration(ret);
5761: return(NULL);
5762: }
5763: if (last == NULL) ret = last = cur;
5764: else {
5765: last->next = cur;
5766: last = cur;
5767: }
5768: }
5769: SKIP_BLANKS;
5770: } while (RAW == '|');
5771: if (RAW != ')') {
5772: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5773: xmlFreeEnumeration(ret);
5774: return(NULL);
5775: }
5776: NEXT;
5777: return(ret);
5778: }
5779:
5780: /**
5781: * xmlParseEnumerationType:
5782: * @ctxt: an XML parser context
5783: *
5784: * parse an Enumeration attribute type.
5785: *
5786: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5787: *
5788: * [ VC: Enumeration ]
5789: * Values of this type must match one of the Nmtoken tokens in
5790: * the declaration
5791: *
5792: * Returns: the enumeration attribute tree built while parsing
5793: */
5794:
5795: xmlEnumerationPtr
5796: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5797: xmlChar *name;
5798: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5799:
5800: if (RAW != '(') {
5801: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5802: return(NULL);
5803: }
5804: SHRINK;
5805: do {
5806: NEXT;
5807: SKIP_BLANKS;
5808: name = xmlParseNmtoken(ctxt);
5809: if (name == NULL) {
5810: xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5811: return(ret);
5812: }
5813: tmp = ret;
5814: while (tmp != NULL) {
5815: if (xmlStrEqual(name, tmp->name)) {
5816: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817: "standalone: attribute enumeration value token %s duplicated\n",
5818: name, NULL);
5819: if (!xmlDictOwns(ctxt->dict, name))
5820: xmlFree(name);
5821: break;
5822: }
5823: tmp = tmp->next;
5824: }
5825: if (tmp == NULL) {
5826: cur = xmlCreateEnumeration(name);
5827: if (!xmlDictOwns(ctxt->dict, name))
5828: xmlFree(name);
5829: if (cur == NULL) {
5830: xmlFreeEnumeration(ret);
5831: return(NULL);
5832: }
5833: if (last == NULL) ret = last = cur;
5834: else {
5835: last->next = cur;
5836: last = cur;
5837: }
5838: }
5839: SKIP_BLANKS;
5840: } while (RAW == '|');
5841: if (RAW != ')') {
5842: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5843: return(ret);
5844: }
5845: NEXT;
5846: return(ret);
5847: }
5848:
5849: /**
5850: * xmlParseEnumeratedType:
5851: * @ctxt: an XML parser context
5852: * @tree: the enumeration tree built while parsing
5853: *
5854: * parse an Enumerated attribute type.
5855: *
5856: * [57] EnumeratedType ::= NotationType | Enumeration
5857: *
5858: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5859: *
5860: *
5861: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5862: */
5863:
5864: int
5865: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5866: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5867: SKIP(8);
5868: if (!IS_BLANK_CH(CUR)) {
5869: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870: "Space required after 'NOTATION'\n");
5871: return(0);
5872: }
5873: SKIP_BLANKS;
5874: *tree = xmlParseNotationType(ctxt);
5875: if (*tree == NULL) return(0);
5876: return(XML_ATTRIBUTE_NOTATION);
5877: }
5878: *tree = xmlParseEnumerationType(ctxt);
5879: if (*tree == NULL) return(0);
5880: return(XML_ATTRIBUTE_ENUMERATION);
5881: }
5882:
5883: /**
5884: * xmlParseAttributeType:
5885: * @ctxt: an XML parser context
5886: * @tree: the enumeration tree built while parsing
5887: *
5888: * parse the Attribute list def for an element
5889: *
5890: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5891: *
5892: * [55] StringType ::= 'CDATA'
5893: *
5894: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5895: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5896: *
5897: * Validity constraints for attribute values syntax are checked in
5898: * xmlValidateAttributeValue()
5899: *
5900: * [ VC: ID ]
5901: * Values of type ID must match the Name production. A name must not
5902: * appear more than once in an XML document as a value of this type;
5903: * i.e., ID values must uniquely identify the elements which bear them.
5904: *
5905: * [ VC: One ID per Element Type ]
5906: * No element type may have more than one ID attribute specified.
5907: *
5908: * [ VC: ID Attribute Default ]
5909: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5910: *
5911: * [ VC: IDREF ]
5912: * Values of type IDREF must match the Name production, and values
5913: * of type IDREFS must match Names; each IDREF Name must match the value
5914: * of an ID attribute on some element in the XML document; i.e. IDREF
5915: * values must match the value of some ID attribute.
5916: *
5917: * [ VC: Entity Name ]
5918: * Values of type ENTITY must match the Name production, values
5919: * of type ENTITIES must match Names; each Entity Name must match the
1.1.1.3 ! misho 5920: * name of an unparsed entity declared in the DTD.
1.1 misho 5921: *
5922: * [ VC: Name Token ]
5923: * Values of type NMTOKEN must match the Nmtoken production; values
1.1.1.3 ! misho 5924: * of type NMTOKENS must match Nmtokens.
1.1 misho 5925: *
5926: * Returns the attribute type
5927: */
1.1.1.3 ! misho 5928: int
1.1 misho 5929: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5930: SHRINK;
5931: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5932: SKIP(5);
5933: return(XML_ATTRIBUTE_CDATA);
5934: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5935: SKIP(6);
5936: return(XML_ATTRIBUTE_IDREFS);
5937: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5938: SKIP(5);
5939: return(XML_ATTRIBUTE_IDREF);
5940: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5941: SKIP(2);
5942: return(XML_ATTRIBUTE_ID);
5943: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5944: SKIP(6);
5945: return(XML_ATTRIBUTE_ENTITY);
5946: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5947: SKIP(8);
5948: return(XML_ATTRIBUTE_ENTITIES);
5949: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5950: SKIP(8);
5951: return(XML_ATTRIBUTE_NMTOKENS);
5952: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5953: SKIP(7);
5954: return(XML_ATTRIBUTE_NMTOKEN);
5955: }
5956: return(xmlParseEnumeratedType(ctxt, tree));
5957: }
5958:
5959: /**
5960: * xmlParseAttributeListDecl:
5961: * @ctxt: an XML parser context
5962: *
5963: * : parse the Attribute list def for an element
5964: *
5965: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5966: *
5967: * [53] AttDef ::= S Name S AttType S DefaultDecl
5968: *
5969: */
5970: void
5971: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5972: const xmlChar *elemName;
5973: const xmlChar *attrName;
5974: xmlEnumerationPtr tree;
5975:
5976: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5977: xmlParserInputPtr input = ctxt->input;
5978:
5979: SKIP(9);
5980: if (!IS_BLANK_CH(CUR)) {
5981: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982: "Space required after '<!ATTLIST'\n");
5983: }
5984: SKIP_BLANKS;
5985: elemName = xmlParseName(ctxt);
5986: if (elemName == NULL) {
5987: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988: "ATTLIST: no name for Element\n");
5989: return;
5990: }
5991: SKIP_BLANKS;
5992: GROW;
1.1.1.3 ! misho 5993: while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 5994: const xmlChar *check = CUR_PTR;
5995: int type;
5996: int def;
5997: xmlChar *defaultValue = NULL;
5998:
5999: GROW;
6000: tree = NULL;
6001: attrName = xmlParseName(ctxt);
6002: if (attrName == NULL) {
6003: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6004: "ATTLIST: no name for Attribute\n");
6005: break;
6006: }
6007: GROW;
6008: if (!IS_BLANK_CH(CUR)) {
6009: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6010: "Space required after the attribute name\n");
6011: break;
6012: }
6013: SKIP_BLANKS;
6014:
6015: type = xmlParseAttributeType(ctxt, &tree);
6016: if (type <= 0) {
6017: break;
6018: }
6019:
6020: GROW;
6021: if (!IS_BLANK_CH(CUR)) {
6022: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023: "Space required after the attribute type\n");
6024: if (tree != NULL)
6025: xmlFreeEnumeration(tree);
6026: break;
6027: }
6028: SKIP_BLANKS;
6029:
6030: def = xmlParseDefaultDecl(ctxt, &defaultValue);
6031: if (def <= 0) {
6032: if (defaultValue != NULL)
6033: xmlFree(defaultValue);
6034: if (tree != NULL)
6035: xmlFreeEnumeration(tree);
6036: break;
6037: }
6038: if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6039: xmlAttrNormalizeSpace(defaultValue, defaultValue);
6040:
6041: GROW;
6042: if (RAW != '>') {
6043: if (!IS_BLANK_CH(CUR)) {
6044: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6045: "Space required after the attribute default value\n");
6046: if (defaultValue != NULL)
6047: xmlFree(defaultValue);
6048: if (tree != NULL)
6049: xmlFreeEnumeration(tree);
6050: break;
6051: }
6052: SKIP_BLANKS;
6053: }
6054: if (check == CUR_PTR) {
6055: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6056: "in xmlParseAttributeListDecl\n");
6057: if (defaultValue != NULL)
6058: xmlFree(defaultValue);
6059: if (tree != NULL)
6060: xmlFreeEnumeration(tree);
6061: break;
6062: }
6063: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6064: (ctxt->sax->attributeDecl != NULL))
6065: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6066: type, def, defaultValue, tree);
6067: else if (tree != NULL)
6068: xmlFreeEnumeration(tree);
6069:
6070: if ((ctxt->sax2) && (defaultValue != NULL) &&
1.1.1.3 ! misho 6071: (def != XML_ATTRIBUTE_IMPLIED) &&
1.1 misho 6072: (def != XML_ATTRIBUTE_REQUIRED)) {
6073: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6074: }
6075: if (ctxt->sax2) {
6076: xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6077: }
6078: if (defaultValue != NULL)
6079: xmlFree(defaultValue);
6080: GROW;
6081: }
6082: if (RAW == '>') {
6083: if (input != ctxt->input) {
6084: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6085: "Attribute list declaration doesn't start and stop in the same entity\n",
6086: NULL, NULL);
6087: }
6088: NEXT;
6089: }
6090: }
6091: }
6092:
6093: /**
6094: * xmlParseElementMixedContentDecl:
6095: * @ctxt: an XML parser context
6096: * @inputchk: the input used for the current entity, needed for boundary checks
6097: *
6098: * parse the declaration for a Mixed Element content
6099: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.1.1.3 ! misho 6100: *
1.1 misho 6101: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6102: * '(' S? '#PCDATA' S? ')'
6103: *
6104: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6105: *
6106: * [ VC: No Duplicate Types ]
6107: * The same name must not appear more than once in a single
1.1.1.3 ! misho 6108: * mixed-content declaration.
1.1 misho 6109: *
6110: * returns: the list of the xmlElementContentPtr describing the element choices
6111: */
6112: xmlElementContentPtr
6113: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6114: xmlElementContentPtr ret = NULL, cur = NULL, n;
6115: const xmlChar *elem = NULL;
6116:
6117: GROW;
6118: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6119: SKIP(7);
6120: SKIP_BLANKS;
6121: SHRINK;
6122: if (RAW == ')') {
6123: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6124: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6125: "Element content declaration doesn't start and stop in the same entity\n",
6126: NULL, NULL);
6127: }
6128: NEXT;
6129: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6130: if (ret == NULL)
6131: return(NULL);
6132: if (RAW == '*') {
6133: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6134: NEXT;
6135: }
6136: return(ret);
6137: }
6138: if ((RAW == '(') || (RAW == '|')) {
6139: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6140: if (ret == NULL) return(NULL);
6141: }
1.1.1.3 ! misho 6142: while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6143: NEXT;
6144: if (elem == NULL) {
6145: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6146: if (ret == NULL) return(NULL);
6147: ret->c1 = cur;
6148: if (cur != NULL)
6149: cur->parent = ret;
6150: cur = ret;
6151: } else {
6152: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6153: if (n == NULL) return(NULL);
6154: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6155: if (n->c1 != NULL)
6156: n->c1->parent = n;
6157: cur->c2 = n;
6158: if (n != NULL)
6159: n->parent = cur;
6160: cur = n;
6161: }
6162: SKIP_BLANKS;
6163: elem = xmlParseName(ctxt);
6164: if (elem == NULL) {
6165: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6166: "xmlParseElementMixedContentDecl : Name expected\n");
6167: xmlFreeDocElementContent(ctxt->myDoc, cur);
6168: return(NULL);
6169: }
6170: SKIP_BLANKS;
6171: GROW;
6172: }
6173: if ((RAW == ')') && (NXT(1) == '*')) {
6174: if (elem != NULL) {
6175: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6176: XML_ELEMENT_CONTENT_ELEMENT);
6177: if (cur->c2 != NULL)
6178: cur->c2->parent = cur;
6179: }
6180: if (ret != NULL)
6181: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6182: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6183: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6184: "Element content declaration doesn't start and stop in the same entity\n",
6185: NULL, NULL);
6186: }
6187: SKIP(2);
6188: } else {
6189: xmlFreeDocElementContent(ctxt->myDoc, ret);
6190: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6191: return(NULL);
6192: }
6193:
6194: } else {
6195: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6196: }
6197: return(ret);
6198: }
6199:
6200: /**
6201: * xmlParseElementChildrenContentDeclPriv:
6202: * @ctxt: an XML parser context
6203: * @inputchk: the input used for the current entity, needed for boundary checks
6204: * @depth: the level of recursion
6205: *
6206: * parse the declaration for an Element children content (choice | seq)
6207: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.1.1.3 ! misho 6208: *
1.1 misho 6209: *
6210: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6211: *
6212: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6213: *
6214: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6215: *
6216: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6217: *
6218: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6219: * TODO Parameter-entity replacement text must be properly nested
6220: * with parenthesized groups. That is to say, if either of the
6221: * opening or closing parentheses in a choice, seq, or Mixed
6222: * construct is contained in the replacement text for a parameter
6223: * entity, both must be contained in the same replacement text. For
6224: * interoperability, if a parameter-entity reference appears in a
6225: * choice, seq, or Mixed construct, its replacement text should not
6226: * be empty, and neither the first nor last non-blank character of
6227: * the replacement text should be a connector (| or ,).
6228: *
1.1.1.3 ! misho 6229: * Returns the tree of xmlElementContentPtr describing the element
1.1 misho 6230: * hierarchy.
6231: */
6232: static xmlElementContentPtr
6233: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6234: int depth) {
6235: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6236: const xmlChar *elem;
6237: xmlChar type = 0;
6238:
6239: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6240: (depth > 2048)) {
6241: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6242: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6243: depth);
6244: return(NULL);
6245: }
6246: SKIP_BLANKS;
6247: GROW;
6248: if (RAW == '(') {
6249: int inputid = ctxt->input->id;
6250:
6251: /* Recurse on first child */
6252: NEXT;
6253: SKIP_BLANKS;
6254: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6255: depth + 1);
6256: SKIP_BLANKS;
6257: GROW;
6258: } else {
6259: elem = xmlParseName(ctxt);
6260: if (elem == NULL) {
6261: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6262: return(NULL);
6263: }
6264: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6265: if (cur == NULL) {
6266: xmlErrMemory(ctxt, NULL);
6267: return(NULL);
6268: }
6269: GROW;
6270: if (RAW == '?') {
6271: cur->ocur = XML_ELEMENT_CONTENT_OPT;
6272: NEXT;
6273: } else if (RAW == '*') {
6274: cur->ocur = XML_ELEMENT_CONTENT_MULT;
6275: NEXT;
6276: } else if (RAW == '+') {
6277: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6278: NEXT;
6279: } else {
6280: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6281: }
6282: GROW;
6283: }
6284: SKIP_BLANKS;
6285: SHRINK;
1.1.1.3 ! misho 6286: while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6287: /*
6288: * Each loop we parse one separator and one element.
6289: */
6290: if (RAW == ',') {
6291: if (type == 0) type = CUR;
6292:
6293: /*
6294: * Detect "Name | Name , Name" error
6295: */
6296: else if (type != CUR) {
6297: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6298: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6299: type);
6300: if ((last != NULL) && (last != ret))
6301: xmlFreeDocElementContent(ctxt->myDoc, last);
6302: if (ret != NULL)
6303: xmlFreeDocElementContent(ctxt->myDoc, ret);
6304: return(NULL);
6305: }
6306: NEXT;
6307:
6308: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6309: if (op == NULL) {
6310: if ((last != NULL) && (last != ret))
6311: xmlFreeDocElementContent(ctxt->myDoc, last);
6312: xmlFreeDocElementContent(ctxt->myDoc, ret);
6313: return(NULL);
6314: }
6315: if (last == NULL) {
6316: op->c1 = ret;
6317: if (ret != NULL)
6318: ret->parent = op;
6319: ret = cur = op;
6320: } else {
6321: cur->c2 = op;
6322: if (op != NULL)
6323: op->parent = cur;
6324: op->c1 = last;
6325: if (last != NULL)
6326: last->parent = op;
6327: cur =op;
6328: last = NULL;
6329: }
6330: } else if (RAW == '|') {
6331: if (type == 0) type = CUR;
6332:
6333: /*
6334: * Detect "Name , Name | Name" error
6335: */
6336: else if (type != CUR) {
6337: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6339: type);
6340: if ((last != NULL) && (last != ret))
6341: xmlFreeDocElementContent(ctxt->myDoc, last);
6342: if (ret != NULL)
6343: xmlFreeDocElementContent(ctxt->myDoc, ret);
6344: return(NULL);
6345: }
6346: NEXT;
6347:
6348: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6349: if (op == NULL) {
6350: if ((last != NULL) && (last != ret))
6351: xmlFreeDocElementContent(ctxt->myDoc, last);
6352: if (ret != NULL)
6353: xmlFreeDocElementContent(ctxt->myDoc, ret);
6354: return(NULL);
6355: }
6356: if (last == NULL) {
6357: op->c1 = ret;
6358: if (ret != NULL)
6359: ret->parent = op;
6360: ret = cur = op;
6361: } else {
6362: cur->c2 = op;
6363: if (op != NULL)
6364: op->parent = cur;
6365: op->c1 = last;
6366: if (last != NULL)
6367: last->parent = op;
6368: cur =op;
6369: last = NULL;
6370: }
6371: } else {
6372: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6373: if ((last != NULL) && (last != ret))
6374: xmlFreeDocElementContent(ctxt->myDoc, last);
6375: if (ret != NULL)
6376: xmlFreeDocElementContent(ctxt->myDoc, ret);
6377: return(NULL);
6378: }
6379: GROW;
6380: SKIP_BLANKS;
6381: GROW;
6382: if (RAW == '(') {
6383: int inputid = ctxt->input->id;
6384: /* Recurse on second child */
6385: NEXT;
6386: SKIP_BLANKS;
6387: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6388: depth + 1);
6389: SKIP_BLANKS;
6390: } else {
6391: elem = xmlParseName(ctxt);
6392: if (elem == NULL) {
6393: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6394: if (ret != NULL)
6395: xmlFreeDocElementContent(ctxt->myDoc, ret);
6396: return(NULL);
6397: }
6398: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6399: if (last == NULL) {
6400: if (ret != NULL)
6401: xmlFreeDocElementContent(ctxt->myDoc, ret);
6402: return(NULL);
6403: }
6404: if (RAW == '?') {
6405: last->ocur = XML_ELEMENT_CONTENT_OPT;
6406: NEXT;
6407: } else if (RAW == '*') {
6408: last->ocur = XML_ELEMENT_CONTENT_MULT;
6409: NEXT;
6410: } else if (RAW == '+') {
6411: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6412: NEXT;
6413: } else {
6414: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6415: }
6416: }
6417: SKIP_BLANKS;
6418: GROW;
6419: }
6420: if ((cur != NULL) && (last != NULL)) {
6421: cur->c2 = last;
6422: if (last != NULL)
6423: last->parent = cur;
6424: }
6425: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6426: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6427: "Element content declaration doesn't start and stop in the same entity\n",
6428: NULL, NULL);
6429: }
6430: NEXT;
6431: if (RAW == '?') {
6432: if (ret != NULL) {
6433: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6434: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6435: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6436: else
6437: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6438: }
6439: NEXT;
6440: } else if (RAW == '*') {
6441: if (ret != NULL) {
6442: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6443: cur = ret;
6444: /*
6445: * Some normalization:
6446: * (a | b* | c?)* == (a | b | c)*
6447: */
6448: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6449: if ((cur->c1 != NULL) &&
6450: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6451: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6452: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6453: if ((cur->c2 != NULL) &&
6454: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6456: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6457: cur = cur->c2;
6458: }
6459: }
6460: NEXT;
6461: } else if (RAW == '+') {
6462: if (ret != NULL) {
6463: int found = 0;
6464:
6465: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6467: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6468: else
6469: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6470: /*
6471: * Some normalization:
6472: * (a | b*)+ == (a | b)*
6473: * (a | b?)+ == (a | b)*
6474: */
6475: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6476: if ((cur->c1 != NULL) &&
6477: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6478: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6479: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6480: found = 1;
6481: }
6482: if ((cur->c2 != NULL) &&
6483: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6484: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6485: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6486: found = 1;
6487: }
6488: cur = cur->c2;
6489: }
6490: if (found)
6491: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6492: }
6493: NEXT;
6494: }
6495: return(ret);
6496: }
6497:
6498: /**
6499: * xmlParseElementChildrenContentDecl:
6500: * @ctxt: an XML parser context
6501: * @inputchk: the input used for the current entity, needed for boundary checks
6502: *
6503: * parse the declaration for a Mixed Element content
6504: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6505: *
6506: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6507: *
6508: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6509: *
6510: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6511: *
6512: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6513: *
6514: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6515: * TODO Parameter-entity replacement text must be properly nested
6516: * with parenthesized groups. That is to say, if either of the
6517: * opening or closing parentheses in a choice, seq, or Mixed
6518: * construct is contained in the replacement text for a parameter
6519: * entity, both must be contained in the same replacement text. For
6520: * interoperability, if a parameter-entity reference appears in a
6521: * choice, seq, or Mixed construct, its replacement text should not
6522: * be empty, and neither the first nor last non-blank character of
6523: * the replacement text should be a connector (| or ,).
6524: *
6525: * Returns the tree of xmlElementContentPtr describing the element
6526: * hierarchy.
6527: */
6528: xmlElementContentPtr
6529: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6530: /* stub left for API/ABI compat */
6531: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6532: }
6533:
6534: /**
6535: * xmlParseElementContentDecl:
6536: * @ctxt: an XML parser context
6537: * @name: the name of the element being defined.
6538: * @result: the Element Content pointer will be stored here if any
6539: *
6540: * parse the declaration for an Element content either Mixed or Children,
6541: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
1.1.1.3 ! misho 6542: *
1.1 misho 6543: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6544: *
6545: * returns: the type of element content XML_ELEMENT_TYPE_xxx
6546: */
6547:
6548: int
6549: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6550: xmlElementContentPtr *result) {
6551:
6552: xmlElementContentPtr tree = NULL;
6553: int inputid = ctxt->input->id;
6554: int res;
6555:
6556: *result = NULL;
6557:
6558: if (RAW != '(') {
6559: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6560: "xmlParseElementContentDecl : %s '(' expected\n", name);
6561: return(-1);
6562: }
6563: NEXT;
6564: GROW;
1.1.1.3 ! misho 6565: if (ctxt->instate == XML_PARSER_EOF)
! 6566: return(-1);
1.1 misho 6567: SKIP_BLANKS;
6568: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6569: tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6570: res = XML_ELEMENT_TYPE_MIXED;
6571: } else {
6572: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6573: res = XML_ELEMENT_TYPE_ELEMENT;
6574: }
6575: SKIP_BLANKS;
6576: *result = tree;
6577: return(res);
6578: }
6579:
6580: /**
6581: * xmlParseElementDecl:
6582: * @ctxt: an XML parser context
6583: *
6584: * parse an Element declaration.
6585: *
6586: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6587: *
6588: * [ VC: Unique Element Type Declaration ]
6589: * No element type may be declared more than once
6590: *
6591: * Returns the type of the element, or -1 in case of error
6592: */
6593: int
6594: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6595: const xmlChar *name;
6596: int ret = -1;
6597: xmlElementContentPtr content = NULL;
6598:
6599: /* GROW; done in the caller */
6600: if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6601: xmlParserInputPtr input = ctxt->input;
6602:
6603: SKIP(9);
6604: if (!IS_BLANK_CH(CUR)) {
6605: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6606: "Space required after 'ELEMENT'\n");
6607: }
6608: SKIP_BLANKS;
6609: name = xmlParseName(ctxt);
6610: if (name == NULL) {
6611: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612: "xmlParseElementDecl: no name for Element\n");
6613: return(-1);
6614: }
6615: while ((RAW == 0) && (ctxt->inputNr > 1))
6616: xmlPopInput(ctxt);
6617: if (!IS_BLANK_CH(CUR)) {
6618: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6619: "Space required after the element name\n");
6620: }
6621: SKIP_BLANKS;
6622: if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6623: SKIP(5);
6624: /*
6625: * Element must always be empty.
6626: */
6627: ret = XML_ELEMENT_TYPE_EMPTY;
6628: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6629: (NXT(2) == 'Y')) {
6630: SKIP(3);
6631: /*
6632: * Element is a generic container.
6633: */
6634: ret = XML_ELEMENT_TYPE_ANY;
6635: } else if (RAW == '(') {
6636: ret = xmlParseElementContentDecl(ctxt, name, &content);
6637: } else {
6638: /*
6639: * [ WFC: PEs in Internal Subset ] error handling.
6640: */
6641: if ((RAW == '%') && (ctxt->external == 0) &&
6642: (ctxt->inputNr == 1)) {
6643: xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6644: "PEReference: forbidden within markup decl in internal subset\n");
6645: } else {
6646: xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6647: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6648: }
6649: return(-1);
6650: }
6651:
6652: SKIP_BLANKS;
6653: /*
6654: * Pop-up of finished entities.
6655: */
6656: while ((RAW == 0) && (ctxt->inputNr > 1))
6657: xmlPopInput(ctxt);
6658: SKIP_BLANKS;
6659:
6660: if (RAW != '>') {
6661: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6662: if (content != NULL) {
6663: xmlFreeDocElementContent(ctxt->myDoc, content);
6664: }
6665: } else {
6666: if (input != ctxt->input) {
6667: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6668: "Element declaration doesn't start and stop in the same entity\n");
6669: }
1.1.1.3 ! misho 6670:
1.1 misho 6671: NEXT;
6672: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6673: (ctxt->sax->elementDecl != NULL)) {
6674: if (content != NULL)
6675: content->parent = NULL;
6676: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6677: content);
6678: if ((content != NULL) && (content->parent == NULL)) {
6679: /*
6680: * this is a trick: if xmlAddElementDecl is called,
6681: * instead of copying the full tree it is plugged directly
1.1.1.3 ! misho 6682: * if called from the parser. Avoid duplicating the
1.1 misho 6683: * interfaces or change the API/ABI
6684: */
6685: xmlFreeDocElementContent(ctxt->myDoc, content);
6686: }
6687: } else if (content != NULL) {
6688: xmlFreeDocElementContent(ctxt->myDoc, content);
6689: }
6690: }
6691: }
6692: return(ret);
6693: }
6694:
6695: /**
6696: * xmlParseConditionalSections
6697: * @ctxt: an XML parser context
6698: *
1.1.1.3 ! misho 6699: * [61] conditionalSect ::= includeSect | ignoreSect
! 6700: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
1.1 misho 6701: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6702: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6703: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6704: */
6705:
6706: static void
6707: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6708: int id = ctxt->input->id;
6709:
6710: SKIP(3);
6711: SKIP_BLANKS;
6712: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6713: SKIP(7);
6714: SKIP_BLANKS;
6715: if (RAW != '[') {
6716: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6717: } else {
6718: if (ctxt->input->id != id) {
6719: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6720: "All markup of the conditional section is not in the same entity\n",
6721: NULL, NULL);
6722: }
6723: NEXT;
6724: }
6725: if (xmlParserDebugEntities) {
6726: if ((ctxt->input != NULL) && (ctxt->input->filename))
6727: xmlGenericError(xmlGenericErrorContext,
6728: "%s(%d): ", ctxt->input->filename,
6729: ctxt->input->line);
6730: xmlGenericError(xmlGenericErrorContext,
6731: "Entering INCLUDE Conditional Section\n");
6732: }
6733:
1.1.1.3 ! misho 6734: while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
! 6735: (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6736: const xmlChar *check = CUR_PTR;
6737: unsigned int cons = ctxt->input->consumed;
6738:
6739: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6740: xmlParseConditionalSections(ctxt);
6741: } else if (IS_BLANK_CH(CUR)) {
6742: NEXT;
6743: } else if (RAW == '%') {
6744: xmlParsePEReference(ctxt);
6745: } else
6746: xmlParseMarkupDecl(ctxt);
6747:
6748: /*
6749: * Pop-up of finished entities.
6750: */
6751: while ((RAW == 0) && (ctxt->inputNr > 1))
6752: xmlPopInput(ctxt);
6753:
6754: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6755: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6756: break;
6757: }
6758: }
6759: if (xmlParserDebugEntities) {
6760: if ((ctxt->input != NULL) && (ctxt->input->filename))
6761: xmlGenericError(xmlGenericErrorContext,
6762: "%s(%d): ", ctxt->input->filename,
6763: ctxt->input->line);
6764: xmlGenericError(xmlGenericErrorContext,
6765: "Leaving INCLUDE Conditional Section\n");
6766: }
6767:
6768: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6769: int state;
6770: xmlParserInputState instate;
6771: int depth = 0;
6772:
6773: SKIP(6);
6774: SKIP_BLANKS;
6775: if (RAW != '[') {
6776: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6777: } else {
6778: if (ctxt->input->id != id) {
6779: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6780: "All markup of the conditional section is not in the same entity\n",
6781: NULL, NULL);
6782: }
6783: NEXT;
6784: }
6785: if (xmlParserDebugEntities) {
6786: if ((ctxt->input != NULL) && (ctxt->input->filename))
6787: xmlGenericError(xmlGenericErrorContext,
6788: "%s(%d): ", ctxt->input->filename,
6789: ctxt->input->line);
6790: xmlGenericError(xmlGenericErrorContext,
6791: "Entering IGNORE Conditional Section\n");
6792: }
6793:
6794: /*
6795: * Parse up to the end of the conditional section
6796: * But disable SAX event generating DTD building in the meantime
6797: */
6798: state = ctxt->disableSAX;
6799: instate = ctxt->instate;
6800: if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6801: ctxt->instate = XML_PARSER_IGNORE;
6802:
1.1.1.3 ! misho 6803: while (((depth >= 0) && (RAW != 0)) &&
! 6804: (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6805: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6806: depth++;
6807: SKIP(3);
6808: continue;
6809: }
6810: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6811: if (--depth >= 0) SKIP(3);
6812: continue;
6813: }
6814: NEXT;
6815: continue;
6816: }
6817:
6818: ctxt->disableSAX = state;
6819: ctxt->instate = instate;
6820:
6821: if (xmlParserDebugEntities) {
6822: if ((ctxt->input != NULL) && (ctxt->input->filename))
6823: xmlGenericError(xmlGenericErrorContext,
6824: "%s(%d): ", ctxt->input->filename,
6825: ctxt->input->line);
6826: xmlGenericError(xmlGenericErrorContext,
6827: "Leaving IGNORE Conditional Section\n");
6828: }
6829:
6830: } else {
6831: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6832: }
6833:
6834: if (RAW == 0)
6835: SHRINK;
6836:
6837: if (RAW == 0) {
6838: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6839: } else {
6840: if (ctxt->input->id != id) {
6841: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6842: "All markup of the conditional section is not in the same entity\n",
6843: NULL, NULL);
6844: }
6845: SKIP(3);
6846: }
6847: }
6848:
6849: /**
6850: * xmlParseMarkupDecl:
6851: * @ctxt: an XML parser context
1.1.1.3 ! misho 6852: *
1.1 misho 6853: * parse Markup declarations
6854: *
6855: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6856: * NotationDecl | PI | Comment
6857: *
6858: * [ VC: Proper Declaration/PE Nesting ]
6859: * Parameter-entity replacement text must be properly nested with
6860: * markup declarations. That is to say, if either the first character
6861: * or the last character of a markup declaration (markupdecl above) is
6862: * contained in the replacement text for a parameter-entity reference,
6863: * both must be contained in the same replacement text.
6864: *
6865: * [ WFC: PEs in Internal Subset ]
6866: * In the internal DTD subset, parameter-entity references can occur
6867: * only where markup declarations can occur, not within markup declarations.
6868: * (This does not apply to references that occur in external parameter
1.1.1.3 ! misho 6869: * entities or to the external subset.)
1.1 misho 6870: */
6871: void
6872: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6873: GROW;
6874: if (CUR == '<') {
6875: if (NXT(1) == '!') {
6876: switch (NXT(2)) {
6877: case 'E':
6878: if (NXT(3) == 'L')
6879: xmlParseElementDecl(ctxt);
6880: else if (NXT(3) == 'N')
6881: xmlParseEntityDecl(ctxt);
6882: break;
6883: case 'A':
6884: xmlParseAttributeListDecl(ctxt);
6885: break;
6886: case 'N':
6887: xmlParseNotationDecl(ctxt);
6888: break;
6889: case '-':
6890: xmlParseComment(ctxt);
6891: break;
6892: default:
6893: /* there is an error but it will be detected later */
6894: break;
6895: }
6896: } else if (NXT(1) == '?') {
6897: xmlParsePI(ctxt);
6898: }
6899: }
6900: /*
6901: * This is only for internal subset. On external entities,
6902: * the replacement is done before parsing stage
6903: */
6904: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6905: xmlParsePEReference(ctxt);
6906:
6907: /*
6908: * Conditional sections are allowed from entities included
6909: * by PE References in the internal subset.
6910: */
6911: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6912: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6913: xmlParseConditionalSections(ctxt);
6914: }
6915: }
6916:
6917: ctxt->instate = XML_PARSER_DTD;
6918: }
6919:
6920: /**
6921: * xmlParseTextDecl:
6922: * @ctxt: an XML parser context
6923: *
6924: * parse an XML declaration header for external entities
6925: *
6926: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6927: */
6928:
6929: void
6930: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6931: xmlChar *version;
6932: const xmlChar *encoding;
6933:
6934: /*
6935: * We know that '<?xml' is here.
6936: */
6937: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6938: SKIP(5);
6939: } else {
6940: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6941: return;
6942: }
6943:
6944: if (!IS_BLANK_CH(CUR)) {
6945: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946: "Space needed after '<?xml'\n");
6947: }
6948: SKIP_BLANKS;
6949:
6950: /*
6951: * We may have the VersionInfo here.
6952: */
6953: version = xmlParseVersionInfo(ctxt);
6954: if (version == NULL)
6955: version = xmlCharStrdup(XML_DEFAULT_VERSION);
6956: else {
6957: if (!IS_BLANK_CH(CUR)) {
6958: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6959: "Space needed here\n");
6960: }
6961: }
6962: ctxt->input->version = version;
6963:
6964: /*
6965: * We must have the encoding declaration
6966: */
6967: encoding = xmlParseEncodingDecl(ctxt);
6968: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6969: /*
6970: * The XML REC instructs us to stop parsing right here
6971: */
6972: return;
6973: }
6974: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976: "Missing encoding in text declaration\n");
6977: }
6978:
6979: SKIP_BLANKS;
6980: if ((RAW == '?') && (NXT(1) == '>')) {
6981: SKIP(2);
6982: } else if (RAW == '>') {
6983: /* Deprecated old WD ... */
6984: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6985: NEXT;
6986: } else {
6987: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6988: MOVETO_ENDTAG(CUR_PTR);
6989: NEXT;
6990: }
6991: }
6992:
6993: /**
6994: * xmlParseExternalSubset:
6995: * @ctxt: an XML parser context
6996: * @ExternalID: the external identifier
6997: * @SystemID: the system identifier (or URL)
1.1.1.3 ! misho 6998: *
1.1 misho 6999: * parse Markup declarations from an external subset
7000: *
7001: * [30] extSubset ::= textDecl? extSubsetDecl
7002: *
7003: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7004: */
7005: void
7006: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7007: const xmlChar *SystemID) {
7008: xmlDetectSAX2(ctxt);
7009: GROW;
7010:
7011: if ((ctxt->encoding == NULL) &&
7012: (ctxt->input->end - ctxt->input->cur >= 4)) {
7013: xmlChar start[4];
7014: xmlCharEncoding enc;
7015:
7016: start[0] = RAW;
7017: start[1] = NXT(1);
7018: start[2] = NXT(2);
7019: start[3] = NXT(3);
7020: enc = xmlDetectCharEncoding(start, 4);
7021: if (enc != XML_CHAR_ENCODING_NONE)
7022: xmlSwitchEncoding(ctxt, enc);
7023: }
7024:
7025: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7026: xmlParseTextDecl(ctxt);
7027: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7028: /*
7029: * The XML REC instructs us to stop parsing right here
7030: */
7031: ctxt->instate = XML_PARSER_EOF;
7032: return;
7033: }
7034: }
7035: if (ctxt->myDoc == NULL) {
7036: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7037: if (ctxt->myDoc == NULL) {
7038: xmlErrMemory(ctxt, "New Doc failed");
7039: return;
7040: }
7041: ctxt->myDoc->properties = XML_DOC_INTERNAL;
7042: }
7043: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7044: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7045:
7046: ctxt->instate = XML_PARSER_DTD;
7047: ctxt->external = 1;
7048: while (((RAW == '<') && (NXT(1) == '?')) ||
7049: ((RAW == '<') && (NXT(1) == '!')) ||
7050: (RAW == '%') || IS_BLANK_CH(CUR)) {
7051: const xmlChar *check = CUR_PTR;
7052: unsigned int cons = ctxt->input->consumed;
7053:
7054: GROW;
7055: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7056: xmlParseConditionalSections(ctxt);
7057: } else if (IS_BLANK_CH(CUR)) {
7058: NEXT;
7059: } else if (RAW == '%') {
7060: xmlParsePEReference(ctxt);
7061: } else
7062: xmlParseMarkupDecl(ctxt);
7063:
7064: /*
7065: * Pop-up of finished entities.
7066: */
7067: while ((RAW == 0) && (ctxt->inputNr > 1))
7068: xmlPopInput(ctxt);
7069:
7070: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7071: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7072: break;
7073: }
7074: }
1.1.1.3 ! misho 7075:
1.1 misho 7076: if (RAW != 0) {
7077: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7078: }
7079:
7080: }
7081:
7082: /**
7083: * xmlParseReference:
7084: * @ctxt: an XML parser context
7085: *
7086: * parse and handle entity references in content, depending on the SAX
7087: * interface, this may end-up in a call to characters() if this is a
7088: * CharRef, a predefined entity, if there is no reference() callback,
7089: * or if the parser was asked to switch to that mode.
7090: *
7091: * [67] Reference ::= EntityRef | CharRef
7092: */
7093: void
7094: xmlParseReference(xmlParserCtxtPtr ctxt) {
7095: xmlEntityPtr ent;
7096: xmlChar *val;
7097: int was_checked;
7098: xmlNodePtr list = NULL;
7099: xmlParserErrors ret = XML_ERR_OK;
7100:
7101:
7102: if (RAW != '&')
7103: return;
7104:
7105: /*
7106: * Simple case of a CharRef
7107: */
7108: if (NXT(1) == '#') {
7109: int i = 0;
7110: xmlChar out[10];
7111: int hex = NXT(2);
7112: int value = xmlParseCharRef(ctxt);
7113:
7114: if (value == 0)
7115: return;
7116: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7117: /*
7118: * So we are using non-UTF-8 buffers
7119: * Check that the char fit on 8bits, if not
7120: * generate a CharRef.
7121: */
7122: if (value <= 0xFF) {
7123: out[0] = value;
7124: out[1] = 0;
7125: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7126: (!ctxt->disableSAX))
7127: ctxt->sax->characters(ctxt->userData, out, 1);
7128: } else {
7129: if ((hex == 'x') || (hex == 'X'))
7130: snprintf((char *)out, sizeof(out), "#x%X", value);
7131: else
7132: snprintf((char *)out, sizeof(out), "#%d", value);
7133: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7134: (!ctxt->disableSAX))
7135: ctxt->sax->reference(ctxt->userData, out);
7136: }
7137: } else {
7138: /*
7139: * Just encode the value in UTF-8
7140: */
7141: COPY_BUF(0 ,out, i, value);
7142: out[i] = 0;
7143: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144: (!ctxt->disableSAX))
7145: ctxt->sax->characters(ctxt->userData, out, i);
7146: }
7147: return;
7148: }
7149:
7150: /*
7151: * We are seeing an entity reference
7152: */
7153: ent = xmlParseEntityRef(ctxt);
7154: if (ent == NULL) return;
7155: if (!ctxt->wellFormed)
7156: return;
7157: was_checked = ent->checked;
7158:
7159: /* special case of predefined entities */
7160: if ((ent->name == NULL) ||
7161: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7162: val = ent->content;
7163: if (val == NULL) return;
7164: /*
7165: * inline the entity.
7166: */
7167: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7168: (!ctxt->disableSAX))
7169: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7170: return;
7171: }
7172:
7173: /*
7174: * The first reference to the entity trigger a parsing phase
7175: * where the ent->children is filled with the result from
7176: * the parsing.
1.1.1.3 ! misho 7177: * Note: external parsed entities will not be loaded, it is not
! 7178: * required for a non-validating parser, unless the parsing option
! 7179: * of validating, or substituting entities were given. Doing so is
! 7180: * far more secure as the parser will only process data coming from
! 7181: * the document entity by default.
! 7182: */
! 7183: if ((ent->checked == 0) &&
! 7184: ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
! 7185: (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
1.1 misho 7186: unsigned long oldnbent = ctxt->nbentities;
7187:
7188: /*
7189: * This is a bit hackish but this seems the best
7190: * way to make sure both SAX and DOM entity support
7191: * behaves okay.
7192: */
7193: void *user_data;
7194: if (ctxt->userData == ctxt)
7195: user_data = NULL;
7196: else
7197: user_data = ctxt->userData;
7198:
7199: /*
7200: * Check that this entity is well formed
7201: * 4.3.2: An internal general parsed entity is well-formed
7202: * if its replacement text matches the production labeled
7203: * content.
7204: */
7205: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7206: ctxt->depth++;
7207: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7208: user_data, &list);
7209: ctxt->depth--;
7210:
7211: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7212: ctxt->depth++;
7213: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7214: user_data, ctxt->depth, ent->URI,
7215: ent->ExternalID, &list);
7216: ctxt->depth--;
7217: } else {
7218: ret = XML_ERR_ENTITY_PE_INTERNAL;
7219: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7220: "invalid entity type found\n", NULL);
7221: }
7222:
7223: /*
7224: * Store the number of entities needing parsing for this entity
7225: * content and do checkings
7226: */
1.1.1.3 ! misho 7227: ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
! 7228: if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
! 7229: ent->checked |= 1;
1.1 misho 7230: if (ret == XML_ERR_ENTITY_LOOP) {
7231: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7232: xmlFreeNodeList(list);
7233: return;
7234: }
1.1.1.3 ! misho 7235: if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
1.1 misho 7236: xmlFreeNodeList(list);
7237: return;
7238: }
7239:
7240: if ((ret == XML_ERR_OK) && (list != NULL)) {
7241: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7242: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7243: (ent->children == NULL)) {
7244: ent->children = list;
7245: if (ctxt->replaceEntities) {
7246: /*
7247: * Prune it directly in the generated document
7248: * except for single text nodes.
7249: */
7250: if (((list->type == XML_TEXT_NODE) &&
7251: (list->next == NULL)) ||
7252: (ctxt->parseMode == XML_PARSE_READER)) {
7253: list->parent = (xmlNodePtr) ent;
7254: list = NULL;
7255: ent->owner = 1;
7256: } else {
7257: ent->owner = 0;
7258: while (list != NULL) {
7259: list->parent = (xmlNodePtr) ctxt->node;
7260: list->doc = ctxt->myDoc;
7261: if (list->next == NULL)
7262: ent->last = list;
7263: list = list->next;
7264: }
7265: list = ent->children;
7266: #ifdef LIBXML_LEGACY_ENABLED
7267: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7268: xmlAddEntityReference(ent, list, NULL);
7269: #endif /* LIBXML_LEGACY_ENABLED */
7270: }
7271: } else {
7272: ent->owner = 1;
7273: while (list != NULL) {
7274: list->parent = (xmlNodePtr) ent;
1.1.1.2 misho 7275: xmlSetTreeDoc(list, ent->doc);
1.1 misho 7276: if (list->next == NULL)
7277: ent->last = list;
7278: list = list->next;
7279: }
7280: }
7281: } else {
7282: xmlFreeNodeList(list);
7283: list = NULL;
7284: }
7285: } else if ((ret != XML_ERR_OK) &&
7286: (ret != XML_WAR_UNDECLARED_ENTITY)) {
7287: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7288: "Entity '%s' failed to parse\n", ent->name);
7289: } else if (list != NULL) {
7290: xmlFreeNodeList(list);
7291: list = NULL;
7292: }
7293: if (ent->checked == 0)
1.1.1.3 ! misho 7294: ent->checked = 2;
1.1 misho 7295: } else if (ent->checked != 1) {
1.1.1.3 ! misho 7296: ctxt->nbentities += ent->checked / 2;
1.1 misho 7297: }
7298:
7299: /*
7300: * Now that the entity content has been gathered
7301: * provide it to the application, this can take different forms based
7302: * on the parsing modes.
7303: */
7304: if (ent->children == NULL) {
7305: /*
7306: * Probably running in SAX mode and the callbacks don't
7307: * build the entity content. So unless we already went
7308: * through parsing for first checking go through the entity
7309: * content to generate callbacks associated to the entity
7310: */
7311: if (was_checked != 0) {
7312: void *user_data;
7313: /*
7314: * This is a bit hackish but this seems the best
7315: * way to make sure both SAX and DOM entity support
7316: * behaves okay.
7317: */
7318: if (ctxt->userData == ctxt)
7319: user_data = NULL;
7320: else
7321: user_data = ctxt->userData;
7322:
7323: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7324: ctxt->depth++;
7325: ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7326: ent->content, user_data, NULL);
7327: ctxt->depth--;
7328: } else if (ent->etype ==
7329: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7330: ctxt->depth++;
7331: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7332: ctxt->sax, user_data, ctxt->depth,
7333: ent->URI, ent->ExternalID, NULL);
7334: ctxt->depth--;
7335: } else {
7336: ret = XML_ERR_ENTITY_PE_INTERNAL;
7337: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7338: "invalid entity type found\n", NULL);
7339: }
7340: if (ret == XML_ERR_ENTITY_LOOP) {
7341: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7342: return;
7343: }
7344: }
7345: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7346: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7347: /*
7348: * Entity reference callback comes second, it's somewhat
7349: * superfluous but a compatibility to historical behaviour
7350: */
7351: ctxt->sax->reference(ctxt->userData, ent->name);
7352: }
7353: return;
7354: }
7355:
7356: /*
7357: * If we didn't get any children for the entity being built
7358: */
7359: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7360: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7361: /*
7362: * Create a node.
7363: */
7364: ctxt->sax->reference(ctxt->userData, ent->name);
7365: return;
7366: }
7367:
7368: if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7369: /*
7370: * There is a problem on the handling of _private for entities
7371: * (bug 155816): Should we copy the content of the field from
7372: * the entity (possibly overwriting some value set by the user
7373: * when a copy is created), should we leave it alone, or should
7374: * we try to take care of different situations? The problem
7375: * is exacerbated by the usage of this field by the xmlReader.
7376: * To fix this bug, we look at _private on the created node
7377: * and, if it's NULL, we copy in whatever was in the entity.
7378: * If it's not NULL we leave it alone. This is somewhat of a
7379: * hack - maybe we should have further tests to determine
7380: * what to do.
7381: */
7382: if ((ctxt->node != NULL) && (ent->children != NULL)) {
7383: /*
7384: * Seems we are generating the DOM content, do
7385: * a simple tree copy for all references except the first
7386: * In the first occurrence list contains the replacement.
7387: */
7388: if (((list == NULL) && (ent->owner == 0)) ||
7389: (ctxt->parseMode == XML_PARSE_READER)) {
7390: xmlNodePtr nw = NULL, cur, firstChild = NULL;
7391:
7392: /*
1.1.1.3 ! misho 7393: * We are copying here, make sure there is no abuse
! 7394: */
! 7395: ctxt->sizeentcopy += ent->length;
! 7396: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
! 7397: return;
! 7398:
! 7399: /*
1.1 misho 7400: * when operating on a reader, the entities definitions
7401: * are always owning the entities subtree.
7402: if (ctxt->parseMode == XML_PARSE_READER)
7403: ent->owner = 1;
7404: */
7405:
7406: cur = ent->children;
7407: while (cur != NULL) {
7408: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7409: if (nw != NULL) {
7410: if (nw->_private == NULL)
7411: nw->_private = cur->_private;
7412: if (firstChild == NULL){
7413: firstChild = nw;
7414: }
7415: nw = xmlAddChild(ctxt->node, nw);
7416: }
7417: if (cur == ent->last) {
7418: /*
7419: * needed to detect some strange empty
7420: * node cases in the reader tests
7421: */
7422: if ((ctxt->parseMode == XML_PARSE_READER) &&
7423: (nw != NULL) &&
7424: (nw->type == XML_ELEMENT_NODE) &&
7425: (nw->children == NULL))
7426: nw->extra = 1;
7427:
7428: break;
7429: }
7430: cur = cur->next;
7431: }
7432: #ifdef LIBXML_LEGACY_ENABLED
7433: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7434: xmlAddEntityReference(ent, firstChild, nw);
7435: #endif /* LIBXML_LEGACY_ENABLED */
1.1.1.3 ! misho 7436: } else if ((list == NULL) || (ctxt->inputNr > 0)) {
1.1 misho 7437: xmlNodePtr nw = NULL, cur, next, last,
7438: firstChild = NULL;
1.1.1.3 ! misho 7439:
! 7440: /*
! 7441: * We are copying here, make sure there is no abuse
! 7442: */
! 7443: ctxt->sizeentcopy += ent->length;
! 7444: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
! 7445: return;
! 7446:
1.1 misho 7447: /*
7448: * Copy the entity child list and make it the new
7449: * entity child list. The goal is to make sure any
7450: * ID or REF referenced will be the one from the
7451: * document content and not the entity copy.
7452: */
7453: cur = ent->children;
7454: ent->children = NULL;
7455: last = ent->last;
7456: ent->last = NULL;
7457: while (cur != NULL) {
7458: next = cur->next;
7459: cur->next = NULL;
7460: cur->parent = NULL;
7461: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7462: if (nw != NULL) {
7463: if (nw->_private == NULL)
7464: nw->_private = cur->_private;
7465: if (firstChild == NULL){
7466: firstChild = cur;
7467: }
7468: xmlAddChild((xmlNodePtr) ent, nw);
7469: xmlAddChild(ctxt->node, cur);
7470: }
7471: if (cur == last)
7472: break;
7473: cur = next;
7474: }
7475: if (ent->owner == 0)
7476: ent->owner = 1;
7477: #ifdef LIBXML_LEGACY_ENABLED
7478: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7479: xmlAddEntityReference(ent, firstChild, nw);
7480: #endif /* LIBXML_LEGACY_ENABLED */
7481: } else {
7482: const xmlChar *nbktext;
7483:
7484: /*
7485: * the name change is to avoid coalescing of the
7486: * node with a possible previous text one which
7487: * would make ent->children a dangling pointer
7488: */
7489: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7490: -1);
7491: if (ent->children->type == XML_TEXT_NODE)
7492: ent->children->name = nbktext;
7493: if ((ent->last != ent->children) &&
7494: (ent->last->type == XML_TEXT_NODE))
7495: ent->last->name = nbktext;
7496: xmlAddChildList(ctxt->node, ent->children);
7497: }
7498:
7499: /*
7500: * This is to avoid a nasty side effect, see
7501: * characters() in SAX.c
7502: */
7503: ctxt->nodemem = 0;
7504: ctxt->nodelen = 0;
7505: return;
7506: }
7507: }
7508: }
7509:
7510: /**
7511: * xmlParseEntityRef:
7512: * @ctxt: an XML parser context
7513: *
7514: * parse a general entity reference at the current input position
7515: *
7516: * [68] EntityRef ::= '&' Name ';'
7517: *
7518: * [ WFC: Entity Declared ]
7519: * In a document without any DTD, a document with only an internal DTD
7520: * subset which contains no parameter entity references, or a document
7521: * with "standalone='yes'", the Name given in the entity reference
7522: * must match that in an entity declaration, except that well-formed
7523: * documents need not declare any of the following entities: amp, lt,
7524: * gt, apos, quot. The declaration of a parameter entity must precede
7525: * any reference to it. Similarly, the declaration of a general entity
7526: * must precede any reference to it which appears in a default value in an
7527: * attribute-list declaration. Note that if entities are declared in the
7528: * external subset or in external parameter entities, a non-validating
7529: * processor is not obligated to read and process their declarations;
7530: * for such documents, the rule that an entity must be declared is a
7531: * well-formedness constraint only if standalone='yes'.
7532: *
7533: * [ WFC: Parsed Entity ]
7534: * An entity reference must not contain the name of an unparsed entity
7535: *
7536: * Returns the xmlEntityPtr if found, or NULL otherwise (no '&', no name, missing ';', parser stopped, or entity not found).
7537: */
7538: xmlEntityPtr
7539: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7540: const xmlChar *name;
7541: xmlEntityPtr ent = NULL;
7542:
7543: GROW;
1.1.1.3 ! misho 7544: if (ctxt->instate == XML_PARSER_EOF)
! 7545: return(NULL);
1.1 misho 7546:
7547: if (RAW != '&')
7548: return(NULL);
7549: NEXT;
7550: name = xmlParseName(ctxt);
7551: if (name == NULL) {
7552: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7553: "xmlParseEntityRef: no name\n");
7554: return(NULL);
7555: }
7556: if (RAW != ';') {
7557: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7558: return(NULL);
7559: }
7560: NEXT;
7561:
7562: /*
1.1.1.3 ! misho 7563: * Predefined entities override any extra definition
1.1 misho 7564: */
7565: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7566: ent = xmlGetPredefinedEntity(name);
7567: if (ent != NULL)
7568: return(ent);
7569: }
7570:
7571: /*
1.1.1.3 ! misho 7572: * Increase the number of entity references parsed
1.1 misho 7573: */
7574: ctxt->nbentities++;
7575:
7576: /*
7577: * Ask first SAX for entity resolution, otherwise try the
7578: * entities which may have been stored in the parser context.
7579: */
7580: if (ctxt->sax != NULL) {
7581: if (ctxt->sax->getEntity != NULL)
7582: ent = ctxt->sax->getEntity(ctxt->userData, name);
1.1.1.3 ! misho 7583: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
1.1 misho 7584: (ctxt->options & XML_PARSE_OLDSAX))
7585: ent = xmlGetPredefinedEntity(name);
7586: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7587: (ctxt->userData==ctxt)) {
7588: ent = xmlSAX2GetEntity(ctxt, name);
7589: }
7590: }
1.1.1.3 ! misho 7591: if (ctxt->instate == XML_PARSER_EOF)
! 7592: return(NULL);
1.1 misho 7593: /*
7594: * [ WFC: Entity Declared ]
7595: * In a document without any DTD, a document with only an
7596: * internal DTD subset which contains no parameter entity
7597: * references, or a document with "standalone='yes'", the
7598: * Name given in the entity reference must match that in an
7599: * entity declaration, except that well-formed documents
7600: * need not declare any of the following entities: amp, lt,
7601: * gt, apos, quot.
7602: * The declaration of a parameter entity must precede any
7603: * reference to it.
7604: * Similarly, the declaration of a general entity must
7605: * precede any reference to it which appears in a default
7606: * value in an attribute-list declaration. Note that if
7607: * entities are declared in the external subset or in
7608: * external parameter entities, a non-validating processor
7609: * is not obligated to read and process their declarations;
7610: * for such documents, the rule that an entity must be
7611: * declared is a well-formedness constraint only if
7612: * standalone='yes'.
7613: */
7614: if (ent == NULL) {
7615: if ((ctxt->standalone == 1) ||
7616: ((ctxt->hasExternalSubset == 0) &&
7617: (ctxt->hasPErefs == 0))) {
7618: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7619: "Entity '%s' not defined\n", name);
7620: } else {
7621: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7622: "Entity '%s' not defined\n", name);
7623: if ((ctxt->inSubset == 0) &&
7624: (ctxt->sax != NULL) &&
7625: (ctxt->sax->reference != NULL)) {
7626: ctxt->sax->reference(ctxt->userData, name);
7627: }
7628: }
7629: ctxt->valid = 0;
7630: }
7631:
7632: /*
7633: * [ WFC: Parsed Entity ]
7634: * An entity reference must not contain the name of an
7635: * unparsed entity
7636: */
7637: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7638: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7639: "Entity reference to unparsed entity %s\n", name);
7640: }
7641:
7642: /*
7643: * [ WFC: No External Entity References ]
7644: * Attribute values cannot contain direct or indirect
7645: * entity references to external entities.
7646: */
7647: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7648: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7649: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7650: "Attribute references external entity '%s'\n", name);
7651: }
7652: /*
7653: * [ WFC: No < in Attribute Values ]
7654: * The replacement text of any entity referred to directly or
7655: * indirectly in an attribute value (other than "&lt;") must
1.1.1.3 ! misho 7656: * not contain a <.
1.1 misho 7657: */
7658: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.1.1.3 ! misho 7659: (ent != NULL) &&
! 7660: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
! 7661: if ((ent->checked & 1) || ((ent->checked == 0) &&
! 7662: (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
! 7663: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
! 7664: "'<' in entity '%s' is not allowed in attributes values\n", name);
! 7665: }
1.1 misho 7666: }
7667:
7668: /*
7669: * Internal check, no parameter entities here ...
7670: */
7671: else {
7672: switch (ent->etype) {
7673: case XML_INTERNAL_PARAMETER_ENTITY:
7674: case XML_EXTERNAL_PARAMETER_ENTITY:
7675: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7676: "Attempt to reference the parameter entity '%s'\n",
7677: name);
7678: break;
7679: default:
7680: break;
7681: }
7682: }
7683:
7684: /*
7685: * [ WFC: No Recursion ]
7686: * A parsed entity must not contain a recursive reference
1.1.1.3 ! misho 7687: * to itself, either directly or indirectly.
1.1 misho 7688: * Done somewhere else
7689: */
7690: return(ent);
7691: }
7692:
7693: /**
7694: * xmlParseStringEntityRef:
7695: * @ctxt: an XML parser context
7696: * @str: a pointer to an index in the string
7697: *
7698: * parse a general entity reference, but this version parses it from
7699: * a string value instead of the parser input.
7700: *
7701: * [68] EntityRef ::= '&' Name ';'
7702: *
7703: * [ WFC: Entity Declared ]
7704: * In a document without any DTD, a document with only an internal DTD
7705: * subset which contains no parameter entity references, or a document
7706: * with "standalone='yes'", the Name given in the entity reference
7707: * must match that in an entity declaration, except that well-formed
7708: * documents need not declare any of the following entities: amp, lt,
7709: * gt, apos, quot. The declaration of a parameter entity must precede
7710: * any reference to it. Similarly, the declaration of a general entity
7711: * must precede any reference to it which appears in a default value in an
7712: * attribute-list declaration. Note that if entities are declared in the
7713: * external subset or in external parameter entities, a non-validating
7714: * processor is not obligated to read and process their declarations;
7715: * for such documents, the rule that an entity must be declared is a
7716: * well-formedness constraint only if standalone='yes'.
7717: *
7718: * [ WFC: Parsed Entity ]
7719: * An entity reference must not contain the name of an unparsed entity
7720: *
7721: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7722: * is updated to the current location in the string, except when *str does not start with '&' or the parser was stopped during the lookup.
7723: */
7724: static xmlEntityPtr
7725: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7726: xmlChar *name;
7727: const xmlChar *ptr;
7728: xmlChar cur;
7729: xmlEntityPtr ent = NULL;
7730:
7731: if ((str == NULL) || (*str == NULL))
7732: return(NULL);
7733: ptr = *str;
7734: cur = *ptr;
7735: if (cur != '&')
7736: return(NULL);
7737:
7738: ptr++;
7739: name = xmlParseStringName(ctxt, &ptr);
7740: if (name == NULL) {
7741: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7742: "xmlParseStringEntityRef: no name\n");
7743: *str = ptr;
7744: return(NULL);
7745: }
7746: if (*ptr != ';') {
7747: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7748: xmlFree(name);
7749: *str = ptr;
7750: return(NULL);
7751: }
7752: ptr++;
7753:
7754:
7755: /*
7756: * Predefined entities override any extra definition
7757: */
7758: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7759: ent = xmlGetPredefinedEntity(name);
7760: if (ent != NULL) {
7761: xmlFree(name);
7762: *str = ptr;
7763: return(ent);
7764: }
7765: }
7766:
7767: /*
7768: * Increase the number of entity references parsed
7769: */
7770: ctxt->nbentities++;
7771:
7772: /*
7773: * Ask first SAX for entity resolution, otherwise try the
7774: * entities which may have been stored in the parser context.
7775: */
7776: if (ctxt->sax != NULL) {
7777: if (ctxt->sax->getEntity != NULL)
7778: ent = ctxt->sax->getEntity(ctxt->userData, name);
7779: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7780: ent = xmlGetPredefinedEntity(name);
7781: if ((ent == NULL) && (ctxt->userData==ctxt)) {
7782: ent = xmlSAX2GetEntity(ctxt, name);
7783: }
7784: }
1.1.1.3 ! misho 7785: if (ctxt->instate == XML_PARSER_EOF) {
! 7786: xmlFree(name);
! 7787: return(NULL);
! 7788: }
1.1 misho 7789:
7790: /*
7791: * [ WFC: Entity Declared ]
7792: * In a document without any DTD, a document with only an
7793: * internal DTD subset which contains no parameter entity
7794: * references, or a document with "standalone='yes'", the
7795: * Name given in the entity reference must match that in an
7796: * entity declaration, except that well-formed documents
7797: * need not declare any of the following entities: amp, lt,
7798: * gt, apos, quot.
7799: * The declaration of a parameter entity must precede any
7800: * reference to it.
7801: * Similarly, the declaration of a general entity must
7802: * precede any reference to it which appears in a default
7803: * value in an attribute-list declaration. Note that if
7804: * entities are declared in the external subset or in
7805: * external parameter entities, a non-validating processor
7806: * is not obligated to read and process their declarations;
7807: * for such documents, the rule that an entity must be
7808: * declared is a well-formedness constraint only if
1.1.1.3 ! misho 7809: * standalone='yes'.
1.1 misho 7810: */
7811: if (ent == NULL) {
7812: if ((ctxt->standalone == 1) ||
7813: ((ctxt->hasExternalSubset == 0) &&
7814: (ctxt->hasPErefs == 0))) {
7815: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7816: "Entity '%s' not defined\n", name);
7817: } else {
7818: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7819: "Entity '%s' not defined\n",
7820: name);
7821: }
7822: /* TODO ? check regressions ctxt->valid = 0; */
7823: }
7824:
7825: /*
7826: * [ WFC: Parsed Entity ]
7827: * An entity reference must not contain the name of an
7828: * unparsed entity
7829: */
7830: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7831: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7832: "Entity reference to unparsed entity %s\n", name);
7833: }
7834:
7835: /*
7836: * [ WFC: No External Entity References ]
7837: * Attribute values cannot contain direct or indirect
7838: * entity references to external entities.
7839: */
7840: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7841: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7842: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7843: "Attribute references external entity '%s'\n", name);
7844: }
7845: /*
7846: * [ WFC: No < in Attribute Values ]
7847: * The replacement text of any entity referred to directly or
7848: * indirectly in an attribute value (other than "&lt;") must
7849: * not contain a <.
7850: */
7851: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7852: (ent != NULL) && (ent->content != NULL) &&
7853: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7854: (xmlStrchr(ent->content, '<'))) {
7855: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7856: "'<' in entity '%s' is not allowed in attributes values\n",
7857: name);
7858: }
7859:
7860: /*
7861: * Internal check, no parameter entities here ...
7862: */
7863: else {
7864: switch (ent->etype) {
7865: case XML_INTERNAL_PARAMETER_ENTITY:
7866: case XML_EXTERNAL_PARAMETER_ENTITY:
7867: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7868: "Attempt to reference the parameter entity '%s'\n",
7869: name);
7870: break;
7871: default:
7872: break;
7873: }
7874: }
7875:
7876: /*
7877: * [ WFC: No Recursion ]
7878: * A parsed entity must not contain a recursive reference
7879: * to itself, either directly or indirectly.
7880: * Done somewhere else
7881: */
7882:
7883: xmlFree(name);
7884: *str = ptr;
7885: return(ent);
7886: }
7887:
7888: /**
7889: * xmlParsePEReference:
7890: * @ctxt: an XML parser context
7891: *
7892: * parse a parameter entity reference at the current input position
7893: * The entity content is handled directly by pushing its content as
7894: * a new input stream.
7895: *
7896: * [69] PEReference ::= '%' Name ';'
7897: *
7898: * [ WFC: No Recursion ]
7899: * A parsed entity must not contain a recursive
1.1.1.3 ! misho 7900: * reference to itself, either directly or indirectly.
1.1 misho 7901: *
7902: * [ WFC: Entity Declared ]
7903: * In a document without any DTD, a document with only an internal DTD
7904: * subset which contains no parameter entity references, or a document
7905: * with "standalone='yes'", ... ... The declaration of a parameter
7906: * entity must precede any reference to it...
7907: *
7908: * [ VC: Entity Declared ]
7909: * In a document with an external subset or external parameter entities
7910: * with "standalone='no'", ... ... The declaration of a parameter entity
7911: * must precede any reference to it...
7912: *
7913: * [ WFC: In DTD ]
7914: * Parameter-entity references may only appear in the DTD.
7915: * NOTE: misleading but this is handled.
7916: */
7917: void
7918: xmlParsePEReference(xmlParserCtxtPtr ctxt)
7919: {
7920: const xmlChar *name;
7921: xmlEntityPtr entity = NULL;
7922: xmlParserInputPtr input;
7923:
7924: if (RAW != '%')
7925: return;
7926: NEXT;
7927: name = xmlParseName(ctxt);
7928: if (name == NULL) {
7929: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7930: "xmlParsePEReference: no name\n");
7931: return;
7932: }
7933: if (RAW != ';') {
7934: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7935: return;
7936: }
7937:
7938: NEXT;
7939:
7940: /*
7941: * Increase the number of entity references parsed
7942: */
7943: ctxt->nbentities++;
7944:
7945: /*
7946: * Request the entity from SAX
7947: */
7948: if ((ctxt->sax != NULL) &&
7949: (ctxt->sax->getParameterEntity != NULL))
1.1.1.3 ! misho 7950: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
! 7951: if (ctxt->instate == XML_PARSER_EOF)
! 7952: return;
1.1 misho 7953: if (entity == NULL) {
7954: /*
7955: * [ WFC: Entity Declared ]
7956: * In a document without any DTD, a document with only an
7957: * internal DTD subset which contains no parameter entity
7958: * references, or a document with "standalone='yes'", ...
7959: * ... The declaration of a parameter entity must precede
7960: * any reference to it...
7961: */
7962: if ((ctxt->standalone == 1) ||
7963: ((ctxt->hasExternalSubset == 0) &&
7964: (ctxt->hasPErefs == 0))) {
7965: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7966: "PEReference: %%%s; not found\n",
7967: name);
7968: } else {
7969: /*
7970: * [ VC: Entity Declared ]
7971: * In a document with an external subset or external
7972: * parameter entities with "standalone='no'", ...
7973: * ... The declaration of a parameter entity must
7974: * precede any reference to it...
7975: */
7976: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7977: "PEReference: %%%s; not found\n",
7978: name, NULL);
7979: ctxt->valid = 0;
7980: }
7981: } else {
7982: /*
7983: * Internal check in case the lookup returned an entity that is not a parameter entity
7984: */
7985: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7986: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7987: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7988: "Internal: %%%s; is not a parameter entity\n",
7989: name, NULL);
7990: } else if (ctxt->input->free != deallocblankswrapper) {
7991: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7992: if (xmlPushInput(ctxt, input) < 0)
7993: return;
7994: } else {
7995: /*
7996: * TODO !!!
7997: * handle the extra spaces added before and after
7998: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7999: */
8000: input = xmlNewEntityInputStream(ctxt, entity);
8001: if (xmlPushInput(ctxt, input) < 0)
8002: return;
8003: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8004: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8005: (IS_BLANK_CH(NXT(5)))) {
8006: xmlParseTextDecl(ctxt);
8007: if (ctxt->errNo ==
8008: XML_ERR_UNSUPPORTED_ENCODING) {
8009: /*
8010: * The XML REC instructs us to stop parsing
8011: * right here
8012: */
8013: ctxt->instate = XML_PARSER_EOF;
8014: return;
8015: }
8016: }
8017: }
8018: }
8019: ctxt->hasPErefs = 1;
8020: }
8021:
8022: /**
8023: * xmlLoadEntityContent:
8024: * @ctxt: an XML parser context
8025: * @entity: an unloaded system entity
8026: *
8027: * Load the original content of the given system entity from the
8028: * ExternalID/SystemID given. This is to be used for Included in Literal
8029: * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8030: *
8031: * Returns 0 in case of success (entity->content then owns the loaded text) and -1 in case of failure
8032: */
8033: static int
8034: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8035: xmlParserInputPtr input;
8036: xmlBufferPtr buf;
8037: int l, c;
8038: int count = 0;
8039:
8040: if ((ctxt == NULL) || (entity == NULL) ||
8041: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8042: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8043: (entity->content != NULL)) {
8044: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8045: "xmlLoadEntityContent parameter error");
8046: return(-1);
8047: }
8048:
8049: if (xmlParserDebugEntities)
8050: xmlGenericError(xmlGenericErrorContext,
8051: "Reading %s entity content input\n", entity->name);
8052:
8053: buf = xmlBufferCreate();
8054: if (buf == NULL) {
8055: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8056: "xmlLoadEntityContent parameter error");
8057: return(-1);
8058: }
8059:
8060: input = xmlNewEntityInputStream(ctxt, entity);
8061: if (input == NULL) {
8062: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8063: "xmlLoadEntityContent input error");
8064: xmlBufferFree(buf);
8065: return(-1);
8066: }
8067:
8068: /*
8069: * Push the entity as the current input, read char by char
8070: * saving to the buffer until the end of the entity or an error
8071: */
8072: if (xmlPushInput(ctxt, input) < 0) {
8073: xmlBufferFree(buf);
8074: return(-1);
8075: }
8076:
8077: GROW;
8078: c = CUR_CHAR(l);
8079: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8080: (IS_CHAR(c))) {
8081: xmlBufferAdd(buf, ctxt->input->cur, l);
1.1.1.3 ! misho 8082: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 8083: count = 0;
8084: GROW;
1.1.1.3 ! misho 8085: if (ctxt->instate == XML_PARSER_EOF) {
! 8086: xmlBufferFree(buf);
! 8087: return(-1);
! 8088: }
1.1 misho 8089: }
8090: NEXTL(l);
8091: c = CUR_CHAR(l);
1.1.1.3 ! misho 8092: if (c == 0) {
! 8093: count = 0;
! 8094: GROW;
! 8095: if (ctxt->instate == XML_PARSER_EOF) {
! 8096: xmlBufferFree(buf);
! 8097: return(-1);
! 8098: }
! 8099: c = CUR_CHAR(l);
! 8100: }
1.1 misho 8101: }
8102:
8103: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8104: xmlPopInput(ctxt);
8105: } else if (!IS_CHAR(c)) {
8106: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8107: "xmlLoadEntityContent: invalid char value %d\n",
8108: c);
8109: xmlBufferFree(buf);
8110: return(-1);
8111: }
8112: entity->content = buf->content;
8113: buf->content = NULL;
8114: xmlBufferFree(buf);
8115:
8116: return(0);
8117: }
8118:
8119: /**
8120: * xmlParseStringPEReference:
8121: * @ctxt: an XML parser context
8122: * @str: a pointer to an index in the string
8123: *
8124: * parse a parameter entity reference from a string value
8125: *
8126: * [69] PEReference ::= '%' Name ';'
8127: *
8128: * [ WFC: No Recursion ]
8129: * A parsed entity must not contain a recursive
8130: * reference to itself, either directly or indirectly.
8131: *
8132: * [ WFC: Entity Declared ]
8133: * In a document without any DTD, a document with only an internal DTD
8134: * subset which contains no parameter entity references, or a document
8135: * with "standalone='yes'", ... ... The declaration of a parameter
8136: * entity must precede any reference to it...
8137: *
8138: * [ VC: Entity Declared ]
8139: * In a document with an external subset or external parameter entities
8140: * with "standalone='no'", ... ... The declaration of a parameter entity
8141: * must precede any reference to it...
8142: *
8143: * [ WFC: In DTD ]
8144: * Parameter-entity references may only appear in the DTD.
8145: * NOTE: misleading but this is handled.
8146: *
8147: * Returns the xmlEntityPtr if found, or NULL otherwise.
8148: * str is updated to the current value of the index, except when *str does not start with '%' or the parser was stopped during the lookup
8149: */
8150: static xmlEntityPtr
8151: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8152: const xmlChar *ptr;
8153: xmlChar cur;
8154: xmlChar *name;
8155: xmlEntityPtr entity = NULL;
8156:
8157: if ((str == NULL) || (*str == NULL)) return(NULL);
8158: ptr = *str;
8159: cur = *ptr;
8160: if (cur != '%')
8161: return(NULL);
8162: ptr++;
8163: name = xmlParseStringName(ctxt, &ptr);
8164: if (name == NULL) {
8165: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8166: "xmlParseStringPEReference: no name\n");
8167: *str = ptr;
8168: return(NULL);
8169: }
8170: cur = *ptr;
8171: if (cur != ';') {
8172: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8173: xmlFree(name);
8174: *str = ptr;
8175: return(NULL);
8176: }
8177: ptr++;
8178:
8179: /*
8180: * Increase the number of entity references parsed
8181: */
8182: ctxt->nbentities++;
8183:
8184: /*
8185: * Request the entity from SAX
8186: */
8187: if ((ctxt->sax != NULL) &&
8188: (ctxt->sax->getParameterEntity != NULL))
1.1.1.3 ! misho 8189: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
! 8190: if (ctxt->instate == XML_PARSER_EOF) {
! 8191: xmlFree(name);
! 8192: return(NULL);
! 8193: }
1.1 misho 8194: if (entity == NULL) {
8195: /*
8196: * [ WFC: Entity Declared ]
8197: * In a document without any DTD, a document with only an
8198: * internal DTD subset which contains no parameter entity
8199: * references, or a document with "standalone='yes'", ...
8200: * ... The declaration of a parameter entity must precede
8201: * any reference to it...
8202: */
8203: if ((ctxt->standalone == 1) ||
8204: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8205: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8206: "PEReference: %%%s; not found\n", name);
8207: } else {
8208: /*
8209: * [ VC: Entity Declared ]
8210: * In a document with an external subset or external
8211: * parameter entities with "standalone='no'", ...
8212: * ... The declaration of a parameter entity must
8213: * precede any reference to it...
8214: */
8215: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8216: "PEReference: %%%s; not found\n",
8217: name, NULL);
8218: ctxt->valid = 0;
8219: }
8220: } else {
8221: /*
8222: * Internal check in case the lookup returned an entity that is not a parameter entity
8223: */
8224: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8225: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8226: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8227: "%%%s; is not a parameter entity\n",
8228: name, NULL);
8229: }
8230: }
8231: ctxt->hasPErefs = 1;
8232: xmlFree(name);
8233: *str = ptr;
8234: return(entity);
8235: }
8236:
8237: /**
8238: * xmlParseDocTypeDecl:
8239: * @ctxt: an XML parser context
8240: *
8241: * parse a DOCTYPE declaration; returns early, leaving the input on '[', when an internal subset follows
8242: *
1.1.1.3 ! misho 8243: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1.1 misho 8244: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8245: *
8246: * [ VC: Root Element Type ]
8247: * The Name in the document type declaration must match the element
1.1.1.3 ! misho 8248: * type of the root element.
1.1 misho 8249: */
8250:
8251: void
8252: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8253: const xmlChar *name = NULL;
8254: xmlChar *ExternalID = NULL;
8255: xmlChar *URI = NULL;
8256:
8257: /*
8258: * We know that '<!DOCTYPE' has been detected.
8259: */
8260: SKIP(9);
8261:
8262: SKIP_BLANKS;
8263:
8264: /*
8265: * Parse the DOCTYPE name.
8266: */
8267: name = xmlParseName(ctxt);
8268: if (name == NULL) {
8269: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8270: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8271: }
8272: ctxt->intSubName = name;
8273:
8274: SKIP_BLANKS;
8275:
8276: /*
8277: * Check for SystemID and ExternalID
8278: */
8279: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8280:
8281: if ((URI != NULL) || (ExternalID != NULL)) {
8282: ctxt->hasExternalSubset = 1;
8283: }
8284: ctxt->extSubURI = URI;
8285: ctxt->extSubSystem = ExternalID;
8286:
8287: SKIP_BLANKS;
8288:
8289: /*
8290: * Create and update the internal subset.
8291: */
8292: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8293: (!ctxt->disableSAX))
8294: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.1.1.3 ! misho 8295: if (ctxt->instate == XML_PARSER_EOF)
! 8296: return;
1.1 misho 8297:
8298: /*
8299: * Are there any internal subset declarations?
8300: * They are handled separately in xmlParseInternalSubset()
8301: */
8302: if (RAW == '[')
8303: return;
8304:
8305: /*
8306: * We should be at the end of the DOCTYPE declaration.
8307: */
8308: if (RAW != '>') {
8309: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8310: }
8311: NEXT;
8312: }
8313:
8314: /**
8315: * xmlParseInternalSubset:
8316: * @ctxt: an XML parser context
8317: *
8318: * parse the internal subset declaration
8319: *
8320: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8321: */
8322:
8323: static void
8324: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8325: /*
8326: * Is there any DTD definition ?
8327: */
8328: if (RAW == '[') {
8329: ctxt->instate = XML_PARSER_DTD;
8330: NEXT;
8331: /*
1.1.1.3 ! misho 8332: * Parse the succession of Markup declarations and
1.1 misho 8333: * PEReferences.
8334: * Subsequence (markupdecl | PEReference | S)*
8335: */
1.1.1.3 ! misho 8336: while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 8337: const xmlChar *check = CUR_PTR;
8338: unsigned int cons = ctxt->input->consumed;
8339:
8340: SKIP_BLANKS;
8341: xmlParseMarkupDecl(ctxt);
8342: xmlParsePEReference(ctxt);
8343:
8344: /*
8345: * Pop-up of finished entities.
8346: */
8347: while ((RAW == 0) && (ctxt->inputNr > 1))
8348: xmlPopInput(ctxt);
8349:
8350: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8351: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8352: "xmlParseInternalSubset: error detected in Markup declaration\n");
8353: break;
8354: }
8355: }
1.1.1.3 ! misho 8356: if (RAW == ']') {
1.1 misho 8357: NEXT;
8358: SKIP_BLANKS;
8359: }
8360: }
8361:
8362: /*
8363: * We should be at the end of the DOCTYPE declaration.
8364: */
8365: if (RAW != '>') {
8366: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8367: }
8368: NEXT;
8369: }
8370:
8371: #ifdef LIBXML_SAX1_ENABLED
8372: /**
8373: * xmlParseAttribute:
8374: * @ctxt: an XML parser context
8375: * @value: a xmlChar ** used to store the value of the attribute
8376: *
8377: * parse an attribute
8378: *
8379: * [41] Attribute ::= Name Eq AttValue
8380: *
8381: * [ WFC: No External Entity References ]
8382: * Attribute values cannot contain direct or indirect entity references
8383: * to external entities.
8384: *
8385: * [ WFC: No < in Attribute Values ]
8386: * The replacement text of any entity referred to directly or indirectly in
1.1.1.3 ! misho 8387: * an attribute value (other than "<") must not contain a <.
! 8388: *
1.1 misho 8389: * [ VC: Attribute Value Type ]
8390: * The attribute must have been declared; the value must be of the type
8391: * declared for it.
8392: *
8393: * [25] Eq ::= S? '=' S?
8394: *
8395: * With namespace:
8396: *
8397: * [NS 11] Attribute ::= QName Eq AttValue
8398: *
8399: * Also the case QName == xmlns:??? is handled independently as a namespace
8400: * definition.
8401: *
8402: * Returns the attribute name, and the value in *value.
8403: */
8404:
8405: const xmlChar *
8406: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8407: const xmlChar *name;
8408: xmlChar *val;
8409:
8410: *value = NULL;
8411: GROW;
8412: name = xmlParseName(ctxt);
8413: if (name == NULL) {
8414: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8415: "error parsing attribute name\n");
8416: return(NULL);
8417: }
8418:
8419: /*
8420: * read the value
8421: */
8422: SKIP_BLANKS;
8423: if (RAW == '=') {
8424: NEXT;
8425: SKIP_BLANKS;
8426: val = xmlParseAttValue(ctxt);
8427: ctxt->instate = XML_PARSER_CONTENT;
8428: } else {
8429: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8430: "Specification mandate value for attribute %s\n", name);
8431: return(NULL);
8432: }
8433:
8434: /*
8435: * Check that xml:lang conforms to the specification
8436: * No more registered as an error, just generate a warning now
8437: * since this was deprecated in XML second edition
8438: */
8439: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8440: if (!xmlCheckLanguageID(val)) {
8441: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8442: "Malformed value for xml:lang : %s\n",
8443: val, NULL);
8444: }
8445: }
8446:
8447: /*
8448: * Check that xml:space conforms to the specification
8449: */
8450: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8451: if (xmlStrEqual(val, BAD_CAST "default"))
8452: *(ctxt->space) = 0;
8453: else if (xmlStrEqual(val, BAD_CAST "preserve"))
8454: *(ctxt->space) = 1;
8455: else {
8456: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8457: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8458: val, NULL);
8459: }
8460: }
8461:
8462: *value = val;
8463: return(name);
8464: }
8465:
8466: /**
8467: * xmlParseStartTag:
8468: * @ctxt: an XML parser context
1.1.1.3 ! misho 8469: *
1.1 misho 8470: * parse a start of tag either for rule element or
8471: * EmptyElement. In both case we don't parse the tag closing chars.
8472: *
8473: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8474: *
8475: * [ WFC: Unique Att Spec ]
8476: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 ! misho 8477: * empty-element tag.
1.1 misho 8478: *
8479: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8480: *
8481: * [ WFC: Unique Att Spec ]
8482: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 ! misho 8483: * empty-element tag.
1.1 misho 8484: *
8485: * With namespace:
8486: *
8487: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8488: *
8489: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8490: *
8491: * Returns the element name parsed
8492: */
8493:
8494: const xmlChar *
8495: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8496: const xmlChar *name;
8497: const xmlChar *attname;
8498: xmlChar *attvalue;
8499: const xmlChar **atts = ctxt->atts;
8500: int nbatts = 0;
8501: int maxatts = ctxt->maxatts;
8502: int i;
8503:
8504: if (RAW != '<') return(NULL);
8505: NEXT1;
8506:
8507: name = xmlParseName(ctxt);
8508: if (name == NULL) {
8509: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8510: "xmlParseStartTag: invalid element name\n");
8511: return(NULL);
8512: }
8513:
8514: /*
8515: * Now parse the attributes, it ends up with the ending
8516: *
8517: * (S Attribute)* S?
8518: */
8519: SKIP_BLANKS;
8520: GROW;
8521:
1.1.1.3 ! misho 8522: while (((RAW != '>') &&
1.1 misho 8523: ((RAW != '/') || (NXT(1) != '>')) &&
1.1.1.3 ! misho 8524: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 8525: const xmlChar *q = CUR_PTR;
8526: unsigned int cons = ctxt->input->consumed;
8527:
8528: attname = xmlParseAttribute(ctxt, &attvalue);
8529: if ((attname != NULL) && (attvalue != NULL)) {
8530: /*
8531: * [ WFC: Unique Att Spec ]
8532: * No attribute name may appear more than once in the same
1.1.1.3 ! misho 8533: * start-tag or empty-element tag.
1.1 misho 8534: */
8535: for (i = 0; i < nbatts;i += 2) {
8536: if (xmlStrEqual(atts[i], attname)) {
8537: xmlErrAttributeDup(ctxt, NULL, attname);
8538: xmlFree(attvalue);
8539: goto failed;
8540: }
8541: }
8542: /*
8543: * Add the pair to atts
8544: */
8545: if (atts == NULL) {
8546: maxatts = 22; /* allow for 10 attrs by default */
8547: atts = (const xmlChar **)
8548: xmlMalloc(maxatts * sizeof(xmlChar *));
8549: if (atts == NULL) {
8550: xmlErrMemory(ctxt, NULL);
8551: if (attvalue != NULL)
8552: xmlFree(attvalue);
8553: goto failed;
8554: }
8555: ctxt->atts = atts;
8556: ctxt->maxatts = maxatts;
8557: } else if (nbatts + 4 > maxatts) {
8558: const xmlChar **n;
8559:
8560: maxatts *= 2;
8561: n = (const xmlChar **) xmlRealloc((void *) atts,
8562: maxatts * sizeof(const xmlChar *));
8563: if (n == NULL) {
8564: xmlErrMemory(ctxt, NULL);
8565: if (attvalue != NULL)
8566: xmlFree(attvalue);
8567: goto failed;
8568: }
8569: atts = n;
8570: ctxt->atts = atts;
8571: ctxt->maxatts = maxatts;
8572: }
8573: atts[nbatts++] = attname;
8574: atts[nbatts++] = attvalue;
8575: atts[nbatts] = NULL;
8576: atts[nbatts + 1] = NULL;
8577: } else {
8578: if (attvalue != NULL)
8579: xmlFree(attvalue);
8580: }
8581:
1.1.1.3 ! misho 8582: failed:
1.1 misho 8583:
8584: GROW
8585: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8586: break;
8587: if (!IS_BLANK_CH(RAW)) {
8588: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8589: "attributes construct error\n");
8590: }
8591: SKIP_BLANKS;
8592: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8593: (attname == NULL) && (attvalue == NULL)) {
8594: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8595: "xmlParseStartTag: problem parsing attributes\n");
8596: break;
8597: }
8598: SHRINK;
8599: GROW;
8600: }
8601:
8602: /*
8603: * SAX: Start of Element !
8604: */
8605: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8606: (!ctxt->disableSAX)) {
8607: if (nbatts > 0)
8608: ctxt->sax->startElement(ctxt->userData, name, atts);
8609: else
8610: ctxt->sax->startElement(ctxt->userData, name, NULL);
8611: }
8612:
8613: if (atts != NULL) {
8614: /* Free only the content strings */
8615: for (i = 1;i < nbatts;i+=2)
8616: if (atts[i] != NULL)
8617: xmlFree((xmlChar *) atts[i]);
8618: }
8619: return(name);
8620: }
8621:
8622: /**
8623: * xmlParseEndTag1:
8624: * @ctxt: an XML parser context
8625: * @line: line of the start tag
8626: * @nsNr: number of namespaces on the start tag
8627: *
8628: * parse an end of tag
8629: *
8630: * [42] ETag ::= '</' Name S? '>'
8631: *
8632: * With namespace
8633: *
8634: * [NS 9] ETag ::= '</' QName S? '>'
8635: */
8636:
8637: static void
8638: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8639: const xmlChar *name;
8640:
8641: GROW;
8642: if ((RAW != '<') || (NXT(1) != '/')) {
8643: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8644: "xmlParseEndTag: '</' not found\n");
8645: return;
8646: }
8647: SKIP(2);
8648:
8649: name = xmlParseNameAndCompare(ctxt,ctxt->name);
8650:
8651: /*
8652: * We should definitely be at the ending "S? '>'" part
8653: */
8654: GROW;
8655: SKIP_BLANKS;
8656: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8657: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8658: } else
8659: NEXT1;
8660:
8661: /*
8662: * [ WFC: Element Type Match ]
8663: * The Name in an element's end-tag must match the element type in the
1.1.1.3 ! misho 8664: * start-tag.
1.1 misho 8665: *
8666: */
8667: if (name != (xmlChar*)1) {
8668: if (name == NULL) name = BAD_CAST "unparseable";
8669: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8670: "Opening and ending tag mismatch: %s line %d and %s\n",
8671: ctxt->name, line, name);
8672: }
8673:
8674: /*
8675: * SAX: End of Tag
8676: */
8677: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8678: (!ctxt->disableSAX))
8679: ctxt->sax->endElement(ctxt->userData, ctxt->name);
8680:
8681: namePop(ctxt);
8682: spacePop(ctxt);
8683: return;
8684: }
8685:
8686: /**
8687: * xmlParseEndTag:
8688: * @ctxt: an XML parser context
8689: *
8690: * parse an end of tag
8691: *
8692: * [42] ETag ::= '</' Name S? '>'
8693: *
8694: * With namespace
8695: *
8696: * [NS 9] ETag ::= '</' QName S? '>'
8697: */
8698:
8699: void
8700: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8701: xmlParseEndTag1(ctxt, 0);
8702: }
8703: #endif /* LIBXML_SAX1_ENABLED */
8704:
8705: /************************************************************************
8706: * *
8707: * SAX 2 specific operations *
8708: * *
8709: ************************************************************************/
8710:
8711: /*
8712: * xmlGetNamespace:
8713: * @ctxt: an XML parser context
8714: * @prefix: the prefix to lookup
8715: *
8716: * Lookup the namespace name for the @prefix (which ca be NULL)
8717: * The prefix must come from the @ctxt->dict dictionnary
8718: *
8719: * Returns the namespace name or NULL if not bound
8720: */
8721: static const xmlChar *
8722: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8723: int i;
8724:
8725: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8726: for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8727: if (ctxt->nsTab[i] == prefix) {
8728: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8729: return(NULL);
8730: return(ctxt->nsTab[i + 1]);
8731: }
8732: return(NULL);
8733: }
8734:
8735: /**
8736: * xmlParseQName:
8737: * @ctxt: an XML parser context
8738: * @prefix: pointer to store the prefix part
8739: *
8740: * parse an XML Namespace QName
8741: *
8742: * [6] QName ::= (Prefix ':')? LocalPart
8743: * [7] Prefix ::= NCName
8744: * [8] LocalPart ::= NCName
8745: *
8746: * Returns the Name parsed or NULL
8747: */
8748:
8749: static const xmlChar *
8750: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8751: const xmlChar *l, *p;
8752:
8753: GROW;
8754:
8755: l = xmlParseNCName(ctxt);
8756: if (l == NULL) {
8757: if (CUR == ':') {
8758: l = xmlParseName(ctxt);
8759: if (l != NULL) {
1.1.1.3 ! misho 8760: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
1.1 misho 8761: "Failed to parse QName '%s'\n", l, NULL, NULL);
8762: *prefix = NULL;
8763: return(l);
8764: }
8765: }
8766: return(NULL);
8767: }
8768: if (CUR == ':') {
8769: NEXT;
8770: p = l;
8771: l = xmlParseNCName(ctxt);
8772: if (l == NULL) {
8773: xmlChar *tmp;
8774:
8775: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8776: "Failed to parse QName '%s:'\n", p, NULL, NULL);
8777: l = xmlParseNmtoken(ctxt);
8778: if (l == NULL)
8779: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8780: else {
8781: tmp = xmlBuildQName(l, p, NULL, 0);
8782: xmlFree((char *)l);
8783: }
8784: p = xmlDictLookup(ctxt->dict, tmp, -1);
8785: if (tmp != NULL) xmlFree(tmp);
8786: *prefix = NULL;
8787: return(p);
8788: }
8789: if (CUR == ':') {
8790: xmlChar *tmp;
8791:
8792: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8793: "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8794: NEXT;
8795: tmp = (xmlChar *) xmlParseName(ctxt);
8796: if (tmp != NULL) {
8797: tmp = xmlBuildQName(tmp, l, NULL, 0);
8798: l = xmlDictLookup(ctxt->dict, tmp, -1);
8799: if (tmp != NULL) xmlFree(tmp);
8800: *prefix = p;
8801: return(l);
8802: }
8803: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8804: l = xmlDictLookup(ctxt->dict, tmp, -1);
8805: if (tmp != NULL) xmlFree(tmp);
8806: *prefix = p;
8807: return(l);
8808: }
8809: *prefix = p;
8810: } else
8811: *prefix = NULL;
8812: return(l);
8813: }
8814:
8815: /**
8816: * xmlParseQNameAndCompare:
8817: * @ctxt: an XML parser context
8818: * @name: the localname
8819: * @prefix: the prefix, if any.
8820: *
8821: * parse an XML name and compares for match
8822: * (specialized for endtag parsing)
8823: *
8824: * Returns NULL for an illegal name, (xmlChar*) 1 for success
8825: * and the name for mismatch
8826: */
8827:
8828: static const xmlChar *
8829: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8830: xmlChar const *prefix) {
8831: const xmlChar *cmp;
8832: const xmlChar *in;
8833: const xmlChar *ret;
8834: const xmlChar *prefix2;
8835:
8836: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8837:
8838: GROW;
8839: in = ctxt->input->cur;
8840:
8841: cmp = prefix;
8842: while (*in != 0 && *in == *cmp) {
1.1.1.3 ! misho 8843: ++in;
1.1 misho 8844: ++cmp;
8845: }
8846: if ((*cmp == 0) && (*in == ':')) {
8847: in++;
8848: cmp = name;
8849: while (*in != 0 && *in == *cmp) {
8850: ++in;
8851: ++cmp;
8852: }
8853: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8854: /* success */
8855: ctxt->input->cur = in;
8856: return((const xmlChar*) 1);
8857: }
8858: }
8859: /*
8860: * all strings coms from the dictionary, equality can be done directly
8861: */
8862: ret = xmlParseQName (ctxt, &prefix2);
8863: if ((ret == name) && (prefix == prefix2))
8864: return((const xmlChar*) 1);
8865: return ret;
8866: }
8867:
8868: /**
8869: * xmlParseAttValueInternal:
8870: * @ctxt: an XML parser context
8871: * @len: attribute len result
8872: * @alloc: whether the attribute was reallocated as a new string
8873: * @normalize: if 1 then further non-CDATA normalization must be done
8874: *
8875: * parse a value for an attribute.
8876: * NOTE: if no normalization is needed, the routine will return pointers
8877: * directly from the data buffer.
8878: *
8879: * 3.3.3 Attribute-Value Normalization:
8880: * Before the value of an attribute is passed to the application or
1.1.1.3 ! misho 8881: * checked for validity, the XML processor must normalize it as follows:
1.1 misho 8882: * - a character reference is processed by appending the referenced
8883: * character to the attribute value
8884: * - an entity reference is processed by recursively processing the
1.1.1.3 ! misho 8885: * replacement text of the entity
1.1 misho 8886: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8887: * appending #x20 to the normalized value, except that only a single
8888: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1.1.1.3 ! misho 8889: * parsed entity or the literal entity value of an internal parsed entity
! 8890: * - other characters are processed by appending them to the normalized value
1.1 misho 8891: * If the declared value is not CDATA, then the XML processor must further
8892: * process the normalized attribute value by discarding any leading and
8893: * trailing space (#x20) characters, and by replacing sequences of space
1.1.1.3 ! misho 8894: * (#x20) characters by a single space (#x20) character.
1.1 misho 8895: * All attributes for which no declaration has been read should be treated
8896: * by a non-validating parser as if declared CDATA.
8897: *
8898: * Returns the AttValue parsed or NULL. The value has to be freed by the
8899: * caller if it was copied, this can be detected by val[*len] == 0.
8900: */
8901:
8902: static xmlChar *
8903: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8904: int normalize)
8905: {
8906: xmlChar limit = 0;
8907: const xmlChar *in = NULL, *start, *end, *last;
8908: xmlChar *ret = NULL;
8909:
8910: GROW;
8911: in = (xmlChar *) CUR_PTR;
8912: if (*in != '"' && *in != '\'') {
8913: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8914: return (NULL);
8915: }
8916: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8917:
8918: /*
8919: * try to handle in this routine the most common case where no
8920: * allocation of a new string is required and where content is
8921: * pure ASCII.
8922: */
8923: limit = *in++;
8924: end = ctxt->input->end;
8925: start = in;
8926: if (in >= end) {
8927: const xmlChar *oldbase = ctxt->input->base;
8928: GROW;
8929: if (oldbase != ctxt->input->base) {
8930: long delta = ctxt->input->base - oldbase;
8931: start = start + delta;
8932: in = in + delta;
8933: }
8934: end = ctxt->input->end;
8935: }
8936: if (normalize) {
8937: /*
8938: * Skip any leading spaces
8939: */
1.1.1.3 ! misho 8940: while ((in < end) && (*in != limit) &&
1.1 misho 8941: ((*in == 0x20) || (*in == 0x9) ||
8942: (*in == 0xA) || (*in == 0xD))) {
8943: in++;
8944: start = in;
8945: if (in >= end) {
8946: const xmlChar *oldbase = ctxt->input->base;
8947: GROW;
1.1.1.3 ! misho 8948: if (ctxt->instate == XML_PARSER_EOF)
! 8949: return(NULL);
1.1 misho 8950: if (oldbase != ctxt->input->base) {
8951: long delta = ctxt->input->base - oldbase;
8952: start = start + delta;
8953: in = in + delta;
8954: }
8955: end = ctxt->input->end;
1.1.1.3 ! misho 8956: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
! 8957: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 8958: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 8959: "AttValue length too long\n");
! 8960: return(NULL);
! 8961: }
1.1 misho 8962: }
8963: }
8964: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8965: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8966: if ((*in++ == 0x20) && (*in == 0x20)) break;
8967: if (in >= end) {
8968: const xmlChar *oldbase = ctxt->input->base;
8969: GROW;
1.1.1.3 ! misho 8970: if (ctxt->instate == XML_PARSER_EOF)
! 8971: return(NULL);
1.1 misho 8972: if (oldbase != ctxt->input->base) {
8973: long delta = ctxt->input->base - oldbase;
8974: start = start + delta;
8975: in = in + delta;
8976: }
8977: end = ctxt->input->end;
1.1.1.3 ! misho 8978: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
! 8979: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 8980: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 8981: "AttValue length too long\n");
! 8982: return(NULL);
! 8983: }
1.1 misho 8984: }
8985: }
8986: last = in;
8987: /*
8988: * skip the trailing blanks
8989: */
8990: while ((last[-1] == 0x20) && (last > start)) last--;
1.1.1.3 ! misho 8991: while ((in < end) && (*in != limit) &&
1.1 misho 8992: ((*in == 0x20) || (*in == 0x9) ||
8993: (*in == 0xA) || (*in == 0xD))) {
8994: in++;
8995: if (in >= end) {
8996: const xmlChar *oldbase = ctxt->input->base;
8997: GROW;
1.1.1.3 ! misho 8998: if (ctxt->instate == XML_PARSER_EOF)
! 8999: return(NULL);
1.1 misho 9000: if (oldbase != ctxt->input->base) {
9001: long delta = ctxt->input->base - oldbase;
9002: start = start + delta;
9003: in = in + delta;
9004: last = last + delta;
9005: }
9006: end = ctxt->input->end;
1.1.1.3 ! misho 9007: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
! 9008: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 9009: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 9010: "AttValue length too long\n");
! 9011: return(NULL);
! 9012: }
1.1 misho 9013: }
9014: }
1.1.1.3 ! misho 9015: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
! 9016: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 9017: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 9018: "AttValue length too long\n");
! 9019: return(NULL);
! 9020: }
1.1 misho 9021: if (*in != limit) goto need_complex;
9022: } else {
9023: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9025: in++;
9026: if (in >= end) {
9027: const xmlChar *oldbase = ctxt->input->base;
9028: GROW;
1.1.1.3 ! misho 9029: if (ctxt->instate == XML_PARSER_EOF)
! 9030: return(NULL);
1.1 misho 9031: if (oldbase != ctxt->input->base) {
9032: long delta = ctxt->input->base - oldbase;
9033: start = start + delta;
9034: in = in + delta;
9035: }
9036: end = ctxt->input->end;
1.1.1.3 ! misho 9037: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
! 9038: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 9039: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 9040: "AttValue length too long\n");
! 9041: return(NULL);
! 9042: }
1.1 misho 9043: }
9044: }
9045: last = in;
1.1.1.3 ! misho 9046: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
! 9047: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 9048: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 9049: "AttValue length too long\n");
! 9050: return(NULL);
! 9051: }
1.1 misho 9052: if (*in != limit) goto need_complex;
9053: }
9054: in++;
9055: if (len != NULL) {
9056: *len = last - start;
9057: ret = (xmlChar *) start;
9058: } else {
9059: if (alloc) *alloc = 1;
9060: ret = xmlStrndup(start, last - start);
9061: }
9062: CUR_PTR = in;
9063: if (alloc) *alloc = 0;
9064: return ret;
9065: need_complex:
9066: if (alloc) *alloc = 1;
9067: return xmlParseAttValueComplex(ctxt, len, normalize);
9068: }
9069:
9070: /**
9071: * xmlParseAttribute2:
9072: * @ctxt: an XML parser context
9073: * @pref: the element prefix
9074: * @elem: the element name
9075: * @prefix: a xmlChar ** used to store the value of the attribute prefix
9076: * @value: a xmlChar ** used to store the value of the attribute
9077: * @len: an int * to save the length of the attribute
9078: * @alloc: an int * to indicate if the attribute was allocated
9079: *
9080: * parse an attribute in the new SAX2 framework.
9081: *
9082: * Returns the attribute name, and the value in *value, .
9083: */
9084:
9085: static const xmlChar *
9086: xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9087: const xmlChar * pref, const xmlChar * elem,
9088: const xmlChar ** prefix, xmlChar ** value,
9089: int *len, int *alloc)
9090: {
9091: const xmlChar *name;
9092: xmlChar *val, *internal_val = NULL;
9093: int normalize = 0;
9094:
9095: *value = NULL;
9096: GROW;
9097: name = xmlParseQName(ctxt, prefix);
9098: if (name == NULL) {
9099: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9100: "error parsing attribute name\n");
9101: return (NULL);
9102: }
9103:
9104: /*
9105: * get the type if needed
9106: */
9107: if (ctxt->attsSpecial != NULL) {
9108: int type;
9109:
9110: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9111: pref, elem, *prefix, name);
9112: if (type != 0)
9113: normalize = 1;
9114: }
9115:
9116: /*
9117: * read the value
9118: */
9119: SKIP_BLANKS;
9120: if (RAW == '=') {
9121: NEXT;
9122: SKIP_BLANKS;
9123: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9124: if (normalize) {
9125: /*
9126: * Sometimes a second normalisation pass for spaces is needed
9127: * but that only happens if charrefs or entities refernces
9128: * have been used in the attribute value, i.e. the attribute
9129: * value have been extracted in an allocated string already.
9130: */
9131: if (*alloc) {
9132: const xmlChar *val2;
9133:
9134: val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9135: if ((val2 != NULL) && (val2 != val)) {
9136: xmlFree(val);
9137: val = (xmlChar *) val2;
9138: }
9139: }
9140: }
9141: ctxt->instate = XML_PARSER_CONTENT;
9142: } else {
9143: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9144: "Specification mandate value for attribute %s\n",
9145: name);
9146: return (NULL);
9147: }
9148:
9149: if (*prefix == ctxt->str_xml) {
9150: /*
9151: * Check that xml:lang conforms to the specification
9152: * No more registered as an error, just generate a warning now
9153: * since this was deprecated in XML second edition
9154: */
9155: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9156: internal_val = xmlStrndup(val, *len);
9157: if (!xmlCheckLanguageID(internal_val)) {
9158: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9159: "Malformed value for xml:lang : %s\n",
9160: internal_val, NULL);
9161: }
9162: }
9163:
9164: /*
9165: * Check that xml:space conforms to the specification
9166: */
9167: if (xmlStrEqual(name, BAD_CAST "space")) {
9168: internal_val = xmlStrndup(val, *len);
9169: if (xmlStrEqual(internal_val, BAD_CAST "default"))
9170: *(ctxt->space) = 0;
9171: else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9172: *(ctxt->space) = 1;
9173: else {
9174: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9175: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9176: internal_val, NULL);
9177: }
9178: }
9179: if (internal_val) {
9180: xmlFree(internal_val);
9181: }
9182: }
9183:
9184: *value = val;
9185: return (name);
9186: }
9187: /**
9188: * xmlParseStartTag2:
9189: * @ctxt: an XML parser context
1.1.1.3 ! misho 9190: *
1.1 misho 9191: * parse a start of tag either for rule element or
9192: * EmptyElement. In both case we don't parse the tag closing chars.
9193: * This routine is called when running SAX2 parsing
9194: *
9195: * [40] STag ::= '<' Name (S Attribute)* S? '>'
9196: *
9197: * [ WFC: Unique Att Spec ]
9198: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 ! misho 9199: * empty-element tag.
1.1 misho 9200: *
9201: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9202: *
9203: * [ WFC: Unique Att Spec ]
9204: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 ! misho 9205: * empty-element tag.
1.1 misho 9206: *
9207: * With namespace:
9208: *
9209: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9210: *
9211: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9212: *
9213: * Returns the element name parsed
9214: */
9215:
9216: static const xmlChar *
9217: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9218: const xmlChar **URI, int *tlen) {
9219: const xmlChar *localname;
9220: const xmlChar *prefix;
9221: const xmlChar *attname;
9222: const xmlChar *aprefix;
9223: const xmlChar *nsname;
9224: xmlChar *attvalue;
9225: const xmlChar **atts = ctxt->atts;
9226: int maxatts = ctxt->maxatts;
9227: int nratts, nbatts, nbdef;
9228: int i, j, nbNs, attval, oldline, oldcol;
9229: const xmlChar *base;
9230: unsigned long cur;
9231: int nsNr = ctxt->nsNr;
9232:
9233: if (RAW != '<') return(NULL);
9234: NEXT1;
9235:
9236: /*
9237: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9238: * point since the attribute values may be stored as pointers to
9239: * the buffer and calling SHRINK would destroy them !
9240: * The Shrinking is only possible once the full set of attribute
9241: * callbacks have been done.
9242: */
9243: reparse:
9244: SHRINK;
9245: base = ctxt->input->base;
9246: cur = ctxt->input->cur - ctxt->input->base;
9247: oldline = ctxt->input->line;
9248: oldcol = ctxt->input->col;
9249: nbatts = 0;
9250: nratts = 0;
9251: nbdef = 0;
9252: nbNs = 0;
9253: attval = 0;
9254: /* Forget any namespaces added during an earlier parse of this element. */
9255: ctxt->nsNr = nsNr;
9256:
9257: localname = xmlParseQName(ctxt, &prefix);
9258: if (localname == NULL) {
9259: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9260: "StartTag: invalid element name\n");
9261: return(NULL);
9262: }
9263: *tlen = ctxt->input->cur - ctxt->input->base - cur;
9264:
9265: /*
9266: * Now parse the attributes, it ends up with the ending
9267: *
9268: * (S Attribute)* S?
9269: */
9270: SKIP_BLANKS;
9271: GROW;
9272: if (ctxt->input->base != base) goto base_changed;
9273:
1.1.1.3 ! misho 9274: while (((RAW != '>') &&
1.1 misho 9275: ((RAW != '/') || (NXT(1) != '>')) &&
1.1.1.3 ! misho 9276: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 9277: const xmlChar *q = CUR_PTR;
9278: unsigned int cons = ctxt->input->consumed;
9279: int len = -1, alloc = 0;
9280:
9281: attname = xmlParseAttribute2(ctxt, prefix, localname,
9282: &aprefix, &attvalue, &len, &alloc);
9283: if (ctxt->input->base != base) {
9284: if ((attvalue != NULL) && (alloc != 0))
9285: xmlFree(attvalue);
9286: attvalue = NULL;
9287: goto base_changed;
9288: }
9289: if ((attname != NULL) && (attvalue != NULL)) {
9290: if (len < 0) len = xmlStrlen(attvalue);
9291: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9292: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9293: xmlURIPtr uri;
9294:
9295: if (*URL != 0) {
9296: uri = xmlParseURI((const char *) URL);
9297: if (uri == NULL) {
9298: xmlNsErr(ctxt, XML_WAR_NS_URI,
9299: "xmlns: '%s' is not a valid URI\n",
9300: URL, NULL, NULL);
9301: } else {
9302: if (uri->scheme == NULL) {
9303: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9304: "xmlns: URI %s is not absolute\n",
9305: URL, NULL, NULL);
9306: }
9307: xmlFreeURI(uri);
9308: }
9309: if (URL == ctxt->str_xml_ns) {
9310: if (attname != ctxt->str_xml) {
9311: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312: "xml namespace URI cannot be the default namespace\n",
9313: NULL, NULL, NULL);
9314: }
9315: goto skip_default_ns;
9316: }
9317: if ((len == 29) &&
9318: (xmlStrEqual(URL,
9319: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9320: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321: "reuse of the xmlns namespace name is forbidden\n",
9322: NULL, NULL, NULL);
9323: goto skip_default_ns;
9324: }
9325: }
9326: /*
9327: * check that it's not a defined namespace
9328: */
9329: for (j = 1;j <= nbNs;j++)
9330: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9331: break;
9332: if (j <= nbNs)
9333: xmlErrAttributeDup(ctxt, NULL, attname);
9334: else
9335: if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9336: skip_default_ns:
9337: if (alloc != 0) xmlFree(attvalue);
9338: SKIP_BLANKS;
9339: continue;
9340: }
9341: if (aprefix == ctxt->str_xmlns) {
9342: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9343: xmlURIPtr uri;
9344:
9345: if (attname == ctxt->str_xml) {
9346: if (URL != ctxt->str_xml_ns) {
9347: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9348: "xml namespace prefix mapped to wrong URI\n",
9349: NULL, NULL, NULL);
9350: }
9351: /*
9352: * Do not keep a namespace definition node
9353: */
9354: goto skip_ns;
9355: }
9356: if (URL == ctxt->str_xml_ns) {
9357: if (attname != ctxt->str_xml) {
9358: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9359: "xml namespace URI mapped to wrong prefix\n",
9360: NULL, NULL, NULL);
9361: }
9362: goto skip_ns;
9363: }
9364: if (attname == ctxt->str_xmlns) {
9365: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9366: "redefinition of the xmlns prefix is forbidden\n",
9367: NULL, NULL, NULL);
9368: goto skip_ns;
9369: }
9370: if ((len == 29) &&
9371: (xmlStrEqual(URL,
9372: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9373: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374: "reuse of the xmlns namespace name is forbidden\n",
9375: NULL, NULL, NULL);
9376: goto skip_ns;
9377: }
9378: if ((URL == NULL) || (URL[0] == 0)) {
9379: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9380: "xmlns:%s: Empty XML namespace is not allowed\n",
9381: attname, NULL, NULL);
9382: goto skip_ns;
9383: } else {
9384: uri = xmlParseURI((const char *) URL);
9385: if (uri == NULL) {
9386: xmlNsErr(ctxt, XML_WAR_NS_URI,
9387: "xmlns:%s: '%s' is not a valid URI\n",
9388: attname, URL, NULL);
9389: } else {
9390: if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9391: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9392: "xmlns:%s: URI %s is not absolute\n",
9393: attname, URL, NULL);
9394: }
9395: xmlFreeURI(uri);
9396: }
9397: }
9398:
9399: /*
9400: * check that it's not a defined namespace
9401: */
9402: for (j = 1;j <= nbNs;j++)
9403: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9404: break;
9405: if (j <= nbNs)
9406: xmlErrAttributeDup(ctxt, aprefix, attname);
9407: else
9408: if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9409: skip_ns:
9410: if (alloc != 0) xmlFree(attvalue);
9411: SKIP_BLANKS;
9412: if (ctxt->input->base != base) goto base_changed;
9413: continue;
9414: }
9415:
9416: /*
9417: * Add the pair to atts
9418: */
9419: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9420: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9421: if (attvalue[len] == 0)
9422: xmlFree(attvalue);
9423: goto failed;
9424: }
9425: maxatts = ctxt->maxatts;
9426: atts = ctxt->atts;
9427: }
9428: ctxt->attallocs[nratts++] = alloc;
9429: atts[nbatts++] = attname;
9430: atts[nbatts++] = aprefix;
9431: atts[nbatts++] = NULL; /* the URI will be fetched later */
9432: atts[nbatts++] = attvalue;
9433: attvalue += len;
9434: atts[nbatts++] = attvalue;
9435: /*
9436: * tag if some deallocation is needed
9437: */
9438: if (alloc != 0) attval = 1;
9439: } else {
9440: if ((attvalue != NULL) && (attvalue[len] == 0))
9441: xmlFree(attvalue);
9442: }
9443:
9444: failed:
9445:
9446: GROW
1.1.1.3 ! misho 9447: if (ctxt->instate == XML_PARSER_EOF)
! 9448: break;
1.1 misho 9449: if (ctxt->input->base != base) goto base_changed;
9450: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9451: break;
9452: if (!IS_BLANK_CH(RAW)) {
9453: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9454: "attributes construct error\n");
9455: break;
9456: }
9457: SKIP_BLANKS;
9458: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9459: (attname == NULL) && (attvalue == NULL)) {
9460: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9461: "xmlParseStartTag: problem parsing attributes\n");
9462: break;
9463: }
9464: GROW;
9465: if (ctxt->input->base != base) goto base_changed;
9466: }
9467:
9468: /*
9469: * The attributes defaulting
9470: */
9471: if (ctxt->attsDefault != NULL) {
9472: xmlDefAttrsPtr defaults;
9473:
9474: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9475: if (defaults != NULL) {
9476: for (i = 0;i < defaults->nbAttrs;i++) {
9477: attname = defaults->values[5 * i];
9478: aprefix = defaults->values[5 * i + 1];
9479:
9480: /*
9481: * special work for namespaces defaulted defs
9482: */
9483: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9484: /*
9485: * check that it's not a defined namespace
9486: */
9487: for (j = 1;j <= nbNs;j++)
9488: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9489: break;
9490: if (j <= nbNs) continue;
9491:
9492: nsname = xmlGetNamespace(ctxt, NULL);
9493: if (nsname != defaults->values[5 * i + 2]) {
9494: if (nsPush(ctxt, NULL,
9495: defaults->values[5 * i + 2]) > 0)
9496: nbNs++;
9497: }
9498: } else if (aprefix == ctxt->str_xmlns) {
9499: /*
9500: * check that it's not a defined namespace
9501: */
9502: for (j = 1;j <= nbNs;j++)
9503: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9504: break;
9505: if (j <= nbNs) continue;
9506:
9507: nsname = xmlGetNamespace(ctxt, attname);
9508: if (nsname != defaults->values[2]) {
9509: if (nsPush(ctxt, attname,
9510: defaults->values[5 * i + 2]) > 0)
9511: nbNs++;
9512: }
9513: } else {
9514: /*
9515: * check that it's not a defined attribute
9516: */
9517: for (j = 0;j < nbatts;j+=5) {
9518: if ((attname == atts[j]) && (aprefix == atts[j+1]))
9519: break;
9520: }
9521: if (j < nbatts) continue;
9522:
9523: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9524: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9525: return(NULL);
9526: }
9527: maxatts = ctxt->maxatts;
9528: atts = ctxt->atts;
9529: }
9530: atts[nbatts++] = attname;
9531: atts[nbatts++] = aprefix;
9532: if (aprefix == NULL)
9533: atts[nbatts++] = NULL;
9534: else
9535: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9536: atts[nbatts++] = defaults->values[5 * i + 2];
9537: atts[nbatts++] = defaults->values[5 * i + 3];
9538: if ((ctxt->standalone == 1) &&
9539: (defaults->values[5 * i + 4] != NULL)) {
1.1.1.3 ! misho 9540: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
1.1 misho 9541: "standalone: attribute %s on %s defaulted from external subset\n",
9542: attname, localname);
9543: }
9544: nbdef++;
9545: }
9546: }
9547: }
9548: }
9549:
9550: /*
9551: * The attributes checkings
9552: */
9553: for (i = 0; i < nbatts;i += 5) {
9554: /*
9555: * The default namespace does not apply to attribute names.
9556: */
9557: if (atts[i + 1] != NULL) {
9558: nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9559: if (nsname == NULL) {
9560: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9561: "Namespace prefix %s for %s on %s is not defined\n",
9562: atts[i + 1], atts[i], localname);
9563: }
9564: atts[i + 2] = nsname;
9565: } else
9566: nsname = NULL;
9567: /*
9568: * [ WFC: Unique Att Spec ]
9569: * No attribute name may appear more than once in the same
1.1.1.3 ! misho 9570: * start-tag or empty-element tag.
1.1 misho 9571: * As extended by the Namespace in XML REC.
9572: */
9573: for (j = 0; j < i;j += 5) {
9574: if (atts[i] == atts[j]) {
9575: if (atts[i+1] == atts[j+1]) {
9576: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9577: break;
9578: }
9579: if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9580: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9581: "Namespaced Attribute %s in '%s' redefined\n",
9582: atts[i], nsname, NULL);
9583: break;
9584: }
9585: }
9586: }
9587: }
9588:
9589: nsname = xmlGetNamespace(ctxt, prefix);
9590: if ((prefix != NULL) && (nsname == NULL)) {
9591: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9592: "Namespace prefix %s on %s is not defined\n",
9593: prefix, localname, NULL);
9594: }
9595: *pref = prefix;
9596: *URI = nsname;
9597:
9598: /*
9599: * SAX: Start of Element !
9600: */
9601: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9602: (!ctxt->disableSAX)) {
9603: if (nbNs > 0)
9604: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9605: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9606: nbatts / 5, nbdef, atts);
9607: else
9608: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609: nsname, 0, NULL, nbatts / 5, nbdef, atts);
9610: }
9611:
9612: /*
9613: * Free up attribute allocated strings if needed
9614: */
9615: if (attval != 0) {
9616: for (i = 3,j = 0; j < nratts;i += 5,j++)
9617: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9618: xmlFree((xmlChar *) atts[i]);
9619: }
9620:
9621: return(localname);
9622:
9623: base_changed:
9624: /*
9625: * the attribute strings are valid iif the base didn't changed
9626: */
9627: if (attval != 0) {
9628: for (i = 3,j = 0; j < nratts;i += 5,j++)
9629: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9630: xmlFree((xmlChar *) atts[i]);
9631: }
9632: ctxt->input->cur = ctxt->input->base + cur;
9633: ctxt->input->line = oldline;
9634: ctxt->input->col = oldcol;
9635: if (ctxt->wellFormed == 1) {
9636: goto reparse;
9637: }
9638: return(NULL);
9639: }
9640:
9641: /**
9642: * xmlParseEndTag2:
9643: * @ctxt: an XML parser context
9644: * @line: line of the start tag
9645: * @nsNr: number of namespaces on the start tag
9646: *
9647: * parse an end of tag
9648: *
9649: * [42] ETag ::= '</' Name S? '>'
9650: *
9651: * With namespace
9652: *
9653: * [NS 9] ETag ::= '</' QName S? '>'
9654: */
9655:
9656: static void
9657: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9658: const xmlChar *URI, int line, int nsNr, int tlen) {
9659: const xmlChar *name;
9660:
9661: GROW;
9662: if ((RAW != '<') || (NXT(1) != '/')) {
9663: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9664: return;
9665: }
9666: SKIP(2);
9667:
9668: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9669: if (ctxt->input->cur[tlen] == '>') {
9670: ctxt->input->cur += tlen + 1;
9671: goto done;
9672: }
9673: ctxt->input->cur += tlen;
9674: name = (xmlChar*)1;
9675: } else {
9676: if (prefix == NULL)
9677: name = xmlParseNameAndCompare(ctxt, ctxt->name);
9678: else
9679: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9680: }
9681:
9682: /*
9683: * We should definitely be at the ending "S? '>'" part
9684: */
9685: GROW;
1.1.1.3 ! misho 9686: if (ctxt->instate == XML_PARSER_EOF)
! 9687: return;
1.1 misho 9688: SKIP_BLANKS;
9689: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9690: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9691: } else
9692: NEXT1;
9693:
9694: /*
9695: * [ WFC: Element Type Match ]
9696: * The Name in an element's end-tag must match the element type in the
1.1.1.3 ! misho 9697: * start-tag.
1.1 misho 9698: *
9699: */
9700: if (name != (xmlChar*)1) {
9701: if (name == NULL) name = BAD_CAST "unparseable";
9702: if ((line == 0) && (ctxt->node != NULL))
9703: line = ctxt->node->line;
9704: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9705: "Opening and ending tag mismatch: %s line %d and %s\n",
9706: ctxt->name, line, name);
9707: }
9708:
9709: /*
9710: * SAX: End of Tag
9711: */
9712: done:
9713: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9714: (!ctxt->disableSAX))
9715: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9716:
9717: spacePop(ctxt);
9718: if (nsNr != 0)
9719: nsPop(ctxt, nsNr);
9720: return;
9721: }
9722:
9723: /**
9724: * xmlParseCDSect:
9725: * @ctxt: an XML parser context
1.1.1.3 ! misho 9726: *
1.1 misho 9727: * Parse escaped pure raw content.
9728: *
9729: * [18] CDSect ::= CDStart CData CDEnd
9730: *
9731: * [19] CDStart ::= '<![CDATA['
9732: *
9733: * [20] Data ::= (Char* - (Char* ']]>' Char*))
9734: *
9735: * [21] CDEnd ::= ']]>'
9736: */
9737: void
9738: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9739: xmlChar *buf = NULL;
9740: int len = 0;
9741: int size = XML_PARSER_BUFFER_SIZE;
9742: int r, rl;
9743: int s, sl;
9744: int cur, l;
9745: int count = 0;
9746:
9747: /* Check 2.6.0 was NXT(0) not RAW */
9748: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9749: SKIP(9);
9750: } else
9751: return;
9752:
9753: ctxt->instate = XML_PARSER_CDATA_SECTION;
9754: r = CUR_CHAR(rl);
9755: if (!IS_CHAR(r)) {
9756: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9757: ctxt->instate = XML_PARSER_CONTENT;
9758: return;
9759: }
9760: NEXTL(rl);
9761: s = CUR_CHAR(sl);
9762: if (!IS_CHAR(s)) {
9763: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9764: ctxt->instate = XML_PARSER_CONTENT;
9765: return;
9766: }
9767: NEXTL(sl);
9768: cur = CUR_CHAR(l);
9769: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9770: if (buf == NULL) {
9771: xmlErrMemory(ctxt, NULL);
9772: return;
9773: }
9774: while (IS_CHAR(cur) &&
9775: ((r != ']') || (s != ']') || (cur != '>'))) {
9776: if (len + 5 >= size) {
9777: xmlChar *tmp;
9778:
1.1.1.3 ! misho 9779: if ((size > XML_MAX_TEXT_LENGTH) &&
! 9780: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
! 9781: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
! 9782: "CData section too big found", NULL);
! 9783: xmlFree (buf);
! 9784: return;
! 9785: }
! 9786: tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
1.1 misho 9787: if (tmp == NULL) {
9788: xmlFree(buf);
9789: xmlErrMemory(ctxt, NULL);
9790: return;
9791: }
9792: buf = tmp;
1.1.1.3 ! misho 9793: size *= 2;
1.1 misho 9794: }
9795: COPY_BUF(rl,buf,len,r);
9796: r = s;
9797: rl = sl;
9798: s = cur;
9799: sl = l;
9800: count++;
9801: if (count > 50) {
9802: GROW;
1.1.1.3 ! misho 9803: if (ctxt->instate == XML_PARSER_EOF) {
! 9804: xmlFree(buf);
! 9805: return;
! 9806: }
1.1 misho 9807: count = 0;
9808: }
9809: NEXTL(l);
9810: cur = CUR_CHAR(l);
9811: }
9812: buf[len] = 0;
9813: ctxt->instate = XML_PARSER_CONTENT;
9814: if (cur != '>') {
9815: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9816: "CData section not finished\n%.50s\n", buf);
9817: xmlFree(buf);
9818: return;
9819: }
9820: NEXTL(l);
9821:
9822: /*
9823: * OK the buffer is to be consumed as cdata.
9824: */
9825: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9826: if (ctxt->sax->cdataBlock != NULL)
9827: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9828: else if (ctxt->sax->characters != NULL)
9829: ctxt->sax->characters(ctxt->userData, buf, len);
9830: }
9831: xmlFree(buf);
9832: }
9833:
9834: /**
9835: * xmlParseContent:
9836: * @ctxt: an XML parser context
9837: *
9838: * Parse a content:
9839: *
9840: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9841: */
9842:
9843: void
9844: xmlParseContent(xmlParserCtxtPtr ctxt) {
9845: GROW;
9846: while ((RAW != 0) &&
9847: ((RAW != '<') || (NXT(1) != '/')) &&
9848: (ctxt->instate != XML_PARSER_EOF)) {
9849: const xmlChar *test = CUR_PTR;
9850: unsigned int cons = ctxt->input->consumed;
9851: const xmlChar *cur = ctxt->input->cur;
9852:
9853: /*
9854: * First case : a Processing Instruction.
9855: */
9856: if ((*cur == '<') && (cur[1] == '?')) {
9857: xmlParsePI(ctxt);
9858: }
9859:
9860: /*
9861: * Second case : a CDSection
9862: */
9863: /* 2.6.0 test was *cur not RAW */
9864: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9865: xmlParseCDSect(ctxt);
9866: }
9867:
9868: /*
9869: * Third case : a comment
9870: */
9871: else if ((*cur == '<') && (NXT(1) == '!') &&
9872: (NXT(2) == '-') && (NXT(3) == '-')) {
9873: xmlParseComment(ctxt);
9874: ctxt->instate = XML_PARSER_CONTENT;
9875: }
9876:
9877: /*
9878: * Fourth case : a sub-element.
9879: */
9880: else if (*cur == '<') {
9881: xmlParseElement(ctxt);
9882: }
9883:
9884: /*
9885: * Fifth case : a reference. If if has not been resolved,
1.1.1.3 ! misho 9886: * parsing returns it's Name, create the node
1.1 misho 9887: */
9888:
9889: else if (*cur == '&') {
9890: xmlParseReference(ctxt);
9891: }
9892:
9893: /*
9894: * Last case, text. Note that References are handled directly.
9895: */
9896: else {
9897: xmlParseCharData(ctxt, 0);
9898: }
9899:
9900: GROW;
9901: /*
9902: * Pop-up of finished entities.
9903: */
9904: while ((RAW == 0) && (ctxt->inputNr > 1))
9905: xmlPopInput(ctxt);
9906: SHRINK;
9907:
9908: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9909: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9910: "detected an error in element content\n");
9911: ctxt->instate = XML_PARSER_EOF;
9912: break;
9913: }
9914: }
9915: }
9916:
9917: /**
9918: * xmlParseElement:
9919: * @ctxt: an XML parser context
9920: *
9921: * parse an XML element, this is highly recursive
9922: *
9923: * [39] element ::= EmptyElemTag | STag content ETag
9924: *
9925: * [ WFC: Element Type Match ]
9926: * The Name in an element's end-tag must match the element type in the
1.1.1.3 ! misho 9927: * start-tag.
1.1 misho 9928: *
9929: */
9930:
9931: void
9932: xmlParseElement(xmlParserCtxtPtr ctxt) {
9933: const xmlChar *name;
9934: const xmlChar *prefix = NULL;
9935: const xmlChar *URI = NULL;
9936: xmlParserNodeInfo node_info;
1.1.1.2 misho 9937: int line, tlen = 0;
1.1 misho 9938: xmlNodePtr ret;
9939: int nsNr = ctxt->nsNr;
9940:
9941: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9942: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9943: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9944: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9945: xmlParserMaxDepth);
9946: ctxt->instate = XML_PARSER_EOF;
9947: return;
9948: }
9949:
9950: /* Capture start position */
9951: if (ctxt->record_info) {
9952: node_info.begin_pos = ctxt->input->consumed +
9953: (CUR_PTR - ctxt->input->base);
9954: node_info.begin_line = ctxt->input->line;
9955: }
9956:
9957: if (ctxt->spaceNr == 0)
9958: spacePush(ctxt, -1);
9959: else if (*ctxt->space == -2)
9960: spacePush(ctxt, -1);
9961: else
9962: spacePush(ctxt, *ctxt->space);
9963:
9964: line = ctxt->input->line;
9965: #ifdef LIBXML_SAX1_ENABLED
9966: if (ctxt->sax2)
9967: #endif /* LIBXML_SAX1_ENABLED */
9968: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9969: #ifdef LIBXML_SAX1_ENABLED
9970: else
9971: name = xmlParseStartTag(ctxt);
9972: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 9973: if (ctxt->instate == XML_PARSER_EOF)
9974: return;
1.1 misho 9975: if (name == NULL) {
9976: spacePop(ctxt);
9977: return;
9978: }
9979: namePush(ctxt, name);
9980: ret = ctxt->node;
9981:
9982: #ifdef LIBXML_VALID_ENABLED
9983: /*
9984: * [ VC: Root Element Type ]
9985: * The Name in the document type declaration must match the element
1.1.1.3 ! misho 9986: * type of the root element.
1.1 misho 9987: */
9988: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9989: ctxt->node && (ctxt->node == ctxt->myDoc->children))
9990: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9991: #endif /* LIBXML_VALID_ENABLED */
9992:
9993: /*
9994: * Check for an Empty Element.
9995: */
9996: if ((RAW == '/') && (NXT(1) == '>')) {
9997: SKIP(2);
9998: if (ctxt->sax2) {
9999: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10000: (!ctxt->disableSAX))
10001: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10002: #ifdef LIBXML_SAX1_ENABLED
10003: } else {
10004: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10005: (!ctxt->disableSAX))
10006: ctxt->sax->endElement(ctxt->userData, name);
10007: #endif /* LIBXML_SAX1_ENABLED */
10008: }
10009: namePop(ctxt);
10010: spacePop(ctxt);
10011: if (nsNr != ctxt->nsNr)
10012: nsPop(ctxt, ctxt->nsNr - nsNr);
10013: if ( ret != NULL && ctxt->record_info ) {
10014: node_info.end_pos = ctxt->input->consumed +
10015: (CUR_PTR - ctxt->input->base);
10016: node_info.end_line = ctxt->input->line;
10017: node_info.node = ret;
10018: xmlParserAddNodeInfo(ctxt, &node_info);
10019: }
10020: return;
10021: }
10022: if (RAW == '>') {
10023: NEXT1;
10024: } else {
10025: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10026: "Couldn't find end of Start Tag %s line %d\n",
10027: name, line, NULL);
10028:
10029: /*
10030: * end of parsing of this node.
10031: */
10032: nodePop(ctxt);
10033: namePop(ctxt);
10034: spacePop(ctxt);
10035: if (nsNr != ctxt->nsNr)
10036: nsPop(ctxt, ctxt->nsNr - nsNr);
10037:
10038: /*
10039: * Capture end position and add node
10040: */
10041: if ( ret != NULL && ctxt->record_info ) {
10042: node_info.end_pos = ctxt->input->consumed +
10043: (CUR_PTR - ctxt->input->base);
10044: node_info.end_line = ctxt->input->line;
10045: node_info.node = ret;
10046: xmlParserAddNodeInfo(ctxt, &node_info);
10047: }
10048: return;
10049: }
10050:
10051: /*
10052: * Parse the content of the element:
10053: */
10054: xmlParseContent(ctxt);
1.1.1.3 ! misho 10055: if (ctxt->instate == XML_PARSER_EOF)
! 10056: return;
1.1 misho 10057: if (!IS_BYTE_CHAR(RAW)) {
10058: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10059: "Premature end of data in tag %s line %d\n",
10060: name, line, NULL);
10061:
10062: /*
10063: * end of parsing of this node.
10064: */
10065: nodePop(ctxt);
10066: namePop(ctxt);
10067: spacePop(ctxt);
10068: if (nsNr != ctxt->nsNr)
10069: nsPop(ctxt, ctxt->nsNr - nsNr);
10070: return;
10071: }
10072:
10073: /*
10074: * parse the end of tag: '</' should be here.
10075: */
10076: if (ctxt->sax2) {
10077: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10078: namePop(ctxt);
10079: }
10080: #ifdef LIBXML_SAX1_ENABLED
10081: else
10082: xmlParseEndTag1(ctxt, line);
10083: #endif /* LIBXML_SAX1_ENABLED */
10084:
10085: /*
10086: * Capture end position and add node
10087: */
10088: if ( ret != NULL && ctxt->record_info ) {
10089: node_info.end_pos = ctxt->input->consumed +
10090: (CUR_PTR - ctxt->input->base);
10091: node_info.end_line = ctxt->input->line;
10092: node_info.node = ret;
10093: xmlParserAddNodeInfo(ctxt, &node_info);
10094: }
10095: }
10096:
10097: /**
10098: * xmlParseVersionNum:
10099: * @ctxt: an XML parser context
10100: *
10101: * parse the XML version value.
10102: *
10103: * [26] VersionNum ::= '1.' [0-9]+
10104: *
10105: * In practice allow [0-9].[0-9]+ at that level
10106: *
10107: * Returns the string giving the XML version number, or NULL
10108: */
10109: xmlChar *
10110: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10111: xmlChar *buf = NULL;
10112: int len = 0;
10113: int size = 10;
10114: xmlChar cur;
10115:
10116: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10117: if (buf == NULL) {
10118: xmlErrMemory(ctxt, NULL);
10119: return(NULL);
10120: }
10121: cur = CUR;
10122: if (!((cur >= '0') && (cur <= '9'))) {
10123: xmlFree(buf);
10124: return(NULL);
10125: }
10126: buf[len++] = cur;
10127: NEXT;
10128: cur=CUR;
10129: if (cur != '.') {
10130: xmlFree(buf);
10131: return(NULL);
10132: }
10133: buf[len++] = cur;
10134: NEXT;
10135: cur=CUR;
10136: while ((cur >= '0') && (cur <= '9')) {
10137: if (len + 1 >= size) {
10138: xmlChar *tmp;
10139:
10140: size *= 2;
10141: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10142: if (tmp == NULL) {
10143: xmlFree(buf);
10144: xmlErrMemory(ctxt, NULL);
10145: return(NULL);
10146: }
10147: buf = tmp;
10148: }
10149: buf[len++] = cur;
10150: NEXT;
10151: cur=CUR;
10152: }
10153: buf[len] = 0;
10154: return(buf);
10155: }
10156:
10157: /**
10158: * xmlParseVersionInfo:
10159: * @ctxt: an XML parser context
10160: *
10161: * parse the XML version.
10162: *
10163: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10164: *
10165: * [25] Eq ::= S? '=' S?
10166: *
10167: * Returns the version string, e.g. "1.0"
10168: */
10169:
10170: xmlChar *
10171: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10172: xmlChar *version = NULL;
10173:
10174: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10175: SKIP(7);
10176: SKIP_BLANKS;
10177: if (RAW != '=') {
10178: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10179: return(NULL);
10180: }
10181: NEXT;
10182: SKIP_BLANKS;
10183: if (RAW == '"') {
10184: NEXT;
10185: version = xmlParseVersionNum(ctxt);
10186: if (RAW != '"') {
10187: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10188: } else
10189: NEXT;
10190: } else if (RAW == '\''){
10191: NEXT;
10192: version = xmlParseVersionNum(ctxt);
10193: if (RAW != '\'') {
10194: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10195: } else
10196: NEXT;
10197: } else {
10198: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10199: }
10200: }
10201: return(version);
10202: }
10203:
10204: /**
10205: * xmlParseEncName:
10206: * @ctxt: an XML parser context
10207: *
10208: * parse the XML encoding name
10209: *
10210: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10211: *
10212: * Returns the encoding name value or NULL
10213: */
10214: xmlChar *
10215: xmlParseEncName(xmlParserCtxtPtr ctxt) {
10216: xmlChar *buf = NULL;
10217: int len = 0;
10218: int size = 10;
10219: xmlChar cur;
10220:
10221: cur = CUR;
10222: if (((cur >= 'a') && (cur <= 'z')) ||
10223: ((cur >= 'A') && (cur <= 'Z'))) {
10224: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10225: if (buf == NULL) {
10226: xmlErrMemory(ctxt, NULL);
10227: return(NULL);
10228: }
10229:
10230: buf[len++] = cur;
10231: NEXT;
10232: cur = CUR;
10233: while (((cur >= 'a') && (cur <= 'z')) ||
10234: ((cur >= 'A') && (cur <= 'Z')) ||
10235: ((cur >= '0') && (cur <= '9')) ||
10236: (cur == '.') || (cur == '_') ||
10237: (cur == '-')) {
10238: if (len + 1 >= size) {
10239: xmlChar *tmp;
10240:
10241: size *= 2;
10242: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10243: if (tmp == NULL) {
10244: xmlErrMemory(ctxt, NULL);
10245: xmlFree(buf);
10246: return(NULL);
10247: }
10248: buf = tmp;
10249: }
10250: buf[len++] = cur;
10251: NEXT;
10252: cur = CUR;
10253: if (cur == 0) {
10254: SHRINK;
10255: GROW;
10256: cur = CUR;
10257: }
10258: }
10259: buf[len] = 0;
10260: } else {
10261: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10262: }
10263: return(buf);
10264: }
10265:
10266: /**
10267: * xmlParseEncodingDecl:
10268: * @ctxt: an XML parser context
1.1.1.3 ! misho 10269: *
1.1 misho 10270: * parse the XML encoding declaration
10271: *
10272: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10273: *
10274: * this setups the conversion filters.
10275: *
10276: * Returns the encoding value or NULL
10277: */
10278:
10279: const xmlChar *
10280: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10281: xmlChar *encoding = NULL;
10282:
10283: SKIP_BLANKS;
10284: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10285: SKIP(8);
10286: SKIP_BLANKS;
10287: if (RAW != '=') {
10288: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10289: return(NULL);
10290: }
10291: NEXT;
10292: SKIP_BLANKS;
10293: if (RAW == '"') {
10294: NEXT;
10295: encoding = xmlParseEncName(ctxt);
10296: if (RAW != '"') {
10297: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10298: } else
10299: NEXT;
10300: } else if (RAW == '\''){
10301: NEXT;
10302: encoding = xmlParseEncName(ctxt);
10303: if (RAW != '\'') {
10304: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10305: } else
10306: NEXT;
10307: } else {
10308: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10309: }
1.1.1.2 misho 10310:
10311: /*
10312: * Non standard parsing, allowing the user to ignore encoding
10313: */
10314: if (ctxt->options & XML_PARSE_IGNORE_ENC)
10315: return(encoding);
10316:
1.1 misho 10317: /*
10318: * UTF-16 encoding stwich has already taken place at this stage,
10319: * more over the little-endian/big-endian selection is already done
10320: */
10321: if ((encoding != NULL) &&
10322: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10323: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10324: /*
10325: * If no encoding was passed to the parser, that we are
1.1.1.3 ! misho 10326: * using UTF-16 and no decoder is present i.e. the
1.1 misho 10327: * document is apparently UTF-8 compatible, then raise an
10328: * encoding mismatch fatal error
10329: */
10330: if ((ctxt->encoding == NULL) &&
10331: (ctxt->input->buf != NULL) &&
10332: (ctxt->input->buf->encoder == NULL)) {
10333: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10334: "Document labelled UTF-16 but has UTF-8 content\n");
10335: }
10336: if (ctxt->encoding != NULL)
10337: xmlFree((xmlChar *) ctxt->encoding);
10338: ctxt->encoding = encoding;
10339: }
10340: /*
10341: * UTF-8 encoding is handled natively
10342: */
10343: else if ((encoding != NULL) &&
10344: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10345: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10346: if (ctxt->encoding != NULL)
10347: xmlFree((xmlChar *) ctxt->encoding);
10348: ctxt->encoding = encoding;
10349: }
10350: else if (encoding != NULL) {
10351: xmlCharEncodingHandlerPtr handler;
10352:
10353: if (ctxt->input->encoding != NULL)
10354: xmlFree((xmlChar *) ctxt->input->encoding);
10355: ctxt->input->encoding = encoding;
10356:
10357: handler = xmlFindCharEncodingHandler((const char *) encoding);
10358: if (handler != NULL) {
10359: xmlSwitchToEncoding(ctxt, handler);
10360: } else {
10361: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10362: "Unsupported encoding %s\n", encoding);
10363: return(NULL);
10364: }
10365: }
10366: }
10367: return(encoding);
10368: }
10369:
10370: /**
10371: * xmlParseSDDecl:
10372: * @ctxt: an XML parser context
10373: *
10374: * parse the XML standalone declaration
10375: *
10376: * [32] SDDecl ::= S 'standalone' Eq
1.1.1.3 ! misho 10377: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.1 misho 10378: *
10379: * [ VC: Standalone Document Declaration ]
10380: * TODO The standalone document declaration must have the value "no"
10381: * if any external markup declarations contain declarations of:
10382: * - attributes with default values, if elements to which these
10383: * attributes apply appear in the document without specifications
10384: * of values for these attributes, or
10385: * - entities (other than amp, lt, gt, apos, quot), if references
10386: * to those entities appear in the document, or
10387: * - attributes with values subject to normalization, where the
10388: * attribute appears in the document with a value which will change
10389: * as a result of normalization, or
10390: * - element types with element content, if white space occurs directly
10391: * within any instance of those types.
10392: *
10393: * Returns:
10394: * 1 if standalone="yes"
10395: * 0 if standalone="no"
10396: * -2 if standalone attribute is missing or invalid
10397: * (A standalone value of -2 means that the XML declaration was found,
10398: * but no value was specified for the standalone attribute).
10399: */
10400:
10401: int
10402: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10403: int standalone = -2;
10404:
10405: SKIP_BLANKS;
10406: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10407: SKIP(10);
10408: SKIP_BLANKS;
10409: if (RAW != '=') {
10410: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10411: return(standalone);
10412: }
10413: NEXT;
10414: SKIP_BLANKS;
10415: if (RAW == '\''){
10416: NEXT;
10417: if ((RAW == 'n') && (NXT(1) == 'o')) {
10418: standalone = 0;
10419: SKIP(2);
10420: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10421: (NXT(2) == 's')) {
10422: standalone = 1;
10423: SKIP(3);
10424: } else {
10425: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10426: }
10427: if (RAW != '\'') {
10428: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10429: } else
10430: NEXT;
10431: } else if (RAW == '"'){
10432: NEXT;
10433: if ((RAW == 'n') && (NXT(1) == 'o')) {
10434: standalone = 0;
10435: SKIP(2);
10436: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10437: (NXT(2) == 's')) {
10438: standalone = 1;
10439: SKIP(3);
10440: } else {
10441: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10442: }
10443: if (RAW != '"') {
10444: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10445: } else
10446: NEXT;
10447: } else {
10448: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10449: }
10450: }
10451: return(standalone);
10452: }
10453:
10454: /**
10455: * xmlParseXMLDecl:
10456: * @ctxt: an XML parser context
1.1.1.3 ! misho 10457: *
1.1 misho 10458: * parse an XML declaration header
10459: *
10460: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10461: */
10462:
10463: void
10464: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10465: xmlChar *version;
10466:
10467: /*
10468: * This value for standalone indicates that the document has an
10469: * XML declaration but it does not have a standalone attribute.
10470: * It will be overwritten later if a standalone attribute is found.
10471: */
10472: ctxt->input->standalone = -2;
10473:
10474: /*
10475: * We know that '<?xml' is here.
10476: */
10477: SKIP(5);
10478:
10479: if (!IS_BLANK_CH(RAW)) {
10480: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10481: "Blank needed after '<?xml'\n");
10482: }
10483: SKIP_BLANKS;
10484:
10485: /*
10486: * We must have the VersionInfo here.
10487: */
10488: version = xmlParseVersionInfo(ctxt);
10489: if (version == NULL) {
10490: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10491: } else {
10492: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10493: /*
10494: * Changed here for XML-1.0 5th edition
10495: */
10496: if (ctxt->options & XML_PARSE_OLD10) {
10497: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10498: "Unsupported version '%s'\n",
10499: version);
10500: } else {
10501: if ((version[0] == '1') && ((version[1] == '.'))) {
10502: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10503: "Unsupported version '%s'\n",
10504: version, NULL);
10505: } else {
10506: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10507: "Unsupported version '%s'\n",
10508: version);
10509: }
10510: }
10511: }
10512: if (ctxt->version != NULL)
10513: xmlFree((void *) ctxt->version);
10514: ctxt->version = version;
10515: }
10516:
10517: /*
10518: * We may have the encoding declaration
10519: */
10520: if (!IS_BLANK_CH(RAW)) {
10521: if ((RAW == '?') && (NXT(1) == '>')) {
10522: SKIP(2);
10523: return;
10524: }
10525: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10526: }
10527: xmlParseEncodingDecl(ctxt);
10528: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10529: /*
10530: * The XML REC instructs us to stop parsing right here
10531: */
10532: return;
10533: }
10534:
10535: /*
10536: * We may have the standalone status.
10537: */
10538: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10539: if ((RAW == '?') && (NXT(1) == '>')) {
10540: SKIP(2);
10541: return;
10542: }
10543: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10544: }
10545:
10546: /*
10547: * We can grow the input buffer freely at that point
10548: */
10549: GROW;
10550:
10551: SKIP_BLANKS;
10552: ctxt->input->standalone = xmlParseSDDecl(ctxt);
10553:
10554: SKIP_BLANKS;
10555: if ((RAW == '?') && (NXT(1) == '>')) {
10556: SKIP(2);
10557: } else if (RAW == '>') {
10558: /* Deprecated old WD ... */
10559: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10560: NEXT;
10561: } else {
10562: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10563: MOVETO_ENDTAG(CUR_PTR);
10564: NEXT;
10565: }
10566: }
10567:
10568: /**
10569: * xmlParseMisc:
10570: * @ctxt: an XML parser context
1.1.1.3 ! misho 10571: *
1.1 misho 10572: * parse an XML Misc* optional field.
10573: *
10574: * [27] Misc ::= Comment | PI | S
10575: */
10576:
10577: void
10578: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.1.1.3 ! misho 10579: while ((ctxt->instate != XML_PARSER_EOF) &&
! 10580: (((RAW == '<') && (NXT(1) == '?')) ||
! 10581: (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
! 10582: IS_BLANK_CH(CUR))) {
1.1 misho 10583: if ((RAW == '<') && (NXT(1) == '?')) {
10584: xmlParsePI(ctxt);
10585: } else if (IS_BLANK_CH(CUR)) {
10586: NEXT;
10587: } else
10588: xmlParseComment(ctxt);
10589: }
10590: }
10591:
10592: /**
10593: * xmlParseDocument:
10594: * @ctxt: an XML parser context
1.1.1.3 ! misho 10595: *
1.1 misho 10596: * parse an XML document (and build a tree if using the standard SAX
10597: * interface).
10598: *
10599: * [1] document ::= prolog element Misc*
10600: *
10601: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10602: *
10603: * Returns 0, -1 in case of error. the parser context is augmented
10604: * as a result of the parsing.
10605: */
10606:
10607: int
10608: xmlParseDocument(xmlParserCtxtPtr ctxt) {
10609: xmlChar start[4];
10610: xmlCharEncoding enc;
10611:
10612: xmlInitParser();
10613:
10614: if ((ctxt == NULL) || (ctxt->input == NULL))
10615: return(-1);
10616:
10617: GROW;
10618:
10619: /*
10620: * SAX: detecting the level.
10621: */
10622: xmlDetectSAX2(ctxt);
10623:
10624: /*
10625: * SAX: beginning of the document processing.
10626: */
10627: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10628: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.1.1.3 ! misho 10629: if (ctxt->instate == XML_PARSER_EOF)
! 10630: return(-1);
1.1 misho 10631:
10632: if ((ctxt->encoding == NULL) &&
10633: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
1.1.1.3 ! misho 10634: /*
1.1 misho 10635: * Get the 4 first bytes and decode the charset
10636: * if enc != XML_CHAR_ENCODING_NONE
10637: * plug some encoding conversion routines.
10638: */
10639: start[0] = RAW;
10640: start[1] = NXT(1);
10641: start[2] = NXT(2);
10642: start[3] = NXT(3);
10643: enc = xmlDetectCharEncoding(&start[0], 4);
10644: if (enc != XML_CHAR_ENCODING_NONE) {
10645: xmlSwitchEncoding(ctxt, enc);
10646: }
10647: }
10648:
10649:
10650: if (CUR == 0) {
10651: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10652: }
10653:
10654: /*
10655: * Check for the XMLDecl in the Prolog.
10656: * do not GROW here to avoid the detected encoder to decode more
10657: * than just the first line, unless the amount of data is really
10658: * too small to hold "<?xml version="1.0" encoding="foo"
10659: */
10660: if ((ctxt->input->end - ctxt->input->cur) < 35) {
10661: GROW;
10662: }
10663: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10664:
10665: /*
10666: * Note that we will switch encoding on the fly.
10667: */
10668: xmlParseXMLDecl(ctxt);
10669: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10670: /*
10671: * The XML REC instructs us to stop parsing right here
10672: */
10673: return(-1);
10674: }
10675: ctxt->standalone = ctxt->input->standalone;
10676: SKIP_BLANKS;
10677: } else {
10678: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10679: }
10680: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10681: ctxt->sax->startDocument(ctxt->userData);
1.1.1.3 ! misho 10682: if (ctxt->instate == XML_PARSER_EOF)
! 10683: return(-1);
1.1 misho 10684:
10685: /*
10686: * The Misc part of the Prolog
10687: */
10688: GROW;
10689: xmlParseMisc(ctxt);
10690:
10691: /*
10692: * Then possibly doc type declaration(s) and more Misc
10693: * (doctypedecl Misc*)?
10694: */
10695: GROW;
10696: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10697:
10698: ctxt->inSubset = 1;
10699: xmlParseDocTypeDecl(ctxt);
10700: if (RAW == '[') {
10701: ctxt->instate = XML_PARSER_DTD;
10702: xmlParseInternalSubset(ctxt);
1.1.1.3 ! misho 10703: if (ctxt->instate == XML_PARSER_EOF)
! 10704: return(-1);
1.1 misho 10705: }
10706:
10707: /*
10708: * Create and update the external subset.
10709: */
10710: ctxt->inSubset = 2;
10711: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10712: (!ctxt->disableSAX))
10713: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10714: ctxt->extSubSystem, ctxt->extSubURI);
1.1.1.3 ! misho 10715: if (ctxt->instate == XML_PARSER_EOF)
! 10716: return(-1);
1.1 misho 10717: ctxt->inSubset = 0;
10718:
10719: xmlCleanSpecialAttr(ctxt);
10720:
10721: ctxt->instate = XML_PARSER_PROLOG;
10722: xmlParseMisc(ctxt);
10723: }
10724:
10725: /*
10726: * Time to start parsing the tree itself
10727: */
10728: GROW;
10729: if (RAW != '<') {
10730: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10731: "Start tag expected, '<' not found\n");
10732: } else {
10733: ctxt->instate = XML_PARSER_CONTENT;
10734: xmlParseElement(ctxt);
10735: ctxt->instate = XML_PARSER_EPILOG;
10736:
10737:
10738: /*
10739: * The Misc part at the end
10740: */
10741: xmlParseMisc(ctxt);
10742:
10743: if (RAW != 0) {
10744: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10745: }
10746: ctxt->instate = XML_PARSER_EOF;
10747: }
10748:
10749: /*
10750: * SAX: end of the document processing.
10751: */
10752: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10753: ctxt->sax->endDocument(ctxt->userData);
10754:
10755: /*
10756: * Remove locally kept entity definitions if the tree was not built
10757: */
10758: if ((ctxt->myDoc != NULL) &&
10759: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10760: xmlFreeDoc(ctxt->myDoc);
10761: ctxt->myDoc = NULL;
10762: }
10763:
10764: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10765: ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10766: if (ctxt->valid)
10767: ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10768: if (ctxt->nsWellFormed)
10769: ctxt->myDoc->properties |= XML_DOC_NSVALID;
10770: if (ctxt->options & XML_PARSE_OLD10)
10771: ctxt->myDoc->properties |= XML_DOC_OLD10;
10772: }
10773: if (! ctxt->wellFormed) {
10774: ctxt->valid = 0;
10775: return(-1);
10776: }
10777: return(0);
10778: }
10779:
10780: /**
10781: * xmlParseExtParsedEnt:
10782: * @ctxt: an XML parser context
1.1.1.3 ! misho 10783: *
1.1 misho 10784: * parse a general parsed entity
10785: * An external general parsed entity is well-formed if it matches the
10786: * production labeled extParsedEnt.
10787: *
10788: * [78] extParsedEnt ::= TextDecl? content
10789: *
10790: * Returns 0, -1 in case of error. the parser context is augmented
10791: * as a result of the parsing.
10792: */
10793:
10794: int
10795: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10796: xmlChar start[4];
10797: xmlCharEncoding enc;
10798:
10799: if ((ctxt == NULL) || (ctxt->input == NULL))
10800: return(-1);
10801:
10802: xmlDefaultSAXHandlerInit();
10803:
10804: xmlDetectSAX2(ctxt);
10805:
10806: GROW;
10807:
10808: /*
10809: * SAX: beginning of the document processing.
10810: */
10811: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813:
1.1.1.3 ! misho 10814: /*
1.1 misho 10815: * Get the 4 first bytes and decode the charset
10816: * if enc != XML_CHAR_ENCODING_NONE
10817: * plug some encoding conversion routines.
10818: */
10819: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10820: start[0] = RAW;
10821: start[1] = NXT(1);
10822: start[2] = NXT(2);
10823: start[3] = NXT(3);
10824: enc = xmlDetectCharEncoding(start, 4);
10825: if (enc != XML_CHAR_ENCODING_NONE) {
10826: xmlSwitchEncoding(ctxt, enc);
10827: }
10828: }
10829:
10830:
10831: if (CUR == 0) {
10832: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10833: }
10834:
10835: /*
10836: * Check for the XMLDecl in the Prolog.
10837: */
10838: GROW;
10839: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10840:
10841: /*
10842: * Note that we will switch encoding on the fly.
10843: */
10844: xmlParseXMLDecl(ctxt);
10845: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10846: /*
10847: * The XML REC instructs us to stop parsing right here
10848: */
10849: return(-1);
10850: }
10851: SKIP_BLANKS;
10852: } else {
10853: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10854: }
10855: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10856: ctxt->sax->startDocument(ctxt->userData);
1.1.1.3 ! misho 10857: if (ctxt->instate == XML_PARSER_EOF)
! 10858: return(-1);
1.1 misho 10859:
10860: /*
10861: * Doing validity checking on chunk doesn't make sense
10862: */
10863: ctxt->instate = XML_PARSER_CONTENT;
10864: ctxt->validate = 0;
10865: ctxt->loadsubset = 0;
10866: ctxt->depth = 0;
10867:
10868: xmlParseContent(ctxt);
1.1.1.3 ! misho 10869: if (ctxt->instate == XML_PARSER_EOF)
! 10870: return(-1);
! 10871:
1.1 misho 10872: if ((RAW == '<') && (NXT(1) == '/')) {
10873: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10874: } else if (RAW != 0) {
10875: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10876: }
10877:
10878: /*
10879: * SAX: end of the document processing.
10880: */
10881: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10882: ctxt->sax->endDocument(ctxt->userData);
10883:
10884: if (! ctxt->wellFormed) return(-1);
10885: return(0);
10886: }
10887:
10888: #ifdef LIBXML_PUSH_ENABLED
10889: /************************************************************************
10890: * *
1.1.1.3 ! misho 10891: * Progressive parsing interfaces *
1.1 misho 10892: * *
10893: ************************************************************************/
10894:
10895: /**
10896: * xmlParseLookupSequence:
10897: * @ctxt: an XML parser context
10898: * @first: the first char to lookup
10899: * @next: the next char to lookup or zero
10900: * @third: the next char to lookup or zero
10901: *
10902: * Try to find if a sequence (first, next, third) or just (first next) or
10903: * (first) is available in the input stream.
10904: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10905: * to avoid rescanning sequences of bytes, it DOES change the state of the
10906: * parser, do not use liberally.
10907: *
10908: * Returns the index to the current parsing point if the full sequence
10909: * is available, -1 otherwise.
10910: */
10911: static int
10912: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10913: xmlChar next, xmlChar third) {
10914: int base, len;
10915: xmlParserInputPtr in;
10916: const xmlChar *buf;
10917:
10918: in = ctxt->input;
10919: if (in == NULL) return(-1);
10920: base = in->cur - in->base;
10921: if (base < 0) return(-1);
10922: if (ctxt->checkIndex > base)
10923: base = ctxt->checkIndex;
10924: if (in->buf == NULL) {
10925: buf = in->base;
10926: len = in->length;
10927: } else {
1.1.1.3 ! misho 10928: buf = xmlBufContent(in->buf->buffer);
! 10929: len = xmlBufUse(in->buf->buffer);
1.1 misho 10930: }
10931: /* take into account the sequence length */
10932: if (third) len -= 2;
10933: else if (next) len --;
10934: for (;base < len;base++) {
10935: if (buf[base] == first) {
10936: if (third != 0) {
10937: if ((buf[base + 1] != next) ||
10938: (buf[base + 2] != third)) continue;
10939: } else if (next != 0) {
10940: if (buf[base + 1] != next) continue;
10941: }
10942: ctxt->checkIndex = 0;
10943: #ifdef DEBUG_PUSH
10944: if (next == 0)
10945: xmlGenericError(xmlGenericErrorContext,
10946: "PP: lookup '%c' found at %d\n",
10947: first, base);
10948: else if (third == 0)
10949: xmlGenericError(xmlGenericErrorContext,
10950: "PP: lookup '%c%c' found at %d\n",
10951: first, next, base);
1.1.1.3 ! misho 10952: else
1.1 misho 10953: xmlGenericError(xmlGenericErrorContext,
10954: "PP: lookup '%c%c%c' found at %d\n",
10955: first, next, third, base);
10956: #endif
10957: return(base - (in->cur - in->base));
10958: }
10959: }
10960: ctxt->checkIndex = base;
10961: #ifdef DEBUG_PUSH
10962: if (next == 0)
10963: xmlGenericError(xmlGenericErrorContext,
10964: "PP: lookup '%c' failed\n", first);
10965: else if (third == 0)
10966: xmlGenericError(xmlGenericErrorContext,
10967: "PP: lookup '%c%c' failed\n", first, next);
1.1.1.3 ! misho 10968: else
1.1 misho 10969: xmlGenericError(xmlGenericErrorContext,
10970: "PP: lookup '%c%c%c' failed\n", first, next, third);
10971: #endif
10972: return(-1);
10973: }
10974:
10975: /**
10976: * xmlParseGetLasts:
10977: * @ctxt: an XML parser context
10978: * @lastlt: pointer to store the last '<' from the input
10979: * @lastgt: pointer to store the last '>' from the input
10980: *
10981: * Lookup the last < and > in the current chunk
10982: */
10983: static void
10984: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10985: const xmlChar **lastgt) {
10986: const xmlChar *tmp;
10987:
10988: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10989: xmlGenericError(xmlGenericErrorContext,
10990: "Internal error: xmlParseGetLasts\n");
10991: return;
10992: }
10993: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10994: tmp = ctxt->input->end;
10995: tmp--;
10996: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10997: if (tmp < ctxt->input->base) {
10998: *lastlt = NULL;
10999: *lastgt = NULL;
11000: } else {
11001: *lastlt = tmp;
11002: tmp++;
11003: while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11004: if (*tmp == '\'') {
11005: tmp++;
11006: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11007: if (tmp < ctxt->input->end) tmp++;
11008: } else if (*tmp == '"') {
11009: tmp++;
11010: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11011: if (tmp < ctxt->input->end) tmp++;
11012: } else
11013: tmp++;
11014: }
11015: if (tmp < ctxt->input->end)
11016: *lastgt = tmp;
11017: else {
11018: tmp = *lastlt;
11019: tmp--;
11020: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11021: if (tmp >= ctxt->input->base)
11022: *lastgt = tmp;
11023: else
11024: *lastgt = NULL;
11025: }
11026: }
11027: } else {
11028: *lastlt = NULL;
11029: *lastgt = NULL;
11030: }
11031: }
11032: /**
11033: * xmlCheckCdataPush:
11034: * @cur: pointer to the bock of characters
11035: * @len: length of the block in bytes
11036: *
11037: * Check that the block of characters is okay as SCdata content [20]
11038: *
11039: * Returns the number of bytes to pass if okay, a negative index where an
11040: * UTF-8 error occured otherwise
11041: */
11042: static int
11043: xmlCheckCdataPush(const xmlChar *utf, int len) {
11044: int ix;
11045: unsigned char c;
11046: int codepoint;
11047:
11048: if ((utf == NULL) || (len <= 0))
11049: return(0);
1.1.1.3 ! misho 11050:
1.1 misho 11051: for (ix = 0; ix < len;) { /* string is 0-terminated */
11052: c = utf[ix];
11053: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11054: if (c >= 0x20)
11055: ix++;
11056: else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11057: ix++;
11058: else
11059: return(-ix);
11060: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11061: if (ix + 2 > len) return(ix);
11062: if ((utf[ix+1] & 0xc0 ) != 0x80)
11063: return(-ix);
11064: codepoint = (utf[ix] & 0x1f) << 6;
11065: codepoint |= utf[ix+1] & 0x3f;
11066: if (!xmlIsCharQ(codepoint))
11067: return(-ix);
11068: ix += 2;
11069: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11070: if (ix + 3 > len) return(ix);
11071: if (((utf[ix+1] & 0xc0) != 0x80) ||
11072: ((utf[ix+2] & 0xc0) != 0x80))
11073: return(-ix);
11074: codepoint = (utf[ix] & 0xf) << 12;
11075: codepoint |= (utf[ix+1] & 0x3f) << 6;
11076: codepoint |= utf[ix+2] & 0x3f;
11077: if (!xmlIsCharQ(codepoint))
11078: return(-ix);
11079: ix += 3;
11080: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11081: if (ix + 4 > len) return(ix);
11082: if (((utf[ix+1] & 0xc0) != 0x80) ||
11083: ((utf[ix+2] & 0xc0) != 0x80) ||
11084: ((utf[ix+3] & 0xc0) != 0x80))
11085: return(-ix);
11086: codepoint = (utf[ix] & 0x7) << 18;
11087: codepoint |= (utf[ix+1] & 0x3f) << 12;
11088: codepoint |= (utf[ix+2] & 0x3f) << 6;
11089: codepoint |= utf[ix+3] & 0x3f;
11090: if (!xmlIsCharQ(codepoint))
11091: return(-ix);
11092: ix += 4;
11093: } else /* unknown encoding */
11094: return(-ix);
11095: }
11096: return(ix);
11097: }
11098:
11099: /**
11100: * xmlParseTryOrFinish:
11101: * @ctxt: an XML parser context
11102: * @terminate: last chunk indicator
11103: *
11104: * Try to progress on parsing
11105: *
11106: * Returns zero if no parsing was possible
11107: */
11108: static int
11109: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11110: int ret = 0;
11111: int avail, tlen;
11112: xmlChar cur, next;
11113: const xmlChar *lastlt, *lastgt;
11114:
11115: if (ctxt->input == NULL)
11116: return(0);
11117:
11118: #ifdef DEBUG_PUSH
11119: switch (ctxt->instate) {
11120: case XML_PARSER_EOF:
11121: xmlGenericError(xmlGenericErrorContext,
11122: "PP: try EOF\n"); break;
11123: case XML_PARSER_START:
11124: xmlGenericError(xmlGenericErrorContext,
11125: "PP: try START\n"); break;
11126: case XML_PARSER_MISC:
11127: xmlGenericError(xmlGenericErrorContext,
11128: "PP: try MISC\n");break;
11129: case XML_PARSER_COMMENT:
11130: xmlGenericError(xmlGenericErrorContext,
11131: "PP: try COMMENT\n");break;
11132: case XML_PARSER_PROLOG:
11133: xmlGenericError(xmlGenericErrorContext,
11134: "PP: try PROLOG\n");break;
11135: case XML_PARSER_START_TAG:
11136: xmlGenericError(xmlGenericErrorContext,
11137: "PP: try START_TAG\n");break;
11138: case XML_PARSER_CONTENT:
11139: xmlGenericError(xmlGenericErrorContext,
11140: "PP: try CONTENT\n");break;
11141: case XML_PARSER_CDATA_SECTION:
11142: xmlGenericError(xmlGenericErrorContext,
11143: "PP: try CDATA_SECTION\n");break;
11144: case XML_PARSER_END_TAG:
11145: xmlGenericError(xmlGenericErrorContext,
11146: "PP: try END_TAG\n");break;
11147: case XML_PARSER_ENTITY_DECL:
11148: xmlGenericError(xmlGenericErrorContext,
11149: "PP: try ENTITY_DECL\n");break;
11150: case XML_PARSER_ENTITY_VALUE:
11151: xmlGenericError(xmlGenericErrorContext,
11152: "PP: try ENTITY_VALUE\n");break;
11153: case XML_PARSER_ATTRIBUTE_VALUE:
11154: xmlGenericError(xmlGenericErrorContext,
11155: "PP: try ATTRIBUTE_VALUE\n");break;
11156: case XML_PARSER_DTD:
11157: xmlGenericError(xmlGenericErrorContext,
11158: "PP: try DTD\n");break;
11159: case XML_PARSER_EPILOG:
11160: xmlGenericError(xmlGenericErrorContext,
11161: "PP: try EPILOG\n");break;
11162: case XML_PARSER_PI:
11163: xmlGenericError(xmlGenericErrorContext,
11164: "PP: try PI\n");break;
11165: case XML_PARSER_IGNORE:
11166: xmlGenericError(xmlGenericErrorContext,
11167: "PP: try IGNORE\n");break;
11168: }
11169: #endif
11170:
11171: if ((ctxt->input != NULL) &&
11172: (ctxt->input->cur - ctxt->input->base > 4096)) {
11173: xmlSHRINK(ctxt);
11174: ctxt->checkIndex = 0;
11175: }
11176: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11177:
1.1.1.3 ! misho 11178: while (ctxt->instate != XML_PARSER_EOF) {
1.1 misho 11179: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11180: return(0);
11181:
1.1.1.3 ! misho 11182:
1.1 misho 11183: /*
11184: * Pop-up of finished entities.
11185: */
11186: while ((RAW == 0) && (ctxt->inputNr > 1))
11187: xmlPopInput(ctxt);
11188:
11189: if (ctxt->input == NULL) break;
11190: if (ctxt->input->buf == NULL)
11191: avail = ctxt->input->length -
11192: (ctxt->input->cur - ctxt->input->base);
11193: else {
11194: /*
11195: * If we are operating on converted input, try to flush
11196: * remainng chars to avoid them stalling in the non-converted
1.1.1.3 ! misho 11197: * buffer. But do not do this in document start where
! 11198: * encoding="..." may not have been read and we work on a
! 11199: * guessed encoding.
! 11200: */
! 11201: if ((ctxt->instate != XML_PARSER_START) &&
! 11202: (ctxt->input->buf->raw != NULL) &&
! 11203: (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
! 11204: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
! 11205: ctxt->input);
! 11206: size_t current = ctxt->input->cur - ctxt->input->base;
1.1 misho 11207:
11208: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
1.1.1.3 ! misho 11209: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
! 11210: base, current);
1.1 misho 11211: }
1.1.1.3 ! misho 11212: avail = xmlBufUse(ctxt->input->buf->buffer) -
1.1 misho 11213: (ctxt->input->cur - ctxt->input->base);
11214: }
11215: if (avail < 1)
11216: goto done;
11217: switch (ctxt->instate) {
11218: case XML_PARSER_EOF:
11219: /*
11220: * Document parsing is done !
11221: */
11222: goto done;
11223: case XML_PARSER_START:
11224: if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11225: xmlChar start[4];
11226: xmlCharEncoding enc;
11227:
11228: /*
11229: * Very first chars read from the document flow.
11230: */
11231: if (avail < 4)
11232: goto done;
11233:
1.1.1.3 ! misho 11234: /*
1.1 misho 11235: * Get the 4 first bytes and decode the charset
11236: * if enc != XML_CHAR_ENCODING_NONE
11237: * plug some encoding conversion routines,
11238: * else xmlSwitchEncoding will set to (default)
11239: * UTF8.
11240: */
11241: start[0] = RAW;
11242: start[1] = NXT(1);
11243: start[2] = NXT(2);
11244: start[3] = NXT(3);
11245: enc = xmlDetectCharEncoding(start, 4);
11246: xmlSwitchEncoding(ctxt, enc);
11247: break;
11248: }
11249:
11250: if (avail < 2)
11251: goto done;
11252: cur = ctxt->input->cur[0];
11253: next = ctxt->input->cur[1];
11254: if (cur == 0) {
11255: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11256: ctxt->sax->setDocumentLocator(ctxt->userData,
11257: &xmlDefaultSAXLocator);
11258: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11259: ctxt->instate = XML_PARSER_EOF;
11260: #ifdef DEBUG_PUSH
11261: xmlGenericError(xmlGenericErrorContext,
11262: "PP: entering EOF\n");
11263: #endif
11264: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11265: ctxt->sax->endDocument(ctxt->userData);
11266: goto done;
11267: }
11268: if ((cur == '<') && (next == '?')) {
11269: /* PI or XML decl */
11270: if (avail < 5) return(ret);
11271: if ((!terminate) &&
11272: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11273: return(ret);
11274: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11275: ctxt->sax->setDocumentLocator(ctxt->userData,
11276: &xmlDefaultSAXLocator);
11277: if ((ctxt->input->cur[2] == 'x') &&
11278: (ctxt->input->cur[3] == 'm') &&
11279: (ctxt->input->cur[4] == 'l') &&
11280: (IS_BLANK_CH(ctxt->input->cur[5]))) {
11281: ret += 5;
11282: #ifdef DEBUG_PUSH
11283: xmlGenericError(xmlGenericErrorContext,
11284: "PP: Parsing XML Decl\n");
11285: #endif
11286: xmlParseXMLDecl(ctxt);
11287: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11288: /*
11289: * The XML REC instructs us to stop parsing right
11290: * here
11291: */
11292: ctxt->instate = XML_PARSER_EOF;
11293: return(0);
11294: }
11295: ctxt->standalone = ctxt->input->standalone;
11296: if ((ctxt->encoding == NULL) &&
11297: (ctxt->input->encoding != NULL))
11298: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11299: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11300: (!ctxt->disableSAX))
11301: ctxt->sax->startDocument(ctxt->userData);
11302: ctxt->instate = XML_PARSER_MISC;
11303: #ifdef DEBUG_PUSH
11304: xmlGenericError(xmlGenericErrorContext,
11305: "PP: entering MISC\n");
11306: #endif
11307: } else {
11308: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11309: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11310: (!ctxt->disableSAX))
11311: ctxt->sax->startDocument(ctxt->userData);
11312: ctxt->instate = XML_PARSER_MISC;
11313: #ifdef DEBUG_PUSH
11314: xmlGenericError(xmlGenericErrorContext,
11315: "PP: entering MISC\n");
11316: #endif
11317: }
11318: } else {
11319: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11320: ctxt->sax->setDocumentLocator(ctxt->userData,
11321: &xmlDefaultSAXLocator);
11322: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11323: if (ctxt->version == NULL) {
11324: xmlErrMemory(ctxt, NULL);
11325: break;
11326: }
11327: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328: (!ctxt->disableSAX))
11329: ctxt->sax->startDocument(ctxt->userData);
11330: ctxt->instate = XML_PARSER_MISC;
11331: #ifdef DEBUG_PUSH
11332: xmlGenericError(xmlGenericErrorContext,
11333: "PP: entering MISC\n");
11334: #endif
11335: }
11336: break;
11337: case XML_PARSER_START_TAG: {
11338: const xmlChar *name;
11339: const xmlChar *prefix = NULL;
11340: const xmlChar *URI = NULL;
11341: int nsNr = ctxt->nsNr;
11342:
11343: if ((avail < 2) && (ctxt->inputNr == 1))
11344: goto done;
11345: cur = ctxt->input->cur[0];
11346: if (cur != '<') {
11347: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11348: ctxt->instate = XML_PARSER_EOF;
11349: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11350: ctxt->sax->endDocument(ctxt->userData);
11351: goto done;
11352: }
11353: if (!terminate) {
11354: if (ctxt->progressive) {
11355: /* > can be found unescaped in attribute values */
11356: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11357: goto done;
11358: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11359: goto done;
11360: }
11361: }
11362: if (ctxt->spaceNr == 0)
11363: spacePush(ctxt, -1);
11364: else if (*ctxt->space == -2)
11365: spacePush(ctxt, -1);
11366: else
11367: spacePush(ctxt, *ctxt->space);
11368: #ifdef LIBXML_SAX1_ENABLED
11369: if (ctxt->sax2)
11370: #endif /* LIBXML_SAX1_ENABLED */
11371: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11372: #ifdef LIBXML_SAX1_ENABLED
11373: else
11374: name = xmlParseStartTag(ctxt);
11375: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 11376: if (ctxt->instate == XML_PARSER_EOF)
11377: goto done;
1.1 misho 11378: if (name == NULL) {
11379: spacePop(ctxt);
11380: ctxt->instate = XML_PARSER_EOF;
11381: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11382: ctxt->sax->endDocument(ctxt->userData);
11383: goto done;
11384: }
11385: #ifdef LIBXML_VALID_ENABLED
11386: /*
11387: * [ VC: Root Element Type ]
11388: * The Name in the document type declaration must match
1.1.1.3 ! misho 11389: * the element type of the root element.
1.1 misho 11390: */
11391: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11392: ctxt->node && (ctxt->node == ctxt->myDoc->children))
11393: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11394: #endif /* LIBXML_VALID_ENABLED */
11395:
11396: /*
11397: * Check for an Empty Element.
11398: */
11399: if ((RAW == '/') && (NXT(1) == '>')) {
11400: SKIP(2);
11401:
11402: if (ctxt->sax2) {
11403: if ((ctxt->sax != NULL) &&
11404: (ctxt->sax->endElementNs != NULL) &&
11405: (!ctxt->disableSAX))
11406: ctxt->sax->endElementNs(ctxt->userData, name,
11407: prefix, URI);
11408: if (ctxt->nsNr - nsNr > 0)
11409: nsPop(ctxt, ctxt->nsNr - nsNr);
11410: #ifdef LIBXML_SAX1_ENABLED
11411: } else {
11412: if ((ctxt->sax != NULL) &&
11413: (ctxt->sax->endElement != NULL) &&
11414: (!ctxt->disableSAX))
11415: ctxt->sax->endElement(ctxt->userData, name);
11416: #endif /* LIBXML_SAX1_ENABLED */
11417: }
1.1.1.3 ! misho 11418: if (ctxt->instate == XML_PARSER_EOF)
! 11419: goto done;
1.1 misho 11420: spacePop(ctxt);
11421: if (ctxt->nameNr == 0) {
11422: ctxt->instate = XML_PARSER_EPILOG;
11423: } else {
11424: ctxt->instate = XML_PARSER_CONTENT;
11425: }
1.1.1.3 ! misho 11426: ctxt->progressive = 1;
1.1 misho 11427: break;
11428: }
11429: if (RAW == '>') {
11430: NEXT;
11431: } else {
11432: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11433: "Couldn't find end of Start Tag %s\n",
11434: name);
11435: nodePop(ctxt);
11436: spacePop(ctxt);
11437: }
11438: if (ctxt->sax2)
11439: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11440: #ifdef LIBXML_SAX1_ENABLED
11441: else
11442: namePush(ctxt, name);
11443: #endif /* LIBXML_SAX1_ENABLED */
11444:
11445: ctxt->instate = XML_PARSER_CONTENT;
1.1.1.3 ! misho 11446: ctxt->progressive = 1;
1.1 misho 11447: break;
11448: }
11449: case XML_PARSER_CONTENT: {
11450: const xmlChar *test;
11451: unsigned int cons;
11452: if ((avail < 2) && (ctxt->inputNr == 1))
11453: goto done;
11454: cur = ctxt->input->cur[0];
11455: next = ctxt->input->cur[1];
11456:
11457: test = CUR_PTR;
11458: cons = ctxt->input->consumed;
11459: if ((cur == '<') && (next == '/')) {
11460: ctxt->instate = XML_PARSER_END_TAG;
11461: break;
11462: } else if ((cur == '<') && (next == '?')) {
11463: if ((!terminate) &&
1.1.1.3 ! misho 11464: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
! 11465: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11466: goto done;
1.1.1.3 ! misho 11467: }
1.1 misho 11468: xmlParsePI(ctxt);
1.1.1.3 ! misho 11469: ctxt->instate = XML_PARSER_CONTENT;
! 11470: ctxt->progressive = 1;
1.1 misho 11471: } else if ((cur == '<') && (next != '!')) {
11472: ctxt->instate = XML_PARSER_START_TAG;
11473: break;
11474: } else if ((cur == '<') && (next == '!') &&
11475: (ctxt->input->cur[2] == '-') &&
11476: (ctxt->input->cur[3] == '-')) {
11477: int term;
11478:
11479: if (avail < 4)
11480: goto done;
11481: ctxt->input->cur += 4;
11482: term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11483: ctxt->input->cur -= 4;
1.1.1.3 ! misho 11484: if ((!terminate) && (term < 0)) {
! 11485: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11486: goto done;
1.1.1.3 ! misho 11487: }
1.1 misho 11488: xmlParseComment(ctxt);
11489: ctxt->instate = XML_PARSER_CONTENT;
1.1.1.3 ! misho 11490: ctxt->progressive = 1;
1.1 misho 11491: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11492: (ctxt->input->cur[2] == '[') &&
11493: (ctxt->input->cur[3] == 'C') &&
11494: (ctxt->input->cur[4] == 'D') &&
11495: (ctxt->input->cur[5] == 'A') &&
11496: (ctxt->input->cur[6] == 'T') &&
11497: (ctxt->input->cur[7] == 'A') &&
11498: (ctxt->input->cur[8] == '[')) {
11499: SKIP(9);
11500: ctxt->instate = XML_PARSER_CDATA_SECTION;
11501: break;
11502: } else if ((cur == '<') && (next == '!') &&
11503: (avail < 9)) {
11504: goto done;
11505: } else if (cur == '&') {
11506: if ((!terminate) &&
11507: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11508: goto done;
11509: xmlParseReference(ctxt);
11510: } else {
11511: /* TODO Avoid the extra copy, handle directly !!! */
11512: /*
11513: * Goal of the following test is:
11514: * - minimize calls to the SAX 'character' callback
11515: * when they are mergeable
11516: * - handle an problem for isBlank when we only parse
11517: * a sequence of blank chars and the next one is
11518: * not available to check against '<' presence.
11519: * - tries to homogenize the differences in SAX
11520: * callbacks between the push and pull versions
11521: * of the parser.
11522: */
11523: if ((ctxt->inputNr == 1) &&
11524: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11525: if (!terminate) {
11526: if (ctxt->progressive) {
11527: if ((lastlt == NULL) ||
11528: (ctxt->input->cur > lastlt))
11529: goto done;
11530: } else if (xmlParseLookupSequence(ctxt,
11531: '<', 0, 0) < 0) {
11532: goto done;
11533: }
11534: }
11535: }
11536: ctxt->checkIndex = 0;
11537: xmlParseCharData(ctxt, 0);
11538: }
11539: /*
11540: * Pop-up of finished entities.
11541: */
11542: while ((RAW == 0) && (ctxt->inputNr > 1))
11543: xmlPopInput(ctxt);
11544: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11545: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11546: "detected an error in element content\n");
11547: ctxt->instate = XML_PARSER_EOF;
11548: break;
11549: }
11550: break;
11551: }
11552: case XML_PARSER_END_TAG:
11553: if (avail < 2)
11554: goto done;
11555: if (!terminate) {
11556: if (ctxt->progressive) {
11557: /* > can be found unescaped in attribute values */
11558: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11559: goto done;
11560: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11561: goto done;
11562: }
11563: }
11564: if (ctxt->sax2) {
11565: xmlParseEndTag2(ctxt,
11566: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11567: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11568: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11569: nameNsPop(ctxt);
11570: }
11571: #ifdef LIBXML_SAX1_ENABLED
11572: else
11573: xmlParseEndTag1(ctxt, 0);
11574: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 11575: if (ctxt->instate == XML_PARSER_EOF) {
11576: /* Nothing */
11577: } else if (ctxt->nameNr == 0) {
1.1 misho 11578: ctxt->instate = XML_PARSER_EPILOG;
11579: } else {
11580: ctxt->instate = XML_PARSER_CONTENT;
11581: }
11582: break;
11583: case XML_PARSER_CDATA_SECTION: {
11584: /*
1.1.1.3 ! misho 11585: * The Push mode need to have the SAX callback for
1.1 misho 11586: * cdataBlock merge back contiguous callbacks.
11587: */
11588: int base;
11589:
11590: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11591: if (base < 0) {
11592: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11593: int tmp;
11594:
1.1.1.3 ! misho 11595: tmp = xmlCheckCdataPush(ctxt->input->cur,
1.1 misho 11596: XML_PARSER_BIG_BUFFER_SIZE);
11597: if (tmp < 0) {
11598: tmp = -tmp;
11599: ctxt->input->cur += tmp;
11600: goto encoding_error;
11601: }
11602: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11603: if (ctxt->sax->cdataBlock != NULL)
11604: ctxt->sax->cdataBlock(ctxt->userData,
11605: ctxt->input->cur, tmp);
11606: else if (ctxt->sax->characters != NULL)
11607: ctxt->sax->characters(ctxt->userData,
11608: ctxt->input->cur, tmp);
11609: }
1.1.1.3 ! misho 11610: if (ctxt->instate == XML_PARSER_EOF)
! 11611: goto done;
1.1 misho 11612: SKIPL(tmp);
11613: ctxt->checkIndex = 0;
11614: }
11615: goto done;
11616: } else {
11617: int tmp;
11618:
11619: tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11620: if ((tmp < 0) || (tmp != base)) {
11621: tmp = -tmp;
11622: ctxt->input->cur += tmp;
11623: goto encoding_error;
11624: }
11625: if ((ctxt->sax != NULL) && (base == 0) &&
11626: (ctxt->sax->cdataBlock != NULL) &&
11627: (!ctxt->disableSAX)) {
11628: /*
11629: * Special case to provide identical behaviour
11630: * between pull and push parsers on enpty CDATA
11631: * sections
11632: */
11633: if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11634: (!strncmp((const char *)&ctxt->input->cur[-9],
11635: "<![CDATA[", 9)))
11636: ctxt->sax->cdataBlock(ctxt->userData,
11637: BAD_CAST "", 0);
11638: } else if ((ctxt->sax != NULL) && (base > 0) &&
11639: (!ctxt->disableSAX)) {
11640: if (ctxt->sax->cdataBlock != NULL)
11641: ctxt->sax->cdataBlock(ctxt->userData,
11642: ctxt->input->cur, base);
11643: else if (ctxt->sax->characters != NULL)
11644: ctxt->sax->characters(ctxt->userData,
11645: ctxt->input->cur, base);
11646: }
1.1.1.3 ! misho 11647: if (ctxt->instate == XML_PARSER_EOF)
! 11648: goto done;
1.1 misho 11649: SKIPL(base + 3);
11650: ctxt->checkIndex = 0;
11651: ctxt->instate = XML_PARSER_CONTENT;
11652: #ifdef DEBUG_PUSH
11653: xmlGenericError(xmlGenericErrorContext,
11654: "PP: entering CONTENT\n");
11655: #endif
11656: }
11657: break;
11658: }
11659: case XML_PARSER_MISC:
11660: SKIP_BLANKS;
11661: if (ctxt->input->buf == NULL)
11662: avail = ctxt->input->length -
11663: (ctxt->input->cur - ctxt->input->base);
11664: else
1.1.1.3 ! misho 11665: avail = xmlBufUse(ctxt->input->buf->buffer) -
1.1 misho 11666: (ctxt->input->cur - ctxt->input->base);
11667: if (avail < 2)
11668: goto done;
11669: cur = ctxt->input->cur[0];
11670: next = ctxt->input->cur[1];
11671: if ((cur == '<') && (next == '?')) {
11672: if ((!terminate) &&
1.1.1.3 ! misho 11673: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
! 11674: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11675: goto done;
1.1.1.3 ! misho 11676: }
1.1 misho 11677: #ifdef DEBUG_PUSH
11678: xmlGenericError(xmlGenericErrorContext,
11679: "PP: Parsing PI\n");
11680: #endif
11681: xmlParsePI(ctxt);
1.1.1.3 ! misho 11682: if (ctxt->instate == XML_PARSER_EOF)
! 11683: goto done;
! 11684: ctxt->instate = XML_PARSER_MISC;
! 11685: ctxt->progressive = 1;
1.1 misho 11686: ctxt->checkIndex = 0;
11687: } else if ((cur == '<') && (next == '!') &&
11688: (ctxt->input->cur[2] == '-') &&
11689: (ctxt->input->cur[3] == '-')) {
11690: if ((!terminate) &&
1.1.1.3 ! misho 11691: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
! 11692: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11693: goto done;
1.1.1.3 ! misho 11694: }
1.1 misho 11695: #ifdef DEBUG_PUSH
11696: xmlGenericError(xmlGenericErrorContext,
11697: "PP: Parsing Comment\n");
11698: #endif
11699: xmlParseComment(ctxt);
1.1.1.3 ! misho 11700: if (ctxt->instate == XML_PARSER_EOF)
! 11701: goto done;
1.1 misho 11702: ctxt->instate = XML_PARSER_MISC;
1.1.1.3 ! misho 11703: ctxt->progressive = 1;
1.1 misho 11704: ctxt->checkIndex = 0;
11705: } else if ((cur == '<') && (next == '!') &&
11706: (ctxt->input->cur[2] == 'D') &&
11707: (ctxt->input->cur[3] == 'O') &&
11708: (ctxt->input->cur[4] == 'C') &&
11709: (ctxt->input->cur[5] == 'T') &&
11710: (ctxt->input->cur[6] == 'Y') &&
11711: (ctxt->input->cur[7] == 'P') &&
11712: (ctxt->input->cur[8] == 'E')) {
11713: if ((!terminate) &&
1.1.1.3 ! misho 11714: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
! 11715: ctxt->progressive = XML_PARSER_DTD;
1.1 misho 11716: goto done;
1.1.1.3 ! misho 11717: }
1.1 misho 11718: #ifdef DEBUG_PUSH
11719: xmlGenericError(xmlGenericErrorContext,
11720: "PP: Parsing internal subset\n");
11721: #endif
11722: ctxt->inSubset = 1;
1.1.1.3 ! misho 11723: ctxt->progressive = 0;
! 11724: ctxt->checkIndex = 0;
1.1 misho 11725: xmlParseDocTypeDecl(ctxt);
1.1.1.3 ! misho 11726: if (ctxt->instate == XML_PARSER_EOF)
! 11727: goto done;
1.1 misho 11728: if (RAW == '[') {
11729: ctxt->instate = XML_PARSER_DTD;
11730: #ifdef DEBUG_PUSH
11731: xmlGenericError(xmlGenericErrorContext,
11732: "PP: entering DTD\n");
11733: #endif
11734: } else {
11735: /*
11736: * Create and update the external subset.
11737: */
11738: ctxt->inSubset = 2;
11739: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11740: (ctxt->sax->externalSubset != NULL))
11741: ctxt->sax->externalSubset(ctxt->userData,
11742: ctxt->intSubName, ctxt->extSubSystem,
11743: ctxt->extSubURI);
11744: ctxt->inSubset = 0;
11745: xmlCleanSpecialAttr(ctxt);
11746: ctxt->instate = XML_PARSER_PROLOG;
11747: #ifdef DEBUG_PUSH
11748: xmlGenericError(xmlGenericErrorContext,
11749: "PP: entering PROLOG\n");
11750: #endif
11751: }
11752: } else if ((cur == '<') && (next == '!') &&
11753: (avail < 9)) {
11754: goto done;
11755: } else {
11756: ctxt->instate = XML_PARSER_START_TAG;
1.1.1.3 ! misho 11757: ctxt->progressive = XML_PARSER_START_TAG;
1.1 misho 11758: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11759: #ifdef DEBUG_PUSH
11760: xmlGenericError(xmlGenericErrorContext,
11761: "PP: entering START_TAG\n");
11762: #endif
11763: }
11764: break;
11765: case XML_PARSER_PROLOG:
11766: SKIP_BLANKS;
11767: if (ctxt->input->buf == NULL)
11768: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11769: else
1.1.1.3 ! misho 11770: avail = xmlBufUse(ctxt->input->buf->buffer) -
! 11771: (ctxt->input->cur - ctxt->input->base);
! 11772: if (avail < 2)
1.1 misho 11773: goto done;
11774: cur = ctxt->input->cur[0];
11775: next = ctxt->input->cur[1];
11776: if ((cur == '<') && (next == '?')) {
11777: if ((!terminate) &&
1.1.1.3 ! misho 11778: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
! 11779: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11780: goto done;
1.1.1.3 ! misho 11781: }
1.1 misho 11782: #ifdef DEBUG_PUSH
11783: xmlGenericError(xmlGenericErrorContext,
11784: "PP: Parsing PI\n");
11785: #endif
11786: xmlParsePI(ctxt);
1.1.1.3 ! misho 11787: if (ctxt->instate == XML_PARSER_EOF)
! 11788: goto done;
! 11789: ctxt->instate = XML_PARSER_PROLOG;
! 11790: ctxt->progressive = 1;
1.1 misho 11791: } else if ((cur == '<') && (next == '!') &&
11792: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11793: if ((!terminate) &&
1.1.1.3 ! misho 11794: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
! 11795: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11796: goto done;
1.1.1.3 ! misho 11797: }
1.1 misho 11798: #ifdef DEBUG_PUSH
11799: xmlGenericError(xmlGenericErrorContext,
11800: "PP: Parsing Comment\n");
11801: #endif
11802: xmlParseComment(ctxt);
1.1.1.3 ! misho 11803: if (ctxt->instate == XML_PARSER_EOF)
! 11804: goto done;
1.1 misho 11805: ctxt->instate = XML_PARSER_PROLOG;
1.1.1.3 ! misho 11806: ctxt->progressive = 1;
1.1 misho 11807: } else if ((cur == '<') && (next == '!') &&
11808: (avail < 4)) {
11809: goto done;
11810: } else {
11811: ctxt->instate = XML_PARSER_START_TAG;
11812: if (ctxt->progressive == 0)
1.1.1.3 ! misho 11813: ctxt->progressive = XML_PARSER_START_TAG;
1.1 misho 11814: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11815: #ifdef DEBUG_PUSH
11816: xmlGenericError(xmlGenericErrorContext,
11817: "PP: entering START_TAG\n");
11818: #endif
11819: }
11820: break;
11821: case XML_PARSER_EPILOG:
11822: SKIP_BLANKS;
11823: if (ctxt->input->buf == NULL)
11824: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11825: else
1.1.1.3 ! misho 11826: avail = xmlBufUse(ctxt->input->buf->buffer) -
! 11827: (ctxt->input->cur - ctxt->input->base);
1.1 misho 11828: if (avail < 2)
11829: goto done;
11830: cur = ctxt->input->cur[0];
11831: next = ctxt->input->cur[1];
11832: if ((cur == '<') && (next == '?')) {
11833: if ((!terminate) &&
1.1.1.3 ! misho 11834: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
! 11835: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11836: goto done;
1.1.1.3 ! misho 11837: }
1.1 misho 11838: #ifdef DEBUG_PUSH
11839: xmlGenericError(xmlGenericErrorContext,
11840: "PP: Parsing PI\n");
11841: #endif
11842: xmlParsePI(ctxt);
1.1.1.3 ! misho 11843: if (ctxt->instate == XML_PARSER_EOF)
! 11844: goto done;
1.1 misho 11845: ctxt->instate = XML_PARSER_EPILOG;
1.1.1.3 ! misho 11846: ctxt->progressive = 1;
1.1 misho 11847: } else if ((cur == '<') && (next == '!') &&
11848: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11849: if ((!terminate) &&
1.1.1.3 ! misho 11850: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
! 11851: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11852: goto done;
1.1.1.3 ! misho 11853: }
1.1 misho 11854: #ifdef DEBUG_PUSH
11855: xmlGenericError(xmlGenericErrorContext,
11856: "PP: Parsing Comment\n");
11857: #endif
11858: xmlParseComment(ctxt);
1.1.1.3 ! misho 11859: if (ctxt->instate == XML_PARSER_EOF)
! 11860: goto done;
1.1 misho 11861: ctxt->instate = XML_PARSER_EPILOG;
1.1.1.3 ! misho 11862: ctxt->progressive = 1;
1.1 misho 11863: } else if ((cur == '<') && (next == '!') &&
11864: (avail < 4)) {
11865: goto done;
11866: } else {
11867: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11868: ctxt->instate = XML_PARSER_EOF;
11869: #ifdef DEBUG_PUSH
11870: xmlGenericError(xmlGenericErrorContext,
11871: "PP: entering EOF\n");
11872: #endif
11873: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11874: ctxt->sax->endDocument(ctxt->userData);
11875: goto done;
11876: }
11877: break;
11878: case XML_PARSER_DTD: {
11879: /*
11880: * Sorry but progressive parsing of the internal subset
11881: * is not expected to be supported. We first check that
11882: * the full content of the internal subset is available and
11883: * the parsing is launched only at that point.
11884: * Internal subset ends up with "']' S? '>'" in an unescaped
11885: * section and not in a ']]>' sequence which are conditional
11886: * sections (whoever argued to keep that crap in XML deserve
11887: * a place in hell !).
11888: */
11889: int base, i;
11890: xmlChar *buf;
11891: xmlChar quote = 0;
1.1.1.3 ! misho 11892: size_t use;
1.1 misho 11893:
11894: base = ctxt->input->cur - ctxt->input->base;
11895: if (base < 0) return(0);
11896: if (ctxt->checkIndex > base)
11897: base = ctxt->checkIndex;
1.1.1.3 ! misho 11898: buf = xmlBufContent(ctxt->input->buf->buffer);
! 11899: use = xmlBufUse(ctxt->input->buf->buffer);
! 11900: for (;(unsigned int) base < use; base++) {
1.1 misho 11901: if (quote != 0) {
11902: if (buf[base] == quote)
11903: quote = 0;
1.1.1.3 ! misho 11904: continue;
1.1 misho 11905: }
11906: if ((quote == 0) && (buf[base] == '<')) {
11907: int found = 0;
11908: /* special handling of comments */
1.1.1.3 ! misho 11909: if (((unsigned int) base + 4 < use) &&
1.1 misho 11910: (buf[base + 1] == '!') &&
11911: (buf[base + 2] == '-') &&
11912: (buf[base + 3] == '-')) {
1.1.1.3 ! misho 11913: for (;(unsigned int) base + 3 < use; base++) {
1.1 misho 11914: if ((buf[base] == '-') &&
11915: (buf[base + 1] == '-') &&
11916: (buf[base + 2] == '>')) {
11917: found = 1;
11918: base += 2;
11919: break;
11920: }
11921: }
11922: if (!found) {
11923: #if 0
11924: fprintf(stderr, "unfinished comment\n");
11925: #endif
11926: break; /* for */
11927: }
11928: continue;
11929: }
11930: }
11931: if (buf[base] == '"') {
11932: quote = '"';
11933: continue;
11934: }
11935: if (buf[base] == '\'') {
11936: quote = '\'';
11937: continue;
11938: }
11939: if (buf[base] == ']') {
11940: #if 0
11941: fprintf(stderr, "%c%c%c%c: ", buf[base],
11942: buf[base + 1], buf[base + 2], buf[base + 3]);
11943: #endif
1.1.1.3 ! misho 11944: if ((unsigned int) base +1 >= use)
1.1 misho 11945: break;
11946: if (buf[base + 1] == ']') {
11947: /* conditional crap, skip both ']' ! */
11948: base++;
11949: continue;
11950: }
1.1.1.3 ! misho 11951: for (i = 1; (unsigned int) base + i < use; i++) {
1.1 misho 11952: if (buf[base + i] == '>') {
11953: #if 0
11954: fprintf(stderr, "found\n");
11955: #endif
11956: goto found_end_int_subset;
11957: }
11958: if (!IS_BLANK_CH(buf[base + i])) {
11959: #if 0
11960: fprintf(stderr, "not found\n");
11961: #endif
11962: goto not_end_of_int_subset;
11963: }
11964: }
11965: #if 0
11966: fprintf(stderr, "end of stream\n");
11967: #endif
11968: break;
1.1.1.3 ! misho 11969:
1.1 misho 11970: }
11971: not_end_of_int_subset:
11972: continue; /* for */
11973: }
11974: /*
11975: * We didn't found the end of the Internal subset
11976: */
1.1.1.3 ! misho 11977: if (quote == 0)
! 11978: ctxt->checkIndex = base;
! 11979: else
! 11980: ctxt->checkIndex = 0;
1.1 misho 11981: #ifdef DEBUG_PUSH
11982: if (next == 0)
11983: xmlGenericError(xmlGenericErrorContext,
11984: "PP: lookup of int subset end filed\n");
11985: #endif
11986: goto done;
11987:
11988: found_end_int_subset:
1.1.1.3 ! misho 11989: ctxt->checkIndex = 0;
1.1 misho 11990: xmlParseInternalSubset(ctxt);
1.1.1.3 ! misho 11991: if (ctxt->instate == XML_PARSER_EOF)
! 11992: goto done;
1.1 misho 11993: ctxt->inSubset = 2;
11994: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11995: (ctxt->sax->externalSubset != NULL))
11996: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11997: ctxt->extSubSystem, ctxt->extSubURI);
11998: ctxt->inSubset = 0;
11999: xmlCleanSpecialAttr(ctxt);
1.1.1.3 ! misho 12000: if (ctxt->instate == XML_PARSER_EOF)
! 12001: goto done;
1.1 misho 12002: ctxt->instate = XML_PARSER_PROLOG;
12003: ctxt->checkIndex = 0;
12004: #ifdef DEBUG_PUSH
12005: xmlGenericError(xmlGenericErrorContext,
12006: "PP: entering PROLOG\n");
12007: #endif
12008: break;
12009: }
12010: case XML_PARSER_COMMENT:
12011: xmlGenericError(xmlGenericErrorContext,
12012: "PP: internal error, state == COMMENT\n");
12013: ctxt->instate = XML_PARSER_CONTENT;
12014: #ifdef DEBUG_PUSH
12015: xmlGenericError(xmlGenericErrorContext,
12016: "PP: entering CONTENT\n");
12017: #endif
12018: break;
12019: case XML_PARSER_IGNORE:
12020: xmlGenericError(xmlGenericErrorContext,
12021: "PP: internal error, state == IGNORE");
12022: ctxt->instate = XML_PARSER_DTD;
12023: #ifdef DEBUG_PUSH
12024: xmlGenericError(xmlGenericErrorContext,
12025: "PP: entering DTD\n");
12026: #endif
12027: break;
12028: case XML_PARSER_PI:
12029: xmlGenericError(xmlGenericErrorContext,
12030: "PP: internal error, state == PI\n");
12031: ctxt->instate = XML_PARSER_CONTENT;
12032: #ifdef DEBUG_PUSH
12033: xmlGenericError(xmlGenericErrorContext,
12034: "PP: entering CONTENT\n");
12035: #endif
12036: break;
12037: case XML_PARSER_ENTITY_DECL:
12038: xmlGenericError(xmlGenericErrorContext,
12039: "PP: internal error, state == ENTITY_DECL\n");
12040: ctxt->instate = XML_PARSER_DTD;
12041: #ifdef DEBUG_PUSH
12042: xmlGenericError(xmlGenericErrorContext,
12043: "PP: entering DTD\n");
12044: #endif
12045: break;
12046: case XML_PARSER_ENTITY_VALUE:
12047: xmlGenericError(xmlGenericErrorContext,
12048: "PP: internal error, state == ENTITY_VALUE\n");
12049: ctxt->instate = XML_PARSER_CONTENT;
12050: #ifdef DEBUG_PUSH
12051: xmlGenericError(xmlGenericErrorContext,
12052: "PP: entering DTD\n");
12053: #endif
12054: break;
12055: case XML_PARSER_ATTRIBUTE_VALUE:
12056: xmlGenericError(xmlGenericErrorContext,
12057: "PP: internal error, state == ATTRIBUTE_VALUE\n");
12058: ctxt->instate = XML_PARSER_START_TAG;
12059: #ifdef DEBUG_PUSH
12060: xmlGenericError(xmlGenericErrorContext,
12061: "PP: entering START_TAG\n");
12062: #endif
12063: break;
12064: case XML_PARSER_SYSTEM_LITERAL:
12065: xmlGenericError(xmlGenericErrorContext,
12066: "PP: internal error, state == SYSTEM_LITERAL\n");
12067: ctxt->instate = XML_PARSER_START_TAG;
12068: #ifdef DEBUG_PUSH
12069: xmlGenericError(xmlGenericErrorContext,
12070: "PP: entering START_TAG\n");
12071: #endif
12072: break;
12073: case XML_PARSER_PUBLIC_LITERAL:
12074: xmlGenericError(xmlGenericErrorContext,
12075: "PP: internal error, state == PUBLIC_LITERAL\n");
12076: ctxt->instate = XML_PARSER_START_TAG;
12077: #ifdef DEBUG_PUSH
12078: xmlGenericError(xmlGenericErrorContext,
12079: "PP: entering START_TAG\n");
12080: #endif
12081: break;
12082: }
12083: }
1.1.1.3 ! misho 12084: done:
1.1 misho 12085: #ifdef DEBUG_PUSH
12086: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12087: #endif
12088: return(ret);
12089: encoding_error:
12090: {
12091: char buffer[150];
12092:
12093: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12094: ctxt->input->cur[0], ctxt->input->cur[1],
12095: ctxt->input->cur[2], ctxt->input->cur[3]);
12096: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12097: "Input is not proper UTF-8, indicate encoding !\n%s",
12098: BAD_CAST buffer, NULL);
12099: }
12100: return(0);
12101: }
12102:
12103: /**
1.1.1.3 ! misho 12104: * xmlParseCheckTransition:
! 12105: * @ctxt: an XML parser context
! 12106: * @chunk: a char array (the data to scan)
! 12107: * @size: the size in bytes of the chunk
! 12108: *
! 12109: * Checks, depending on the current parser state, whether the given chunk must
! 12110: * be processed immediately or whether more data is needed to advance parsing.
! 12111: *
! 12112: * Returns -1 in case of error (NULL @ctxt or @chunk, negative @size), 0 if parsing can wait for more data and 1 if the chunk should be parsed now. NOTE: xmlParseChunk tests the result for non-zero, so -1 also triggers a parse there.
! 12113: */
! 12114: static int
! 12115: xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
! 12116: if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) /* reject invalid arguments */
! 12117: return(-1);
! 12118: if (ctxt->instate == XML_PARSER_START_TAG) { /* parse only if the chunk contains a '>' */
! 12119: if (memchr(chunk, '>', size) != NULL)
! 12120: return(1);
! 12121: return(0);
! 12122: }
! 12123: if (ctxt->progressive == XML_PARSER_COMMENT) { /* progressive is set to this value by the push parser when the end of a comment was not found yet */
! 12124: if (memchr(chunk, '>', size) != NULL) /* same '>' test as for START_TAG */
! 12125: return(1);
! 12126: return(0);
! 12127: }
! 12128: if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
! 12129: if (memchr(chunk, '>', size) != NULL) /* same '>' test */
! 12130: return(1);
! 12131: return(0);
! 12132: }
! 12133: if (ctxt->progressive == XML_PARSER_PI) { /* set by the push parser when the end of a PI was not found yet */
! 12134: if (memchr(chunk, '>', size) != NULL) /* same '>' test */
! 12135: return(1);
! 12136: return(0);
! 12137: }
! 12138: if (ctxt->instate == XML_PARSER_END_TAG) {
! 12139: if (memchr(chunk, '>', size) != NULL) /* same '>' test */
! 12140: return(1);
! 12141: return(0);
! 12142: }
! 12143: if ((ctxt->progressive == XML_PARSER_DTD) || /* either waiting for the DOCTYPE '>' or inside the internal subset */
! 12144: (ctxt->instate == XML_PARSER_DTD)) {
! 12145: if (memchr(chunk, '>', size) != NULL) /* same '>' test */
! 12146: return(1);
! 12147: return(0);
! 12148: }
! 12149: return(1); /* any other state: always attempt a parse */
! 12150: }
! 12151:
! 12152: /**
1.1 misho 12153: * xmlParseChunk:
12154: * @ctxt: an XML parser context
12155: * @chunk: a char array (may be NULL, in which case nothing is pushed)
12156: * @size: the size in bytes of the chunk
12157: * @terminate: last chunk indicator (non-zero when this is the final chunk)
12158: *
12159: * Parse a chunk of memory: the data is pushed to the input buffer of @ctxt and, when worthwhile, the progressive parser is run on what is available.
12160: *
12161: * Returns zero if no error, the xmlParserErrors otherwise (-1 if the parser was already in the XML_PARSER_EOF state on entry).
12162: */
12163: int
12164: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12165: int terminate) {
12166: int end_in_lf = 0; /* NOTE: despite its name, set when the chunk ends with a CR ('\r') */
12167: int remain = 0; /* bytes of the chunk not pushed in the first pass (see the encoder handling below) */
1.1.1.3 ! misho 12168: size_t old_avail = 0; /* buffer use before this push */
! 12169: size_t avail = 0; /* buffer use after this push */
1.1 misho 12170:
12171: if (ctxt == NULL)
12172: return(XML_ERR_INTERNAL_ERROR);
12173: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) /* earlier error with SAX disabled: report it again */
12174: return(ctxt->errNo);
1.1.1.3 ! misho 12175: if (ctxt->instate == XML_PARSER_EOF)
! 12176: return(-1);
1.1 misho 12177: if (ctxt->instate == XML_PARSER_START)
12178: xmlDetectSAX2(ctxt);
12179: if ((size > 0) && (chunk != NULL) && (!terminate) &&
12180: (chunk[size - 1] == '\r')) { /* hold back a trailing '\r'; it is pushed on its own after parsing */
12181: end_in_lf = 1;
12182: size--;
12183: }
12184:
12185: xmldecl_done: /* re-entered by the goto below to push the bytes counted in remain */
12186:
12187: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12188: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
1.1.1.3 ! misho 12189: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); /* base and cur are handed back to xmlBufSetInputBaseCur() after the push */
! 12190: size_t cur = ctxt->input->cur - ctxt->input->base;
1.1 misho 12191: int res;
12192:
1.1.1.3 ! misho 12193: old_avail = xmlBufUse(ctxt->input->buf->buffer);
1.1 misho 12194: /*
12195: * Specific handling if we autodetected an encoding: we should not
12196: * push more than the first line (whose byte length depends on the
12197: * encoding) and only push the rest once the final encoding was detected.
12198: */
12199: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12200: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12201: unsigned int len = 45; /* byte budget for this first push; raised below for UTF-16 and UCS-4 encoders */
12202:
12203: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12204: BAD_CAST "UTF-16")) ||
12205: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12206: BAD_CAST "UTF16")))
12207: len = 90;
12208: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12209: BAD_CAST "UCS-4")) ||
12210: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12211: BAD_CAST "UCS4")))
12212: len = 180;
12213:
12214: if (ctxt->input->buf->rawconsumed < len) /* discount the raw bytes already consumed */
12215: len -= ctxt->input->buf->rawconsumed;
12216:
12217: /*
12218: * Change size for reading the initial declaration only
12219: * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12220: * will blindly copy extra bytes from memory.
12221: */
12222: if ((unsigned int) size > len) {
12223: remain = size - len;
12224: size = len;
12225: } else {
12226: remain = 0;
12227: }
12228: }
1.1.1.3 ! misho 12229: res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.1 misho 12230: if (res < 0) {
12231: ctxt->errNo = XML_PARSER_EOF; /* NOTE(review): a parser-state constant is stored in errNo and returned as the error code -- confirm intended */
12232: ctxt->disableSAX = 1;
12233: return (XML_PARSER_EOF);
12234: }
1.1.1.3 ! misho 12235: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
1.1 misho 12236: #ifdef DEBUG_PUSH
12237: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12238: #endif
12239:
12240: } else if (ctxt->instate != XML_PARSER_EOF) { /* nothing to push: only run the encoder on the raw data, if there is one */
12241: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12242: xmlParserInputBufferPtr in = ctxt->input->buf;
12243: if ((in->encoder != NULL) && (in->buffer != NULL) &&
12244: (in->raw != NULL)) {
12245: int nbchars;
1.1.1.3 ! misho 12246: size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
! 12247: size_t current = ctxt->input->cur - ctxt->input->base;
1.1 misho 12248:
1.1.1.3 ! misho 12249: nbchars = xmlCharEncInput(in, terminate);
1.1 misho 12250: if (nbchars < 0) {
12251: /* TODO 2.6.0 */
12252: xmlGenericError(xmlGenericErrorContext,
12253: "xmlParseChunk: encoder error\n");
12254: return(XML_ERR_INVALID_ENCODING);
12255: }
1.1.1.3 ! misho 12256: xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
1.1 misho 12257: }
12258: }
12259: }
1.1.1.3 ! misho 12260: if (remain != 0) { /* only the head of the chunk was pushed: parse it, never as the final chunk */
1.1 misho 12261: xmlParseTryOrFinish(ctxt, 0);
1.1.1.3 ! misho 12262: } else {
! 12263: if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
! 12264: avail = xmlBufUse(ctxt->input->buf->buffer);
! 12265: /*
! 12266: * Depending on the current state it may not be worth trying
! 12267: * to parse: if nothing in the chunk could trigger a parser
! 12268: * state transition, we need to wait for more data before
! 12269: * calling the parser.
! 12270: */
! 12271: if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
! 12272: (old_avail == 0) || (avail == 0) ||
! 12273: (xmlParseCheckTransition(ctxt, /* any non-zero result, including the -1 error value, triggers the parse */
! 12274: (const char *)&ctxt->input->base[old_avail], /* NOTE(review): old_avail is a buffer use count but indexes input->base here -- presumably base is the start of the buffer content; confirm */
! 12275: avail - old_avail)))
! 12276: xmlParseTryOrFinish(ctxt, terminate);
! 12277: }
! 12278: if (ctxt->instate == XML_PARSER_EOF)
! 12279: return(ctxt->errNo);
! 12280:
! 12281: if ((ctxt->input != NULL) &&
! 12282: (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
! 12283: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
! 12284: ((ctxt->options & XML_PARSE_HUGE) == 0)) { /* the lookup limit is only enforced without XML_PARSE_HUGE */
! 12285: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
! 12286: ctxt->instate = XML_PARSER_EOF;
! 12287: }
1.1 misho 12288: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12289: return(ctxt->errNo);
12290:
12291: if (remain != 0) { /* push and parse the rest of the chunk */
12292: chunk += size;
12293: size = remain;
12294: remain = 0;
12295: goto xmldecl_done;
12296: }
12297: if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12298: (ctxt->input->buf != NULL)) { /* now push the '\r' that was held back on entry */
1.1.1.3 ! misho 12299: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
! 12300: ctxt->input);
! 12301: size_t current = ctxt->input->cur - ctxt->input->base;
! 12302:
1.1 misho 12303: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); /* return value is not checked here */
1.1.1.3 ! misho 12304:
! 12305: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
! 12306: base, current);
1.1 misho 12307: }
12308: if (terminate) {
12309: /*
12310: * Check for termination: report an error if the document did not
12311: * end cleanly, send endDocument and move to the EOF state.
12312: */
1.1.1.3 ! misho 12312: int cur_avail = 0; /* bytes left unparsed in the current input */
1.1 misho 12313:
12314: if (ctxt->input != NULL) {
12315: if (ctxt->input->buf == NULL)
1.1.1.3 ! misho 12316: cur_avail = ctxt->input->length -
! 12317: (ctxt->input->cur - ctxt->input->base);
1.1 misho 12318: else
1.1.1.3 ! misho 12319: cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
! 12320: (ctxt->input->cur - ctxt->input->base);
1.1 misho 12321: }
1.1.1.3 ! misho 12322:
1.1 misho 12323: if ((ctxt->instate != XML_PARSER_EOF) &&
12324: (ctxt->instate != XML_PARSER_EPILOG)) { /* input ended before the epilog was reached */
12325: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
1.1.1.3 ! misho 12326: }
! 12327: if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { /* unparsed data remains in the epilog */
1.1 misho 12328: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12329: }
12330: if (ctxt->instate != XML_PARSER_EOF) { /* endDocument is skipped when the parser already reached EOF */
12331: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12332: ctxt->sax->endDocument(ctxt->userData);
12333: }
12334: ctxt->instate = XML_PARSER_EOF;
12335: }
1.1.1.3 ! misho 12336: if (ctxt->wellFormed == 0) /* errNo is only reported when the document is not well formed */
! 12337: return((xmlParserErrors) ctxt->errNo);
! 12338: else
! 12339: return(0);
1.1 misho 12340: }
12341:
12342: /************************************************************************
12343: * *
1.1.1.3 ! misho 12344: * I/O front end functions to the parser *
1.1 misho 12345: * *
12346: ************************************************************************/
12347:
12348: /**
12349: * xmlCreatePushParserCtxt:
12350: * @sax: a SAX handler (copied into the context), or NULL to keep the handler set up by xmlNewParserCtxt()
12351: * @user_data: The user data returned on SAX callbacks (only stored when @sax and @user_data are both non-NULL)
12352: * @chunk: a pointer to an array of chars
12353: * @size: number of chars in the array
12354: * @filename: an optional file name or URI
12355: *
12356: * Create a parser context for using the XML parser in push mode.
12357: * If @chunk is non-NULL and @size is non-zero, the data is used to detect
12358: * the encoding. The remaining characters will be parsed so they
12359: * don't need to be fed in again through xmlParseChunk.
12360: * To allow content encoding detection, @size should be >= 4.
12361: * The value of @filename is used for fetching external entities
12362: * and error/warning reports.
12363: *
12364: * Returns the new parser context or NULL in case of failure
12365: */
12366:
12367: xmlParserCtxtPtr
1.1.1.3 ! misho 12368: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
1.1 misho 12369: const char *chunk, int size, const char *filename) {
12370: xmlParserCtxtPtr ctxt;
12371: xmlParserInputPtr inputStream;
12372: xmlParserInputBufferPtr buf;
12373: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12374:
12375: /*
12376: * detect the encoding so the matching conversion routines can be plugged in
12377: */
12378: if ((chunk != NULL) && (size >= 4)) /* detection is only attempted with at least 4 bytes */
12379: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12380:
12381: buf = xmlAllocParserInputBuffer(enc);
12382: if (buf == NULL) return(NULL);
12383:
12384: ctxt = xmlNewParserCtxt();
12385: if (ctxt == NULL) {
12386: xmlErrMemory(NULL, "creating parser: out of memory\n");
12387: xmlFreeParserInputBuffer(buf);
12388: return(NULL);
12389: }
12390: ctxt->dictNames = 1;
12391: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); /* three slots per nesting level; the push parser reads pushTab[nameNr * 3 - 3], [- 2] and [- 1] */
12392: if (ctxt->pushTab == NULL) {
12393: xmlErrMemory(ctxt, NULL);
12394: xmlFreeParserInputBuffer(buf);
12395: xmlFreeParserCtxt(ctxt);
12396: return(NULL);
12397: }
12398: if (sax != NULL) { /* give the context its own copy of the caller's handler */
12399: #ifdef LIBXML_SAX1_ENABLED
12400: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12401: #endif /* LIBXML_SAX1_ENABLED */
12402: xmlFree(ctxt->sax); /* with SAX1 enabled, the shared xmlDefaultSAXHandler is never freed */
12403: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12404: if (ctxt->sax == NULL) {
12405: xmlErrMemory(ctxt, NULL);
12406: xmlFreeParserInputBuffer(buf);
12407: xmlFreeParserCtxt(ctxt);
12408: return(NULL);
12409: }
12410: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12411: if (sax->initialized == XML_SAX2_MAGIC) /* SAX2 handler: copy the whole struct, otherwise only the V1 part */
12412: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12413: else
12414: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12415: if (user_data != NULL)
12416: ctxt->userData = user_data;
1.1.1.3 ! misho 12417: }
1.1 misho 12418: if (filename == NULL) {
12419: ctxt->directory = NULL;
12420: } else {
12421: ctxt->directory = xmlParserGetDirectory(filename);
12422: }
12423:
12424: inputStream = xmlNewInputStream(ctxt);
12425: if (inputStream == NULL) {
12426: xmlFreeParserCtxt(ctxt);
12427: xmlFreeParserInputBuffer(buf);
12428: return(NULL);
12429: }
12430:
12431: if (filename == NULL)
12432: inputStream->filename = NULL;
12433: else {
12434: inputStream->filename = (char *)
12435: xmlCanonicPath((const xmlChar *) filename);
12436: if (inputStream->filename == NULL) { /* NOTE(review): inputStream is not attached to ctxt yet (inputPush comes later) and is not freed on this path -- possible leak, confirm */
12437: xmlFreeParserCtxt(ctxt);
12438: xmlFreeParserInputBuffer(buf);
12439: return(NULL);
12440: }
12441: }
12442: inputStream->buf = buf;
1.1.1.3 ! misho 12443: xmlBufResetInput(inputStream->buf->buffer, inputStream);
1.1 misho 12444: inputPush(ctxt, inputStream);
12445:
12446: /*
12447: * If the caller didn't provide an initial 'chunk' for determining
12448: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12449: * that it can be automatically determined later
12450: */
12451: if ((size == 0) || (chunk == NULL)) {
12452: ctxt->charset = XML_CHAR_ENCODING_NONE;
12453: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
1.1.1.3 ! misho 12454: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); /* base and cur are handed back to xmlBufSetInputBaseCur() after the push */
! 12455: size_t cur = ctxt->input->cur - ctxt->input->base;
1.1 misho 12456:
1.1.1.3 ! misho 12457: xmlParserInputBufferPush(ctxt->input->buf, size, chunk); /* return value is not checked here */
1.1 misho 12458:
1.1.1.3 ! misho 12459: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
1.1 misho 12460: #ifdef DEBUG_PUSH
12461: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12462: #endif
12463: }
12464:
12465: if (enc != XML_CHAR_ENCODING_NONE) { /* an encoding was detected from the initial chunk */
12466: xmlSwitchEncoding(ctxt, enc);
12467: }
12468:
12469: return(ctxt);
12470: }
12471: #endif /* LIBXML_PUSH_ENABLED */
12472:
12473: /**
12474: * xmlStopParser:
12475: * @ctxt: an XML parser context
12476: *
12477: * Blocks further parser processing
12478: */
1.1.1.3 ! misho 12479: void
1.1 misho 12480: xmlStopParser(xmlParserCtxtPtr ctxt) {
12481: if (ctxt == NULL)
12482: return;
12483: ctxt->instate = XML_PARSER_EOF;
1.1.1.3 ! misho 12484: ctxt->errNo = XML_ERR_USER_STOP;
1.1 misho 12485: ctxt->disableSAX = 1;
12486: if (ctxt->input != NULL) {
12487: ctxt->input->cur = BAD_CAST"";
12488: ctxt->input->base = ctxt->input->cur;
12489: }
12490: }
12491:
12492: /**
12493: * xmlCreateIOParserCtxt:
12494: * @sax: a SAX handler
12495: * @user_data: The user data returned on SAX callbacks
12496: * @ioread: an I/O read function
12497: * @ioclose: an I/O close function
12498: * @ioctx: an I/O handler
12499: * @enc: the charset encoding if known
12500: *
12501: * Create a parser context for using the XML parser with an existing
12502: * I/O stream
12503: *
12504: * Returns the new parser context or NULL
12505: */
12506: xmlParserCtxtPtr
12507: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12508: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12509: void *ioctx, xmlCharEncoding enc) {
12510: xmlParserCtxtPtr ctxt;
12511: xmlParserInputPtr inputStream;
12512: xmlParserInputBufferPtr buf;
1.1.1.2 misho 12513:
1.1 misho 12514: if (ioread == NULL) return(NULL);
12515:
12516: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
1.1.1.2 misho 12517: if (buf == NULL) {
12518: if (ioclose != NULL)
12519: ioclose(ioctx);
12520: return (NULL);
12521: }
1.1 misho 12522:
12523: ctxt = xmlNewParserCtxt();
12524: if (ctxt == NULL) {
12525: xmlFreeParserInputBuffer(buf);
12526: return(NULL);
12527: }
12528: if (sax != NULL) {
12529: #ifdef LIBXML_SAX1_ENABLED
12530: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12531: #endif /* LIBXML_SAX1_ENABLED */
12532: xmlFree(ctxt->sax);
12533: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12534: if (ctxt->sax == NULL) {
12535: xmlErrMemory(ctxt, NULL);
12536: xmlFreeParserCtxt(ctxt);
12537: return(NULL);
12538: }
12539: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12540: if (sax->initialized == XML_SAX2_MAGIC)
12541: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12542: else
12543: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12544: if (user_data != NULL)
12545: ctxt->userData = user_data;
1.1.1.2 misho 12546: }
1.1 misho 12547:
12548: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12549: if (inputStream == NULL) {
12550: xmlFreeParserCtxt(ctxt);
12551: return(NULL);
12552: }
12553: inputPush(ctxt, inputStream);
12554:
12555: return(ctxt);
12556: }
12557:
12558: #ifdef LIBXML_VALID_ENABLED
12559: /************************************************************************
12560: * *
1.1.1.3 ! misho 12561: * Front ends when parsing a DTD *
1.1 misho 12562: * *
12563: ************************************************************************/
12564:
12565: /**
12566: * xmlIOParseDTD:
12567: * @sax: the SAX handler block or NULL
12568: * @input: an Input Buffer
12569: * @enc: the charset encoding if known
12570: *
12571: * Load and parse a DTD
1.1.1.3 ! misho 12572: *
1.1 misho 12573: * Returns the resulting xmlDtdPtr or NULL in case of error.
12574: * @input will be freed by the function in any case.
12575: */
12576:
12577: xmlDtdPtr
12578: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12579: xmlCharEncoding enc) {
12580: xmlDtdPtr ret = NULL;
12581: xmlParserCtxtPtr ctxt;
12582: xmlParserInputPtr pinput = NULL;
12583: xmlChar start[4];
12584:
12585: if (input == NULL)
12586: return(NULL);
12587:
12588: ctxt = xmlNewParserCtxt();
12589: if (ctxt == NULL) {
12590: xmlFreeParserInputBuffer(input);
12591: return(NULL);
12592: }
12593:
12594: /*
12595: * Set-up the SAX context
12596: */
1.1.1.3 ! misho 12597: if (sax != NULL) {
1.1 misho 12598: if (ctxt->sax != NULL)
12599: xmlFree(ctxt->sax);
12600: ctxt->sax = sax;
12601: ctxt->userData = ctxt;
12602: }
12603: xmlDetectSAX2(ctxt);
12604:
12605: /*
12606: * generate a parser input from the I/O handler
12607: */
12608:
12609: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12610: if (pinput == NULL) {
12611: if (sax != NULL) ctxt->sax = NULL;
12612: xmlFreeParserInputBuffer(input);
12613: xmlFreeParserCtxt(ctxt);
12614: return(NULL);
12615: }
12616:
12617: /*
12618: * plug some encoding conversion routines here.
12619: */
12620: if (xmlPushInput(ctxt, pinput) < 0) {
12621: if (sax != NULL) ctxt->sax = NULL;
12622: xmlFreeParserCtxt(ctxt);
12623: return(NULL);
12624: }
12625: if (enc != XML_CHAR_ENCODING_NONE) {
12626: xmlSwitchEncoding(ctxt, enc);
12627: }
12628:
12629: pinput->filename = NULL;
12630: pinput->line = 1;
12631: pinput->col = 1;
12632: pinput->base = ctxt->input->cur;
12633: pinput->cur = ctxt->input->cur;
12634: pinput->free = NULL;
12635:
12636: /*
12637: * let's parse that entity knowing it's an external subset.
12638: */
12639: ctxt->inSubset = 2;
12640: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12641: if (ctxt->myDoc == NULL) {
12642: xmlErrMemory(ctxt, "New Doc failed");
12643: return(NULL);
12644: }
12645: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12646: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12647: BAD_CAST "none", BAD_CAST "none");
12648:
12649: if ((enc == XML_CHAR_ENCODING_NONE) &&
12650: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
1.1.1.3 ! misho 12651: /*
1.1 misho 12652: * Get the 4 first bytes and decode the charset
12653: * if enc != XML_CHAR_ENCODING_NONE
12654: * plug some encoding conversion routines.
12655: */
12656: start[0] = RAW;
12657: start[1] = NXT(1);
12658: start[2] = NXT(2);
12659: start[3] = NXT(3);
12660: enc = xmlDetectCharEncoding(start, 4);
12661: if (enc != XML_CHAR_ENCODING_NONE) {
12662: xmlSwitchEncoding(ctxt, enc);
12663: }
12664: }
12665:
12666: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12667:
12668: if (ctxt->myDoc != NULL) {
12669: if (ctxt->wellFormed) {
12670: ret = ctxt->myDoc->extSubset;
12671: ctxt->myDoc->extSubset = NULL;
12672: if (ret != NULL) {
12673: xmlNodePtr tmp;
12674:
12675: ret->doc = NULL;
12676: tmp = ret->children;
12677: while (tmp != NULL) {
12678: tmp->doc = NULL;
12679: tmp = tmp->next;
12680: }
12681: }
12682: } else {
12683: ret = NULL;
12684: }
12685: xmlFreeDoc(ctxt->myDoc);
12686: ctxt->myDoc = NULL;
12687: }
12688: if (sax != NULL) ctxt->sax = NULL;
12689: xmlFreeParserCtxt(ctxt);
1.1.1.3 ! misho 12690:
1.1 misho 12691: return(ret);
12692: }
12693:
12694: /**
12695: * xmlSAXParseDTD:
12696: * @sax: the SAX handler block
12697: * @ExternalID: a NAME* containing the External ID of the DTD
12698: * @SystemID: a NAME* containing the URL to the DTD
12699: *
12700: * Load and parse an external subset.
1.1.1.3 ! misho 12701: *
1.1 misho 12702: * Returns the resulting xmlDtdPtr or NULL in case of error.
12703: */
12704:
12705: xmlDtdPtr
12706: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12707: const xmlChar *SystemID) {
12708: xmlDtdPtr ret = NULL;
12709: xmlParserCtxtPtr ctxt;
12710: xmlParserInputPtr input = NULL;
12711: xmlCharEncoding enc;
12712: xmlChar* systemIdCanonic;
12713:
12714: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12715:
12716: ctxt = xmlNewParserCtxt();
12717: if (ctxt == NULL) {
12718: return(NULL);
12719: }
12720:
12721: /*
12722: * Set-up the SAX context
12723: */
1.1.1.3 ! misho 12724: if (sax != NULL) {
1.1 misho 12725: if (ctxt->sax != NULL)
12726: xmlFree(ctxt->sax);
12727: ctxt->sax = sax;
12728: ctxt->userData = ctxt;
12729: }
1.1.1.3 ! misho 12730:
1.1 misho 12731: /*
12732: * Canonicalise the system ID
12733: */
12734: systemIdCanonic = xmlCanonicPath(SystemID);
12735: if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12736: xmlFreeParserCtxt(ctxt);
12737: return(NULL);
12738: }
12739:
12740: /*
12741: * Ask the Entity resolver to load the damn thing
12742: */
12743:
12744: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12745: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12746: systemIdCanonic);
12747: if (input == NULL) {
12748: if (sax != NULL) ctxt->sax = NULL;
12749: xmlFreeParserCtxt(ctxt);
12750: if (systemIdCanonic != NULL)
12751: xmlFree(systemIdCanonic);
12752: return(NULL);
12753: }
12754:
12755: /*
12756: * plug some encoding conversion routines here.
12757: */
12758: if (xmlPushInput(ctxt, input) < 0) {
12759: if (sax != NULL) ctxt->sax = NULL;
12760: xmlFreeParserCtxt(ctxt);
12761: if (systemIdCanonic != NULL)
12762: xmlFree(systemIdCanonic);
12763: return(NULL);
12764: }
12765: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12766: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12767: xmlSwitchEncoding(ctxt, enc);
12768: }
12769:
12770: if (input->filename == NULL)
12771: input->filename = (char *) systemIdCanonic;
12772: else
12773: xmlFree(systemIdCanonic);
12774: input->line = 1;
12775: input->col = 1;
12776: input->base = ctxt->input->cur;
12777: input->cur = ctxt->input->cur;
12778: input->free = NULL;
12779:
12780: /*
12781: * let's parse that entity knowing it's an external subset.
12782: */
12783: ctxt->inSubset = 2;
12784: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12785: if (ctxt->myDoc == NULL) {
12786: xmlErrMemory(ctxt, "New Doc failed");
12787: if (sax != NULL) ctxt->sax = NULL;
12788: xmlFreeParserCtxt(ctxt);
12789: return(NULL);
12790: }
12791: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12792: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793: ExternalID, SystemID);
12794: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12795:
12796: if (ctxt->myDoc != NULL) {
12797: if (ctxt->wellFormed) {
12798: ret = ctxt->myDoc->extSubset;
12799: ctxt->myDoc->extSubset = NULL;
12800: if (ret != NULL) {
12801: xmlNodePtr tmp;
12802:
12803: ret->doc = NULL;
12804: tmp = ret->children;
12805: while (tmp != NULL) {
12806: tmp->doc = NULL;
12807: tmp = tmp->next;
12808: }
12809: }
12810: } else {
12811: ret = NULL;
12812: }
12813: xmlFreeDoc(ctxt->myDoc);
12814: ctxt->myDoc = NULL;
12815: }
12816: if (sax != NULL) ctxt->sax = NULL;
12817: xmlFreeParserCtxt(ctxt);
12818:
12819: return(ret);
12820: }
12821:
12822:
12823: /**
12824: * xmlParseDTD:
12825: * @ExternalID: a NAME* containing the External ID of the DTD
12826: * @SystemID: a NAME* containing the URL to the DTD
12827: *
12828: * Load and parse an external subset.
12829: *
12830: * Returns the resulting xmlDtdPtr or NULL in case of error.
12831: */
12832:
12833: xmlDtdPtr
12834: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12835: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12836: }
12837: #endif /* LIBXML_VALID_ENABLED */
12838:
12839: /************************************************************************
12840: * *
1.1.1.3 ! misho 12841: * Front ends when parsing an Entity *
1.1 misho 12842: * *
12843: ************************************************************************/
12844:
12845: /**
12846: * xmlParseCtxtExternalEntity:
12847: * @ctx: the existing parsing context
12848: * @URL: the URL for the entity to load
12849: * @ID: the System ID for the entity to load
12850: * @lst: the return value for the set of parsed nodes
12851: *
12852: * Parse an external general entity within an existing parsing context
12853: * An external general parsed entity is well-formed if it matches the
12854: * production labeled extParsedEnt.
12855: *
12856: * [78] extParsedEnt ::= TextDecl? content
12857: *
12858: * Returns 0 if the entity is well formed, -1 in case of args problem and
12859: * the parser error code otherwise
12860: */
12861:
12862: int
12863: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12864: const xmlChar *ID, xmlNodePtr *lst) {
12865: xmlParserCtxtPtr ctxt;
12866: xmlDocPtr newDoc;
12867: xmlNodePtr newRoot;
12868: xmlSAXHandlerPtr oldsax = NULL;
12869: int ret = 0;
12870: xmlChar start[4];
12871: xmlCharEncoding enc;
12872:
12873: if (ctx == NULL) return(-1);
12874:
12875: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12876: (ctx->depth > 1024)) {
12877: return(XML_ERR_ENTITY_LOOP);
12878: }
12879:
12880: if (lst != NULL)
12881: *lst = NULL;
12882: if ((URL == NULL) && (ID == NULL))
12883: return(-1);
12884: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12885: return(-1);
12886:
12887: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12888: if (ctxt == NULL) {
12889: return(-1);
12890: }
12891:
12892: oldsax = ctxt->sax;
12893: ctxt->sax = ctx->sax;
12894: xmlDetectSAX2(ctxt);
12895: newDoc = xmlNewDoc(BAD_CAST "1.0");
12896: if (newDoc == NULL) {
12897: xmlFreeParserCtxt(ctxt);
12898: return(-1);
12899: }
12900: newDoc->properties = XML_DOC_INTERNAL;
12901: if (ctx->myDoc->dict) {
12902: newDoc->dict = ctx->myDoc->dict;
12903: xmlDictReference(newDoc->dict);
12904: }
12905: if (ctx->myDoc != NULL) {
12906: newDoc->intSubset = ctx->myDoc->intSubset;
12907: newDoc->extSubset = ctx->myDoc->extSubset;
12908: }
12909: if (ctx->myDoc->URL != NULL) {
12910: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12911: }
12912: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12913: if (newRoot == NULL) {
12914: ctxt->sax = oldsax;
12915: xmlFreeParserCtxt(ctxt);
12916: newDoc->intSubset = NULL;
12917: newDoc->extSubset = NULL;
12918: xmlFreeDoc(newDoc);
12919: return(-1);
12920: }
12921: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12922: nodePush(ctxt, newDoc->children);
12923: if (ctx->myDoc == NULL) {
12924: ctxt->myDoc = newDoc;
12925: } else {
12926: ctxt->myDoc = ctx->myDoc;
12927: newDoc->children->doc = ctx->myDoc;
12928: }
12929:
12930: /*
12931: * Get the 4 first bytes and decode the charset
12932: * if enc != XML_CHAR_ENCODING_NONE
12933: * plug some encoding conversion routines.
12934: */
12935: GROW
12936: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12937: start[0] = RAW;
12938: start[1] = NXT(1);
12939: start[2] = NXT(2);
12940: start[3] = NXT(3);
12941: enc = xmlDetectCharEncoding(start, 4);
12942: if (enc != XML_CHAR_ENCODING_NONE) {
12943: xmlSwitchEncoding(ctxt, enc);
12944: }
12945: }
12946:
12947: /*
12948: * Parse a possible text declaration first
12949: */
12950: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12951: xmlParseTextDecl(ctxt);
12952: /*
12953: * An XML-1.0 document can't reference an entity not XML-1.0
12954: */
12955: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12956: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
1.1.1.3 ! misho 12957: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
1.1 misho 12958: "Version mismatch between document and entity\n");
12959: }
12960: }
12961:
12962: /*
1.1.1.2 misho 12963: * If the user provided its own SAX callbacks then reuse the
12964: * useData callback field, otherwise the expected setup in a
12965: * DOM builder is to have userData == ctxt
12966: */
12967: if (ctx->userData == ctx)
12968: ctxt->userData = ctxt;
12969: else
12970: ctxt->userData = ctx->userData;
12971:
12972: /*
1.1 misho 12973: * Doing validity checking on chunk doesn't make sense
12974: */
12975: ctxt->instate = XML_PARSER_CONTENT;
12976: ctxt->validate = ctx->validate;
12977: ctxt->valid = ctx->valid;
12978: ctxt->loadsubset = ctx->loadsubset;
12979: ctxt->depth = ctx->depth + 1;
12980: ctxt->replaceEntities = ctx->replaceEntities;
12981: if (ctxt->validate) {
12982: ctxt->vctxt.error = ctx->vctxt.error;
12983: ctxt->vctxt.warning = ctx->vctxt.warning;
12984: } else {
12985: ctxt->vctxt.error = NULL;
12986: ctxt->vctxt.warning = NULL;
12987: }
12988: ctxt->vctxt.nodeTab = NULL;
12989: ctxt->vctxt.nodeNr = 0;
12990: ctxt->vctxt.nodeMax = 0;
12991: ctxt->vctxt.node = NULL;
12992: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12993: ctxt->dict = ctx->dict;
12994: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12995: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12996: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12997: ctxt->dictNames = ctx->dictNames;
12998: ctxt->attsDefault = ctx->attsDefault;
12999: ctxt->attsSpecial = ctx->attsSpecial;
13000: ctxt->linenumbers = ctx->linenumbers;
13001:
13002: xmlParseContent(ctxt);
13003:
13004: ctx->validate = ctxt->validate;
13005: ctx->valid = ctxt->valid;
13006: if ((RAW == '<') && (NXT(1) == '/')) {
13007: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13008: } else if (RAW != 0) {
13009: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13010: }
13011: if (ctxt->node != newDoc->children) {
13012: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13013: }
13014:
13015: if (!ctxt->wellFormed) {
13016: if (ctxt->errNo == 0)
13017: ret = 1;
13018: else
13019: ret = ctxt->errNo;
13020: } else {
13021: if (lst != NULL) {
13022: xmlNodePtr cur;
13023:
13024: /*
13025: * Return the newly created nodeset after unlinking it from
13026: * they pseudo parent.
13027: */
13028: cur = newDoc->children->children;
13029: *lst = cur;
13030: while (cur != NULL) {
13031: cur->parent = NULL;
13032: cur = cur->next;
13033: }
13034: newDoc->children->children = NULL;
13035: }
13036: ret = 0;
13037: }
13038: ctxt->sax = oldsax;
13039: ctxt->dict = NULL;
13040: ctxt->attsDefault = NULL;
13041: ctxt->attsSpecial = NULL;
13042: xmlFreeParserCtxt(ctxt);
13043: newDoc->intSubset = NULL;
13044: newDoc->extSubset = NULL;
13045: xmlFreeDoc(newDoc);
13046:
13047: return(ret);
13048: }
13049:
13050: /**
13051: * xmlParseExternalEntityPrivate:
13052: * @doc: the document the chunk pertains to
13053: * @oldctxt: the previous parser context if available
13054: * @sax: the SAX handler bloc (possibly NULL)
13055: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13056: * @depth: Used for loop detection, use 0
13057: * @URL: the URL for the entity to load
13058: * @ID: the System ID for the entity to load
13059: * @list: the return value for the set of parsed nodes
13060: *
13061: * Private version of xmlParseExternalEntity()
13062: *
13063: * Returns 0 if the entity is well formed, -1 in case of args problem and
13064: * the parser error code otherwise
13065: */
13066:
13067: static xmlParserErrors
13068: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13069: xmlSAXHandlerPtr sax,
13070: void *user_data, int depth, const xmlChar *URL,
13071: const xmlChar *ID, xmlNodePtr *list) {
13072: xmlParserCtxtPtr ctxt;
13073: xmlDocPtr newDoc;
13074: xmlNodePtr newRoot;
13075: xmlSAXHandlerPtr oldsax = NULL;
13076: xmlParserErrors ret = XML_ERR_OK;
13077: xmlChar start[4];
13078: xmlCharEncoding enc;
13079:
13080: if (((depth > 40) &&
13081: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13082: (depth > 1024)) {
13083: return(XML_ERR_ENTITY_LOOP);
13084: }
13085:
13086: if (list != NULL)
13087: *list = NULL;
13088: if ((URL == NULL) && (ID == NULL))
13089: return(XML_ERR_INTERNAL_ERROR);
13090: if (doc == NULL)
13091: return(XML_ERR_INTERNAL_ERROR);
13092:
13093:
13094: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13095: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13096: ctxt->userData = ctxt;
13097: if (oldctxt != NULL) {
13098: ctxt->_private = oldctxt->_private;
13099: ctxt->loadsubset = oldctxt->loadsubset;
13100: ctxt->validate = oldctxt->validate;
13101: ctxt->external = oldctxt->external;
13102: ctxt->record_info = oldctxt->record_info;
13103: ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13104: ctxt->node_seq.length = oldctxt->node_seq.length;
13105: ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13106: } else {
13107: /*
13108: * Doing validity checking on chunk without context
13109: * doesn't make sense
13110: */
13111: ctxt->_private = NULL;
13112: ctxt->validate = 0;
13113: ctxt->external = 2;
13114: ctxt->loadsubset = 0;
13115: }
13116: if (sax != NULL) {
13117: oldsax = ctxt->sax;
13118: ctxt->sax = sax;
13119: if (user_data != NULL)
13120: ctxt->userData = user_data;
13121: }
13122: xmlDetectSAX2(ctxt);
13123: newDoc = xmlNewDoc(BAD_CAST "1.0");
13124: if (newDoc == NULL) {
13125: ctxt->node_seq.maximum = 0;
13126: ctxt->node_seq.length = 0;
13127: ctxt->node_seq.buffer = NULL;
13128: xmlFreeParserCtxt(ctxt);
13129: return(XML_ERR_INTERNAL_ERROR);
13130: }
13131: newDoc->properties = XML_DOC_INTERNAL;
13132: newDoc->intSubset = doc->intSubset;
13133: newDoc->extSubset = doc->extSubset;
13134: newDoc->dict = doc->dict;
13135: xmlDictReference(newDoc->dict);
13136:
13137: if (doc->URL != NULL) {
13138: newDoc->URL = xmlStrdup(doc->URL);
13139: }
13140: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13141: if (newRoot == NULL) {
13142: if (sax != NULL)
13143: ctxt->sax = oldsax;
13144: ctxt->node_seq.maximum = 0;
13145: ctxt->node_seq.length = 0;
13146: ctxt->node_seq.buffer = NULL;
13147: xmlFreeParserCtxt(ctxt);
13148: newDoc->intSubset = NULL;
13149: newDoc->extSubset = NULL;
13150: xmlFreeDoc(newDoc);
13151: return(XML_ERR_INTERNAL_ERROR);
13152: }
13153: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13154: nodePush(ctxt, newDoc->children);
13155: ctxt->myDoc = doc;
13156: newRoot->doc = doc;
13157:
13158: /*
13159: * Get the 4 first bytes and decode the charset
13160: * if enc != XML_CHAR_ENCODING_NONE
13161: * plug some encoding conversion routines.
13162: */
13163: GROW;
13164: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13165: start[0] = RAW;
13166: start[1] = NXT(1);
13167: start[2] = NXT(2);
13168: start[3] = NXT(3);
13169: enc = xmlDetectCharEncoding(start, 4);
13170: if (enc != XML_CHAR_ENCODING_NONE) {
13171: xmlSwitchEncoding(ctxt, enc);
13172: }
13173: }
13174:
13175: /*
13176: * Parse a possible text declaration first
13177: */
13178: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13179: xmlParseTextDecl(ctxt);
13180: }
13181:
13182: ctxt->instate = XML_PARSER_CONTENT;
13183: ctxt->depth = depth;
13184:
13185: xmlParseContent(ctxt);
13186:
13187: if ((RAW == '<') && (NXT(1) == '/')) {
13188: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13189: } else if (RAW != 0) {
13190: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13191: }
13192: if (ctxt->node != newDoc->children) {
13193: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13194: }
13195:
13196: if (!ctxt->wellFormed) {
13197: if (ctxt->errNo == 0)
13198: ret = XML_ERR_INTERNAL_ERROR;
13199: else
13200: ret = (xmlParserErrors)ctxt->errNo;
13201: } else {
13202: if (list != NULL) {
13203: xmlNodePtr cur;
13204:
13205: /*
13206: * Return the newly created nodeset after unlinking it from
13207: * they pseudo parent.
13208: */
13209: cur = newDoc->children->children;
13210: *list = cur;
13211: while (cur != NULL) {
13212: cur->parent = NULL;
13213: cur = cur->next;
13214: }
13215: newDoc->children->children = NULL;
13216: }
13217: ret = XML_ERR_OK;
13218: }
13219:
13220: /*
13221: * Record in the parent context the number of entities replacement
13222: * done when parsing that reference.
13223: */
13224: if (oldctxt != NULL)
13225: oldctxt->nbentities += ctxt->nbentities;
13226:
13227: /*
13228: * Also record the size of the entity parsed
13229: */
13230: if (ctxt->input != NULL) {
13231: oldctxt->sizeentities += ctxt->input->consumed;
13232: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13233: }
13234: /*
13235: * And record the last error if any
13236: */
13237: if (ctxt->lastError.code != XML_ERR_OK)
13238: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13239:
1.1.1.3 ! misho 13240: if (sax != NULL)
1.1 misho 13241: ctxt->sax = oldsax;
13242: oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243: oldctxt->node_seq.length = ctxt->node_seq.length;
13244: oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245: ctxt->node_seq.maximum = 0;
13246: ctxt->node_seq.length = 0;
13247: ctxt->node_seq.buffer = NULL;
13248: xmlFreeParserCtxt(ctxt);
13249: newDoc->intSubset = NULL;
13250: newDoc->extSubset = NULL;
13251: xmlFreeDoc(newDoc);
13252:
13253: return(ret);
13254: }
13255:
13256: #ifdef LIBXML_SAX1_ENABLED
13257: /**
13258: * xmlParseExternalEntity:
13259: * @doc: the document the chunk pertains to
13260: * @sax: the SAX handler bloc (possibly NULL)
13261: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13262: * @depth: Used for loop detection, use 0
13263: * @URL: the URL for the entity to load
13264: * @ID: the System ID for the entity to load
13265: * @lst: the return value for the set of parsed nodes
13266: *
13267: * Parse an external general entity
13268: * An external general parsed entity is well-formed if it matches the
13269: * production labeled extParsedEnt.
13270: *
13271: * [78] extParsedEnt ::= TextDecl? content
13272: *
13273: * Returns 0 if the entity is well formed, -1 in case of args problem and
13274: * the parser error code otherwise
13275: */
13276:
13277: int
13278: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13279: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13280: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13281: ID, lst));
13282: }
13283:
13284: /**
13285: * xmlParseBalancedChunkMemory:
13286: * @doc: the document the chunk pertains to
13287: * @sax: the SAX handler bloc (possibly NULL)
13288: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13289: * @depth: Used for loop detection, use 0
13290: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13291: * @lst: the return value for the set of parsed nodes
13292: *
13293: * Parse a well-balanced chunk of an XML document
13294: * called by the parser
13295: * The allowed sequence for the Well Balanced Chunk is the one defined by
13296: * the content production in the XML grammar:
13297: *
13298: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13299: *
13300: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13301: * the parser error code otherwise
13302: */
13303:
13304: int
13305: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13306: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13307: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13308: depth, string, lst, 0 );
13309: }
13310: #endif /* LIBXML_SAX1_ENABLED */
13311:
13312: /**
13313: * xmlParseBalancedChunkMemoryInternal:
13314: * @oldctxt: the existing parsing context
13315: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13316: * @user_data: the user data field for the parser context
13317: * @lst: the return value for the set of parsed nodes
13318: *
13319: *
13320: * Parse a well-balanced chunk of an XML document
13321: * called by the parser
13322: * The allowed sequence for the Well Balanced Chunk is the one defined by
13323: * the content production in the XML grammar:
13324: *
13325: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13326: *
13327: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13328: * error code otherwise
13329: *
13330: * In case recover is set to 1, the nodelist will not be empty even if
13331: * the parsed chunk is not well balanced.
13332: */
13333: static xmlParserErrors
13334: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13335: const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13336: xmlParserCtxtPtr ctxt;
13337: xmlDocPtr newDoc = NULL;
13338: xmlNodePtr newRoot;
13339: xmlSAXHandlerPtr oldsax = NULL;
13340: xmlNodePtr content = NULL;
13341: xmlNodePtr last = NULL;
13342: int size;
13343: xmlParserErrors ret = XML_ERR_OK;
13344: #ifdef SAX2
13345: int i;
13346: #endif
13347:
13348: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13349: (oldctxt->depth > 1024)) {
13350: return(XML_ERR_ENTITY_LOOP);
13351: }
13352:
13353:
13354: if (lst != NULL)
13355: *lst = NULL;
13356: if (string == NULL)
13357: return(XML_ERR_INTERNAL_ERROR);
13358:
13359: size = xmlStrlen(string);
13360:
13361: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13362: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13363: if (user_data != NULL)
13364: ctxt->userData = user_data;
13365: else
13366: ctxt->userData = ctxt;
13367: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13368: ctxt->dict = oldctxt->dict;
13369: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13370: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13371: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13372:
13373: #ifdef SAX2
13374: /* propagate namespaces down the entity */
13375: for (i = 0;i < oldctxt->nsNr;i += 2) {
13376: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13377: }
13378: #endif
13379:
13380: oldsax = ctxt->sax;
13381: ctxt->sax = oldctxt->sax;
13382: xmlDetectSAX2(ctxt);
13383: ctxt->replaceEntities = oldctxt->replaceEntities;
13384: ctxt->options = oldctxt->options;
13385:
13386: ctxt->_private = oldctxt->_private;
13387: if (oldctxt->myDoc == NULL) {
13388: newDoc = xmlNewDoc(BAD_CAST "1.0");
13389: if (newDoc == NULL) {
13390: ctxt->sax = oldsax;
13391: ctxt->dict = NULL;
13392: xmlFreeParserCtxt(ctxt);
13393: return(XML_ERR_INTERNAL_ERROR);
13394: }
13395: newDoc->properties = XML_DOC_INTERNAL;
13396: newDoc->dict = ctxt->dict;
13397: xmlDictReference(newDoc->dict);
13398: ctxt->myDoc = newDoc;
13399: } else {
13400: ctxt->myDoc = oldctxt->myDoc;
13401: content = ctxt->myDoc->children;
13402: last = ctxt->myDoc->last;
13403: }
13404: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13405: if (newRoot == NULL) {
13406: ctxt->sax = oldsax;
13407: ctxt->dict = NULL;
13408: xmlFreeParserCtxt(ctxt);
13409: if (newDoc != NULL) {
13410: xmlFreeDoc(newDoc);
13411: }
13412: return(XML_ERR_INTERNAL_ERROR);
13413: }
13414: ctxt->myDoc->children = NULL;
13415: ctxt->myDoc->last = NULL;
13416: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13417: nodePush(ctxt, ctxt->myDoc->children);
13418: ctxt->instate = XML_PARSER_CONTENT;
13419: ctxt->depth = oldctxt->depth + 1;
13420:
13421: ctxt->validate = 0;
13422: ctxt->loadsubset = oldctxt->loadsubset;
13423: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13424: /*
13425: * ID/IDREF registration will be done in xmlValidateElement below
13426: */
13427: ctxt->loadsubset |= XML_SKIP_IDS;
13428: }
13429: ctxt->dictNames = oldctxt->dictNames;
13430: ctxt->attsDefault = oldctxt->attsDefault;
13431: ctxt->attsSpecial = oldctxt->attsSpecial;
13432:
13433: xmlParseContent(ctxt);
13434: if ((RAW == '<') && (NXT(1) == '/')) {
13435: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13436: } else if (RAW != 0) {
13437: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13438: }
13439: if (ctxt->node != ctxt->myDoc->children) {
13440: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13441: }
13442:
13443: if (!ctxt->wellFormed) {
13444: if (ctxt->errNo == 0)
13445: ret = XML_ERR_INTERNAL_ERROR;
13446: else
13447: ret = (xmlParserErrors)ctxt->errNo;
13448: } else {
13449: ret = XML_ERR_OK;
13450: }
13451:
13452: if ((lst != NULL) && (ret == XML_ERR_OK)) {
13453: xmlNodePtr cur;
13454:
13455: /*
13456: * Return the newly created nodeset after unlinking it from
13457: * they pseudo parent.
13458: */
13459: cur = ctxt->myDoc->children->children;
13460: *lst = cur;
13461: while (cur != NULL) {
13462: #ifdef LIBXML_VALID_ENABLED
13463: if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13464: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13465: (cur->type == XML_ELEMENT_NODE)) {
13466: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13467: oldctxt->myDoc, cur);
13468: }
13469: #endif /* LIBXML_VALID_ENABLED */
13470: cur->parent = NULL;
13471: cur = cur->next;
13472: }
13473: ctxt->myDoc->children->children = NULL;
13474: }
13475: if (ctxt->myDoc != NULL) {
13476: xmlFreeNode(ctxt->myDoc->children);
13477: ctxt->myDoc->children = content;
13478: ctxt->myDoc->last = last;
13479: }
13480:
13481: /*
13482: * Record in the parent context the number of entities replacement
13483: * done when parsing that reference.
13484: */
13485: if (oldctxt != NULL)
13486: oldctxt->nbentities += ctxt->nbentities;
13487:
13488: /*
13489: * Also record the last error if any
13490: */
13491: if (ctxt->lastError.code != XML_ERR_OK)
13492: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13493:
13494: ctxt->sax = oldsax;
13495: ctxt->dict = NULL;
13496: ctxt->attsDefault = NULL;
13497: ctxt->attsSpecial = NULL;
13498: xmlFreeParserCtxt(ctxt);
13499: if (newDoc != NULL) {
13500: xmlFreeDoc(newDoc);
13501: }
13502:
13503: return(ret);
13504: }
13505:
13506: /**
13507: * xmlParseInNodeContext:
13508: * @node: the context node
13509: * @data: the input string
13510: * @datalen: the input string length in bytes
13511: * @options: a combination of xmlParserOption
13512: * @lst: the return value for the set of parsed nodes
13513: *
13514: * Parse a well-balanced chunk of an XML document
13515: * within the context (DTD, namespaces, etc ...) of the given node.
13516: *
13517: * The allowed sequence for the data is a Well Balanced Chunk defined by
13518: * the content production in the XML grammar:
13519: *
13520: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13521: *
13522: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13523: * error code otherwise
13524: */
13525: xmlParserErrors
13526: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13527: int options, xmlNodePtr *lst) {
13528: #ifdef SAX2
13529: xmlParserCtxtPtr ctxt;
13530: xmlDocPtr doc = NULL;
13531: xmlNodePtr fake, cur;
13532: int nsnr = 0;
13533:
13534: xmlParserErrors ret = XML_ERR_OK;
13535:
13536: /*
13537: * check all input parameters, grab the document
13538: */
13539: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13540: return(XML_ERR_INTERNAL_ERROR);
13541: switch (node->type) {
13542: case XML_ELEMENT_NODE:
13543: case XML_ATTRIBUTE_NODE:
13544: case XML_TEXT_NODE:
13545: case XML_CDATA_SECTION_NODE:
13546: case XML_ENTITY_REF_NODE:
13547: case XML_PI_NODE:
13548: case XML_COMMENT_NODE:
13549: case XML_DOCUMENT_NODE:
13550: case XML_HTML_DOCUMENT_NODE:
13551: break;
13552: default:
13553: return(XML_ERR_INTERNAL_ERROR);
13554:
13555: }
13556: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13557: (node->type != XML_DOCUMENT_NODE) &&
13558: (node->type != XML_HTML_DOCUMENT_NODE))
13559: node = node->parent;
13560: if (node == NULL)
13561: return(XML_ERR_INTERNAL_ERROR);
13562: if (node->type == XML_ELEMENT_NODE)
13563: doc = node->doc;
13564: else
13565: doc = (xmlDocPtr) node;
13566: if (doc == NULL)
13567: return(XML_ERR_INTERNAL_ERROR);
13568:
13569: /*
13570: * allocate a context and set-up everything not related to the
13571: * node position in the tree
13572: */
13573: if (doc->type == XML_DOCUMENT_NODE)
13574: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13575: #ifdef LIBXML_HTML_ENABLED
13576: else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13577: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13578: /*
13579: * When parsing in context, it makes no sense to add implied
13580: * elements like html/body/etc...
13581: */
13582: options |= HTML_PARSE_NOIMPLIED;
13583: }
13584: #endif
13585: else
13586: return(XML_ERR_INTERNAL_ERROR);
13587:
13588: if (ctxt == NULL)
13589: return(XML_ERR_NO_MEMORY);
13590:
13591: /*
13592: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13593: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13594: * we must wait until the last moment to free the original one.
13595: */
13596: if (doc->dict != NULL) {
13597: if (ctxt->dict != NULL)
13598: xmlDictFree(ctxt->dict);
13599: ctxt->dict = doc->dict;
13600: } else
13601: options |= XML_PARSE_NODICT;
13602:
13603: if (doc->encoding != NULL) {
13604: xmlCharEncodingHandlerPtr hdlr;
13605:
13606: if (ctxt->encoding != NULL)
13607: xmlFree((xmlChar *) ctxt->encoding);
13608: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13609:
13610: hdlr = xmlFindCharEncodingHandler(doc->encoding);
13611: if (hdlr != NULL) {
13612: xmlSwitchToEncoding(ctxt, hdlr);
13613: } else {
13614: return(XML_ERR_UNSUPPORTED_ENCODING);
13615: }
13616: }
13617:
13618: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13619: xmlDetectSAX2(ctxt);
13620: ctxt->myDoc = doc;
13621:
13622: fake = xmlNewComment(NULL);
13623: if (fake == NULL) {
13624: xmlFreeParserCtxt(ctxt);
13625: return(XML_ERR_NO_MEMORY);
13626: }
13627: xmlAddChild(node, fake);
13628:
13629: if (node->type == XML_ELEMENT_NODE) {
13630: nodePush(ctxt, node);
13631: /*
13632: * initialize the SAX2 namespaces stack
13633: */
13634: cur = node;
13635: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13636: xmlNsPtr ns = cur->nsDef;
13637: const xmlChar *iprefix, *ihref;
13638:
13639: while (ns != NULL) {
13640: if (ctxt->dict) {
13641: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13642: ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13643: } else {
13644: iprefix = ns->prefix;
13645: ihref = ns->href;
13646: }
13647:
13648: if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13649: nsPush(ctxt, iprefix, ihref);
13650: nsnr++;
13651: }
13652: ns = ns->next;
13653: }
13654: cur = cur->parent;
13655: }
13656: ctxt->instate = XML_PARSER_CONTENT;
13657: }
13658:
13659: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13660: /*
13661: * ID/IDREF registration will be done in xmlValidateElement below
13662: */
13663: ctxt->loadsubset |= XML_SKIP_IDS;
13664: }
13665:
13666: #ifdef LIBXML_HTML_ENABLED
13667: if (doc->type == XML_HTML_DOCUMENT_NODE)
13668: __htmlParseContent(ctxt);
13669: else
13670: #endif
13671: xmlParseContent(ctxt);
13672:
13673: nsPop(ctxt, nsnr);
13674: if ((RAW == '<') && (NXT(1) == '/')) {
13675: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13676: } else if (RAW != 0) {
13677: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13678: }
13679: if ((ctxt->node != NULL) && (ctxt->node != node)) {
13680: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13681: ctxt->wellFormed = 0;
13682: }
13683:
13684: if (!ctxt->wellFormed) {
13685: if (ctxt->errNo == 0)
13686: ret = XML_ERR_INTERNAL_ERROR;
13687: else
13688: ret = (xmlParserErrors)ctxt->errNo;
13689: } else {
13690: ret = XML_ERR_OK;
13691: }
13692:
13693: /*
13694: * Return the newly created nodeset after unlinking it from
13695: * the pseudo sibling.
13696: */
13697:
13698: cur = fake->next;
13699: fake->next = NULL;
13700: node->last = fake;
13701:
13702: if (cur != NULL) {
13703: cur->prev = NULL;
13704: }
13705:
13706: *lst = cur;
13707:
13708: while (cur != NULL) {
13709: cur->parent = NULL;
13710: cur = cur->next;
13711: }
13712:
13713: xmlUnlinkNode(fake);
13714: xmlFreeNode(fake);
13715:
13716:
13717: if (ret != XML_ERR_OK) {
13718: xmlFreeNodeList(*lst);
13719: *lst = NULL;
13720: }
13721:
13722: if (doc->dict != NULL)
13723: ctxt->dict = NULL;
13724: xmlFreeParserCtxt(ctxt);
13725:
13726: return(ret);
13727: #else /* !SAX2 */
13728: return(XML_ERR_INTERNAL_ERROR);
13729: #endif
13730: }
13731:
13732: #ifdef LIBXML_SAX1_ENABLED
13733: /**
13734: * xmlParseBalancedChunkMemoryRecover:
13735: * @doc: the document the chunk pertains to
13736: * @sax: the SAX handler bloc (possibly NULL)
13737: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13738: * @depth: Used for loop detection, use 0
13739: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13740: * @lst: the return value for the set of parsed nodes
13741: * @recover: return nodes even if the data is broken (use 0)
13742: *
13743: *
13744: * Parse a well-balanced chunk of an XML document
13745: * called by the parser
13746: * The allowed sequence for the Well Balanced Chunk is the one defined by
13747: * the content production in the XML grammar:
13748: *
13749: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13750: *
13751: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13752: * the parser error code otherwise
13753: *
13754: * In case recover is set to 1, the nodelist will not be empty even if
13755: * the parsed chunk is not well balanced, assuming the parsing succeeded to
13756: * some extent.
13757: */
13758: int
13759: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13760: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13761: int recover) {
13762: xmlParserCtxtPtr ctxt;
13763: xmlDocPtr newDoc;
13764: xmlSAXHandlerPtr oldsax = NULL;
13765: xmlNodePtr content, newRoot;
13766: int size;
13767: int ret = 0;
13768:
13769: if (depth > 40) {
13770: return(XML_ERR_ENTITY_LOOP);
13771: }
13772:
13773:
13774: if (lst != NULL)
13775: *lst = NULL;
13776: if (string == NULL)
13777: return(-1);
13778:
13779: size = xmlStrlen(string);
13780:
13781: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13782: if (ctxt == NULL) return(-1);
13783: ctxt->userData = ctxt;
13784: if (sax != NULL) {
13785: oldsax = ctxt->sax;
13786: ctxt->sax = sax;
13787: if (user_data != NULL)
13788: ctxt->userData = user_data;
13789: }
13790: newDoc = xmlNewDoc(BAD_CAST "1.0");
13791: if (newDoc == NULL) {
13792: xmlFreeParserCtxt(ctxt);
13793: return(-1);
13794: }
13795: newDoc->properties = XML_DOC_INTERNAL;
13796: if ((doc != NULL) && (doc->dict != NULL)) {
13797: xmlDictFree(ctxt->dict);
13798: ctxt->dict = doc->dict;
13799: xmlDictReference(ctxt->dict);
13800: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13801: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13802: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13803: ctxt->dictNames = 1;
13804: } else {
13805: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13806: }
13807: if (doc != NULL) {
13808: newDoc->intSubset = doc->intSubset;
13809: newDoc->extSubset = doc->extSubset;
13810: }
13811: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13812: if (newRoot == NULL) {
13813: if (sax != NULL)
13814: ctxt->sax = oldsax;
13815: xmlFreeParserCtxt(ctxt);
13816: newDoc->intSubset = NULL;
13817: newDoc->extSubset = NULL;
13818: xmlFreeDoc(newDoc);
13819: return(-1);
13820: }
13821: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13822: nodePush(ctxt, newRoot);
13823: if (doc == NULL) {
13824: ctxt->myDoc = newDoc;
13825: } else {
13826: ctxt->myDoc = newDoc;
13827: newDoc->children->doc = doc;
13828: /* Ensure that doc has XML spec namespace */
13829: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13830: newDoc->oldNs = doc->oldNs;
13831: }
13832: ctxt->instate = XML_PARSER_CONTENT;
13833: ctxt->depth = depth;
13834:
13835: /*
13836: * Doing validity checking on chunk doesn't make sense
13837: */
13838: ctxt->validate = 0;
13839: ctxt->loadsubset = 0;
13840: xmlDetectSAX2(ctxt);
13841:
13842: if ( doc != NULL ){
13843: content = doc->children;
13844: doc->children = NULL;
13845: xmlParseContent(ctxt);
13846: doc->children = content;
13847: }
13848: else {
13849: xmlParseContent(ctxt);
13850: }
13851: if ((RAW == '<') && (NXT(1) == '/')) {
13852: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13853: } else if (RAW != 0) {
13854: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13855: }
13856: if (ctxt->node != newDoc->children) {
13857: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13858: }
13859:
13860: if (!ctxt->wellFormed) {
13861: if (ctxt->errNo == 0)
13862: ret = 1;
13863: else
13864: ret = ctxt->errNo;
13865: } else {
13866: ret = 0;
13867: }
13868:
13869: if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13870: xmlNodePtr cur;
13871:
13872: /*
13873: * Return the newly created nodeset after unlinking it from
13874: * they pseudo parent.
13875: */
13876: cur = newDoc->children->children;
13877: *lst = cur;
13878: while (cur != NULL) {
13879: xmlSetTreeDoc(cur, doc);
13880: cur->parent = NULL;
13881: cur = cur->next;
13882: }
13883: newDoc->children->children = NULL;
13884: }
13885:
13886: if (sax != NULL)
13887: ctxt->sax = oldsax;
13888: xmlFreeParserCtxt(ctxt);
13889: newDoc->intSubset = NULL;
13890: newDoc->extSubset = NULL;
13891: newDoc->oldNs = NULL;
13892: xmlFreeDoc(newDoc);
13893:
13894: return(ret);
13895: }
13896:
13897: /**
13898: * xmlSAXParseEntity:
13899: * @sax: the SAX handler block
13900: * @filename: the filename
13901: *
13902: * parse an XML external entity out of context and build a tree.
13903: * It use the given SAX function block to handle the parsing callback.
13904: * If sax is NULL, fallback to the default DOM tree building routines.
13905: *
13906: * [78] extParsedEnt ::= TextDecl? content
13907: *
13908: * This correspond to a "Well Balanced" chunk
13909: *
13910: * Returns the resulting document tree
13911: */
13912:
13913: xmlDocPtr
13914: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13915: xmlDocPtr ret;
13916: xmlParserCtxtPtr ctxt;
13917:
13918: ctxt = xmlCreateFileParserCtxt(filename);
13919: if (ctxt == NULL) {
13920: return(NULL);
13921: }
13922: if (sax != NULL) {
13923: if (ctxt->sax != NULL)
13924: xmlFree(ctxt->sax);
13925: ctxt->sax = sax;
13926: ctxt->userData = NULL;
13927: }
13928:
13929: xmlParseExtParsedEnt(ctxt);
13930:
13931: if (ctxt->wellFormed)
13932: ret = ctxt->myDoc;
13933: else {
13934: ret = NULL;
13935: xmlFreeDoc(ctxt->myDoc);
13936: ctxt->myDoc = NULL;
13937: }
13938: if (sax != NULL)
13939: ctxt->sax = NULL;
13940: xmlFreeParserCtxt(ctxt);
13941:
13942: return(ret);
13943: }
13944:
13945: /**
13946: * xmlParseEntity:
13947: * @filename: the filename
13948: *
13949: * parse an XML external entity out of context and build a tree.
13950: *
13951: * [78] extParsedEnt ::= TextDecl? content
13952: *
13953: * This correspond to a "Well Balanced" chunk
13954: *
13955: * Returns the resulting document tree
13956: */
13957:
13958: xmlDocPtr
13959: xmlParseEntity(const char *filename) {
13960: return(xmlSAXParseEntity(NULL, filename));
13961: }
13962: #endif /* LIBXML_SAX1_ENABLED */
13963:
13964: /**
13965: * xmlCreateEntityParserCtxtInternal:
13966: * @URL: the entity URL
13967: * @ID: the entity PUBLIC ID
13968: * @base: a possible base for the target URI
13969: * @pctx: parser context used to set options on new context
13970: *
13971: * Create a parser context for an external entity
13972: * Automatic support for ZLIB/Compress compressed document is provided
13973: * by default if found at compile-time.
13974: *
13975: * Returns the new parser context or NULL
13976: */
13977: static xmlParserCtxtPtr
13978: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13979: const xmlChar *base, xmlParserCtxtPtr pctx) {
13980: xmlParserCtxtPtr ctxt;
13981: xmlParserInputPtr inputStream;
13982: char *directory = NULL;
13983: xmlChar *uri;
13984:
13985: ctxt = xmlNewParserCtxt();
13986: if (ctxt == NULL) {
13987: return(NULL);
13988: }
13989:
13990: if (pctx != NULL) {
13991: ctxt->options = pctx->options;
13992: ctxt->_private = pctx->_private;
13993: }
13994:
13995: uri = xmlBuildURI(URL, base);
13996:
13997: if (uri == NULL) {
13998: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13999: if (inputStream == NULL) {
14000: xmlFreeParserCtxt(ctxt);
14001: return(NULL);
14002: }
14003:
14004: inputPush(ctxt, inputStream);
14005:
14006: if ((ctxt->directory == NULL) && (directory == NULL))
14007: directory = xmlParserGetDirectory((char *)URL);
14008: if ((ctxt->directory == NULL) && (directory != NULL))
14009: ctxt->directory = directory;
14010: } else {
14011: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14012: if (inputStream == NULL) {
14013: xmlFree(uri);
14014: xmlFreeParserCtxt(ctxt);
14015: return(NULL);
14016: }
14017:
14018: inputPush(ctxt, inputStream);
14019:
14020: if ((ctxt->directory == NULL) && (directory == NULL))
14021: directory = xmlParserGetDirectory((char *)uri);
14022: if ((ctxt->directory == NULL) && (directory != NULL))
14023: ctxt->directory = directory;
14024: xmlFree(uri);
14025: }
14026: return(ctxt);
14027: }
14028:
14029: /**
14030: * xmlCreateEntityParserCtxt:
14031: * @URL: the entity URL
14032: * @ID: the entity PUBLIC ID
14033: * @base: a possible base for the target URI
14034: *
14035: * Create a parser context for an external entity
14036: * Automatic support for ZLIB/Compress compressed document is provided
14037: * by default if found at compile-time.
14038: *
14039: * Returns the new parser context or NULL
14040: */
14041: xmlParserCtxtPtr
14042: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14043: const xmlChar *base) {
14044: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14045:
14046: }
14047:
14048: /************************************************************************
14049: * *
14050: * Front ends when parsing from a file *
14051: * *
14052: ************************************************************************/
14053:
14054: /**
14055: * xmlCreateURLParserCtxt:
14056: * @filename: the filename or URL
14057: * @options: a combination of xmlParserOption
14058: *
1.1.1.3 ! misho 14059: * Create a parser context for a file or URL content.
1.1 misho 14060: * Automatic support for ZLIB/Compress compressed document is provided
14061: * by default if found at compile-time and for file accesses
14062: *
14063: * Returns the new parser context or NULL
14064: */
14065: xmlParserCtxtPtr
14066: xmlCreateURLParserCtxt(const char *filename, int options)
14067: {
14068: xmlParserCtxtPtr ctxt;
14069: xmlParserInputPtr inputStream;
14070: char *directory = NULL;
14071:
14072: ctxt = xmlNewParserCtxt();
14073: if (ctxt == NULL) {
14074: xmlErrMemory(NULL, "cannot allocate parser context");
14075: return(NULL);
14076: }
14077:
14078: if (options)
14079: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14080: ctxt->linenumbers = 1;
14081:
14082: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14083: if (inputStream == NULL) {
14084: xmlFreeParserCtxt(ctxt);
14085: return(NULL);
14086: }
14087:
14088: inputPush(ctxt, inputStream);
14089: if ((ctxt->directory == NULL) && (directory == NULL))
14090: directory = xmlParserGetDirectory(filename);
14091: if ((ctxt->directory == NULL) && (directory != NULL))
14092: ctxt->directory = directory;
14093:
14094: return(ctxt);
14095: }
14096:
14097: /**
14098: * xmlCreateFileParserCtxt:
14099: * @filename: the filename
14100: *
1.1.1.3 ! misho 14101: * Create a parser context for a file content.
1.1 misho 14102: * Automatic support for ZLIB/Compress compressed document is provided
14103: * by default if found at compile-time.
14104: *
14105: * Returns the new parser context or NULL
14106: */
14107: xmlParserCtxtPtr
14108: xmlCreateFileParserCtxt(const char *filename)
14109: {
14110: return(xmlCreateURLParserCtxt(filename, 0));
14111: }
14112:
14113: #ifdef LIBXML_SAX1_ENABLED
14114: /**
14115: * xmlSAXParseFileWithData:
14116: * @sax: the SAX handler block
14117: * @filename: the filename
14118: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14119: * documents
14120: * @data: the userdata
14121: *
14122: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14123: * compressed document is provided by default if found at compile-time.
14124: * It use the given SAX function block to handle the parsing callback.
14125: * If sax is NULL, fallback to the default DOM tree building routines.
14126: *
14127: * User data (void *) is stored within the parser context in the
14128: * context's _private member, so it is available nearly everywhere in libxml
14129: *
14130: * Returns the resulting document tree
14131: */
14132:
14133: xmlDocPtr
14134: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14135: int recovery, void *data) {
14136: xmlDocPtr ret;
14137: xmlParserCtxtPtr ctxt;
14138:
14139: xmlInitParser();
14140:
14141: ctxt = xmlCreateFileParserCtxt(filename);
14142: if (ctxt == NULL) {
14143: return(NULL);
14144: }
14145: if (sax != NULL) {
14146: if (ctxt->sax != NULL)
14147: xmlFree(ctxt->sax);
14148: ctxt->sax = sax;
14149: }
14150: xmlDetectSAX2(ctxt);
14151: if (data!=NULL) {
14152: ctxt->_private = data;
14153: }
14154:
14155: if (ctxt->directory == NULL)
14156: ctxt->directory = xmlParserGetDirectory(filename);
14157:
14158: ctxt->recovery = recovery;
14159:
14160: xmlParseDocument(ctxt);
14161:
14162: if ((ctxt->wellFormed) || recovery) {
14163: ret = ctxt->myDoc;
14164: if (ret != NULL) {
14165: if (ctxt->input->buf->compressed > 0)
14166: ret->compression = 9;
14167: else
14168: ret->compression = ctxt->input->buf->compressed;
14169: }
14170: }
14171: else {
14172: ret = NULL;
14173: xmlFreeDoc(ctxt->myDoc);
14174: ctxt->myDoc = NULL;
14175: }
14176: if (sax != NULL)
14177: ctxt->sax = NULL;
14178: xmlFreeParserCtxt(ctxt);
1.1.1.3 ! misho 14179:
1.1 misho 14180: return(ret);
14181: }
14182:
14183: /**
14184: * xmlSAXParseFile:
14185: * @sax: the SAX handler block
14186: * @filename: the filename
14187: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14188: * documents
14189: *
14190: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14191: * compressed document is provided by default if found at compile-time.
14192: * It use the given SAX function block to handle the parsing callback.
14193: * If sax is NULL, fallback to the default DOM tree building routines.
14194: *
14195: * Returns the resulting document tree
14196: */
14197:
14198: xmlDocPtr
14199: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14200: int recovery) {
14201: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14202: }
14203:
14204: /**
14205: * xmlRecoverDoc:
14206: * @cur: a pointer to an array of xmlChar
14207: *
14208: * parse an XML in-memory document and build a tree.
14209: * In the case the document is not Well Formed, a attempt to build a
14210: * tree is tried anyway
14211: *
14212: * Returns the resulting document tree or NULL in case of failure
14213: */
14214:
14215: xmlDocPtr
14216: xmlRecoverDoc(const xmlChar *cur) {
14217: return(xmlSAXParseDoc(NULL, cur, 1));
14218: }
14219:
14220: /**
14221: * xmlParseFile:
14222: * @filename: the filename
14223: *
14224: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14225: * compressed document is provided by default if found at compile-time.
14226: *
14227: * Returns the resulting document tree if the file was wellformed,
14228: * NULL otherwise.
14229: */
14230:
14231: xmlDocPtr
14232: xmlParseFile(const char *filename) {
14233: return(xmlSAXParseFile(NULL, filename, 0));
14234: }
14235:
14236: /**
14237: * xmlRecoverFile:
14238: * @filename: the filename
14239: *
14240: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14241: * compressed document is provided by default if found at compile-time.
14242: * In the case the document is not Well Formed, it attempts to build
14243: * a tree anyway
14244: *
14245: * Returns the resulting document tree or NULL in case of failure
14246: */
14247:
14248: xmlDocPtr
14249: xmlRecoverFile(const char *filename) {
14250: return(xmlSAXParseFile(NULL, filename, 1));
14251: }
14252:
14253:
14254: /**
14255: * xmlSetupParserForBuffer:
14256: * @ctxt: an XML parser context
14257: * @buffer: a xmlChar * buffer
14258: * @filename: a file name
14259: *
14260: * Setup the parser context to parse a new buffer; Clears any prior
14261: * contents from the parser context. The buffer parameter must not be
14262: * NULL, but the filename parameter can be
14263: */
14264: void
14265: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14266: const char* filename)
14267: {
14268: xmlParserInputPtr input;
14269:
14270: if ((ctxt == NULL) || (buffer == NULL))
14271: return;
14272:
14273: input = xmlNewInputStream(ctxt);
14274: if (input == NULL) {
14275: xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14276: xmlClearParserCtxt(ctxt);
14277: return;
14278: }
1.1.1.3 ! misho 14279:
1.1 misho 14280: xmlClearParserCtxt(ctxt);
14281: if (filename != NULL)
14282: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14283: input->base = buffer;
14284: input->cur = buffer;
14285: input->end = &buffer[xmlStrlen(buffer)];
14286: inputPush(ctxt, input);
14287: }
14288:
14289: /**
14290: * xmlSAXUserParseFile:
14291: * @sax: a SAX handler
14292: * @user_data: The user data returned on SAX callbacks
14293: * @filename: a file name
14294: *
14295: * parse an XML file and call the given SAX handler routines.
14296: * Automatic support for ZLIB/Compress compressed document is provided
1.1.1.3 ! misho 14297: *
1.1 misho 14298: * Returns 0 in case of success or a error number otherwise
14299: */
14300: int
14301: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14302: const char *filename) {
14303: int ret = 0;
14304: xmlParserCtxtPtr ctxt;
1.1.1.3 ! misho 14305:
1.1 misho 14306: ctxt = xmlCreateFileParserCtxt(filename);
14307: if (ctxt == NULL) return -1;
14308: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14309: xmlFree(ctxt->sax);
14310: ctxt->sax = sax;
14311: xmlDetectSAX2(ctxt);
14312:
14313: if (user_data != NULL)
14314: ctxt->userData = user_data;
1.1.1.3 ! misho 14315:
1.1 misho 14316: xmlParseDocument(ctxt);
1.1.1.3 ! misho 14317:
1.1 misho 14318: if (ctxt->wellFormed)
14319: ret = 0;
14320: else {
14321: if (ctxt->errNo != 0)
14322: ret = ctxt->errNo;
14323: else
14324: ret = -1;
14325: }
14326: if (sax != NULL)
14327: ctxt->sax = NULL;
14328: if (ctxt->myDoc != NULL) {
14329: xmlFreeDoc(ctxt->myDoc);
14330: ctxt->myDoc = NULL;
14331: }
14332: xmlFreeParserCtxt(ctxt);
1.1.1.3 ! misho 14333:
1.1 misho 14334: return ret;
14335: }
14336: #endif /* LIBXML_SAX1_ENABLED */
14337:
14338: /************************************************************************
14339: * *
1.1.1.3 ! misho 14340: * Front ends when parsing from memory *
1.1 misho 14341: * *
14342: ************************************************************************/
14343:
14344: /**
14345: * xmlCreateMemoryParserCtxt:
14346: * @buffer: a pointer to a char array
14347: * @size: the size of the array
14348: *
14349: * Create a parser context for an XML in-memory document.
14350: *
14351: * Returns the new parser context or NULL
14352: */
14353: xmlParserCtxtPtr
14354: xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14355: xmlParserCtxtPtr ctxt;
14356: xmlParserInputPtr input;
14357: xmlParserInputBufferPtr buf;
14358:
14359: if (buffer == NULL)
14360: return(NULL);
14361: if (size <= 0)
14362: return(NULL);
14363:
14364: ctxt = xmlNewParserCtxt();
14365: if (ctxt == NULL)
14366: return(NULL);
14367:
14368: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14369: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14370: if (buf == NULL) {
14371: xmlFreeParserCtxt(ctxt);
14372: return(NULL);
14373: }
14374:
14375: input = xmlNewInputStream(ctxt);
14376: if (input == NULL) {
14377: xmlFreeParserInputBuffer(buf);
14378: xmlFreeParserCtxt(ctxt);
14379: return(NULL);
14380: }
14381:
14382: input->filename = NULL;
14383: input->buf = buf;
1.1.1.3 ! misho 14384: xmlBufResetInput(input->buf->buffer, input);
1.1 misho 14385:
14386: inputPush(ctxt, input);
14387: return(ctxt);
14388: }
14389:
14390: #ifdef LIBXML_SAX1_ENABLED
14391: /**
14392: * xmlSAXParseMemoryWithData:
14393: * @sax: the SAX handler block
14394: * @buffer: an pointer to a char array
14395: * @size: the size of the array
14396: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14397: * documents
14398: * @data: the userdata
14399: *
14400: * parse an XML in-memory block and use the given SAX function block
14401: * to handle the parsing callback. If sax is NULL, fallback to the default
14402: * DOM tree building routines.
14403: *
14404: * User data (void *) is stored within the parser context in the
14405: * context's _private member, so it is available nearly everywhere in libxml
14406: *
14407: * Returns the resulting document tree
14408: */
14409:
14410: xmlDocPtr
14411: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14412: int size, int recovery, void *data) {
14413: xmlDocPtr ret;
14414: xmlParserCtxtPtr ctxt;
14415:
14416: xmlInitParser();
14417:
14418: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14419: if (ctxt == NULL) return(NULL);
14420: if (sax != NULL) {
14421: if (ctxt->sax != NULL)
14422: xmlFree(ctxt->sax);
14423: ctxt->sax = sax;
14424: }
14425: xmlDetectSAX2(ctxt);
14426: if (data!=NULL) {
14427: ctxt->_private=data;
14428: }
14429:
14430: ctxt->recovery = recovery;
14431:
14432: xmlParseDocument(ctxt);
14433:
14434: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14435: else {
14436: ret = NULL;
14437: xmlFreeDoc(ctxt->myDoc);
14438: ctxt->myDoc = NULL;
14439: }
1.1.1.3 ! misho 14440: if (sax != NULL)
1.1 misho 14441: ctxt->sax = NULL;
14442: xmlFreeParserCtxt(ctxt);
14443:
14444: return(ret);
14445: }
14446:
14447: /**
14448: * xmlSAXParseMemory:
14449: * @sax: the SAX handler block
14450: * @buffer: an pointer to a char array
14451: * @size: the size of the array
14452: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14453: * documents
14454: *
14455: * parse an XML in-memory block and use the given SAX function block
14456: * to handle the parsing callback. If sax is NULL, fallback to the default
14457: * DOM tree building routines.
1.1.1.3 ! misho 14458: *
1.1 misho 14459: * Returns the resulting document tree
14460: */
14461: xmlDocPtr
14462: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14463: int size, int recovery) {
14464: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14465: }
14466:
14467: /**
14468: * xmlParseMemory:
14469: * @buffer: an pointer to a char array
14470: * @size: the size of the array
14471: *
14472: * parse an XML in-memory block and build a tree.
1.1.1.3 ! misho 14473: *
1.1 misho 14474: * Returns the resulting document tree
14475: */
14476:
14477: xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14478: return(xmlSAXParseMemory(NULL, buffer, size, 0));
14479: }
14480:
14481: /**
14482: * xmlRecoverMemory:
14483: * @buffer: an pointer to a char array
14484: * @size: the size of the array
14485: *
14486: * parse an XML in-memory block and build a tree.
14487: * In the case the document is not Well Formed, an attempt to
14488: * build a tree is tried anyway
14489: *
14490: * Returns the resulting document tree or NULL in case of error
14491: */
14492:
14493: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14494: return(xmlSAXParseMemory(NULL, buffer, size, 1));
14495: }
14496:
14497: /**
14498: * xmlSAXUserParseMemory:
14499: * @sax: a SAX handler
14500: * @user_data: The user data returned on SAX callbacks
14501: * @buffer: an in-memory XML document input
14502: * @size: the length of the XML document in bytes
14503: *
14504: * A better SAX parsing routine.
14505: * parse an XML in-memory buffer and call the given SAX handler routines.
14506: *
14507: * Returns 0 in case of success or a error number otherwise
14508: */
14509: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14510: const char *buffer, int size) {
14511: int ret = 0;
14512: xmlParserCtxtPtr ctxt;
14513:
14514: xmlInitParser();
14515:
14516: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14517: if (ctxt == NULL) return -1;
14518: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14519: xmlFree(ctxt->sax);
14520: ctxt->sax = sax;
14521: xmlDetectSAX2(ctxt);
14522:
14523: if (user_data != NULL)
14524: ctxt->userData = user_data;
14525:
14526: xmlParseDocument(ctxt);
1.1.1.3 ! misho 14527:
1.1 misho 14528: if (ctxt->wellFormed)
14529: ret = 0;
14530: else {
14531: if (ctxt->errNo != 0)
14532: ret = ctxt->errNo;
14533: else
14534: ret = -1;
14535: }
14536: if (sax != NULL)
14537: ctxt->sax = NULL;
14538: if (ctxt->myDoc != NULL) {
14539: xmlFreeDoc(ctxt->myDoc);
14540: ctxt->myDoc = NULL;
14541: }
14542: xmlFreeParserCtxt(ctxt);
1.1.1.3 ! misho 14543:
1.1 misho 14544: return ret;
14545: }
14546: #endif /* LIBXML_SAX1_ENABLED */
14547:
14548: /**
14549: * xmlCreateDocParserCtxt:
14550: * @cur: a pointer to an array of xmlChar
14551: *
14552: * Creates a parser context for an XML in-memory document.
14553: *
14554: * Returns the new parser context or NULL
14555: */
14556: xmlParserCtxtPtr
14557: xmlCreateDocParserCtxt(const xmlChar *cur) {
14558: int len;
14559:
14560: if (cur == NULL)
14561: return(NULL);
14562: len = xmlStrlen(cur);
14563: return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14564: }
14565:
14566: #ifdef LIBXML_SAX1_ENABLED
14567: /**
14568: * xmlSAXParseDoc:
14569: * @sax: the SAX handler block
14570: * @cur: a pointer to an array of xmlChar
14571: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14572: * documents
14573: *
14574: * parse an XML in-memory document and build a tree.
14575: * It use the given SAX function block to handle the parsing callback.
14576: * If sax is NULL, fallback to the default DOM tree building routines.
1.1.1.3 ! misho 14577: *
1.1 misho 14578: * Returns the resulting document tree
14579: */
14580:
14581: xmlDocPtr
14582: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14583: xmlDocPtr ret;
14584: xmlParserCtxtPtr ctxt;
14585: xmlSAXHandlerPtr oldsax = NULL;
14586:
14587: if (cur == NULL) return(NULL);
14588:
14589:
14590: ctxt = xmlCreateDocParserCtxt(cur);
14591: if (ctxt == NULL) return(NULL);
1.1.1.3 ! misho 14592: if (sax != NULL) {
1.1 misho 14593: oldsax = ctxt->sax;
14594: ctxt->sax = sax;
14595: ctxt->userData = NULL;
14596: }
14597: xmlDetectSAX2(ctxt);
14598:
14599: xmlParseDocument(ctxt);
14600: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14601: else {
14602: ret = NULL;
14603: xmlFreeDoc(ctxt->myDoc);
14604: ctxt->myDoc = NULL;
14605: }
14606: if (sax != NULL)
14607: ctxt->sax = oldsax;
14608: xmlFreeParserCtxt(ctxt);
1.1.1.3 ! misho 14609:
1.1 misho 14610: return(ret);
14611: }
14612:
14613: /**
14614: * xmlParseDoc:
14615: * @cur: a pointer to an array of xmlChar
14616: *
14617: * parse an XML in-memory document and build a tree.
1.1.1.3 ! misho 14618: *
1.1 misho 14619: * Returns the resulting document tree
14620: */
14621:
14622: xmlDocPtr
14623: xmlParseDoc(const xmlChar *cur) {
14624: return(xmlSAXParseDoc(NULL, cur, 0));
14625: }
14626: #endif /* LIBXML_SAX1_ENABLED */
14627:
14628: #ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
 *                                                                      *
 *      Specific functions to keep track of entity references,          *
 *      used by the XSLT debugger                                       *
 *                                                                      *
 ************************************************************************/
14635:
14636: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14637:
14638: /**
14639: * xmlAddEntityReference:
14640: * @ent : A valid entity
14641: * @firstNode : A valid first node for children of entity
1.1.1.3 ! misho 14642: * @lastNode : A valid last node of children entity
1.1 misho 14643: *
14644: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14645: */
14646: static void
14647: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14648: xmlNodePtr lastNode)
14649: {
14650: if (xmlEntityRefFunc != NULL) {
14651: (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14652: }
14653: }
14654:
14655:
14656: /**
14657: * xmlSetEntityReferenceFunc:
14658: * @func: A valid function
14659: *
14660: * Set the function to call call back when a xml reference has been made
14661: */
14662: void
14663: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14664: {
14665: xmlEntityRefFunc = func;
14666: }
14667: #endif /* LIBXML_LEGACY_ENABLED */
14668:
/************************************************************************
 *                                                                      *
 *                              Miscellaneous                           *
 *                                                                      *
 ************************************************************************/
14674:
14675: #ifdef LIBXML_XPATH_ENABLED
14676: #include <libxml/xpath.h>
14677: #endif
14678:
14679: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14680: static int xmlParserInitialized = 0;
14681:
14682: /**
14683: * xmlInitParser:
14684: *
14685: * Initialization function for the XML parser.
14686: * This is not reentrant. Call once before processing in case of
14687: * use in multithreaded programs.
14688: */
14689:
14690: void
14691: xmlInitParser(void) {
14692: if (xmlParserInitialized != 0)
14693: return;
14694:
14695: #ifdef LIBXML_THREAD_ENABLED
14696: __xmlGlobalInitMutexLock();
14697: if (xmlParserInitialized == 0) {
14698: #endif
14699: xmlInitThreads();
14700: xmlInitGlobals();
14701: if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14702: (xmlGenericError == NULL))
14703: initGenericErrorDefaultFunc(NULL);
14704: xmlInitMemory();
1.1.1.2 misho 14705: xmlInitializeDict();
1.1 misho 14706: xmlInitCharEncodingHandlers();
14707: xmlDefaultSAXHandlerInit();
14708: xmlRegisterDefaultInputCallbacks();
14709: #ifdef LIBXML_OUTPUT_ENABLED
14710: xmlRegisterDefaultOutputCallbacks();
14711: #endif /* LIBXML_OUTPUT_ENABLED */
14712: #ifdef LIBXML_HTML_ENABLED
14713: htmlInitAutoClose();
14714: htmlDefaultSAXHandlerInit();
14715: #endif
14716: #ifdef LIBXML_XPATH_ENABLED
14717: xmlXPathInit();
14718: #endif
14719: xmlParserInitialized = 1;
14720: #ifdef LIBXML_THREAD_ENABLED
14721: }
14722: __xmlGlobalInitMutexUnlock();
14723: #endif
14724: }
14725:
14726: /**
14727: * xmlCleanupParser:
14728: *
14729: * This function name is somewhat misleading. It does not clean up
14730: * parser state, it cleans up memory allocated by the library itself.
14731: * It is a cleanup function for the XML library. It tries to reclaim all
14732: * related global memory allocated for the library processing.
14733: * It doesn't deallocate any document related memory. One should
14734: * call xmlCleanupParser() only when the process has finished using
14735: * the library and all XML/HTML documents built with it.
14736: * See also xmlInitParser() which has the opposite function of preparing
14737: * the library for operations.
14738: *
14739: * WARNING: if your application is multithreaded or has plugin support
14740: * calling this may crash the application if another thread or
14741: * a plugin is still using libxml2. It's sometimes very hard to
14742: * guess if libxml2 is in use in the application, some libraries
14743: * or plugins may use it without notice. In case of doubt abstain
14744: * from calling this function or do it just before calling exit()
14745: * to avoid leak reports from valgrind !
14746: */
14747:
14748: void
14749: xmlCleanupParser(void) {
14750: if (!xmlParserInitialized)
14751: return;
14752:
14753: xmlCleanupCharEncodingHandlers();
14754: #ifdef LIBXML_CATALOG_ENABLED
14755: xmlCatalogCleanup();
14756: #endif
14757: xmlDictCleanup();
14758: xmlCleanupInputCallbacks();
14759: #ifdef LIBXML_OUTPUT_ENABLED
14760: xmlCleanupOutputCallbacks();
14761: #endif
14762: #ifdef LIBXML_SCHEMAS_ENABLED
14763: xmlSchemaCleanupTypes();
14764: xmlRelaxNGCleanupTypes();
14765: #endif
14766: xmlCleanupGlobals();
14767: xmlResetLastError();
14768: xmlCleanupThreads(); /* must be last if called not from the main thread */
14769: xmlCleanupMemory();
14770: xmlParserInitialized = 0;
14771: }
14772:
/************************************************************************
 *                                                                      *
 *      New set (2.6.0) of simpler and more flexible APIs               *
 *                                                                      *
 ************************************************************************/
14778:
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The enclosing scope must provide a variable named
 * "dict"; when it is NULL every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used safely in un-braced if/else arms.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14790:
14791: /**
14792: * xmlCtxtReset:
14793: * @ctxt: an XML parser context
14794: *
14795: * Reset a parser context
14796: */
14797: void
14798: xmlCtxtReset(xmlParserCtxtPtr ctxt)
14799: {
14800: xmlParserInputPtr input;
14801: xmlDictPtr dict;
1.1.1.3 ! misho 14802:
1.1 misho 14803: if (ctxt == NULL)
14804: return;
14805:
14806: dict = ctxt->dict;
14807:
14808: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14809: xmlFreeInputStream(input);
14810: }
14811: ctxt->inputNr = 0;
14812: ctxt->input = NULL;
14813:
14814: ctxt->spaceNr = 0;
14815: if (ctxt->spaceTab != NULL) {
14816: ctxt->spaceTab[0] = -1;
14817: ctxt->space = &ctxt->spaceTab[0];
14818: } else {
14819: ctxt->space = NULL;
14820: }
14821:
14822:
14823: ctxt->nodeNr = 0;
14824: ctxt->node = NULL;
14825:
14826: ctxt->nameNr = 0;
14827: ctxt->name = NULL;
14828:
14829: DICT_FREE(ctxt->version);
14830: ctxt->version = NULL;
14831: DICT_FREE(ctxt->encoding);
14832: ctxt->encoding = NULL;
14833: DICT_FREE(ctxt->directory);
14834: ctxt->directory = NULL;
14835: DICT_FREE(ctxt->extSubURI);
14836: ctxt->extSubURI = NULL;
14837: DICT_FREE(ctxt->extSubSystem);
14838: ctxt->extSubSystem = NULL;
14839: if (ctxt->myDoc != NULL)
14840: xmlFreeDoc(ctxt->myDoc);
14841: ctxt->myDoc = NULL;
14842:
14843: ctxt->standalone = -1;
14844: ctxt->hasExternalSubset = 0;
14845: ctxt->hasPErefs = 0;
14846: ctxt->html = 0;
14847: ctxt->external = 0;
14848: ctxt->instate = XML_PARSER_START;
14849: ctxt->token = 0;
14850:
14851: ctxt->wellFormed = 1;
14852: ctxt->nsWellFormed = 1;
14853: ctxt->disableSAX = 0;
14854: ctxt->valid = 1;
14855: #if 0
14856: ctxt->vctxt.userData = ctxt;
14857: ctxt->vctxt.error = xmlParserValidityError;
14858: ctxt->vctxt.warning = xmlParserValidityWarning;
14859: #endif
14860: ctxt->record_info = 0;
14861: ctxt->nbChars = 0;
14862: ctxt->checkIndex = 0;
14863: ctxt->inSubset = 0;
14864: ctxt->errNo = XML_ERR_OK;
14865: ctxt->depth = 0;
14866: ctxt->charset = XML_CHAR_ENCODING_UTF8;
14867: ctxt->catalogs = NULL;
14868: ctxt->nbentities = 0;
14869: ctxt->sizeentities = 0;
1.1.1.3 ! misho 14870: ctxt->sizeentcopy = 0;
1.1 misho 14871: xmlInitNodeInfoSeq(&ctxt->node_seq);
14872:
14873: if (ctxt->attsDefault != NULL) {
14874: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14875: ctxt->attsDefault = NULL;
14876: }
14877: if (ctxt->attsSpecial != NULL) {
14878: xmlHashFree(ctxt->attsSpecial, NULL);
14879: ctxt->attsSpecial = NULL;
14880: }
14881:
14882: #ifdef LIBXML_CATALOG_ENABLED
14883: if (ctxt->catalogs != NULL)
14884: xmlCatalogFreeLocal(ctxt->catalogs);
14885: #endif
14886: if (ctxt->lastError.code != XML_ERR_OK)
14887: xmlResetError(&ctxt->lastError);
14888: }
14889:
14890: /**
14891: * xmlCtxtResetPush:
14892: * @ctxt: an XML parser context
14893: * @chunk: a pointer to an array of chars
14894: * @size: number of chars in the array
14895: * @filename: an optional file name or URI
14896: * @encoding: the document encoding, or NULL
14897: *
14898: * Reset a push parser context
14899: *
14900: * Returns 0 in case of success and 1 in case of error
14901: */
14902: int
14903: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14904: int size, const char *filename, const char *encoding)
14905: {
14906: xmlParserInputPtr inputStream;
14907: xmlParserInputBufferPtr buf;
14908: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14909:
14910: if (ctxt == NULL)
14911: return(1);
14912:
14913: if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14914: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14915:
14916: buf = xmlAllocParserInputBuffer(enc);
14917: if (buf == NULL)
14918: return(1);
14919:
14920: if (ctxt == NULL) {
14921: xmlFreeParserInputBuffer(buf);
14922: return(1);
14923: }
14924:
14925: xmlCtxtReset(ctxt);
14926:
14927: if (ctxt->pushTab == NULL) {
14928: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14929: sizeof(xmlChar *));
14930: if (ctxt->pushTab == NULL) {
14931: xmlErrMemory(ctxt, NULL);
14932: xmlFreeParserInputBuffer(buf);
14933: return(1);
14934: }
14935: }
14936:
14937: if (filename == NULL) {
14938: ctxt->directory = NULL;
14939: } else {
14940: ctxt->directory = xmlParserGetDirectory(filename);
14941: }
14942:
14943: inputStream = xmlNewInputStream(ctxt);
14944: if (inputStream == NULL) {
14945: xmlFreeParserInputBuffer(buf);
14946: return(1);
14947: }
14948:
14949: if (filename == NULL)
14950: inputStream->filename = NULL;
14951: else
14952: inputStream->filename = (char *)
14953: xmlCanonicPath((const xmlChar *) filename);
14954: inputStream->buf = buf;
1.1.1.3 ! misho 14955: xmlBufResetInput(buf->buffer, inputStream);
1.1 misho 14956:
14957: inputPush(ctxt, inputStream);
14958:
14959: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14960: (ctxt->input->buf != NULL)) {
1.1.1.3 ! misho 14961: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
! 14962: size_t cur = ctxt->input->cur - ctxt->input->base;
1.1 misho 14963:
14964: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14965:
1.1.1.3 ! misho 14966: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
1.1 misho 14967: #ifdef DEBUG_PUSH
14968: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14969: #endif
14970: }
14971:
14972: if (encoding != NULL) {
14973: xmlCharEncodingHandlerPtr hdlr;
14974:
14975: if (ctxt->encoding != NULL)
14976: xmlFree((xmlChar *) ctxt->encoding);
14977: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14978:
14979: hdlr = xmlFindCharEncodingHandler(encoding);
14980: if (hdlr != NULL) {
14981: xmlSwitchToEncoding(ctxt, hdlr);
14982: } else {
14983: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14984: "Unsupported encoding %s\n", BAD_CAST encoding);
14985: }
14986: } else if (enc != XML_CHAR_ENCODING_NONE) {
14987: xmlSwitchEncoding(ctxt, enc);
14988: }
14989:
14990: return(0);
14991: }
14992:
14993:
14994: /**
14995: * xmlCtxtUseOptionsInternal:
14996: * @ctxt: an XML parser context
14997: * @options: a combination of xmlParserOption
14998: * @encoding: the user provided encoding to use
14999: *
15000: * Applies the options to the parser context
15001: *
15002: * Returns 0 in case of success, the set of unknown or unimplemented options
15003: * in case of error.
15004: */
15005: static int
15006: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15007: {
15008: if (ctxt == NULL)
15009: return(-1);
15010: if (encoding != NULL) {
15011: if (ctxt->encoding != NULL)
15012: xmlFree((xmlChar *) ctxt->encoding);
15013: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15014: }
15015: if (options & XML_PARSE_RECOVER) {
15016: ctxt->recovery = 1;
15017: options -= XML_PARSE_RECOVER;
15018: ctxt->options |= XML_PARSE_RECOVER;
15019: } else
15020: ctxt->recovery = 0;
15021: if (options & XML_PARSE_DTDLOAD) {
15022: ctxt->loadsubset = XML_DETECT_IDS;
15023: options -= XML_PARSE_DTDLOAD;
15024: ctxt->options |= XML_PARSE_DTDLOAD;
15025: } else
15026: ctxt->loadsubset = 0;
15027: if (options & XML_PARSE_DTDATTR) {
15028: ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15029: options -= XML_PARSE_DTDATTR;
15030: ctxt->options |= XML_PARSE_DTDATTR;
15031: }
15032: if (options & XML_PARSE_NOENT) {
15033: ctxt->replaceEntities = 1;
15034: /* ctxt->loadsubset |= XML_DETECT_IDS; */
15035: options -= XML_PARSE_NOENT;
15036: ctxt->options |= XML_PARSE_NOENT;
15037: } else
15038: ctxt->replaceEntities = 0;
15039: if (options & XML_PARSE_PEDANTIC) {
15040: ctxt->pedantic = 1;
15041: options -= XML_PARSE_PEDANTIC;
15042: ctxt->options |= XML_PARSE_PEDANTIC;
15043: } else
15044: ctxt->pedantic = 0;
15045: if (options & XML_PARSE_NOBLANKS) {
15046: ctxt->keepBlanks = 0;
15047: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15048: options -= XML_PARSE_NOBLANKS;
15049: ctxt->options |= XML_PARSE_NOBLANKS;
15050: } else
15051: ctxt->keepBlanks = 1;
15052: if (options & XML_PARSE_DTDVALID) {
15053: ctxt->validate = 1;
15054: if (options & XML_PARSE_NOWARNING)
15055: ctxt->vctxt.warning = NULL;
15056: if (options & XML_PARSE_NOERROR)
15057: ctxt->vctxt.error = NULL;
15058: options -= XML_PARSE_DTDVALID;
15059: ctxt->options |= XML_PARSE_DTDVALID;
15060: } else
15061: ctxt->validate = 0;
15062: if (options & XML_PARSE_NOWARNING) {
15063: ctxt->sax->warning = NULL;
15064: options -= XML_PARSE_NOWARNING;
15065: }
15066: if (options & XML_PARSE_NOERROR) {
15067: ctxt->sax->error = NULL;
15068: ctxt->sax->fatalError = NULL;
15069: options -= XML_PARSE_NOERROR;
15070: }
15071: #ifdef LIBXML_SAX1_ENABLED
15072: if (options & XML_PARSE_SAX1) {
15073: ctxt->sax->startElement = xmlSAX2StartElement;
15074: ctxt->sax->endElement = xmlSAX2EndElement;
15075: ctxt->sax->startElementNs = NULL;
15076: ctxt->sax->endElementNs = NULL;
15077: ctxt->sax->initialized = 1;
15078: options -= XML_PARSE_SAX1;
15079: ctxt->options |= XML_PARSE_SAX1;
15080: }
15081: #endif /* LIBXML_SAX1_ENABLED */
15082: if (options & XML_PARSE_NODICT) {
15083: ctxt->dictNames = 0;
15084: options -= XML_PARSE_NODICT;
15085: ctxt->options |= XML_PARSE_NODICT;
15086: } else {
15087: ctxt->dictNames = 1;
15088: }
15089: if (options & XML_PARSE_NOCDATA) {
15090: ctxt->sax->cdataBlock = NULL;
15091: options -= XML_PARSE_NOCDATA;
15092: ctxt->options |= XML_PARSE_NOCDATA;
15093: }
15094: if (options & XML_PARSE_NSCLEAN) {
15095: ctxt->options |= XML_PARSE_NSCLEAN;
15096: options -= XML_PARSE_NSCLEAN;
15097: }
15098: if (options & XML_PARSE_NONET) {
15099: ctxt->options |= XML_PARSE_NONET;
15100: options -= XML_PARSE_NONET;
15101: }
15102: if (options & XML_PARSE_COMPACT) {
15103: ctxt->options |= XML_PARSE_COMPACT;
15104: options -= XML_PARSE_COMPACT;
15105: }
15106: if (options & XML_PARSE_OLD10) {
15107: ctxt->options |= XML_PARSE_OLD10;
15108: options -= XML_PARSE_OLD10;
15109: }
15110: if (options & XML_PARSE_NOBASEFIX) {
15111: ctxt->options |= XML_PARSE_NOBASEFIX;
15112: options -= XML_PARSE_NOBASEFIX;
15113: }
15114: if (options & XML_PARSE_HUGE) {
15115: ctxt->options |= XML_PARSE_HUGE;
15116: options -= XML_PARSE_HUGE;
1.1.1.3 ! misho 15117: if (ctxt->dict != NULL)
! 15118: xmlDictSetLimit(ctxt->dict, 0);
1.1 misho 15119: }
15120: if (options & XML_PARSE_OLDSAX) {
15121: ctxt->options |= XML_PARSE_OLDSAX;
15122: options -= XML_PARSE_OLDSAX;
15123: }
1.1.1.2 misho 15124: if (options & XML_PARSE_IGNORE_ENC) {
15125: ctxt->options |= XML_PARSE_IGNORE_ENC;
15126: options -= XML_PARSE_IGNORE_ENC;
15127: }
1.1.1.3 ! misho 15128: if (options & XML_PARSE_BIG_LINES) {
! 15129: ctxt->options |= XML_PARSE_BIG_LINES;
! 15130: options -= XML_PARSE_BIG_LINES;
! 15131: }
1.1 misho 15132: ctxt->linenumbers = 1;
15133: return (options);
15134: }
15135:
15136: /**
15137: * xmlCtxtUseOptions:
15138: * @ctxt: an XML parser context
15139: * @options: a combination of xmlParserOption
15140: *
15141: * Applies the options to the parser context
15142: *
15143: * Returns 0 in case of success, the set of unknown or unimplemented options
15144: * in case of error.
15145: */
15146: int
15147: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15148: {
15149: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15150: }
15151:
15152: /**
15153: * xmlDoRead:
15154: * @ctxt: an XML parser context
15155: * @URL: the base URL to use for the document
15156: * @encoding: the document encoding, or NULL
15157: * @options: a combination of xmlParserOption
15158: * @reuse: keep the context for reuse
15159: *
15160: * Common front-end for the xmlRead functions
15161: *
15162: * Returns the resulting document tree or NULL
15163: */
15164: static xmlDocPtr
15165: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15166: int options, int reuse)
15167: {
15168: xmlDocPtr ret;
15169:
15170: xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15171: if (encoding != NULL) {
15172: xmlCharEncodingHandlerPtr hdlr;
15173:
15174: hdlr = xmlFindCharEncodingHandler(encoding);
15175: if (hdlr != NULL)
15176: xmlSwitchToEncoding(ctxt, hdlr);
15177: }
15178: if ((URL != NULL) && (ctxt->input != NULL) &&
15179: (ctxt->input->filename == NULL))
15180: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15181: xmlParseDocument(ctxt);
15182: if ((ctxt->wellFormed) || ctxt->recovery)
15183: ret = ctxt->myDoc;
15184: else {
15185: ret = NULL;
15186: if (ctxt->myDoc != NULL) {
15187: xmlFreeDoc(ctxt->myDoc);
15188: }
15189: }
15190: ctxt->myDoc = NULL;
15191: if (!reuse) {
15192: xmlFreeParserCtxt(ctxt);
15193: }
15194:
15195: return (ret);
15196: }
15197:
15198: /**
15199: * xmlReadDoc:
15200: * @cur: a pointer to a zero terminated string
15201: * @URL: the base URL to use for the document
15202: * @encoding: the document encoding, or NULL
15203: * @options: a combination of xmlParserOption
15204: *
15205: * parse an XML in-memory document and build a tree.
1.1.1.3 ! misho 15206: *
1.1 misho 15207: * Returns the resulting document tree
15208: */
15209: xmlDocPtr
15210: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15211: {
15212: xmlParserCtxtPtr ctxt;
15213:
15214: if (cur == NULL)
15215: return (NULL);
15216:
15217: ctxt = xmlCreateDocParserCtxt(cur);
15218: if (ctxt == NULL)
15219: return (NULL);
15220: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15221: }
15222:
15223: /**
15224: * xmlReadFile:
15225: * @filename: a file or URL
15226: * @encoding: the document encoding, or NULL
15227: * @options: a combination of xmlParserOption
15228: *
15229: * parse an XML file from the filesystem or the network.
1.1.1.3 ! misho 15230: *
1.1 misho 15231: * Returns the resulting document tree
15232: */
15233: xmlDocPtr
15234: xmlReadFile(const char *filename, const char *encoding, int options)
15235: {
15236: xmlParserCtxtPtr ctxt;
15237:
15238: ctxt = xmlCreateURLParserCtxt(filename, options);
15239: if (ctxt == NULL)
15240: return (NULL);
15241: return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15242: }
15243:
15244: /**
15245: * xmlReadMemory:
15246: * @buffer: a pointer to a char array
15247: * @size: the size of the array
15248: * @URL: the base URL to use for the document
15249: * @encoding: the document encoding, or NULL
15250: * @options: a combination of xmlParserOption
15251: *
15252: * parse an XML in-memory document and build a tree.
1.1.1.3 ! misho 15253: *
1.1 misho 15254: * Returns the resulting document tree
15255: */
15256: xmlDocPtr
15257: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15258: {
15259: xmlParserCtxtPtr ctxt;
15260:
15261: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15262: if (ctxt == NULL)
15263: return (NULL);
15264: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15265: }
15266:
15267: /**
15268: * xmlReadFd:
15269: * @fd: an open file descriptor
15270: * @URL: the base URL to use for the document
15271: * @encoding: the document encoding, or NULL
15272: * @options: a combination of xmlParserOption
15273: *
15274: * parse an XML from a file descriptor and build a tree.
15275: * NOTE that the file descriptor will not be closed when the
15276: * reader is closed or reset.
1.1.1.3 ! misho 15277: *
1.1 misho 15278: * Returns the resulting document tree
15279: */
15280: xmlDocPtr
15281: xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15282: {
15283: xmlParserCtxtPtr ctxt;
15284: xmlParserInputBufferPtr input;
15285: xmlParserInputPtr stream;
15286:
15287: if (fd < 0)
15288: return (NULL);
15289:
15290: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15291: if (input == NULL)
15292: return (NULL);
15293: input->closecallback = NULL;
15294: ctxt = xmlNewParserCtxt();
15295: if (ctxt == NULL) {
15296: xmlFreeParserInputBuffer(input);
15297: return (NULL);
15298: }
15299: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15300: if (stream == NULL) {
15301: xmlFreeParserInputBuffer(input);
15302: xmlFreeParserCtxt(ctxt);
15303: return (NULL);
15304: }
15305: inputPush(ctxt, stream);
15306: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15307: }
15308:
15309: /**
15310: * xmlReadIO:
15311: * @ioread: an I/O read function
15312: * @ioclose: an I/O close function
15313: * @ioctx: an I/O handler
15314: * @URL: the base URL to use for the document
15315: * @encoding: the document encoding, or NULL
15316: * @options: a combination of xmlParserOption
15317: *
15318: * parse an XML document from I/O functions and source and build a tree.
1.1.1.2 misho 15319: *
1.1 misho 15320: * Returns the resulting document tree
15321: */
15322: xmlDocPtr
15323: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15324: void *ioctx, const char *URL, const char *encoding, int options)
15325: {
15326: xmlParserCtxtPtr ctxt;
15327: xmlParserInputBufferPtr input;
15328: xmlParserInputPtr stream;
15329:
15330: if (ioread == NULL)
15331: return (NULL);
15332:
15333: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15334: XML_CHAR_ENCODING_NONE);
1.1.1.2 misho 15335: if (input == NULL) {
15336: if (ioclose != NULL)
15337: ioclose(ioctx);
1.1 misho 15338: return (NULL);
1.1.1.2 misho 15339: }
1.1 misho 15340: ctxt = xmlNewParserCtxt();
15341: if (ctxt == NULL) {
15342: xmlFreeParserInputBuffer(input);
15343: return (NULL);
15344: }
15345: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15346: if (stream == NULL) {
15347: xmlFreeParserInputBuffer(input);
15348: xmlFreeParserCtxt(ctxt);
15349: return (NULL);
15350: }
15351: inputPush(ctxt, stream);
15352: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15353: }
15354:
15355: /**
15356: * xmlCtxtReadDoc:
15357: * @ctxt: an XML parser context
15358: * @cur: a pointer to a zero terminated string
15359: * @URL: the base URL to use for the document
15360: * @encoding: the document encoding, or NULL
15361: * @options: a combination of xmlParserOption
15362: *
15363: * parse an XML in-memory document and build a tree.
15364: * This reuses the existing @ctxt parser context
1.1.1.2 misho 15365: *
1.1 misho 15366: * Returns the resulting document tree
15367: */
15368: xmlDocPtr
15369: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15370: const char *URL, const char *encoding, int options)
15371: {
15372: xmlParserInputPtr stream;
15373:
15374: if (cur == NULL)
15375: return (NULL);
15376: if (ctxt == NULL)
15377: return (NULL);
15378:
15379: xmlCtxtReset(ctxt);
15380:
15381: stream = xmlNewStringInputStream(ctxt, cur);
15382: if (stream == NULL) {
15383: return (NULL);
15384: }
15385: inputPush(ctxt, stream);
15386: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15387: }
15388:
15389: /**
15390: * xmlCtxtReadFile:
15391: * @ctxt: an XML parser context
15392: * @filename: a file or URL
15393: * @encoding: the document encoding, or NULL
15394: * @options: a combination of xmlParserOption
15395: *
15396: * parse an XML file from the filesystem or the network.
15397: * This reuses the existing @ctxt parser context
1.1.1.3 ! misho 15398: *
1.1 misho 15399: * Returns the resulting document tree
15400: */
15401: xmlDocPtr
15402: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15403: const char *encoding, int options)
15404: {
15405: xmlParserInputPtr stream;
15406:
15407: if (filename == NULL)
15408: return (NULL);
15409: if (ctxt == NULL)
15410: return (NULL);
15411:
15412: xmlCtxtReset(ctxt);
15413:
15414: stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15415: if (stream == NULL) {
15416: return (NULL);
15417: }
15418: inputPush(ctxt, stream);
15419: return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15420: }
15421:
15422: /**
15423: * xmlCtxtReadMemory:
15424: * @ctxt: an XML parser context
15425: * @buffer: a pointer to a char array
15426: * @size: the size of the array
15427: * @URL: the base URL to use for the document
15428: * @encoding: the document encoding, or NULL
15429: * @options: a combination of xmlParserOption
15430: *
15431: * parse an XML in-memory document and build a tree.
15432: * This reuses the existing @ctxt parser context
1.1.1.3 ! misho 15433: *
1.1 misho 15434: * Returns the resulting document tree
15435: */
15436: xmlDocPtr
15437: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15438: const char *URL, const char *encoding, int options)
15439: {
15440: xmlParserInputBufferPtr input;
15441: xmlParserInputPtr stream;
15442:
15443: if (ctxt == NULL)
15444: return (NULL);
15445: if (buffer == NULL)
15446: return (NULL);
15447:
15448: xmlCtxtReset(ctxt);
15449:
15450: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15451: if (input == NULL) {
15452: return(NULL);
15453: }
15454:
15455: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15456: if (stream == NULL) {
15457: xmlFreeParserInputBuffer(input);
15458: return(NULL);
15459: }
15460:
15461: inputPush(ctxt, stream);
15462: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15463: }
15464:
15465: /**
15466: * xmlCtxtReadFd:
15467: * @ctxt: an XML parser context
15468: * @fd: an open file descriptor
15469: * @URL: the base URL to use for the document
15470: * @encoding: the document encoding, or NULL
15471: * @options: a combination of xmlParserOption
15472: *
15473: * parse an XML from a file descriptor and build a tree.
15474: * This reuses the existing @ctxt parser context
15475: * NOTE that the file descriptor will not be closed when the
15476: * reader is closed or reset.
1.1.1.3 ! misho 15477: *
1.1 misho 15478: * Returns the resulting document tree
15479: */
15480: xmlDocPtr
15481: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15482: const char *URL, const char *encoding, int options)
15483: {
15484: xmlParserInputBufferPtr input;
15485: xmlParserInputPtr stream;
15486:
15487: if (fd < 0)
15488: return (NULL);
15489: if (ctxt == NULL)
15490: return (NULL);
15491:
15492: xmlCtxtReset(ctxt);
15493:
15494:
15495: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15496: if (input == NULL)
15497: return (NULL);
15498: input->closecallback = NULL;
15499: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15500: if (stream == NULL) {
15501: xmlFreeParserInputBuffer(input);
15502: return (NULL);
15503: }
15504: inputPush(ctxt, stream);
15505: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15506: }
15507:
15508: /**
15509: * xmlCtxtReadIO:
15510: * @ctxt: an XML parser context
15511: * @ioread: an I/O read function
15512: * @ioclose: an I/O close function
15513: * @ioctx: an I/O handler
15514: * @URL: the base URL to use for the document
15515: * @encoding: the document encoding, or NULL
15516: * @options: a combination of xmlParserOption
15517: *
15518: * parse an XML document from I/O functions and source and build a tree.
15519: * This reuses the existing @ctxt parser context
1.1.1.2 misho 15520: *
1.1 misho 15521: * Returns the resulting document tree
15522: */
15523: xmlDocPtr
15524: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15525: xmlInputCloseCallback ioclose, void *ioctx,
15526: const char *URL,
15527: const char *encoding, int options)
15528: {
15529: xmlParserInputBufferPtr input;
15530: xmlParserInputPtr stream;
15531:
15532: if (ioread == NULL)
15533: return (NULL);
15534: if (ctxt == NULL)
15535: return (NULL);
15536:
15537: xmlCtxtReset(ctxt);
15538:
15539: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15540: XML_CHAR_ENCODING_NONE);
1.1.1.2 misho 15541: if (input == NULL) {
15542: if (ioclose != NULL)
15543: ioclose(ioctx);
1.1 misho 15544: return (NULL);
1.1.1.2 misho 15545: }
1.1 misho 15546: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15547: if (stream == NULL) {
15548: xmlFreeParserInputBuffer(input);
15549: return (NULL);
15550: }
15551: inputPush(ctxt, stream);
15552: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15553: }
15554:
15555: #define bottom_parser
15556: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>