Annotation of embedaddon/libxml2/parser.c, revision 1.1.1.1
1.1 misho 1: /*
2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
4: *
5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of characters are actually implemented either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
28: * See Copyright for the status of this software.
29: *
30: * daniel@veillard.com
31: */
32:
33: #define IN_LIBXML
34: #include "libxml.h"
35:
36: #if defined(WIN32) && !defined (__CYGWIN__)
37: #define XML_DIR_SEP '\\'
38: #else
39: #define XML_DIR_SEP '/'
40: #endif
41:
42: #include <stdlib.h>
43: #include <string.h>
44: #include <stdarg.h>
45: #include <libxml/xmlmemory.h>
46: #include <libxml/threads.h>
47: #include <libxml/globals.h>
48: #include <libxml/tree.h>
49: #include <libxml/parser.h>
50: #include <libxml/parserInternals.h>
51: #include <libxml/valid.h>
52: #include <libxml/entities.h>
53: #include <libxml/xmlerror.h>
54: #include <libxml/encoding.h>
55: #include <libxml/xmlIO.h>
56: #include <libxml/uri.h>
57: #ifdef LIBXML_CATALOG_ENABLED
58: #include <libxml/catalog.h>
59: #endif
60: #ifdef LIBXML_SCHEMAS_ENABLED
61: #include <libxml/xmlschemastypes.h>
62: #include <libxml/relaxng.h>
63: #endif
64: #ifdef HAVE_CTYPE_H
65: #include <ctype.h>
66: #endif
67: #ifdef HAVE_STDLIB_H
68: #include <stdlib.h>
69: #endif
70: #ifdef HAVE_SYS_STAT_H
71: #include <sys/stat.h>
72: #endif
73: #ifdef HAVE_FCNTL_H
74: #include <fcntl.h>
75: #endif
76: #ifdef HAVE_UNISTD_H
77: #include <unistd.h>
78: #endif
79: #ifdef HAVE_ZLIB_H
80: #include <zlib.h>
81: #endif
82:
/*
 * Forward declaration: xmlFatalErr() is defined further down in this file
 * but is already called by xmlParserEntityCheck() before that definition.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

/*
 * Forward declaration of a file-local helper; being static, its definition
 * has to live elsewhere in this translation unit.
 */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);
89:
90: /************************************************************************
91: * *
92: * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93: * *
94: ************************************************************************/
95:
/*
 * Replacement-size threshold used by xmlParserEntityCheck(): a replacement
 * smaller than this is accepted without any ratio check.
 */
#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): presumably a limit on the number of entities - confirm at the use sites. */
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
106:
107: /*
108: * xmlParserEntityCheck
109: *
110: * Function to check non-linear entity expansion behaviour
111: * This is here to detect and stop exponential linear entity expansion
112: * This is not a limitation of the parser but a safety
113: * boundary feature. It can be disabled with the XML_PARSE_HUGE
114: * parser option.
115: */
116: static int
117: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118: xmlEntityPtr ent)
119: {
120: unsigned long consumed = 0;
121:
122: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123: return (0);
124: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125: return (1);
126: if (size != 0) {
127: /*
128: * Do the check based on the replacement size of the entity
129: */
130: if (size < XML_PARSER_BIG_ENTITY)
131: return(0);
132:
133: /*
134: * A limit on the amount of text data reasonably used
135: */
136: if (ctxt->input != NULL) {
137: consumed = ctxt->input->consumed +
138: (ctxt->input->cur - ctxt->input->base);
139: }
140: consumed += ctxt->sizeentities;
141:
142: if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144: return (0);
145: } else if (ent != NULL) {
146: /*
147: * use the number of parsed entities in the replacement
148: */
149: size = ent->checked;
150:
151: /*
152: * The amount of data parsed counting entities size only once
153: */
154: if (ctxt->input != NULL) {
155: consumed = ctxt->input->consumed +
156: (ctxt->input->cur - ctxt->input->base);
157: }
158: consumed += ctxt->sizeentities;
159:
160: /*
161: * Check the density of entities for the amount of data
162: * knowing an entity reference will take at least 3 bytes
163: */
164: if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165: return (0);
166: } else {
167: /*
168: * strange we got no data for checking just return
169: */
170: return (0);
171: }
172:
173: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174: return (1);
175: }
176:
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default is 256. This is not a limitation of the parser
 * but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
186:
187:
188:
/* NOTE(review): presumably selects the SAX2 code paths - confirm at the use sites. */
#define SAX2 1
/* NOTE(review): presumably sizes of parser working buffers - confirm at the use sites. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
/* Marker string, cast to xmlChar * through BAD_CAST. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193:
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
202:
203:
/*
 * Forward declarations of file-local (static) helpers, so that they can be
 * referenced before their definitions in this translation unit.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229:
230: /************************************************************************
231: * *
232: * Some factorized error routines *
233: * *
234: ************************************************************************/
235:
236: /**
237: * xmlErrAttributeDup:
238: * @ctxt: an XML parser context
239: * @prefix: the attribute prefix
240: * @localname: the attribute localname
241: *
242: * Handle a redefinition of attribute error
243: */
244: static void
245: xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246: const xmlChar * localname)
247: {
248: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249: (ctxt->instate == XML_PARSER_EOF))
250: return;
251: if (ctxt != NULL)
252: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
253:
254: if (prefix == NULL)
255: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257: (const char *) localname, NULL, NULL, 0, 0,
258: "Attribute %s redefined\n", localname);
259: else
260: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262: (const char *) prefix, (const char *) localname,
263: NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264: localname);
265: if (ctxt != NULL) {
266: ctxt->wellFormed = 0;
267: if (ctxt->recovery == 0)
268: ctxt->disableSAX = 1;
269: }
270: }
271:
272: /**
273: * xmlFatalErr:
274: * @ctxt: an XML parser context
275: * @error: the error number
276: * @extra: extra information string
277: *
278: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279: */
280: static void
281: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
282: {
283: const char *errmsg;
284:
285: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286: (ctxt->instate == XML_PARSER_EOF))
287: return;
288: switch (error) {
289: case XML_ERR_INVALID_HEX_CHARREF:
290: errmsg = "CharRef: invalid hexadecimal value\n";
291: break;
292: case XML_ERR_INVALID_DEC_CHARREF:
293: errmsg = "CharRef: invalid decimal value\n";
294: break;
295: case XML_ERR_INVALID_CHARREF:
296: errmsg = "CharRef: invalid value\n";
297: break;
298: case XML_ERR_INTERNAL_ERROR:
299: errmsg = "internal error";
300: break;
301: case XML_ERR_PEREF_AT_EOF:
302: errmsg = "PEReference at end of document\n";
303: break;
304: case XML_ERR_PEREF_IN_PROLOG:
305: errmsg = "PEReference in prolog\n";
306: break;
307: case XML_ERR_PEREF_IN_EPILOG:
308: errmsg = "PEReference in epilog\n";
309: break;
310: case XML_ERR_PEREF_NO_NAME:
311: errmsg = "PEReference: no name\n";
312: break;
313: case XML_ERR_PEREF_SEMICOL_MISSING:
314: errmsg = "PEReference: expecting ';'\n";
315: break;
316: case XML_ERR_ENTITY_LOOP:
317: errmsg = "Detected an entity reference loop\n";
318: break;
319: case XML_ERR_ENTITY_NOT_STARTED:
320: errmsg = "EntityValue: \" or ' expected\n";
321: break;
322: case XML_ERR_ENTITY_PE_INTERNAL:
323: errmsg = "PEReferences forbidden in internal subset\n";
324: break;
325: case XML_ERR_ENTITY_NOT_FINISHED:
326: errmsg = "EntityValue: \" or ' expected\n";
327: break;
328: case XML_ERR_ATTRIBUTE_NOT_STARTED:
329: errmsg = "AttValue: \" or ' expected\n";
330: break;
331: case XML_ERR_LT_IN_ATTRIBUTE:
332: errmsg = "Unescaped '<' not allowed in attributes values\n";
333: break;
334: case XML_ERR_LITERAL_NOT_STARTED:
335: errmsg = "SystemLiteral \" or ' expected\n";
336: break;
337: case XML_ERR_LITERAL_NOT_FINISHED:
338: errmsg = "Unfinished System or Public ID \" or ' expected\n";
339: break;
340: case XML_ERR_MISPLACED_CDATA_END:
341: errmsg = "Sequence ']]>' not allowed in content\n";
342: break;
343: case XML_ERR_URI_REQUIRED:
344: errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345: break;
346: case XML_ERR_PUBID_REQUIRED:
347: errmsg = "PUBLIC, the Public Identifier is missing\n";
348: break;
349: case XML_ERR_HYPHEN_IN_COMMENT:
350: errmsg = "Comment must not contain '--' (double-hyphen)\n";
351: break;
352: case XML_ERR_PI_NOT_STARTED:
353: errmsg = "xmlParsePI : no target name\n";
354: break;
355: case XML_ERR_RESERVED_XML_NAME:
356: errmsg = "Invalid PI name\n";
357: break;
358: case XML_ERR_NOTATION_NOT_STARTED:
359: errmsg = "NOTATION: Name expected here\n";
360: break;
361: case XML_ERR_NOTATION_NOT_FINISHED:
362: errmsg = "'>' required to close NOTATION declaration\n";
363: break;
364: case XML_ERR_VALUE_REQUIRED:
365: errmsg = "Entity value required\n";
366: break;
367: case XML_ERR_URI_FRAGMENT:
368: errmsg = "Fragment not allowed";
369: break;
370: case XML_ERR_ATTLIST_NOT_STARTED:
371: errmsg = "'(' required to start ATTLIST enumeration\n";
372: break;
373: case XML_ERR_NMTOKEN_REQUIRED:
374: errmsg = "NmToken expected in ATTLIST enumeration\n";
375: break;
376: case XML_ERR_ATTLIST_NOT_FINISHED:
377: errmsg = "')' required to finish ATTLIST enumeration\n";
378: break;
379: case XML_ERR_MIXED_NOT_STARTED:
380: errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381: break;
382: case XML_ERR_PCDATA_REQUIRED:
383: errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384: break;
385: case XML_ERR_ELEMCONTENT_NOT_STARTED:
386: errmsg = "ContentDecl : Name or '(' expected\n";
387: break;
388: case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389: errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390: break;
391: case XML_ERR_PEREF_IN_INT_SUBSET:
392: errmsg =
393: "PEReference: forbidden within markup decl in internal subset\n";
394: break;
395: case XML_ERR_GT_REQUIRED:
396: errmsg = "expected '>'\n";
397: break;
398: case XML_ERR_CONDSEC_INVALID:
399: errmsg = "XML conditional section '[' expected\n";
400: break;
401: case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402: errmsg = "Content error in the external subset\n";
403: break;
404: case XML_ERR_CONDSEC_INVALID_KEYWORD:
405: errmsg =
406: "conditional section INCLUDE or IGNORE keyword expected\n";
407: break;
408: case XML_ERR_CONDSEC_NOT_FINISHED:
409: errmsg = "XML conditional section not closed\n";
410: break;
411: case XML_ERR_XMLDECL_NOT_STARTED:
412: errmsg = "Text declaration '<?xml' required\n";
413: break;
414: case XML_ERR_XMLDECL_NOT_FINISHED:
415: errmsg = "parsing XML declaration: '?>' expected\n";
416: break;
417: case XML_ERR_EXT_ENTITY_STANDALONE:
418: errmsg = "external parsed entities cannot be standalone\n";
419: break;
420: case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421: errmsg = "EntityRef: expecting ';'\n";
422: break;
423: case XML_ERR_DOCTYPE_NOT_FINISHED:
424: errmsg = "DOCTYPE improperly terminated\n";
425: break;
426: case XML_ERR_LTSLASH_REQUIRED:
427: errmsg = "EndTag: '</' not found\n";
428: break;
429: case XML_ERR_EQUAL_REQUIRED:
430: errmsg = "expected '='\n";
431: break;
432: case XML_ERR_STRING_NOT_CLOSED:
433: errmsg = "String not closed expecting \" or '\n";
434: break;
435: case XML_ERR_STRING_NOT_STARTED:
436: errmsg = "String not started expecting ' or \"\n";
437: break;
438: case XML_ERR_ENCODING_NAME:
439: errmsg = "Invalid XML encoding name\n";
440: break;
441: case XML_ERR_STANDALONE_VALUE:
442: errmsg = "standalone accepts only 'yes' or 'no'\n";
443: break;
444: case XML_ERR_DOCUMENT_EMPTY:
445: errmsg = "Document is empty\n";
446: break;
447: case XML_ERR_DOCUMENT_END:
448: errmsg = "Extra content at the end of the document\n";
449: break;
450: case XML_ERR_NOT_WELL_BALANCED:
451: errmsg = "chunk is not well balanced\n";
452: break;
453: case XML_ERR_EXTRA_CONTENT:
454: errmsg = "extra content at the end of well balanced chunk\n";
455: break;
456: case XML_ERR_VERSION_MISSING:
457: errmsg = "Malformed declaration expecting version\n";
458: break;
459: #if 0
460: case:
461: errmsg = "\n";
462: break;
463: #endif
464: default:
465: errmsg = "Unregistered error message\n";
466: }
467: if (ctxt != NULL)
468: ctxt->errNo = error;
469: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471: info);
472: if (ctxt != NULL) {
473: ctxt->wellFormed = 0;
474: if (ctxt->recovery == 0)
475: ctxt->disableSAX = 1;
476: }
477: }
478:
479: /**
480: * xmlFatalErrMsg:
481: * @ctxt: an XML parser context
482: * @error: the error number
483: * @msg: the error message
484: *
485: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486: */
487: static void
488: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489: const char *msg)
490: {
491: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492: (ctxt->instate == XML_PARSER_EOF))
493: return;
494: if (ctxt != NULL)
495: ctxt->errNo = error;
496: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
498: if (ctxt != NULL) {
499: ctxt->wellFormed = 0;
500: if (ctxt->recovery == 0)
501: ctxt->disableSAX = 1;
502: }
503: }
504:
505: /**
506: * xmlWarningMsg:
507: * @ctxt: an XML parser context
508: * @error: the error number
509: * @msg: the error message
510: * @str1: extra data
511: * @str2: extra data
512: *
513: * Handle a warning.
514: */
515: static void
516: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517: const char *msg, const xmlChar *str1, const xmlChar *str2)
518: {
519: xmlStructuredErrorFunc schannel = NULL;
520:
521: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522: (ctxt->instate == XML_PARSER_EOF))
523: return;
524: if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525: (ctxt->sax->initialized == XML_SAX2_MAGIC))
526: schannel = ctxt->sax->serror;
527: if (ctxt != NULL) {
528: __xmlRaiseError(schannel,
529: (ctxt->sax) ? ctxt->sax->warning : NULL,
530: ctxt->userData,
531: ctxt, NULL, XML_FROM_PARSER, error,
532: XML_ERR_WARNING, NULL, 0,
533: (const char *) str1, (const char *) str2, NULL, 0, 0,
534: msg, (const char *) str1, (const char *) str2);
535: } else {
536: __xmlRaiseError(schannel, NULL, NULL,
537: ctxt, NULL, XML_FROM_PARSER, error,
538: XML_ERR_WARNING, NULL, 0,
539: (const char *) str1, (const char *) str2, NULL, 0, 0,
540: msg, (const char *) str1, (const char *) str2);
541: }
542: }
543:
544: /**
545: * xmlValidityError:
546: * @ctxt: an XML parser context
547: * @error: the error number
548: * @msg: the error message
549: * @str1: extra data
550: *
551: * Handle a validity error.
552: */
553: static void
554: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555: const char *msg, const xmlChar *str1, const xmlChar *str2)
556: {
557: xmlStructuredErrorFunc schannel = NULL;
558:
559: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560: (ctxt->instate == XML_PARSER_EOF))
561: return;
562: if (ctxt != NULL) {
563: ctxt->errNo = error;
564: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565: schannel = ctxt->sax->serror;
566: }
567: if (ctxt != NULL) {
568: __xmlRaiseError(schannel,
569: ctxt->vctxt.error, ctxt->vctxt.userData,
570: ctxt, NULL, XML_FROM_DTD, error,
571: XML_ERR_ERROR, NULL, 0, (const char *) str1,
572: (const char *) str2, NULL, 0, 0,
573: msg, (const char *) str1, (const char *) str2);
574: ctxt->valid = 0;
575: } else {
576: __xmlRaiseError(schannel, NULL, NULL,
577: ctxt, NULL, XML_FROM_DTD, error,
578: XML_ERR_ERROR, NULL, 0, (const char *) str1,
579: (const char *) str2, NULL, 0, 0,
580: msg, (const char *) str1, (const char *) str2);
581: }
582: }
583:
584: /**
585: * xmlFatalErrMsgInt:
586: * @ctxt: an XML parser context
587: * @error: the error number
588: * @msg: the error message
589: * @val: an integer value
590: *
591: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592: */
593: static void
594: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595: const char *msg, int val)
596: {
597: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598: (ctxt->instate == XML_PARSER_EOF))
599: return;
600: if (ctxt != NULL)
601: ctxt->errNo = error;
602: __xmlRaiseError(NULL, NULL, NULL,
603: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604: NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
605: if (ctxt != NULL) {
606: ctxt->wellFormed = 0;
607: if (ctxt->recovery == 0)
608: ctxt->disableSAX = 1;
609: }
610: }
611:
612: /**
613: * xmlFatalErrMsgStrIntStr:
614: * @ctxt: an XML parser context
615: * @error: the error number
616: * @msg: the error message
617: * @str1: an string info
618: * @val: an integer value
619: * @str2: an string info
620: *
621: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622: */
623: static void
624: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625: const char *msg, const xmlChar *str1, int val,
626: const xmlChar *str2)
627: {
628: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629: (ctxt->instate == XML_PARSER_EOF))
630: return;
631: if (ctxt != NULL)
632: ctxt->errNo = error;
633: __xmlRaiseError(NULL, NULL, NULL,
634: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635: NULL, 0, (const char *) str1, (const char *) str2,
636: NULL, val, 0, msg, str1, val, str2);
637: if (ctxt != NULL) {
638: ctxt->wellFormed = 0;
639: if (ctxt->recovery == 0)
640: ctxt->disableSAX = 1;
641: }
642: }
643:
644: /**
645: * xmlFatalErrMsgStr:
646: * @ctxt: an XML parser context
647: * @error: the error number
648: * @msg: the error message
649: * @val: a string value
650: *
651: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652: */
653: static void
654: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655: const char *msg, const xmlChar * val)
656: {
657: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658: (ctxt->instate == XML_PARSER_EOF))
659: return;
660: if (ctxt != NULL)
661: ctxt->errNo = error;
662: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663: XML_FROM_PARSER, error, XML_ERR_FATAL,
664: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665: val);
666: if (ctxt != NULL) {
667: ctxt->wellFormed = 0;
668: if (ctxt->recovery == 0)
669: ctxt->disableSAX = 1;
670: }
671: }
672:
673: /**
674: * xmlErrMsgStr:
675: * @ctxt: an XML parser context
676: * @error: the error number
677: * @msg: the error message
678: * @val: a string value
679: *
680: * Handle a non fatal parser error
681: */
682: static void
683: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684: const char *msg, const xmlChar * val)
685: {
686: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687: (ctxt->instate == XML_PARSER_EOF))
688: return;
689: if (ctxt != NULL)
690: ctxt->errNo = error;
691: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692: XML_FROM_PARSER, error, XML_ERR_ERROR,
693: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694: val);
695: }
696:
697: /**
698: * xmlNsErr:
699: * @ctxt: an XML parser context
700: * @error: the error number
701: * @msg: the message
702: * @info1: extra information string
703: * @info2: extra information string
704: *
705: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706: */
707: static void
708: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709: const char *msg,
710: const xmlChar * info1, const xmlChar * info2,
711: const xmlChar * info3)
712: {
713: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714: (ctxt->instate == XML_PARSER_EOF))
715: return;
716: if (ctxt != NULL)
717: ctxt->errNo = error;
718: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719: XML_ERR_ERROR, NULL, 0, (const char *) info1,
720: (const char *) info2, (const char *) info3, 0, 0, msg,
721: info1, info2, info3);
722: if (ctxt != NULL)
723: ctxt->nsWellFormed = 0;
724: }
725:
726: /**
727: * xmlNsWarn
728: * @ctxt: an XML parser context
729: * @error: the error number
730: * @msg: the message
731: * @info1: extra information string
732: * @info2: extra information string
733: *
734: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735: */
736: static void
737: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738: const char *msg,
739: const xmlChar * info1, const xmlChar * info2,
740: const xmlChar * info3)
741: {
742: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743: (ctxt->instate == XML_PARSER_EOF))
744: return;
745: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746: XML_ERR_WARNING, NULL, 0, (const char *) info1,
747: (const char *) info2, (const char *) info3, 0, 0, msg,
748: info1, info2, info3);
749: }
750:
751: /************************************************************************
752: * *
753: * Library wide options *
754: * *
755: ************************************************************************/
756:
757: /**
758: * xmlHasFeature:
759: * @feature: the feature to be examined
760: *
761: * Examines if the library has been compiled with a given feature.
762: *
763: * Returns a non-zero value if the feature exist, otherwise zero.
764: * Returns zero (0) if the feature does not exist or an unknown
765: * unknown feature is requested, non-zero otherwise.
766: */
767: int
768: xmlHasFeature(xmlFeature feature)
769: {
770: switch (feature) {
771: case XML_WITH_THREAD:
772: #ifdef LIBXML_THREAD_ENABLED
773: return(1);
774: #else
775: return(0);
776: #endif
777: case XML_WITH_TREE:
778: #ifdef LIBXML_TREE_ENABLED
779: return(1);
780: #else
781: return(0);
782: #endif
783: case XML_WITH_OUTPUT:
784: #ifdef LIBXML_OUTPUT_ENABLED
785: return(1);
786: #else
787: return(0);
788: #endif
789: case XML_WITH_PUSH:
790: #ifdef LIBXML_PUSH_ENABLED
791: return(1);
792: #else
793: return(0);
794: #endif
795: case XML_WITH_READER:
796: #ifdef LIBXML_READER_ENABLED
797: return(1);
798: #else
799: return(0);
800: #endif
801: case XML_WITH_PATTERN:
802: #ifdef LIBXML_PATTERN_ENABLED
803: return(1);
804: #else
805: return(0);
806: #endif
807: case XML_WITH_WRITER:
808: #ifdef LIBXML_WRITER_ENABLED
809: return(1);
810: #else
811: return(0);
812: #endif
813: case XML_WITH_SAX1:
814: #ifdef LIBXML_SAX1_ENABLED
815: return(1);
816: #else
817: return(0);
818: #endif
819: case XML_WITH_FTP:
820: #ifdef LIBXML_FTP_ENABLED
821: return(1);
822: #else
823: return(0);
824: #endif
825: case XML_WITH_HTTP:
826: #ifdef LIBXML_HTTP_ENABLED
827: return(1);
828: #else
829: return(0);
830: #endif
831: case XML_WITH_VALID:
832: #ifdef LIBXML_VALID_ENABLED
833: return(1);
834: #else
835: return(0);
836: #endif
837: case XML_WITH_HTML:
838: #ifdef LIBXML_HTML_ENABLED
839: return(1);
840: #else
841: return(0);
842: #endif
843: case XML_WITH_LEGACY:
844: #ifdef LIBXML_LEGACY_ENABLED
845: return(1);
846: #else
847: return(0);
848: #endif
849: case XML_WITH_C14N:
850: #ifdef LIBXML_C14N_ENABLED
851: return(1);
852: #else
853: return(0);
854: #endif
855: case XML_WITH_CATALOG:
856: #ifdef LIBXML_CATALOG_ENABLED
857: return(1);
858: #else
859: return(0);
860: #endif
861: case XML_WITH_XPATH:
862: #ifdef LIBXML_XPATH_ENABLED
863: return(1);
864: #else
865: return(0);
866: #endif
867: case XML_WITH_XPTR:
868: #ifdef LIBXML_XPTR_ENABLED
869: return(1);
870: #else
871: return(0);
872: #endif
873: case XML_WITH_XINCLUDE:
874: #ifdef LIBXML_XINCLUDE_ENABLED
875: return(1);
876: #else
877: return(0);
878: #endif
879: case XML_WITH_ICONV:
880: #ifdef LIBXML_ICONV_ENABLED
881: return(1);
882: #else
883: return(0);
884: #endif
885: case XML_WITH_ISO8859X:
886: #ifdef LIBXML_ISO8859X_ENABLED
887: return(1);
888: #else
889: return(0);
890: #endif
891: case XML_WITH_UNICODE:
892: #ifdef LIBXML_UNICODE_ENABLED
893: return(1);
894: #else
895: return(0);
896: #endif
897: case XML_WITH_REGEXP:
898: #ifdef LIBXML_REGEXP_ENABLED
899: return(1);
900: #else
901: return(0);
902: #endif
903: case XML_WITH_AUTOMATA:
904: #ifdef LIBXML_AUTOMATA_ENABLED
905: return(1);
906: #else
907: return(0);
908: #endif
909: case XML_WITH_EXPR:
910: #ifdef LIBXML_EXPR_ENABLED
911: return(1);
912: #else
913: return(0);
914: #endif
915: case XML_WITH_SCHEMAS:
916: #ifdef LIBXML_SCHEMAS_ENABLED
917: return(1);
918: #else
919: return(0);
920: #endif
921: case XML_WITH_SCHEMATRON:
922: #ifdef LIBXML_SCHEMATRON_ENABLED
923: return(1);
924: #else
925: return(0);
926: #endif
927: case XML_WITH_MODULES:
928: #ifdef LIBXML_MODULES_ENABLED
929: return(1);
930: #else
931: return(0);
932: #endif
933: case XML_WITH_DEBUG:
934: #ifdef LIBXML_DEBUG_ENABLED
935: return(1);
936: #else
937: return(0);
938: #endif
939: case XML_WITH_DEBUG_MEM:
940: #ifdef DEBUG_MEMORY_LOCATION
941: return(1);
942: #else
943: return(0);
944: #endif
945: case XML_WITH_DEBUG_RUN:
946: #ifdef LIBXML_DEBUG_RUNTIME
947: return(1);
948: #else
949: return(0);
950: #endif
951: case XML_WITH_ZLIB:
952: #ifdef LIBXML_ZLIB_ENABLED
953: return(1);
954: #else
955: return(0);
956: #endif
957: case XML_WITH_ICU:
958: #ifdef LIBXML_ICU_ENABLED
959: return(1);
960: #else
961: return(0);
962: #endif
963: default:
964: break;
965: }
966: return(0);
967: }
968:
969: /************************************************************************
970: * *
971: * SAX2 defaulted attributes handling *
972: * *
973: ************************************************************************/
974:
975: /**
976: * xmlDetectSAX2:
977: * @ctxt: an XML parser context
978: *
979: * Do the SAX2 detection and specific intialization
980: */
981: static void
982: xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983: if (ctxt == NULL) return;
984: #ifdef LIBXML_SAX1_ENABLED
985: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986: ((ctxt->sax->startElementNs != NULL) ||
987: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
988: #else
989: ctxt->sax2 = 1;
990: #endif /* LIBXML_SAX1_ENABLED */
991:
992: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
995: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996: (ctxt->str_xml_ns == NULL)) {
997: xmlErrMemory(ctxt, NULL);
998: }
999: }
1000:
1001: typedef struct _xmlDefAttrs xmlDefAttrs;
1002: typedef xmlDefAttrs *xmlDefAttrsPtr;
1003: struct _xmlDefAttrs {
1004: int nbAttrs; /* number of defaulted attributes on that element */
1005: int maxAttrs; /* the size of the array */
1006: const xmlChar *values[5]; /* array of localname/prefix/values/external */
1007: };
1008:
1009: /**
1010: * xmlAttrNormalizeSpace:
1011: * @src: the source string
1012: * @dst: the target string
1013: *
1014: * Normalize the space in non CDATA attribute values:
1015: * If the attribute type is not CDATA, then the XML processor MUST further
1016: * process the normalized attribute value by discarding any leading and
1017: * trailing space (#x20) characters, and by replacing sequences of space
1018: * (#x20) characters by a single space (#x20) character.
1019: * Note that the size of dst need to be at least src, and if one doesn't need
1020: * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021: * passing src as dst is just fine.
1022: *
1023: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024: * is needed.
1025: */
1026: static xmlChar *
1027: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1028: {
1029: if ((src == NULL) || (dst == NULL))
1030: return(NULL);
1031:
1032: while (*src == 0x20) src++;
1033: while (*src != 0) {
1034: if (*src == 0x20) {
1035: while (*src == 0x20) src++;
1036: if (*src != 0)
1037: *dst++ = 0x20;
1038: } else {
1039: *dst++ = *src++;
1040: }
1041: }
1042: *dst = 0;
1043: if (dst == src)
1044: return(NULL);
1045: return(dst);
1046: }
1047:
1048: /**
1049: * xmlAttrNormalizeSpace2:
1050: * @src: the source string
1051: *
1052: * Normalize the space in non CDATA attribute values, a slightly more complex
1053: * front end to avoid allocation problems when running on attribute values
1054: * coming from the input.
1055: *
1056: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057: * is needed.
1058: */
1059: static const xmlChar *
1060: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1061: {
1062: int i;
1063: int remove_head = 0;
1064: int need_realloc = 0;
1065: const xmlChar *cur;
1066:
1067: if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068: return(NULL);
1069: i = *len;
1070: if (i <= 0)
1071: return(NULL);
1072:
1073: cur = src;
1074: while (*cur == 0x20) {
1075: cur++;
1076: remove_head++;
1077: }
1078: while (*cur != 0) {
1079: if (*cur == 0x20) {
1080: cur++;
1081: if ((*cur == 0x20) || (*cur == 0)) {
1082: need_realloc = 1;
1083: break;
1084: }
1085: } else
1086: cur++;
1087: }
1088: if (need_realloc) {
1089: xmlChar *ret;
1090:
1091: ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092: if (ret == NULL) {
1093: xmlErrMemory(ctxt, NULL);
1094: return(NULL);
1095: }
1096: xmlAttrNormalizeSpace(ret, ret);
1097: *len = (int) strlen((const char *)ret);
1098: return(ret);
1099: } else if (remove_head) {
1100: *len -= remove_head;
1101: memmove(src, src + remove_head, 1 + *len);
1102: return(src);
1103: }
1104: return(NULL);
1105: }
1106:
1107: /**
1108: * xmlAddDefAttrs:
1109: * @ctxt: an XML parser context
1110: * @fullname: the element fullname
1111: * @fullattr: the attribute fullname
1112: * @value: the attribute value
1113: *
1114: * Add a defaulted attribute for an element
1115: */
1116: static void
1117: xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118: const xmlChar *fullname,
1119: const xmlChar *fullattr,
1120: const xmlChar *value) {
1121: xmlDefAttrsPtr defaults;
1122: int len;
1123: const xmlChar *name;
1124: const xmlChar *prefix;
1125:
1126: /*
1127: * Allows to detect attribute redefinitions
1128: */
1129: if (ctxt->attsSpecial != NULL) {
1130: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131: return;
1132: }
1133:
1134: if (ctxt->attsDefault == NULL) {
1135: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136: if (ctxt->attsDefault == NULL)
1137: goto mem_error;
1138: }
1139:
1140: /*
1141: * split the element name into prefix:localname , the string found
1142: * are within the DTD and then not associated to namespace names.
1143: */
1144: name = xmlSplitQName3(fullname, &len);
1145: if (name == NULL) {
1146: name = xmlDictLookup(ctxt->dict, fullname, -1);
1147: prefix = NULL;
1148: } else {
1149: name = xmlDictLookup(ctxt->dict, name, -1);
1150: prefix = xmlDictLookup(ctxt->dict, fullname, len);
1151: }
1152:
1153: /*
1154: * make sure there is some storage
1155: */
1156: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157: if (defaults == NULL) {
1158: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159: (4 * 5) * sizeof(const xmlChar *));
1160: if (defaults == NULL)
1161: goto mem_error;
1162: defaults->nbAttrs = 0;
1163: defaults->maxAttrs = 4;
1164: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165: defaults, NULL) < 0) {
1166: xmlFree(defaults);
1167: goto mem_error;
1168: }
1169: } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170: xmlDefAttrsPtr temp;
1171:
1172: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1174: if (temp == NULL)
1175: goto mem_error;
1176: defaults = temp;
1177: defaults->maxAttrs *= 2;
1178: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179: defaults, NULL) < 0) {
1180: xmlFree(defaults);
1181: goto mem_error;
1182: }
1183: }
1184:
1185: /*
1186: * Split the element name into prefix:localname , the string found
1187: * are within the DTD and hen not associated to namespace names.
1188: */
1189: name = xmlSplitQName3(fullattr, &len);
1190: if (name == NULL) {
1191: name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192: prefix = NULL;
1193: } else {
1194: name = xmlDictLookup(ctxt->dict, name, -1);
1195: prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1196: }
1197:
1198: defaults->values[5 * defaults->nbAttrs] = name;
1199: defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200: /* intern the string and precompute the end */
1201: len = xmlStrlen(value);
1202: value = xmlDictLookup(ctxt->dict, value, len);
1203: defaults->values[5 * defaults->nbAttrs + 2] = value;
1204: defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205: if (ctxt->external)
1206: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207: else
1208: defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209: defaults->nbAttrs++;
1210:
1211: return;
1212:
1213: mem_error:
1214: xmlErrMemory(ctxt, NULL);
1215: return;
1216: }
1217:
1218: /**
1219: * xmlAddSpecialAttr:
1220: * @ctxt: an XML parser context
1221: * @fullname: the element fullname
1222: * @fullattr: the attribute fullname
1223: * @type: the attribute type
1224: *
1225: * Register this attribute type
1226: */
1227: static void
1228: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229: const xmlChar *fullname,
1230: const xmlChar *fullattr,
1231: int type)
1232: {
1233: if (ctxt->attsSpecial == NULL) {
1234: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235: if (ctxt->attsSpecial == NULL)
1236: goto mem_error;
1237: }
1238:
1239: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240: return;
1241:
1242: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243: (void *) (long) type);
1244: return;
1245:
1246: mem_error:
1247: xmlErrMemory(ctxt, NULL);
1248: return;
1249: }
1250:
1251: /**
1252: * xmlCleanSpecialAttrCallback:
1253: *
1254: * Removes CDATA attributes from the special attribute table
1255: */
1256: static void
1257: xmlCleanSpecialAttrCallback(void *payload, void *data,
1258: const xmlChar *fullname, const xmlChar *fullattr,
1259: const xmlChar *unused ATTRIBUTE_UNUSED) {
1260: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1261:
1262: if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1264: }
1265: }
1266:
1267: /**
1268: * xmlCleanSpecialAttr:
1269: * @ctxt: an XML parser context
1270: *
1271: * Trim the list of attributes defined to remove all those of type
1272: * CDATA as they are not special. This call should be done when finishing
1273: * to parse the DTD and before starting to parse the document root.
1274: */
1275: static void
1276: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1277: {
1278: if (ctxt->attsSpecial == NULL)
1279: return;
1280:
1281: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1282:
1283: if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284: xmlHashFree(ctxt->attsSpecial, NULL);
1285: ctxt->attsSpecial = NULL;
1286: }
1287: return;
1288: }
1289:
1290: /**
1291: * xmlCheckLanguageID:
1292: * @lang: pointer to the string value
1293: *
1294: * Checks that the value conforms to the LanguageID production:
1295: *
1296: * NOTE: this is somewhat deprecated, those productions were removed from
1297: * the XML Second edition.
1298: *
1299: * [33] LanguageID ::= Langcode ('-' Subcode)*
1300: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1301: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304: * [38] Subcode ::= ([a-z] | [A-Z])+
1305: *
1306: * The current REC reference the sucessors of RFC 1766, currently 5646
1307: *
1308: * http://www.rfc-editor.org/rfc/rfc5646.txt
1309: * langtag = language
1310: * ["-" script]
1311: * ["-" region]
1312: * *("-" variant)
1313: * *("-" extension)
1314: * ["-" privateuse]
1315: * language = 2*3ALPHA ; shortest ISO 639 code
1316: * ["-" extlang] ; sometimes followed by
1317: * ; extended language subtags
1318: * / 4ALPHA ; or reserved for future use
1319: * / 5*8ALPHA ; or registered language subtag
1320: *
1321: * extlang = 3ALPHA ; selected ISO 639 codes
1322: * *2("-" 3ALPHA) ; permanently reserved
1323: *
1324: * script = 4ALPHA ; ISO 15924 code
1325: *
1326: * region = 2ALPHA ; ISO 3166-1 code
1327: * / 3DIGIT ; UN M.49 code
1328: *
1329: * variant = 5*8alphanum ; registered variants
1330: * / (DIGIT 3alphanum)
1331: *
1332: * extension = singleton 1*("-" (2*8alphanum))
1333: *
1334: * ; Single alphanumerics
1335: * ; "x" reserved for private use
1336: * singleton = DIGIT ; 0 - 9
1337: * / %x41-57 ; A - W
1338: * / %x59-5A ; Y - Z
1339: * / %x61-77 ; a - w
1340: * / %x79-7A ; y - z
1341: *
1342: * it sounds right to still allow Irregular i-xxx IANA and user codes too
1343: * The parser below doesn't try to cope with extension or privateuse
1344: * that could be added but that's not interoperable anyway
1345: *
1346: * Returns 1 if correct 0 otherwise
1347: **/
1348: int
1349: xmlCheckLanguageID(const xmlChar * lang)
1350: {
1351: const xmlChar *cur = lang, *nxt;
1352:
1353: if (cur == NULL)
1354: return (0);
1355: if (((cur[0] == 'i') && (cur[1] == '-')) ||
1356: ((cur[0] == 'I') && (cur[1] == '-')) ||
1357: ((cur[0] == 'x') && (cur[1] == '-')) ||
1358: ((cur[0] == 'X') && (cur[1] == '-'))) {
1359: /*
1360: * Still allow IANA code and user code which were coming
1361: * from the previous version of the XML-1.0 specification
1362: * it's deprecated but we should not fail
1363: */
1364: cur += 2;
1365: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1366: ((cur[0] >= 'a') && (cur[0] <= 'z')))
1367: cur++;
1368: return(cur[0] == 0);
1369: }
1370: nxt = cur;
1371: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373: nxt++;
1374: if (nxt - cur >= 4) {
1375: /*
1376: * Reserved
1377: */
1378: if ((nxt - cur > 8) || (nxt[0] != 0))
1379: return(0);
1380: return(1);
1381: }
1382: if (nxt - cur < 2)
1383: return(0);
1384: /* we got an ISO 639 code */
1385: if (nxt[0] == 0)
1386: return(1);
1387: if (nxt[0] != '-')
1388: return(0);
1389:
1390: nxt++;
1391: cur = nxt;
1392: /* now we can have extlang or script or region or variant */
1393: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1394: goto region_m49;
1395:
1396: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1398: nxt++;
1399: if (nxt - cur == 4)
1400: goto script;
1401: if (nxt - cur == 2)
1402: goto region;
1403: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1404: goto variant;
1405: if (nxt - cur != 3)
1406: return(0);
1407: /* we parsed an extlang */
1408: if (nxt[0] == 0)
1409: return(1);
1410: if (nxt[0] != '-')
1411: return(0);
1412:
1413: nxt++;
1414: cur = nxt;
1415: /* now we can have script or region or variant */
1416: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1417: goto region_m49;
1418:
1419: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1420: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1421: nxt++;
1422: if (nxt - cur == 2)
1423: goto region;
1424: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1425: goto variant;
1426: if (nxt - cur != 4)
1427: return(0);
1428: /* we parsed a script */
1429: script:
1430: if (nxt[0] == 0)
1431: return(1);
1432: if (nxt[0] != '-')
1433: return(0);
1434:
1435: nxt++;
1436: cur = nxt;
1437: /* now we can have region or variant */
1438: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1439: goto region_m49;
1440:
1441: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1442: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1443: nxt++;
1444:
1445: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1446: goto variant;
1447: if (nxt - cur != 2)
1448: return(0);
1449: /* we parsed a region */
1450: region:
1451: if (nxt[0] == 0)
1452: return(1);
1453: if (nxt[0] != '-')
1454: return(0);
1455:
1456: nxt++;
1457: cur = nxt;
1458: /* now we can just have a variant */
1459: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1460: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1461: nxt++;
1462:
1463: if ((nxt - cur < 5) || (nxt - cur > 8))
1464: return(0);
1465:
1466: /* we parsed a variant */
1467: variant:
1468: if (nxt[0] == 0)
1469: return(1);
1470: if (nxt[0] != '-')
1471: return(0);
1472: /* extensions and private use subtags not checked */
1473: return (1);
1474:
1475: region_m49:
1476: if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1477: ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1478: nxt += 3;
1479: goto region;
1480: }
1481: return(0);
1482: }
1483:
1484: /************************************************************************
1485: * *
1486: * Parser stacks related functions and macros *
1487: * *
1488: ************************************************************************/
1489:
/* Forward declaration of a file-local (static) function, so that code placed before its definition can reference it. */
1490: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1491: const xmlChar ** str);
1492:
1493: #ifdef SAX2
1494: /**
1495: * nsPush:
1496: * @ctxt: an XML parser context
1497: * @prefix: the namespace prefix or NULL
1498: * @URL: the namespace name
1499: *
1500: * Pushes a new parser namespace on top of the ns stack
1501: *
1502: * Returns -1 in case of error, -2 if the namespace should be discarded
1503: * and the index in the stack otherwise.
1504: */
1505: static int
1506: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1507: {
1508: if (ctxt->options & XML_PARSE_NSCLEAN) {
1509: int i;
1510: for (i = 0;i < ctxt->nsNr;i += 2) {
1511: if (ctxt->nsTab[i] == prefix) {
1512: /* in scope */
1513: if (ctxt->nsTab[i + 1] == URL)
1514: return(-2);
1515: /* out of scope keep it */
1516: break;
1517: }
1518: }
1519: }
1520: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1521: ctxt->nsMax = 10;
1522: ctxt->nsNr = 0;
1523: ctxt->nsTab = (const xmlChar **)
1524: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1525: if (ctxt->nsTab == NULL) {
1526: xmlErrMemory(ctxt, NULL);
1527: ctxt->nsMax = 0;
1528: return (-1);
1529: }
1530: } else if (ctxt->nsNr >= ctxt->nsMax) {
1531: const xmlChar ** tmp;
1532: ctxt->nsMax *= 2;
1533: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1534: ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1535: if (tmp == NULL) {
1536: xmlErrMemory(ctxt, NULL);
1537: ctxt->nsMax /= 2;
1538: return (-1);
1539: }
1540: ctxt->nsTab = tmp;
1541: }
1542: ctxt->nsTab[ctxt->nsNr++] = prefix;
1543: ctxt->nsTab[ctxt->nsNr++] = URL;
1544: return (ctxt->nsNr);
1545: }
1546: /**
1547: * nsPop:
1548: * @ctxt: an XML parser context
1549: * @nr: the number to pop
1550: *
1551: * Pops the top @nr parser prefix/namespace from the ns stack
1552: *
1553: * Returns the number of namespaces removed
1554: */
1555: static int
1556: nsPop(xmlParserCtxtPtr ctxt, int nr)
1557: {
1558: int i;
1559:
1560: if (ctxt->nsTab == NULL) return(0);
1561: if (ctxt->nsNr < nr) {
1562: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1563: nr = ctxt->nsNr;
1564: }
1565: if (ctxt->nsNr <= 0)
1566: return (0);
1567:
1568: for (i = 0;i < nr;i++) {
1569: ctxt->nsNr--;
1570: ctxt->nsTab[ctxt->nsNr] = NULL;
1571: }
1572: return(nr);
1573: }
1574: #endif
1575:
1576: static int
1577: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1578: const xmlChar **atts;
1579: int *attallocs;
1580: int maxatts;
1581:
1582: if (ctxt->atts == NULL) {
1583: maxatts = 55; /* allow for 10 attrs by default */
1584: atts = (const xmlChar **)
1585: xmlMalloc(maxatts * sizeof(xmlChar *));
1586: if (atts == NULL) goto mem_error;
1587: ctxt->atts = atts;
1588: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1589: if (attallocs == NULL) goto mem_error;
1590: ctxt->attallocs = attallocs;
1591: ctxt->maxatts = maxatts;
1592: } else if (nr + 5 > ctxt->maxatts) {
1593: maxatts = (nr + 5) * 2;
1594: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1595: maxatts * sizeof(const xmlChar *));
1596: if (atts == NULL) goto mem_error;
1597: ctxt->atts = atts;
1598: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1599: (maxatts / 5) * sizeof(int));
1600: if (attallocs == NULL) goto mem_error;
1601: ctxt->attallocs = attallocs;
1602: ctxt->maxatts = maxatts;
1603: }
1604: return(ctxt->maxatts);
1605: mem_error:
1606: xmlErrMemory(ctxt, NULL);
1607: return(-1);
1608: }
1609:
1610: /**
1611: * inputPush:
1612: * @ctxt: an XML parser context
1613: * @value: the parser input
1614: *
1615: * Pushes a new parser input on top of the input stack
1616: *
1617: * Returns -1 in case of error, the index in the stack otherwise
1618: */
1619: int
1620: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1621: {
1622: if ((ctxt == NULL) || (value == NULL))
1623: return(-1);
1624: if (ctxt->inputNr >= ctxt->inputMax) {
1625: ctxt->inputMax *= 2;
1626: ctxt->inputTab =
1627: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1628: ctxt->inputMax *
1629: sizeof(ctxt->inputTab[0]));
1630: if (ctxt->inputTab == NULL) {
1631: xmlErrMemory(ctxt, NULL);
1632: xmlFreeInputStream(value);
1633: ctxt->inputMax /= 2;
1634: value = NULL;
1635: return (-1);
1636: }
1637: }
1638: ctxt->inputTab[ctxt->inputNr] = value;
1639: ctxt->input = value;
1640: return (ctxt->inputNr++);
1641: }
1642: /**
1643: * inputPop:
1644: * @ctxt: an XML parser context
1645: *
1646: * Pops the top parser input from the input stack
1647: *
1648: * Returns the input just removed
1649: */
1650: xmlParserInputPtr
1651: inputPop(xmlParserCtxtPtr ctxt)
1652: {
1653: xmlParserInputPtr ret;
1654:
1655: if (ctxt == NULL)
1656: return(NULL);
1657: if (ctxt->inputNr <= 0)
1658: return (NULL);
1659: ctxt->inputNr--;
1660: if (ctxt->inputNr > 0)
1661: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1662: else
1663: ctxt->input = NULL;
1664: ret = ctxt->inputTab[ctxt->inputNr];
1665: ctxt->inputTab[ctxt->inputNr] = NULL;
1666: return (ret);
1667: }
1668: /**
1669: * nodePush:
1670: * @ctxt: an XML parser context
1671: * @value: the element node
1672: *
1673: * Pushes a new element node on top of the node stack
1674: *
1675: * Returns -1 in case of error, the index in the stack otherwise
1676: */
1677: int
1678: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1679: {
1680: if (ctxt == NULL) return(0);
1681: if (ctxt->nodeNr >= ctxt->nodeMax) {
1682: xmlNodePtr *tmp;
1683:
1684: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1685: ctxt->nodeMax * 2 *
1686: sizeof(ctxt->nodeTab[0]));
1687: if (tmp == NULL) {
1688: xmlErrMemory(ctxt, NULL);
1689: return (-1);
1690: }
1691: ctxt->nodeTab = tmp;
1692: ctxt->nodeMax *= 2;
1693: }
1694: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1695: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1696: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1697: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1698: xmlParserMaxDepth);
1699: ctxt->instate = XML_PARSER_EOF;
1700: return(-1);
1701: }
1702: ctxt->nodeTab[ctxt->nodeNr] = value;
1703: ctxt->node = value;
1704: return (ctxt->nodeNr++);
1705: }
1706:
1707: /**
1708: * nodePop:
1709: * @ctxt: an XML parser context
1710: *
1711: * Pops the top element node from the node stack
1712: *
1713: * Returns the node just removed
1714: */
1715: xmlNodePtr
1716: nodePop(xmlParserCtxtPtr ctxt)
1717: {
1718: xmlNodePtr ret;
1719:
1720: if (ctxt == NULL) return(NULL);
1721: if (ctxt->nodeNr <= 0)
1722: return (NULL);
1723: ctxt->nodeNr--;
1724: if (ctxt->nodeNr > 0)
1725: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1726: else
1727: ctxt->node = NULL;
1728: ret = ctxt->nodeTab[ctxt->nodeNr];
1729: ctxt->nodeTab[ctxt->nodeNr] = NULL;
1730: return (ret);
1731: }
1732:
1733: #ifdef LIBXML_PUSH_ENABLED
1734: /**
1735: * nameNsPush:
1736: * @ctxt: an XML parser context
1737: * @value: the element name
1738: * @prefix: the element prefix
1739: * @URI: the element namespace name
1740: *
1741: * Pushes a new element name/prefix/URL on top of the name stack
1742: *
1743: * Returns -1 in case of error, the index in the stack otherwise
1744: */
1745: static int
1746: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1747: const xmlChar *prefix, const xmlChar *URI, int nsNr)
1748: {
1749: if (ctxt->nameNr >= ctxt->nameMax) {
1750: const xmlChar * *tmp;
1751: void **tmp2;
1752: ctxt->nameMax *= 2;
1753: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1754: ctxt->nameMax *
1755: sizeof(ctxt->nameTab[0]));
1756: if (tmp == NULL) {
1757: ctxt->nameMax /= 2;
1758: goto mem_error;
1759: }
1760: ctxt->nameTab = tmp;
1761: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1762: ctxt->nameMax * 3 *
1763: sizeof(ctxt->pushTab[0]));
1764: if (tmp2 == NULL) {
1765: ctxt->nameMax /= 2;
1766: goto mem_error;
1767: }
1768: ctxt->pushTab = tmp2;
1769: }
1770: ctxt->nameTab[ctxt->nameNr] = value;
1771: ctxt->name = value;
1772: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1773: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1774: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1775: return (ctxt->nameNr++);
1776: mem_error:
1777: xmlErrMemory(ctxt, NULL);
1778: return (-1);
1779: }
1780: /**
1781: * nameNsPop:
1782: * @ctxt: an XML parser context
1783: *
1784: * Pops the top element/prefix/URI name from the name stack
1785: *
1786: * Returns the name just removed
1787: */
1788: static const xmlChar *
1789: nameNsPop(xmlParserCtxtPtr ctxt)
1790: {
1791: const xmlChar *ret;
1792:
1793: if (ctxt->nameNr <= 0)
1794: return (NULL);
1795: ctxt->nameNr--;
1796: if (ctxt->nameNr > 0)
1797: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1798: else
1799: ctxt->name = NULL;
1800: ret = ctxt->nameTab[ctxt->nameNr];
1801: ctxt->nameTab[ctxt->nameNr] = NULL;
1802: return (ret);
1803: }
1804: #endif /* LIBXML_PUSH_ENABLED */
1805:
1806: /**
1807: * namePush:
1808: * @ctxt: an XML parser context
1809: * @value: the element name
1810: *
1811: * Pushes a new element name on top of the name stack
1812: *
1813: * Returns -1 in case of error, the index in the stack otherwise
1814: */
1815: int
1816: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1817: {
1818: if (ctxt == NULL) return (-1);
1819:
1820: if (ctxt->nameNr >= ctxt->nameMax) {
1821: const xmlChar * *tmp;
1822: ctxt->nameMax *= 2;
1823: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1824: ctxt->nameMax *
1825: sizeof(ctxt->nameTab[0]));
1826: if (tmp == NULL) {
1827: ctxt->nameMax /= 2;
1828: goto mem_error;
1829: }
1830: ctxt->nameTab = tmp;
1831: }
1832: ctxt->nameTab[ctxt->nameNr] = value;
1833: ctxt->name = value;
1834: return (ctxt->nameNr++);
1835: mem_error:
1836: xmlErrMemory(ctxt, NULL);
1837: return (-1);
1838: }
1839: /**
1840: * namePop:
1841: * @ctxt: an XML parser context
1842: *
1843: * Pops the top element name from the name stack
1844: *
1845: * Returns the name just removed
1846: */
1847: const xmlChar *
1848: namePop(xmlParserCtxtPtr ctxt)
1849: {
1850: const xmlChar *ret;
1851:
1852: if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1853: return (NULL);
1854: ctxt->nameNr--;
1855: if (ctxt->nameNr > 0)
1856: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1857: else
1858: ctxt->name = NULL;
1859: ret = ctxt->nameTab[ctxt->nameNr];
1860: ctxt->nameTab[ctxt->nameNr] = NULL;
1861: return (ret);
1862: }
1863:
1864: static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1865: if (ctxt->spaceNr >= ctxt->spaceMax) {
1866: int *tmp;
1867:
1868: ctxt->spaceMax *= 2;
1869: tmp = (int *) xmlRealloc(ctxt->spaceTab,
1870: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1871: if (tmp == NULL) {
1872: xmlErrMemory(ctxt, NULL);
1873: ctxt->spaceMax /=2;
1874: return(-1);
1875: }
1876: ctxt->spaceTab = tmp;
1877: }
1878: ctxt->spaceTab[ctxt->spaceNr] = val;
1879: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1880: return(ctxt->spaceNr++);
1881: }
1882:
1883: static int spacePop(xmlParserCtxtPtr ctxt) {
1884: int ret;
1885: if (ctxt->spaceNr <= 0) return(0);
1886: ctxt->spaceNr--;
1887: if (ctxt->spaceNr > 0)
1888: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1889: else
1890: ctxt->space = &ctxt->spaceTab[0];
1891: ret = ctxt->spaceTab[ctxt->spaceNr];
1892: ctxt->spaceTab[ctxt->spaceNr] = -1;
1893: return(ret);
1894: }
1895:
1896: /*
1897: * Macros for accessing the content. Those should be used only by the parser,
1898: * and not exported.
1899: *
1900: * Dirty macros, i.e. one often need to make assumption on the context to
1901: * use them
1902: *
1903: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1904: * To be used with extreme caution since operations consuming
1905: * characters may move the input buffer to a different location !
1906: * CUR returns the current xmlChar value, i.e. an 8 bit value.
1907: * This should be used internally by the parser
1908: * only to compare to ASCII values, otherwise it would break when
1909: * running with UTF-8 encoding.
1910: * RAW same as CUR but in the input buffer, bypass any token
1911: * extraction that may have been done
1912: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1913: * to compare against ASCII based substrings.
1914: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1915: * strings without newlines within the parser.
1916: * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1917: * defined char within the parser. This macro takes no argument.
1918: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1919: *
1920: * NEXT Skip to the next character, this does the proper decoding
1921: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1922: * NEXTL(l) Skip the current unicode character of l xmlChars long.
1923: * CUR_CHAR(l) returns the current unicode character (int), set l
1924: * to the number of xmlChars used for the encoding [0-5].
1925: * CUR_SCHAR same but operate on a string instead of the context
1926: * COPY_BUF copy the current unicode char to the target buffer, increment
1927: * the index
1928: * GROW, SHRINK handling of input buffers
1929: */
1930:
/* Byte-level accessors on the current input. They expand to expressions on a variable named ctxt, which must be in scope where they are used. RAW and CUR have the same expansion; NXT(val) indexes from the current position and its expansion is not parenthesized as a whole. */
1931: #define RAW (*ctxt->input->cur)
1932: #define CUR (*ctxt->input->cur)
1933: #define NXT(val) ctxt->input->cur[(val)]
1934: #define CUR_PTR ctxt->input->cur
1935:
/* CMPn(s, c1, ..., cn): true when the first n bytes at s, read as unsigned char, equal c1..cn in that order. Each CMPn is built on the previous one, so the && chain stops at the first mismatch and later bytes are not read. The argument s is pasted unparenthesized after a cast, so it should be a simple pointer expression. */
1936: #define CMP4( s, c1, c2, c3, c4 ) \
1937: ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1938: ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1939: #define CMP5( s, c1, c2, c3, c4, c5 ) \
1940: ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1941: #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1942: ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1943: #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1944: ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1945: #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1946: ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1947: #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1948: ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1949: ((unsigned char *) s)[ 8 ] == c9 )
1950: #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1951: ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1952: ((unsigned char *) s)[ 9 ] == c10 )
1953:
/* SKIP(val): advance the current input by val bytes, adding val to ctxt->nbChars and to the column counter; the line counter is not touched. Afterwards a '%' at the new position triggers xmlParserHandlePEReference(), and a 0 byte with no more data obtainable from xmlParserInputGrow() triggers xmlPopInput(). val is evaluated three times. */
1954: #define SKIP(val) do { \
1955: ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1956: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1957: if ((*ctxt->input->cur == 0) && \
1958: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1959: xmlPopInput(ctxt); \
1960: } while (0)
1961:
/* SKIPL(val): like SKIP but steps one byte at a time so that a '\n' increments the line counter and resets the column to 1. The same '%' and end-of-input handling as SKIP runs once after the loop. val is re-evaluated on every loop test and is not parenthesized in the comparison. */
1962: #define SKIPL(val) do { \
1963: int skipl; \
1964: for(skipl=0; skipl<val; skipl++) { \
1965: if (*(ctxt->input->cur) == '\n') { \
1966: ctxt->input->line++; ctxt->input->col = 1; \
1967: } else ctxt->input->col++; \
1968: ctxt->nbChars++; \
1969: ctxt->input->cur++; \
1970: } \
1971: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1972: if ((*ctxt->input->cur == 0) && \
1973: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1974: xmlPopInput(ctxt); \
1975: } while (0)
1976:
/* SHRINK: call xmlSHRINK(ctxt) when ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes lie between the buffer base and the current position, and fewer than 2 * INPUT_CHUNK bytes remain before the end. The expansion is a bare if statement that already ends with a semicolon, so it is not safe as the body of an if that has an else. */
1977: #define SHRINK if ((ctxt->progressive == 0) && \
1978: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1979: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1980: xmlSHRINK (ctxt);
1981:
1982: static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1983: xmlParserInputShrink(ctxt->input);
1984: if ((*ctxt->input->cur == 0) &&
1985: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1986: xmlPopInput(ctxt);
1987: }
1988:
/* GROW: call xmlGROW(ctxt) when ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes remain between the current position and the end of the input. The expansion is a bare if statement that already ends with a semicolon, so it is not safe as the body of an if that has an else. */
1989: #define GROW if ((ctxt->progressive == 0) && \
1990: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1991: xmlGROW (ctxt);
1992:
1993: static void xmlGROW (xmlParserCtxtPtr ctxt) {
1994: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1995: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1996: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1997: xmlPopInput(ctxt);
1998: }
1999:
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt); the value of the expression is that function's return value. */
2000: #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2001:
/* NEXT: shorthand for xmlNextChar(ctxt). */
2002: #define NEXT xmlNextChar(ctxt)
2003:
/* NEXT1: advance the current input by exactly one byte, bumping the column and ctxt->nbChars, and request more input when the new current byte is 0. The line counter is never touched and no input is popped. The expansion is a brace block, not a do/while(0), so "NEXT1;" produces an extra empty statement. */
2004: #define NEXT1 { \
2005: ctxt->input->col++; \
2006: ctxt->input->cur++; \
2007: ctxt->nbChars++; \
2008: if (*ctxt->input->cur == 0) \
2009: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2010: }
2011:
/* NEXTL(l): step over the current character, which occupies l bytes. A '\n' at the current position increments the line and resets the column to 1; anything else adds one to the column. A '%' at the new position triggers xmlParserHandlePEReference(). Unlike SKIP, ctxt->nbChars is not updated and the end of input is not checked. l is not parenthesized in the expansion. */
2012: #define NEXTL(l) do { \
2013: if (*(ctxt->input->cur) == '\n') { \
2014: ctxt->input->line++; ctxt->input->col = 1; \
2015: } else ctxt->input->col++; \
2016: ctxt->input->cur += l; \
2017: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
2018: } while (0)
2019:
/* CUR_CHAR(l) and CUR_SCHAR(s, l): wrappers around xmlCurrentChar() and xmlStringCurrentChar(); l must be an lvalue because its address is passed to the callee. */
2020: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2021: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2022:
/* COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance i. When l is 1 the value is stored as a single xmlChar; otherwise xmlCopyCharMultiByte() writes it and i is advanced by its return value. The expansion is an unbraced if/else without a trailing semicolon, so it must be used as a complete statement and not directly under another if. */
2023: #define COPY_BUF(l,b,i,v) \
2024: if (l == 1) b[i++] = (xmlChar) v; \
2025: else i += xmlCopyCharMultiByte(&b[i],v)
2026:
2027: /**
2028: * xmlSkipBlankChars:
2029: * @ctxt: the XML parser context
2030: *
2031: * skip all blanks character found at that point in the input streams.
2032: * It pops up finished entities in the process if allowable at that point.
2033: *
2034: * Returns the number of space chars skipped
2035: */
2036:
2037: int
2038: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2039: int res = 0;
2040:
2041: /*
2042: * It's Okay to use CUR/NEXT here since all the blanks are on
2043: * the ASCII range.
2044: */
2045: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2046: const xmlChar *cur;
2047: /*
2048: * if we are in the document content, go really fast
2049: */
2050: cur = ctxt->input->cur;
2051: while (IS_BLANK_CH(*cur)) {
2052: if (*cur == '\n') {
2053: ctxt->input->line++; ctxt->input->col = 1;
2054: }
2055: cur++;
2056: res++;
2057: if (*cur == 0) {
2058: ctxt->input->cur = cur;
2059: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2060: cur = ctxt->input->cur;
2061: }
2062: }
2063: ctxt->input->cur = cur;
2064: } else {
2065: int cur;
2066: do {
2067: cur = CUR;
2068: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2069: NEXT;
2070: cur = CUR;
2071: res++;
2072: }
2073: while ((cur == 0) && (ctxt->inputNr > 1) &&
2074: (ctxt->instate != XML_PARSER_COMMENT)) {
2075: xmlPopInput(ctxt);
2076: cur = CUR;
2077: }
2078: /*
2079: * Need to handle support of entities branching here
2080: */
2081: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2082: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2083: }
2084: return(res);
2085: }
2086:
2087: /************************************************************************
2088: * *
2089: * Commodity functions to handle entities *
2090: * *
2091: ************************************************************************/
2092:
2093: /**
2094: * xmlPopInput:
2095: * @ctxt: an XML parser context
2096: *
2097: * xmlPopInput: the current input pointed by ctxt->input came to an end
2098: * pop it and return the next char.
2099: *
2100: * Returns the current xmlChar in the parser context
2101: */
2102: xmlChar
2103: xmlPopInput(xmlParserCtxtPtr ctxt) {
2104: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2105: if (xmlParserDebugEntities)
2106: xmlGenericError(xmlGenericErrorContext,
2107: "Popping input %d\n", ctxt->inputNr);
2108: xmlFreeInputStream(inputPop(ctxt));
2109: if ((*ctxt->input->cur == 0) &&
2110: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2111: return(xmlPopInput(ctxt));
2112: return(CUR);
2113: }
2114:
2115: /**
2116: * xmlPushInput:
2117: * @ctxt: an XML parser context
2118: * @input: an XML parser input fragment (entity, XML fragment ...).
2119: *
2120: * xmlPushInput: switch to a new input stream which is stacked on top
2121: * of the previous one(s).
2122: * Returns -1 in case of error or the index in the input stack
2123: */
2124: int
2125: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2126: int ret;
2127: if (input == NULL) return(-1);
2128:
2129: if (xmlParserDebugEntities) {
2130: if ((ctxt->input != NULL) && (ctxt->input->filename))
2131: xmlGenericError(xmlGenericErrorContext,
2132: "%s(%d): ", ctxt->input->filename,
2133: ctxt->input->line);
2134: xmlGenericError(xmlGenericErrorContext,
2135: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2136: }
2137: ret = inputPush(ctxt, input);
2138: GROW;
2139: return(ret);
2140: }
2141:
2142: /**
2143: * xmlParseCharRef:
2144: * @ctxt: an XML parser context
2145: *
2146: * parse Reference declarations
2147: *
2148: * [66] CharRef ::= '&#' [0-9]+ ';' |
2149: * '&#x' [0-9a-fA-F]+ ';'
2150: *
2151: * [ WFC: Legal Character ]
2152: * Characters referred to using character references must match the
2153: * production for Char.
2154: *
2155: * Returns the value parsed (as an int), 0 in case of error
2156: */
2157: int
2158: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2159: unsigned int val = 0;
2160: int count = 0;
2161: unsigned int outofrange = 0;
2162:
2163: /*
2164: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2165: */
2166: if ((RAW == '&') && (NXT(1) == '#') &&
2167: (NXT(2) == 'x')) {
2168: SKIP(3);
2169: GROW;
2170: while (RAW != ';') { /* loop blocked by count */
2171: if (count++ > 20) {
2172: count = 0;
2173: GROW;
2174: }
2175: if ((RAW >= '0') && (RAW <= '9'))
2176: val = val * 16 + (CUR - '0');
2177: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2178: val = val * 16 + (CUR - 'a') + 10;
2179: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2180: val = val * 16 + (CUR - 'A') + 10;
2181: else {
2182: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2183: val = 0;
2184: break;
2185: }
2186: if (val > 0x10FFFF)
2187: outofrange = val;
2188:
2189: NEXT;
2190: count++;
2191: }
2192: if (RAW == ';') {
2193: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2194: ctxt->input->col++;
2195: ctxt->nbChars ++;
2196: ctxt->input->cur++;
2197: }
2198: } else if ((RAW == '&') && (NXT(1) == '#')) {
2199: SKIP(2);
2200: GROW;
2201: while (RAW != ';') { /* loop blocked by count */
2202: if (count++ > 20) {
2203: count = 0;
2204: GROW;
2205: }
2206: if ((RAW >= '0') && (RAW <= '9'))
2207: val = val * 10 + (CUR - '0');
2208: else {
2209: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2210: val = 0;
2211: break;
2212: }
2213: if (val > 0x10FFFF)
2214: outofrange = val;
2215:
2216: NEXT;
2217: count++;
2218: }
2219: if (RAW == ';') {
2220: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2221: ctxt->input->col++;
2222: ctxt->nbChars ++;
2223: ctxt->input->cur++;
2224: }
2225: } else {
2226: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2227: }
2228:
2229: /*
2230: * [ WFC: Legal Character ]
2231: * Characters referred to using character references must match the
2232: * production for Char.
2233: */
2234: if ((IS_CHAR(val) && (outofrange == 0))) {
2235: return(val);
2236: } else {
2237: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2238: "xmlParseCharRef: invalid xmlChar value %d\n",
2239: val);
2240: }
2241: return(0);
2242: }
2243:
2244: /**
2245: * xmlParseStringCharRef:
2246: * @ctxt: an XML parser context
2247: * @str: a pointer to an index in the string
2248: *
2249: * parse Reference declarations, variant parsing from a string rather
2250: * than an an input flow.
2251: *
2252: * [66] CharRef ::= '&#' [0-9]+ ';' |
2253: * '&#x' [0-9a-fA-F]+ ';'
2254: *
2255: * [ WFC: Legal Character ]
2256: * Characters referred to using character references must match the
2257: * production for Char.
2258: *
2259: * Returns the value parsed (as an int), 0 in case of error, str will be
2260: * updated to the current value of the index
2261: */
2262: static int
2263: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2264: const xmlChar *ptr;
2265: xmlChar cur;
2266: unsigned int val = 0;
2267: unsigned int outofrange = 0;
2268:
2269: if ((str == NULL) || (*str == NULL)) return(0);
2270: ptr = *str;
2271: cur = *ptr;
2272: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2273: ptr += 3;
2274: cur = *ptr;
2275: while (cur != ';') { /* Non input consuming loop */
2276: if ((cur >= '0') && (cur <= '9'))
2277: val = val * 16 + (cur - '0');
2278: else if ((cur >= 'a') && (cur <= 'f'))
2279: val = val * 16 + (cur - 'a') + 10;
2280: else if ((cur >= 'A') && (cur <= 'F'))
2281: val = val * 16 + (cur - 'A') + 10;
2282: else {
2283: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284: val = 0;
2285: break;
2286: }
2287: if (val > 0x10FFFF)
2288: outofrange = val;
2289:
2290: ptr++;
2291: cur = *ptr;
2292: }
2293: if (cur == ';')
2294: ptr++;
2295: } else if ((cur == '&') && (ptr[1] == '#')){
2296: ptr += 2;
2297: cur = *ptr;
2298: while (cur != ';') { /* Non input consuming loops */
2299: if ((cur >= '0') && (cur <= '9'))
2300: val = val * 10 + (cur - '0');
2301: else {
2302: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2303: val = 0;
2304: break;
2305: }
2306: if (val > 0x10FFFF)
2307: outofrange = val;
2308:
2309: ptr++;
2310: cur = *ptr;
2311: }
2312: if (cur == ';')
2313: ptr++;
2314: } else {
2315: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2316: return(0);
2317: }
2318: *str = ptr;
2319:
2320: /*
2321: * [ WFC: Legal Character ]
2322: * Characters referred to using character references must match the
2323: * production for Char.
2324: */
2325: if ((IS_CHAR(val) && (outofrange == 0))) {
2326: return(val);
2327: } else {
2328: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2329: "xmlParseStringCharRef: invalid xmlChar value %d\n",
2330: val);
2331: }
2332: return(0);
2333: }
2334:
2335: /**
2336: * xmlNewBlanksWrapperInputStream:
2337: * @ctxt: an XML parser context
2338: * @entity: an Entity pointer
2339: *
2340: * Create a new input stream for wrapping
2341: * blanks around a PEReference
2342: *
2343: * Returns the new input stream or NULL
2344: */
2345:
2346: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2347:
2348: static xmlParserInputPtr
2349: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2350: xmlParserInputPtr input;
2351: xmlChar *buffer;
2352: size_t length;
2353: if (entity == NULL) {
2354: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2355: "xmlNewBlanksWrapperInputStream entity\n");
2356: return(NULL);
2357: }
2358: if (xmlParserDebugEntities)
2359: xmlGenericError(xmlGenericErrorContext,
2360: "new blanks wrapper for entity: %s\n", entity->name);
2361: input = xmlNewInputStream(ctxt);
2362: if (input == NULL) {
2363: return(NULL);
2364: }
2365: length = xmlStrlen(entity->name) + 5;
2366: buffer = xmlMallocAtomic(length);
2367: if (buffer == NULL) {
2368: xmlErrMemory(ctxt, NULL);
2369: xmlFree(input);
2370: return(NULL);
2371: }
2372: buffer [0] = ' ';
2373: buffer [1] = '%';
2374: buffer [length-3] = ';';
2375: buffer [length-2] = ' ';
2376: buffer [length-1] = 0;
2377: memcpy(buffer + 2, entity->name, length - 5);
2378: input->free = deallocblankswrapper;
2379: input->base = buffer;
2380: input->cur = buffer;
2381: input->length = length;
2382: input->end = &buffer[length];
2383: return(input);
2384: }
2385:
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * Handle a parameter-entity reference found at the current input
 * position.  Nothing is done unless the current character is '%'.
 * Depending on the parser state the reference is ignored, reported as an
 * error, or expanded by pushing new input streams on the input stack.
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Only the XML_PARSER_DTD case (and any state not listed here, since
     * there is no default) falls out of the switch and goes on to parse
     * the reference; every other listed state returns.
     */
    switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
        case XML_PARSER_COMMENT:
            return;
        case XML_PARSER_START_TAG:
            return;
        case XML_PARSER_END_TAG:
            return;
        case XML_PARSER_EOF:
            xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
            return;
        case XML_PARSER_PROLOG:
        case XML_PARSER_START:
        case XML_PARSER_MISC:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
            return;
        case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
        case XML_PARSER_SYSTEM_LITERAL:
        case XML_PARSER_PUBLIC_LITERAL:
            /* we just ignore it there */
            return;
        case XML_PARSER_EPILOG:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
            return;
        case XML_PARSER_ENTITY_VALUE:
            /*
             * NOTE: in the case of entity values, we don't do the
             *       substitution here since we need the literal
             *       entity value to be able to save the internal
             *       subset of the document.
             *       This will be handled by xmlStringDecodeEntities
             */
            return;
        case XML_PARSER_DTD:
            /*
             * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
             * In the internal DTD subset, parameter-entity references
             * can occur only where markup declarations can occur, not
             * within markup declarations.
             * In that case this is handled in xmlParseMarkupDecl
             */
            if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                return;
            /* A '%' followed by a blank or the end of data is no reference */
            if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
                return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* Skip the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "PEReference: %s\n", name);
    if (name == NULL) {
        xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
        if (RAW == ';') {
            NEXT;
            /* Entity lookup is delegated to the SAX handler, if any */
            if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
                entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
            if (entity == NULL) {

                /*
                 * [ WFC: Entity Declared ]
                 * In a document without any DTD, a document with only an
                 * internal DTD subset which contains no parameter entity
                 * references, or a document with "standalone='yes'", ...
                 * ... The declaration of a parameter entity must precede
                 * any reference to it...
                 */
                if ((ctxt->standalone == 1) ||
                    ((ctxt->hasExternalSubset == 0) &&
                     (ctxt->hasPErefs == 0))) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                         "PEReference: %%%s; not found\n", name);
                } else {
                    /*
                     * [ VC: Entity Declared ]
                     * In a document with an external subset or external
                     * parameter entities with "standalone='no'", ...
                     * ... The declaration of a parameter entity must precede
                     * any reference to it...
                     */
                    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
                        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                         "PEReference: %%%s; not found\n",
                                         name, NULL);
                    } else
                        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                      "PEReference: %%%s; not found\n",
                                      name, NULL);
                    ctxt->valid = 0;
                }
            } else if (ctxt->input->free != deallocblankswrapper) {
                    /*
                     * First pass: the current input is not a blanks
                     * wrapper, so push one.  The wrapper's text contains
                     * the same reference again surrounded by blanks.
                     */
                    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
                    if (xmlPushInput(ctxt, input) < 0)
                        return;
            } else {
                /*
                 * The reference is being read from a blanks wrapper: this
                 * time push the entity content itself.
                 */
                if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
                    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
                    xmlChar start[4];
                    xmlCharEncoding enc;

                    /*
                     * handle the extra spaces added before and after
                     * c.f. http://www.w3.org/TR/REC-xml#as-PE
                     * this is done independently.
                     */
                    input = xmlNewEntityInputStream(ctxt, entity);
                    if (xmlPushInput(ctxt, input) < 0)
                        return;

                    /*
                     * Get the 4 first bytes and decode the charset
                     * if enc != XML_CHAR_ENCODING_NONE
                     * plug some encoding conversion routines.
                     * Note that, since we may have some non-UTF8
                     * encoding (like UTF16, bug 135229), the 'length'
                     * is not known, but we can calculate based upon
                     * the amount of data in the buffer.
                     */
                    GROW
                    if ((ctxt->input->end - ctxt->input->cur)>=4) {
                        start[0] = RAW;
                        start[1] = NXT(1);
                        start[2] = NXT(2);
                        start[3] = NXT(3);
                        enc = xmlDetectCharEncoding(start, 4);
                        if (enc != XML_CHAR_ENCODING_NONE) {
                            xmlSwitchEncoding(ctxt, enc);
                        }
                    }

                    /*
                     * An external parameter entity may start with a text
                     * declaration ("<?xml" followed by a blank).
                     */
                    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                        (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
                        (IS_BLANK_CH(NXT(5)))) {
                        xmlParseTextDecl(ctxt);
                    }
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
                             "PEReference: %s is not a parameter entity\n",
                                      name);
                }
            }
        } else {
            xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
        }
    }
}
2580:
/*
 * Macro used to grow the current buffer.
 *
 * @buffer: name of the xmlChar * variable to enlarge; the macro also
 *          updates the companion size variable named <buffer>_size.
 * @n:      extra room added on top of doubling the current size.
 *
 * The new size is 2 * <buffer>_size + n.  If the reallocation fails the
 * macro jumps to a `mem_error` label, which the enclosing function must
 * provide; the @buffer pointer itself is left unchanged in that case
 * (the size variable has already been updated).
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    buffer##_size += n;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2593:
2594: /**
2595: * xmlStringLenDecodeEntities:
2596: * @ctxt: the parser context
2597: * @str: the input string
2598: * @len: the string length
2599: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2600: * @end: an end marker xmlChar, 0 if none
2601: * @end2: an end marker xmlChar, 0 if none
2602: * @end3: an end marker xmlChar, 0 if none
2603: *
2604: * Takes a entity string content and process to do the adequate substitutions.
2605: *
2606: * [67] Reference ::= EntityRef | CharRef
2607: *
2608: * [69] PEReference ::= '%' Name ';'
2609: *
2610: * Returns A newly allocated string with the substitution done. The caller
2611: * must deallocate it !
2612: */
2613: xmlChar *
2614: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615: int what, xmlChar end, xmlChar end2, xmlChar end3) {
2616: xmlChar *buffer = NULL;
2617: int buffer_size = 0;
2618:
2619: xmlChar *current = NULL;
2620: xmlChar *rep = NULL;
2621: const xmlChar *last;
2622: xmlEntityPtr ent;
2623: int c,l;
2624: int nbchars = 0;
2625:
2626: if ((ctxt == NULL) || (str == NULL) || (len < 0))
2627: return(NULL);
2628: last = str + len;
2629:
2630: if (((ctxt->depth > 40) &&
2631: ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2632: (ctxt->depth > 1024)) {
2633: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2634: return(NULL);
2635: }
2636:
2637: /*
2638: * allocate a translation buffer.
2639: */
2640: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2641: buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2642: if (buffer == NULL) goto mem_error;
2643:
2644: /*
2645: * OK loop until we reach one of the ending char or a size limit.
2646: * we are operating on already parsed values.
2647: */
2648: if (str < last)
2649: c = CUR_SCHAR(str, l);
2650: else
2651: c = 0;
2652: while ((c != 0) && (c != end) && /* non input consuming loop */
2653: (c != end2) && (c != end3)) {
2654:
2655: if (c == 0) break;
2656: if ((c == '&') && (str[1] == '#')) {
2657: int val = xmlParseStringCharRef(ctxt, &str);
2658: if (val != 0) {
2659: COPY_BUF(0,buffer,nbchars,val);
2660: }
2661: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2663: }
2664: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2665: if (xmlParserDebugEntities)
2666: xmlGenericError(xmlGenericErrorContext,
2667: "String decoding Entity Reference: %.30s\n",
2668: str);
2669: ent = xmlParseStringEntityRef(ctxt, &str);
2670: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2671: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2672: goto int_error;
2673: if (ent != NULL)
2674: ctxt->nbentities += ent->checked;
2675: if ((ent != NULL) &&
2676: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2677: if (ent->content != NULL) {
2678: COPY_BUF(0,buffer,nbchars,ent->content[0]);
2679: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2680: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681: }
2682: } else {
2683: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2684: "predefined entity has no content\n");
2685: }
2686: } else if ((ent != NULL) && (ent->content != NULL)) {
2687: ctxt->depth++;
2688: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2689: 0, 0, 0);
2690: ctxt->depth--;
2691:
2692: if (rep != NULL) {
2693: current = rep;
2694: while (*current != 0) { /* non input consuming loop */
2695: buffer[nbchars++] = *current++;
2696: if (nbchars >
2697: buffer_size - XML_PARSER_BUFFER_SIZE) {
2698: if (xmlParserEntityCheck(ctxt, nbchars, ent))
2699: goto int_error;
2700: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701: }
2702: }
2703: xmlFree(rep);
2704: rep = NULL;
2705: }
2706: } else if (ent != NULL) {
2707: int i = xmlStrlen(ent->name);
2708: const xmlChar *cur = ent->name;
2709:
2710: buffer[nbchars++] = '&';
2711: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2712: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713: }
2714: for (;i > 0;i--)
2715: buffer[nbchars++] = *cur++;
2716: buffer[nbchars++] = ';';
2717: }
2718: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2719: if (xmlParserDebugEntities)
2720: xmlGenericError(xmlGenericErrorContext,
2721: "String decoding PE Reference: %.30s\n", str);
2722: ent = xmlParseStringPEReference(ctxt, &str);
2723: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2724: goto int_error;
2725: if (ent != NULL)
2726: ctxt->nbentities += ent->checked;
2727: if (ent != NULL) {
2728: if (ent->content == NULL) {
2729: xmlLoadEntityContent(ctxt, ent);
2730: }
2731: ctxt->depth++;
2732: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2733: 0, 0, 0);
2734: ctxt->depth--;
2735: if (rep != NULL) {
2736: current = rep;
2737: while (*current != 0) { /* non input consuming loop */
2738: buffer[nbchars++] = *current++;
2739: if (nbchars >
2740: buffer_size - XML_PARSER_BUFFER_SIZE) {
2741: if (xmlParserEntityCheck(ctxt, nbchars, ent))
2742: goto int_error;
2743: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2744: }
2745: }
2746: xmlFree(rep);
2747: rep = NULL;
2748: }
2749: }
2750: } else {
2751: COPY_BUF(l,buffer,nbchars,c);
2752: str += l;
2753: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2754: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2755: }
2756: }
2757: if (str < last)
2758: c = CUR_SCHAR(str, l);
2759: else
2760: c = 0;
2761: }
2762: buffer[nbchars] = 0;
2763: return(buffer);
2764:
2765: mem_error:
2766: xmlErrMemory(ctxt, NULL);
2767: int_error:
2768: if (rep != NULL)
2769: xmlFree(rep);
2770: if (buffer != NULL)
2771: xmlFree(buffer);
2772: return(NULL);
2773: }
2774:
2775: /**
2776: * xmlStringDecodeEntities:
2777: * @ctxt: the parser context
2778: * @str: the input string
2779: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2780: * @end: an end marker xmlChar, 0 if none
2781: * @end2: an end marker xmlChar, 0 if none
2782: * @end3: an end marker xmlChar, 0 if none
2783: *
2784: * Takes a entity string content and process to do the adequate substitutions.
2785: *
2786: * [67] Reference ::= EntityRef | CharRef
2787: *
2788: * [69] PEReference ::= '%' Name ';'
2789: *
2790: * Returns A newly allocated string with the substitution done. The caller
2791: * must deallocate it !
2792: */
2793: xmlChar *
2794: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2795: xmlChar end, xmlChar end2, xmlChar end3) {
2796: if ((ctxt == NULL) || (str == NULL)) return(NULL);
2797: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2798: end, end2, end3));
2799: }
2800:
2801: /************************************************************************
2802: * *
2803: * Commodity functions, cleanup needed ? *
2804: * *
2805: ************************************************************************/
2806:
2807: /**
2808: * areBlanks:
2809: * @ctxt: an XML parser context
2810: * @str: a xmlChar *
2811: * @len: the size of @str
2812: * @blank_chars: we know the chars are blanks
2813: *
2814: * Is this a sequence of blank chars that one can ignore ?
2815: *
2816: * Returns 1 if ignorable 0 otherwise.
2817: */
2818:
2819: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2820: int blank_chars) {
2821: int i, ret;
2822: xmlNodePtr lastChild;
2823:
2824: /*
2825: * Don't spend time trying to differentiate them, the same callback is
2826: * used !
2827: */
2828: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2829: return(0);
2830:
2831: /*
2832: * Check for xml:space value.
2833: */
2834: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2835: (*(ctxt->space) == -2))
2836: return(0);
2837:
2838: /*
2839: * Check that the string is made of blanks
2840: */
2841: if (blank_chars == 0) {
2842: for (i = 0;i < len;i++)
2843: if (!(IS_BLANK_CH(str[i]))) return(0);
2844: }
2845:
2846: /*
2847: * Look if the element is mixed content in the DTD if available
2848: */
2849: if (ctxt->node == NULL) return(0);
2850: if (ctxt->myDoc != NULL) {
2851: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2852: if (ret == 0) return(1);
2853: if (ret == 1) return(0);
2854: }
2855:
2856: /*
2857: * Otherwise, heuristic :-\
2858: */
2859: if ((RAW != '<') && (RAW != 0xD)) return(0);
2860: if ((ctxt->node->children == NULL) &&
2861: (RAW == '<') && (NXT(1) == '/')) return(0);
2862:
2863: lastChild = xmlGetLastChild(ctxt->node);
2864: if (lastChild == NULL) {
2865: if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2866: (ctxt->node->content != NULL)) return(0);
2867: } else if (xmlNodeIsText(lastChild))
2868: return(0);
2869: else if ((ctxt->node->children != NULL) &&
2870: (xmlNodeIsText(ctxt->node->children)))
2871: return(0);
2872: return(1);
2873: }
2874:
2875: /************************************************************************
2876: * *
2877: * Extra stuff for namespace support *
2878: * Relates to http://www.w3.org/TR/WD-xml-names *
2879: * *
2880: ************************************************************************/
2881:
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Names starting with ':' or ending with ':' (and, unless
 * XML_XML_NAMESPACE is defined, names starting with "xml:") are not
 * split: a copy of @name is returned and *@prefix stays NULL.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any.  Both strings are freshly allocated;
 *   presumably the caller is expected to free them (confirm against
 *   callers).  NULL is returned if @prefix or @name is NULL or on
 *   allocation failure.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /*
     * First segment: copy bytes into the stack buffer up to the first
     * ':' or the end of the string.  c always holds the byte just read
     * and cur points one past it.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            /* keep some slack so the terminating 0 always fits */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* "prefix:" with nothing after the colon: return the name unsplit */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    /*
     * ret now holds the first segment: the whole name if there was no
     * ':', the prefix otherwise.
     */
    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* The first segment was a prefix; what follows is the local part */
        c = *cur;
        *prefix = ret;
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* the error is reported but the split still goes on */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        /* Second segment: same copying scheme, running to the end */
        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3052:
3053: /************************************************************************
3054: * *
3055: * The parser itself *
3056: * Relates to http://www.w3.org/TR/REC-xml *
3057: * *
3058: ************************************************************************/
3059:
3060: /************************************************************************
3061: * *
3062: * Routines to parse Name, NCName and NmToken *
3063: * *
3064: ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled into DEBUG builds.  Objects with static
 * storage duration start at zero, so no explicit initializer is needed.
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNmToken;
static unsigned long nbParseStringName;
#endif
3073:
3074: /*
3075: * The two following functions are related to the change of accepted
3076: * characters for Name and NmToken in the Revision 5 of XML-1.0
3077: * They correspond to the modified production [4] and the new production [4a]
3078: * changes in that revision. Also note that the macros used for the
3079: * productions Letter, Digit, CombiningChar and Extender are not needed
3080: * anymore.
3081: * We still keep compatibility to pre-revision5 parsing semantic if the
3082: * new XML_PARSE_OLD10 option is given to the parser.
3083: */
3084: static int
3085: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3086: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3087: /*
3088: * Use the new checks of production [4] [4a] amd [5] of the
3089: * Update 5 of XML-1.0
3090: */
3091: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3092: (((c >= 'a') && (c <= 'z')) ||
3093: ((c >= 'A') && (c <= 'Z')) ||
3094: (c == '_') || (c == ':') ||
3095: ((c >= 0xC0) && (c <= 0xD6)) ||
3096: ((c >= 0xD8) && (c <= 0xF6)) ||
3097: ((c >= 0xF8) && (c <= 0x2FF)) ||
3098: ((c >= 0x370) && (c <= 0x37D)) ||
3099: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100: ((c >= 0x200C) && (c <= 0x200D)) ||
3101: ((c >= 0x2070) && (c <= 0x218F)) ||
3102: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3103: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3104: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3105: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3106: ((c >= 0x10000) && (c <= 0xEFFFF))))
3107: return(1);
3108: } else {
3109: if (IS_LETTER(c) || (c == '_') || (c == ':'))
3110: return(1);
3111: }
3112: return(0);
3113: }
3114:
3115: static int
3116: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3117: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3118: /*
3119: * Use the new checks of production [4] [4a] amd [5] of the
3120: * Update 5 of XML-1.0
3121: */
3122: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3123: (((c >= 'a') && (c <= 'z')) ||
3124: ((c >= 'A') && (c <= 'Z')) ||
3125: ((c >= '0') && (c <= '9')) || /* !start */
3126: (c == '_') || (c == ':') ||
3127: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3128: ((c >= 0xC0) && (c <= 0xD6)) ||
3129: ((c >= 0xD8) && (c <= 0xF6)) ||
3130: ((c >= 0xF8) && (c <= 0x2FF)) ||
3131: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3132: ((c >= 0x370) && (c <= 0x37D)) ||
3133: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3134: ((c >= 0x200C) && (c <= 0x200D)) ||
3135: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3136: ((c >= 0x2070) && (c <= 0x218F)) ||
3137: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3138: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3139: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3140: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3141: ((c >= 0x10000) && (c <= 0xEFFFF))))
3142: return(1);
3143: } else {
3144: if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145: (c == '.') || (c == '-') ||
3146: (c == '_') || (c == ':') ||
3147: (IS_COMBINING(c)) ||
3148: (IS_EXTENDER(c)))
3149: return(1);
3150: }
3151: return(0);
3152: }
3153:
/*
 * Forward declaration; the definition is not in this part of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3156:
3157: static const xmlChar *
3158: xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3159: int len = 0, l;
3160: int c;
3161: int count = 0;
3162:
3163: #ifdef DEBUG
3164: nbParseNameComplex++;
3165: #endif
3166:
3167: /*
3168: * Handler for more complex cases
3169: */
3170: GROW;
3171: c = CUR_CHAR(l);
3172: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173: /*
3174: * Use the new checks of production [4] [4a] amd [5] of the
3175: * Update 5 of XML-1.0
3176: */
3177: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3178: (!(((c >= 'a') && (c <= 'z')) ||
3179: ((c >= 'A') && (c <= 'Z')) ||
3180: (c == '_') || (c == ':') ||
3181: ((c >= 0xC0) && (c <= 0xD6)) ||
3182: ((c >= 0xD8) && (c <= 0xF6)) ||
3183: ((c >= 0xF8) && (c <= 0x2FF)) ||
3184: ((c >= 0x370) && (c <= 0x37D)) ||
3185: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3186: ((c >= 0x200C) && (c <= 0x200D)) ||
3187: ((c >= 0x2070) && (c <= 0x218F)) ||
3188: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192: ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3193: return(NULL);
3194: }
3195: len += l;
3196: NEXTL(l);
3197: c = CUR_CHAR(l);
3198: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3199: (((c >= 'a') && (c <= 'z')) ||
3200: ((c >= 'A') && (c <= 'Z')) ||
3201: ((c >= '0') && (c <= '9')) || /* !start */
3202: (c == '_') || (c == ':') ||
3203: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3204: ((c >= 0xC0) && (c <= 0xD6)) ||
3205: ((c >= 0xD8) && (c <= 0xF6)) ||
3206: ((c >= 0xF8) && (c <= 0x2FF)) ||
3207: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3208: ((c >= 0x370) && (c <= 0x37D)) ||
3209: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3210: ((c >= 0x200C) && (c <= 0x200D)) ||
3211: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3212: ((c >= 0x2070) && (c <= 0x218F)) ||
3213: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3214: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3215: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3216: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3217: ((c >= 0x10000) && (c <= 0xEFFFF))
3218: )) {
3219: if (count++ > 100) {
3220: count = 0;
3221: GROW;
3222: }
3223: len += l;
3224: NEXTL(l);
3225: c = CUR_CHAR(l);
3226: }
3227: } else {
3228: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3229: (!IS_LETTER(c) && (c != '_') &&
3230: (c != ':'))) {
3231: return(NULL);
3232: }
3233: len += l;
3234: NEXTL(l);
3235: c = CUR_CHAR(l);
3236:
3237: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3238: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3239: (c == '.') || (c == '-') ||
3240: (c == '_') || (c == ':') ||
3241: (IS_COMBINING(c)) ||
3242: (IS_EXTENDER(c)))) {
3243: if (count++ > 100) {
3244: count = 0;
3245: GROW;
3246: }
3247: len += l;
3248: NEXTL(l);
3249: c = CUR_CHAR(l);
3250: }
3251: }
3252: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3253: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3254: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3255: }
3256:
3257: /**
3258: * xmlParseName:
3259: * @ctxt: an XML parser context
3260: *
3261: * parse an XML name.
3262: *
3263: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3264: * CombiningChar | Extender
3265: *
3266: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3267: *
3268: * [6] Names ::= Name (#x20 Name)*
3269: *
3270: * Returns the Name parsed or NULL
3271: */
3272:
3273: const xmlChar *
3274: xmlParseName(xmlParserCtxtPtr ctxt) {
3275: const xmlChar *in;
3276: const xmlChar *ret;
3277: int count = 0;
3278:
3279: GROW;
3280:
3281: #ifdef DEBUG
3282: nbParseName++;
3283: #endif
3284:
3285: /*
3286: * Accelerator for simple ASCII names
3287: */
3288: in = ctxt->input->cur;
3289: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3290: ((*in >= 0x41) && (*in <= 0x5A)) ||
3291: (*in == '_') || (*in == ':')) {
3292: in++;
3293: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3294: ((*in >= 0x41) && (*in <= 0x5A)) ||
3295: ((*in >= 0x30) && (*in <= 0x39)) ||
3296: (*in == '_') || (*in == '-') ||
3297: (*in == ':') || (*in == '.'))
3298: in++;
3299: if ((*in > 0) && (*in < 0x80)) {
3300: count = in - ctxt->input->cur;
3301: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3302: ctxt->input->cur = in;
3303: ctxt->nbChars += count;
3304: ctxt->input->col += count;
3305: if (ret == NULL)
3306: xmlErrMemory(ctxt, NULL);
3307: return(ret);
3308: }
3309: }
3310: /* accelerator for special cases */
3311: return(xmlParseNameComplex(ctxt));
3312: }
3313:
3314: static const xmlChar *
3315: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3316: int len = 0, l;
3317: int c;
3318: int count = 0;
3319:
3320: #ifdef DEBUG
3321: nbParseNCNameComplex++;
3322: #endif
3323:
3324: /*
3325: * Handler for more complex cases
3326: */
3327: GROW;
3328: c = CUR_CHAR(l);
3329: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3330: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3331: return(NULL);
3332: }
3333:
3334: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335: (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3336: if (count++ > 100) {
3337: count = 0;
3338: GROW;
3339: }
3340: len += l;
3341: NEXTL(l);
3342: c = CUR_CHAR(l);
3343: }
3344: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3345: }
3346:
3347: /**
3348: * xmlParseNCName:
3349: * @ctxt: an XML parser context
3350: * @len: lenght of the string parsed
3351: *
3352: * parse an XML name.
3353: *
3354: * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3355: * CombiningChar | Extender
3356: *
3357: * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3358: *
3359: * Returns the Name parsed or NULL
3360: */
3361:
3362: static const xmlChar *
3363: xmlParseNCName(xmlParserCtxtPtr ctxt) {
3364: const xmlChar *in;
3365: const xmlChar *ret;
3366: int count = 0;
3367:
3368: #ifdef DEBUG
3369: nbParseNCName++;
3370: #endif
3371:
3372: /*
3373: * Accelerator for simple ASCII names
3374: */
3375: in = ctxt->input->cur;
3376: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3377: ((*in >= 0x41) && (*in <= 0x5A)) ||
3378: (*in == '_')) {
3379: in++;
3380: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3381: ((*in >= 0x41) && (*in <= 0x5A)) ||
3382: ((*in >= 0x30) && (*in <= 0x39)) ||
3383: (*in == '_') || (*in == '-') ||
3384: (*in == '.'))
3385: in++;
3386: if ((*in > 0) && (*in < 0x80)) {
3387: count = in - ctxt->input->cur;
3388: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389: ctxt->input->cur = in;
3390: ctxt->nbChars += count;
3391: ctxt->input->col += count;
3392: if (ret == NULL) {
3393: xmlErrMemory(ctxt, NULL);
3394: }
3395: return(ret);
3396: }
3397: }
3398: return(xmlParseNCNameComplex(ctxt));
3399: }
3400:
3401: /**
3402: * xmlParseNameAndCompare:
3403: * @ctxt: an XML parser context
3404: *
3405: * parse an XML name and compares for match
3406: * (specialized for endtag parsing)
3407: *
3408: * Returns NULL for an illegal name, (xmlChar*) 1 for success
3409: * and the name for mismatch
3410: */
3411:
3412: static const xmlChar *
3413: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3414: register const xmlChar *cmp = other;
3415: register const xmlChar *in;
3416: const xmlChar *ret;
3417:
3418: GROW;
3419:
3420: in = ctxt->input->cur;
3421: while (*in != 0 && *in == *cmp) {
3422: ++in;
3423: ++cmp;
3424: ctxt->input->col++;
3425: }
3426: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3427: /* success */
3428: ctxt->input->cur = in;
3429: return (const xmlChar*) 1;
3430: }
3431: /* failure (or end of input buffer), check with full function */
3432: ret = xmlParseName (ctxt);
3433: /* strings coming from the dictionnary direct compare possible */
3434: if (ret == other) {
3435: return (const xmlChar*) 1;
3436: }
3437: return ret;
3438: }
3439:
3440: /**
3441: * xmlParseStringName:
3442: * @ctxt: an XML parser context
3443: * @str: a pointer to the string pointer (IN/OUT)
3444: *
3445: * parse an XML name.
3446: *
3447: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3448: * CombiningChar | Extender
3449: *
3450: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3451: *
3452: * [6] Names ::= Name (#x20 Name)*
3453: *
3454: * Returns the Name parsed or NULL. The @str pointer
3455: * is updated to the current location in the string.
3456: */
3457:
3458: static xmlChar *
3459: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3460: xmlChar buf[XML_MAX_NAMELEN + 5];
3461: const xmlChar *cur = *str;
3462: int len = 0, l;
3463: int c;
3464:
3465: #ifdef DEBUG
3466: nbParseStringName++;
3467: #endif
3468:
3469: c = CUR_SCHAR(cur, l);
3470: if (!xmlIsNameStartChar(ctxt, c)) {
3471: return(NULL);
3472: }
3473:
3474: COPY_BUF(l,buf,len,c);
3475: cur += l;
3476: c = CUR_SCHAR(cur, l);
3477: while (xmlIsNameChar(ctxt, c)) {
3478: COPY_BUF(l,buf,len,c);
3479: cur += l;
3480: c = CUR_SCHAR(cur, l);
3481: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3482: /*
3483: * Okay someone managed to make a huge name, so he's ready to pay
3484: * for the processing speed.
3485: */
3486: xmlChar *buffer;
3487: int max = len * 2;
3488:
3489: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3490: if (buffer == NULL) {
3491: xmlErrMemory(ctxt, NULL);
3492: return(NULL);
3493: }
3494: memcpy(buffer, buf, len);
3495: while (xmlIsNameChar(ctxt, c)) {
3496: if (len + 10 > max) {
3497: xmlChar *tmp;
3498: max *= 2;
3499: tmp = (xmlChar *) xmlRealloc(buffer,
3500: max * sizeof(xmlChar));
3501: if (tmp == NULL) {
3502: xmlErrMemory(ctxt, NULL);
3503: xmlFree(buffer);
3504: return(NULL);
3505: }
3506: buffer = tmp;
3507: }
3508: COPY_BUF(l,buffer,len,c);
3509: cur += l;
3510: c = CUR_SCHAR(cur, l);
3511: }
3512: buffer[len] = 0;
3513: *str = cur;
3514: return(buffer);
3515: }
3516: }
3517: *str = cur;
3518: return(xmlStrndup(buf, len));
3519: }
3520:
3521: /**
3522: * xmlParseNmtoken:
3523: * @ctxt: an XML parser context
3524: *
3525: * parse an XML Nmtoken.
3526: *
3527: * [7] Nmtoken ::= (NameChar)+
3528: *
3529: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3530: *
3531: * Returns the Nmtoken parsed or NULL
3532: */
3533:
3534: xmlChar *
3535: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3536: xmlChar buf[XML_MAX_NAMELEN + 5];
3537: int len = 0, l;
3538: int c;
3539: int count = 0;
3540:
3541: #ifdef DEBUG
3542: nbParseNmToken++;
3543: #endif
3544:
3545: GROW;
3546: c = CUR_CHAR(l);
3547:
3548: while (xmlIsNameChar(ctxt, c)) {
3549: if (count++ > 100) {
3550: count = 0;
3551: GROW;
3552: }
3553: COPY_BUF(l,buf,len,c);
3554: NEXTL(l);
3555: c = CUR_CHAR(l);
3556: if (len >= XML_MAX_NAMELEN) {
3557: /*
3558: * Okay someone managed to make a huge token, so he's ready to pay
3559: * for the processing speed.
3560: */
3561: xmlChar *buffer;
3562: int max = len * 2;
3563:
3564: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3565: if (buffer == NULL) {
3566: xmlErrMemory(ctxt, NULL);
3567: return(NULL);
3568: }
3569: memcpy(buffer, buf, len);
3570: while (xmlIsNameChar(ctxt, c)) {
3571: if (count++ > 100) {
3572: count = 0;
3573: GROW;
3574: }
3575: if (len + 10 > max) {
3576: xmlChar *tmp;
3577:
3578: max *= 2;
3579: tmp = (xmlChar *) xmlRealloc(buffer,
3580: max * sizeof(xmlChar));
3581: if (tmp == NULL) {
3582: xmlErrMemory(ctxt, NULL);
3583: xmlFree(buffer);
3584: return(NULL);
3585: }
3586: buffer = tmp;
3587: }
3588: COPY_BUF(l,buffer,len,c);
3589: NEXTL(l);
3590: c = CUR_CHAR(l);
3591: }
3592: buffer[len] = 0;
3593: return(buffer);
3594: }
3595: }
3596: if (len == 0)
3597: return(NULL);
3598: return(xmlStrndup(buf, len));
3599: }
3600:
3601: /**
3602: * xmlParseEntityValue:
3603: * @ctxt: an XML parser context
3604: * @orig: if non-NULL store a copy of the original entity value
3605: *
3606: * parse a value for ENTITY declarations
3607: *
3608: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3609: * "'" ([^%&'] | PEReference | Reference)* "'"
3610: *
3611: * Returns the EntityValue parsed with reference substituted or NULL
3612: */
3613:
3614: xmlChar *
3615: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3616: xmlChar *buf = NULL;
3617: int len = 0;
3618: int size = XML_PARSER_BUFFER_SIZE;
3619: int c, l;
3620: xmlChar stop;
3621: xmlChar *ret = NULL;
3622: const xmlChar *cur = NULL;
3623: xmlParserInputPtr input;
3624:
3625: if (RAW == '"') stop = '"';
3626: else if (RAW == '\'') stop = '\'';
3627: else {
3628: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3629: return(NULL);
3630: }
3631: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3632: if (buf == NULL) {
3633: xmlErrMemory(ctxt, NULL);
3634: return(NULL);
3635: }
3636:
3637: /*
3638: * The content of the entity definition is copied in a buffer.
3639: */
3640:
3641: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3642: input = ctxt->input;
3643: GROW;
3644: NEXT;
3645: c = CUR_CHAR(l);
3646: /*
3647: * NOTE: 4.4.5 Included in Literal
3648: * When a parameter entity reference appears in a literal entity
3649: * value, ... a single or double quote character in the replacement
3650: * text is always treated as a normal data character and will not
3651: * terminate the literal.
3652: * In practice it means we stop the loop only when back at parsing
3653: * the initial entity and the quote is found
3654: */
3655: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3656: (ctxt->input != input))) {
3657: if (len + 5 >= size) {
3658: xmlChar *tmp;
3659:
3660: size *= 2;
3661: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3662: if (tmp == NULL) {
3663: xmlErrMemory(ctxt, NULL);
3664: xmlFree(buf);
3665: return(NULL);
3666: }
3667: buf = tmp;
3668: }
3669: COPY_BUF(l,buf,len,c);
3670: NEXTL(l);
3671: /*
3672: * Pop-up of finished entities.
3673: */
3674: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3675: xmlPopInput(ctxt);
3676:
3677: GROW;
3678: c = CUR_CHAR(l);
3679: if (c == 0) {
3680: GROW;
3681: c = CUR_CHAR(l);
3682: }
3683: }
3684: buf[len] = 0;
3685:
3686: /*
3687: * Raise problem w.r.t. '&' and '%' being used in non-entities
3688: * reference constructs. Note Charref will be handled in
3689: * xmlStringDecodeEntities()
3690: */
3691: cur = buf;
3692: while (*cur != 0) { /* non input consuming */
3693: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3694: xmlChar *name;
3695: xmlChar tmp = *cur;
3696:
3697: cur++;
3698: name = xmlParseStringName(ctxt, &cur);
3699: if ((name == NULL) || (*cur != ';')) {
3700: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3701: "EntityValue: '%c' forbidden except for entities references\n",
3702: tmp);
3703: }
3704: if ((tmp == '%') && (ctxt->inSubset == 1) &&
3705: (ctxt->inputNr == 1)) {
3706: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3707: }
3708: if (name != NULL)
3709: xmlFree(name);
3710: if (*cur == 0)
3711: break;
3712: }
3713: cur++;
3714: }
3715:
3716: /*
3717: * Then PEReference entities are substituted.
3718: */
3719: if (c != stop) {
3720: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3721: xmlFree(buf);
3722: } else {
3723: NEXT;
3724: /*
3725: * NOTE: 4.4.7 Bypassed
3726: * When a general entity reference appears in the EntityValue in
3727: * an entity declaration, it is bypassed and left as is.
3728: * so XML_SUBSTITUTE_REF is not set here.
3729: */
3730: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3731: 0, 0, 0);
3732: if (orig != NULL)
3733: *orig = buf;
3734: else
3735: xmlFree(buf);
3736: }
3737:
3738: return(ret);
3739: }
3740:
3741: /**
3742: * xmlParseAttValueComplex:
3743: * @ctxt: an XML parser context
3744: * @len: the resulting attribute len
3745: * @normalize: wether to apply the inner normalization
3746: *
3747: * parse a value for an attribute, this is the fallback function
3748: * of xmlParseAttValue() when the attribute parsing requires handling
3749: * of non-ASCII characters, or normalization compaction.
3750: *
3751: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3752: */
3753: static xmlChar *
3754: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3755: xmlChar limit = 0;
3756: xmlChar *buf = NULL;
3757: xmlChar *rep = NULL;
3758: int len = 0;
3759: int buf_size = 0;
3760: int c, l, in_space = 0;
3761: xmlChar *current = NULL;
3762: xmlEntityPtr ent;
3763:
3764: if (NXT(0) == '"') {
3765: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3766: limit = '"';
3767: NEXT;
3768: } else if (NXT(0) == '\'') {
3769: limit = '\'';
3770: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3771: NEXT;
3772: } else {
3773: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3774: return(NULL);
3775: }
3776:
3777: /*
3778: * allocate a translation buffer.
3779: */
3780: buf_size = XML_PARSER_BUFFER_SIZE;
3781: buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3782: if (buf == NULL) goto mem_error;
3783:
3784: /*
3785: * OK loop until we reach one of the ending char or a size limit.
3786: */
3787: c = CUR_CHAR(l);
3788: while ((NXT(0) != limit) && /* checked */
3789: (IS_CHAR(c)) && (c != '<')) {
3790: if (c == 0) break;
3791: if (c == '&') {
3792: in_space = 0;
3793: if (NXT(1) == '#') {
3794: int val = xmlParseCharRef(ctxt);
3795:
3796: if (val == '&') {
3797: if (ctxt->replaceEntities) {
3798: if (len > buf_size - 10) {
3799: growBuffer(buf, 10);
3800: }
3801: buf[len++] = '&';
3802: } else {
3803: /*
3804: * The reparsing will be done in xmlStringGetNodeList()
3805: * called by the attribute() function in SAX.c
3806: */
3807: if (len > buf_size - 10) {
3808: growBuffer(buf, 10);
3809: }
3810: buf[len++] = '&';
3811: buf[len++] = '#';
3812: buf[len++] = '3';
3813: buf[len++] = '8';
3814: buf[len++] = ';';
3815: }
3816: } else if (val != 0) {
3817: if (len > buf_size - 10) {
3818: growBuffer(buf, 10);
3819: }
3820: len += xmlCopyChar(0, &buf[len], val);
3821: }
3822: } else {
3823: ent = xmlParseEntityRef(ctxt);
3824: ctxt->nbentities++;
3825: if (ent != NULL)
3826: ctxt->nbentities += ent->owner;
3827: if ((ent != NULL) &&
3828: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3829: if (len > buf_size - 10) {
3830: growBuffer(buf, 10);
3831: }
3832: if ((ctxt->replaceEntities == 0) &&
3833: (ent->content[0] == '&')) {
3834: buf[len++] = '&';
3835: buf[len++] = '#';
3836: buf[len++] = '3';
3837: buf[len++] = '8';
3838: buf[len++] = ';';
3839: } else {
3840: buf[len++] = ent->content[0];
3841: }
3842: } else if ((ent != NULL) &&
3843: (ctxt->replaceEntities != 0)) {
3844: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3845: rep = xmlStringDecodeEntities(ctxt, ent->content,
3846: XML_SUBSTITUTE_REF,
3847: 0, 0, 0);
3848: if (rep != NULL) {
3849: current = rep;
3850: while (*current != 0) { /* non input consuming */
3851: if ((*current == 0xD) || (*current == 0xA) ||
3852: (*current == 0x9)) {
3853: buf[len++] = 0x20;
3854: current++;
3855: } else
3856: buf[len++] = *current++;
3857: if (len > buf_size - 10) {
3858: growBuffer(buf, 10);
3859: }
3860: }
3861: xmlFree(rep);
3862: rep = NULL;
3863: }
3864: } else {
3865: if (len > buf_size - 10) {
3866: growBuffer(buf, 10);
3867: }
3868: if (ent->content != NULL)
3869: buf[len++] = ent->content[0];
3870: }
3871: } else if (ent != NULL) {
3872: int i = xmlStrlen(ent->name);
3873: const xmlChar *cur = ent->name;
3874:
3875: /*
3876: * This may look absurd but is needed to detect
3877: * entities problems
3878: */
3879: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3880: (ent->content != NULL)) {
3881: rep = xmlStringDecodeEntities(ctxt, ent->content,
3882: XML_SUBSTITUTE_REF, 0, 0, 0);
3883: if (rep != NULL) {
3884: xmlFree(rep);
3885: rep = NULL;
3886: }
3887: }
3888:
3889: /*
3890: * Just output the reference
3891: */
3892: buf[len++] = '&';
3893: while (len > buf_size - i - 10) {
3894: growBuffer(buf, i + 10);
3895: }
3896: for (;i > 0;i--)
3897: buf[len++] = *cur++;
3898: buf[len++] = ';';
3899: }
3900: }
3901: } else {
3902: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3903: if ((len != 0) || (!normalize)) {
3904: if ((!normalize) || (!in_space)) {
3905: COPY_BUF(l,buf,len,0x20);
3906: while (len > buf_size - 10) {
3907: growBuffer(buf, 10);
3908: }
3909: }
3910: in_space = 1;
3911: }
3912: } else {
3913: in_space = 0;
3914: COPY_BUF(l,buf,len,c);
3915: if (len > buf_size - 10) {
3916: growBuffer(buf, 10);
3917: }
3918: }
3919: NEXTL(l);
3920: }
3921: GROW;
3922: c = CUR_CHAR(l);
3923: }
3924: if ((in_space) && (normalize)) {
3925: while (buf[len - 1] == 0x20) len--;
3926: }
3927: buf[len] = 0;
3928: if (RAW == '<') {
3929: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3930: } else if (RAW != limit) {
3931: if ((c != 0) && (!IS_CHAR(c))) {
3932: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3933: "invalid character in attribute value\n");
3934: } else {
3935: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3936: "AttValue: ' expected\n");
3937: }
3938: } else
3939: NEXT;
3940: if (attlen != NULL) *attlen = len;
3941: return(buf);
3942:
3943: mem_error:
3944: xmlErrMemory(ctxt, NULL);
3945: if (buf != NULL)
3946: xmlFree(buf);
3947: if (rep != NULL)
3948: xmlFree(rep);
3949: return(NULL);
3950: }
3951:
3952: /**
3953: * xmlParseAttValue:
3954: * @ctxt: an XML parser context
3955: *
3956: * parse a value for an attribute
3957: * Note: the parser won't do substitution of entities here, this
3958: * will be handled later in xmlStringGetNodeList
3959: *
3960: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3961: * "'" ([^<&'] | Reference)* "'"
3962: *
3963: * 3.3.3 Attribute-Value Normalization:
3964: * Before the value of an attribute is passed to the application or
3965: * checked for validity, the XML processor must normalize it as follows:
3966: * - a character reference is processed by appending the referenced
3967: * character to the attribute value
3968: * - an entity reference is processed by recursively processing the
3969: * replacement text of the entity
3970: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3971: * appending #x20 to the normalized value, except that only a single
3972: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3973: * parsed entity or the literal entity value of an internal parsed entity
3974: * - other characters are processed by appending them to the normalized value
3975: * If the declared value is not CDATA, then the XML processor must further
3976: * process the normalized attribute value by discarding any leading and
3977: * trailing space (#x20) characters, and by replacing sequences of space
3978: * (#x20) characters by a single space (#x20) character.
3979: * All attributes for which no declaration has been read should be treated
3980: * by a non-validating parser as if declared CDATA.
3981: *
3982: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3983: */
3984:
3985:
3986: xmlChar *
3987: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3988: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3989: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3990: }
3991:
3992: /**
3993: * xmlParseSystemLiteral:
3994: * @ctxt: an XML parser context
3995: *
3996: * parse an XML Literal
3997: *
3998: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3999: *
4000: * Returns the SystemLiteral parsed or NULL
4001: */
4002:
4003: xmlChar *
4004: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4005: xmlChar *buf = NULL;
4006: int len = 0;
4007: int size = XML_PARSER_BUFFER_SIZE;
4008: int cur, l;
4009: xmlChar stop;
4010: int state = ctxt->instate;
4011: int count = 0;
4012:
4013: SHRINK;
4014: if (RAW == '"') {
4015: NEXT;
4016: stop = '"';
4017: } else if (RAW == '\'') {
4018: NEXT;
4019: stop = '\'';
4020: } else {
4021: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4022: return(NULL);
4023: }
4024:
4025: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4026: if (buf == NULL) {
4027: xmlErrMemory(ctxt, NULL);
4028: return(NULL);
4029: }
4030: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4031: cur = CUR_CHAR(l);
4032: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4033: if (len + 5 >= size) {
4034: xmlChar *tmp;
4035:
4036: size *= 2;
4037: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4038: if (tmp == NULL) {
4039: xmlFree(buf);
4040: xmlErrMemory(ctxt, NULL);
4041: ctxt->instate = (xmlParserInputState) state;
4042: return(NULL);
4043: }
4044: buf = tmp;
4045: }
4046: count++;
4047: if (count > 50) {
4048: GROW;
4049: count = 0;
4050: }
4051: COPY_BUF(l,buf,len,cur);
4052: NEXTL(l);
4053: cur = CUR_CHAR(l);
4054: if (cur == 0) {
4055: GROW;
4056: SHRINK;
4057: cur = CUR_CHAR(l);
4058: }
4059: }
4060: buf[len] = 0;
4061: ctxt->instate = (xmlParserInputState) state;
4062: if (!IS_CHAR(cur)) {
4063: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4064: } else {
4065: NEXT;
4066: }
4067: return(buf);
4068: }
4069:
4070: /**
4071: * xmlParsePubidLiteral:
4072: * @ctxt: an XML parser context
4073: *
4074: * parse an XML public literal
4075: *
4076: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4077: *
4078: * Returns the PubidLiteral parsed or NULL.
4079: */
4080:
4081: xmlChar *
4082: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4083: xmlChar *buf = NULL;
4084: int len = 0;
4085: int size = XML_PARSER_BUFFER_SIZE;
4086: xmlChar cur;
4087: xmlChar stop;
4088: int count = 0;
4089: xmlParserInputState oldstate = ctxt->instate;
4090:
4091: SHRINK;
4092: if (RAW == '"') {
4093: NEXT;
4094: stop = '"';
4095: } else if (RAW == '\'') {
4096: NEXT;
4097: stop = '\'';
4098: } else {
4099: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4100: return(NULL);
4101: }
4102: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4103: if (buf == NULL) {
4104: xmlErrMemory(ctxt, NULL);
4105: return(NULL);
4106: }
4107: ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4108: cur = CUR;
4109: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4110: if (len + 1 >= size) {
4111: xmlChar *tmp;
4112:
4113: size *= 2;
4114: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4115: if (tmp == NULL) {
4116: xmlErrMemory(ctxt, NULL);
4117: xmlFree(buf);
4118: return(NULL);
4119: }
4120: buf = tmp;
4121: }
4122: buf[len++] = cur;
4123: count++;
4124: if (count > 50) {
4125: GROW;
4126: count = 0;
4127: }
4128: NEXT;
4129: cur = CUR;
4130: if (cur == 0) {
4131: GROW;
4132: SHRINK;
4133: cur = CUR;
4134: }
4135: }
4136: buf[len] = 0;
4137: if (cur != stop) {
4138: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4139: } else {
4140: NEXT;
4141: }
4142: ctxt->instate = oldstate;
4143: return(buf);
4144: }
4145:
4146: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4147:
4148: /*
4149: * used for the test in the inner loop of the char data testing
 *
 * Indexed by input byte. A non-zero entry (the byte's own value) marks a
 * byte that the scanning loop of xmlParseCharData() steps over directly.
 * The zero entries are: every byte below 0x20 except 0x09, '&' (0x26),
 * '<' (0x3C), ']' (0x5D) and every byte from 0x80 up.
4150: */
4151: static const unsigned char test_char_data[256] = {
4152: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4153: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4154: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4155: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4156: 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4157: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4158: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4159: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4160: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4161: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4162: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4163: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4164: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4165: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4166: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4167: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4168: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4169: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4170: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4171: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4172: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4173: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4174: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4175: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4176: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4177: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4178: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4179: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4180: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4181: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4182: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4183: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4184: };
4185:
4186: /**
4187: * xmlParseCharData:
4188: * @ctxt: an XML parser context
4189: * @cdata: int indicating whether we are within a CDATA section
4190: *
4191: * parse a CharData section.
4192: * if we are within a CDATA section ']]>' marks an end of section.
4193: *
4194: * The right angle bracket (>) may be represented using the string "&gt;",
4195: * and must, for compatibility, be escaped using "&gt;" or a character
4196: * reference when it appears in the string "]]>" in content, when that
4197: * string is not marking the end of a CDATA section.
4198: *
4199: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of bytes straight to the SAX characters/ignorableWhitespace
 * callbacks. It returns on '<', on '&' and on a misplaced "]]>"; anything
 * else it cannot handle falls through to xmlParseCharDataComplex().
4200: */
4201:
4202: void
4203: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4204: const xmlChar *in;
4205: int nbchar = 0;
4206: int line = ctxt->input->line; /* position snapshot, written back before the fallback call */
4207: int col = ctxt->input->col;
4208: int ccol; /* running column while stepping over plain bytes */
4209:
4210: SHRINK;
4211: GROW;
4212: /*
4213: * Accelerated common case where input don't need to be
4214: * modified before passing it to the handler.
4215: */
4216: if (!cdata) {
4217: in = ctxt->input->cur;
4218: do {
4219: get_more_space: /* step over a run of 0x20 and 0xA bytes */
4220: while (*in == 0x20) { in++; ctxt->input->col++; }
4221: if (*in == 0xA) {
4222: do {
4223: ctxt->input->line++; ctxt->input->col = 1;
4224: in++;
4225: } while (*in == 0xA);
4226: goto get_more_space;
4227: }
4228: if (*in == '<') { /* everything between cur and in is 0x20/0xA */
4229: nbchar = in - ctxt->input->cur;
4230: if (nbchar > 0) {
4231: const xmlChar *tmp = ctxt->input->cur;
4232: ctxt->input->cur = in; /* consume the run before calling out */
4233:
4234: if ((ctxt->sax != NULL) &&
4235: (ctxt->sax->ignorableWhitespace !=
4236: ctxt->sax->characters)) { /* distinct callbacks: classify the run */
4237: if (areBlanks(ctxt, tmp, nbchar, 1)) {
4238: if (ctxt->sax->ignorableWhitespace != NULL)
4239: ctxt->sax->ignorableWhitespace(ctxt->userData,
4240: tmp, nbchar);
4241: } else {
4242: if (ctxt->sax->characters != NULL)
4243: ctxt->sax->characters(ctxt->userData,
4244: tmp, nbchar);
4245: if (*ctxt->space == -1)
4246: *ctxt->space = -2;
4247: }
4248: } else if ((ctxt->sax != NULL) &&
4249: (ctxt->sax->characters != NULL)) {
4250: ctxt->sax->characters(ctxt->userData,
4251: tmp, nbchar);
4252: }
4253: }
4254: return;
4255: }
4256:
4257: get_more: /* step over bytes with a non-zero test_char_data[] entry */
4258: ccol = ctxt->input->col;
4259: while (test_char_data[*in]) {
4260: in++;
4261: ccol++;
4262: }
4263: ctxt->input->col = ccol;
4264: if (*in == 0xA) {
4265: do {
4266: ctxt->input->line++; ctxt->input->col = 1;
4267: in++;
4268: } while (*in == 0xA);
4269: goto get_more;
4270: }
4271: if (*in == ']') {
4272: if ((in[1] == ']') && (in[2] == '>')) { /* "]]>" outside a CDATA section */
4273: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4274: ctxt->input->cur = in;
4275: return;
4276: }
4277: in++; /* a lone ']' is ordinary character data */
4278: ctxt->input->col++;
4279: goto get_more;
4280: }
4281: nbchar = in - ctxt->input->cur;
4282: if (nbchar > 0) {
4283: if ((ctxt->sax != NULL) &&
4284: (ctxt->sax->ignorableWhitespace !=
4285: ctxt->sax->characters) &&
4286: (IS_BLANK_CH(*ctxt->input->cur))) { /* run starts with a blank: may be ignorable */
4287: const xmlChar *tmp = ctxt->input->cur;
4288: ctxt->input->cur = in;
4289:
4290: if (areBlanks(ctxt, tmp, nbchar, 0)) {
4291: if (ctxt->sax->ignorableWhitespace != NULL)
4292: ctxt->sax->ignorableWhitespace(ctxt->userData,
4293: tmp, nbchar);
4294: } else {
4295: if (ctxt->sax->characters != NULL)
4296: ctxt->sax->characters(ctxt->userData,
4297: tmp, nbchar);
4298: if (*ctxt->space == -1)
4299: *ctxt->space = -2;
4300: }
4301: line = ctxt->input->line; /* refresh the snapshot after delivering data */
4302: col = ctxt->input->col;
4303: } else if (ctxt->sax != NULL) {
4304: if (ctxt->sax->characters != NULL)
4305: ctxt->sax->characters(ctxt->userData,
4306: ctxt->input->cur, nbchar);
4307: line = ctxt->input->line;
4308: col = ctxt->input->col;
4309: }
4310: /* something really bad happened in the SAX callback */
4311: if (ctxt->instate != XML_PARSER_CONTENT)
4312: return;
4313: }
4314: ctxt->input->cur = in;
4315: if (*in == 0xD) { /* 0xD followed by 0xA: count a new line and keep scanning */
4316: in++;
4317: if (*in == 0xA) {
4318: ctxt->input->cur = in;
4319: in++;
4320: ctxt->input->line++; ctxt->input->col = 1;
4321: continue; /* while */
4322: }
4323: in--; /* 0xD not followed by 0xA: step back onto it */
4324: }
4325: if (*in == '<') {
4326: return;
4327: }
4328: if (*in == '&') {
4329: return;
4330: }
4331: SHRINK;
4332: GROW;
4333: in = ctxt->input->cur; /* re-read the cursor after SHRINK/GROW */
4334: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4335: nbchar = 0;
4336: }
4337: ctxt->input->line = line; /* write the snapshot back before the fallback */
4338: ctxt->input->col = col;
4339: xmlParseCharDataComplex(ctxt, cdata);
4340: }
4341:
4342: /**
4343: * xmlParseCharDataComplex:
4344: * @ctxt: an XML parser context
4345: * @cdata: int indicating whether we are within a CDATA section
4346: *
4347: * parse a CharData section.this is the fallback function
4348: * of xmlParseCharData() when the parsing requires handling
4349: * of non-ASCII characters.
4350: */
4351: static void
4352: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4353: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4354: int nbchar = 0;
4355: int cur, l;
4356: int count = 0;
4357:
4358: SHRINK;
4359: GROW;
4360: cur = CUR_CHAR(l);
4361: while ((cur != '<') && /* checked */
4362: (cur != '&') &&
4363: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4364: if ((cur == ']') && (NXT(1) == ']') &&
4365: (NXT(2) == '>')) {
4366: if (cdata) break;
4367: else {
4368: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4369: }
4370: }
4371: COPY_BUF(l,buf,nbchar,cur);
4372: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4373: buf[nbchar] = 0;
4374:
4375: /*
4376: * OK the segment is to be consumed as chars.
4377: */
4378: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4379: if (areBlanks(ctxt, buf, nbchar, 0)) {
4380: if (ctxt->sax->ignorableWhitespace != NULL)
4381: ctxt->sax->ignorableWhitespace(ctxt->userData,
4382: buf, nbchar);
4383: } else {
4384: if (ctxt->sax->characters != NULL)
4385: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4386: if ((ctxt->sax->characters !=
4387: ctxt->sax->ignorableWhitespace) &&
4388: (*ctxt->space == -1))
4389: *ctxt->space = -2;
4390: }
4391: }
4392: nbchar = 0;
4393: /* something really bad happened in the SAX callback */
4394: if (ctxt->instate != XML_PARSER_CONTENT)
4395: return;
4396: }
4397: count++;
4398: if (count > 50) {
4399: GROW;
4400: count = 0;
4401: }
4402: NEXTL(l);
4403: cur = CUR_CHAR(l);
4404: }
4405: if (nbchar != 0) {
4406: buf[nbchar] = 0;
4407: /*
4408: * OK the segment is to be consumed as chars.
4409: */
4410: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4411: if (areBlanks(ctxt, buf, nbchar, 0)) {
4412: if (ctxt->sax->ignorableWhitespace != NULL)
4413: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4414: } else {
4415: if (ctxt->sax->characters != NULL)
4416: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4417: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4418: (*ctxt->space == -1))
4419: *ctxt->space = -2;
4420: }
4421: }
4422: }
4423: if ((cur != 0) && (!IS_CHAR(cur))) {
4424: /* Generate the error and skip the offending character */
4425: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426: "PCDATA invalid Char value %d\n",
4427: cur);
4428: NEXTL(l);
4429: }
4430: }
4431:
4432: /**
4433: * xmlParseExternalID:
4434: * @ctxt: an XML parser context
4435: * @publicID: a xmlChar** receiving PubidLiteral
4436: * @strict: indicate whether we should restrict parsing to only
4437: * production [75], see NOTE below
4438: *
4439: * Parse an External ID or a Public ID
4440: *
4441: * NOTE: Productions [75] and [83] interact badly since [75] can generate
4442: * 'PUBLIC' S PubidLiteral S SystemLiteral
4443: *
4444: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4445: * | 'PUBLIC' S PubidLiteral S SystemLiteral
4446: *
4447: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4448: *
4449: * Returns the function returns SystemLiteral and in the second
4450: * case publicID receives PubidLiteral, is strict is off
4451: * it is possible to return NULL and have publicID set.
4452: */
4453:
4454: xmlChar *
4455: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4456: xmlChar *URI = NULL;
4457:
4458: SHRINK;
4459:
4460: *publicID = NULL;
4461: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4462: SKIP(6);
4463: if (!IS_BLANK_CH(CUR)) {
4464: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465: "Space required after 'SYSTEM'\n");
4466: }
4467: SKIP_BLANKS;
4468: URI = xmlParseSystemLiteral(ctxt);
4469: if (URI == NULL) {
4470: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4471: }
4472: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4473: SKIP(6);
4474: if (!IS_BLANK_CH(CUR)) {
4475: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4476: "Space required after 'PUBLIC'\n");
4477: }
4478: SKIP_BLANKS;
4479: *publicID = xmlParsePubidLiteral(ctxt);
4480: if (*publicID == NULL) {
4481: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4482: }
4483: if (strict) {
4484: /*
4485: * We don't handle [83] so "S SystemLiteral" is required.
4486: */
4487: if (!IS_BLANK_CH(CUR)) {
4488: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489: "Space required after the Public Identifier\n");
4490: }
4491: } else {
4492: /*
4493: * We handle [83] so we return immediately, if
4494: * "S SystemLiteral" is not detected. From a purely parsing
4495: * point of view that's a nice mess.
4496: */
4497: const xmlChar *ptr;
4498: GROW;
4499:
4500: ptr = CUR_PTR;
4501: if (!IS_BLANK_CH(*ptr)) return(NULL);
4502:
4503: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4504: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4505: }
4506: SKIP_BLANKS;
4507: URI = xmlParseSystemLiteral(ctxt);
4508: if (URI == NULL) {
4509: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4510: }
4511: }
4512: return(URI);
4513: }
4514:
4515: /**
4516: * xmlParseCommentComplex:
4517: * @ctxt: an XML parser context
4518: * @buf: the already parsed part of the buffer
4519: * @len: number of bytes filles in the buffer
4520: * @size: allocated size of the buffer
4521: *
4522: * Skip an XML (SGML) comment <!-- .... -->
4523: * The spec says that "For compatibility, the string "--" (double-hyphen)
4524: * must not occur within comments. "
4525: * This is the slow routine in case the accelerator for ascii didn't work
4526: *
4527: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4528: */
4529: static void
4530: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4531: int q, ql;
4532: int r, rl;
4533: int cur, l;
4534: int count = 0;
4535: int inputid;
4536:
4537: inputid = ctxt->input->id;
4538:
4539: if (buf == NULL) {
4540: len = 0;
4541: size = XML_PARSER_BUFFER_SIZE;
4542: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4543: if (buf == NULL) {
4544: xmlErrMemory(ctxt, NULL);
4545: return;
4546: }
4547: }
4548: GROW; /* Assure there's enough input data */
4549: q = CUR_CHAR(ql);
4550: if (q == 0)
4551: goto not_terminated;
4552: if (!IS_CHAR(q)) {
4553: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4554: "xmlParseComment: invalid xmlChar value %d\n",
4555: q);
4556: xmlFree (buf);
4557: return;
4558: }
4559: NEXTL(ql);
4560: r = CUR_CHAR(rl);
4561: if (r == 0)
4562: goto not_terminated;
4563: if (!IS_CHAR(r)) {
4564: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4565: "xmlParseComment: invalid xmlChar value %d\n",
4566: q);
4567: xmlFree (buf);
4568: return;
4569: }
4570: NEXTL(rl);
4571: cur = CUR_CHAR(l);
4572: if (cur == 0)
4573: goto not_terminated;
4574: while (IS_CHAR(cur) && /* checked */
4575: ((cur != '>') ||
4576: (r != '-') || (q != '-'))) {
4577: if ((r == '-') && (q == '-')) {
4578: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4579: }
4580: if (len + 5 >= size) {
4581: xmlChar *new_buf;
4582: size *= 2;
4583: new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4584: if (new_buf == NULL) {
4585: xmlFree (buf);
4586: xmlErrMemory(ctxt, NULL);
4587: return;
4588: }
4589: buf = new_buf;
4590: }
4591: COPY_BUF(ql,buf,len,q);
4592: q = r;
4593: ql = rl;
4594: r = cur;
4595: rl = l;
4596:
4597: count++;
4598: if (count > 50) {
4599: GROW;
4600: count = 0;
4601: }
4602: NEXTL(l);
4603: cur = CUR_CHAR(l);
4604: if (cur == 0) {
4605: SHRINK;
4606: GROW;
4607: cur = CUR_CHAR(l);
4608: }
4609: }
4610: buf[len] = 0;
4611: if (cur == 0) {
4612: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4613: "Comment not terminated \n<!--%.50s\n", buf);
4614: } else if (!IS_CHAR(cur)) {
4615: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616: "xmlParseComment: invalid xmlChar value %d\n",
4617: cur);
4618: } else {
4619: if (inputid != ctxt->input->id) {
4620: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621: "Comment doesn't start and stop in the same entity\n");
4622: }
4623: NEXT;
4624: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625: (!ctxt->disableSAX))
4626: ctxt->sax->comment(ctxt->userData, buf);
4627: }
4628: xmlFree(buf);
4629: return;
4630: not_terminated:
4631: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632: "Comment not terminated\n", NULL);
4633: xmlFree(buf);
4634: return;
4635: }
4636:
4637: /**
4638: * xmlParseComment:
4639: * @ctxt: an XML parser context
4640: *
4641: * Skip an XML (SGML) comment <!-- .... -->
4642: * The spec says that "For compatibility, the string "--" (double-hyphen)
4643: * must not occur within comments. "
4644: *
4645: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4646: */
4647: void
4648: xmlParseComment(xmlParserCtxtPtr ctxt) {
4649: xmlChar *buf = NULL;
4650: int size = XML_PARSER_BUFFER_SIZE;
4651: int len = 0;
4652: xmlParserInputState state;
4653: const xmlChar *in;
4654: int nbchar = 0, ccol;
4655: int inputid;
4656:
4657: /*
4658: * Check that there is a comment right here.
4659: */
4660: if ((RAW != '<') || (NXT(1) != '!') ||
4661: (NXT(2) != '-') || (NXT(3) != '-')) return;
4662: state = ctxt->instate;
4663: ctxt->instate = XML_PARSER_COMMENT;
4664: inputid = ctxt->input->id;
4665: SKIP(4);
4666: SHRINK;
4667: GROW;
4668:
4669: /*
4670: * Accelerated common case where input don't need to be
4671: * modified before passing it to the handler.
4672: */
4673: in = ctxt->input->cur;
4674: do {
4675: if (*in == 0xA) {
4676: do {
4677: ctxt->input->line++; ctxt->input->col = 1;
4678: in++;
4679: } while (*in == 0xA);
4680: }
4681: get_more:
4682: ccol = ctxt->input->col;
4683: while (((*in > '-') && (*in <= 0x7F)) ||
4684: ((*in >= 0x20) && (*in < '-')) ||
4685: (*in == 0x09)) {
4686: in++;
4687: ccol++;
4688: }
4689: ctxt->input->col = ccol;
4690: if (*in == 0xA) {
4691: do {
4692: ctxt->input->line++; ctxt->input->col = 1;
4693: in++;
4694: } while (*in == 0xA);
4695: goto get_more;
4696: }
4697: nbchar = in - ctxt->input->cur;
4698: /*
4699: * save current set of data
4700: */
4701: if (nbchar > 0) {
4702: if ((ctxt->sax != NULL) &&
4703: (ctxt->sax->comment != NULL)) {
4704: if (buf == NULL) {
4705: if ((*in == '-') && (in[1] == '-'))
4706: size = nbchar + 1;
4707: else
4708: size = XML_PARSER_BUFFER_SIZE + nbchar;
4709: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4710: if (buf == NULL) {
4711: xmlErrMemory(ctxt, NULL);
4712: ctxt->instate = state;
4713: return;
4714: }
4715: len = 0;
4716: } else if (len + nbchar + 1 >= size) {
4717: xmlChar *new_buf;
4718: size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4719: new_buf = (xmlChar *) xmlRealloc(buf,
4720: size * sizeof(xmlChar));
4721: if (new_buf == NULL) {
4722: xmlFree (buf);
4723: xmlErrMemory(ctxt, NULL);
4724: ctxt->instate = state;
4725: return;
4726: }
4727: buf = new_buf;
4728: }
4729: memcpy(&buf[len], ctxt->input->cur, nbchar);
4730: len += nbchar;
4731: buf[len] = 0;
4732: }
4733: }
4734: ctxt->input->cur = in;
4735: if (*in == 0xA) {
4736: in++;
4737: ctxt->input->line++; ctxt->input->col = 1;
4738: }
4739: if (*in == 0xD) {
4740: in++;
4741: if (*in == 0xA) {
4742: ctxt->input->cur = in;
4743: in++;
4744: ctxt->input->line++; ctxt->input->col = 1;
4745: continue; /* while */
4746: }
4747: in--;
4748: }
4749: SHRINK;
4750: GROW;
4751: in = ctxt->input->cur;
4752: if (*in == '-') {
4753: if (in[1] == '-') {
4754: if (in[2] == '>') {
4755: if (ctxt->input->id != inputid) {
4756: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4757: "comment doesn't start and stop in the same entity\n");
4758: }
4759: SKIP(3);
4760: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4761: (!ctxt->disableSAX)) {
4762: if (buf != NULL)
4763: ctxt->sax->comment(ctxt->userData, buf);
4764: else
4765: ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4766: }
4767: if (buf != NULL)
4768: xmlFree(buf);
4769: ctxt->instate = state;
4770: return;
4771: }
4772: if (buf != NULL)
4773: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4774: "Comment not terminated \n<!--%.50s\n",
4775: buf);
4776: else
4777: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4778: "Comment not terminated \n", NULL);
4779: in++;
4780: ctxt->input->col++;
4781: }
4782: in++;
4783: ctxt->input->col++;
4784: goto get_more;
4785: }
4786: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4787: xmlParseCommentComplex(ctxt, buf, len, size);
4788: ctxt->instate = state;
4789: return;
4790: }
4791:
4792:
4793: /**
4794: * xmlParsePITarget:
4795: * @ctxt: an XML parser context
4796: *
4797: * parse the name of a PI
4798: *
4799: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4800: *
4801: * Returns the PITarget name or NULL
4802: */
4803:
4804: const xmlChar *
4805: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4806: const xmlChar *name;
4807:
4808: name = xmlParseName(ctxt);
4809: if ((name != NULL) &&
4810: ((name[0] == 'x') || (name[0] == 'X')) &&
4811: ((name[1] == 'm') || (name[1] == 'M')) &&
4812: ((name[2] == 'l') || (name[2] == 'L'))) {
4813: int i;
4814: if ((name[0] == 'x') && (name[1] == 'm') &&
4815: (name[2] == 'l') && (name[3] == 0)) {
4816: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4817: "XML declaration allowed only at the start of the document\n");
4818: return(name);
4819: } else if (name[3] == 0) {
4820: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4821: return(name);
4822: }
4823: for (i = 0;;i++) {
4824: if (xmlW3CPIs[i] == NULL) break;
4825: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4826: return(name);
4827: }
4828: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4829: "xmlParsePITarget: invalid name prefix 'xml'\n",
4830: NULL, NULL);
4831: }
4832: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4833: xmlNsErr(ctxt, XML_NS_ERR_COLON,
4834: "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4835: }
4836: return(name);
4837: }
4838:
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing, in which case the function returns
 * without reporting anything.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4900:
4901: /**
4902: * xmlParsePI:
4903: * @ctxt: an XML parser context
4904: *
4905: * parse an XML Processing Instruction.
4906: *
4907: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4908: *
4909: * The processing is transfered to SAX once parsed.
4910: */
4911:
4912: void
4913: xmlParsePI(xmlParserCtxtPtr ctxt) {
4914: xmlChar *buf = NULL;
4915: int len = 0;
4916: int size = XML_PARSER_BUFFER_SIZE;
4917: int cur, l;
4918: const xmlChar *target;
4919: xmlParserInputState state;
4920: int count = 0;
4921:
4922: if ((RAW == '<') && (NXT(1) == '?')) {
4923: xmlParserInputPtr input = ctxt->input;
4924: state = ctxt->instate;
4925: ctxt->instate = XML_PARSER_PI;
4926: /*
4927: * this is a Processing Instruction.
4928: */
4929: SKIP(2);
4930: SHRINK;
4931:
4932: /*
4933: * Parse the target name and check for special support like
4934: * namespace.
4935: */
4936: target = xmlParsePITarget(ctxt);
4937: if (target != NULL) {
4938: if ((RAW == '?') && (NXT(1) == '>')) {
4939: if (input != ctxt->input) {
4940: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941: "PI declaration doesn't start and stop in the same entity\n");
4942: }
4943: SKIP(2);
4944:
4945: /*
4946: * SAX: PI detected.
4947: */
4948: if ((ctxt->sax) && (!ctxt->disableSAX) &&
4949: (ctxt->sax->processingInstruction != NULL))
4950: ctxt->sax->processingInstruction(ctxt->userData,
4951: target, NULL);
4952: ctxt->instate = state;
4953: return;
4954: }
4955: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4956: if (buf == NULL) {
4957: xmlErrMemory(ctxt, NULL);
4958: ctxt->instate = state;
4959: return;
4960: }
4961: cur = CUR;
4962: if (!IS_BLANK(cur)) {
4963: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4964: "ParsePI: PI %s space expected\n", target);
4965: }
4966: SKIP_BLANKS;
4967: cur = CUR_CHAR(l);
4968: while (IS_CHAR(cur) && /* checked */
4969: ((cur != '?') || (NXT(1) != '>'))) {
4970: if (len + 5 >= size) {
4971: xmlChar *tmp;
4972:
4973: size *= 2;
4974: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4975: if (tmp == NULL) {
4976: xmlErrMemory(ctxt, NULL);
4977: xmlFree(buf);
4978: ctxt->instate = state;
4979: return;
4980: }
4981: buf = tmp;
4982: }
4983: count++;
4984: if (count > 50) {
4985: GROW;
4986: count = 0;
4987: }
4988: COPY_BUF(l,buf,len,cur);
4989: NEXTL(l);
4990: cur = CUR_CHAR(l);
4991: if (cur == 0) {
4992: SHRINK;
4993: GROW;
4994: cur = CUR_CHAR(l);
4995: }
4996: }
4997: buf[len] = 0;
4998: if (cur != '?') {
4999: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5000: "ParsePI: PI %s never end ...\n", target);
5001: } else {
5002: if (input != ctxt->input) {
5003: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004: "PI declaration doesn't start and stop in the same entity\n");
5005: }
5006: SKIP(2);
5007:
5008: #ifdef LIBXML_CATALOG_ENABLED
5009: if (((state == XML_PARSER_MISC) ||
5010: (state == XML_PARSER_START)) &&
5011: (xmlStrEqual(target, XML_CATALOG_PI))) {
5012: xmlCatalogAllow allow = xmlCatalogGetDefaults();
5013: if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5014: (allow == XML_CATA_ALLOW_ALL))
5015: xmlParseCatalogPI(ctxt, buf);
5016: }
5017: #endif
5018:
5019:
5020: /*
5021: * SAX: PI detected.
5022: */
5023: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5024: (ctxt->sax->processingInstruction != NULL))
5025: ctxt->sax->processingInstruction(ctxt->userData,
5026: target, buf);
5027: }
5028: xmlFree(buf);
5029: } else {
5030: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5031: }
5032: ctxt->instate = state;
5033: }
5034: }
5035:
5036: /**
5037: * xmlParseNotationDecl:
5038: * @ctxt: an XML parser context
5039: *
5040: * parse a notation declaration
5041: *
5042: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5043: *
5044: * Hence there is actually 3 choices:
5045: * 'PUBLIC' S PubidLiteral
5046: * 'PUBLIC' S PubidLiteral S SystemLiteral
5047: * and 'SYSTEM' S SystemLiteral
5048: *
5049: * See the NOTE on xmlParseExternalID().
5050: */
5051:
5052: void
5053: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5054: const xmlChar *name;
5055: xmlChar *Pubid;
5056: xmlChar *Systemid;
5057:
5058: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5059: xmlParserInputPtr input = ctxt->input;
5060: SHRINK;
5061: SKIP(10);
5062: if (!IS_BLANK_CH(CUR)) {
5063: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5064: "Space required after '<!NOTATION'\n");
5065: return;
5066: }
5067: SKIP_BLANKS;
5068:
5069: name = xmlParseName(ctxt);
5070: if (name == NULL) {
5071: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5072: return;
5073: }
5074: if (!IS_BLANK_CH(CUR)) {
5075: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076: "Space required after the NOTATION name'\n");
5077: return;
5078: }
5079: if (xmlStrchr(name, ':') != NULL) {
5080: xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081: "colon are forbidden from notation names '%s'\n",
5082: name, NULL, NULL);
5083: }
5084: SKIP_BLANKS;
5085:
5086: /*
5087: * Parse the IDs.
5088: */
5089: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5090: SKIP_BLANKS;
5091:
5092: if (RAW == '>') {
5093: if (input != ctxt->input) {
5094: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5095: "Notation declaration doesn't start and stop in the same entity\n");
5096: }
5097: NEXT;
5098: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5099: (ctxt->sax->notationDecl != NULL))
5100: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5101: } else {
5102: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5103: }
5104: if (Systemid != NULL) xmlFree(Systemid);
5105: if (Pubid != NULL) xmlFree(Pubid);
5106: }
5107: }
5108:
5109: /**
5110: * xmlParseEntityDecl:
5111: * @ctxt: an XML parser context
5112: *
5113: * parse <!ENTITY declarations
5114: *
5115: * [70] EntityDecl ::= GEDecl | PEDecl
5116: *
5117: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5118: *
5119: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5120: *
5121: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5122: *
5123: * [74] PEDef ::= EntityValue | ExternalID
5124: *
5125: * [76] NDataDecl ::= S 'NDATA' S Name
5126: *
5127: * [ VC: Notation Declared ]
5128: * The Name must match the declared name of a notation.
5129: */
5130:
5131: void
5132: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5133: const xmlChar *name = NULL;
5134: xmlChar *value = NULL;
5135: xmlChar *URI = NULL, *literal = NULL;
5136: const xmlChar *ndata = NULL;
5137: int isParameter = 0;
5138: xmlChar *orig = NULL;
5139: int skipped;
5140:
5141: /* GROW; done in the caller */
5142: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5143: xmlParserInputPtr input = ctxt->input;
5144: SHRINK;
5145: SKIP(8);
5146: skipped = SKIP_BLANKS;
5147: if (skipped == 0) {
5148: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149: "Space required after '<!ENTITY'\n");
5150: }
5151:
5152: if (RAW == '%') {
5153: NEXT;
5154: skipped = SKIP_BLANKS;
5155: if (skipped == 0) {
5156: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157: "Space required after '%'\n");
5158: }
5159: isParameter = 1;
5160: }
5161:
5162: name = xmlParseName(ctxt);
5163: if (name == NULL) {
5164: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165: "xmlParseEntityDecl: no name\n");
5166: return;
5167: }
5168: if (xmlStrchr(name, ':') != NULL) {
5169: xmlNsErr(ctxt, XML_NS_ERR_COLON,
5170: "colon are forbidden from entities names '%s'\n",
5171: name, NULL, NULL);
5172: }
5173: skipped = SKIP_BLANKS;
5174: if (skipped == 0) {
5175: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176: "Space required after the entity name\n");
5177: }
5178:
5179: ctxt->instate = XML_PARSER_ENTITY_DECL;
5180: /*
5181: * handle the various case of definitions...
5182: */
5183: if (isParameter) {
5184: if ((RAW == '"') || (RAW == '\'')) {
5185: value = xmlParseEntityValue(ctxt, &orig);
5186: if (value) {
5187: if ((ctxt->sax != NULL) &&
5188: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5189: ctxt->sax->entityDecl(ctxt->userData, name,
5190: XML_INTERNAL_PARAMETER_ENTITY,
5191: NULL, NULL, value);
5192: }
5193: } else {
5194: URI = xmlParseExternalID(ctxt, &literal, 1);
5195: if ((URI == NULL) && (literal == NULL)) {
5196: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5197: }
5198: if (URI) {
5199: xmlURIPtr uri;
5200:
5201: uri = xmlParseURI((const char *) URI);
5202: if (uri == NULL) {
5203: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5204: "Invalid URI: %s\n", URI);
5205: /*
5206: * This really ought to be a well formedness error
5207: * but the XML Core WG decided otherwise c.f. issue
5208: * E26 of the XML erratas.
5209: */
5210: } else {
5211: if (uri->fragment != NULL) {
5212: /*
5213: * Okay this is foolish to block those but not
5214: * invalid URIs.
5215: */
5216: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5217: } else {
5218: if ((ctxt->sax != NULL) &&
5219: (!ctxt->disableSAX) &&
5220: (ctxt->sax->entityDecl != NULL))
5221: ctxt->sax->entityDecl(ctxt->userData, name,
5222: XML_EXTERNAL_PARAMETER_ENTITY,
5223: literal, URI, NULL);
5224: }
5225: xmlFreeURI(uri);
5226: }
5227: }
5228: }
5229: } else {
5230: if ((RAW == '"') || (RAW == '\'')) {
5231: value = xmlParseEntityValue(ctxt, &orig);
5232: if ((ctxt->sax != NULL) &&
5233: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5234: ctxt->sax->entityDecl(ctxt->userData, name,
5235: XML_INTERNAL_GENERAL_ENTITY,
5236: NULL, NULL, value);
5237: /*
5238: * For expat compatibility in SAX mode.
5239: */
5240: if ((ctxt->myDoc == NULL) ||
5241: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5242: if (ctxt->myDoc == NULL) {
5243: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5244: if (ctxt->myDoc == NULL) {
5245: xmlErrMemory(ctxt, "New Doc failed");
5246: return;
5247: }
5248: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5249: }
5250: if (ctxt->myDoc->intSubset == NULL)
5251: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5252: BAD_CAST "fake", NULL, NULL);
5253:
5254: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5255: NULL, NULL, value);
5256: }
5257: } else {
5258: URI = xmlParseExternalID(ctxt, &literal, 1);
5259: if ((URI == NULL) && (literal == NULL)) {
5260: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5261: }
5262: if (URI) {
5263: xmlURIPtr uri;
5264:
5265: uri = xmlParseURI((const char *)URI);
5266: if (uri == NULL) {
5267: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5268: "Invalid URI: %s\n", URI);
5269: /*
5270: * This really ought to be a well formedness error
5271: * but the XML Core WG decided otherwise c.f. issue
5272: * E26 of the XML erratas.
5273: */
5274: } else {
5275: if (uri->fragment != NULL) {
5276: /*
5277: * Okay this is foolish to block those but not
5278: * invalid URIs.
5279: */
5280: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5281: }
5282: xmlFreeURI(uri);
5283: }
5284: }
5285: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5286: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287: "Space required before 'NDATA'\n");
5288: }
5289: SKIP_BLANKS;
5290: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5291: SKIP(5);
5292: if (!IS_BLANK_CH(CUR)) {
5293: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294: "Space required after 'NDATA'\n");
5295: }
5296: SKIP_BLANKS;
5297: ndata = xmlParseName(ctxt);
5298: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299: (ctxt->sax->unparsedEntityDecl != NULL))
5300: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5301: literal, URI, ndata);
5302: } else {
5303: if ((ctxt->sax != NULL) &&
5304: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5305: ctxt->sax->entityDecl(ctxt->userData, name,
5306: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5307: literal, URI, NULL);
5308: /*
5309: * For expat compatibility in SAX mode.
5310: * assuming the entity repalcement was asked for
5311: */
5312: if ((ctxt->replaceEntities != 0) &&
5313: ((ctxt->myDoc == NULL) ||
5314: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5315: if (ctxt->myDoc == NULL) {
5316: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5317: if (ctxt->myDoc == NULL) {
5318: xmlErrMemory(ctxt, "New Doc failed");
5319: return;
5320: }
5321: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5322: }
5323:
5324: if (ctxt->myDoc->intSubset == NULL)
5325: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5326: BAD_CAST "fake", NULL, NULL);
5327: xmlSAX2EntityDecl(ctxt, name,
5328: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5329: literal, URI, NULL);
5330: }
5331: }
5332: }
5333: }
5334: SKIP_BLANKS;
5335: if (RAW != '>') {
5336: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5337: "xmlParseEntityDecl: entity %s not terminated\n", name);
5338: } else {
5339: if (input != ctxt->input) {
5340: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5341: "Entity declaration doesn't start and stop in the same entity\n");
5342: }
5343: NEXT;
5344: }
5345: if (orig != NULL) {
5346: /*
5347: * Ugly mechanism to save the raw entity value.
5348: */
5349: xmlEntityPtr cur = NULL;
5350:
5351: if (isParameter) {
5352: if ((ctxt->sax != NULL) &&
5353: (ctxt->sax->getParameterEntity != NULL))
5354: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5355: } else {
5356: if ((ctxt->sax != NULL) &&
5357: (ctxt->sax->getEntity != NULL))
5358: cur = ctxt->sax->getEntity(ctxt->userData, name);
5359: if ((cur == NULL) && (ctxt->userData==ctxt)) {
5360: cur = xmlSAX2GetEntity(ctxt, name);
5361: }
5362: }
5363: if (cur != NULL) {
5364: if (cur->orig != NULL)
5365: xmlFree(orig);
5366: else
5367: cur->orig = orig;
5368: } else
5369: xmlFree(orig);
5370: }
5371: if (value != NULL) xmlFree(value);
5372: if (URI != NULL) xmlFree(URI);
5373: if (literal != NULL) xmlFree(literal);
5374: }
5375: }
5376:
5377: /**
5378: * xmlParseDefaultDecl:
5379: * @ctxt: an XML parser context
5380: * @value: Receive a possible fixed default value for the attribute
5381: *
5382: * Parse an attribute default declaration
5383: *
5384: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5385: *
5386: * [ VC: Required Attribute ]
5387: * if the default declaration is the keyword #REQUIRED, then the
5388: * attribute must be specified for all elements of the type in the
5389: * attribute-list declaration.
5390: *
5391: * [ VC: Attribute Default Legal ]
5392: * The declared default value must meet the lexical constraints of
5393: * the declared attribute type c.f. xmlValidateAttributeDecl()
5394: *
5395: * [ VC: Fixed Attribute Default ]
5396: * if an attribute has a default value declared with the #FIXED
5397: * keyword, instances of that attribute must match the default value.
5398: *
5399: * [ WFC: No < in Attribute Values ]
5400: * handled in xmlParseAttValue()
5401: *
5402: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5403: * or XML_ATTRIBUTE_FIXED.
5404: */
5405:
5406: int
5407: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5408: int val;
5409: xmlChar *ret;
5410:
5411: *value = NULL;
5412: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5413: SKIP(9);
5414: return(XML_ATTRIBUTE_REQUIRED);
5415: }
5416: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5417: SKIP(8);
5418: return(XML_ATTRIBUTE_IMPLIED);
5419: }
5420: val = XML_ATTRIBUTE_NONE;
5421: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5422: SKIP(6);
5423: val = XML_ATTRIBUTE_FIXED;
5424: if (!IS_BLANK_CH(CUR)) {
5425: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426: "Space required after '#FIXED'\n");
5427: }
5428: SKIP_BLANKS;
5429: }
5430: ret = xmlParseAttValue(ctxt);
5431: ctxt->instate = XML_PARSER_DTD;
5432: if (ret == NULL) {
5433: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5434: "Attribute default value declaration error\n");
5435: } else
5436: *value = ret;
5437: return(val);
5438: }
5439:
5440: /**
5441: * xmlParseNotationType:
5442: * @ctxt: an XML parser context
5443: *
5444: * parse an Notation attribute type.
5445: *
5446: * Note: the leading 'NOTATION' S part has already being parsed...
5447: *
5448: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5449: *
5450: * [ VC: Notation Attributes ]
5451: * Values of this type must match one of the notation names included
5452: * in the declaration; all notation names in the declaration must be declared.
5453: *
5454: * Returns: the notation attribute tree built while parsing
5455: */
5456:
5457: xmlEnumerationPtr
5458: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5459: const xmlChar *name;
5460: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5461:
5462: if (RAW != '(') {
5463: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5464: return(NULL);
5465: }
5466: SHRINK;
5467: do {
5468: NEXT;
5469: SKIP_BLANKS;
5470: name = xmlParseName(ctxt);
5471: if (name == NULL) {
5472: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473: "Name expected in NOTATION declaration\n");
5474: xmlFreeEnumeration(ret);
5475: return(NULL);
5476: }
5477: tmp = ret;
5478: while (tmp != NULL) {
5479: if (xmlStrEqual(name, tmp->name)) {
5480: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5481: "standalone: attribute notation value token %s duplicated\n",
5482: name, NULL);
5483: if (!xmlDictOwns(ctxt->dict, name))
5484: xmlFree((xmlChar *) name);
5485: break;
5486: }
5487: tmp = tmp->next;
5488: }
5489: if (tmp == NULL) {
5490: cur = xmlCreateEnumeration(name);
5491: if (cur == NULL) {
5492: xmlFreeEnumeration(ret);
5493: return(NULL);
5494: }
5495: if (last == NULL) ret = last = cur;
5496: else {
5497: last->next = cur;
5498: last = cur;
5499: }
5500: }
5501: SKIP_BLANKS;
5502: } while (RAW == '|');
5503: if (RAW != ')') {
5504: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5505: xmlFreeEnumeration(ret);
5506: return(NULL);
5507: }
5508: NEXT;
5509: return(ret);
5510: }
5511:
5512: /**
5513: * xmlParseEnumerationType:
5514: * @ctxt: an XML parser context
5515: *
5516: * parse an Enumeration attribute type.
5517: *
5518: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5519: *
5520: * [ VC: Enumeration ]
5521: * Values of this type must match one of the Nmtoken tokens in
5522: * the declaration
5523: *
5524: * Returns: the enumeration attribute tree built while parsing
5525: */
5526:
5527: xmlEnumerationPtr
5528: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5529: xmlChar *name;
5530: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5531:
5532: if (RAW != '(') {
5533: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5534: return(NULL);
5535: }
5536: SHRINK;
5537: do {
5538: NEXT;
5539: SKIP_BLANKS;
5540: name = xmlParseNmtoken(ctxt);
5541: if (name == NULL) {
5542: xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5543: return(ret);
5544: }
5545: tmp = ret;
5546: while (tmp != NULL) {
5547: if (xmlStrEqual(name, tmp->name)) {
5548: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5549: "standalone: attribute enumeration value token %s duplicated\n",
5550: name, NULL);
5551: if (!xmlDictOwns(ctxt->dict, name))
5552: xmlFree(name);
5553: break;
5554: }
5555: tmp = tmp->next;
5556: }
5557: if (tmp == NULL) {
5558: cur = xmlCreateEnumeration(name);
5559: if (!xmlDictOwns(ctxt->dict, name))
5560: xmlFree(name);
5561: if (cur == NULL) {
5562: xmlFreeEnumeration(ret);
5563: return(NULL);
5564: }
5565: if (last == NULL) ret = last = cur;
5566: else {
5567: last->next = cur;
5568: last = cur;
5569: }
5570: }
5571: SKIP_BLANKS;
5572: } while (RAW == '|');
5573: if (RAW != ')') {
5574: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5575: return(ret);
5576: }
5577: NEXT;
5578: return(ret);
5579: }
5580:
5581: /**
5582: * xmlParseEnumeratedType:
5583: * @ctxt: an XML parser context
5584: * @tree: the enumeration tree built while parsing
5585: *
5586: * parse an Enumerated attribute type.
5587: *
5588: * [57] EnumeratedType ::= NotationType | Enumeration
5589: *
5590: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5591: *
5592: *
5593: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5594: */
5595:
5596: int
5597: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5598: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5599: SKIP(8);
5600: if (!IS_BLANK_CH(CUR)) {
5601: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602: "Space required after 'NOTATION'\n");
5603: return(0);
5604: }
5605: SKIP_BLANKS;
5606: *tree = xmlParseNotationType(ctxt);
5607: if (*tree == NULL) return(0);
5608: return(XML_ATTRIBUTE_NOTATION);
5609: }
5610: *tree = xmlParseEnumerationType(ctxt);
5611: if (*tree == NULL) return(0);
5612: return(XML_ATTRIBUTE_ENUMERATION);
5613: }
5614:
5615: /**
5616: * xmlParseAttributeType:
5617: * @ctxt: an XML parser context
5618: * @tree: the enumeration tree built while parsing
5619: *
5620: * parse the Attribute list def for an element
5621: *
5622: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5623: *
5624: * [55] StringType ::= 'CDATA'
5625: *
5626: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5627: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5628: *
5629: * Validity constraints for attribute values syntax are checked in
5630: * xmlValidateAttributeValue()
5631: *
5632: * [ VC: ID ]
5633: * Values of type ID must match the Name production. A name must not
5634: * appear more than once in an XML document as a value of this type;
5635: * i.e., ID values must uniquely identify the elements which bear them.
5636: *
5637: * [ VC: One ID per Element Type ]
5638: * No element type may have more than one ID attribute specified.
5639: *
5640: * [ VC: ID Attribute Default ]
5641: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5642: *
5643: * [ VC: IDREF ]
5644: * Values of type IDREF must match the Name production, and values
5645: * of type IDREFS must match Names; each IDREF Name must match the value
5646: * of an ID attribute on some element in the XML document; i.e. IDREF
5647: * values must match the value of some ID attribute.
5648: *
5649: * [ VC: Entity Name ]
5650: * Values of type ENTITY must match the Name production, values
5651: * of type ENTITIES must match Names; each Entity Name must match the
5652: * name of an unparsed entity declared in the DTD.
5653: *
5654: * [ VC: Name Token ]
5655: * Values of type NMTOKEN must match the Nmtoken production; values
5656: * of type NMTOKENS must match Nmtokens.
5657: *
5658: * Returns the attribute type
5659: */
5660: int
5661: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5662: SHRINK;
5663: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5664: SKIP(5);
5665: return(XML_ATTRIBUTE_CDATA);
5666: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5667: SKIP(6);
5668: return(XML_ATTRIBUTE_IDREFS);
5669: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5670: SKIP(5);
5671: return(XML_ATTRIBUTE_IDREF);
5672: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5673: SKIP(2);
5674: return(XML_ATTRIBUTE_ID);
5675: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5676: SKIP(6);
5677: return(XML_ATTRIBUTE_ENTITY);
5678: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5679: SKIP(8);
5680: return(XML_ATTRIBUTE_ENTITIES);
5681: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5682: SKIP(8);
5683: return(XML_ATTRIBUTE_NMTOKENS);
5684: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5685: SKIP(7);
5686: return(XML_ATTRIBUTE_NMTOKEN);
5687: }
5688: return(xmlParseEnumeratedType(ctxt, tree));
5689: }
5690:
5691: /**
5692: * xmlParseAttributeListDecl:
5693: * @ctxt: an XML parser context
5694: *
5695: * : parse the Attribute list def for an element
5696: *
5697: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5698: *
5699: * [53] AttDef ::= S Name S AttType S DefaultDecl
5700: *
5701: */
5702: void
5703: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5704: const xmlChar *elemName;
5705: const xmlChar *attrName;
5706: xmlEnumerationPtr tree;
5707:
5708: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5709: xmlParserInputPtr input = ctxt->input;
5710:
5711: SKIP(9);
5712: if (!IS_BLANK_CH(CUR)) {
5713: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714: "Space required after '<!ATTLIST'\n");
5715: }
5716: SKIP_BLANKS;
5717: elemName = xmlParseName(ctxt);
5718: if (elemName == NULL) {
5719: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5720: "ATTLIST: no name for Element\n");
5721: return;
5722: }
5723: SKIP_BLANKS;
5724: GROW;
5725: while (RAW != '>') {
5726: const xmlChar *check = CUR_PTR;
5727: int type;
5728: int def;
5729: xmlChar *defaultValue = NULL;
5730:
5731: GROW;
5732: tree = NULL;
5733: attrName = xmlParseName(ctxt);
5734: if (attrName == NULL) {
5735: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736: "ATTLIST: no name for Attribute\n");
5737: break;
5738: }
5739: GROW;
5740: if (!IS_BLANK_CH(CUR)) {
5741: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5742: "Space required after the attribute name\n");
5743: break;
5744: }
5745: SKIP_BLANKS;
5746:
5747: type = xmlParseAttributeType(ctxt, &tree);
5748: if (type <= 0) {
5749: break;
5750: }
5751:
5752: GROW;
5753: if (!IS_BLANK_CH(CUR)) {
5754: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755: "Space required after the attribute type\n");
5756: if (tree != NULL)
5757: xmlFreeEnumeration(tree);
5758: break;
5759: }
5760: SKIP_BLANKS;
5761:
5762: def = xmlParseDefaultDecl(ctxt, &defaultValue);
5763: if (def <= 0) {
5764: if (defaultValue != NULL)
5765: xmlFree(defaultValue);
5766: if (tree != NULL)
5767: xmlFreeEnumeration(tree);
5768: break;
5769: }
5770: if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5771: xmlAttrNormalizeSpace(defaultValue, defaultValue);
5772:
5773: GROW;
5774: if (RAW != '>') {
5775: if (!IS_BLANK_CH(CUR)) {
5776: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777: "Space required after the attribute default value\n");
5778: if (defaultValue != NULL)
5779: xmlFree(defaultValue);
5780: if (tree != NULL)
5781: xmlFreeEnumeration(tree);
5782: break;
5783: }
5784: SKIP_BLANKS;
5785: }
5786: if (check == CUR_PTR) {
5787: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5788: "in xmlParseAttributeListDecl\n");
5789: if (defaultValue != NULL)
5790: xmlFree(defaultValue);
5791: if (tree != NULL)
5792: xmlFreeEnumeration(tree);
5793: break;
5794: }
5795: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5796: (ctxt->sax->attributeDecl != NULL))
5797: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5798: type, def, defaultValue, tree);
5799: else if (tree != NULL)
5800: xmlFreeEnumeration(tree);
5801:
5802: if ((ctxt->sax2) && (defaultValue != NULL) &&
5803: (def != XML_ATTRIBUTE_IMPLIED) &&
5804: (def != XML_ATTRIBUTE_REQUIRED)) {
5805: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5806: }
5807: if (ctxt->sax2) {
5808: xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5809: }
5810: if (defaultValue != NULL)
5811: xmlFree(defaultValue);
5812: GROW;
5813: }
5814: if (RAW == '>') {
5815: if (input != ctxt->input) {
5816: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5817: "Attribute list declaration doesn't start and stop in the same entity\n",
5818: NULL, NULL);
5819: }
5820: NEXT;
5821: }
5822: }
5823: }
5824:
5825: /**
5826: * xmlParseElementMixedContentDecl:
5827: * @ctxt: an XML parser context
5828: * @inputchk: the input used for the current entity, needed for boundary checks
5829: *
5830: * parse the declaration for a Mixed Element content
5831: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5832: *
5833: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5834: * '(' S? '#PCDATA' S? ')'
5835: *
5836: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5837: *
5838: * [ VC: No Duplicate Types ]
5839: * The same name must not appear more than once in a single
5840: * mixed-content declaration.
5841: *
5842: * returns: the list of the xmlElementContentPtr describing the element choices
5843: */
5844: xmlElementContentPtr
5845: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5846: xmlElementContentPtr ret = NULL, cur = NULL, n;
5847: const xmlChar *elem = NULL;
5848:
5849: GROW;
5850: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5851: SKIP(7);
5852: SKIP_BLANKS;
5853: SHRINK;
5854: if (RAW == ')') {
5855: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5856: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857: "Element content declaration doesn't start and stop in the same entity\n",
5858: NULL, NULL);
5859: }
5860: NEXT;
5861: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5862: if (ret == NULL)
5863: return(NULL);
5864: if (RAW == '*') {
5865: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5866: NEXT;
5867: }
5868: return(ret);
5869: }
5870: if ((RAW == '(') || (RAW == '|')) {
5871: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5872: if (ret == NULL) return(NULL);
5873: }
5874: while (RAW == '|') {
5875: NEXT;
5876: if (elem == NULL) {
5877: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5878: if (ret == NULL) return(NULL);
5879: ret->c1 = cur;
5880: if (cur != NULL)
5881: cur->parent = ret;
5882: cur = ret;
5883: } else {
5884: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5885: if (n == NULL) return(NULL);
5886: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5887: if (n->c1 != NULL)
5888: n->c1->parent = n;
5889: cur->c2 = n;
5890: if (n != NULL)
5891: n->parent = cur;
5892: cur = n;
5893: }
5894: SKIP_BLANKS;
5895: elem = xmlParseName(ctxt);
5896: if (elem == NULL) {
5897: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5898: "xmlParseElementMixedContentDecl : Name expected\n");
5899: xmlFreeDocElementContent(ctxt->myDoc, cur);
5900: return(NULL);
5901: }
5902: SKIP_BLANKS;
5903: GROW;
5904: }
5905: if ((RAW == ')') && (NXT(1) == '*')) {
5906: if (elem != NULL) {
5907: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5908: XML_ELEMENT_CONTENT_ELEMENT);
5909: if (cur->c2 != NULL)
5910: cur->c2->parent = cur;
5911: }
5912: if (ret != NULL)
5913: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5914: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5915: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5916: "Element content declaration doesn't start and stop in the same entity\n",
5917: NULL, NULL);
5918: }
5919: SKIP(2);
5920: } else {
5921: xmlFreeDocElementContent(ctxt->myDoc, ret);
5922: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5923: return(NULL);
5924: }
5925:
5926: } else {
5927: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5928: }
5929: return(ret);
5930: }
5931:
5932: /**
5933: * xmlParseElementChildrenContentDeclPriv:
5934: * @ctxt: an XML parser context
5935: * @inputchk: the input used for the current entity, needed for boundary checks
5936: * @depth: the level of recursion
5937: *
5938: * parse the declaration for a Mixed Element content
5939: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5940: *
5941: *
5942: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5943: *
5944: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5945: *
5946: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5947: *
5948: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5949: *
5950: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5951: * TODO Parameter-entity replacement text must be properly nested
5952: * with parenthesized groups. That is to say, if either of the
5953: * opening or closing parentheses in a choice, seq, or Mixed
5954: * construct is contained in the replacement text for a parameter
5955: * entity, both must be contained in the same replacement text. For
5956: * interoperability, if a parameter-entity reference appears in a
5957: * choice, seq, or Mixed construct, its replacement text should not
5958: * be empty, and neither the first nor last non-blank character of
5959: * the replacement text should be a connector (| or ,).
5960: *
5961: * Returns the tree of xmlElementContentPtr describing the element
5962: * hierarchy.
5963: */
5964: static xmlElementContentPtr
5965: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5966: int depth) {
5967: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5968: const xmlChar *elem;
5969: xmlChar type = 0;
5970:
5971: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5972: (depth > 2048)) {
5973: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5974: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5975: depth);
5976: return(NULL);
5977: }
5978: SKIP_BLANKS;
5979: GROW;
5980: if (RAW == '(') {
5981: int inputid = ctxt->input->id;
5982:
5983: /* Recurse on first child */
5984: NEXT;
5985: SKIP_BLANKS;
5986: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5987: depth + 1);
5988: SKIP_BLANKS;
5989: GROW;
5990: } else {
5991: elem = xmlParseName(ctxt);
5992: if (elem == NULL) {
5993: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5994: return(NULL);
5995: }
5996: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5997: if (cur == NULL) {
5998: xmlErrMemory(ctxt, NULL);
5999: return(NULL);
6000: }
6001: GROW;
6002: if (RAW == '?') {
6003: cur->ocur = XML_ELEMENT_CONTENT_OPT;
6004: NEXT;
6005: } else if (RAW == '*') {
6006: cur->ocur = XML_ELEMENT_CONTENT_MULT;
6007: NEXT;
6008: } else if (RAW == '+') {
6009: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6010: NEXT;
6011: } else {
6012: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6013: }
6014: GROW;
6015: }
6016: SKIP_BLANKS;
6017: SHRINK;
6018: while (RAW != ')') {
6019: /*
6020: * Each loop we parse one separator and one element.
6021: */
6022: if (RAW == ',') {
6023: if (type == 0) type = CUR;
6024:
6025: /*
6026: * Detect "Name | Name , Name" error
6027: */
6028: else if (type != CUR) {
6029: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6030: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6031: type);
6032: if ((last != NULL) && (last != ret))
6033: xmlFreeDocElementContent(ctxt->myDoc, last);
6034: if (ret != NULL)
6035: xmlFreeDocElementContent(ctxt->myDoc, ret);
6036: return(NULL);
6037: }
6038: NEXT;
6039:
6040: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6041: if (op == NULL) {
6042: if ((last != NULL) && (last != ret))
6043: xmlFreeDocElementContent(ctxt->myDoc, last);
6044: xmlFreeDocElementContent(ctxt->myDoc, ret);
6045: return(NULL);
6046: }
6047: if (last == NULL) {
6048: op->c1 = ret;
6049: if (ret != NULL)
6050: ret->parent = op;
6051: ret = cur = op;
6052: } else {
6053: cur->c2 = op;
6054: if (op != NULL)
6055: op->parent = cur;
6056: op->c1 = last;
6057: if (last != NULL)
6058: last->parent = op;
6059: cur =op;
6060: last = NULL;
6061: }
6062: } else if (RAW == '|') {
6063: if (type == 0) type = CUR;
6064:
6065: /*
6066: * Detect "Name , Name | Name" error
6067: */
6068: else if (type != CUR) {
6069: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6070: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6071: type);
6072: if ((last != NULL) && (last != ret))
6073: xmlFreeDocElementContent(ctxt->myDoc, last);
6074: if (ret != NULL)
6075: xmlFreeDocElementContent(ctxt->myDoc, ret);
6076: return(NULL);
6077: }
6078: NEXT;
6079:
6080: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6081: if (op == NULL) {
6082: if ((last != NULL) && (last != ret))
6083: xmlFreeDocElementContent(ctxt->myDoc, last);
6084: if (ret != NULL)
6085: xmlFreeDocElementContent(ctxt->myDoc, ret);
6086: return(NULL);
6087: }
6088: if (last == NULL) {
6089: op->c1 = ret;
6090: if (ret != NULL)
6091: ret->parent = op;
6092: ret = cur = op;
6093: } else {
6094: cur->c2 = op;
6095: if (op != NULL)
6096: op->parent = cur;
6097: op->c1 = last;
6098: if (last != NULL)
6099: last->parent = op;
6100: cur =op;
6101: last = NULL;
6102: }
6103: } else {
6104: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6105: if ((last != NULL) && (last != ret))
6106: xmlFreeDocElementContent(ctxt->myDoc, last);
6107: if (ret != NULL)
6108: xmlFreeDocElementContent(ctxt->myDoc, ret);
6109: return(NULL);
6110: }
6111: GROW;
6112: SKIP_BLANKS;
6113: GROW;
6114: if (RAW == '(') {
6115: int inputid = ctxt->input->id;
6116: /* Recurse on second child */
6117: NEXT;
6118: SKIP_BLANKS;
6119: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6120: depth + 1);
6121: SKIP_BLANKS;
6122: } else {
6123: elem = xmlParseName(ctxt);
6124: if (elem == NULL) {
6125: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6126: if (ret != NULL)
6127: xmlFreeDocElementContent(ctxt->myDoc, ret);
6128: return(NULL);
6129: }
6130: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6131: if (last == NULL) {
6132: if (ret != NULL)
6133: xmlFreeDocElementContent(ctxt->myDoc, ret);
6134: return(NULL);
6135: }
6136: if (RAW == '?') {
6137: last->ocur = XML_ELEMENT_CONTENT_OPT;
6138: NEXT;
6139: } else if (RAW == '*') {
6140: last->ocur = XML_ELEMENT_CONTENT_MULT;
6141: NEXT;
6142: } else if (RAW == '+') {
6143: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6144: NEXT;
6145: } else {
6146: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6147: }
6148: }
6149: SKIP_BLANKS;
6150: GROW;
6151: }
6152: if ((cur != NULL) && (last != NULL)) {
6153: cur->c2 = last;
6154: if (last != NULL)
6155: last->parent = cur;
6156: }
6157: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6158: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159: "Element content declaration doesn't start and stop in the same entity\n",
6160: NULL, NULL);
6161: }
6162: NEXT;
6163: if (RAW == '?') {
6164: if (ret != NULL) {
6165: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6166: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6167: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168: else
6169: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6170: }
6171: NEXT;
6172: } else if (RAW == '*') {
6173: if (ret != NULL) {
6174: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6175: cur = ret;
6176: /*
6177: * Some normalization:
6178: * (a | b* | c?)* == (a | b | c)*
6179: */
6180: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6181: if ((cur->c1 != NULL) &&
6182: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6183: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6184: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6185: if ((cur->c2 != NULL) &&
6186: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6187: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6188: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6189: cur = cur->c2;
6190: }
6191: }
6192: NEXT;
6193: } else if (RAW == '+') {
6194: if (ret != NULL) {
6195: int found = 0;
6196:
6197: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6199: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6200: else
6201: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6202: /*
6203: * Some normalization:
6204: * (a | b*)+ == (a | b)*
6205: * (a | b?)+ == (a | b)*
6206: */
6207: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6208: if ((cur->c1 != NULL) &&
6209: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212: found = 1;
6213: }
6214: if ((cur->c2 != NULL) &&
6215: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6216: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6217: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6218: found = 1;
6219: }
6220: cur = cur->c2;
6221: }
6222: if (found)
6223: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6224: }
6225: NEXT;
6226: }
6227: return(ret);
6228: }
6229:
6230: /**
6231: * xmlParseElementChildrenContentDecl:
6232: * @ctxt: an XML parser context
6233: * @inputchk: the input used for the current entity, needed for boundary checks
6234: *
6235: * parse the declaration for a Mixed Element content
6236: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6237: *
6238: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6239: *
6240: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6241: *
6242: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6243: *
6244: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6245: *
6246: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6247: * TODO Parameter-entity replacement text must be properly nested
6248: * with parenthesized groups. That is to say, if either of the
6249: * opening or closing parentheses in a choice, seq, or Mixed
6250: * construct is contained in the replacement text for a parameter
6251: * entity, both must be contained in the same replacement text. For
6252: * interoperability, if a parameter-entity reference appears in a
6253: * choice, seq, or Mixed construct, its replacement text should not
6254: * be empty, and neither the first nor last non-blank character of
6255: * the replacement text should be a connector (| or ,).
6256: *
6257: * Returns the tree of xmlElementContentPtr describing the element
6258: * hierarchy.
6259: */
6260: xmlElementContentPtr
6261: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262: /* stub left for API/ABI compat */
6263: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6264: }
6265:
6266: /**
6267: * xmlParseElementContentDecl:
6268: * @ctxt: an XML parser context
6269: * @name: the name of the element being defined.
6270: * @result: the Element Content pointer will be stored here if any
6271: *
6272: * parse the declaration for an Element content either Mixed or Children,
6273: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6274: *
6275: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6276: *
6277: * returns: the type of element content XML_ELEMENT_TYPE_xxx
6278: */
6279:
6280: int
6281: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6282: xmlElementContentPtr *result) {
6283:
6284: xmlElementContentPtr tree = NULL;
6285: int inputid = ctxt->input->id;
6286: int res;
6287:
6288: *result = NULL;
6289:
6290: if (RAW != '(') {
6291: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6292: "xmlParseElementContentDecl : %s '(' expected\n", name);
6293: return(-1);
6294: }
6295: NEXT;
6296: GROW;
6297: SKIP_BLANKS;
6298: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6299: tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6300: res = XML_ELEMENT_TYPE_MIXED;
6301: } else {
6302: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6303: res = XML_ELEMENT_TYPE_ELEMENT;
6304: }
6305: SKIP_BLANKS;
6306: *result = tree;
6307: return(res);
6308: }
6309:
6310: /**
6311: * xmlParseElementDecl:
6312: * @ctxt: an XML parser context
6313: *
6314: * parse an Element declaration.
6315: *
6316: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6317: *
6318: * [ VC: Unique Element Type Declaration ]
6319: * No element type may be declared more than once
6320: *
6321: * Returns the type of the element, or -1 in case of error
6322: */
6323: int
6324: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6325: const xmlChar *name;
6326: int ret = -1;
6327: xmlElementContentPtr content = NULL;
6328:
6329: /* GROW; done in the caller */
6330: if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6331: xmlParserInputPtr input = ctxt->input;
6332:
6333: SKIP(9);
6334: if (!IS_BLANK_CH(CUR)) {
6335: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6336: "Space required after 'ELEMENT'\n");
6337: }
6338: SKIP_BLANKS;
6339: name = xmlParseName(ctxt);
6340: if (name == NULL) {
6341: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6342: "xmlParseElementDecl: no name for Element\n");
6343: return(-1);
6344: }
6345: while ((RAW == 0) && (ctxt->inputNr > 1))
6346: xmlPopInput(ctxt);
6347: if (!IS_BLANK_CH(CUR)) {
6348: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6349: "Space required after the element name\n");
6350: }
6351: SKIP_BLANKS;
6352: if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6353: SKIP(5);
6354: /*
6355: * Element must always be empty.
6356: */
6357: ret = XML_ELEMENT_TYPE_EMPTY;
6358: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6359: (NXT(2) == 'Y')) {
6360: SKIP(3);
6361: /*
6362: * Element is a generic container.
6363: */
6364: ret = XML_ELEMENT_TYPE_ANY;
6365: } else if (RAW == '(') {
6366: ret = xmlParseElementContentDecl(ctxt, name, &content);
6367: } else {
6368: /*
6369: * [ WFC: PEs in Internal Subset ] error handling.
6370: */
6371: if ((RAW == '%') && (ctxt->external == 0) &&
6372: (ctxt->inputNr == 1)) {
6373: xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6374: "PEReference: forbidden within markup decl in internal subset\n");
6375: } else {
6376: xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6377: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6378: }
6379: return(-1);
6380: }
6381:
6382: SKIP_BLANKS;
6383: /*
6384: * Pop-up of finished entities.
6385: */
6386: while ((RAW == 0) && (ctxt->inputNr > 1))
6387: xmlPopInput(ctxt);
6388: SKIP_BLANKS;
6389:
6390: if (RAW != '>') {
6391: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6392: if (content != NULL) {
6393: xmlFreeDocElementContent(ctxt->myDoc, content);
6394: }
6395: } else {
6396: if (input != ctxt->input) {
6397: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398: "Element declaration doesn't start and stop in the same entity\n");
6399: }
6400:
6401: NEXT;
6402: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6403: (ctxt->sax->elementDecl != NULL)) {
6404: if (content != NULL)
6405: content->parent = NULL;
6406: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6407: content);
6408: if ((content != NULL) && (content->parent == NULL)) {
6409: /*
6410: * this is a trick: if xmlAddElementDecl is called,
6411: * instead of copying the full tree it is plugged directly
6412: * if called from the parser. Avoid duplicating the
6413: * interfaces or change the API/ABI
6414: */
6415: xmlFreeDocElementContent(ctxt->myDoc, content);
6416: }
6417: } else if (content != NULL) {
6418: xmlFreeDocElementContent(ctxt->myDoc, content);
6419: }
6420: }
6421: }
6422: return(ret);
6423: }
6424:
6425: /**
6426: * xmlParseConditionalSections
6427: * @ctxt: an XML parser context
6428: *
6429: * [61] conditionalSect ::= includeSect | ignoreSect
6430: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6431: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6432: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6433: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6434: */
6435:
6436: static void
6437: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6438: int id = ctxt->input->id;
6439:
6440: SKIP(3);
6441: SKIP_BLANKS;
6442: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6443: SKIP(7);
6444: SKIP_BLANKS;
6445: if (RAW != '[') {
6446: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6447: } else {
6448: if (ctxt->input->id != id) {
6449: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6450: "All markup of the conditional section is not in the same entity\n",
6451: NULL, NULL);
6452: }
6453: NEXT;
6454: }
6455: if (xmlParserDebugEntities) {
6456: if ((ctxt->input != NULL) && (ctxt->input->filename))
6457: xmlGenericError(xmlGenericErrorContext,
6458: "%s(%d): ", ctxt->input->filename,
6459: ctxt->input->line);
6460: xmlGenericError(xmlGenericErrorContext,
6461: "Entering INCLUDE Conditional Section\n");
6462: }
6463:
6464: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6465: (NXT(2) != '>'))) {
6466: const xmlChar *check = CUR_PTR;
6467: unsigned int cons = ctxt->input->consumed;
6468:
6469: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6470: xmlParseConditionalSections(ctxt);
6471: } else if (IS_BLANK_CH(CUR)) {
6472: NEXT;
6473: } else if (RAW == '%') {
6474: xmlParsePEReference(ctxt);
6475: } else
6476: xmlParseMarkupDecl(ctxt);
6477:
6478: /*
6479: * Pop-up of finished entities.
6480: */
6481: while ((RAW == 0) && (ctxt->inputNr > 1))
6482: xmlPopInput(ctxt);
6483:
6484: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6485: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6486: break;
6487: }
6488: }
6489: if (xmlParserDebugEntities) {
6490: if ((ctxt->input != NULL) && (ctxt->input->filename))
6491: xmlGenericError(xmlGenericErrorContext,
6492: "%s(%d): ", ctxt->input->filename,
6493: ctxt->input->line);
6494: xmlGenericError(xmlGenericErrorContext,
6495: "Leaving INCLUDE Conditional Section\n");
6496: }
6497:
6498: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6499: int state;
6500: xmlParserInputState instate;
6501: int depth = 0;
6502:
6503: SKIP(6);
6504: SKIP_BLANKS;
6505: if (RAW != '[') {
6506: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6507: } else {
6508: if (ctxt->input->id != id) {
6509: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6510: "All markup of the conditional section is not in the same entity\n",
6511: NULL, NULL);
6512: }
6513: NEXT;
6514: }
6515: if (xmlParserDebugEntities) {
6516: if ((ctxt->input != NULL) && (ctxt->input->filename))
6517: xmlGenericError(xmlGenericErrorContext,
6518: "%s(%d): ", ctxt->input->filename,
6519: ctxt->input->line);
6520: xmlGenericError(xmlGenericErrorContext,
6521: "Entering IGNORE Conditional Section\n");
6522: }
6523:
6524: /*
6525: * Parse up to the end of the conditional section
6526: * But disable SAX event generating DTD building in the meantime
6527: */
6528: state = ctxt->disableSAX;
6529: instate = ctxt->instate;
6530: if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6531: ctxt->instate = XML_PARSER_IGNORE;
6532:
6533: while ((depth >= 0) && (RAW != 0)) {
6534: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6535: depth++;
6536: SKIP(3);
6537: continue;
6538: }
6539: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6540: if (--depth >= 0) SKIP(3);
6541: continue;
6542: }
6543: NEXT;
6544: continue;
6545: }
6546:
6547: ctxt->disableSAX = state;
6548: ctxt->instate = instate;
6549:
6550: if (xmlParserDebugEntities) {
6551: if ((ctxt->input != NULL) && (ctxt->input->filename))
6552: xmlGenericError(xmlGenericErrorContext,
6553: "%s(%d): ", ctxt->input->filename,
6554: ctxt->input->line);
6555: xmlGenericError(xmlGenericErrorContext,
6556: "Leaving IGNORE Conditional Section\n");
6557: }
6558:
6559: } else {
6560: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6561: }
6562:
6563: if (RAW == 0)
6564: SHRINK;
6565:
6566: if (RAW == 0) {
6567: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6568: } else {
6569: if (ctxt->input->id != id) {
6570: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6571: "All markup of the conditional section is not in the same entity\n",
6572: NULL, NULL);
6573: }
6574: SKIP(3);
6575: }
6576: }
6577:
6578: /**
6579: * xmlParseMarkupDecl:
6580: * @ctxt: an XML parser context
6581: *
6582: * parse Markup declarations
6583: *
6584: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6585: * NotationDecl | PI | Comment
6586: *
6587: * [ VC: Proper Declaration/PE Nesting ]
6588: * Parameter-entity replacement text must be properly nested with
6589: * markup declarations. That is to say, if either the first character
6590: * or the last character of a markup declaration (markupdecl above) is
6591: * contained in the replacement text for a parameter-entity reference,
6592: * both must be contained in the same replacement text.
6593: *
6594: * [ WFC: PEs in Internal Subset ]
6595: * In the internal DTD subset, parameter-entity references can occur
6596: * only where markup declarations can occur, not within markup declarations.
6597: * (This does not apply to references that occur in external parameter
6598: * entities or to the external subset.)
6599: */
6600: void
6601: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6602: GROW;
6603: if (CUR == '<') {
6604: if (NXT(1) == '!') {
6605: switch (NXT(2)) {
6606: case 'E':
6607: if (NXT(3) == 'L')
6608: xmlParseElementDecl(ctxt);
6609: else if (NXT(3) == 'N')
6610: xmlParseEntityDecl(ctxt);
6611: break;
6612: case 'A':
6613: xmlParseAttributeListDecl(ctxt);
6614: break;
6615: case 'N':
6616: xmlParseNotationDecl(ctxt);
6617: break;
6618: case '-':
6619: xmlParseComment(ctxt);
6620: break;
6621: default:
6622: /* there is an error but it will be detected later */
6623: break;
6624: }
6625: } else if (NXT(1) == '?') {
6626: xmlParsePI(ctxt);
6627: }
6628: }
6629: /*
6630: * This is only for internal subset. On external entities,
6631: * the replacement is done before parsing stage
6632: */
6633: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6634: xmlParsePEReference(ctxt);
6635:
6636: /*
6637: * Conditional sections are allowed from entities included
6638: * by PE References in the internal subset.
6639: */
6640: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6641: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6642: xmlParseConditionalSections(ctxt);
6643: }
6644: }
6645:
6646: ctxt->instate = XML_PARSER_DTD;
6647: }
6648:
6649: /**
6650: * xmlParseTextDecl:
6651: * @ctxt: an XML parser context
6652: *
6653: * parse an XML declaration header for external entities
6654: *
6655: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6656: */
6657:
6658: void
6659: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6660: xmlChar *version;
6661: const xmlChar *encoding;
6662:
6663: /*
6664: * We know that '<?xml' is here.
6665: */
6666: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6667: SKIP(5);
6668: } else {
6669: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6670: return;
6671: }
6672:
6673: if (!IS_BLANK_CH(CUR)) {
6674: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675: "Space needed after '<?xml'\n");
6676: }
6677: SKIP_BLANKS;
6678:
6679: /*
6680: * We may have the VersionInfo here.
6681: */
6682: version = xmlParseVersionInfo(ctxt);
6683: if (version == NULL)
6684: version = xmlCharStrdup(XML_DEFAULT_VERSION);
6685: else {
6686: if (!IS_BLANK_CH(CUR)) {
6687: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688: "Space needed here\n");
6689: }
6690: }
6691: ctxt->input->version = version;
6692:
6693: /*
6694: * We must have the encoding declaration
6695: */
6696: encoding = xmlParseEncodingDecl(ctxt);
6697: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6698: /*
6699: * The XML REC instructs us to stop parsing right here
6700: */
6701: return;
6702: }
6703: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6704: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6705: "Missing encoding in text declaration\n");
6706: }
6707:
6708: SKIP_BLANKS;
6709: if ((RAW == '?') && (NXT(1) == '>')) {
6710: SKIP(2);
6711: } else if (RAW == '>') {
6712: /* Deprecated old WD ... */
6713: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6714: NEXT;
6715: } else {
6716: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6717: MOVETO_ENDTAG(CUR_PTR);
6718: NEXT;
6719: }
6720: }
6721:
6722: /**
6723: * xmlParseExternalSubset:
6724: * @ctxt: an XML parser context
6725: * @ExternalID: the external identifier
6726: * @SystemID: the system identifier (or URL)
6727: *
6728: * parse Markup declarations from an external subset
6729: *
6730: * [30] extSubset ::= textDecl? extSubsetDecl
6731: *
6732: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6733: */
6734: void
6735: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6736: const xmlChar *SystemID) {
6737: xmlDetectSAX2(ctxt);
6738: GROW;
6739:
6740: if ((ctxt->encoding == NULL) &&
6741: (ctxt->input->end - ctxt->input->cur >= 4)) {
6742: xmlChar start[4];
6743: xmlCharEncoding enc;
6744:
6745: start[0] = RAW;
6746: start[1] = NXT(1);
6747: start[2] = NXT(2);
6748: start[3] = NXT(3);
6749: enc = xmlDetectCharEncoding(start, 4);
6750: if (enc != XML_CHAR_ENCODING_NONE)
6751: xmlSwitchEncoding(ctxt, enc);
6752: }
6753:
6754: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6755: xmlParseTextDecl(ctxt);
6756: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6757: /*
6758: * The XML REC instructs us to stop parsing right here
6759: */
6760: ctxt->instate = XML_PARSER_EOF;
6761: return;
6762: }
6763: }
6764: if (ctxt->myDoc == NULL) {
6765: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6766: if (ctxt->myDoc == NULL) {
6767: xmlErrMemory(ctxt, "New Doc failed");
6768: return;
6769: }
6770: ctxt->myDoc->properties = XML_DOC_INTERNAL;
6771: }
6772: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6773: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6774:
6775: ctxt->instate = XML_PARSER_DTD;
6776: ctxt->external = 1;
6777: while (((RAW == '<') && (NXT(1) == '?')) ||
6778: ((RAW == '<') && (NXT(1) == '!')) ||
6779: (RAW == '%') || IS_BLANK_CH(CUR)) {
6780: const xmlChar *check = CUR_PTR;
6781: unsigned int cons = ctxt->input->consumed;
6782:
6783: GROW;
6784: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6785: xmlParseConditionalSections(ctxt);
6786: } else if (IS_BLANK_CH(CUR)) {
6787: NEXT;
6788: } else if (RAW == '%') {
6789: xmlParsePEReference(ctxt);
6790: } else
6791: xmlParseMarkupDecl(ctxt);
6792:
6793: /*
6794: * Pop-up of finished entities.
6795: */
6796: while ((RAW == 0) && (ctxt->inputNr > 1))
6797: xmlPopInput(ctxt);
6798:
6799: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6800: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6801: break;
6802: }
6803: }
6804:
6805: if (RAW != 0) {
6806: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6807: }
6808:
6809: }
6810:
6811: /**
6812: * xmlParseReference:
6813: * @ctxt: an XML parser context
6814: *
6815: * parse and handle entity references in content, depending on the SAX
6816: * interface, this may end-up in a call to character() if this is a
6817: * CharRef, a predefined entity, if there is no reference() callback.
6818: * or if the parser was asked to switch to that mode.
6819: *
 * Nothing is done unless the current character is '&'.
 *
 * A character reference ("&#...") is handed to the characters() callback,
 * or to reference() when the context charset is not UTF-8 and the value
 * does not fit on 8 bits.
 *
 * For a general entity, the function returns right away when the entity
 * could not be found or when the document is not well-formed anymore.
 * Otherwise the entity content is parsed on the first reference
 * (ent->checked == 0) and the resulting node list is stored on the entity.
 * The content is then delivered either through the reference() callback
 * (when ctxt->replaceEntities is 0) or by adding the entity nodes, or
 * copies of them, under ctxt->node.
 *
6820: * [67] Reference ::= EntityRef | CharRef
6821: */
6822: void
6823: xmlParseReference(xmlParserCtxtPtr ctxt) {
6824: xmlEntityPtr ent;
6825: xmlChar *val;
6826: int was_checked;
6827: xmlNodePtr list = NULL;
6828: xmlParserErrors ret = XML_ERR_OK;
6829:
6830:
6831: if (RAW != '&')
6832: return;
6833:
6834: /*
6835: * Simple case of a CharRef
6836: */
6837: if (NXT(1) == '#') {
6838: int i = 0;
6839: xmlChar out[10];
/* The character after "&#" is sampled before xmlParseCharRef() is called;
 * it selects hexadecimal or decimal formatting of the name below. */
6840: int hex = NXT(2);
6841: int value = xmlParseCharRef(ctxt);
6842:
/* A zero value is treated as "nothing to report". */
6843: if (value == 0)
6844: return;
6845: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6846: /*
6847: * So we are using non-UTF-8 buffers
6848: * Check that the char fit on 8bits, if not
6849: * generate a CharRef.
6850: */
6851: if (value <= 0xFF) {
6852: out[0] = value;
6853: out[1] = 0;
6854: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6855: (!ctxt->disableSAX))
6856: ctxt->sax->characters(ctxt->userData, out, 1);
6857: } else {
6858: if ((hex == 'x') || (hex == 'X'))
6859: snprintf((char *)out, sizeof(out), "#x%X", value);
6860: else
6861: snprintf((char *)out, sizeof(out), "#%d", value);
6862: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6863: (!ctxt->disableSAX))
6864: ctxt->sax->reference(ctxt->userData, out);
6865: }
6866: } else {
6867: /*
6868: * Just encode the value in UTF-8
6869: */
6870: COPY_BUF(0 ,out, i, value);
6871: out[i] = 0;
6872: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6873: (!ctxt->disableSAX))
6874: ctxt->sax->characters(ctxt->userData, out, i);
6875: }
6876: return;
6877: }
6878:
6879: /*
6880: * We are seeing an entity reference
6881: */
6882: ent = xmlParseEntityRef(ctxt);
6883: if (ent == NULL) return;
6884: if (!ctxt->wellFormed)
6885: return;
/* Remember whether the entity content had already been parsed before this
 * reference; it decides below whether the content must be replayed. */
6886: was_checked = ent->checked;
6887:
6888: /* special case of predefined entities */
6889: if ((ent->name == NULL) ||
6890: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6891: val = ent->content;
6892: if (val == NULL) return;
6893: /*
6894: * inline the entity.
6895: */
6896: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897: (!ctxt->disableSAX))
6898: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6899: return;
6900: }
6901:
6902: /*
6903: * The first reference to the entity trigger a parsing phase
6904: * where the ent->children is filled with the result from
6905: * the parsing.
6906: */
6907: if (ent->checked == 0) {
6908: unsigned long oldnbent = ctxt->nbentities;
6909:
6910: /*
6911: * This is a bit hackish but this seems the best
6912: * way to make sure both SAX and DOM entity support
6913: * behaves okay.
6914: */
6915: void *user_data;
6916: if (ctxt->userData == ctxt)
6917: user_data = NULL;
6918: else
6919: user_data = ctxt->userData;
6920:
6921: /*
6922: * Check that this entity is well formed
6923: * 4.3.2: An internal general parsed entity is well-formed
6924: * if its replacement text matches the production labeled
6925: * content.
6926: */
6927: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6928: ctxt->depth++;
6929: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6930: user_data, &list);
6931: ctxt->depth--;
6932:
6933: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6934: ctxt->depth++;
6935: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6936: user_data, ctxt->depth, ent->URI,
6937: ent->ExternalID, &list);
6938: ctxt->depth--;
6939: } else {
6940: ret = XML_ERR_ENTITY_PE_INTERNAL;
6941: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6942: "invalid entity type found\n", NULL);
6943: }
6944:
6945: /*
6946: * Store the number of entities needing parsing for this entity
6947: * content and do checkings
6948: */
6949: ent->checked = ctxt->nbentities - oldnbent;
6950: if (ret == XML_ERR_ENTITY_LOOP) {
6951: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6952: xmlFreeNodeList(list);
6953: return;
6954: }
6955: if (xmlParserEntityCheck(ctxt, 0, ent)) {
6956: xmlFreeNodeList(list);
6957: return;
6958: }
6959:
/*
 * Decide who owns the freshly parsed node list: it is attached to the
 * entity when the entity has no children yet, otherwise it is freed.
 */
6960: if ((ret == XML_ERR_OK) && (list != NULL)) {
6961: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6962: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6963: (ent->children == NULL)) {
6964: ent->children = list;
6965: if (ctxt->replaceEntities) {
6966: /*
6967: * Prune it directly in the generated document
6968: * except for single text nodes.
6969: */
6970: if (((list->type == XML_TEXT_NODE) &&
6971: (list->next == NULL)) ||
6972: (ctxt->parseMode == XML_PARSE_READER)) {
6973: list->parent = (xmlNodePtr) ent;
6974: list = NULL;
6975: ent->owner = 1;
6976: } else {
6977: ent->owner = 0;
6978: while (list != NULL) {
6979: list->parent = (xmlNodePtr) ctxt->node;
6980: list->doc = ctxt->myDoc;
6981: if (list->next == NULL)
6982: ent->last = list;
6983: list = list->next;
6984: }
/* The walk above left list NULL; point it back at the first node so the
 * splice code further down sees a non-NULL list for this first use. */
6985: list = ent->children;
6986: #ifdef LIBXML_LEGACY_ENABLED
6987: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6988: xmlAddEntityReference(ent, list, NULL);
6989: #endif /* LIBXML_LEGACY_ENABLED */
6990: }
6991: } else {
6992: ent->owner = 1;
6993: while (list != NULL) {
6994: list->parent = (xmlNodePtr) ent;
6995: if (list->next == NULL)
6996: ent->last = list;
6997: list = list->next;
6998: }
6999: }
7000: } else {
7001: xmlFreeNodeList(list);
7002: list = NULL;
7003: }
7004: } else if ((ret != XML_ERR_OK) &&
7005: (ret != XML_WAR_UNDECLARED_ENTITY)) {
7006: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7007: "Entity '%s' failed to parse\n", ent->name);
7008: } else if (list != NULL) {
7009: xmlFreeNodeList(list);
7010: list = NULL;
7011: }
/* Make sure the entity is flagged as checked even when parsing its
 * content did not account for any nested entity reference. */
7012: if (ent->checked == 0)
7013: ent->checked = 1;
7014: } else if (ent->checked != 1) {
/* Entity parsed on an earlier reference: add the count stored on it
 * at that time to the context counter. */
7015: ctxt->nbentities += ent->checked;
7016: }
7017:
7018: /*
7019: * Now that the entity content has been gathered
7020: * provide it to the application, this can take different forms based
7021: * on the parsing modes.
7022: */
7023: if (ent->children == NULL) {
7024: /*
7025: * Probably running in SAX mode and the callbacks don't
7026: * build the entity content. So unless we already went
7027: * through parsing for first checking go through the entity
7028: * content to generate callbacks associated to the entity
7029: */
7030: if (was_checked != 0) {
7031: void *user_data;
7032: /*
7033: * This is a bit hackish but this seems the best
7034: * way to make sure both SAX and DOM entity support
7035: * behaves okay.
7036: */
7037: if (ctxt->userData == ctxt)
7038: user_data = NULL;
7039: else
7040: user_data = ctxt->userData;
7041:
/* Same parsing calls as for the first reference, but with a NULL list
 * argument: no node list is requested from the replay. */
7042: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7043: ctxt->depth++;
7044: ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7045: ent->content, user_data, NULL);
7046: ctxt->depth--;
7047: } else if (ent->etype ==
7048: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7049: ctxt->depth++;
7050: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7051: ctxt->sax, user_data, ctxt->depth,
7052: ent->URI, ent->ExternalID, NULL);
7053: ctxt->depth--;
7054: } else {
7055: ret = XML_ERR_ENTITY_PE_INTERNAL;
7056: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7057: "invalid entity type found\n", NULL);
7058: }
7059: if (ret == XML_ERR_ENTITY_LOOP) {
7060: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7061: return;
7062: }
7063: }
7064: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7065: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7066: /*
7067: * Entity reference callback comes second, it's somewhat
7068: * superfluous but a compatibility to historical behaviour
7069: */
7070: ctxt->sax->reference(ctxt->userData, ent->name);
7071: }
7072: return;
7073: }
7074:
7075: /*
7076: * The entity has children from here on. When entities are not
7077: * substituted, only report the reference through SAX.
7078: */
7079: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7080: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7081: /*
7082: * Create a node.
7083: */
7084: ctxt->sax->reference(ctxt->userData, ent->name);
7085: return;
7086: }
7087:
/* NOTE(review): the (ent->children == NULL) alternative looks unreachable
 * here since that case already returned above - confirm before relying
 * on it. */
7088: if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7089: /*
7090: * There is a problem on the handling of _private for entities
7091: * (bug 155816): Should we copy the content of the field from
7092: * the entity (possibly overwriting some value set by the user
7093: * when a copy is created), should we leave it alone, or should
7094: * we try to take care of different situations? The problem
7095: * is exacerbated by the usage of this field by the xmlReader.
7096: * To fix this bug, we look at _private on the created node
7097: * and, if it's NULL, we copy in whatever was in the entity.
7098: * If it's not NULL we leave it alone. This is somewhat of a
7099: * hack - maybe we should have further tests to determine
7100: * what to do.
7101: */
7102: if ((ctxt->node != NULL) && (ent->children != NULL)) {
7103: /*
7104: * Seems we are generating the DOM content, do
7105: * a simple tree copy for all references except the first
7106: * In the first occurrence list contains the replacement.
7107: * progressive == 2 means we are operating on the Reader
7108: * and since nodes are discarded we must copy all the time.
7109: */
/*
 * Three cases follow:
 *  - the entity does not own its nodes and this is not the first use,
 *    or the reader is in use: add copies of the nodes under ctxt->node;
 *  - the entity owns its nodes and this is not the first use: move the
 *    original nodes under ctxt->node and give copies to the entity;
 *  - first use (list != NULL): link the entity children under ctxt->node.
 */
7110: if (((list == NULL) && (ent->owner == 0)) ||
7111: (ctxt->parseMode == XML_PARSE_READER)) {
7112: xmlNodePtr nw = NULL, cur, firstChild = NULL;
7113:
7114: /*
7115: * when operating on a reader, the entities definitions
7116: * are always owning the entities subtree.
7117: if (ctxt->parseMode == XML_PARSE_READER)
7118: ent->owner = 1;
7119: */
7120:
7121: cur = ent->children;
7122: while (cur != NULL) {
7123: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7124: if (nw != NULL) {
7125: if (nw->_private == NULL)
7126: nw->_private = cur->_private;
7127: if (firstChild == NULL){
7128: firstChild = nw;
7129: }
7130: nw = xmlAddChild(ctxt->node, nw);
7131: }
7132: if (cur == ent->last) {
7133: /*
7134: * needed to detect some strange empty
7135: * node cases in the reader tests
7136: */
7137: if ((ctxt->parseMode == XML_PARSE_READER) &&
7138: (nw != NULL) &&
7139: (nw->type == XML_ELEMENT_NODE) &&
7140: (nw->children == NULL))
7141: nw->extra = 1;
7142:
7143: break;
7144: }
7145: cur = cur->next;
7146: }
7147: #ifdef LIBXML_LEGACY_ENABLED
7148: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7149: xmlAddEntityReference(ent, firstChild, nw);
7150: #endif /* LIBXML_LEGACY_ENABLED */
7151: } else if (list == NULL) {
7152: xmlNodePtr nw = NULL, cur, next, last,
7153: firstChild = NULL;
7154: /*
7155: * Copy the entity child list and make it the new
7156: * entity child list. The goal is to make sure any
7157: * ID or REF referenced will be the one from the
7158: * document content and not the entity copy.
7159: */
7160: cur = ent->children;
7161: ent->children = NULL;
7162: last = ent->last;
7163: ent->last = NULL;
7164: while (cur != NULL) {
/* Detach the original node before copying it; the copy goes to the
 * entity and the original is added under ctxt->node. */
7165: next = cur->next;
7166: cur->next = NULL;
7167: cur->parent = NULL;
7168: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7169: if (nw != NULL) {
7170: if (nw->_private == NULL)
7171: nw->_private = cur->_private;
7172: if (firstChild == NULL){
7173: firstChild = cur;
7174: }
7175: xmlAddChild((xmlNodePtr) ent, nw);
7176: xmlAddChild(ctxt->node, cur);
7177: }
7178: if (cur == last)
7179: break;
7180: cur = next;
7181: }
7182: if (ent->owner == 0)
7183: ent->owner = 1;
7184: #ifdef LIBXML_LEGACY_ENABLED
7185: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7186: xmlAddEntityReference(ent, firstChild, nw);
7187: #endif /* LIBXML_LEGACY_ENABLED */
7188: } else {
7189: const xmlChar *nbktext;
7190:
7191: /*
7192: * the name change is to avoid coalescing of the
7193: * node with a possible previous text one which
7194: * would make ent->children a dangling pointer
7195: */
7196: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7197: -1);
7198: if (ent->children->type == XML_TEXT_NODE)
7199: ent->children->name = nbktext;
7200: if ((ent->last != ent->children) &&
7201: (ent->last->type == XML_TEXT_NODE))
7202: ent->last->name = nbktext;
7203: xmlAddChildList(ctxt->node, ent->children);
7204: }
7205:
7206: /*
7207: * This is to avoid a nasty side effect, see
7208: * characters() in SAX.c
7209: */
7210: ctxt->nodemem = 0;
7211: ctxt->nodelen = 0;
7212: return;
7213: }
7214: }
7215: }
7215:
7216: /**
7217: * xmlParseEntityRef:
7218: * @ctxt: an XML parser context
7219: *
7220: * parse ENTITY references declarations
7221: *
7222: * [68] EntityRef ::= '&' Name ';'
7223: *
7224: * [ WFC: Entity Declared ]
7225: * In a document without any DTD, a document with only an internal DTD
7226: * subset which contains no parameter entity references, or a document
7227: * with "standalone='yes'", the Name given in the entity reference
7228: * must match that in an entity declaration, except that well-formed
7229: * documents need not declare any of the following entities: amp, lt,
7230: * gt, apos, quot. The declaration of a parameter entity must precede
7231: * any reference to it. Similarly, the declaration of a general entity
7232: * must precede any reference to it which appears in a default value in an
7233: * attribute-list declaration. Note that if entities are declared in the
7234: * external subset or in external parameter entities, a non-validating
7235: * processor is not obligated to read and process their declarations;
7236: * for such documents, the rule that an entity must be declared is a
7237: * well-formedness constraint only if standalone='yes'.
7238: *
7239: * [ WFC: Parsed Entity ]
7240: * An entity reference must not contain the name of an unparsed entity
7241: *
7242: * Returns the xmlEntityPtr if found, or NULL otherwise.
7243: */
7244: xmlEntityPtr
7245: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7246: const xmlChar *name;
7247: xmlEntityPtr ent = NULL;
7248:
7249: GROW;
7250:
7251: if (RAW != '&')
7252: return(NULL);
7253: NEXT;
7254: name = xmlParseName(ctxt);
7255: if (name == NULL) {
7256: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7257: "xmlParseEntityRef: no name\n");
7258: return(NULL);
7259: }
7260: if (RAW != ';') {
7261: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7262: return(NULL);
7263: }
7264: NEXT;
7265:
7266: /*
7267: * Predefined entites override any extra definition
7268: */
7269: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7270: ent = xmlGetPredefinedEntity(name);
7271: if (ent != NULL)
7272: return(ent);
7273: }
7274:
7275: /*
7276: * Increate the number of entity references parsed
7277: */
7278: ctxt->nbentities++;
7279:
7280: /*
7281: * Ask first SAX for entity resolution, otherwise try the
7282: * entities which may have stored in the parser context.
7283: */
7284: if (ctxt->sax != NULL) {
7285: if (ctxt->sax->getEntity != NULL)
7286: ent = ctxt->sax->getEntity(ctxt->userData, name);
7287: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7288: (ctxt->options & XML_PARSE_OLDSAX))
7289: ent = xmlGetPredefinedEntity(name);
7290: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7291: (ctxt->userData==ctxt)) {
7292: ent = xmlSAX2GetEntity(ctxt, name);
7293: }
7294: }
7295: /*
7296: * [ WFC: Entity Declared ]
7297: * In a document without any DTD, a document with only an
7298: * internal DTD subset which contains no parameter entity
7299: * references, or a document with "standalone='yes'", the
7300: * Name given in the entity reference must match that in an
7301: * entity declaration, except that well-formed documents
7302: * need not declare any of the following entities: amp, lt,
7303: * gt, apos, quot.
7304: * The declaration of a parameter entity must precede any
7305: * reference to it.
7306: * Similarly, the declaration of a general entity must
7307: * precede any reference to it which appears in a default
7308: * value in an attribute-list declaration. Note that if
7309: * entities are declared in the external subset or in
7310: * external parameter entities, a non-validating processor
7311: * is not obligated to read and process their declarations;
7312: * for such documents, the rule that an entity must be
7313: * declared is a well-formedness constraint only if
7314: * standalone='yes'.
7315: */
7316: if (ent == NULL) {
7317: if ((ctxt->standalone == 1) ||
7318: ((ctxt->hasExternalSubset == 0) &&
7319: (ctxt->hasPErefs == 0))) {
7320: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7321: "Entity '%s' not defined\n", name);
7322: } else {
7323: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7324: "Entity '%s' not defined\n", name);
7325: if ((ctxt->inSubset == 0) &&
7326: (ctxt->sax != NULL) &&
7327: (ctxt->sax->reference != NULL)) {
7328: ctxt->sax->reference(ctxt->userData, name);
7329: }
7330: }
7331: ctxt->valid = 0;
7332: }
7333:
7334: /*
7335: * [ WFC: Parsed Entity ]
7336: * An entity reference must not contain the name of an
7337: * unparsed entity
7338: */
7339: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7340: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7341: "Entity reference to unparsed entity %s\n", name);
7342: }
7343:
7344: /*
7345: * [ WFC: No External Entity References ]
7346: * Attribute values cannot contain direct or indirect
7347: * entity references to external entities.
7348: */
7349: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7350: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7351: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7352: "Attribute references external entity '%s'\n", name);
7353: }
7354: /*
7355: * [ WFC: No < in Attribute Values ]
7356: * The replacement text of any entity referred to directly or
7357: * indirectly in an attribute value (other than "<") must
7358: * not contain a <.
7359: */
7360: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7361: (ent != NULL) && (ent->content != NULL) &&
7362: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7363: (xmlStrchr(ent->content, '<'))) {
7364: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7365: "'<' in entity '%s' is not allowed in attributes values\n", name);
7366: }
7367:
7368: /*
7369: * Internal check, no parameter entities here ...
7370: */
7371: else {
7372: switch (ent->etype) {
7373: case XML_INTERNAL_PARAMETER_ENTITY:
7374: case XML_EXTERNAL_PARAMETER_ENTITY:
7375: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7376: "Attempt to reference the parameter entity '%s'\n",
7377: name);
7378: break;
7379: default:
7380: break;
7381: }
7382: }
7383:
7384: /*
7385: * [ WFC: No Recursion ]
7386: * A parsed entity must not contain a recursive reference
7387: * to itself, either directly or indirectly.
7388: * Done somewhere else
7389: */
7390: return(ent);
7391: }
7392:
7393: /**
7394: * xmlParseStringEntityRef:
7395: * @ctxt: an XML parser context
7396: * @str: a pointer to an index in the string
7397: *
7398: * parse ENTITY references declarations, but this version parses it from
7399: * a string value.
7400: *
7401: * [68] EntityRef ::= '&' Name ';'
7402: *
7403: * [ WFC: Entity Declared ]
7404: * In a document without any DTD, a document with only an internal DTD
7405: * subset which contains no parameter entity references, or a document
7406: * with "standalone='yes'", the Name given in the entity reference
7407: * must match that in an entity declaration, except that well-formed
7408: * documents need not declare any of the following entities: amp, lt,
7409: * gt, apos, quot. The declaration of a parameter entity must precede
7410: * any reference to it. Similarly, the declaration of a general entity
7411: * must precede any reference to it which appears in a default value in an
7412: * attribute-list declaration. Note that if entities are declared in the
7413: * external subset or in external parameter entities, a non-validating
7414: * processor is not obligated to read and process their declarations;
7415: * for such documents, the rule that an entity must be declared is a
7416: * well-formedness constraint only if standalone='yes'.
7417: *
7418: * [ WFC: Parsed Entity ]
7419: * An entity reference must not contain the name of an unparsed entity
7420: *
7421: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7422: * is updated to the current location in the string.
7423: */
7424: static xmlEntityPtr
7425: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7426: xmlChar *name;
7427: const xmlChar *ptr;
7428: xmlChar cur;
7429: xmlEntityPtr ent = NULL;
7430:
7431: if ((str == NULL) || (*str == NULL))
7432: return(NULL);
7433: ptr = *str;
7434: cur = *ptr;
7435: if (cur != '&')
7436: return(NULL);
7437:
7438: ptr++;
7439: name = xmlParseStringName(ctxt, &ptr);
7440: if (name == NULL) {
7441: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7442: "xmlParseStringEntityRef: no name\n");
7443: *str = ptr;
7444: return(NULL);
7445: }
7446: if (*ptr != ';') {
7447: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7448: xmlFree(name);
7449: *str = ptr;
7450: return(NULL);
7451: }
7452: ptr++;
7453:
7454:
7455: /*
7456: * Predefined entites override any extra definition
7457: */
7458: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7459: ent = xmlGetPredefinedEntity(name);
7460: if (ent != NULL) {
7461: xmlFree(name);
7462: *str = ptr;
7463: return(ent);
7464: }
7465: }
7466:
7467: /*
7468: * Increate the number of entity references parsed
7469: */
7470: ctxt->nbentities++;
7471:
7472: /*
7473: * Ask first SAX for entity resolution, otherwise try the
7474: * entities which may have stored in the parser context.
7475: */
7476: if (ctxt->sax != NULL) {
7477: if (ctxt->sax->getEntity != NULL)
7478: ent = ctxt->sax->getEntity(ctxt->userData, name);
7479: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7480: ent = xmlGetPredefinedEntity(name);
7481: if ((ent == NULL) && (ctxt->userData==ctxt)) {
7482: ent = xmlSAX2GetEntity(ctxt, name);
7483: }
7484: }
7485:
7486: /*
7487: * [ WFC: Entity Declared ]
7488: * In a document without any DTD, a document with only an
7489: * internal DTD subset which contains no parameter entity
7490: * references, or a document with "standalone='yes'", the
7491: * Name given in the entity reference must match that in an
7492: * entity declaration, except that well-formed documents
7493: * need not declare any of the following entities: amp, lt,
7494: * gt, apos, quot.
7495: * The declaration of a parameter entity must precede any
7496: * reference to it.
7497: * Similarly, the declaration of a general entity must
7498: * precede any reference to it which appears in a default
7499: * value in an attribute-list declaration. Note that if
7500: * entities are declared in the external subset or in
7501: * external parameter entities, a non-validating processor
7502: * is not obligated to read and process their declarations;
7503: * for such documents, the rule that an entity must be
7504: * declared is a well-formedness constraint only if
7505: * standalone='yes'.
7506: */
7507: if (ent == NULL) {
7508: if ((ctxt->standalone == 1) ||
7509: ((ctxt->hasExternalSubset == 0) &&
7510: (ctxt->hasPErefs == 0))) {
7511: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7512: "Entity '%s' not defined\n", name);
7513: } else {
7514: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7515: "Entity '%s' not defined\n",
7516: name);
7517: }
7518: /* TODO ? check regressions ctxt->valid = 0; */
7519: }
7520:
7521: /*
7522: * [ WFC: Parsed Entity ]
7523: * An entity reference must not contain the name of an
7524: * unparsed entity
7525: */
7526: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7527: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7528: "Entity reference to unparsed entity %s\n", name);
7529: }
7530:
7531: /*
7532: * [ WFC: No External Entity References ]
7533: * Attribute values cannot contain direct or indirect
7534: * entity references to external entities.
7535: */
7536: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7537: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7538: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7539: "Attribute references external entity '%s'\n", name);
7540: }
7541: /*
7542: * [ WFC: No < in Attribute Values ]
7543: * The replacement text of any entity referred to directly or
7544: * indirectly in an attribute value (other than "<") must
7545: * not contain a <.
7546: */
7547: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7548: (ent != NULL) && (ent->content != NULL) &&
7549: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7550: (xmlStrchr(ent->content, '<'))) {
7551: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7552: "'<' in entity '%s' is not allowed in attributes values\n",
7553: name);
7554: }
7555:
7556: /*
7557: * Internal check, no parameter entities here ...
7558: */
7559: else {
7560: switch (ent->etype) {
7561: case XML_INTERNAL_PARAMETER_ENTITY:
7562: case XML_EXTERNAL_PARAMETER_ENTITY:
7563: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7564: "Attempt to reference the parameter entity '%s'\n",
7565: name);
7566: break;
7567: default:
7568: break;
7569: }
7570: }
7571:
7572: /*
7573: * [ WFC: No Recursion ]
7574: * A parsed entity must not contain a recursive reference
7575: * to itself, either directly or indirectly.
7576: * Done somewhere else
7577: */
7578:
7579: xmlFree(name);
7580: *str = ptr;
7581: return(ent);
7582: }
7583:
7584: /**
7585: * xmlParsePEReference:
7586: * @ctxt: an XML parser context
7587: *
7588: * parse PEReference declarations
7589: * The entity content is handled directly by pushing it's content as
7590: * a new input stream.
7591: *
7592: * [69] PEReference ::= '%' Name ';'
7593: *
7594: * [ WFC: No Recursion ]
7595: * A parsed entity must not contain a recursive
7596: * reference to itself, either directly or indirectly.
7597: *
7598: * [ WFC: Entity Declared ]
7599: * In a document without any DTD, a document with only an internal DTD
7600: * subset which contains no parameter entity references, or a document
7601: * with "standalone='yes'", ... ... The declaration of a parameter
7602: * entity must precede any reference to it...
7603: *
7604: * [ VC: Entity Declared ]
7605: * In a document with an external subset or external parameter entities
7606: * with "standalone='no'", ... ... The declaration of a parameter entity
7607: * must precede any reference to it...
7608: *
7609: * [ WFC: In DTD ]
7610: * Parameter-entity references may only appear in the DTD.
7611: * NOTE: misleading but this is handled.
7612: */
7613: void
7614: xmlParsePEReference(xmlParserCtxtPtr ctxt)
7615: {
7616: const xmlChar *name;
7617: xmlEntityPtr entity = NULL;
7618: xmlParserInputPtr input;
7619:
7620: if (RAW != '%')
7621: return;
7622: NEXT;
7623: name = xmlParseName(ctxt);
7624: if (name == NULL) {
7625: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7626: "xmlParsePEReference: no name\n");
7627: return;
7628: }
7629: if (RAW != ';') {
7630: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7631: return;
7632: }
7633:
7634: NEXT;
7635:
7636: /*
7637: * Increate the number of entity references parsed
7638: */
7639: ctxt->nbentities++;
7640:
7641: /*
7642: * Request the entity from SAX
7643: */
7644: if ((ctxt->sax != NULL) &&
7645: (ctxt->sax->getParameterEntity != NULL))
7646: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7647: name);
7648: if (entity == NULL) {
7649: /*
7650: * [ WFC: Entity Declared ]
7651: * In a document without any DTD, a document with only an
7652: * internal DTD subset which contains no parameter entity
7653: * references, or a document with "standalone='yes'", ...
7654: * ... The declaration of a parameter entity must precede
7655: * any reference to it...
7656: */
7657: if ((ctxt->standalone == 1) ||
7658: ((ctxt->hasExternalSubset == 0) &&
7659: (ctxt->hasPErefs == 0))) {
7660: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7661: "PEReference: %%%s; not found\n",
7662: name);
7663: } else {
7664: /*
7665: * [ VC: Entity Declared ]
7666: * In a document with an external subset or external
7667: * parameter entities with "standalone='no'", ...
7668: * ... The declaration of a parameter entity must
7669: * precede any reference to it...
7670: */
7671: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7672: "PEReference: %%%s; not found\n",
7673: name, NULL);
7674: ctxt->valid = 0;
7675: }
7676: } else {
7677: /*
7678: * Internal checking in case the entity quest barfed
7679: */
7680: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7681: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7682: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7683: "Internal: %%%s; is not a parameter entity\n",
7684: name, NULL);
7685: } else if (ctxt->input->free != deallocblankswrapper) {
7686: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7687: if (xmlPushInput(ctxt, input) < 0)
7688: return;
7689: } else {
7690: /*
7691: * TODO !!!
7692: * handle the extra spaces added before and after
7693: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7694: */
7695: input = xmlNewEntityInputStream(ctxt, entity);
7696: if (xmlPushInput(ctxt, input) < 0)
7697: return;
7698: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7699: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7700: (IS_BLANK_CH(NXT(5)))) {
7701: xmlParseTextDecl(ctxt);
7702: if (ctxt->errNo ==
7703: XML_ERR_UNSUPPORTED_ENCODING) {
7704: /*
7705: * The XML REC instructs us to stop parsing
7706: * right here
7707: */
7708: ctxt->instate = XML_PARSER_EOF;
7709: return;
7710: }
7711: }
7712: }
7713: }
7714: ctxt->hasPErefs = 1;
7715: }
7716:
7717: /**
7718: * xmlLoadEntityContent:
7719: * @ctxt: an XML parser context
7720: * @entity: an unloaded system entity
7721: *
7722: * Load the original content of the given system entity from the
7723: * ExternalID/SystemID given. This is to be used for Included in Literal
7724: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7725: *
7726: * Returns 0 in case of success and -1 in case of failure
7727: */
7728: static int
7729: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7730: xmlParserInputPtr input;
7731: xmlBufferPtr buf;
7732: int l, c;
7733: int count = 0;
7734:
7735: if ((ctxt == NULL) || (entity == NULL) ||
7736: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738: (entity->content != NULL)) {
7739: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7740: "xmlLoadEntityContent parameter error");
7741: return(-1);
7742: }
7743:
7744: if (xmlParserDebugEntities)
7745: xmlGenericError(xmlGenericErrorContext,
7746: "Reading %s entity content input\n", entity->name);
7747:
7748: buf = xmlBufferCreate();
7749: if (buf == NULL) {
7750: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7751: "xmlLoadEntityContent parameter error");
7752: return(-1);
7753: }
7754:
7755: input = xmlNewEntityInputStream(ctxt, entity);
7756: if (input == NULL) {
7757: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7758: "xmlLoadEntityContent input error");
7759: xmlBufferFree(buf);
7760: return(-1);
7761: }
7762:
7763: /*
7764: * Push the entity as the current input, read char by char
7765: * saving to the buffer until the end of the entity or an error
7766: */
7767: if (xmlPushInput(ctxt, input) < 0) {
7768: xmlBufferFree(buf);
7769: return(-1);
7770: }
7771:
7772: GROW;
7773: c = CUR_CHAR(l);
7774: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7775: (IS_CHAR(c))) {
7776: xmlBufferAdd(buf, ctxt->input->cur, l);
7777: if (count++ > 100) {
7778: count = 0;
7779: GROW;
7780: }
7781: NEXTL(l);
7782: c = CUR_CHAR(l);
7783: }
7784:
7785: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7786: xmlPopInput(ctxt);
7787: } else if (!IS_CHAR(c)) {
7788: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7789: "xmlLoadEntityContent: invalid char value %d\n",
7790: c);
7791: xmlBufferFree(buf);
7792: return(-1);
7793: }
7794: entity->content = buf->content;
7795: buf->content = NULL;
7796: xmlBufferFree(buf);
7797:
7798: return(0);
7799: }
7800:
7801: /**
7802: * xmlParseStringPEReference:
7803: * @ctxt: an XML parser context
7804: * @str: a pointer to an index in the string
7805: *
7806: * parse PEReference declarations
7807: *
7808: * [69] PEReference ::= '%' Name ';'
7809: *
7810: * [ WFC: No Recursion ]
7811: * A parsed entity must not contain a recursive
7812: * reference to itself, either directly or indirectly.
7813: *
7814: * [ WFC: Entity Declared ]
7815: * In a document without any DTD, a document with only an internal DTD
7816: * subset which contains no parameter entity references, or a document
7817: * with "standalone='yes'", ... ... The declaration of a parameter
7818: * entity must precede any reference to it...
7819: *
7820: * [ VC: Entity Declared ]
7821: * In a document with an external subset or external parameter entities
7822: * with "standalone='no'", ... ... The declaration of a parameter entity
7823: * must precede any reference to it...
7824: *
7825: * [ WFC: In DTD ]
7826: * Parameter-entity references may only appear in the DTD.
7827: * NOTE: misleading but this is handled.
7828: *
7829: * Returns the string of the entity content.
7830: * str is updated to the current value of the index
7831: */
7832: static xmlEntityPtr
7833: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7834: const xmlChar *ptr;
7835: xmlChar cur;
7836: xmlChar *name;
7837: xmlEntityPtr entity = NULL;
7838:
7839: if ((str == NULL) || (*str == NULL)) return(NULL);
7840: ptr = *str;
7841: cur = *ptr;
7842: if (cur != '%')
7843: return(NULL);
7844: ptr++;
7845: name = xmlParseStringName(ctxt, &ptr);
7846: if (name == NULL) {
7847: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7848: "xmlParseStringPEReference: no name\n");
7849: *str = ptr;
7850: return(NULL);
7851: }
7852: cur = *ptr;
7853: if (cur != ';') {
7854: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7855: xmlFree(name);
7856: *str = ptr;
7857: return(NULL);
7858: }
7859: ptr++;
7860:
7861: /*
7862: * Increate the number of entity references parsed
7863: */
7864: ctxt->nbentities++;
7865:
7866: /*
7867: * Request the entity from SAX
7868: */
7869: if ((ctxt->sax != NULL) &&
7870: (ctxt->sax->getParameterEntity != NULL))
7871: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7872: name);
7873: if (entity == NULL) {
7874: /*
7875: * [ WFC: Entity Declared ]
7876: * In a document without any DTD, a document with only an
7877: * internal DTD subset which contains no parameter entity
7878: * references, or a document with "standalone='yes'", ...
7879: * ... The declaration of a parameter entity must precede
7880: * any reference to it...
7881: */
7882: if ((ctxt->standalone == 1) ||
7883: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7884: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7885: "PEReference: %%%s; not found\n", name);
7886: } else {
7887: /*
7888: * [ VC: Entity Declared ]
7889: * In a document with an external subset or external
7890: * parameter entities with "standalone='no'", ...
7891: * ... The declaration of a parameter entity must
7892: * precede any reference to it...
7893: */
7894: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895: "PEReference: %%%s; not found\n",
7896: name, NULL);
7897: ctxt->valid = 0;
7898: }
7899: } else {
7900: /*
7901: * Internal checking in case the entity quest barfed
7902: */
7903: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7904: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7905: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7906: "%%%s; is not a parameter entity\n",
7907: name, NULL);
7908: }
7909: }
7910: ctxt->hasPErefs = 1;
7911: xmlFree(name);
7912: *str = ptr;
7913: return(entity);
7914: }
7915:
7916: /**
7917: * xmlParseDocTypeDecl:
7918: * @ctxt: an XML parser context
7919: *
7920: * parse a DOCTYPE declaration
7921: *
7922: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7923: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7924: *
7925: * [ VC: Root Element Type ]
7926: * The Name in the document type declaration must match the element
7927: * type of the root element.
7928: */
7929:
7930: void
7931: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7932: const xmlChar *name = NULL;
7933: xmlChar *ExternalID = NULL;
7934: xmlChar *URI = NULL;
7935:
7936: /*
7937: * We know that '<!DOCTYPE' has been detected.
7938: */
7939: SKIP(9);
7940:
7941: SKIP_BLANKS;
7942:
7943: /*
7944: * Parse the DOCTYPE name.
7945: */
7946: name = xmlParseName(ctxt);
7947: if (name == NULL) {
7948: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7949: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7950: }
7951: ctxt->intSubName = name;
7952:
7953: SKIP_BLANKS;
7954:
7955: /*
7956: * Check for SystemID and ExternalID
7957: */
7958: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7959:
7960: if ((URI != NULL) || (ExternalID != NULL)) {
7961: ctxt->hasExternalSubset = 1;
7962: }
7963: ctxt->extSubURI = URI;
7964: ctxt->extSubSystem = ExternalID;
7965:
7966: SKIP_BLANKS;
7967:
7968: /*
7969: * Create and update the internal subset.
7970: */
7971: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7972: (!ctxt->disableSAX))
7973: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7974:
7975: /*
7976: * Is there any internal subset declarations ?
7977: * they are handled separately in xmlParseInternalSubset()
7978: */
7979: if (RAW == '[')
7980: return;
7981:
7982: /*
7983: * We should be at the end of the DOCTYPE declaration.
7984: */
7985: if (RAW != '>') {
7986: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7987: }
7988: NEXT;
7989: }
7990:
7991: /**
7992: * xmlParseInternalSubset:
7993: * @ctxt: an XML parser context
7994: *
7995: * parse the internal subset declaration
7996: *
7997: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7998: */
7999:
8000: static void
8001: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8002: /*
8003: * Is there any DTD definition ?
8004: */
8005: if (RAW == '[') {
8006: ctxt->instate = XML_PARSER_DTD;
8007: NEXT;
8008: /*
8009: * Parse the succession of Markup declarations and
8010: * PEReferences.
8011: * Subsequence (markupdecl | PEReference | S)*
8012: */
8013: while (RAW != ']') {
8014: const xmlChar *check = CUR_PTR;
8015: unsigned int cons = ctxt->input->consumed;
8016:
8017: SKIP_BLANKS;
8018: xmlParseMarkupDecl(ctxt);
8019: xmlParsePEReference(ctxt);
8020:
8021: /*
8022: * Pop-up of finished entities.
8023: */
8024: while ((RAW == 0) && (ctxt->inputNr > 1))
8025: xmlPopInput(ctxt);
8026:
8027: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8028: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8029: "xmlParseInternalSubset: error detected in Markup declaration\n");
8030: break;
8031: }
8032: }
8033: if (RAW == ']') {
8034: NEXT;
8035: SKIP_BLANKS;
8036: }
8037: }
8038:
8039: /*
8040: * We should be at the end of the DOCTYPE declaration.
8041: */
8042: if (RAW != '>') {
8043: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8044: }
8045: NEXT;
8046: }
8047:
8048: #ifdef LIBXML_SAX1_ENABLED
8049: /**
8050: * xmlParseAttribute:
8051: * @ctxt: an XML parser context
8052: * @value: a xmlChar ** used to store the value of the attribute
8053: *
8054: * parse an attribute
8055: *
8056: * [41] Attribute ::= Name Eq AttValue
8057: *
8058: * [ WFC: No External Entity References ]
8059: * Attribute values cannot contain direct or indirect entity references
8060: * to external entities.
8061: *
8062: * [ WFC: No < in Attribute Values ]
8063: * The replacement text of any entity referred to directly or indirectly in
8064: * an attribute value (other than "<") must not contain a <.
8065: *
8066: * [ VC: Attribute Value Type ]
8067: * The attribute must have been declared; the value must be of the type
8068: * declared for it.
8069: *
8070: * [25] Eq ::= S? '=' S?
8071: *
8072: * With namespace:
8073: *
8074: * [NS 11] Attribute ::= QName Eq AttValue
8075: *
8076: * Also the case QName == xmlns:??? is handled independently as a namespace
8077: * definition.
8078: *
8079: * Returns the attribute name, and the value in *value.
8080: */
8081:
8082: const xmlChar *
8083: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8084: const xmlChar *name;
8085: xmlChar *val;
8086:
8087: *value = NULL;
8088: GROW;
8089: name = xmlParseName(ctxt);
8090: if (name == NULL) {
8091: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8092: "error parsing attribute name\n");
8093: return(NULL);
8094: }
8095:
8096: /*
8097: * read the value
8098: */
8099: SKIP_BLANKS;
8100: if (RAW == '=') {
8101: NEXT;
8102: SKIP_BLANKS;
8103: val = xmlParseAttValue(ctxt);
8104: ctxt->instate = XML_PARSER_CONTENT;
8105: } else {
8106: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8107: "Specification mandate value for attribute %s\n", name);
8108: return(NULL);
8109: }
8110:
8111: /*
8112: * Check that xml:lang conforms to the specification
8113: * No more registered as an error, just generate a warning now
8114: * since this was deprecated in XML second edition
8115: */
8116: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8117: if (!xmlCheckLanguageID(val)) {
8118: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8119: "Malformed value for xml:lang : %s\n",
8120: val, NULL);
8121: }
8122: }
8123:
8124: /*
8125: * Check that xml:space conforms to the specification
8126: */
8127: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8128: if (xmlStrEqual(val, BAD_CAST "default"))
8129: *(ctxt->space) = 0;
8130: else if (xmlStrEqual(val, BAD_CAST "preserve"))
8131: *(ctxt->space) = 1;
8132: else {
8133: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8134: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8135: val, NULL);
8136: }
8137: }
8138:
8139: *value = val;
8140: return(name);
8141: }
8142:
8143: /**
8144: * xmlParseStartTag:
8145: * @ctxt: an XML parser context
8146: *
8147: * parse a start of tag either for rule element or
8148: * EmptyElement. In both case we don't parse the tag closing chars.
8149: *
8150: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8151: *
8152: * [ WFC: Unique Att Spec ]
8153: * No attribute name may appear more than once in the same start-tag or
8154: * empty-element tag.
8155: *
8156: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8157: *
8158: * [ WFC: Unique Att Spec ]
8159: * No attribute name may appear more than once in the same start-tag or
8160: * empty-element tag.
8161: *
8162: * With namespace:
8163: *
8164: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8165: *
8166: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8167: *
8168: * Returns the element name parsed
8169: */
8170:
8171: const xmlChar *
8172: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8173: const xmlChar *name;
8174: const xmlChar *attname;
8175: xmlChar *attvalue;
8176: const xmlChar **atts = ctxt->atts;
8177: int nbatts = 0;
8178: int maxatts = ctxt->maxatts;
8179: int i;
8180:
8181: if (RAW != '<') return(NULL);
8182: NEXT1;
8183:
8184: name = xmlParseName(ctxt);
8185: if (name == NULL) {
8186: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8187: "xmlParseStartTag: invalid element name\n");
8188: return(NULL);
8189: }
8190:
8191: /*
8192: * Now parse the attributes, it ends up with the ending
8193: *
8194: * (S Attribute)* S?
8195: */
8196: SKIP_BLANKS;
8197: GROW;
8198:
8199: while ((RAW != '>') &&
8200: ((RAW != '/') || (NXT(1) != '>')) &&
8201: (IS_BYTE_CHAR(RAW))) {
8202: const xmlChar *q = CUR_PTR;
8203: unsigned int cons = ctxt->input->consumed;
8204:
8205: attname = xmlParseAttribute(ctxt, &attvalue);
8206: if ((attname != NULL) && (attvalue != NULL)) {
8207: /*
8208: * [ WFC: Unique Att Spec ]
8209: * No attribute name may appear more than once in the same
8210: * start-tag or empty-element tag.
8211: */
8212: for (i = 0; i < nbatts;i += 2) {
8213: if (xmlStrEqual(atts[i], attname)) {
8214: xmlErrAttributeDup(ctxt, NULL, attname);
8215: xmlFree(attvalue);
8216: goto failed;
8217: }
8218: }
8219: /*
8220: * Add the pair to atts
8221: */
8222: if (atts == NULL) {
8223: maxatts = 22; /* allow for 10 attrs by default */
8224: atts = (const xmlChar **)
8225: xmlMalloc(maxatts * sizeof(xmlChar *));
8226: if (atts == NULL) {
8227: xmlErrMemory(ctxt, NULL);
8228: if (attvalue != NULL)
8229: xmlFree(attvalue);
8230: goto failed;
8231: }
8232: ctxt->atts = atts;
8233: ctxt->maxatts = maxatts;
8234: } else if (nbatts + 4 > maxatts) {
8235: const xmlChar **n;
8236:
8237: maxatts *= 2;
8238: n = (const xmlChar **) xmlRealloc((void *) atts,
8239: maxatts * sizeof(const xmlChar *));
8240: if (n == NULL) {
8241: xmlErrMemory(ctxt, NULL);
8242: if (attvalue != NULL)
8243: xmlFree(attvalue);
8244: goto failed;
8245: }
8246: atts = n;
8247: ctxt->atts = atts;
8248: ctxt->maxatts = maxatts;
8249: }
8250: atts[nbatts++] = attname;
8251: atts[nbatts++] = attvalue;
8252: atts[nbatts] = NULL;
8253: atts[nbatts + 1] = NULL;
8254: } else {
8255: if (attvalue != NULL)
8256: xmlFree(attvalue);
8257: }
8258:
8259: failed:
8260:
8261: GROW
8262: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8263: break;
8264: if (!IS_BLANK_CH(RAW)) {
8265: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8266: "attributes construct error\n");
8267: }
8268: SKIP_BLANKS;
8269: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8270: (attname == NULL) && (attvalue == NULL)) {
8271: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8272: "xmlParseStartTag: problem parsing attributes\n");
8273: break;
8274: }
8275: SHRINK;
8276: GROW;
8277: }
8278:
8279: /*
8280: * SAX: Start of Element !
8281: */
8282: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8283: (!ctxt->disableSAX)) {
8284: if (nbatts > 0)
8285: ctxt->sax->startElement(ctxt->userData, name, atts);
8286: else
8287: ctxt->sax->startElement(ctxt->userData, name, NULL);
8288: }
8289:
8290: if (atts != NULL) {
8291: /* Free only the content strings */
8292: for (i = 1;i < nbatts;i+=2)
8293: if (atts[i] != NULL)
8294: xmlFree((xmlChar *) atts[i]);
8295: }
8296: return(name);
8297: }
8298:
8299: /**
8300: * xmlParseEndTag1:
8301: * @ctxt: an XML parser context
8302: * @line: line of the start tag
8303: * @nsNr: number of namespaces on the start tag
8304: *
8305: * parse an end of tag
8306: *
8307: * [42] ETag ::= '</' Name S? '>'
8308: *
8309: * With namespace
8310: *
8311: * [NS 9] ETag ::= '</' QName S? '>'
8312: */
8313:
8314: static void
8315: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8316: const xmlChar *name;
8317:
8318: GROW;
8319: if ((RAW != '<') || (NXT(1) != '/')) {
8320: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8321: "xmlParseEndTag: '</' not found\n");
8322: return;
8323: }
8324: SKIP(2);
8325:
8326: name = xmlParseNameAndCompare(ctxt,ctxt->name);
8327:
8328: /*
8329: * We should definitely be at the ending "S? '>'" part
8330: */
8331: GROW;
8332: SKIP_BLANKS;
8333: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8334: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8335: } else
8336: NEXT1;
8337:
8338: /*
8339: * [ WFC: Element Type Match ]
8340: * The Name in an element's end-tag must match the element type in the
8341: * start-tag.
8342: *
8343: */
8344: if (name != (xmlChar*)1) {
8345: if (name == NULL) name = BAD_CAST "unparseable";
8346: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8347: "Opening and ending tag mismatch: %s line %d and %s\n",
8348: ctxt->name, line, name);
8349: }
8350:
8351: /*
8352: * SAX: End of Tag
8353: */
8354: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8355: (!ctxt->disableSAX))
8356: ctxt->sax->endElement(ctxt->userData, ctxt->name);
8357:
8358: namePop(ctxt);
8359: spacePop(ctxt);
8360: return;
8361: }
8362:
8363: /**
8364: * xmlParseEndTag:
8365: * @ctxt: an XML parser context
8366: *
8367: * parse an end of tag
8368: *
8369: * [42] ETag ::= '</' Name S? '>'
8370: *
8371: * With namespace
8372: *
8373: * [NS 9] ETag ::= '</' QName S? '>'
8374: */
8375:
8376: void
8377: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8378: xmlParseEndTag1(ctxt, 0);
8379: }
8380: #endif /* LIBXML_SAX1_ENABLED */
8381:
8382: /************************************************************************
8383: * *
8384: * SAX 2 specific operations *
8385: * *
8386: ************************************************************************/
8387:
8388: /*
8389: * xmlGetNamespace:
8390: * @ctxt: an XML parser context
8391: * @prefix: the prefix to lookup
8392: *
8393: * Lookup the namespace name for the @prefix (which ca be NULL)
8394: * The prefix must come from the @ctxt->dict dictionnary
8395: *
8396: * Returns the namespace name or NULL if not bound
8397: */
8398: static const xmlChar *
8399: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8400: int i;
8401:
8402: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8403: for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8404: if (ctxt->nsTab[i] == prefix) {
8405: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8406: return(NULL);
8407: return(ctxt->nsTab[i + 1]);
8408: }
8409: return(NULL);
8410: }
8411:
8412: /**
8413: * xmlParseQName:
8414: * @ctxt: an XML parser context
8415: * @prefix: pointer to store the prefix part
8416: *
8417: * parse an XML Namespace QName
8418: *
8419: * [6] QName ::= (Prefix ':')? LocalPart
8420: * [7] Prefix ::= NCName
8421: * [8] LocalPart ::= NCName
8422: *
8423: * Returns the Name parsed or NULL
8424: */
8425:
8426: static const xmlChar *
8427: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8428: const xmlChar *l, *p;
8429:
8430: GROW;
8431:
8432: l = xmlParseNCName(ctxt);
8433: if (l == NULL) {
8434: if (CUR == ':') {
8435: l = xmlParseName(ctxt);
8436: if (l != NULL) {
8437: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8438: "Failed to parse QName '%s'\n", l, NULL, NULL);
8439: *prefix = NULL;
8440: return(l);
8441: }
8442: }
8443: return(NULL);
8444: }
8445: if (CUR == ':') {
8446: NEXT;
8447: p = l;
8448: l = xmlParseNCName(ctxt);
8449: if (l == NULL) {
8450: xmlChar *tmp;
8451:
8452: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8453: "Failed to parse QName '%s:'\n", p, NULL, NULL);
8454: l = xmlParseNmtoken(ctxt);
8455: if (l == NULL)
8456: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8457: else {
8458: tmp = xmlBuildQName(l, p, NULL, 0);
8459: xmlFree((char *)l);
8460: }
8461: p = xmlDictLookup(ctxt->dict, tmp, -1);
8462: if (tmp != NULL) xmlFree(tmp);
8463: *prefix = NULL;
8464: return(p);
8465: }
8466: if (CUR == ':') {
8467: xmlChar *tmp;
8468:
8469: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8470: "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8471: NEXT;
8472: tmp = (xmlChar *) xmlParseName(ctxt);
8473: if (tmp != NULL) {
8474: tmp = xmlBuildQName(tmp, l, NULL, 0);
8475: l = xmlDictLookup(ctxt->dict, tmp, -1);
8476: if (tmp != NULL) xmlFree(tmp);
8477: *prefix = p;
8478: return(l);
8479: }
8480: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8481: l = xmlDictLookup(ctxt->dict, tmp, -1);
8482: if (tmp != NULL) xmlFree(tmp);
8483: *prefix = p;
8484: return(l);
8485: }
8486: *prefix = p;
8487: } else
8488: *prefix = NULL;
8489: return(l);
8490: }
8491:
8492: /**
8493: * xmlParseQNameAndCompare:
8494: * @ctxt: an XML parser context
8495: * @name: the localname
8496: * @prefix: the prefix, if any.
8497: *
8498: * parse an XML name and compares for match
8499: * (specialized for endtag parsing)
8500: *
8501: * Returns NULL for an illegal name, (xmlChar*) 1 for success
8502: * and the name for mismatch
8503: */
8504:
8505: static const xmlChar *
8506: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8507: xmlChar const *prefix) {
8508: const xmlChar *cmp;
8509: const xmlChar *in;
8510: const xmlChar *ret;
8511: const xmlChar *prefix2;
8512:
8513: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8514:
8515: GROW;
8516: in = ctxt->input->cur;
8517:
8518: cmp = prefix;
8519: while (*in != 0 && *in == *cmp) {
8520: ++in;
8521: ++cmp;
8522: }
8523: if ((*cmp == 0) && (*in == ':')) {
8524: in++;
8525: cmp = name;
8526: while (*in != 0 && *in == *cmp) {
8527: ++in;
8528: ++cmp;
8529: }
8530: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8531: /* success */
8532: ctxt->input->cur = in;
8533: return((const xmlChar*) 1);
8534: }
8535: }
8536: /*
8537: * all strings coms from the dictionary, equality can be done directly
8538: */
8539: ret = xmlParseQName (ctxt, &prefix2);
8540: if ((ret == name) && (prefix == prefix2))
8541: return((const xmlChar*) 1);
8542: return ret;
8543: }
8544:
8545: /**
8546: * xmlParseAttValueInternal:
8547: * @ctxt: an XML parser context
8548: * @len: attribute len result
8549: * @alloc: whether the attribute was reallocated as a new string
8550: * @normalize: if 1 then further non-CDATA normalization must be done
8551: *
8552: * parse a value for an attribute.
8553: * NOTE: if no normalization is needed, the routine will return pointers
8554: * directly from the data buffer.
8555: *
8556: * 3.3.3 Attribute-Value Normalization:
8557: * Before the value of an attribute is passed to the application or
8558: * checked for validity, the XML processor must normalize it as follows:
8559: * - a character reference is processed by appending the referenced
8560: * character to the attribute value
8561: * - an entity reference is processed by recursively processing the
8562: * replacement text of the entity
8563: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8564: * appending #x20 to the normalized value, except that only a single
8565: * #x20 is appended for a "#xD#xA" sequence that is part of an external
8566: * parsed entity or the literal entity value of an internal parsed entity
8567: * - other characters are processed by appending them to the normalized value
8568: * If the declared value is not CDATA, then the XML processor must further
8569: * process the normalized attribute value by discarding any leading and
8570: * trailing space (#x20) characters, and by replacing sequences of space
8571: * (#x20) characters by a single space (#x20) character.
8572: * All attributes for which no declaration has been read should be treated
8573: * by a non-validating parser as if declared CDATA.
8574: *
8575: * Returns the AttValue parsed or NULL. The value has to be freed by the
8576: * caller if it was copied, this can be detected by val[*len] == 0.
8577: */
8578:
8579: static xmlChar *
8580: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8581: int normalize)
8582: {
8583: xmlChar limit = 0;
8584: const xmlChar *in = NULL, *start, *end, *last;
8585: xmlChar *ret = NULL;
8586:
8587: GROW;
8588: in = (xmlChar *) CUR_PTR;
8589: if (*in != '"' && *in != '\'') {
8590: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8591: return (NULL);
8592: }
8593: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8594:
8595: /*
8596: * try to handle in this routine the most common case where no
8597: * allocation of a new string is required and where content is
8598: * pure ASCII.
8599: */
8600: limit = *in++;
8601: end = ctxt->input->end;
8602: start = in;
8603: if (in >= end) {
8604: const xmlChar *oldbase = ctxt->input->base;
8605: GROW;
8606: if (oldbase != ctxt->input->base) {
8607: long delta = ctxt->input->base - oldbase;
8608: start = start + delta;
8609: in = in + delta;
8610: }
8611: end = ctxt->input->end;
8612: }
8613: if (normalize) {
8614: /*
8615: * Skip any leading spaces
8616: */
8617: while ((in < end) && (*in != limit) &&
8618: ((*in == 0x20) || (*in == 0x9) ||
8619: (*in == 0xA) || (*in == 0xD))) {
8620: in++;
8621: start = in;
8622: if (in >= end) {
8623: const xmlChar *oldbase = ctxt->input->base;
8624: GROW;
8625: if (oldbase != ctxt->input->base) {
8626: long delta = ctxt->input->base - oldbase;
8627: start = start + delta;
8628: in = in + delta;
8629: }
8630: end = ctxt->input->end;
8631: }
8632: }
8633: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8634: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8635: if ((*in++ == 0x20) && (*in == 0x20)) break;
8636: if (in >= end) {
8637: const xmlChar *oldbase = ctxt->input->base;
8638: GROW;
8639: if (oldbase != ctxt->input->base) {
8640: long delta = ctxt->input->base - oldbase;
8641: start = start + delta;
8642: in = in + delta;
8643: }
8644: end = ctxt->input->end;
8645: }
8646: }
8647: last = in;
8648: /*
8649: * skip the trailing blanks
8650: */
8651: while ((last[-1] == 0x20) && (last > start)) last--;
8652: while ((in < end) && (*in != limit) &&
8653: ((*in == 0x20) || (*in == 0x9) ||
8654: (*in == 0xA) || (*in == 0xD))) {
8655: in++;
8656: if (in >= end) {
8657: const xmlChar *oldbase = ctxt->input->base;
8658: GROW;
8659: if (oldbase != ctxt->input->base) {
8660: long delta = ctxt->input->base - oldbase;
8661: start = start + delta;
8662: in = in + delta;
8663: last = last + delta;
8664: }
8665: end = ctxt->input->end;
8666: }
8667: }
8668: if (*in != limit) goto need_complex;
8669: } else {
8670: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8671: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8672: in++;
8673: if (in >= end) {
8674: const xmlChar *oldbase = ctxt->input->base;
8675: GROW;
8676: if (oldbase != ctxt->input->base) {
8677: long delta = ctxt->input->base - oldbase;
8678: start = start + delta;
8679: in = in + delta;
8680: }
8681: end = ctxt->input->end;
8682: }
8683: }
8684: last = in;
8685: if (*in != limit) goto need_complex;
8686: }
8687: in++;
8688: if (len != NULL) {
8689: *len = last - start;
8690: ret = (xmlChar *) start;
8691: } else {
8692: if (alloc) *alloc = 1;
8693: ret = xmlStrndup(start, last - start);
8694: }
8695: CUR_PTR = in;
8696: if (alloc) *alloc = 0;
8697: return ret;
8698: need_complex:
8699: if (alloc) *alloc = 1;
8700: return xmlParseAttValueComplex(ctxt, len, normalize);
8701: }
8702:
8703: /**
8704: * xmlParseAttribute2:
8705: * @ctxt: an XML parser context
8706: * @pref: the element prefix
8707: * @elem: the element name
8708: * @prefix: a xmlChar ** used to store the value of the attribute prefix
8709: * @value: a xmlChar ** used to store the value of the attribute
8710: * @len: an int * to save the length of the attribute
8711: * @alloc: an int * to indicate if the attribute was allocated
8712: *
8713: * parse an attribute in the new SAX2 framework.
8714: *
8715: * Returns the attribute name, and the value in *value, .
8716: */
8717:
8718: static const xmlChar *
8719: xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8720: const xmlChar * pref, const xmlChar * elem,
8721: const xmlChar ** prefix, xmlChar ** value,
8722: int *len, int *alloc)
8723: {
8724: const xmlChar *name;
8725: xmlChar *val, *internal_val = NULL;
8726: int normalize = 0;
8727:
8728: *value = NULL;
8729: GROW;
8730: name = xmlParseQName(ctxt, prefix);
8731: if (name == NULL) {
8732: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8733: "error parsing attribute name\n");
8734: return (NULL);
8735: }
8736:
8737: /*
8738: * get the type if needed
8739: */
8740: if (ctxt->attsSpecial != NULL) {
8741: int type;
8742:
8743: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8744: pref, elem, *prefix, name);
8745: if (type != 0)
8746: normalize = 1;
8747: }
8748:
8749: /*
8750: * read the value
8751: */
8752: SKIP_BLANKS;
8753: if (RAW == '=') {
8754: NEXT;
8755: SKIP_BLANKS;
8756: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8757: if (normalize) {
8758: /*
8759: * Sometimes a second normalisation pass for spaces is needed
8760: * but that only happens if charrefs or entities refernces
8761: * have been used in the attribute value, i.e. the attribute
8762: * value have been extracted in an allocated string already.
8763: */
8764: if (*alloc) {
8765: const xmlChar *val2;
8766:
8767: val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8768: if ((val2 != NULL) && (val2 != val)) {
8769: xmlFree(val);
8770: val = (xmlChar *) val2;
8771: }
8772: }
8773: }
8774: ctxt->instate = XML_PARSER_CONTENT;
8775: } else {
8776: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8777: "Specification mandate value for attribute %s\n",
8778: name);
8779: return (NULL);
8780: }
8781:
8782: if (*prefix == ctxt->str_xml) {
8783: /*
8784: * Check that xml:lang conforms to the specification
8785: * No more registered as an error, just generate a warning now
8786: * since this was deprecated in XML second edition
8787: */
8788: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8789: internal_val = xmlStrndup(val, *len);
8790: if (!xmlCheckLanguageID(internal_val)) {
8791: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8792: "Malformed value for xml:lang : %s\n",
8793: internal_val, NULL);
8794: }
8795: }
8796:
8797: /*
8798: * Check that xml:space conforms to the specification
8799: */
8800: if (xmlStrEqual(name, BAD_CAST "space")) {
8801: internal_val = xmlStrndup(val, *len);
8802: if (xmlStrEqual(internal_val, BAD_CAST "default"))
8803: *(ctxt->space) = 0;
8804: else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8805: *(ctxt->space) = 1;
8806: else {
8807: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8808: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8809: internal_val, NULL);
8810: }
8811: }
8812: if (internal_val) {
8813: xmlFree(internal_val);
8814: }
8815: }
8816:
8817: *value = val;
8818: return (name);
8819: }
8820: /**
8821: * xmlParseStartTag2:
8822: * @ctxt: an XML parser context
8823: *
8824: * parse a start of tag either for rule element or
8825: * EmptyElement. In both case we don't parse the tag closing chars.
8826: * This routine is called when running SAX2 parsing
8827: *
8828: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8829: *
8830: * [ WFC: Unique Att Spec ]
8831: * No attribute name may appear more than once in the same start-tag or
8832: * empty-element tag.
8833: *
8834: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8835: *
8836: * [ WFC: Unique Att Spec ]
8837: * No attribute name may appear more than once in the same start-tag or
8838: * empty-element tag.
8839: *
8840: * With namespace:
8841: *
8842: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8843: *
8844: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8845: *
8846: * Returns the element name parsed
8847: */
8848:
8849: static const xmlChar *
8850: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8851: const xmlChar **URI, int *tlen) {
8852: const xmlChar *localname;
8853: const xmlChar *prefix;
8854: const xmlChar *attname;
8855: const xmlChar *aprefix;
8856: const xmlChar *nsname;
8857: xmlChar *attvalue;
8858: const xmlChar **atts = ctxt->atts;
8859: int maxatts = ctxt->maxatts;
8860: int nratts, nbatts, nbdef;
8861: int i, j, nbNs, attval, oldline, oldcol;
8862: const xmlChar *base;
8863: unsigned long cur;
8864: int nsNr = ctxt->nsNr;
8865:
8866: if (RAW != '<') return(NULL);
8867: NEXT1;
8868:
8869: /*
8870: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8871: * point since the attribute values may be stored as pointers to
8872: * the buffer and calling SHRINK would destroy them !
8873: * The Shrinking is only possible once the full set of attribute
8874: * callbacks have been done.
8875: */
8876: reparse:
8877: SHRINK;
8878: base = ctxt->input->base;
8879: cur = ctxt->input->cur - ctxt->input->base;
8880: oldline = ctxt->input->line;
8881: oldcol = ctxt->input->col;
8882: nbatts = 0;
8883: nratts = 0;
8884: nbdef = 0;
8885: nbNs = 0;
8886: attval = 0;
8887: /* Forget any namespaces added during an earlier parse of this element. */
8888: ctxt->nsNr = nsNr;
8889:
8890: localname = xmlParseQName(ctxt, &prefix);
8891: if (localname == NULL) {
8892: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8893: "StartTag: invalid element name\n");
8894: return(NULL);
8895: }
8896: *tlen = ctxt->input->cur - ctxt->input->base - cur;
8897:
8898: /*
8899: * Now parse the attributes, it ends up with the ending
8900: *
8901: * (S Attribute)* S?
8902: */
8903: SKIP_BLANKS;
8904: GROW;
8905: if (ctxt->input->base != base) goto base_changed;
8906:
8907: while ((RAW != '>') &&
8908: ((RAW != '/') || (NXT(1) != '>')) &&
8909: (IS_BYTE_CHAR(RAW))) {
8910: const xmlChar *q = CUR_PTR;
8911: unsigned int cons = ctxt->input->consumed;
8912: int len = -1, alloc = 0;
8913:
8914: attname = xmlParseAttribute2(ctxt, prefix, localname,
8915: &aprefix, &attvalue, &len, &alloc);
8916: if (ctxt->input->base != base) {
8917: if ((attvalue != NULL) && (alloc != 0))
8918: xmlFree(attvalue);
8919: attvalue = NULL;
8920: goto base_changed;
8921: }
8922: if ((attname != NULL) && (attvalue != NULL)) {
8923: if (len < 0) len = xmlStrlen(attvalue);
8924: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8925: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8926: xmlURIPtr uri;
8927:
8928: if (*URL != 0) {
8929: uri = xmlParseURI((const char *) URL);
8930: if (uri == NULL) {
8931: xmlNsErr(ctxt, XML_WAR_NS_URI,
8932: "xmlns: '%s' is not a valid URI\n",
8933: URL, NULL, NULL);
8934: } else {
8935: if (uri->scheme == NULL) {
8936: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8937: "xmlns: URI %s is not absolute\n",
8938: URL, NULL, NULL);
8939: }
8940: xmlFreeURI(uri);
8941: }
8942: if (URL == ctxt->str_xml_ns) {
8943: if (attname != ctxt->str_xml) {
8944: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8945: "xml namespace URI cannot be the default namespace\n",
8946: NULL, NULL, NULL);
8947: }
8948: goto skip_default_ns;
8949: }
8950: if ((len == 29) &&
8951: (xmlStrEqual(URL,
8952: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8953: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8954: "reuse of the xmlns namespace name is forbidden\n",
8955: NULL, NULL, NULL);
8956: goto skip_default_ns;
8957: }
8958: }
8959: /*
8960: * check that it's not a defined namespace
8961: */
8962: for (j = 1;j <= nbNs;j++)
8963: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8964: break;
8965: if (j <= nbNs)
8966: xmlErrAttributeDup(ctxt, NULL, attname);
8967: else
8968: if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8969: skip_default_ns:
8970: if (alloc != 0) xmlFree(attvalue);
8971: SKIP_BLANKS;
8972: continue;
8973: }
8974: if (aprefix == ctxt->str_xmlns) {
8975: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8976: xmlURIPtr uri;
8977:
8978: if (attname == ctxt->str_xml) {
8979: if (URL != ctxt->str_xml_ns) {
8980: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8981: "xml namespace prefix mapped to wrong URI\n",
8982: NULL, NULL, NULL);
8983: }
8984: /*
8985: * Do not keep a namespace definition node
8986: */
8987: goto skip_ns;
8988: }
8989: if (URL == ctxt->str_xml_ns) {
8990: if (attname != ctxt->str_xml) {
8991: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8992: "xml namespace URI mapped to wrong prefix\n",
8993: NULL, NULL, NULL);
8994: }
8995: goto skip_ns;
8996: }
8997: if (attname == ctxt->str_xmlns) {
8998: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8999: "redefinition of the xmlns prefix is forbidden\n",
9000: NULL, NULL, NULL);
9001: goto skip_ns;
9002: }
9003: if ((len == 29) &&
9004: (xmlStrEqual(URL,
9005: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9006: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9007: "reuse of the xmlns namespace name is forbidden\n",
9008: NULL, NULL, NULL);
9009: goto skip_ns;
9010: }
9011: if ((URL == NULL) || (URL[0] == 0)) {
9012: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9013: "xmlns:%s: Empty XML namespace is not allowed\n",
9014: attname, NULL, NULL);
9015: goto skip_ns;
9016: } else {
9017: uri = xmlParseURI((const char *) URL);
9018: if (uri == NULL) {
9019: xmlNsErr(ctxt, XML_WAR_NS_URI,
9020: "xmlns:%s: '%s' is not a valid URI\n",
9021: attname, URL, NULL);
9022: } else {
9023: if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9024: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9025: "xmlns:%s: URI %s is not absolute\n",
9026: attname, URL, NULL);
9027: }
9028: xmlFreeURI(uri);
9029: }
9030: }
9031:
9032: /*
9033: * check that it's not a defined namespace
9034: */
9035: for (j = 1;j <= nbNs;j++)
9036: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9037: break;
9038: if (j <= nbNs)
9039: xmlErrAttributeDup(ctxt, aprefix, attname);
9040: else
9041: if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9042: skip_ns:
9043: if (alloc != 0) xmlFree(attvalue);
9044: SKIP_BLANKS;
9045: if (ctxt->input->base != base) goto base_changed;
9046: continue;
9047: }
9048:
9049: /*
9050: * Add the pair to atts
9051: */
9052: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9053: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9054: if (attvalue[len] == 0)
9055: xmlFree(attvalue);
9056: goto failed;
9057: }
9058: maxatts = ctxt->maxatts;
9059: atts = ctxt->atts;
9060: }
9061: ctxt->attallocs[nratts++] = alloc;
9062: atts[nbatts++] = attname;
9063: atts[nbatts++] = aprefix;
9064: atts[nbatts++] = NULL; /* the URI will be fetched later */
9065: atts[nbatts++] = attvalue;
9066: attvalue += len;
9067: atts[nbatts++] = attvalue;
9068: /*
9069: * tag if some deallocation is needed
9070: */
9071: if (alloc != 0) attval = 1;
9072: } else {
9073: if ((attvalue != NULL) && (attvalue[len] == 0))
9074: xmlFree(attvalue);
9075: }
9076:
9077: failed:
9078:
9079: GROW
9080: if (ctxt->input->base != base) goto base_changed;
9081: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9082: break;
9083: if (!IS_BLANK_CH(RAW)) {
9084: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9085: "attributes construct error\n");
9086: break;
9087: }
9088: SKIP_BLANKS;
9089: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9090: (attname == NULL) && (attvalue == NULL)) {
9091: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9092: "xmlParseStartTag: problem parsing attributes\n");
9093: break;
9094: }
9095: GROW;
9096: if (ctxt->input->base != base) goto base_changed;
9097: }
9098:
9099: /*
9100: * The attributes defaulting
9101: */
9102: if (ctxt->attsDefault != NULL) {
9103: xmlDefAttrsPtr defaults;
9104:
9105: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9106: if (defaults != NULL) {
9107: for (i = 0;i < defaults->nbAttrs;i++) {
9108: attname = defaults->values[5 * i];
9109: aprefix = defaults->values[5 * i + 1];
9110:
9111: /*
9112: * special work for namespaces defaulted defs
9113: */
9114: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9115: /*
9116: * check that it's not a defined namespace
9117: */
9118: for (j = 1;j <= nbNs;j++)
9119: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9120: break;
9121: if (j <= nbNs) continue;
9122:
9123: nsname = xmlGetNamespace(ctxt, NULL);
9124: if (nsname != defaults->values[5 * i + 2]) {
9125: if (nsPush(ctxt, NULL,
9126: defaults->values[5 * i + 2]) > 0)
9127: nbNs++;
9128: }
9129: } else if (aprefix == ctxt->str_xmlns) {
9130: /*
9131: * check that it's not a defined namespace
9132: */
9133: for (j = 1;j <= nbNs;j++)
9134: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9135: break;
9136: if (j <= nbNs) continue;
9137:
9138: nsname = xmlGetNamespace(ctxt, attname);
9139: if (nsname != defaults->values[2]) {
9140: if (nsPush(ctxt, attname,
9141: defaults->values[5 * i + 2]) > 0)
9142: nbNs++;
9143: }
9144: } else {
9145: /*
9146: * check that it's not a defined attribute
9147: */
9148: for (j = 0;j < nbatts;j+=5) {
9149: if ((attname == atts[j]) && (aprefix == atts[j+1]))
9150: break;
9151: }
9152: if (j < nbatts) continue;
9153:
9154: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9155: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9156: return(NULL);
9157: }
9158: maxatts = ctxt->maxatts;
9159: atts = ctxt->atts;
9160: }
9161: atts[nbatts++] = attname;
9162: atts[nbatts++] = aprefix;
9163: if (aprefix == NULL)
9164: atts[nbatts++] = NULL;
9165: else
9166: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9167: atts[nbatts++] = defaults->values[5 * i + 2];
9168: atts[nbatts++] = defaults->values[5 * i + 3];
9169: if ((ctxt->standalone == 1) &&
9170: (defaults->values[5 * i + 4] != NULL)) {
9171: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9172: "standalone: attribute %s on %s defaulted from external subset\n",
9173: attname, localname);
9174: }
9175: nbdef++;
9176: }
9177: }
9178: }
9179: }
9180:
9181: /*
9182: * The attributes checkings
9183: */
9184: for (i = 0; i < nbatts;i += 5) {
9185: /*
9186: * The default namespace does not apply to attribute names.
9187: */
9188: if (atts[i + 1] != NULL) {
9189: nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9190: if (nsname == NULL) {
9191: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9192: "Namespace prefix %s for %s on %s is not defined\n",
9193: atts[i + 1], atts[i], localname);
9194: }
9195: atts[i + 2] = nsname;
9196: } else
9197: nsname = NULL;
9198: /*
9199: * [ WFC: Unique Att Spec ]
9200: * No attribute name may appear more than once in the same
9201: * start-tag or empty-element tag.
9202: * As extended by the Namespace in XML REC.
9203: */
9204: for (j = 0; j < i;j += 5) {
9205: if (atts[i] == atts[j]) {
9206: if (atts[i+1] == atts[j+1]) {
9207: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9208: break;
9209: }
9210: if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9211: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9212: "Namespaced Attribute %s in '%s' redefined\n",
9213: atts[i], nsname, NULL);
9214: break;
9215: }
9216: }
9217: }
9218: }
9219:
9220: nsname = xmlGetNamespace(ctxt, prefix);
9221: if ((prefix != NULL) && (nsname == NULL)) {
9222: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9223: "Namespace prefix %s on %s is not defined\n",
9224: prefix, localname, NULL);
9225: }
9226: *pref = prefix;
9227: *URI = nsname;
9228:
9229: /*
9230: * SAX: Start of Element !
9231: */
9232: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9233: (!ctxt->disableSAX)) {
9234: if (nbNs > 0)
9235: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9236: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9237: nbatts / 5, nbdef, atts);
9238: else
9239: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9240: nsname, 0, NULL, nbatts / 5, nbdef, atts);
9241: }
9242:
9243: /*
9244: * Free up attribute allocated strings if needed
9245: */
9246: if (attval != 0) {
9247: for (i = 3,j = 0; j < nratts;i += 5,j++)
9248: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9249: xmlFree((xmlChar *) atts[i]);
9250: }
9251:
9252: return(localname);
9253:
9254: base_changed:
9255: /*
9256: * the attribute strings are valid iif the base didn't changed
9257: */
9258: if (attval != 0) {
9259: for (i = 3,j = 0; j < nratts;i += 5,j++)
9260: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261: xmlFree((xmlChar *) atts[i]);
9262: }
9263: ctxt->input->cur = ctxt->input->base + cur;
9264: ctxt->input->line = oldline;
9265: ctxt->input->col = oldcol;
9266: if (ctxt->wellFormed == 1) {
9267: goto reparse;
9268: }
9269: return(NULL);
9270: }
9271:
9272: /**
9273: * xmlParseEndTag2:
9274: * @ctxt: an XML parser context
9275: * @line: line of the start tag
9276: * @nsNr: number of namespaces on the start tag
9277: *
9278: * parse an end of tag
9279: *
9280: * [42] ETag ::= '</' Name S? '>'
9281: *
9282: * With namespace
9283: *
9284: * [NS 9] ETag ::= '</' QName S? '>'
9285: */
9286:
9287: static void
9288: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9289: const xmlChar *URI, int line, int nsNr, int tlen) {
9290: const xmlChar *name;
9291:
9292: GROW;
9293: if ((RAW != '<') || (NXT(1) != '/')) {
9294: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9295: return;
9296: }
9297: SKIP(2);
9298:
9299: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9300: if (ctxt->input->cur[tlen] == '>') {
9301: ctxt->input->cur += tlen + 1;
9302: goto done;
9303: }
9304: ctxt->input->cur += tlen;
9305: name = (xmlChar*)1;
9306: } else {
9307: if (prefix == NULL)
9308: name = xmlParseNameAndCompare(ctxt, ctxt->name);
9309: else
9310: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9311: }
9312:
9313: /*
9314: * We should definitely be at the ending "S? '>'" part
9315: */
9316: GROW;
9317: SKIP_BLANKS;
9318: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9319: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9320: } else
9321: NEXT1;
9322:
9323: /*
9324: * [ WFC: Element Type Match ]
9325: * The Name in an element's end-tag must match the element type in the
9326: * start-tag.
9327: *
9328: */
9329: if (name != (xmlChar*)1) {
9330: if (name == NULL) name = BAD_CAST "unparseable";
9331: if ((line == 0) && (ctxt->node != NULL))
9332: line = ctxt->node->line;
9333: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9334: "Opening and ending tag mismatch: %s line %d and %s\n",
9335: ctxt->name, line, name);
9336: }
9337:
9338: /*
9339: * SAX: End of Tag
9340: */
9341: done:
9342: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9343: (!ctxt->disableSAX))
9344: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9345:
9346: spacePop(ctxt);
9347: if (nsNr != 0)
9348: nsPop(ctxt, nsNr);
9349: return;
9350: }
9351:
9352: /**
9353: * xmlParseCDSect:
9354: * @ctxt: an XML parser context
9355: *
9356: * Parse escaped pure raw content.
9357: *
9358: * [18] CDSect ::= CDStart CData CDEnd
9359: *
9360: * [19] CDStart ::= '<![CDATA['
9361: *
9362: * [20] Data ::= (Char* - (Char* ']]>' Char*))
9363: *
9364: * [21] CDEnd ::= ']]>'
9365: */
9366: void
9367: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9368: xmlChar *buf = NULL;
9369: int len = 0;
9370: int size = XML_PARSER_BUFFER_SIZE;
9371: int r, rl;
9372: int s, sl;
9373: int cur, l;
9374: int count = 0;
9375:
9376: /* Check 2.6.0 was NXT(0) not RAW */
9377: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9378: SKIP(9);
9379: } else
9380: return;
9381:
9382: ctxt->instate = XML_PARSER_CDATA_SECTION;
9383: r = CUR_CHAR(rl);
9384: if (!IS_CHAR(r)) {
9385: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9386: ctxt->instate = XML_PARSER_CONTENT;
9387: return;
9388: }
9389: NEXTL(rl);
9390: s = CUR_CHAR(sl);
9391: if (!IS_CHAR(s)) {
9392: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9393: ctxt->instate = XML_PARSER_CONTENT;
9394: return;
9395: }
9396: NEXTL(sl);
9397: cur = CUR_CHAR(l);
9398: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9399: if (buf == NULL) {
9400: xmlErrMemory(ctxt, NULL);
9401: return;
9402: }
9403: while (IS_CHAR(cur) &&
9404: ((r != ']') || (s != ']') || (cur != '>'))) {
9405: if (len + 5 >= size) {
9406: xmlChar *tmp;
9407:
9408: size *= 2;
9409: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9410: if (tmp == NULL) {
9411: xmlFree(buf);
9412: xmlErrMemory(ctxt, NULL);
9413: return;
9414: }
9415: buf = tmp;
9416: }
9417: COPY_BUF(rl,buf,len,r);
9418: r = s;
9419: rl = sl;
9420: s = cur;
9421: sl = l;
9422: count++;
9423: if (count > 50) {
9424: GROW;
9425: count = 0;
9426: }
9427: NEXTL(l);
9428: cur = CUR_CHAR(l);
9429: }
9430: buf[len] = 0;
9431: ctxt->instate = XML_PARSER_CONTENT;
9432: if (cur != '>') {
9433: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9434: "CData section not finished\n%.50s\n", buf);
9435: xmlFree(buf);
9436: return;
9437: }
9438: NEXTL(l);
9439:
9440: /*
9441: * OK the buffer is to be consumed as cdata.
9442: */
9443: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9444: if (ctxt->sax->cdataBlock != NULL)
9445: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9446: else if (ctxt->sax->characters != NULL)
9447: ctxt->sax->characters(ctxt->userData, buf, len);
9448: }
9449: xmlFree(buf);
9450: }
9451:
9452: /**
9453: * xmlParseContent:
9454: * @ctxt: an XML parser context
9455: *
9456: * Parse a content:
9457: *
9458: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9459: */
9460:
9461: void
9462: xmlParseContent(xmlParserCtxtPtr ctxt) {
9463: GROW;
9464: while ((RAW != 0) &&
9465: ((RAW != '<') || (NXT(1) != '/')) &&
9466: (ctxt->instate != XML_PARSER_EOF)) {
9467: const xmlChar *test = CUR_PTR;
9468: unsigned int cons = ctxt->input->consumed;
9469: const xmlChar *cur = ctxt->input->cur;
9470:
9471: /*
9472: * First case : a Processing Instruction.
9473: */
9474: if ((*cur == '<') && (cur[1] == '?')) {
9475: xmlParsePI(ctxt);
9476: }
9477:
9478: /*
9479: * Second case : a CDSection
9480: */
9481: /* 2.6.0 test was *cur not RAW */
9482: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9483: xmlParseCDSect(ctxt);
9484: }
9485:
9486: /*
9487: * Third case : a comment
9488: */
9489: else if ((*cur == '<') && (NXT(1) == '!') &&
9490: (NXT(2) == '-') && (NXT(3) == '-')) {
9491: xmlParseComment(ctxt);
9492: ctxt->instate = XML_PARSER_CONTENT;
9493: }
9494:
9495: /*
9496: * Fourth case : a sub-element.
9497: */
9498: else if (*cur == '<') {
9499: xmlParseElement(ctxt);
9500: }
9501:
9502: /*
9503: * Fifth case : a reference. If if has not been resolved,
9504: * parsing returns it's Name, create the node
9505: */
9506:
9507: else if (*cur == '&') {
9508: xmlParseReference(ctxt);
9509: }
9510:
9511: /*
9512: * Last case, text. Note that References are handled directly.
9513: */
9514: else {
9515: xmlParseCharData(ctxt, 0);
9516: }
9517:
9518: GROW;
9519: /*
9520: * Pop-up of finished entities.
9521: */
9522: while ((RAW == 0) && (ctxt->inputNr > 1))
9523: xmlPopInput(ctxt);
9524: SHRINK;
9525:
9526: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9527: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9528: "detected an error in element content\n");
9529: ctxt->instate = XML_PARSER_EOF;
9530: break;
9531: }
9532: }
9533: }
9534:
9535: /**
9536: * xmlParseElement:
9537: * @ctxt: an XML parser context
9538: *
9539: * parse an XML element, this is highly recursive
9540: *
9541: * [39] element ::= EmptyElemTag | STag content ETag
9542: *
9543: * [ WFC: Element Type Match ]
9544: * The Name in an element's end-tag must match the element type in the
9545: * start-tag.
9546: *
9547: */
9548:
9549: void
9550: xmlParseElement(xmlParserCtxtPtr ctxt) {
9551: const xmlChar *name;
9552: const xmlChar *prefix = NULL;
9553: const xmlChar *URI = NULL;
9554: xmlParserNodeInfo node_info;
9555: int line, tlen;
9556: xmlNodePtr ret;
9557: int nsNr = ctxt->nsNr;
9558:
9559: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9560: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9561: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9562: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9563: xmlParserMaxDepth);
9564: ctxt->instate = XML_PARSER_EOF;
9565: return;
9566: }
9567:
9568: /* Capture start position */
9569: if (ctxt->record_info) {
9570: node_info.begin_pos = ctxt->input->consumed +
9571: (CUR_PTR - ctxt->input->base);
9572: node_info.begin_line = ctxt->input->line;
9573: }
9574:
9575: if (ctxt->spaceNr == 0)
9576: spacePush(ctxt, -1);
9577: else if (*ctxt->space == -2)
9578: spacePush(ctxt, -1);
9579: else
9580: spacePush(ctxt, *ctxt->space);
9581:
9582: line = ctxt->input->line;
9583: #ifdef LIBXML_SAX1_ENABLED
9584: if (ctxt->sax2)
9585: #endif /* LIBXML_SAX1_ENABLED */
9586: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9587: #ifdef LIBXML_SAX1_ENABLED
9588: else
9589: name = xmlParseStartTag(ctxt);
9590: #endif /* LIBXML_SAX1_ENABLED */
9591: if (name == NULL) {
9592: spacePop(ctxt);
9593: return;
9594: }
9595: namePush(ctxt, name);
9596: ret = ctxt->node;
9597:
9598: #ifdef LIBXML_VALID_ENABLED
9599: /*
9600: * [ VC: Root Element Type ]
9601: * The Name in the document type declaration must match the element
9602: * type of the root element.
9603: */
9604: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9605: ctxt->node && (ctxt->node == ctxt->myDoc->children))
9606: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9607: #endif /* LIBXML_VALID_ENABLED */
9608:
9609: /*
9610: * Check for an Empty Element.
9611: */
9612: if ((RAW == '/') && (NXT(1) == '>')) {
9613: SKIP(2);
9614: if (ctxt->sax2) {
9615: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9616: (!ctxt->disableSAX))
9617: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9618: #ifdef LIBXML_SAX1_ENABLED
9619: } else {
9620: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9621: (!ctxt->disableSAX))
9622: ctxt->sax->endElement(ctxt->userData, name);
9623: #endif /* LIBXML_SAX1_ENABLED */
9624: }
9625: namePop(ctxt);
9626: spacePop(ctxt);
9627: if (nsNr != ctxt->nsNr)
9628: nsPop(ctxt, ctxt->nsNr - nsNr);
9629: if ( ret != NULL && ctxt->record_info ) {
9630: node_info.end_pos = ctxt->input->consumed +
9631: (CUR_PTR - ctxt->input->base);
9632: node_info.end_line = ctxt->input->line;
9633: node_info.node = ret;
9634: xmlParserAddNodeInfo(ctxt, &node_info);
9635: }
9636: return;
9637: }
9638: if (RAW == '>') {
9639: NEXT1;
9640: } else {
9641: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9642: "Couldn't find end of Start Tag %s line %d\n",
9643: name, line, NULL);
9644:
9645: /*
9646: * end of parsing of this node.
9647: */
9648: nodePop(ctxt);
9649: namePop(ctxt);
9650: spacePop(ctxt);
9651: if (nsNr != ctxt->nsNr)
9652: nsPop(ctxt, ctxt->nsNr - nsNr);
9653:
9654: /*
9655: * Capture end position and add node
9656: */
9657: if ( ret != NULL && ctxt->record_info ) {
9658: node_info.end_pos = ctxt->input->consumed +
9659: (CUR_PTR - ctxt->input->base);
9660: node_info.end_line = ctxt->input->line;
9661: node_info.node = ret;
9662: xmlParserAddNodeInfo(ctxt, &node_info);
9663: }
9664: return;
9665: }
9666:
9667: /*
9668: * Parse the content of the element:
9669: */
9670: xmlParseContent(ctxt);
9671: if (!IS_BYTE_CHAR(RAW)) {
9672: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9673: "Premature end of data in tag %s line %d\n",
9674: name, line, NULL);
9675:
9676: /*
9677: * end of parsing of this node.
9678: */
9679: nodePop(ctxt);
9680: namePop(ctxt);
9681: spacePop(ctxt);
9682: if (nsNr != ctxt->nsNr)
9683: nsPop(ctxt, ctxt->nsNr - nsNr);
9684: return;
9685: }
9686:
9687: /*
9688: * parse the end of tag: '</' should be here.
9689: */
9690: if (ctxt->sax2) {
9691: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9692: namePop(ctxt);
9693: }
9694: #ifdef LIBXML_SAX1_ENABLED
9695: else
9696: xmlParseEndTag1(ctxt, line);
9697: #endif /* LIBXML_SAX1_ENABLED */
9698:
9699: /*
9700: * Capture end position and add node
9701: */
9702: if ( ret != NULL && ctxt->record_info ) {
9703: node_info.end_pos = ctxt->input->consumed +
9704: (CUR_PTR - ctxt->input->base);
9705: node_info.end_line = ctxt->input->line;
9706: node_info.node = ret;
9707: xmlParserAddNodeInfo(ctxt, &node_info);
9708: }
9709: }
9710:
9711: /**
9712: * xmlParseVersionNum:
9713: * @ctxt: an XML parser context
9714: *
9715: * parse the XML version value.
9716: *
9717: * [26] VersionNum ::= '1.' [0-9]+
9718: *
9719: * In practice allow [0-9].[0-9]+ at that level
9720: *
9721: * Returns the string giving the XML version number, or NULL
9722: */
9723: xmlChar *
9724: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9725: xmlChar *buf = NULL;
9726: int len = 0;
9727: int size = 10;
9728: xmlChar cur;
9729:
9730: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9731: if (buf == NULL) {
9732: xmlErrMemory(ctxt, NULL);
9733: return(NULL);
9734: }
9735: cur = CUR;
9736: if (!((cur >= '0') && (cur <= '9'))) {
9737: xmlFree(buf);
9738: return(NULL);
9739: }
9740: buf[len++] = cur;
9741: NEXT;
9742: cur=CUR;
9743: if (cur != '.') {
9744: xmlFree(buf);
9745: return(NULL);
9746: }
9747: buf[len++] = cur;
9748: NEXT;
9749: cur=CUR;
9750: while ((cur >= '0') && (cur <= '9')) {
9751: if (len + 1 >= size) {
9752: xmlChar *tmp;
9753:
9754: size *= 2;
9755: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9756: if (tmp == NULL) {
9757: xmlFree(buf);
9758: xmlErrMemory(ctxt, NULL);
9759: return(NULL);
9760: }
9761: buf = tmp;
9762: }
9763: buf[len++] = cur;
9764: NEXT;
9765: cur=CUR;
9766: }
9767: buf[len] = 0;
9768: return(buf);
9769: }
9770:
9771: /**
9772: * xmlParseVersionInfo:
9773: * @ctxt: an XML parser context
9774: *
9775: * parse the XML version.
9776: *
9777: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9778: *
9779: * [25] Eq ::= S? '=' S?
9780: *
9781: * Returns the version string, e.g. "1.0"
9782: */
9783:
9784: xmlChar *
9785: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9786: xmlChar *version = NULL;
9787:
9788: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9789: SKIP(7);
9790: SKIP_BLANKS;
9791: if (RAW != '=') {
9792: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9793: return(NULL);
9794: }
9795: NEXT;
9796: SKIP_BLANKS;
9797: if (RAW == '"') {
9798: NEXT;
9799: version = xmlParseVersionNum(ctxt);
9800: if (RAW != '"') {
9801: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9802: } else
9803: NEXT;
9804: } else if (RAW == '\''){
9805: NEXT;
9806: version = xmlParseVersionNum(ctxt);
9807: if (RAW != '\'') {
9808: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9809: } else
9810: NEXT;
9811: } else {
9812: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9813: }
9814: }
9815: return(version);
9816: }
9817:
9818: /**
9819: * xmlParseEncName:
9820: * @ctxt: an XML parser context
9821: *
9822: * parse the XML encoding name
9823: *
9824: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9825: *
9826: * Returns the encoding name value or NULL
9827: */
9828: xmlChar *
9829: xmlParseEncName(xmlParserCtxtPtr ctxt) {
9830: xmlChar *buf = NULL;
9831: int len = 0;
9832: int size = 10;
9833: xmlChar cur;
9834:
9835: cur = CUR;
9836: if (((cur >= 'a') && (cur <= 'z')) ||
9837: ((cur >= 'A') && (cur <= 'Z'))) {
9838: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9839: if (buf == NULL) {
9840: xmlErrMemory(ctxt, NULL);
9841: return(NULL);
9842: }
9843:
9844: buf[len++] = cur;
9845: NEXT;
9846: cur = CUR;
9847: while (((cur >= 'a') && (cur <= 'z')) ||
9848: ((cur >= 'A') && (cur <= 'Z')) ||
9849: ((cur >= '0') && (cur <= '9')) ||
9850: (cur == '.') || (cur == '_') ||
9851: (cur == '-')) {
9852: if (len + 1 >= size) {
9853: xmlChar *tmp;
9854:
9855: size *= 2;
9856: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9857: if (tmp == NULL) {
9858: xmlErrMemory(ctxt, NULL);
9859: xmlFree(buf);
9860: return(NULL);
9861: }
9862: buf = tmp;
9863: }
9864: buf[len++] = cur;
9865: NEXT;
9866: cur = CUR;
9867: if (cur == 0) {
9868: SHRINK;
9869: GROW;
9870: cur = CUR;
9871: }
9872: }
9873: buf[len] = 0;
9874: } else {
9875: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9876: }
9877: return(buf);
9878: }
9879:
9880: /**
9881: * xmlParseEncodingDecl:
9882: * @ctxt: an XML parser context
9883: *
9884: * parse the XML encoding declaration
9885: *
9886: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9887: *
9888: * this setups the conversion filters.
9889: *
9890: * Returns the encoding value or NULL
9891: */
9892:
9893: const xmlChar *
9894: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9895: xmlChar *encoding = NULL;
9896:
9897: SKIP_BLANKS;
9898: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9899: SKIP(8);
9900: SKIP_BLANKS;
9901: if (RAW != '=') {
9902: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9903: return(NULL);
9904: }
9905: NEXT;
9906: SKIP_BLANKS;
9907: if (RAW == '"') {
9908: NEXT;
9909: encoding = xmlParseEncName(ctxt);
9910: if (RAW != '"') {
9911: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9912: } else
9913: NEXT;
9914: } else if (RAW == '\''){
9915: NEXT;
9916: encoding = xmlParseEncName(ctxt);
9917: if (RAW != '\'') {
9918: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9919: } else
9920: NEXT;
9921: } else {
9922: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9923: }
9924: /*
9925: * UTF-16 encoding stwich has already taken place at this stage,
9926: * more over the little-endian/big-endian selection is already done
9927: */
9928: if ((encoding != NULL) &&
9929: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9930: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9931: /*
9932: * If no encoding was passed to the parser, that we are
9933: * using UTF-16 and no decoder is present i.e. the
9934: * document is apparently UTF-8 compatible, then raise an
9935: * encoding mismatch fatal error
9936: */
9937: if ((ctxt->encoding == NULL) &&
9938: (ctxt->input->buf != NULL) &&
9939: (ctxt->input->buf->encoder == NULL)) {
9940: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9941: "Document labelled UTF-16 but has UTF-8 content\n");
9942: }
9943: if (ctxt->encoding != NULL)
9944: xmlFree((xmlChar *) ctxt->encoding);
9945: ctxt->encoding = encoding;
9946: }
9947: /*
9948: * UTF-8 encoding is handled natively
9949: */
9950: else if ((encoding != NULL) &&
9951: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9952: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9953: if (ctxt->encoding != NULL)
9954: xmlFree((xmlChar *) ctxt->encoding);
9955: ctxt->encoding = encoding;
9956: }
9957: else if (encoding != NULL) {
9958: xmlCharEncodingHandlerPtr handler;
9959:
9960: if (ctxt->input->encoding != NULL)
9961: xmlFree((xmlChar *) ctxt->input->encoding);
9962: ctxt->input->encoding = encoding;
9963:
9964: handler = xmlFindCharEncodingHandler((const char *) encoding);
9965: if (handler != NULL) {
9966: xmlSwitchToEncoding(ctxt, handler);
9967: } else {
9968: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9969: "Unsupported encoding %s\n", encoding);
9970: return(NULL);
9971: }
9972: }
9973: }
9974: return(encoding);
9975: }
9976:
9977: /**
9978: * xmlParseSDDecl:
9979: * @ctxt: an XML parser context
9980: *
9981: * parse the XML standalone declaration
9982: *
9983: * [32] SDDecl ::= S 'standalone' Eq
9984: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9985: *
9986: * [ VC: Standalone Document Declaration ]
9987: * TODO The standalone document declaration must have the value "no"
9988: * if any external markup declarations contain declarations of:
9989: * - attributes with default values, if elements to which these
9990: * attributes apply appear in the document without specifications
9991: * of values for these attributes, or
9992: * - entities (other than amp, lt, gt, apos, quot), if references
9993: * to those entities appear in the document, or
9994: * - attributes with values subject to normalization, where the
9995: * attribute appears in the document with a value which will change
9996: * as a result of normalization, or
9997: * - element types with element content, if white space occurs directly
9998: * within any instance of those types.
9999: *
10000: * Returns:
10001: * 1 if standalone="yes"
10002: * 0 if standalone="no"
10003: * -2 if standalone attribute is missing or invalid
10004: * (A standalone value of -2 means that the XML declaration was found,
10005: * but no value was specified for the standalone attribute).
10006: */
10007:
10008: int
10009: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10010: int standalone = -2;
10011:
10012: SKIP_BLANKS;
10013: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10014: SKIP(10);
10015: SKIP_BLANKS;
10016: if (RAW != '=') {
10017: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10018: return(standalone);
10019: }
10020: NEXT;
10021: SKIP_BLANKS;
10022: if (RAW == '\''){
10023: NEXT;
10024: if ((RAW == 'n') && (NXT(1) == 'o')) {
10025: standalone = 0;
10026: SKIP(2);
10027: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10028: (NXT(2) == 's')) {
10029: standalone = 1;
10030: SKIP(3);
10031: } else {
10032: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10033: }
10034: if (RAW != '\'') {
10035: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10036: } else
10037: NEXT;
10038: } else if (RAW == '"'){
10039: NEXT;
10040: if ((RAW == 'n') && (NXT(1) == 'o')) {
10041: standalone = 0;
10042: SKIP(2);
10043: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10044: (NXT(2) == 's')) {
10045: standalone = 1;
10046: SKIP(3);
10047: } else {
10048: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10049: }
10050: if (RAW != '"') {
10051: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10052: } else
10053: NEXT;
10054: } else {
10055: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10056: }
10057: }
10058: return(standalone);
10059: }
10060:
10061: /**
10062: * xmlParseXMLDecl:
10063: * @ctxt: an XML parser context
10064: *
10065: * parse an XML declaration header
10066: *
10067: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10068: */
10069:
10070: void
10071: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10072: xmlChar *version;
10073:
10074: /*
10075: * This value for standalone indicates that the document has an
10076: * XML declaration but it does not have a standalone attribute.
10077: * It will be overwritten later if a standalone attribute is found.
10078: */
10079: ctxt->input->standalone = -2;
10080:
10081: /*
10082: * We know that '<?xml' is here.
10083: */
10084: SKIP(5);
10085:
10086: if (!IS_BLANK_CH(RAW)) {
10087: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10088: "Blank needed after '<?xml'\n");
10089: }
10090: SKIP_BLANKS;
10091:
10092: /*
10093: * We must have the VersionInfo here.
10094: */
10095: version = xmlParseVersionInfo(ctxt);
10096: if (version == NULL) {
10097: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10098: } else {
10099: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10100: /*
10101: * Changed here for XML-1.0 5th edition
10102: */
10103: if (ctxt->options & XML_PARSE_OLD10) {
10104: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10105: "Unsupported version '%s'\n",
10106: version);
10107: } else {
10108: if ((version[0] == '1') && ((version[1] == '.'))) {
10109: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10110: "Unsupported version '%s'\n",
10111: version, NULL);
10112: } else {
10113: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10114: "Unsupported version '%s'\n",
10115: version);
10116: }
10117: }
10118: }
10119: if (ctxt->version != NULL)
10120: xmlFree((void *) ctxt->version);
10121: ctxt->version = version;
10122: }
10123:
10124: /*
10125: * We may have the encoding declaration
10126: */
10127: if (!IS_BLANK_CH(RAW)) {
10128: if ((RAW == '?') && (NXT(1) == '>')) {
10129: SKIP(2);
10130: return;
10131: }
10132: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10133: }
10134: xmlParseEncodingDecl(ctxt);
10135: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10136: /*
10137: * The XML REC instructs us to stop parsing right here
10138: */
10139: return;
10140: }
10141:
10142: /*
10143: * We may have the standalone status.
10144: */
10145: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10146: if ((RAW == '?') && (NXT(1) == '>')) {
10147: SKIP(2);
10148: return;
10149: }
10150: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10151: }
10152:
10153: /*
10154: * We can grow the input buffer freely at that point
10155: */
10156: GROW;
10157:
10158: SKIP_BLANKS;
10159: ctxt->input->standalone = xmlParseSDDecl(ctxt);
10160:
10161: SKIP_BLANKS;
10162: if ((RAW == '?') && (NXT(1) == '>')) {
10163: SKIP(2);
10164: } else if (RAW == '>') {
10165: /* Deprecated old WD ... */
10166: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10167: NEXT;
10168: } else {
10169: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10170: MOVETO_ENDTAG(CUR_PTR);
10171: NEXT;
10172: }
10173: }
10174:
10175: /**
10176: * xmlParseMisc:
10177: * @ctxt: an XML parser context
10178: *
10179: * parse an XML Misc* optional field.
10180: *
10181: * [27] Misc ::= Comment | PI | S
10182: */
10183:
10184: void
10185: xmlParseMisc(xmlParserCtxtPtr ctxt) {
10186: while (((RAW == '<') && (NXT(1) == '?')) ||
10187: (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10188: IS_BLANK_CH(CUR)) {
10189: if ((RAW == '<') && (NXT(1) == '?')) {
10190: xmlParsePI(ctxt);
10191: } else if (IS_BLANK_CH(CUR)) {
10192: NEXT;
10193: } else
10194: xmlParseComment(ctxt);
10195: }
10196: }
10197:
10198: /**
10199: * xmlParseDocument:
10200: * @ctxt: an XML parser context
10201: *
10202: * parse an XML document (and build a tree if using the standard SAX
10203: * interface).
10204: *
10205: * [1] document ::= prolog element Misc*
10206: *
10207: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10208: *
10209: * Returns 0, -1 in case of error. the parser context is augmented
10210: * as a result of the parsing.
10211: */
10212:
10213: int
10214: xmlParseDocument(xmlParserCtxtPtr ctxt) {
10215: xmlChar start[4];
10216: xmlCharEncoding enc;
10217:
10218: xmlInitParser();
10219:
10220: if ((ctxt == NULL) || (ctxt->input == NULL))
10221: return(-1);
10222:
10223: GROW;
10224:
10225: /*
10226: * SAX: detecting the level.
10227: */
10228: xmlDetectSAX2(ctxt);
10229:
10230: /*
10231: * SAX: beginning of the document processing.
10232: */
10233: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10234: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10235:
10236: if ((ctxt->encoding == NULL) &&
10237: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10238: /*
10239: * Get the 4 first bytes and decode the charset
10240: * if enc != XML_CHAR_ENCODING_NONE
10241: * plug some encoding conversion routines.
10242: */
10243: start[0] = RAW;
10244: start[1] = NXT(1);
10245: start[2] = NXT(2);
10246: start[3] = NXT(3);
10247: enc = xmlDetectCharEncoding(&start[0], 4);
10248: if (enc != XML_CHAR_ENCODING_NONE) {
10249: xmlSwitchEncoding(ctxt, enc);
10250: }
10251: }
10252:
10253:
10254: if (CUR == 0) {
10255: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10256: }
10257:
10258: /*
10259: * Check for the XMLDecl in the Prolog.
10260: * do not GROW here to avoid the detected encoder to decode more
10261: * than just the first line, unless the amount of data is really
10262: * too small to hold "<?xml version="1.0" encoding="foo"
10263: */
10264: if ((ctxt->input->end - ctxt->input->cur) < 35) {
10265: GROW;
10266: }
10267: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10268:
10269: /*
10270: * Note that we will switch encoding on the fly.
10271: */
10272: xmlParseXMLDecl(ctxt);
10273: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10274: /*
10275: * The XML REC instructs us to stop parsing right here
10276: */
10277: return(-1);
10278: }
10279: ctxt->standalone = ctxt->input->standalone;
10280: SKIP_BLANKS;
10281: } else {
10282: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10283: }
10284: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10285: ctxt->sax->startDocument(ctxt->userData);
10286:
10287: /*
10288: * The Misc part of the Prolog
10289: */
10290: GROW;
10291: xmlParseMisc(ctxt);
10292:
10293: /*
10294: * Then possibly doc type declaration(s) and more Misc
10295: * (doctypedecl Misc*)?
10296: */
10297: GROW;
10298: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10299:
10300: ctxt->inSubset = 1;
10301: xmlParseDocTypeDecl(ctxt);
10302: if (RAW == '[') {
10303: ctxt->instate = XML_PARSER_DTD;
10304: xmlParseInternalSubset(ctxt);
10305: }
10306:
10307: /*
10308: * Create and update the external subset.
10309: */
10310: ctxt->inSubset = 2;
10311: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10312: (!ctxt->disableSAX))
10313: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10314: ctxt->extSubSystem, ctxt->extSubURI);
10315: ctxt->inSubset = 0;
10316:
10317: xmlCleanSpecialAttr(ctxt);
10318:
10319: ctxt->instate = XML_PARSER_PROLOG;
10320: xmlParseMisc(ctxt);
10321: }
10322:
10323: /*
10324: * Time to start parsing the tree itself
10325: */
10326: GROW;
10327: if (RAW != '<') {
10328: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10329: "Start tag expected, '<' not found\n");
10330: } else {
10331: ctxt->instate = XML_PARSER_CONTENT;
10332: xmlParseElement(ctxt);
10333: ctxt->instate = XML_PARSER_EPILOG;
10334:
10335:
10336: /*
10337: * The Misc part at the end
10338: */
10339: xmlParseMisc(ctxt);
10340:
10341: if (RAW != 0) {
10342: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10343: }
10344: ctxt->instate = XML_PARSER_EOF;
10345: }
10346:
10347: /*
10348: * SAX: end of the document processing.
10349: */
10350: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10351: ctxt->sax->endDocument(ctxt->userData);
10352:
10353: /*
10354: * Remove locally kept entity definitions if the tree was not built
10355: */
10356: if ((ctxt->myDoc != NULL) &&
10357: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10358: xmlFreeDoc(ctxt->myDoc);
10359: ctxt->myDoc = NULL;
10360: }
10361:
10362: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10363: ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10364: if (ctxt->valid)
10365: ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10366: if (ctxt->nsWellFormed)
10367: ctxt->myDoc->properties |= XML_DOC_NSVALID;
10368: if (ctxt->options & XML_PARSE_OLD10)
10369: ctxt->myDoc->properties |= XML_DOC_OLD10;
10370: }
10371: if (! ctxt->wellFormed) {
10372: ctxt->valid = 0;
10373: return(-1);
10374: }
10375: return(0);
10376: }
10377:
10378: /**
10379: * xmlParseExtParsedEnt:
10380: * @ctxt: an XML parser context
10381: *
10382: * parse a general parsed entity
10383: * An external general parsed entity is well-formed if it matches the
10384: * production labeled extParsedEnt.
10385: *
10386: * [78] extParsedEnt ::= TextDecl? content
10387: *
10388: * Returns 0, -1 in case of error. the parser context is augmented
10389: * as a result of the parsing.
10390: */
10391:
10392: int
10393: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10394: xmlChar start[4];
10395: xmlCharEncoding enc;
10396:
10397: if ((ctxt == NULL) || (ctxt->input == NULL))
10398: return(-1);
10399:
10400: xmlDefaultSAXHandlerInit();
10401:
10402: xmlDetectSAX2(ctxt);
10403:
10404: GROW;
10405:
10406: /*
10407: * SAX: beginning of the document processing.
10408: */
10409: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10410: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10411:
10412: /*
10413: * Get the 4 first bytes and decode the charset
10414: * if enc != XML_CHAR_ENCODING_NONE
10415: * plug some encoding conversion routines.
10416: */
10417: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10418: start[0] = RAW;
10419: start[1] = NXT(1);
10420: start[2] = NXT(2);
10421: start[3] = NXT(3);
10422: enc = xmlDetectCharEncoding(start, 4);
10423: if (enc != XML_CHAR_ENCODING_NONE) {
10424: xmlSwitchEncoding(ctxt, enc);
10425: }
10426: }
10427:
10428:
10429: if (CUR == 0) {
10430: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10431: }
10432:
10433: /*
10434: * Check for the XMLDecl in the Prolog.
10435: */
10436: GROW;
10437: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10438:
10439: /*
10440: * Note that we will switch encoding on the fly.
10441: */
10442: xmlParseXMLDecl(ctxt);
10443: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10444: /*
10445: * The XML REC instructs us to stop parsing right here
10446: */
10447: return(-1);
10448: }
10449: SKIP_BLANKS;
10450: } else {
10451: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10452: }
10453: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10454: ctxt->sax->startDocument(ctxt->userData);
10455:
10456: /*
10457: * Doing validity checking on chunk doesn't make sense
10458: */
10459: ctxt->instate = XML_PARSER_CONTENT;
10460: ctxt->validate = 0;
10461: ctxt->loadsubset = 0;
10462: ctxt->depth = 0;
10463:
10464: xmlParseContent(ctxt);
10465:
10466: if ((RAW == '<') && (NXT(1) == '/')) {
10467: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10468: } else if (RAW != 0) {
10469: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10470: }
10471:
10472: /*
10473: * SAX: end of the document processing.
10474: */
10475: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10476: ctxt->sax->endDocument(ctxt->userData);
10477:
10478: if (! ctxt->wellFormed) return(-1);
10479: return(0);
10480: }
10481:
10482: #ifdef LIBXML_PUSH_ENABLED
10483: /************************************************************************
10484: * *
10485: * Progressive parsing interfaces *
10486: * *
10487: ************************************************************************/
10488:
10489: /**
10490: * xmlParseLookupSequence:
10491: * @ctxt: an XML parser context
10492: * @first: the first char to lookup
10493: * @next: the next char to lookup or zero
10494: * @third: the next char to lookup or zero
10495: *
10496: * Try to find if a sequence (first, next, third) or just (first next) or
10497: * (first) is available in the input stream.
10498: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499: * to avoid rescanning sequences of bytes, it DOES change the state of the
10500: * parser, do not use liberally.
10501: *
10502: * Returns the index to the current parsing point if the full sequence
10503: * is available, -1 otherwise.
10504: */
10505: static int
10506: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10507: xmlChar next, xmlChar third) {
10508: int base, len;
10509: xmlParserInputPtr in;
10510: const xmlChar *buf;
10511:
10512: in = ctxt->input;
10513: if (in == NULL) return(-1);
10514: base = in->cur - in->base;
10515: if (base < 0) return(-1);
10516: if (ctxt->checkIndex > base)
10517: base = ctxt->checkIndex;
10518: if (in->buf == NULL) {
10519: buf = in->base;
10520: len = in->length;
10521: } else {
10522: buf = in->buf->buffer->content;
10523: len = in->buf->buffer->use;
10524: }
10525: /* take into account the sequence length */
10526: if (third) len -= 2;
10527: else if (next) len --;
10528: for (;base < len;base++) {
10529: if (buf[base] == first) {
10530: if (third != 0) {
10531: if ((buf[base + 1] != next) ||
10532: (buf[base + 2] != third)) continue;
10533: } else if (next != 0) {
10534: if (buf[base + 1] != next) continue;
10535: }
10536: ctxt->checkIndex = 0;
10537: #ifdef DEBUG_PUSH
10538: if (next == 0)
10539: xmlGenericError(xmlGenericErrorContext,
10540: "PP: lookup '%c' found at %d\n",
10541: first, base);
10542: else if (third == 0)
10543: xmlGenericError(xmlGenericErrorContext,
10544: "PP: lookup '%c%c' found at %d\n",
10545: first, next, base);
10546: else
10547: xmlGenericError(xmlGenericErrorContext,
10548: "PP: lookup '%c%c%c' found at %d\n",
10549: first, next, third, base);
10550: #endif
10551: return(base - (in->cur - in->base));
10552: }
10553: }
10554: ctxt->checkIndex = base;
10555: #ifdef DEBUG_PUSH
10556: if (next == 0)
10557: xmlGenericError(xmlGenericErrorContext,
10558: "PP: lookup '%c' failed\n", first);
10559: else if (third == 0)
10560: xmlGenericError(xmlGenericErrorContext,
10561: "PP: lookup '%c%c' failed\n", first, next);
10562: else
10563: xmlGenericError(xmlGenericErrorContext,
10564: "PP: lookup '%c%c%c' failed\n", first, next, third);
10565: #endif
10566: return(-1);
10567: }
10568:
10569: /**
10570: * xmlParseGetLasts:
10571: * @ctxt: an XML parser context
10572: * @lastlt: pointer to store the last '<' from the input
10573: * @lastgt: pointer to store the last '>' from the input
10574: *
10575: * Lookup the last < and > in the current chunk
10576: */
10577: static void
10578: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10579: const xmlChar **lastgt) {
10580: const xmlChar *tmp;
10581:
10582: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10583: xmlGenericError(xmlGenericErrorContext,
10584: "Internal error: xmlParseGetLasts\n");
10585: return;
10586: }
10587: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10588: tmp = ctxt->input->end;
10589: tmp--;
10590: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10591: if (tmp < ctxt->input->base) {
10592: *lastlt = NULL;
10593: *lastgt = NULL;
10594: } else {
10595: *lastlt = tmp;
10596: tmp++;
10597: while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10598: if (*tmp == '\'') {
10599: tmp++;
10600: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10601: if (tmp < ctxt->input->end) tmp++;
10602: } else if (*tmp == '"') {
10603: tmp++;
10604: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10605: if (tmp < ctxt->input->end) tmp++;
10606: } else
10607: tmp++;
10608: }
10609: if (tmp < ctxt->input->end)
10610: *lastgt = tmp;
10611: else {
10612: tmp = *lastlt;
10613: tmp--;
10614: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10615: if (tmp >= ctxt->input->base)
10616: *lastgt = tmp;
10617: else
10618: *lastgt = NULL;
10619: }
10620: }
10621: } else {
10622: *lastlt = NULL;
10623: *lastgt = NULL;
10624: }
10625: }
10626: /**
10627: * xmlCheckCdataPush:
10628: * @cur: pointer to the bock of characters
10629: * @len: length of the block in bytes
10630: *
10631: * Check that the block of characters is okay as SCdata content [20]
10632: *
10633: * Returns the number of bytes to pass if okay, a negative index where an
10634: * UTF-8 error occured otherwise
10635: */
10636: static int
10637: xmlCheckCdataPush(const xmlChar *utf, int len) {
10638: int ix;
10639: unsigned char c;
10640: int codepoint;
10641:
10642: if ((utf == NULL) || (len <= 0))
10643: return(0);
10644:
10645: for (ix = 0; ix < len;) { /* string is 0-terminated */
10646: c = utf[ix];
10647: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10648: if (c >= 0x20)
10649: ix++;
10650: else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10651: ix++;
10652: else
10653: return(-ix);
10654: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10655: if (ix + 2 > len) return(ix);
10656: if ((utf[ix+1] & 0xc0 ) != 0x80)
10657: return(-ix);
10658: codepoint = (utf[ix] & 0x1f) << 6;
10659: codepoint |= utf[ix+1] & 0x3f;
10660: if (!xmlIsCharQ(codepoint))
10661: return(-ix);
10662: ix += 2;
10663: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664: if (ix + 3 > len) return(ix);
10665: if (((utf[ix+1] & 0xc0) != 0x80) ||
10666: ((utf[ix+2] & 0xc0) != 0x80))
10667: return(-ix);
10668: codepoint = (utf[ix] & 0xf) << 12;
10669: codepoint |= (utf[ix+1] & 0x3f) << 6;
10670: codepoint |= utf[ix+2] & 0x3f;
10671: if (!xmlIsCharQ(codepoint))
10672: return(-ix);
10673: ix += 3;
10674: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675: if (ix + 4 > len) return(ix);
10676: if (((utf[ix+1] & 0xc0) != 0x80) ||
10677: ((utf[ix+2] & 0xc0) != 0x80) ||
10678: ((utf[ix+3] & 0xc0) != 0x80))
10679: return(-ix);
10680: codepoint = (utf[ix] & 0x7) << 18;
10681: codepoint |= (utf[ix+1] & 0x3f) << 12;
10682: codepoint |= (utf[ix+2] & 0x3f) << 6;
10683: codepoint |= utf[ix+3] & 0x3f;
10684: if (!xmlIsCharQ(codepoint))
10685: return(-ix);
10686: ix += 4;
10687: } else /* unknown encoding */
10688: return(-ix);
10689: }
10690: return(ix);
10691: }
10692:
10693: /**
10694: * xmlParseTryOrFinish:
10695: * @ctxt: an XML parser context
10696: * @terminate: last chunk indicator
10697: *
10698: * Try to progress on parsing
10699: *
10700: * Returns zero if no parsing was possible
10701: */
10702: static int
10703: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10704: int ret = 0;
10705: int avail, tlen;
10706: xmlChar cur, next;
10707: const xmlChar *lastlt, *lastgt;
10708:
10709: if (ctxt->input == NULL)
10710: return(0);
10711:
10712: #ifdef DEBUG_PUSH
10713: switch (ctxt->instate) {
10714: case XML_PARSER_EOF:
10715: xmlGenericError(xmlGenericErrorContext,
10716: "PP: try EOF\n"); break;
10717: case XML_PARSER_START:
10718: xmlGenericError(xmlGenericErrorContext,
10719: "PP: try START\n"); break;
10720: case XML_PARSER_MISC:
10721: xmlGenericError(xmlGenericErrorContext,
10722: "PP: try MISC\n");break;
10723: case XML_PARSER_COMMENT:
10724: xmlGenericError(xmlGenericErrorContext,
10725: "PP: try COMMENT\n");break;
10726: case XML_PARSER_PROLOG:
10727: xmlGenericError(xmlGenericErrorContext,
10728: "PP: try PROLOG\n");break;
10729: case XML_PARSER_START_TAG:
10730: xmlGenericError(xmlGenericErrorContext,
10731: "PP: try START_TAG\n");break;
10732: case XML_PARSER_CONTENT:
10733: xmlGenericError(xmlGenericErrorContext,
10734: "PP: try CONTENT\n");break;
10735: case XML_PARSER_CDATA_SECTION:
10736: xmlGenericError(xmlGenericErrorContext,
10737: "PP: try CDATA_SECTION\n");break;
10738: case XML_PARSER_END_TAG:
10739: xmlGenericError(xmlGenericErrorContext,
10740: "PP: try END_TAG\n");break;
10741: case XML_PARSER_ENTITY_DECL:
10742: xmlGenericError(xmlGenericErrorContext,
10743: "PP: try ENTITY_DECL\n");break;
10744: case XML_PARSER_ENTITY_VALUE:
10745: xmlGenericError(xmlGenericErrorContext,
10746: "PP: try ENTITY_VALUE\n");break;
10747: case XML_PARSER_ATTRIBUTE_VALUE:
10748: xmlGenericError(xmlGenericErrorContext,
10749: "PP: try ATTRIBUTE_VALUE\n");break;
10750: case XML_PARSER_DTD:
10751: xmlGenericError(xmlGenericErrorContext,
10752: "PP: try DTD\n");break;
10753: case XML_PARSER_EPILOG:
10754: xmlGenericError(xmlGenericErrorContext,
10755: "PP: try EPILOG\n");break;
10756: case XML_PARSER_PI:
10757: xmlGenericError(xmlGenericErrorContext,
10758: "PP: try PI\n");break;
10759: case XML_PARSER_IGNORE:
10760: xmlGenericError(xmlGenericErrorContext,
10761: "PP: try IGNORE\n");break;
10762: }
10763: #endif
10764:
10765: if ((ctxt->input != NULL) &&
10766: (ctxt->input->cur - ctxt->input->base > 4096)) {
10767: xmlSHRINK(ctxt);
10768: ctxt->checkIndex = 0;
10769: }
10770: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10771:
10772: while (1) {
10773: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10774: return(0);
10775:
10776:
10777: /*
10778: * Pop-up of finished entities.
10779: */
10780: while ((RAW == 0) && (ctxt->inputNr > 1))
10781: xmlPopInput(ctxt);
10782:
10783: if (ctxt->input == NULL) break;
10784: if (ctxt->input->buf == NULL)
10785: avail = ctxt->input->length -
10786: (ctxt->input->cur - ctxt->input->base);
10787: else {
10788: /*
10789: * If we are operating on converted input, try to flush
10790: * remainng chars to avoid them stalling in the non-converted
10791: * buffer.
10792: */
10793: if ((ctxt->input->buf->raw != NULL) &&
10794: (ctxt->input->buf->raw->use > 0)) {
10795: int base = ctxt->input->base -
10796: ctxt->input->buf->buffer->content;
10797: int current = ctxt->input->cur - ctxt->input->base;
10798:
10799: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10800: ctxt->input->base = ctxt->input->buf->buffer->content + base;
10801: ctxt->input->cur = ctxt->input->base + current;
10802: ctxt->input->end =
10803: &ctxt->input->buf->buffer->content[
10804: ctxt->input->buf->buffer->use];
10805: }
10806: avail = ctxt->input->buf->buffer->use -
10807: (ctxt->input->cur - ctxt->input->base);
10808: }
10809: if (avail < 1)
10810: goto done;
10811: switch (ctxt->instate) {
10812: case XML_PARSER_EOF:
10813: /*
10814: * Document parsing is done !
10815: */
10816: goto done;
10817: case XML_PARSER_START:
10818: if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10819: xmlChar start[4];
10820: xmlCharEncoding enc;
10821:
10822: /*
10823: * Very first chars read from the document flow.
10824: */
10825: if (avail < 4)
10826: goto done;
10827:
10828: /*
10829: * Get the 4 first bytes and decode the charset
10830: * if enc != XML_CHAR_ENCODING_NONE
10831: * plug some encoding conversion routines,
10832: * else xmlSwitchEncoding will set to (default)
10833: * UTF8.
10834: */
10835: start[0] = RAW;
10836: start[1] = NXT(1);
10837: start[2] = NXT(2);
10838: start[3] = NXT(3);
10839: enc = xmlDetectCharEncoding(start, 4);
10840: xmlSwitchEncoding(ctxt, enc);
10841: break;
10842: }
10843:
10844: if (avail < 2)
10845: goto done;
10846: cur = ctxt->input->cur[0];
10847: next = ctxt->input->cur[1];
10848: if (cur == 0) {
10849: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10850: ctxt->sax->setDocumentLocator(ctxt->userData,
10851: &xmlDefaultSAXLocator);
10852: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10853: ctxt->instate = XML_PARSER_EOF;
10854: #ifdef DEBUG_PUSH
10855: xmlGenericError(xmlGenericErrorContext,
10856: "PP: entering EOF\n");
10857: #endif
10858: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859: ctxt->sax->endDocument(ctxt->userData);
10860: goto done;
10861: }
10862: if ((cur == '<') && (next == '?')) {
10863: /* PI or XML decl */
10864: if (avail < 5) return(ret);
10865: if ((!terminate) &&
10866: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10867: return(ret);
10868: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10869: ctxt->sax->setDocumentLocator(ctxt->userData,
10870: &xmlDefaultSAXLocator);
10871: if ((ctxt->input->cur[2] == 'x') &&
10872: (ctxt->input->cur[3] == 'm') &&
10873: (ctxt->input->cur[4] == 'l') &&
10874: (IS_BLANK_CH(ctxt->input->cur[5]))) {
10875: ret += 5;
10876: #ifdef DEBUG_PUSH
10877: xmlGenericError(xmlGenericErrorContext,
10878: "PP: Parsing XML Decl\n");
10879: #endif
10880: xmlParseXMLDecl(ctxt);
10881: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10882: /*
10883: * The XML REC instructs us to stop parsing right
10884: * here
10885: */
10886: ctxt->instate = XML_PARSER_EOF;
10887: return(0);
10888: }
10889: ctxt->standalone = ctxt->input->standalone;
10890: if ((ctxt->encoding == NULL) &&
10891: (ctxt->input->encoding != NULL))
10892: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10893: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10894: (!ctxt->disableSAX))
10895: ctxt->sax->startDocument(ctxt->userData);
10896: ctxt->instate = XML_PARSER_MISC;
10897: #ifdef DEBUG_PUSH
10898: xmlGenericError(xmlGenericErrorContext,
10899: "PP: entering MISC\n");
10900: #endif
10901: } else {
10902: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10903: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10904: (!ctxt->disableSAX))
10905: ctxt->sax->startDocument(ctxt->userData);
10906: ctxt->instate = XML_PARSER_MISC;
10907: #ifdef DEBUG_PUSH
10908: xmlGenericError(xmlGenericErrorContext,
10909: "PP: entering MISC\n");
10910: #endif
10911: }
10912: } else {
10913: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10914: ctxt->sax->setDocumentLocator(ctxt->userData,
10915: &xmlDefaultSAXLocator);
10916: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917: if (ctxt->version == NULL) {
10918: xmlErrMemory(ctxt, NULL);
10919: break;
10920: }
10921: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10922: (!ctxt->disableSAX))
10923: ctxt->sax->startDocument(ctxt->userData);
10924: ctxt->instate = XML_PARSER_MISC;
10925: #ifdef DEBUG_PUSH
10926: xmlGenericError(xmlGenericErrorContext,
10927: "PP: entering MISC\n");
10928: #endif
10929: }
10930: break;
10931: case XML_PARSER_START_TAG: {
10932: const xmlChar *name;
10933: const xmlChar *prefix = NULL;
10934: const xmlChar *URI = NULL;
10935: int nsNr = ctxt->nsNr;
10936:
10937: if ((avail < 2) && (ctxt->inputNr == 1))
10938: goto done;
10939: cur = ctxt->input->cur[0];
10940: if (cur != '<') {
10941: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10942: ctxt->instate = XML_PARSER_EOF;
10943: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10944: ctxt->sax->endDocument(ctxt->userData);
10945: goto done;
10946: }
10947: if (!terminate) {
10948: if (ctxt->progressive) {
10949: /* > can be found unescaped in attribute values */
10950: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10951: goto done;
10952: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10953: goto done;
10954: }
10955: }
10956: if (ctxt->spaceNr == 0)
10957: spacePush(ctxt, -1);
10958: else if (*ctxt->space == -2)
10959: spacePush(ctxt, -1);
10960: else
10961: spacePush(ctxt, *ctxt->space);
10962: #ifdef LIBXML_SAX1_ENABLED
10963: if (ctxt->sax2)
10964: #endif /* LIBXML_SAX1_ENABLED */
10965: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10966: #ifdef LIBXML_SAX1_ENABLED
10967: else
10968: name = xmlParseStartTag(ctxt);
10969: #endif /* LIBXML_SAX1_ENABLED */
10970: if (name == NULL) {
10971: spacePop(ctxt);
10972: ctxt->instate = XML_PARSER_EOF;
10973: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974: ctxt->sax->endDocument(ctxt->userData);
10975: goto done;
10976: }
10977: #ifdef LIBXML_VALID_ENABLED
10978: /*
10979: * [ VC: Root Element Type ]
10980: * The Name in the document type declaration must match
10981: * the element type of the root element.
10982: */
10983: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984: ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10986: #endif /* LIBXML_VALID_ENABLED */
10987:
10988: /*
10989: * Check for an Empty Element.
10990: */
10991: if ((RAW == '/') && (NXT(1) == '>')) {
10992: SKIP(2);
10993:
10994: if (ctxt->sax2) {
10995: if ((ctxt->sax != NULL) &&
10996: (ctxt->sax->endElementNs != NULL) &&
10997: (!ctxt->disableSAX))
10998: ctxt->sax->endElementNs(ctxt->userData, name,
10999: prefix, URI);
11000: if (ctxt->nsNr - nsNr > 0)
11001: nsPop(ctxt, ctxt->nsNr - nsNr);
11002: #ifdef LIBXML_SAX1_ENABLED
11003: } else {
11004: if ((ctxt->sax != NULL) &&
11005: (ctxt->sax->endElement != NULL) &&
11006: (!ctxt->disableSAX))
11007: ctxt->sax->endElement(ctxt->userData, name);
11008: #endif /* LIBXML_SAX1_ENABLED */
11009: }
11010: spacePop(ctxt);
11011: if (ctxt->nameNr == 0) {
11012: ctxt->instate = XML_PARSER_EPILOG;
11013: } else {
11014: ctxt->instate = XML_PARSER_CONTENT;
11015: }
11016: break;
11017: }
11018: if (RAW == '>') {
11019: NEXT;
11020: } else {
11021: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11022: "Couldn't find end of Start Tag %s\n",
11023: name);
11024: nodePop(ctxt);
11025: spacePop(ctxt);
11026: }
11027: if (ctxt->sax2)
11028: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11029: #ifdef LIBXML_SAX1_ENABLED
11030: else
11031: namePush(ctxt, name);
11032: #endif /* LIBXML_SAX1_ENABLED */
11033:
11034: ctxt->instate = XML_PARSER_CONTENT;
11035: break;
11036: }
11037: case XML_PARSER_CONTENT: {
11038: const xmlChar *test;
11039: unsigned int cons;
11040: if ((avail < 2) && (ctxt->inputNr == 1))
11041: goto done;
11042: cur = ctxt->input->cur[0];
11043: next = ctxt->input->cur[1];
11044:
11045: test = CUR_PTR;
11046: cons = ctxt->input->consumed;
11047: if ((cur == '<') && (next == '/')) {
11048: ctxt->instate = XML_PARSER_END_TAG;
11049: break;
11050: } else if ((cur == '<') && (next == '?')) {
11051: if ((!terminate) &&
11052: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11053: goto done;
11054: xmlParsePI(ctxt);
11055: } else if ((cur == '<') && (next != '!')) {
11056: ctxt->instate = XML_PARSER_START_TAG;
11057: break;
11058: } else if ((cur == '<') && (next == '!') &&
11059: (ctxt->input->cur[2] == '-') &&
11060: (ctxt->input->cur[3] == '-')) {
11061: int term;
11062:
11063: if (avail < 4)
11064: goto done;
11065: ctxt->input->cur += 4;
11066: term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11067: ctxt->input->cur -= 4;
11068: if ((!terminate) && (term < 0))
11069: goto done;
11070: xmlParseComment(ctxt);
11071: ctxt->instate = XML_PARSER_CONTENT;
11072: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11073: (ctxt->input->cur[2] == '[') &&
11074: (ctxt->input->cur[3] == 'C') &&
11075: (ctxt->input->cur[4] == 'D') &&
11076: (ctxt->input->cur[5] == 'A') &&
11077: (ctxt->input->cur[6] == 'T') &&
11078: (ctxt->input->cur[7] == 'A') &&
11079: (ctxt->input->cur[8] == '[')) {
11080: SKIP(9);
11081: ctxt->instate = XML_PARSER_CDATA_SECTION;
11082: break;
11083: } else if ((cur == '<') && (next == '!') &&
11084: (avail < 9)) {
11085: goto done;
11086: } else if (cur == '&') {
11087: if ((!terminate) &&
11088: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11089: goto done;
11090: xmlParseReference(ctxt);
11091: } else {
11092: /* TODO Avoid the extra copy, handle directly !!! */
11093: /*
11094: * Goal of the following test is:
11095: * - minimize calls to the SAX 'character' callback
11096: * when they are mergeable
11097: * - handle an problem for isBlank when we only parse
11098: * a sequence of blank chars and the next one is
11099: * not available to check against '<' presence.
11100: * - tries to homogenize the differences in SAX
11101: * callbacks between the push and pull versions
11102: * of the parser.
11103: */
11104: if ((ctxt->inputNr == 1) &&
11105: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11106: if (!terminate) {
11107: if (ctxt->progressive) {
11108: if ((lastlt == NULL) ||
11109: (ctxt->input->cur > lastlt))
11110: goto done;
11111: } else if (xmlParseLookupSequence(ctxt,
11112: '<', 0, 0) < 0) {
11113: goto done;
11114: }
11115: }
11116: }
11117: ctxt->checkIndex = 0;
11118: xmlParseCharData(ctxt, 0);
11119: }
11120: /*
11121: * Pop-up of finished entities.
11122: */
11123: while ((RAW == 0) && (ctxt->inputNr > 1))
11124: xmlPopInput(ctxt);
11125: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11126: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11127: "detected an error in element content\n");
11128: ctxt->instate = XML_PARSER_EOF;
11129: break;
11130: }
11131: break;
11132: }
11133: case XML_PARSER_END_TAG:
11134: if (avail < 2)
11135: goto done;
11136: if (!terminate) {
11137: if (ctxt->progressive) {
11138: /* > can be found unescaped in attribute values */
11139: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11140: goto done;
11141: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11142: goto done;
11143: }
11144: }
11145: if (ctxt->sax2) {
11146: xmlParseEndTag2(ctxt,
11147: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11148: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11149: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11150: nameNsPop(ctxt);
11151: }
11152: #ifdef LIBXML_SAX1_ENABLED
11153: else
11154: xmlParseEndTag1(ctxt, 0);
11155: #endif /* LIBXML_SAX1_ENABLED */
11156: if (ctxt->nameNr == 0) {
11157: ctxt->instate = XML_PARSER_EPILOG;
11158: } else {
11159: ctxt->instate = XML_PARSER_CONTENT;
11160: }
11161: break;
11162: case XML_PARSER_CDATA_SECTION: {
11163: /*
11164: * The Push mode need to have the SAX callback for
11165: * cdataBlock merge back contiguous callbacks.
11166: */
11167: int base;
11168:
11169: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11170: if (base < 0) {
11171: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11172: int tmp;
11173:
11174: tmp = xmlCheckCdataPush(ctxt->input->cur,
11175: XML_PARSER_BIG_BUFFER_SIZE);
11176: if (tmp < 0) {
11177: tmp = -tmp;
11178: ctxt->input->cur += tmp;
11179: goto encoding_error;
11180: }
11181: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11182: if (ctxt->sax->cdataBlock != NULL)
11183: ctxt->sax->cdataBlock(ctxt->userData,
11184: ctxt->input->cur, tmp);
11185: else if (ctxt->sax->characters != NULL)
11186: ctxt->sax->characters(ctxt->userData,
11187: ctxt->input->cur, tmp);
11188: }
11189: SKIPL(tmp);
11190: ctxt->checkIndex = 0;
11191: }
11192: goto done;
11193: } else {
11194: int tmp;
11195:
11196: tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11197: if ((tmp < 0) || (tmp != base)) {
11198: tmp = -tmp;
11199: ctxt->input->cur += tmp;
11200: goto encoding_error;
11201: }
11202: if ((ctxt->sax != NULL) && (base == 0) &&
11203: (ctxt->sax->cdataBlock != NULL) &&
11204: (!ctxt->disableSAX)) {
11205: /*
11206: * Special case to provide identical behaviour
11207: * between pull and push parsers on enpty CDATA
11208: * sections
11209: */
11210: if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11211: (!strncmp((const char *)&ctxt->input->cur[-9],
11212: "<![CDATA[", 9)))
11213: ctxt->sax->cdataBlock(ctxt->userData,
11214: BAD_CAST "", 0);
11215: } else if ((ctxt->sax != NULL) && (base > 0) &&
11216: (!ctxt->disableSAX)) {
11217: if (ctxt->sax->cdataBlock != NULL)
11218: ctxt->sax->cdataBlock(ctxt->userData,
11219: ctxt->input->cur, base);
11220: else if (ctxt->sax->characters != NULL)
11221: ctxt->sax->characters(ctxt->userData,
11222: ctxt->input->cur, base);
11223: }
11224: SKIPL(base + 3);
11225: ctxt->checkIndex = 0;
11226: ctxt->instate = XML_PARSER_CONTENT;
11227: #ifdef DEBUG_PUSH
11228: xmlGenericError(xmlGenericErrorContext,
11229: "PP: entering CONTENT\n");
11230: #endif
11231: }
11232: break;
11233: }
11234: case XML_PARSER_MISC:
11235: SKIP_BLANKS;
11236: if (ctxt->input->buf == NULL)
11237: avail = ctxt->input->length -
11238: (ctxt->input->cur - ctxt->input->base);
11239: else
11240: avail = ctxt->input->buf->buffer->use -
11241: (ctxt->input->cur - ctxt->input->base);
11242: if (avail < 2)
11243: goto done;
11244: cur = ctxt->input->cur[0];
11245: next = ctxt->input->cur[1];
11246: if ((cur == '<') && (next == '?')) {
11247: if ((!terminate) &&
11248: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11249: goto done;
11250: #ifdef DEBUG_PUSH
11251: xmlGenericError(xmlGenericErrorContext,
11252: "PP: Parsing PI\n");
11253: #endif
11254: xmlParsePI(ctxt);
11255: ctxt->checkIndex = 0;
11256: } else if ((cur == '<') && (next == '!') &&
11257: (ctxt->input->cur[2] == '-') &&
11258: (ctxt->input->cur[3] == '-')) {
11259: if ((!terminate) &&
11260: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11261: goto done;
11262: #ifdef DEBUG_PUSH
11263: xmlGenericError(xmlGenericErrorContext,
11264: "PP: Parsing Comment\n");
11265: #endif
11266: xmlParseComment(ctxt);
11267: ctxt->instate = XML_PARSER_MISC;
11268: ctxt->checkIndex = 0;
11269: } else if ((cur == '<') && (next == '!') &&
11270: (ctxt->input->cur[2] == 'D') &&
11271: (ctxt->input->cur[3] == 'O') &&
11272: (ctxt->input->cur[4] == 'C') &&
11273: (ctxt->input->cur[5] == 'T') &&
11274: (ctxt->input->cur[6] == 'Y') &&
11275: (ctxt->input->cur[7] == 'P') &&
11276: (ctxt->input->cur[8] == 'E')) {
11277: if ((!terminate) &&
11278: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11279: goto done;
11280: #ifdef DEBUG_PUSH
11281: xmlGenericError(xmlGenericErrorContext,
11282: "PP: Parsing internal subset\n");
11283: #endif
11284: ctxt->inSubset = 1;
11285: xmlParseDocTypeDecl(ctxt);
11286: if (RAW == '[') {
11287: ctxt->instate = XML_PARSER_DTD;
11288: #ifdef DEBUG_PUSH
11289: xmlGenericError(xmlGenericErrorContext,
11290: "PP: entering DTD\n");
11291: #endif
11292: } else {
11293: /*
11294: * Create and update the external subset.
11295: */
11296: ctxt->inSubset = 2;
11297: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11298: (ctxt->sax->externalSubset != NULL))
11299: ctxt->sax->externalSubset(ctxt->userData,
11300: ctxt->intSubName, ctxt->extSubSystem,
11301: ctxt->extSubURI);
11302: ctxt->inSubset = 0;
11303: xmlCleanSpecialAttr(ctxt);
11304: ctxt->instate = XML_PARSER_PROLOG;
11305: #ifdef DEBUG_PUSH
11306: xmlGenericError(xmlGenericErrorContext,
11307: "PP: entering PROLOG\n");
11308: #endif
11309: }
11310: } else if ((cur == '<') && (next == '!') &&
11311: (avail < 9)) {
11312: goto done;
11313: } else {
11314: ctxt->instate = XML_PARSER_START_TAG;
11315: ctxt->progressive = 1;
11316: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11317: #ifdef DEBUG_PUSH
11318: xmlGenericError(xmlGenericErrorContext,
11319: "PP: entering START_TAG\n");
11320: #endif
11321: }
11322: break;
11323: case XML_PARSER_PROLOG:
11324: SKIP_BLANKS;
11325: if (ctxt->input->buf == NULL)
11326: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11327: else
11328: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11329: if (avail < 2)
11330: goto done;
11331: cur = ctxt->input->cur[0];
11332: next = ctxt->input->cur[1];
11333: if ((cur == '<') && (next == '?')) {
11334: if ((!terminate) &&
11335: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11336: goto done;
11337: #ifdef DEBUG_PUSH
11338: xmlGenericError(xmlGenericErrorContext,
11339: "PP: Parsing PI\n");
11340: #endif
11341: xmlParsePI(ctxt);
11342: } else if ((cur == '<') && (next == '!') &&
11343: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11344: if ((!terminate) &&
11345: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11346: goto done;
11347: #ifdef DEBUG_PUSH
11348: xmlGenericError(xmlGenericErrorContext,
11349: "PP: Parsing Comment\n");
11350: #endif
11351: xmlParseComment(ctxt);
11352: ctxt->instate = XML_PARSER_PROLOG;
11353: } else if ((cur == '<') && (next == '!') &&
11354: (avail < 4)) {
11355: goto done;
11356: } else {
11357: ctxt->instate = XML_PARSER_START_TAG;
11358: if (ctxt->progressive == 0)
11359: ctxt->progressive = 1;
11360: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11361: #ifdef DEBUG_PUSH
11362: xmlGenericError(xmlGenericErrorContext,
11363: "PP: entering START_TAG\n");
11364: #endif
11365: }
11366: break;
11367: case XML_PARSER_EPILOG:
11368: SKIP_BLANKS;
11369: if (ctxt->input->buf == NULL)
11370: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11371: else
11372: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11373: if (avail < 2)
11374: goto done;
11375: cur = ctxt->input->cur[0];
11376: next = ctxt->input->cur[1];
11377: if ((cur == '<') && (next == '?')) {
11378: if ((!terminate) &&
11379: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11380: goto done;
11381: #ifdef DEBUG_PUSH
11382: xmlGenericError(xmlGenericErrorContext,
11383: "PP: Parsing PI\n");
11384: #endif
11385: xmlParsePI(ctxt);
11386: ctxt->instate = XML_PARSER_EPILOG;
11387: } else if ((cur == '<') && (next == '!') &&
11388: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11389: if ((!terminate) &&
11390: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11391: goto done;
11392: #ifdef DEBUG_PUSH
11393: xmlGenericError(xmlGenericErrorContext,
11394: "PP: Parsing Comment\n");
11395: #endif
11396: xmlParseComment(ctxt);
11397: ctxt->instate = XML_PARSER_EPILOG;
11398: } else if ((cur == '<') && (next == '!') &&
11399: (avail < 4)) {
11400: goto done;
11401: } else {
11402: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11403: ctxt->instate = XML_PARSER_EOF;
11404: #ifdef DEBUG_PUSH
11405: xmlGenericError(xmlGenericErrorContext,
11406: "PP: entering EOF\n");
11407: #endif
11408: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11409: ctxt->sax->endDocument(ctxt->userData);
11410: goto done;
11411: }
11412: break;
11413: case XML_PARSER_DTD: {
11414: /*
11415: * Sorry but progressive parsing of the internal subset
11416: * is not expected to be supported. We first check that
11417: * the full content of the internal subset is available and
11418: * the parsing is launched only at that point.
11419: * Internal subset ends up with "']' S? '>'" in an unescaped
11420: * section and not in a ']]>' sequence which are conditional
11421: * sections (whoever argued to keep that crap in XML deserve
11422: * a place in hell !).
11423: */
11424: int base, i;
11425: xmlChar *buf;
11426: xmlChar quote = 0;
11427:
11428: base = ctxt->input->cur - ctxt->input->base;
11429: if (base < 0) return(0);
11430: if (ctxt->checkIndex > base)
11431: base = ctxt->checkIndex;
11432: buf = ctxt->input->buf->buffer->content;
11433: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11434: base++) {
11435: if (quote != 0) {
11436: if (buf[base] == quote)
11437: quote = 0;
11438: continue;
11439: }
11440: if ((quote == 0) && (buf[base] == '<')) {
11441: int found = 0;
11442: /* special handling of comments */
11443: if (((unsigned int) base + 4 <
11444: ctxt->input->buf->buffer->use) &&
11445: (buf[base + 1] == '!') &&
11446: (buf[base + 2] == '-') &&
11447: (buf[base + 3] == '-')) {
11448: for (;(unsigned int) base + 3 <
11449: ctxt->input->buf->buffer->use; base++) {
11450: if ((buf[base] == '-') &&
11451: (buf[base + 1] == '-') &&
11452: (buf[base + 2] == '>')) {
11453: found = 1;
11454: base += 2;
11455: break;
11456: }
11457: }
11458: if (!found) {
11459: #if 0
11460: fprintf(stderr, "unfinished comment\n");
11461: #endif
11462: break; /* for */
11463: }
11464: continue;
11465: }
11466: }
11467: if (buf[base] == '"') {
11468: quote = '"';
11469: continue;
11470: }
11471: if (buf[base] == '\'') {
11472: quote = '\'';
11473: continue;
11474: }
11475: if (buf[base] == ']') {
11476: #if 0
11477: fprintf(stderr, "%c%c%c%c: ", buf[base],
11478: buf[base + 1], buf[base + 2], buf[base + 3]);
11479: #endif
11480: if ((unsigned int) base +1 >=
11481: ctxt->input->buf->buffer->use)
11482: break;
11483: if (buf[base + 1] == ']') {
11484: /* conditional crap, skip both ']' ! */
11485: base++;
11486: continue;
11487: }
11488: for (i = 1;
11489: (unsigned int) base + i < ctxt->input->buf->buffer->use;
11490: i++) {
11491: if (buf[base + i] == '>') {
11492: #if 0
11493: fprintf(stderr, "found\n");
11494: #endif
11495: goto found_end_int_subset;
11496: }
11497: if (!IS_BLANK_CH(buf[base + i])) {
11498: #if 0
11499: fprintf(stderr, "not found\n");
11500: #endif
11501: goto not_end_of_int_subset;
11502: }
11503: }
11504: #if 0
11505: fprintf(stderr, "end of stream\n");
11506: #endif
11507: break;
11508:
11509: }
11510: not_end_of_int_subset:
11511: continue; /* for */
11512: }
11513: /*
11514: * We didn't found the end of the Internal subset
11515: */
11516: #ifdef DEBUG_PUSH
11517: if (next == 0)
11518: xmlGenericError(xmlGenericErrorContext,
11519: "PP: lookup of int subset end filed\n");
11520: #endif
11521: goto done;
11522:
11523: found_end_int_subset:
11524: xmlParseInternalSubset(ctxt);
11525: ctxt->inSubset = 2;
11526: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11527: (ctxt->sax->externalSubset != NULL))
11528: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11529: ctxt->extSubSystem, ctxt->extSubURI);
11530: ctxt->inSubset = 0;
11531: xmlCleanSpecialAttr(ctxt);
11532: ctxt->instate = XML_PARSER_PROLOG;
11533: ctxt->checkIndex = 0;
11534: #ifdef DEBUG_PUSH
11535: xmlGenericError(xmlGenericErrorContext,
11536: "PP: entering PROLOG\n");
11537: #endif
11538: break;
11539: }
11540: case XML_PARSER_COMMENT:
11541: xmlGenericError(xmlGenericErrorContext,
11542: "PP: internal error, state == COMMENT\n");
11543: ctxt->instate = XML_PARSER_CONTENT;
11544: #ifdef DEBUG_PUSH
11545: xmlGenericError(xmlGenericErrorContext,
11546: "PP: entering CONTENT\n");
11547: #endif
11548: break;
11549: case XML_PARSER_IGNORE:
11550: xmlGenericError(xmlGenericErrorContext,
11551: "PP: internal error, state == IGNORE");
11552: ctxt->instate = XML_PARSER_DTD;
11553: #ifdef DEBUG_PUSH
11554: xmlGenericError(xmlGenericErrorContext,
11555: "PP: entering DTD\n");
11556: #endif
11557: break;
11558: case XML_PARSER_PI:
11559: xmlGenericError(xmlGenericErrorContext,
11560: "PP: internal error, state == PI\n");
11561: ctxt->instate = XML_PARSER_CONTENT;
11562: #ifdef DEBUG_PUSH
11563: xmlGenericError(xmlGenericErrorContext,
11564: "PP: entering CONTENT\n");
11565: #endif
11566: break;
11567: case XML_PARSER_ENTITY_DECL:
11568: xmlGenericError(xmlGenericErrorContext,
11569: "PP: internal error, state == ENTITY_DECL\n");
11570: ctxt->instate = XML_PARSER_DTD;
11571: #ifdef DEBUG_PUSH
11572: xmlGenericError(xmlGenericErrorContext,
11573: "PP: entering DTD\n");
11574: #endif
11575: break;
11576: case XML_PARSER_ENTITY_VALUE:
11577: xmlGenericError(xmlGenericErrorContext,
11578: "PP: internal error, state == ENTITY_VALUE\n");
11579: ctxt->instate = XML_PARSER_CONTENT;
11580: #ifdef DEBUG_PUSH
11581: xmlGenericError(xmlGenericErrorContext,
11582: "PP: entering DTD\n");
11583: #endif
11584: break;
11585: case XML_PARSER_ATTRIBUTE_VALUE:
11586: xmlGenericError(xmlGenericErrorContext,
11587: "PP: internal error, state == ATTRIBUTE_VALUE\n");
11588: ctxt->instate = XML_PARSER_START_TAG;
11589: #ifdef DEBUG_PUSH
11590: xmlGenericError(xmlGenericErrorContext,
11591: "PP: entering START_TAG\n");
11592: #endif
11593: break;
11594: case XML_PARSER_SYSTEM_LITERAL:
11595: xmlGenericError(xmlGenericErrorContext,
11596: "PP: internal error, state == SYSTEM_LITERAL\n");
11597: ctxt->instate = XML_PARSER_START_TAG;
11598: #ifdef DEBUG_PUSH
11599: xmlGenericError(xmlGenericErrorContext,
11600: "PP: entering START_TAG\n");
11601: #endif
11602: break;
11603: case XML_PARSER_PUBLIC_LITERAL:
11604: xmlGenericError(xmlGenericErrorContext,
11605: "PP: internal error, state == PUBLIC_LITERAL\n");
11606: ctxt->instate = XML_PARSER_START_TAG;
11607: #ifdef DEBUG_PUSH
11608: xmlGenericError(xmlGenericErrorContext,
11609: "PP: entering START_TAG\n");
11610: #endif
11611: break;
11612: }
11613: }
11614: done:
11615: #ifdef DEBUG_PUSH
11616: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11617: #endif
11618: return(ret);
11619: encoding_error:
11620: {
11621: char buffer[150];
11622:
11623: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11624: ctxt->input->cur[0], ctxt->input->cur[1],
11625: ctxt->input->cur[2], ctxt->input->cur[3]);
11626: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11627: "Input is not proper UTF-8, indicate encoding !\n%s",
11628: BAD_CAST buffer, NULL);
11629: }
11630: return(0);
11631: }
11632:
11633: /**
11634: * xmlParseChunk:
11635: * @ctxt: an XML parser context
11636: * @chunk: an char array
11637: * @size: the size in byte of the chunk
11638: * @terminate: last chunk indicator
11639: *
11640: * Parse a Chunk of memory
11641: *
11642: * Returns zero if no error, the xmlParserErrors otherwise.
11643: */
11644: int
11645: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11646: int terminate) {
11647: int end_in_lf = 0;
11648: int remain = 0;
11649:
11650: if (ctxt == NULL)
11651: return(XML_ERR_INTERNAL_ERROR);
11652: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11653: return(ctxt->errNo);
11654: if (ctxt->instate == XML_PARSER_START)
11655: xmlDetectSAX2(ctxt);
11656: if ((size > 0) && (chunk != NULL) && (!terminate) &&
11657: (chunk[size - 1] == '\r')) {
11658: end_in_lf = 1;
11659: size--;
11660: }
11661:
11662: xmldecl_done:
11663:
11664: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11665: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11666: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11667: int cur = ctxt->input->cur - ctxt->input->base;
11668: int res;
11669:
11670: /*
11671: * Specific handling if we autodetected an encoding, we should not
11672: * push more than the first line ... which depend on the encoding
11673: * And only push the rest once the final encoding was detected
11674: */
11675: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11676: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11677: unsigned int len = 45;
11678:
11679: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11680: BAD_CAST "UTF-16")) ||
11681: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11682: BAD_CAST "UTF16")))
11683: len = 90;
11684: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11685: BAD_CAST "UCS-4")) ||
11686: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11687: BAD_CAST "UCS4")))
11688: len = 180;
11689:
11690: if (ctxt->input->buf->rawconsumed < len)
11691: len -= ctxt->input->buf->rawconsumed;
11692:
11693: /*
11694: * Change size for reading the initial declaration only
11695: * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11696: * will blindly copy extra bytes from memory.
11697: */
11698: if ((unsigned int) size > len) {
11699: remain = size - len;
11700: size = len;
11701: } else {
11702: remain = 0;
11703: }
11704: }
11705: res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11706: if (res < 0) {
11707: ctxt->errNo = XML_PARSER_EOF;
11708: ctxt->disableSAX = 1;
11709: return (XML_PARSER_EOF);
11710: }
11711: ctxt->input->base = ctxt->input->buf->buffer->content + base;
11712: ctxt->input->cur = ctxt->input->base + cur;
11713: ctxt->input->end =
11714: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11715: #ifdef DEBUG_PUSH
11716: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11717: #endif
11718:
11719: } else if (ctxt->instate != XML_PARSER_EOF) {
11720: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11721: xmlParserInputBufferPtr in = ctxt->input->buf;
11722: if ((in->encoder != NULL) && (in->buffer != NULL) &&
11723: (in->raw != NULL)) {
11724: int nbchars;
11725:
11726: nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11727: if (nbchars < 0) {
11728: /* TODO 2.6.0 */
11729: xmlGenericError(xmlGenericErrorContext,
11730: "xmlParseChunk: encoder error\n");
11731: return(XML_ERR_INVALID_ENCODING);
11732: }
11733: }
11734: }
11735: }
11736: if (remain != 0)
11737: xmlParseTryOrFinish(ctxt, 0);
11738: else
11739: xmlParseTryOrFinish(ctxt, terminate);
11740: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11741: return(ctxt->errNo);
11742:
11743: if (remain != 0) {
11744: chunk += size;
11745: size = remain;
11746: remain = 0;
11747: goto xmldecl_done;
11748: }
11749: if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11750: (ctxt->input->buf != NULL)) {
11751: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11752: }
11753: if (terminate) {
11754: /*
11755: * Check for termination
11756: */
11757: int avail = 0;
11758:
11759: if (ctxt->input != NULL) {
11760: if (ctxt->input->buf == NULL)
11761: avail = ctxt->input->length -
11762: (ctxt->input->cur - ctxt->input->base);
11763: else
11764: avail = ctxt->input->buf->buffer->use -
11765: (ctxt->input->cur - ctxt->input->base);
11766: }
11767:
11768: if ((ctxt->instate != XML_PARSER_EOF) &&
11769: (ctxt->instate != XML_PARSER_EPILOG)) {
11770: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11771: }
11772: if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11773: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11774: }
11775: if (ctxt->instate != XML_PARSER_EOF) {
11776: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777: ctxt->sax->endDocument(ctxt->userData);
11778: }
11779: ctxt->instate = XML_PARSER_EOF;
11780: }
11781: return((xmlParserErrors) ctxt->errNo);
11782: }
11783:
11784: /************************************************************************
11785: * *
11786: * I/O front end functions to the parser *
11787: * *
11788: ************************************************************************/
11789:
11790: /**
11791: * xmlCreatePushParserCtxt:
11792: * @sax: a SAX handler
11793: * @user_data: The user data returned on SAX callbacks
11794: * @chunk: a pointer to an array of chars
11795: * @size: number of chars in the array
11796: * @filename: an optional file name or URI
11797: *
11798: * Create a parser context for using the XML parser in push mode.
11799: * If @chunk is non-NULL and @size is non-zero, the data is used to detect
11800: * the encoding. The remaining characters will be parsed so they
11801: * don't need to be fed in again through xmlParseChunk.
11802: * To allow content encoding detection, @size should be >= 4
11803: * The value of @filename is used for fetching external entities
11804: * and error/warning reports.
11805: *
11806: * Returns the new parser context or NULL
11807: */
11808:
11809: xmlParserCtxtPtr
11810: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11811: const char *chunk, int size, const char *filename) {
11812: xmlParserCtxtPtr ctxt;
11813: xmlParserInputPtr inputStream;
11814: xmlParserInputBufferPtr buf;
11815: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11816:
11817: /*
11818: * plug some encoding conversion routines
11819: */
11820: if ((chunk != NULL) && (size >= 4))
11821: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11822:
11823: buf = xmlAllocParserInputBuffer(enc);
11824: if (buf == NULL) return(NULL);
11825:
11826: ctxt = xmlNewParserCtxt();
11827: if (ctxt == NULL) {
11828: xmlErrMemory(NULL, "creating parser: out of memory\n");
11829: xmlFreeParserInputBuffer(buf);
11830: return(NULL);
11831: }
11832: ctxt->dictNames = 1;
11833: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11834: if (ctxt->pushTab == NULL) {
11835: xmlErrMemory(ctxt, NULL);
11836: xmlFreeParserInputBuffer(buf);
11837: xmlFreeParserCtxt(ctxt);
11838: return(NULL);
11839: }
11840: if (sax != NULL) {
11841: #ifdef LIBXML_SAX1_ENABLED
11842: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11843: #endif /* LIBXML_SAX1_ENABLED */
11844: xmlFree(ctxt->sax);
11845: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11846: if (ctxt->sax == NULL) {
11847: xmlErrMemory(ctxt, NULL);
11848: xmlFreeParserInputBuffer(buf);
11849: xmlFreeParserCtxt(ctxt);
11850: return(NULL);
11851: }
11852: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11853: if (sax->initialized == XML_SAX2_MAGIC)
11854: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11855: else
11856: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11857: if (user_data != NULL)
11858: ctxt->userData = user_data;
11859: }
11860: if (filename == NULL) {
11861: ctxt->directory = NULL;
11862: } else {
11863: ctxt->directory = xmlParserGetDirectory(filename);
11864: }
11865:
11866: inputStream = xmlNewInputStream(ctxt);
11867: if (inputStream == NULL) {
11868: xmlFreeParserCtxt(ctxt);
11869: xmlFreeParserInputBuffer(buf);
11870: return(NULL);
11871: }
11872:
11873: if (filename == NULL)
11874: inputStream->filename = NULL;
11875: else {
11876: inputStream->filename = (char *)
11877: xmlCanonicPath((const xmlChar *) filename);
11878: if (inputStream->filename == NULL) {
11879: xmlFreeParserCtxt(ctxt);
11880: xmlFreeParserInputBuffer(buf);
11881: return(NULL);
11882: }
11883: }
11884: inputStream->buf = buf;
11885: inputStream->base = inputStream->buf->buffer->content;
11886: inputStream->cur = inputStream->buf->buffer->content;
11887: inputStream->end =
11888: &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11889:
11890: inputPush(ctxt, inputStream);
11891:
11892: /*
11893: * If the caller didn't provide an initial 'chunk' for determining
11894: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11895: * that it can be automatically determined later
11896: */
11897: if ((size == 0) || (chunk == NULL)) {
11898: ctxt->charset = XML_CHAR_ENCODING_NONE;
11899: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11900: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11901: int cur = ctxt->input->cur - ctxt->input->base;
11902:
11903: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11904:
11905: ctxt->input->base = ctxt->input->buf->buffer->content + base;
11906: ctxt->input->cur = ctxt->input->base + cur;
11907: ctxt->input->end =
11908: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11909: #ifdef DEBUG_PUSH
11910: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11911: #endif
11912: }
11913:
11914: if (enc != XML_CHAR_ENCODING_NONE) {
11915: xmlSwitchEncoding(ctxt, enc);
11916: }
11917:
11918: return(ctxt);
11919: }
11920: #endif /* LIBXML_PUSH_ENABLED */
11921:
11922: /**
11923: * xmlStopParser:
11924: * @ctxt: an XML parser context
11925: *
11926: * Blocks further parser processing
11927: */
11928: void
11929: xmlStopParser(xmlParserCtxtPtr ctxt) {
11930: if (ctxt == NULL)
11931: return;
11932: ctxt->instate = XML_PARSER_EOF;
11933: ctxt->disableSAX = 1;
11934: if (ctxt->input != NULL) {
11935: ctxt->input->cur = BAD_CAST"";
11936: ctxt->input->base = ctxt->input->cur;
11937: }
11938: }
11939:
11940: /**
11941: * xmlCreateIOParserCtxt:
11942: * @sax: a SAX handler
11943: * @user_data: The user data returned on SAX callbacks
11944: * @ioread: an I/O read function
11945: * @ioclose: an I/O close function
11946: * @ioctx: an I/O handler
11947: * @enc: the charset encoding if known
11948: *
11949: * Create a parser context for using the XML parser with an existing
11950: * I/O stream
11951: *
11952: * Returns the new parser context or NULL
11953: */
11954: xmlParserCtxtPtr
11955: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11956: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11957: void *ioctx, xmlCharEncoding enc) {
11958: xmlParserCtxtPtr ctxt;
11959: xmlParserInputPtr inputStream;
11960: xmlParserInputBufferPtr buf;
11961:
11962: if (ioread == NULL) return(NULL);
11963:
11964: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11965: if (buf == NULL) return(NULL);
11966:
11967: ctxt = xmlNewParserCtxt();
11968: if (ctxt == NULL) {
11969: xmlFreeParserInputBuffer(buf);
11970: return(NULL);
11971: }
11972: if (sax != NULL) {
11973: #ifdef LIBXML_SAX1_ENABLED
11974: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11975: #endif /* LIBXML_SAX1_ENABLED */
11976: xmlFree(ctxt->sax);
11977: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11978: if (ctxt->sax == NULL) {
11979: xmlErrMemory(ctxt, NULL);
11980: xmlFreeParserCtxt(ctxt);
11981: return(NULL);
11982: }
11983: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11984: if (sax->initialized == XML_SAX2_MAGIC)
11985: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11986: else
11987: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11988: if (user_data != NULL)
11989: ctxt->userData = user_data;
11990: }
11991:
11992: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11993: if (inputStream == NULL) {
11994: xmlFreeParserCtxt(ctxt);
11995: return(NULL);
11996: }
11997: inputPush(ctxt, inputStream);
11998:
11999: return(ctxt);
12000: }
12001:
12002: #ifdef LIBXML_VALID_ENABLED
12003: /************************************************************************
12004: * *
12005: * Front ends when parsing a DTD *
12006: * *
12007: ************************************************************************/
12008:
12009: /**
12010: * xmlIOParseDTD:
12011: * @sax: the SAX handler block or NULL
12012: * @input: an Input Buffer
12013: * @enc: the charset encoding if known
12014: *
12015: * Load and parse a DTD
12016: *
12017: * Returns the resulting xmlDtdPtr or NULL in case of error.
12018: * @input will be freed by the function in any case.
12019: */
12020:
12021: xmlDtdPtr
12022: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12023: xmlCharEncoding enc) {
12024: xmlDtdPtr ret = NULL;
12025: xmlParserCtxtPtr ctxt;
12026: xmlParserInputPtr pinput = NULL;
12027: xmlChar start[4];
12028:
12029: if (input == NULL)
12030: return(NULL);
12031:
12032: ctxt = xmlNewParserCtxt();
12033: if (ctxt == NULL) {
12034: xmlFreeParserInputBuffer(input);
12035: return(NULL);
12036: }
12037:
12038: /*
12039: * Set-up the SAX context
12040: */
12041: if (sax != NULL) {
12042: if (ctxt->sax != NULL)
12043: xmlFree(ctxt->sax);
12044: ctxt->sax = sax;
12045: ctxt->userData = ctxt;
12046: }
12047: xmlDetectSAX2(ctxt);
12048:
12049: /*
12050: * generate a parser input from the I/O handler
12051: */
12052:
12053: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12054: if (pinput == NULL) {
12055: if (sax != NULL) ctxt->sax = NULL;
12056: xmlFreeParserInputBuffer(input);
12057: xmlFreeParserCtxt(ctxt);
12058: return(NULL);
12059: }
12060:
12061: /*
12062: * plug some encoding conversion routines here.
12063: */
12064: if (xmlPushInput(ctxt, pinput) < 0) {
12065: if (sax != NULL) ctxt->sax = NULL;
12066: xmlFreeParserCtxt(ctxt);
12067: return(NULL);
12068: }
12069: if (enc != XML_CHAR_ENCODING_NONE) {
12070: xmlSwitchEncoding(ctxt, enc);
12071: }
12072:
12073: pinput->filename = NULL;
12074: pinput->line = 1;
12075: pinput->col = 1;
12076: pinput->base = ctxt->input->cur;
12077: pinput->cur = ctxt->input->cur;
12078: pinput->free = NULL;
12079:
12080: /*
12081: * let's parse that entity knowing it's an external subset.
12082: */
12083: ctxt->inSubset = 2;
12084: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12085: if (ctxt->myDoc == NULL) {
12086: xmlErrMemory(ctxt, "New Doc failed");
12087: return(NULL);
12088: }
12089: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12090: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12091: BAD_CAST "none", BAD_CAST "none");
12092:
12093: if ((enc == XML_CHAR_ENCODING_NONE) &&
12094: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12095: /*
12096: * Get the 4 first bytes and decode the charset
12097: * if enc != XML_CHAR_ENCODING_NONE
12098: * plug some encoding conversion routines.
12099: */
12100: start[0] = RAW;
12101: start[1] = NXT(1);
12102: start[2] = NXT(2);
12103: start[3] = NXT(3);
12104: enc = xmlDetectCharEncoding(start, 4);
12105: if (enc != XML_CHAR_ENCODING_NONE) {
12106: xmlSwitchEncoding(ctxt, enc);
12107: }
12108: }
12109:
12110: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12111:
12112: if (ctxt->myDoc != NULL) {
12113: if (ctxt->wellFormed) {
12114: ret = ctxt->myDoc->extSubset;
12115: ctxt->myDoc->extSubset = NULL;
12116: if (ret != NULL) {
12117: xmlNodePtr tmp;
12118:
12119: ret->doc = NULL;
12120: tmp = ret->children;
12121: while (tmp != NULL) {
12122: tmp->doc = NULL;
12123: tmp = tmp->next;
12124: }
12125: }
12126: } else {
12127: ret = NULL;
12128: }
12129: xmlFreeDoc(ctxt->myDoc);
12130: ctxt->myDoc = NULL;
12131: }
12132: if (sax != NULL) ctxt->sax = NULL;
12133: xmlFreeParserCtxt(ctxt);
12134:
12135: return(ret);
12136: }
12137:
12138: /**
12139: * xmlSAXParseDTD:
12140: * @sax: the SAX handler block
12141: * @ExternalID: a NAME* containing the External ID of the DTD
12142: * @SystemID: a NAME* containing the URL to the DTD
12143: *
12144: * Load and parse an external subset.
12145: *
12146: * Returns the resulting xmlDtdPtr or NULL in case of error.
12147: */
12148:
12149: xmlDtdPtr
12150: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12151: const xmlChar *SystemID) {
12152: xmlDtdPtr ret = NULL;
12153: xmlParserCtxtPtr ctxt;
12154: xmlParserInputPtr input = NULL;
12155: xmlCharEncoding enc;
12156: xmlChar* systemIdCanonic;
12157:
12158: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12159:
12160: ctxt = xmlNewParserCtxt();
12161: if (ctxt == NULL) {
12162: return(NULL);
12163: }
12164:
12165: /*
12166: * Set-up the SAX context
12167: */
12168: if (sax != NULL) {
12169: if (ctxt->sax != NULL)
12170: xmlFree(ctxt->sax);
12171: ctxt->sax = sax;
12172: ctxt->userData = ctxt;
12173: }
12174:
12175: /*
12176: * Canonicalise the system ID
12177: */
12178: systemIdCanonic = xmlCanonicPath(SystemID);
12179: if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12180: xmlFreeParserCtxt(ctxt);
12181: return(NULL);
12182: }
12183:
12184: /*
12185: * Ask the Entity resolver to load the damn thing
12186: */
12187:
12188: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12189: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12190: systemIdCanonic);
12191: if (input == NULL) {
12192: if (sax != NULL) ctxt->sax = NULL;
12193: xmlFreeParserCtxt(ctxt);
12194: if (systemIdCanonic != NULL)
12195: xmlFree(systemIdCanonic);
12196: return(NULL);
12197: }
12198:
12199: /*
12200: * plug some encoding conversion routines here.
12201: */
12202: if (xmlPushInput(ctxt, input) < 0) {
12203: if (sax != NULL) ctxt->sax = NULL;
12204: xmlFreeParserCtxt(ctxt);
12205: if (systemIdCanonic != NULL)
12206: xmlFree(systemIdCanonic);
12207: return(NULL);
12208: }
12209: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12210: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12211: xmlSwitchEncoding(ctxt, enc);
12212: }
12213:
12214: if (input->filename == NULL)
12215: input->filename = (char *) systemIdCanonic;
12216: else
12217: xmlFree(systemIdCanonic);
12218: input->line = 1;
12219: input->col = 1;
12220: input->base = ctxt->input->cur;
12221: input->cur = ctxt->input->cur;
12222: input->free = NULL;
12223:
12224: /*
12225: * let's parse that entity knowing it's an external subset.
12226: */
12227: ctxt->inSubset = 2;
12228: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12229: if (ctxt->myDoc == NULL) {
12230: xmlErrMemory(ctxt, "New Doc failed");
12231: if (sax != NULL) ctxt->sax = NULL;
12232: xmlFreeParserCtxt(ctxt);
12233: return(NULL);
12234: }
12235: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12236: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12237: ExternalID, SystemID);
12238: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12239:
12240: if (ctxt->myDoc != NULL) {
12241: if (ctxt->wellFormed) {
12242: ret = ctxt->myDoc->extSubset;
12243: ctxt->myDoc->extSubset = NULL;
12244: if (ret != NULL) {
12245: xmlNodePtr tmp;
12246:
12247: ret->doc = NULL;
12248: tmp = ret->children;
12249: while (tmp != NULL) {
12250: tmp->doc = NULL;
12251: tmp = tmp->next;
12252: }
12253: }
12254: } else {
12255: ret = NULL;
12256: }
12257: xmlFreeDoc(ctxt->myDoc);
12258: ctxt->myDoc = NULL;
12259: }
12260: if (sax != NULL) ctxt->sax = NULL;
12261: xmlFreeParserCtxt(ctxt);
12262:
12263: return(ret);
12264: }
12265:
12266:
12267: /**
12268: * xmlParseDTD:
12269: * @ExternalID: a NAME* containing the External ID of the DTD
12270: * @SystemID: a NAME* containing the URL to the DTD
12271: *
12272: * Load and parse an external subset.
12273: *
12274: * Returns the resulting xmlDtdPtr or NULL in case of error.
12275: */
12276:
12277: xmlDtdPtr
12278: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12279: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12280: }
12281: #endif /* LIBXML_VALID_ENABLED */
12282:
12283: /************************************************************************
12284: * *
12285: * Front ends when parsing an Entity *
12286: * *
12287: ************************************************************************/
12288:
12289: /**
12290: * xmlParseCtxtExternalEntity:
12291: * @ctx: the existing parsing context
12292: * @URL: the URL for the entity to load
12293: * @ID: the System ID for the entity to load
12294: * @lst: the return value for the set of parsed nodes
12295: *
12296: * Parse an external general entity within an existing parsing context
12297: * An external general parsed entity is well-formed if it matches the
12298: * production labeled extParsedEnt.
12299: *
12300: * [78] extParsedEnt ::= TextDecl? content
12301: *
12302: * Returns 0 if the entity is well formed, -1 in case of args problem and
12303: * the parser error code otherwise
12304: */
12305:
12306: int
12307: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12308: const xmlChar *ID, xmlNodePtr *lst) {
12309: xmlParserCtxtPtr ctxt;
12310: xmlDocPtr newDoc;
12311: xmlNodePtr newRoot;
12312: xmlSAXHandlerPtr oldsax = NULL;
12313: int ret = 0;
12314: xmlChar start[4];
12315: xmlCharEncoding enc;
12316:
12317: if (ctx == NULL) return(-1);
12318:
12319: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12320: (ctx->depth > 1024)) {
12321: return(XML_ERR_ENTITY_LOOP);
12322: }
12323:
12324: if (lst != NULL)
12325: *lst = NULL;
12326: if ((URL == NULL) && (ID == NULL))
12327: return(-1);
12328: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12329: return(-1);
12330:
12331: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12332: if (ctxt == NULL) {
12333: return(-1);
12334: }
12335:
12336: oldsax = ctxt->sax;
12337: ctxt->sax = ctx->sax;
12338: xmlDetectSAX2(ctxt);
12339: newDoc = xmlNewDoc(BAD_CAST "1.0");
12340: if (newDoc == NULL) {
12341: xmlFreeParserCtxt(ctxt);
12342: return(-1);
12343: }
12344: newDoc->properties = XML_DOC_INTERNAL;
12345: if (ctx->myDoc->dict) {
12346: newDoc->dict = ctx->myDoc->dict;
12347: xmlDictReference(newDoc->dict);
12348: }
12349: if (ctx->myDoc != NULL) {
12350: newDoc->intSubset = ctx->myDoc->intSubset;
12351: newDoc->extSubset = ctx->myDoc->extSubset;
12352: }
12353: if (ctx->myDoc->URL != NULL) {
12354: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12355: }
12356: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12357: if (newRoot == NULL) {
12358: ctxt->sax = oldsax;
12359: xmlFreeParserCtxt(ctxt);
12360: newDoc->intSubset = NULL;
12361: newDoc->extSubset = NULL;
12362: xmlFreeDoc(newDoc);
12363: return(-1);
12364: }
12365: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12366: nodePush(ctxt, newDoc->children);
12367: if (ctx->myDoc == NULL) {
12368: ctxt->myDoc = newDoc;
12369: } else {
12370: ctxt->myDoc = ctx->myDoc;
12371: newDoc->children->doc = ctx->myDoc;
12372: }
12373:
12374: /*
12375: * Get the 4 first bytes and decode the charset
12376: * if enc != XML_CHAR_ENCODING_NONE
12377: * plug some encoding conversion routines.
12378: */
12379: GROW
12380: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12381: start[0] = RAW;
12382: start[1] = NXT(1);
12383: start[2] = NXT(2);
12384: start[3] = NXT(3);
12385: enc = xmlDetectCharEncoding(start, 4);
12386: if (enc != XML_CHAR_ENCODING_NONE) {
12387: xmlSwitchEncoding(ctxt, enc);
12388: }
12389: }
12390:
12391: /*
12392: * Parse a possible text declaration first
12393: */
12394: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12395: xmlParseTextDecl(ctxt);
12396: /*
12397: * An XML-1.0 document can't reference an entity not XML-1.0
12398: */
12399: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12400: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12401: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12402: "Version mismatch between document and entity\n");
12403: }
12404: }
12405:
12406: /*
12407: * Doing validity checking on chunk doesn't make sense
12408: */
12409: ctxt->instate = XML_PARSER_CONTENT;
12410: ctxt->validate = ctx->validate;
12411: ctxt->valid = ctx->valid;
12412: ctxt->loadsubset = ctx->loadsubset;
12413: ctxt->depth = ctx->depth + 1;
12414: ctxt->replaceEntities = ctx->replaceEntities;
12415: if (ctxt->validate) {
12416: ctxt->vctxt.error = ctx->vctxt.error;
12417: ctxt->vctxt.warning = ctx->vctxt.warning;
12418: } else {
12419: ctxt->vctxt.error = NULL;
12420: ctxt->vctxt.warning = NULL;
12421: }
12422: ctxt->vctxt.nodeTab = NULL;
12423: ctxt->vctxt.nodeNr = 0;
12424: ctxt->vctxt.nodeMax = 0;
12425: ctxt->vctxt.node = NULL;
12426: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12427: ctxt->dict = ctx->dict;
12428: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12429: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12430: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12431: ctxt->dictNames = ctx->dictNames;
12432: ctxt->attsDefault = ctx->attsDefault;
12433: ctxt->attsSpecial = ctx->attsSpecial;
12434: ctxt->linenumbers = ctx->linenumbers;
12435:
12436: xmlParseContent(ctxt);
12437:
12438: ctx->validate = ctxt->validate;
12439: ctx->valid = ctxt->valid;
12440: if ((RAW == '<') && (NXT(1) == '/')) {
12441: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12442: } else if (RAW != 0) {
12443: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12444: }
12445: if (ctxt->node != newDoc->children) {
12446: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12447: }
12448:
12449: if (!ctxt->wellFormed) {
12450: if (ctxt->errNo == 0)
12451: ret = 1;
12452: else
12453: ret = ctxt->errNo;
12454: } else {
12455: if (lst != NULL) {
12456: xmlNodePtr cur;
12457:
12458: /*
12459: * Return the newly created nodeset after unlinking it from
12460: * they pseudo parent.
12461: */
12462: cur = newDoc->children->children;
12463: *lst = cur;
12464: while (cur != NULL) {
12465: cur->parent = NULL;
12466: cur = cur->next;
12467: }
12468: newDoc->children->children = NULL;
12469: }
12470: ret = 0;
12471: }
12472: ctxt->sax = oldsax;
12473: ctxt->dict = NULL;
12474: ctxt->attsDefault = NULL;
12475: ctxt->attsSpecial = NULL;
12476: xmlFreeParserCtxt(ctxt);
12477: newDoc->intSubset = NULL;
12478: newDoc->extSubset = NULL;
12479: xmlFreeDoc(newDoc);
12480:
12481: return(ret);
12482: }
12483:
12484: /**
12485: * xmlParseExternalEntityPrivate:
12486: * @doc: the document the chunk pertains to
12487: * @oldctxt: the previous parser context if available
12488: * @sax: the SAX handler bloc (possibly NULL)
12489: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12490: * @depth: Used for loop detection, use 0
12491: * @URL: the URL for the entity to load
12492: * @ID: the System ID for the entity to load
12493: * @list: the return value for the set of parsed nodes
12494: *
12495: * Private version of xmlParseExternalEntity()
12496: *
12497: * Returns 0 if the entity is well formed, -1 in case of args problem and
12498: * the parser error code otherwise
12499: */
12500:
12501: static xmlParserErrors
12502: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12503: xmlSAXHandlerPtr sax,
12504: void *user_data, int depth, const xmlChar *URL,
12505: const xmlChar *ID, xmlNodePtr *list) {
12506: xmlParserCtxtPtr ctxt;
12507: xmlDocPtr newDoc;
12508: xmlNodePtr newRoot;
12509: xmlSAXHandlerPtr oldsax = NULL;
12510: xmlParserErrors ret = XML_ERR_OK;
12511: xmlChar start[4];
12512: xmlCharEncoding enc;
12513:
12514: if (((depth > 40) &&
12515: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12516: (depth > 1024)) {
12517: return(XML_ERR_ENTITY_LOOP);
12518: }
12519:
12520: if (list != NULL)
12521: *list = NULL;
12522: if ((URL == NULL) && (ID == NULL))
12523: return(XML_ERR_INTERNAL_ERROR);
12524: if (doc == NULL)
12525: return(XML_ERR_INTERNAL_ERROR);
12526:
12527:
12528: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12529: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12530: ctxt->userData = ctxt;
12531: if (oldctxt != NULL) {
12532: ctxt->_private = oldctxt->_private;
12533: ctxt->loadsubset = oldctxt->loadsubset;
12534: ctxt->validate = oldctxt->validate;
12535: ctxt->external = oldctxt->external;
12536: ctxt->record_info = oldctxt->record_info;
12537: ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12538: ctxt->node_seq.length = oldctxt->node_seq.length;
12539: ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12540: } else {
12541: /*
12542: * Doing validity checking on chunk without context
12543: * doesn't make sense
12544: */
12545: ctxt->_private = NULL;
12546: ctxt->validate = 0;
12547: ctxt->external = 2;
12548: ctxt->loadsubset = 0;
12549: }
12550: if (sax != NULL) {
12551: oldsax = ctxt->sax;
12552: ctxt->sax = sax;
12553: if (user_data != NULL)
12554: ctxt->userData = user_data;
12555: }
12556: xmlDetectSAX2(ctxt);
12557: newDoc = xmlNewDoc(BAD_CAST "1.0");
12558: if (newDoc == NULL) {
12559: ctxt->node_seq.maximum = 0;
12560: ctxt->node_seq.length = 0;
12561: ctxt->node_seq.buffer = NULL;
12562: xmlFreeParserCtxt(ctxt);
12563: return(XML_ERR_INTERNAL_ERROR);
12564: }
12565: newDoc->properties = XML_DOC_INTERNAL;
12566: newDoc->intSubset = doc->intSubset;
12567: newDoc->extSubset = doc->extSubset;
12568: newDoc->dict = doc->dict;
12569: xmlDictReference(newDoc->dict);
12570:
12571: if (doc->URL != NULL) {
12572: newDoc->URL = xmlStrdup(doc->URL);
12573: }
12574: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12575: if (newRoot == NULL) {
12576: if (sax != NULL)
12577: ctxt->sax = oldsax;
12578: ctxt->node_seq.maximum = 0;
12579: ctxt->node_seq.length = 0;
12580: ctxt->node_seq.buffer = NULL;
12581: xmlFreeParserCtxt(ctxt);
12582: newDoc->intSubset = NULL;
12583: newDoc->extSubset = NULL;
12584: xmlFreeDoc(newDoc);
12585: return(XML_ERR_INTERNAL_ERROR);
12586: }
12587: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12588: nodePush(ctxt, newDoc->children);
12589: ctxt->myDoc = doc;
12590: newRoot->doc = doc;
12591:
12592: /*
12593: * Get the 4 first bytes and decode the charset
12594: * if enc != XML_CHAR_ENCODING_NONE
12595: * plug some encoding conversion routines.
12596: */
12597: GROW;
12598: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12599: start[0] = RAW;
12600: start[1] = NXT(1);
12601: start[2] = NXT(2);
12602: start[3] = NXT(3);
12603: enc = xmlDetectCharEncoding(start, 4);
12604: if (enc != XML_CHAR_ENCODING_NONE) {
12605: xmlSwitchEncoding(ctxt, enc);
12606: }
12607: }
12608:
12609: /*
12610: * Parse a possible text declaration first
12611: */
12612: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12613: xmlParseTextDecl(ctxt);
12614: }
12615:
12616: ctxt->instate = XML_PARSER_CONTENT;
12617: ctxt->depth = depth;
12618:
12619: xmlParseContent(ctxt);
12620:
12621: if ((RAW == '<') && (NXT(1) == '/')) {
12622: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12623: } else if (RAW != 0) {
12624: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12625: }
12626: if (ctxt->node != newDoc->children) {
12627: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12628: }
12629:
12630: if (!ctxt->wellFormed) {
12631: if (ctxt->errNo == 0)
12632: ret = XML_ERR_INTERNAL_ERROR;
12633: else
12634: ret = (xmlParserErrors)ctxt->errNo;
12635: } else {
12636: if (list != NULL) {
12637: xmlNodePtr cur;
12638:
12639: /*
12640: * Return the newly created nodeset after unlinking it from
12641: * they pseudo parent.
12642: */
12643: cur = newDoc->children->children;
12644: *list = cur;
12645: while (cur != NULL) {
12646: cur->parent = NULL;
12647: cur = cur->next;
12648: }
12649: newDoc->children->children = NULL;
12650: }
12651: ret = XML_ERR_OK;
12652: }
12653:
12654: /*
12655: * Record in the parent context the number of entities replacement
12656: * done when parsing that reference.
12657: */
12658: if (oldctxt != NULL)
12659: oldctxt->nbentities += ctxt->nbentities;
12660:
12661: /*
12662: * Also record the size of the entity parsed
12663: */
12664: if (ctxt->input != NULL) {
12665: oldctxt->sizeentities += ctxt->input->consumed;
12666: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12667: }
12668: /*
12669: * And record the last error if any
12670: */
12671: if (ctxt->lastError.code != XML_ERR_OK)
12672: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12673:
12674: if (sax != NULL)
12675: ctxt->sax = oldsax;
12676: oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12677: oldctxt->node_seq.length = ctxt->node_seq.length;
12678: oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12679: ctxt->node_seq.maximum = 0;
12680: ctxt->node_seq.length = 0;
12681: ctxt->node_seq.buffer = NULL;
12682: xmlFreeParserCtxt(ctxt);
12683: newDoc->intSubset = NULL;
12684: newDoc->extSubset = NULL;
12685: xmlFreeDoc(newDoc);
12686:
12687: return(ret);
12688: }
12689:
12690: #ifdef LIBXML_SAX1_ENABLED
12691: /**
12692: * xmlParseExternalEntity:
12693: * @doc: the document the chunk pertains to
12694: * @sax: the SAX handler bloc (possibly NULL)
12695: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12696: * @depth: Used for loop detection, use 0
12697: * @URL: the URL for the entity to load
12698: * @ID: the System ID for the entity to load
12699: * @lst: the return value for the set of parsed nodes
12700: *
12701: * Parse an external general entity
12702: * An external general parsed entity is well-formed if it matches the
12703: * production labeled extParsedEnt.
12704: *
12705: * [78] extParsedEnt ::= TextDecl? content
12706: *
12707: * Returns 0 if the entity is well formed, -1 in case of args problem and
12708: * the parser error code otherwise
12709: */
12710:
12711: int
12712: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12713: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12714: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12715: ID, lst));
12716: }
12717:
12718: /**
12719: * xmlParseBalancedChunkMemory:
12720: * @doc: the document the chunk pertains to
12721: * @sax: the SAX handler bloc (possibly NULL)
12722: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12723: * @depth: Used for loop detection, use 0
12724: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12725: * @lst: the return value for the set of parsed nodes
12726: *
12727: * Parse a well-balanced chunk of an XML document
12728: * called by the parser
12729: * The allowed sequence for the Well Balanced Chunk is the one defined by
12730: * the content production in the XML grammar:
12731: *
12732: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12733: *
12734: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12735: * the parser error code otherwise
12736: */
12737:
12738: int
12739: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12740: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12741: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12742: depth, string, lst, 0 );
12743: }
12744: #endif /* LIBXML_SAX1_ENABLED */
12745:
12746: /**
12747: * xmlParseBalancedChunkMemoryInternal:
12748: * @oldctxt: the existing parsing context
12749: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12750: * @user_data: the user data field for the parser context
12751: * @lst: the return value for the set of parsed nodes
12752: *
12753: *
12754: * Parse a well-balanced chunk of an XML document
12755: * called by the parser
12756: * The allowed sequence for the Well Balanced Chunk is the one defined by
12757: * the content production in the XML grammar:
12758: *
12759: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12760: *
12761: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12762: * error code otherwise
12763: *
12764: * In case recover is set to 1, the nodelist will not be empty even if
12765: * the parsed chunk is not well balanced.
12766: */
12767: static xmlParserErrors
12768: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12769: const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12770: xmlParserCtxtPtr ctxt;
12771: xmlDocPtr newDoc = NULL;
12772: xmlNodePtr newRoot;
12773: xmlSAXHandlerPtr oldsax = NULL;
12774: xmlNodePtr content = NULL;
12775: xmlNodePtr last = NULL;
12776: int size;
12777: xmlParserErrors ret = XML_ERR_OK;
12778: #ifdef SAX2
12779: int i;
12780: #endif
12781:
12782: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12783: (oldctxt->depth > 1024)) {
12784: return(XML_ERR_ENTITY_LOOP);
12785: }
12786:
12787:
12788: if (lst != NULL)
12789: *lst = NULL;
12790: if (string == NULL)
12791: return(XML_ERR_INTERNAL_ERROR);
12792:
12793: size = xmlStrlen(string);
12794:
12795: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12796: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12797: if (user_data != NULL)
12798: ctxt->userData = user_data;
12799: else
12800: ctxt->userData = ctxt;
12801: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12802: ctxt->dict = oldctxt->dict;
12803: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12804: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12805: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12806:
12807: #ifdef SAX2
12808: /* propagate namespaces down the entity */
12809: for (i = 0;i < oldctxt->nsNr;i += 2) {
12810: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12811: }
12812: #endif
12813:
12814: oldsax = ctxt->sax;
12815: ctxt->sax = oldctxt->sax;
12816: xmlDetectSAX2(ctxt);
12817: ctxt->replaceEntities = oldctxt->replaceEntities;
12818: ctxt->options = oldctxt->options;
12819:
12820: ctxt->_private = oldctxt->_private;
12821: if (oldctxt->myDoc == NULL) {
12822: newDoc = xmlNewDoc(BAD_CAST "1.0");
12823: if (newDoc == NULL) {
12824: ctxt->sax = oldsax;
12825: ctxt->dict = NULL;
12826: xmlFreeParserCtxt(ctxt);
12827: return(XML_ERR_INTERNAL_ERROR);
12828: }
12829: newDoc->properties = XML_DOC_INTERNAL;
12830: newDoc->dict = ctxt->dict;
12831: xmlDictReference(newDoc->dict);
12832: ctxt->myDoc = newDoc;
12833: } else {
12834: ctxt->myDoc = oldctxt->myDoc;
12835: content = ctxt->myDoc->children;
12836: last = ctxt->myDoc->last;
12837: }
12838: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12839: if (newRoot == NULL) {
12840: ctxt->sax = oldsax;
12841: ctxt->dict = NULL;
12842: xmlFreeParserCtxt(ctxt);
12843: if (newDoc != NULL) {
12844: xmlFreeDoc(newDoc);
12845: }
12846: return(XML_ERR_INTERNAL_ERROR);
12847: }
12848: ctxt->myDoc->children = NULL;
12849: ctxt->myDoc->last = NULL;
12850: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12851: nodePush(ctxt, ctxt->myDoc->children);
12852: ctxt->instate = XML_PARSER_CONTENT;
12853: ctxt->depth = oldctxt->depth + 1;
12854:
12855: ctxt->validate = 0;
12856: ctxt->loadsubset = oldctxt->loadsubset;
12857: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12858: /*
12859: * ID/IDREF registration will be done in xmlValidateElement below
12860: */
12861: ctxt->loadsubset |= XML_SKIP_IDS;
12862: }
12863: ctxt->dictNames = oldctxt->dictNames;
12864: ctxt->attsDefault = oldctxt->attsDefault;
12865: ctxt->attsSpecial = oldctxt->attsSpecial;
12866:
12867: xmlParseContent(ctxt);
12868: if ((RAW == '<') && (NXT(1) == '/')) {
12869: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12870: } else if (RAW != 0) {
12871: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12872: }
12873: if (ctxt->node != ctxt->myDoc->children) {
12874: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12875: }
12876:
12877: if (!ctxt->wellFormed) {
12878: if (ctxt->errNo == 0)
12879: ret = XML_ERR_INTERNAL_ERROR;
12880: else
12881: ret = (xmlParserErrors)ctxt->errNo;
12882: } else {
12883: ret = XML_ERR_OK;
12884: }
12885:
12886: if ((lst != NULL) && (ret == XML_ERR_OK)) {
12887: xmlNodePtr cur;
12888:
12889: /*
12890: * Return the newly created nodeset after unlinking it from
12891: * they pseudo parent.
12892: */
12893: cur = ctxt->myDoc->children->children;
12894: *lst = cur;
12895: while (cur != NULL) {
12896: #ifdef LIBXML_VALID_ENABLED
12897: if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12898: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12899: (cur->type == XML_ELEMENT_NODE)) {
12900: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12901: oldctxt->myDoc, cur);
12902: }
12903: #endif /* LIBXML_VALID_ENABLED */
12904: cur->parent = NULL;
12905: cur = cur->next;
12906: }
12907: ctxt->myDoc->children->children = NULL;
12908: }
12909: if (ctxt->myDoc != NULL) {
12910: xmlFreeNode(ctxt->myDoc->children);
12911: ctxt->myDoc->children = content;
12912: ctxt->myDoc->last = last;
12913: }
12914:
12915: /*
12916: * Record in the parent context the number of entities replacement
12917: * done when parsing that reference.
12918: */
12919: if (oldctxt != NULL)
12920: oldctxt->nbentities += ctxt->nbentities;
12921:
12922: /*
12923: * Also record the last error if any
12924: */
12925: if (ctxt->lastError.code != XML_ERR_OK)
12926: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12927:
12928: ctxt->sax = oldsax;
12929: ctxt->dict = NULL;
12930: ctxt->attsDefault = NULL;
12931: ctxt->attsSpecial = NULL;
12932: xmlFreeParserCtxt(ctxt);
12933: if (newDoc != NULL) {
12934: xmlFreeDoc(newDoc);
12935: }
12936:
12937: return(ret);
12938: }
12939:
12940: /**
12941: * xmlParseInNodeContext:
12942: * @node: the context node
12943: * @data: the input string
12944: * @datalen: the input string length in bytes
12945: * @options: a combination of xmlParserOption
12946: * @lst: the return value for the set of parsed nodes
12947: *
12948: * Parse a well-balanced chunk of an XML document
12949: * within the context (DTD, namespaces, etc ...) of the given node.
12950: *
12951: * The allowed sequence for the data is a Well Balanced Chunk defined by
12952: * the content production in the XML grammar:
12953: *
12954: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12955: *
12956: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12957: * error code otherwise
12958: */
12959: xmlParserErrors
12960: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12961: int options, xmlNodePtr *lst) {
12962: #ifdef SAX2
12963: xmlParserCtxtPtr ctxt;
12964: xmlDocPtr doc = NULL;
12965: xmlNodePtr fake, cur;
12966: int nsnr = 0;
12967:
12968: xmlParserErrors ret = XML_ERR_OK;
12969:
12970: /*
12971: * check all input parameters, grab the document
12972: */
12973: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12974: return(XML_ERR_INTERNAL_ERROR);
12975: switch (node->type) {
12976: case XML_ELEMENT_NODE:
12977: case XML_ATTRIBUTE_NODE:
12978: case XML_TEXT_NODE:
12979: case XML_CDATA_SECTION_NODE:
12980: case XML_ENTITY_REF_NODE:
12981: case XML_PI_NODE:
12982: case XML_COMMENT_NODE:
12983: case XML_DOCUMENT_NODE:
12984: case XML_HTML_DOCUMENT_NODE:
12985: break;
12986: default:
12987: return(XML_ERR_INTERNAL_ERROR);
12988:
12989: }
12990: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12991: (node->type != XML_DOCUMENT_NODE) &&
12992: (node->type != XML_HTML_DOCUMENT_NODE))
12993: node = node->parent;
12994: if (node == NULL)
12995: return(XML_ERR_INTERNAL_ERROR);
12996: if (node->type == XML_ELEMENT_NODE)
12997: doc = node->doc;
12998: else
12999: doc = (xmlDocPtr) node;
13000: if (doc == NULL)
13001: return(XML_ERR_INTERNAL_ERROR);
13002:
13003: /*
13004: * allocate a context and set-up everything not related to the
13005: * node position in the tree
13006: */
13007: if (doc->type == XML_DOCUMENT_NODE)
13008: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13009: #ifdef LIBXML_HTML_ENABLED
13010: else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13011: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13012: /*
13013: * When parsing in context, it makes no sense to add implied
13014: * elements like html/body/etc...
13015: */
13016: options |= HTML_PARSE_NOIMPLIED;
13017: }
13018: #endif
13019: else
13020: return(XML_ERR_INTERNAL_ERROR);
13021:
13022: if (ctxt == NULL)
13023: return(XML_ERR_NO_MEMORY);
13024:
13025: /*
13026: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13027: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13028: * we must wait until the last moment to free the original one.
13029: */
13030: if (doc->dict != NULL) {
13031: if (ctxt->dict != NULL)
13032: xmlDictFree(ctxt->dict);
13033: ctxt->dict = doc->dict;
13034: } else
13035: options |= XML_PARSE_NODICT;
13036:
13037: if (doc->encoding != NULL) {
13038: xmlCharEncodingHandlerPtr hdlr;
13039:
13040: if (ctxt->encoding != NULL)
13041: xmlFree((xmlChar *) ctxt->encoding);
13042: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13043:
13044: hdlr = xmlFindCharEncodingHandler(doc->encoding);
13045: if (hdlr != NULL) {
13046: xmlSwitchToEncoding(ctxt, hdlr);
13047: } else {
13048: return(XML_ERR_UNSUPPORTED_ENCODING);
13049: }
13050: }
13051:
13052: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13053: xmlDetectSAX2(ctxt);
13054: ctxt->myDoc = doc;
13055:
13056: fake = xmlNewComment(NULL);
13057: if (fake == NULL) {
13058: xmlFreeParserCtxt(ctxt);
13059: return(XML_ERR_NO_MEMORY);
13060: }
13061: xmlAddChild(node, fake);
13062:
13063: if (node->type == XML_ELEMENT_NODE) {
13064: nodePush(ctxt, node);
13065: /*
13066: * initialize the SAX2 namespaces stack
13067: */
13068: cur = node;
13069: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13070: xmlNsPtr ns = cur->nsDef;
13071: const xmlChar *iprefix, *ihref;
13072:
13073: while (ns != NULL) {
13074: if (ctxt->dict) {
13075: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13076: ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13077: } else {
13078: iprefix = ns->prefix;
13079: ihref = ns->href;
13080: }
13081:
13082: if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13083: nsPush(ctxt, iprefix, ihref);
13084: nsnr++;
13085: }
13086: ns = ns->next;
13087: }
13088: cur = cur->parent;
13089: }
13090: ctxt->instate = XML_PARSER_CONTENT;
13091: }
13092:
13093: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13094: /*
13095: * ID/IDREF registration will be done in xmlValidateElement below
13096: */
13097: ctxt->loadsubset |= XML_SKIP_IDS;
13098: }
13099:
13100: #ifdef LIBXML_HTML_ENABLED
13101: if (doc->type == XML_HTML_DOCUMENT_NODE)
13102: __htmlParseContent(ctxt);
13103: else
13104: #endif
13105: xmlParseContent(ctxt);
13106:
13107: nsPop(ctxt, nsnr);
13108: if ((RAW == '<') && (NXT(1) == '/')) {
13109: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13110: } else if (RAW != 0) {
13111: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13112: }
13113: if ((ctxt->node != NULL) && (ctxt->node != node)) {
13114: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13115: ctxt->wellFormed = 0;
13116: }
13117:
13118: if (!ctxt->wellFormed) {
13119: if (ctxt->errNo == 0)
13120: ret = XML_ERR_INTERNAL_ERROR;
13121: else
13122: ret = (xmlParserErrors)ctxt->errNo;
13123: } else {
13124: ret = XML_ERR_OK;
13125: }
13126:
13127: /*
13128: * Return the newly created nodeset after unlinking it from
13129: * the pseudo sibling.
13130: */
13131:
13132: cur = fake->next;
13133: fake->next = NULL;
13134: node->last = fake;
13135:
13136: if (cur != NULL) {
13137: cur->prev = NULL;
13138: }
13139:
13140: *lst = cur;
13141:
13142: while (cur != NULL) {
13143: cur->parent = NULL;
13144: cur = cur->next;
13145: }
13146:
13147: xmlUnlinkNode(fake);
13148: xmlFreeNode(fake);
13149:
13150:
13151: if (ret != XML_ERR_OK) {
13152: xmlFreeNodeList(*lst);
13153: *lst = NULL;
13154: }
13155:
13156: if (doc->dict != NULL)
13157: ctxt->dict = NULL;
13158: xmlFreeParserCtxt(ctxt);
13159:
13160: return(ret);
13161: #else /* !SAX2 */
13162: return(XML_ERR_INTERNAL_ERROR);
13163: #endif
13164: }
13165:
13166: #ifdef LIBXML_SAX1_ENABLED
13167: /**
13168: * xmlParseBalancedChunkMemoryRecover:
13169: * @doc: the document the chunk pertains to
13170: * @sax: the SAX handler bloc (possibly NULL)
13171: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13172: * @depth: Used for loop detection, use 0
13173: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13174: * @lst: the return value for the set of parsed nodes
13175: * @recover: return nodes even if the data is broken (use 0)
13176: *
13177: *
13178: * Parse a well-balanced chunk of an XML document
13179: * called by the parser
13180: * The allowed sequence for the Well Balanced Chunk is the one defined by
13181: * the content production in the XML grammar:
13182: *
13183: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13184: *
13185: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13186: * the parser error code otherwise
13187: *
13188: * In case recover is set to 1, the nodelist will not be empty even if
13189: * the parsed chunk is not well balanced, assuming the parsing succeeded to
13190: * some extent.
13191: */
13192: int
13193: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13194: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13195: int recover) {
13196: xmlParserCtxtPtr ctxt;
13197: xmlDocPtr newDoc;
13198: xmlSAXHandlerPtr oldsax = NULL;
13199: xmlNodePtr content, newRoot;
13200: int size;
13201: int ret = 0;
13202:
13203: if (depth > 40) {
13204: return(XML_ERR_ENTITY_LOOP);
13205: }
13206:
13207:
13208: if (lst != NULL)
13209: *lst = NULL;
13210: if (string == NULL)
13211: return(-1);
13212:
13213: size = xmlStrlen(string);
13214:
13215: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13216: if (ctxt == NULL) return(-1);
13217: ctxt->userData = ctxt;
13218: if (sax != NULL) {
13219: oldsax = ctxt->sax;
13220: ctxt->sax = sax;
13221: if (user_data != NULL)
13222: ctxt->userData = user_data;
13223: }
13224: newDoc = xmlNewDoc(BAD_CAST "1.0");
13225: if (newDoc == NULL) {
13226: xmlFreeParserCtxt(ctxt);
13227: return(-1);
13228: }
13229: newDoc->properties = XML_DOC_INTERNAL;
13230: if ((doc != NULL) && (doc->dict != NULL)) {
13231: xmlDictFree(ctxt->dict);
13232: ctxt->dict = doc->dict;
13233: xmlDictReference(ctxt->dict);
13234: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13235: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13236: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13237: ctxt->dictNames = 1;
13238: } else {
13239: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13240: }
13241: if (doc != NULL) {
13242: newDoc->intSubset = doc->intSubset;
13243: newDoc->extSubset = doc->extSubset;
13244: }
13245: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13246: if (newRoot == NULL) {
13247: if (sax != NULL)
13248: ctxt->sax = oldsax;
13249: xmlFreeParserCtxt(ctxt);
13250: newDoc->intSubset = NULL;
13251: newDoc->extSubset = NULL;
13252: xmlFreeDoc(newDoc);
13253: return(-1);
13254: }
13255: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13256: nodePush(ctxt, newRoot);
13257: if (doc == NULL) {
13258: ctxt->myDoc = newDoc;
13259: } else {
13260: ctxt->myDoc = newDoc;
13261: newDoc->children->doc = doc;
13262: /* Ensure that doc has XML spec namespace */
13263: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13264: newDoc->oldNs = doc->oldNs;
13265: }
13266: ctxt->instate = XML_PARSER_CONTENT;
13267: ctxt->depth = depth;
13268:
13269: /*
13270: * Doing validity checking on chunk doesn't make sense
13271: */
13272: ctxt->validate = 0;
13273: ctxt->loadsubset = 0;
13274: xmlDetectSAX2(ctxt);
13275:
13276: if ( doc != NULL ){
13277: content = doc->children;
13278: doc->children = NULL;
13279: xmlParseContent(ctxt);
13280: doc->children = content;
13281: }
13282: else {
13283: xmlParseContent(ctxt);
13284: }
13285: if ((RAW == '<') && (NXT(1) == '/')) {
13286: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13287: } else if (RAW != 0) {
13288: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13289: }
13290: if (ctxt->node != newDoc->children) {
13291: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13292: }
13293:
13294: if (!ctxt->wellFormed) {
13295: if (ctxt->errNo == 0)
13296: ret = 1;
13297: else
13298: ret = ctxt->errNo;
13299: } else {
13300: ret = 0;
13301: }
13302:
13303: if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13304: xmlNodePtr cur;
13305:
13306: /*
13307: * Return the newly created nodeset after unlinking it from
13308: * they pseudo parent.
13309: */
13310: cur = newDoc->children->children;
13311: *lst = cur;
13312: while (cur != NULL) {
13313: xmlSetTreeDoc(cur, doc);
13314: cur->parent = NULL;
13315: cur = cur->next;
13316: }
13317: newDoc->children->children = NULL;
13318: }
13319:
13320: if (sax != NULL)
13321: ctxt->sax = oldsax;
13322: xmlFreeParserCtxt(ctxt);
13323: newDoc->intSubset = NULL;
13324: newDoc->extSubset = NULL;
13325: newDoc->oldNs = NULL;
13326: xmlFreeDoc(newDoc);
13327:
13328: return(ret);
13329: }
13330:
13331: /**
13332: * xmlSAXParseEntity:
13333: * @sax: the SAX handler block
13334: * @filename: the filename
13335: *
13336: * parse an XML external entity out of context and build a tree.
13337: * It use the given SAX function block to handle the parsing callback.
13338: * If sax is NULL, fallback to the default DOM tree building routines.
13339: *
13340: * [78] extParsedEnt ::= TextDecl? content
13341: *
13342: * This correspond to a "Well Balanced" chunk
13343: *
13344: * Returns the resulting document tree
13345: */
13346:
13347: xmlDocPtr
13348: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13349: xmlDocPtr ret;
13350: xmlParserCtxtPtr ctxt;
13351:
13352: ctxt = xmlCreateFileParserCtxt(filename);
13353: if (ctxt == NULL) {
13354: return(NULL);
13355: }
13356: if (sax != NULL) {
13357: if (ctxt->sax != NULL)
13358: xmlFree(ctxt->sax);
13359: ctxt->sax = sax;
13360: ctxt->userData = NULL;
13361: }
13362:
13363: xmlParseExtParsedEnt(ctxt);
13364:
13365: if (ctxt->wellFormed)
13366: ret = ctxt->myDoc;
13367: else {
13368: ret = NULL;
13369: xmlFreeDoc(ctxt->myDoc);
13370: ctxt->myDoc = NULL;
13371: }
13372: if (sax != NULL)
13373: ctxt->sax = NULL;
13374: xmlFreeParserCtxt(ctxt);
13375:
13376: return(ret);
13377: }
13378:
13379: /**
13380: * xmlParseEntity:
13381: * @filename: the filename
13382: *
13383: * parse an XML external entity out of context and build a tree.
13384: *
13385: * [78] extParsedEnt ::= TextDecl? content
13386: *
13387: * This correspond to a "Well Balanced" chunk
13388: *
13389: * Returns the resulting document tree
13390: */
13391:
13392: xmlDocPtr
13393: xmlParseEntity(const char *filename) {
13394: return(xmlSAXParseEntity(NULL, filename));
13395: }
13396: #endif /* LIBXML_SAX1_ENABLED */
13397:
13398: /**
13399: * xmlCreateEntityParserCtxtInternal:
13400: * @URL: the entity URL
13401: * @ID: the entity PUBLIC ID
13402: * @base: a possible base for the target URI
13403: * @pctx: parser context used to set options on new context
13404: *
13405: * Create a parser context for an external entity
13406: * Automatic support for ZLIB/Compress compressed document is provided
13407: * by default if found at compile-time.
13408: *
13409: * Returns the new parser context or NULL
13410: */
13411: static xmlParserCtxtPtr
13412: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13413: const xmlChar *base, xmlParserCtxtPtr pctx) {
13414: xmlParserCtxtPtr ctxt;
13415: xmlParserInputPtr inputStream;
13416: char *directory = NULL;
13417: xmlChar *uri;
13418:
13419: ctxt = xmlNewParserCtxt();
13420: if (ctxt == NULL) {
13421: return(NULL);
13422: }
13423:
13424: if (pctx != NULL) {
13425: ctxt->options = pctx->options;
13426: ctxt->_private = pctx->_private;
13427: }
13428:
13429: uri = xmlBuildURI(URL, base);
13430:
13431: if (uri == NULL) {
13432: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13433: if (inputStream == NULL) {
13434: xmlFreeParserCtxt(ctxt);
13435: return(NULL);
13436: }
13437:
13438: inputPush(ctxt, inputStream);
13439:
13440: if ((ctxt->directory == NULL) && (directory == NULL))
13441: directory = xmlParserGetDirectory((char *)URL);
13442: if ((ctxt->directory == NULL) && (directory != NULL))
13443: ctxt->directory = directory;
13444: } else {
13445: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13446: if (inputStream == NULL) {
13447: xmlFree(uri);
13448: xmlFreeParserCtxt(ctxt);
13449: return(NULL);
13450: }
13451:
13452: inputPush(ctxt, inputStream);
13453:
13454: if ((ctxt->directory == NULL) && (directory == NULL))
13455: directory = xmlParserGetDirectory((char *)uri);
13456: if ((ctxt->directory == NULL) && (directory != NULL))
13457: ctxt->directory = directory;
13458: xmlFree(uri);
13459: }
13460: return(ctxt);
13461: }
13462:
13463: /**
13464: * xmlCreateEntityParserCtxt:
13465: * @URL: the entity URL
13466: * @ID: the entity PUBLIC ID
13467: * @base: a possible base for the target URI
13468: *
13469: * Create a parser context for an external entity
13470: * Automatic support for ZLIB/Compress compressed document is provided
13471: * by default if found at compile-time.
13472: *
13473: * Returns the new parser context or NULL
13474: */
13475: xmlParserCtxtPtr
13476: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13477: const xmlChar *base) {
13478: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13479:
13480: }
13481:
13482: /************************************************************************
13483: * *
13484: * Front ends when parsing from a file *
13485: * *
13486: ************************************************************************/
13487:
13488: /**
13489: * xmlCreateURLParserCtxt:
13490: * @filename: the filename or URL
13491: * @options: a combination of xmlParserOption
13492: *
13493: * Create a parser context for a file or URL content.
13494: * Automatic support for ZLIB/Compress compressed document is provided
13495: * by default if found at compile-time and for file accesses
13496: *
13497: * Returns the new parser context or NULL
13498: */
13499: xmlParserCtxtPtr
13500: xmlCreateURLParserCtxt(const char *filename, int options)
13501: {
13502: xmlParserCtxtPtr ctxt;
13503: xmlParserInputPtr inputStream;
13504: char *directory = NULL;
13505:
13506: ctxt = xmlNewParserCtxt();
13507: if (ctxt == NULL) {
13508: xmlErrMemory(NULL, "cannot allocate parser context");
13509: return(NULL);
13510: }
13511:
13512: if (options)
13513: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13514: ctxt->linenumbers = 1;
13515:
13516: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13517: if (inputStream == NULL) {
13518: xmlFreeParserCtxt(ctxt);
13519: return(NULL);
13520: }
13521:
13522: inputPush(ctxt, inputStream);
13523: if ((ctxt->directory == NULL) && (directory == NULL))
13524: directory = xmlParserGetDirectory(filename);
13525: if ((ctxt->directory == NULL) && (directory != NULL))
13526: ctxt->directory = directory;
13527:
13528: return(ctxt);
13529: }
13530:
13531: /**
13532: * xmlCreateFileParserCtxt:
13533: * @filename: the filename
13534: *
13535: * Create a parser context for a file content.
13536: * Automatic support for ZLIB/Compress compressed document is provided
13537: * by default if found at compile-time.
13538: *
13539: * Returns the new parser context or NULL
13540: */
13541: xmlParserCtxtPtr
13542: xmlCreateFileParserCtxt(const char *filename)
13543: {
13544: return(xmlCreateURLParserCtxt(filename, 0));
13545: }
13546:
13547: #ifdef LIBXML_SAX1_ENABLED
13548: /**
13549: * xmlSAXParseFileWithData:
13550: * @sax: the SAX handler block
13551: * @filename: the filename
13552: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13553: * documents
13554: * @data: the userdata
13555: *
13556: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13557: * compressed document is provided by default if found at compile-time.
13558: * It use the given SAX function block to handle the parsing callback.
13559: * If sax is NULL, fallback to the default DOM tree building routines.
13560: *
13561: * User data (void *) is stored within the parser context in the
13562: * context's _private member, so it is available nearly everywhere in libxml
13563: *
13564: * Returns the resulting document tree
13565: */
13566:
13567: xmlDocPtr
13568: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13569: int recovery, void *data) {
13570: xmlDocPtr ret;
13571: xmlParserCtxtPtr ctxt;
13572:
13573: xmlInitParser();
13574:
13575: ctxt = xmlCreateFileParserCtxt(filename);
13576: if (ctxt == NULL) {
13577: return(NULL);
13578: }
13579: if (sax != NULL) {
13580: if (ctxt->sax != NULL)
13581: xmlFree(ctxt->sax);
13582: ctxt->sax = sax;
13583: }
13584: xmlDetectSAX2(ctxt);
13585: if (data!=NULL) {
13586: ctxt->_private = data;
13587: }
13588:
13589: if (ctxt->directory == NULL)
13590: ctxt->directory = xmlParserGetDirectory(filename);
13591:
13592: ctxt->recovery = recovery;
13593:
13594: xmlParseDocument(ctxt);
13595:
13596: if ((ctxt->wellFormed) || recovery) {
13597: ret = ctxt->myDoc;
13598: if (ret != NULL) {
13599: if (ctxt->input->buf->compressed > 0)
13600: ret->compression = 9;
13601: else
13602: ret->compression = ctxt->input->buf->compressed;
13603: }
13604: }
13605: else {
13606: ret = NULL;
13607: xmlFreeDoc(ctxt->myDoc);
13608: ctxt->myDoc = NULL;
13609: }
13610: if (sax != NULL)
13611: ctxt->sax = NULL;
13612: xmlFreeParserCtxt(ctxt);
13613:
13614: return(ret);
13615: }
13616:
13617: /**
13618: * xmlSAXParseFile:
13619: * @sax: the SAX handler block
13620: * @filename: the filename
13621: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13622: * documents
13623: *
13624: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13625: * compressed document is provided by default if found at compile-time.
13626: * It use the given SAX function block to handle the parsing callback.
13627: * If sax is NULL, fallback to the default DOM tree building routines.
13628: *
13629: * Returns the resulting document tree
13630: */
13631:
13632: xmlDocPtr
13633: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13634: int recovery) {
13635: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13636: }
13637:
13638: /**
13639: * xmlRecoverDoc:
13640: * @cur: a pointer to an array of xmlChar
13641: *
13642: * parse an XML in-memory document and build a tree.
13643: * In the case the document is not Well Formed, a attempt to build a
13644: * tree is tried anyway
13645: *
13646: * Returns the resulting document tree or NULL in case of failure
13647: */
13648:
13649: xmlDocPtr
13650: xmlRecoverDoc(const xmlChar *cur) {
13651: return(xmlSAXParseDoc(NULL, cur, 1));
13652: }
13653:
13654: /**
13655: * xmlParseFile:
13656: * @filename: the filename
13657: *
13658: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13659: * compressed document is provided by default if found at compile-time.
13660: *
13661: * Returns the resulting document tree if the file was wellformed,
13662: * NULL otherwise.
13663: */
13664:
13665: xmlDocPtr
13666: xmlParseFile(const char *filename) {
13667: return(xmlSAXParseFile(NULL, filename, 0));
13668: }
13669:
13670: /**
13671: * xmlRecoverFile:
13672: * @filename: the filename
13673: *
13674: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13675: * compressed document is provided by default if found at compile-time.
13676: * In the case the document is not Well Formed, it attempts to build
13677: * a tree anyway
13678: *
13679: * Returns the resulting document tree or NULL in case of failure
13680: */
13681:
13682: xmlDocPtr
13683: xmlRecoverFile(const char *filename) {
13684: return(xmlSAXParseFile(NULL, filename, 1));
13685: }
13686:
13687:
13688: /**
13689: * xmlSetupParserForBuffer:
13690: * @ctxt: an XML parser context
13691: * @buffer: a xmlChar * buffer
13692: * @filename: a file name
13693: *
13694: * Setup the parser context to parse a new buffer; Clears any prior
13695: * contents from the parser context. The buffer parameter must not be
13696: * NULL, but the filename parameter can be
13697: */
13698: void
13699: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13700: const char* filename)
13701: {
13702: xmlParserInputPtr input;
13703:
13704: if ((ctxt == NULL) || (buffer == NULL))
13705: return;
13706:
13707: input = xmlNewInputStream(ctxt);
13708: if (input == NULL) {
13709: xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13710: xmlClearParserCtxt(ctxt);
13711: return;
13712: }
13713:
13714: xmlClearParserCtxt(ctxt);
13715: if (filename != NULL)
13716: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13717: input->base = buffer;
13718: input->cur = buffer;
13719: input->end = &buffer[xmlStrlen(buffer)];
13720: inputPush(ctxt, input);
13721: }
13722:
13723: /**
13724: * xmlSAXUserParseFile:
13725: * @sax: a SAX handler
13726: * @user_data: The user data returned on SAX callbacks
13727: * @filename: a file name
13728: *
13729: * parse an XML file and call the given SAX handler routines.
13730: * Automatic support for ZLIB/Compress compressed document is provided
13731: *
13732: * Returns 0 in case of success or a error number otherwise
13733: */
13734: int
13735: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13736: const char *filename) {
13737: int ret = 0;
13738: xmlParserCtxtPtr ctxt;
13739:
13740: ctxt = xmlCreateFileParserCtxt(filename);
13741: if (ctxt == NULL) return -1;
13742: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13743: xmlFree(ctxt->sax);
13744: ctxt->sax = sax;
13745: xmlDetectSAX2(ctxt);
13746:
13747: if (user_data != NULL)
13748: ctxt->userData = user_data;
13749:
13750: xmlParseDocument(ctxt);
13751:
13752: if (ctxt->wellFormed)
13753: ret = 0;
13754: else {
13755: if (ctxt->errNo != 0)
13756: ret = ctxt->errNo;
13757: else
13758: ret = -1;
13759: }
13760: if (sax != NULL)
13761: ctxt->sax = NULL;
13762: if (ctxt->myDoc != NULL) {
13763: xmlFreeDoc(ctxt->myDoc);
13764: ctxt->myDoc = NULL;
13765: }
13766: xmlFreeParserCtxt(ctxt);
13767:
13768: return ret;
13769: }
13770: #endif /* LIBXML_SAX1_ENABLED */
13771:
13772: /************************************************************************
13773: * *
13774: * Front ends when parsing from memory *
13775: * *
13776: ************************************************************************/
13777:
13778: /**
13779: * xmlCreateMemoryParserCtxt:
13780: * @buffer: a pointer to a char array
13781: * @size: the size of the array
13782: *
13783: * Create a parser context for an XML in-memory document.
13784: *
13785: * Returns the new parser context or NULL
13786: */
13787: xmlParserCtxtPtr
13788: xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13789: xmlParserCtxtPtr ctxt;
13790: xmlParserInputPtr input;
13791: xmlParserInputBufferPtr buf;
13792:
13793: if (buffer == NULL)
13794: return(NULL);
13795: if (size <= 0)
13796: return(NULL);
13797:
13798: ctxt = xmlNewParserCtxt();
13799: if (ctxt == NULL)
13800: return(NULL);
13801:
13802: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13803: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13804: if (buf == NULL) {
13805: xmlFreeParserCtxt(ctxt);
13806: return(NULL);
13807: }
13808:
13809: input = xmlNewInputStream(ctxt);
13810: if (input == NULL) {
13811: xmlFreeParserInputBuffer(buf);
13812: xmlFreeParserCtxt(ctxt);
13813: return(NULL);
13814: }
13815:
13816: input->filename = NULL;
13817: input->buf = buf;
13818: input->base = input->buf->buffer->content;
13819: input->cur = input->buf->buffer->content;
13820: input->end = &input->buf->buffer->content[input->buf->buffer->use];
13821:
13822: inputPush(ctxt, input);
13823: return(ctxt);
13824: }
13825:
13826: #ifdef LIBXML_SAX1_ENABLED
13827: /**
13828: * xmlSAXParseMemoryWithData:
13829: * @sax: the SAX handler block
13830: * @buffer: an pointer to a char array
13831: * @size: the size of the array
13832: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13833: * documents
13834: * @data: the userdata
13835: *
13836: * parse an XML in-memory block and use the given SAX function block
13837: * to handle the parsing callback. If sax is NULL, fallback to the default
13838: * DOM tree building routines.
13839: *
13840: * User data (void *) is stored within the parser context in the
13841: * context's _private member, so it is available nearly everywhere in libxml
13842: *
13843: * Returns the resulting document tree
13844: */
13845:
13846: xmlDocPtr
13847: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13848: int size, int recovery, void *data) {
13849: xmlDocPtr ret;
13850: xmlParserCtxtPtr ctxt;
13851:
13852: xmlInitParser();
13853:
13854: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13855: if (ctxt == NULL) return(NULL);
13856: if (sax != NULL) {
13857: if (ctxt->sax != NULL)
13858: xmlFree(ctxt->sax);
13859: ctxt->sax = sax;
13860: }
13861: xmlDetectSAX2(ctxt);
13862: if (data!=NULL) {
13863: ctxt->_private=data;
13864: }
13865:
13866: ctxt->recovery = recovery;
13867:
13868: xmlParseDocument(ctxt);
13869:
13870: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13871: else {
13872: ret = NULL;
13873: xmlFreeDoc(ctxt->myDoc);
13874: ctxt->myDoc = NULL;
13875: }
13876: if (sax != NULL)
13877: ctxt->sax = NULL;
13878: xmlFreeParserCtxt(ctxt);
13879:
13880: return(ret);
13881: }
13882:
13883: /**
13884: * xmlSAXParseMemory:
13885: * @sax: the SAX handler block
13886: * @buffer: an pointer to a char array
13887: * @size: the size of the array
13888: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13889: * documents
13890: *
13891: * parse an XML in-memory block and use the given SAX function block
13892: * to handle the parsing callback. If sax is NULL, fallback to the default
13893: * DOM tree building routines.
13894: *
13895: * Returns the resulting document tree
13896: */
13897: xmlDocPtr
13898: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13899: int size, int recovery) {
13900: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13901: }
13902:
13903: /**
13904: * xmlParseMemory:
13905: * @buffer: an pointer to a char array
13906: * @size: the size of the array
13907: *
13908: * parse an XML in-memory block and build a tree.
13909: *
13910: * Returns the resulting document tree
13911: */
13912:
13913: xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13914: return(xmlSAXParseMemory(NULL, buffer, size, 0));
13915: }
13916:
13917: /**
13918: * xmlRecoverMemory:
13919: * @buffer: an pointer to a char array
13920: * @size: the size of the array
13921: *
13922: * parse an XML in-memory block and build a tree.
13923: * In the case the document is not Well Formed, an attempt to
13924: * build a tree is tried anyway
13925: *
13926: * Returns the resulting document tree or NULL in case of error
13927: */
13928:
13929: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13930: return(xmlSAXParseMemory(NULL, buffer, size, 1));
13931: }
13932:
13933: /**
13934: * xmlSAXUserParseMemory:
13935: * @sax: a SAX handler
13936: * @user_data: The user data returned on SAX callbacks
13937: * @buffer: an in-memory XML document input
13938: * @size: the length of the XML document in bytes
13939: *
13940: * A better SAX parsing routine.
13941: * parse an XML in-memory buffer and call the given SAX handler routines.
13942: *
13943: * Returns 0 in case of success or a error number otherwise
13944: */
13945: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13946: const char *buffer, int size) {
13947: int ret = 0;
13948: xmlParserCtxtPtr ctxt;
13949:
13950: xmlInitParser();
13951:
13952: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13953: if (ctxt == NULL) return -1;
13954: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13955: xmlFree(ctxt->sax);
13956: ctxt->sax = sax;
13957: xmlDetectSAX2(ctxt);
13958:
13959: if (user_data != NULL)
13960: ctxt->userData = user_data;
13961:
13962: xmlParseDocument(ctxt);
13963:
13964: if (ctxt->wellFormed)
13965: ret = 0;
13966: else {
13967: if (ctxt->errNo != 0)
13968: ret = ctxt->errNo;
13969: else
13970: ret = -1;
13971: }
13972: if (sax != NULL)
13973: ctxt->sax = NULL;
13974: if (ctxt->myDoc != NULL) {
13975: xmlFreeDoc(ctxt->myDoc);
13976: ctxt->myDoc = NULL;
13977: }
13978: xmlFreeParserCtxt(ctxt);
13979:
13980: return ret;
13981: }
13982: #endif /* LIBXML_SAX1_ENABLED */
13983:
13984: /**
13985: * xmlCreateDocParserCtxt:
13986: * @cur: a pointer to an array of xmlChar
13987: *
13988: * Creates a parser context for an XML in-memory document.
13989: *
13990: * Returns the new parser context or NULL
13991: */
13992: xmlParserCtxtPtr
13993: xmlCreateDocParserCtxt(const xmlChar *cur) {
13994: int len;
13995:
13996: if (cur == NULL)
13997: return(NULL);
13998: len = xmlStrlen(cur);
13999: return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14000: }
14001:
14002: #ifdef LIBXML_SAX1_ENABLED
14003: /**
14004: * xmlSAXParseDoc:
14005: * @sax: the SAX handler block
14006: * @cur: a pointer to an array of xmlChar
14007: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14008: * documents
14009: *
14010: * parse an XML in-memory document and build a tree.
14011: * It use the given SAX function block to handle the parsing callback.
14012: * If sax is NULL, fallback to the default DOM tree building routines.
14013: *
14014: * Returns the resulting document tree
14015: */
14016:
14017: xmlDocPtr
14018: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14019: xmlDocPtr ret;
14020: xmlParserCtxtPtr ctxt;
14021: xmlSAXHandlerPtr oldsax = NULL;
14022:
14023: if (cur == NULL) return(NULL);
14024:
14025:
14026: ctxt = xmlCreateDocParserCtxt(cur);
14027: if (ctxt == NULL) return(NULL);
14028: if (sax != NULL) {
14029: oldsax = ctxt->sax;
14030: ctxt->sax = sax;
14031: ctxt->userData = NULL;
14032: }
14033: xmlDetectSAX2(ctxt);
14034:
14035: xmlParseDocument(ctxt);
14036: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14037: else {
14038: ret = NULL;
14039: xmlFreeDoc(ctxt->myDoc);
14040: ctxt->myDoc = NULL;
14041: }
14042: if (sax != NULL)
14043: ctxt->sax = oldsax;
14044: xmlFreeParserCtxt(ctxt);
14045:
14046: return(ret);
14047: }
14048:
14049: /**
14050: * xmlParseDoc:
14051: * @cur: a pointer to an array of xmlChar
14052: *
14053: * parse an XML in-memory document and build a tree.
14054: *
14055: * Returns the resulting document tree
14056: */
14057:
14058: xmlDocPtr
14059: xmlParseDoc(const xmlChar *cur) {
14060: return(xmlSAXParseDoc(NULL, cur, 0));
14061: }
14062: #endif /* LIBXML_SAX1_ENABLED */
14063:
14064: #ifdef LIBXML_LEGACY_ENABLED
14065: /************************************************************************
14066: * *
14067: * Specific function to keep track of entities references *
14068: * and used by the XSLT debugger *
14069: * *
14070: ************************************************************************/
14071:
14072: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14073:
14074: /**
14075: * xmlAddEntityReference:
14076: * @ent : A valid entity
14077: * @firstNode : A valid first node for children of entity
14078: * @lastNode : A valid last node of children entity
14079: *
14080: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14081: */
14082: static void
14083: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14084: xmlNodePtr lastNode)
14085: {
14086: if (xmlEntityRefFunc != NULL) {
14087: (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14088: }
14089: }
14090:
14091:
14092: /**
14093: * xmlSetEntityReferenceFunc:
14094: * @func: A valid function
14095: *
14096: * Set the function to call call back when a xml reference has been made
14097: */
14098: void
14099: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14100: {
14101: xmlEntityRefFunc = func;
14102: }
14103: #endif /* LIBXML_LEGACY_ENABLED */
14104:
14105: /************************************************************************
14106: * *
14107: * Miscellaneous *
14108: * *
14109: ************************************************************************/
14110:
14111: #ifdef LIBXML_XPATH_ENABLED
14112: #include <libxml/xpath.h>
14113: #endif
14114:
14115: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14116: static int xmlParserInitialized = 0;
14117:
14118: /**
14119: * xmlInitParser:
14120: *
14121: * Initialization function for the XML parser.
14122: * This is not reentrant. Call once before processing in case of
14123: * use in multithreaded programs.
14124: */
14125:
14126: void
14127: xmlInitParser(void) {
14128: if (xmlParserInitialized != 0)
14129: return;
14130:
14131: #ifdef LIBXML_THREAD_ENABLED
14132: __xmlGlobalInitMutexLock();
14133: if (xmlParserInitialized == 0) {
14134: #endif
14135: xmlInitThreads();
14136: xmlInitGlobals();
14137: if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14138: (xmlGenericError == NULL))
14139: initGenericErrorDefaultFunc(NULL);
14140: xmlInitMemory();
14141: xmlInitCharEncodingHandlers();
14142: xmlDefaultSAXHandlerInit();
14143: xmlRegisterDefaultInputCallbacks();
14144: #ifdef LIBXML_OUTPUT_ENABLED
14145: xmlRegisterDefaultOutputCallbacks();
14146: #endif /* LIBXML_OUTPUT_ENABLED */
14147: #ifdef LIBXML_HTML_ENABLED
14148: htmlInitAutoClose();
14149: htmlDefaultSAXHandlerInit();
14150: #endif
14151: #ifdef LIBXML_XPATH_ENABLED
14152: xmlXPathInit();
14153: #endif
14154: xmlParserInitialized = 1;
14155: #ifdef LIBXML_THREAD_ENABLED
14156: }
14157: __xmlGlobalInitMutexUnlock();
14158: #endif
14159: }
14160:
14161: /**
14162: * xmlCleanupParser:
14163: *
14164: * This function name is somewhat misleading. It does not clean up
14165: * parser state, it cleans up memory allocated by the library itself.
14166: * It is a cleanup function for the XML library. It tries to reclaim all
14167: * related global memory allocated for the library processing.
14168: * It doesn't deallocate any document related memory. One should
14169: * call xmlCleanupParser() only when the process has finished using
14170: * the library and all XML/HTML documents built with it.
14171: * See also xmlInitParser() which has the opposite function of preparing
14172: * the library for operations.
14173: *
14174: * WARNING: if your application is multithreaded or has plugin support
14175: * calling this may crash the application if another thread or
14176: * a plugin is still using libxml2. It's sometimes very hard to
14177: * guess if libxml2 is in use in the application, some libraries
14178: * or plugins may use it without notice. In case of doubt abstain
14179: * from calling this function or do it just before calling exit()
14180: * to avoid leak reports from valgrind !
14181: */
14182:
14183: void
14184: xmlCleanupParser(void) {
14185: if (!xmlParserInitialized)
14186: return;
14187:
14188: xmlCleanupCharEncodingHandlers();
14189: #ifdef LIBXML_CATALOG_ENABLED
14190: xmlCatalogCleanup();
14191: #endif
14192: xmlDictCleanup();
14193: xmlCleanupInputCallbacks();
14194: #ifdef LIBXML_OUTPUT_ENABLED
14195: xmlCleanupOutputCallbacks();
14196: #endif
14197: #ifdef LIBXML_SCHEMAS_ENABLED
14198: xmlSchemaCleanupTypes();
14199: xmlRelaxNGCleanupTypes();
14200: #endif
14201: xmlCleanupGlobals();
14202: xmlResetLastError();
14203: xmlCleanupThreads(); /* must be last if called not from the main thread */
14204: xmlCleanupMemory();
14205: xmlParserInitialized = 0;
14206: }
14207:
14208: /************************************************************************
14209: * *
14210: * New set (2.6.0) of simpler and more flexible APIs *
14211: * *
14212: ************************************************************************/
14213:
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The macro refers to a variable named dict that must
 * be visible where it is expanded; a NULL dict means that no
 * dictionary can own the string.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside unbraced if/else). Terminate each use
 * with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14225:
14226: /**
14227: * xmlCtxtReset:
14228: * @ctxt: an XML parser context
14229: *
14230: * Reset a parser context
14231: */
14232: void
14233: xmlCtxtReset(xmlParserCtxtPtr ctxt)
14234: {
14235: xmlParserInputPtr input;
14236: xmlDictPtr dict;
14237:
14238: if (ctxt == NULL)
14239: return;
14240:
14241: dict = ctxt->dict;
14242:
14243: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14244: xmlFreeInputStream(input);
14245: }
14246: ctxt->inputNr = 0;
14247: ctxt->input = NULL;
14248:
14249: ctxt->spaceNr = 0;
14250: if (ctxt->spaceTab != NULL) {
14251: ctxt->spaceTab[0] = -1;
14252: ctxt->space = &ctxt->spaceTab[0];
14253: } else {
14254: ctxt->space = NULL;
14255: }
14256:
14257:
14258: ctxt->nodeNr = 0;
14259: ctxt->node = NULL;
14260:
14261: ctxt->nameNr = 0;
14262: ctxt->name = NULL;
14263:
14264: DICT_FREE(ctxt->version);
14265: ctxt->version = NULL;
14266: DICT_FREE(ctxt->encoding);
14267: ctxt->encoding = NULL;
14268: DICT_FREE(ctxt->directory);
14269: ctxt->directory = NULL;
14270: DICT_FREE(ctxt->extSubURI);
14271: ctxt->extSubURI = NULL;
14272: DICT_FREE(ctxt->extSubSystem);
14273: ctxt->extSubSystem = NULL;
14274: if (ctxt->myDoc != NULL)
14275: xmlFreeDoc(ctxt->myDoc);
14276: ctxt->myDoc = NULL;
14277:
14278: ctxt->standalone = -1;
14279: ctxt->hasExternalSubset = 0;
14280: ctxt->hasPErefs = 0;
14281: ctxt->html = 0;
14282: ctxt->external = 0;
14283: ctxt->instate = XML_PARSER_START;
14284: ctxt->token = 0;
14285:
14286: ctxt->wellFormed = 1;
14287: ctxt->nsWellFormed = 1;
14288: ctxt->disableSAX = 0;
14289: ctxt->valid = 1;
14290: #if 0
14291: ctxt->vctxt.userData = ctxt;
14292: ctxt->vctxt.error = xmlParserValidityError;
14293: ctxt->vctxt.warning = xmlParserValidityWarning;
14294: #endif
14295: ctxt->record_info = 0;
14296: ctxt->nbChars = 0;
14297: ctxt->checkIndex = 0;
14298: ctxt->inSubset = 0;
14299: ctxt->errNo = XML_ERR_OK;
14300: ctxt->depth = 0;
14301: ctxt->charset = XML_CHAR_ENCODING_UTF8;
14302: ctxt->catalogs = NULL;
14303: ctxt->nbentities = 0;
14304: ctxt->sizeentities = 0;
14305: xmlInitNodeInfoSeq(&ctxt->node_seq);
14306:
14307: if (ctxt->attsDefault != NULL) {
14308: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14309: ctxt->attsDefault = NULL;
14310: }
14311: if (ctxt->attsSpecial != NULL) {
14312: xmlHashFree(ctxt->attsSpecial, NULL);
14313: ctxt->attsSpecial = NULL;
14314: }
14315:
14316: #ifdef LIBXML_CATALOG_ENABLED
14317: if (ctxt->catalogs != NULL)
14318: xmlCatalogFreeLocal(ctxt->catalogs);
14319: #endif
14320: if (ctxt->lastError.code != XML_ERR_OK)
14321: xmlResetError(&ctxt->lastError);
14322: }
14323:
14324: /**
14325: * xmlCtxtResetPush:
14326: * @ctxt: an XML parser context
14327: * @chunk: a pointer to an array of chars
14328: * @size: number of chars in the array
14329: * @filename: an optional file name or URI
14330: * @encoding: the document encoding, or NULL
14331: *
14332: * Reset a push parser context
14333: *
14334: * Returns 0 in case of success and 1 in case of error
14335: */
14336: int
14337: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14338: int size, const char *filename, const char *encoding)
14339: {
14340: xmlParserInputPtr inputStream;
14341: xmlParserInputBufferPtr buf;
14342: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14343:
14344: if (ctxt == NULL)
14345: return(1);
14346:
14347: if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14348: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14349:
14350: buf = xmlAllocParserInputBuffer(enc);
14351: if (buf == NULL)
14352: return(1);
14353:
14354: if (ctxt == NULL) {
14355: xmlFreeParserInputBuffer(buf);
14356: return(1);
14357: }
14358:
14359: xmlCtxtReset(ctxt);
14360:
14361: if (ctxt->pushTab == NULL) {
14362: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14363: sizeof(xmlChar *));
14364: if (ctxt->pushTab == NULL) {
14365: xmlErrMemory(ctxt, NULL);
14366: xmlFreeParserInputBuffer(buf);
14367: return(1);
14368: }
14369: }
14370:
14371: if (filename == NULL) {
14372: ctxt->directory = NULL;
14373: } else {
14374: ctxt->directory = xmlParserGetDirectory(filename);
14375: }
14376:
14377: inputStream = xmlNewInputStream(ctxt);
14378: if (inputStream == NULL) {
14379: xmlFreeParserInputBuffer(buf);
14380: return(1);
14381: }
14382:
14383: if (filename == NULL)
14384: inputStream->filename = NULL;
14385: else
14386: inputStream->filename = (char *)
14387: xmlCanonicPath((const xmlChar *) filename);
14388: inputStream->buf = buf;
14389: inputStream->base = inputStream->buf->buffer->content;
14390: inputStream->cur = inputStream->buf->buffer->content;
14391: inputStream->end =
14392: &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14393:
14394: inputPush(ctxt, inputStream);
14395:
14396: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14397: (ctxt->input->buf != NULL)) {
14398: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14399: int cur = ctxt->input->cur - ctxt->input->base;
14400:
14401: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14402:
14403: ctxt->input->base = ctxt->input->buf->buffer->content + base;
14404: ctxt->input->cur = ctxt->input->base + cur;
14405: ctxt->input->end =
14406: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14407: use];
14408: #ifdef DEBUG_PUSH
14409: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14410: #endif
14411: }
14412:
14413: if (encoding != NULL) {
14414: xmlCharEncodingHandlerPtr hdlr;
14415:
14416: if (ctxt->encoding != NULL)
14417: xmlFree((xmlChar *) ctxt->encoding);
14418: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14419:
14420: hdlr = xmlFindCharEncodingHandler(encoding);
14421: if (hdlr != NULL) {
14422: xmlSwitchToEncoding(ctxt, hdlr);
14423: } else {
14424: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14425: "Unsupported encoding %s\n", BAD_CAST encoding);
14426: }
14427: } else if (enc != XML_CHAR_ENCODING_NONE) {
14428: xmlSwitchEncoding(ctxt, enc);
14429: }
14430:
14431: return(0);
14432: }
14433:
14434:
14435: /**
14436: * xmlCtxtUseOptionsInternal:
14437: * @ctxt: an XML parser context
14438: * @options: a combination of xmlParserOption
14439: * @encoding: the user provided encoding to use
14440: *
14441: * Applies the options to the parser context
14442: *
14443: * Returns 0 in case of success, the set of unknown or unimplemented options
14444: * in case of error.
14445: */
14446: static int
14447: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14448: {
14449: if (ctxt == NULL)
14450: return(-1);
14451: if (encoding != NULL) {
14452: if (ctxt->encoding != NULL)
14453: xmlFree((xmlChar *) ctxt->encoding);
14454: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14455: }
14456: if (options & XML_PARSE_RECOVER) {
14457: ctxt->recovery = 1;
14458: options -= XML_PARSE_RECOVER;
14459: ctxt->options |= XML_PARSE_RECOVER;
14460: } else
14461: ctxt->recovery = 0;
14462: if (options & XML_PARSE_DTDLOAD) {
14463: ctxt->loadsubset = XML_DETECT_IDS;
14464: options -= XML_PARSE_DTDLOAD;
14465: ctxt->options |= XML_PARSE_DTDLOAD;
14466: } else
14467: ctxt->loadsubset = 0;
14468: if (options & XML_PARSE_DTDATTR) {
14469: ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14470: options -= XML_PARSE_DTDATTR;
14471: ctxt->options |= XML_PARSE_DTDATTR;
14472: }
14473: if (options & XML_PARSE_NOENT) {
14474: ctxt->replaceEntities = 1;
14475: /* ctxt->loadsubset |= XML_DETECT_IDS; */
14476: options -= XML_PARSE_NOENT;
14477: ctxt->options |= XML_PARSE_NOENT;
14478: } else
14479: ctxt->replaceEntities = 0;
14480: if (options & XML_PARSE_PEDANTIC) {
14481: ctxt->pedantic = 1;
14482: options -= XML_PARSE_PEDANTIC;
14483: ctxt->options |= XML_PARSE_PEDANTIC;
14484: } else
14485: ctxt->pedantic = 0;
14486: if (options & XML_PARSE_NOBLANKS) {
14487: ctxt->keepBlanks = 0;
14488: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14489: options -= XML_PARSE_NOBLANKS;
14490: ctxt->options |= XML_PARSE_NOBLANKS;
14491: } else
14492: ctxt->keepBlanks = 1;
14493: if (options & XML_PARSE_DTDVALID) {
14494: ctxt->validate = 1;
14495: if (options & XML_PARSE_NOWARNING)
14496: ctxt->vctxt.warning = NULL;
14497: if (options & XML_PARSE_NOERROR)
14498: ctxt->vctxt.error = NULL;
14499: options -= XML_PARSE_DTDVALID;
14500: ctxt->options |= XML_PARSE_DTDVALID;
14501: } else
14502: ctxt->validate = 0;
14503: if (options & XML_PARSE_NOWARNING) {
14504: ctxt->sax->warning = NULL;
14505: options -= XML_PARSE_NOWARNING;
14506: }
14507: if (options & XML_PARSE_NOERROR) {
14508: ctxt->sax->error = NULL;
14509: ctxt->sax->fatalError = NULL;
14510: options -= XML_PARSE_NOERROR;
14511: }
14512: #ifdef LIBXML_SAX1_ENABLED
14513: if (options & XML_PARSE_SAX1) {
14514: ctxt->sax->startElement = xmlSAX2StartElement;
14515: ctxt->sax->endElement = xmlSAX2EndElement;
14516: ctxt->sax->startElementNs = NULL;
14517: ctxt->sax->endElementNs = NULL;
14518: ctxt->sax->initialized = 1;
14519: options -= XML_PARSE_SAX1;
14520: ctxt->options |= XML_PARSE_SAX1;
14521: }
14522: #endif /* LIBXML_SAX1_ENABLED */
14523: if (options & XML_PARSE_NODICT) {
14524: ctxt->dictNames = 0;
14525: options -= XML_PARSE_NODICT;
14526: ctxt->options |= XML_PARSE_NODICT;
14527: } else {
14528: ctxt->dictNames = 1;
14529: }
14530: if (options & XML_PARSE_NOCDATA) {
14531: ctxt->sax->cdataBlock = NULL;
14532: options -= XML_PARSE_NOCDATA;
14533: ctxt->options |= XML_PARSE_NOCDATA;
14534: }
14535: if (options & XML_PARSE_NSCLEAN) {
14536: ctxt->options |= XML_PARSE_NSCLEAN;
14537: options -= XML_PARSE_NSCLEAN;
14538: }
14539: if (options & XML_PARSE_NONET) {
14540: ctxt->options |= XML_PARSE_NONET;
14541: options -= XML_PARSE_NONET;
14542: }
14543: if (options & XML_PARSE_COMPACT) {
14544: ctxt->options |= XML_PARSE_COMPACT;
14545: options -= XML_PARSE_COMPACT;
14546: }
14547: if (options & XML_PARSE_OLD10) {
14548: ctxt->options |= XML_PARSE_OLD10;
14549: options -= XML_PARSE_OLD10;
14550: }
14551: if (options & XML_PARSE_NOBASEFIX) {
14552: ctxt->options |= XML_PARSE_NOBASEFIX;
14553: options -= XML_PARSE_NOBASEFIX;
14554: }
14555: if (options & XML_PARSE_HUGE) {
14556: ctxt->options |= XML_PARSE_HUGE;
14557: options -= XML_PARSE_HUGE;
14558: }
14559: if (options & XML_PARSE_OLDSAX) {
14560: ctxt->options |= XML_PARSE_OLDSAX;
14561: options -= XML_PARSE_OLDSAX;
14562: }
14563: ctxt->linenumbers = 1;
14564: return (options);
14565: }
14566:
14567: /**
14568: * xmlCtxtUseOptions:
14569: * @ctxt: an XML parser context
14570: * @options: a combination of xmlParserOption
14571: *
14572: * Applies the options to the parser context
14573: *
14574: * Returns 0 in case of success, the set of unknown or unimplemented options
14575: * in case of error.
14576: */
14577: int
14578: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14579: {
14580: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14581: }
14582:
14583: /**
14584: * xmlDoRead:
14585: * @ctxt: an XML parser context
14586: * @URL: the base URL to use for the document
14587: * @encoding: the document encoding, or NULL
14588: * @options: a combination of xmlParserOption
14589: * @reuse: keep the context for reuse
14590: *
14591: * Common front-end for the xmlRead functions
14592: *
14593: * Returns the resulting document tree or NULL
14594: */
14595: static xmlDocPtr
14596: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14597: int options, int reuse)
14598: {
14599: xmlDocPtr ret;
14600:
14601: xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14602: if (encoding != NULL) {
14603: xmlCharEncodingHandlerPtr hdlr;
14604:
14605: hdlr = xmlFindCharEncodingHandler(encoding);
14606: if (hdlr != NULL)
14607: xmlSwitchToEncoding(ctxt, hdlr);
14608: }
14609: if ((URL != NULL) && (ctxt->input != NULL) &&
14610: (ctxt->input->filename == NULL))
14611: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14612: xmlParseDocument(ctxt);
14613: if ((ctxt->wellFormed) || ctxt->recovery)
14614: ret = ctxt->myDoc;
14615: else {
14616: ret = NULL;
14617: if (ctxt->myDoc != NULL) {
14618: xmlFreeDoc(ctxt->myDoc);
14619: }
14620: }
14621: ctxt->myDoc = NULL;
14622: if (!reuse) {
14623: xmlFreeParserCtxt(ctxt);
14624: }
14625:
14626: return (ret);
14627: }
14628:
14629: /**
14630: * xmlReadDoc:
14631: * @cur: a pointer to a zero terminated string
14632: * @URL: the base URL to use for the document
14633: * @encoding: the document encoding, or NULL
14634: * @options: a combination of xmlParserOption
14635: *
14636: * parse an XML in-memory document and build a tree.
14637: *
14638: * Returns the resulting document tree
14639: */
14640: xmlDocPtr
14641: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14642: {
14643: xmlParserCtxtPtr ctxt;
14644:
14645: if (cur == NULL)
14646: return (NULL);
14647:
14648: ctxt = xmlCreateDocParserCtxt(cur);
14649: if (ctxt == NULL)
14650: return (NULL);
14651: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14652: }
14653:
14654: /**
14655: * xmlReadFile:
14656: * @filename: a file or URL
14657: * @encoding: the document encoding, or NULL
14658: * @options: a combination of xmlParserOption
14659: *
14660: * parse an XML file from the filesystem or the network.
14661: *
14662: * Returns the resulting document tree
14663: */
14664: xmlDocPtr
14665: xmlReadFile(const char *filename, const char *encoding, int options)
14666: {
14667: xmlParserCtxtPtr ctxt;
14668:
14669: ctxt = xmlCreateURLParserCtxt(filename, options);
14670: if (ctxt == NULL)
14671: return (NULL);
14672: return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14673: }
14674:
14675: /**
14676: * xmlReadMemory:
14677: * @buffer: a pointer to a char array
14678: * @size: the size of the array
14679: * @URL: the base URL to use for the document
14680: * @encoding: the document encoding, or NULL
14681: * @options: a combination of xmlParserOption
14682: *
14683: * parse an XML in-memory document and build a tree.
14684: *
14685: * Returns the resulting document tree
14686: */
14687: xmlDocPtr
14688: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14689: {
14690: xmlParserCtxtPtr ctxt;
14691:
14692: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14693: if (ctxt == NULL)
14694: return (NULL);
14695: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14696: }
14697:
14698: /**
14699: * xmlReadFd:
14700: * @fd: an open file descriptor
14701: * @URL: the base URL to use for the document
14702: * @encoding: the document encoding, or NULL
14703: * @options: a combination of xmlParserOption
14704: *
14705: * parse an XML from a file descriptor and build a tree.
14706: * NOTE that the file descriptor will not be closed when the
14707: * reader is closed or reset.
14708: *
14709: * Returns the resulting document tree
14710: */
14711: xmlDocPtr
14712: xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14713: {
14714: xmlParserCtxtPtr ctxt;
14715: xmlParserInputBufferPtr input;
14716: xmlParserInputPtr stream;
14717:
14718: if (fd < 0)
14719: return (NULL);
14720:
14721: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14722: if (input == NULL)
14723: return (NULL);
14724: input->closecallback = NULL;
14725: ctxt = xmlNewParserCtxt();
14726: if (ctxt == NULL) {
14727: xmlFreeParserInputBuffer(input);
14728: return (NULL);
14729: }
14730: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14731: if (stream == NULL) {
14732: xmlFreeParserInputBuffer(input);
14733: xmlFreeParserCtxt(ctxt);
14734: return (NULL);
14735: }
14736: inputPush(ctxt, stream);
14737: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14738: }
14739:
14740: /**
14741: * xmlReadIO:
14742: * @ioread: an I/O read function
14743: * @ioclose: an I/O close function
14744: * @ioctx: an I/O handler
14745: * @URL: the base URL to use for the document
14746: * @encoding: the document encoding, or NULL
14747: * @options: a combination of xmlParserOption
14748: *
14749: * parse an XML document from I/O functions and source and build a tree.
14750: *
14751: * Returns the resulting document tree
14752: */
14753: xmlDocPtr
14754: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14755: void *ioctx, const char *URL, const char *encoding, int options)
14756: {
14757: xmlParserCtxtPtr ctxt;
14758: xmlParserInputBufferPtr input;
14759: xmlParserInputPtr stream;
14760:
14761: if (ioread == NULL)
14762: return (NULL);
14763:
14764: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14765: XML_CHAR_ENCODING_NONE);
14766: if (input == NULL)
14767: return (NULL);
14768: ctxt = xmlNewParserCtxt();
14769: if (ctxt == NULL) {
14770: xmlFreeParserInputBuffer(input);
14771: return (NULL);
14772: }
14773: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14774: if (stream == NULL) {
14775: xmlFreeParserInputBuffer(input);
14776: xmlFreeParserCtxt(ctxt);
14777: return (NULL);
14778: }
14779: inputPush(ctxt, stream);
14780: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14781: }
14782:
14783: /**
14784: * xmlCtxtReadDoc:
14785: * @ctxt: an XML parser context
14786: * @cur: a pointer to a zero terminated string
14787: * @URL: the base URL to use for the document
14788: * @encoding: the document encoding, or NULL
14789: * @options: a combination of xmlParserOption
14790: *
14791: * parse an XML in-memory document and build a tree.
14792: * This reuses the existing @ctxt parser context
14793: *
14794: * Returns the resulting document tree
14795: */
14796: xmlDocPtr
14797: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14798: const char *URL, const char *encoding, int options)
14799: {
14800: xmlParserInputPtr stream;
14801:
14802: if (cur == NULL)
14803: return (NULL);
14804: if (ctxt == NULL)
14805: return (NULL);
14806:
14807: xmlCtxtReset(ctxt);
14808:
14809: stream = xmlNewStringInputStream(ctxt, cur);
14810: if (stream == NULL) {
14811: return (NULL);
14812: }
14813: inputPush(ctxt, stream);
14814: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14815: }
14816:
14817: /**
14818: * xmlCtxtReadFile:
14819: * @ctxt: an XML parser context
14820: * @filename: a file or URL
14821: * @encoding: the document encoding, or NULL
14822: * @options: a combination of xmlParserOption
14823: *
14824: * parse an XML file from the filesystem or the network.
14825: * This reuses the existing @ctxt parser context
14826: *
14827: * Returns the resulting document tree
14828: */
14829: xmlDocPtr
14830: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14831: const char *encoding, int options)
14832: {
14833: xmlParserInputPtr stream;
14834:
14835: if (filename == NULL)
14836: return (NULL);
14837: if (ctxt == NULL)
14838: return (NULL);
14839:
14840: xmlCtxtReset(ctxt);
14841:
14842: stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14843: if (stream == NULL) {
14844: return (NULL);
14845: }
14846: inputPush(ctxt, stream);
14847: return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14848: }
14849:
14850: /**
14851: * xmlCtxtReadMemory:
14852: * @ctxt: an XML parser context
14853: * @buffer: a pointer to a char array
14854: * @size: the size of the array
14855: * @URL: the base URL to use for the document
14856: * @encoding: the document encoding, or NULL
14857: * @options: a combination of xmlParserOption
14858: *
14859: * parse an XML in-memory document and build a tree.
14860: * This reuses the existing @ctxt parser context
14861: *
14862: * Returns the resulting document tree
14863: */
14864: xmlDocPtr
14865: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14866: const char *URL, const char *encoding, int options)
14867: {
14868: xmlParserInputBufferPtr input;
14869: xmlParserInputPtr stream;
14870:
14871: if (ctxt == NULL)
14872: return (NULL);
14873: if (buffer == NULL)
14874: return (NULL);
14875:
14876: xmlCtxtReset(ctxt);
14877:
14878: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879: if (input == NULL) {
14880: return(NULL);
14881: }
14882:
14883: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884: if (stream == NULL) {
14885: xmlFreeParserInputBuffer(input);
14886: return(NULL);
14887: }
14888:
14889: inputPush(ctxt, stream);
14890: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14891: }
14892:
14893: /**
14894: * xmlCtxtReadFd:
14895: * @ctxt: an XML parser context
14896: * @fd: an open file descriptor
14897: * @URL: the base URL to use for the document
14898: * @encoding: the document encoding, or NULL
14899: * @options: a combination of xmlParserOption
14900: *
14901: * parse an XML from a file descriptor and build a tree.
14902: * This reuses the existing @ctxt parser context
14903: * NOTE that the file descriptor will not be closed when the
14904: * reader is closed or reset.
14905: *
14906: * Returns the resulting document tree
14907: */
14908: xmlDocPtr
14909: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910: const char *URL, const char *encoding, int options)
14911: {
14912: xmlParserInputBufferPtr input;
14913: xmlParserInputPtr stream;
14914:
14915: if (fd < 0)
14916: return (NULL);
14917: if (ctxt == NULL)
14918: return (NULL);
14919:
14920: xmlCtxtReset(ctxt);
14921:
14922:
14923: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14924: if (input == NULL)
14925: return (NULL);
14926: input->closecallback = NULL;
14927: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14928: if (stream == NULL) {
14929: xmlFreeParserInputBuffer(input);
14930: return (NULL);
14931: }
14932: inputPush(ctxt, stream);
14933: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14934: }
14935:
14936: /**
14937: * xmlCtxtReadIO:
14938: * @ctxt: an XML parser context
14939: * @ioread: an I/O read function
14940: * @ioclose: an I/O close function
14941: * @ioctx: an I/O handler
14942: * @URL: the base URL to use for the document
14943: * @encoding: the document encoding, or NULL
14944: * @options: a combination of xmlParserOption
14945: *
14946: * parse an XML document from I/O functions and source and build a tree.
14947: * This reuses the existing @ctxt parser context
14948: *
14949: * Returns the resulting document tree
14950: */
14951: xmlDocPtr
14952: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14953: xmlInputCloseCallback ioclose, void *ioctx,
14954: const char *URL,
14955: const char *encoding, int options)
14956: {
14957: xmlParserInputBufferPtr input;
14958: xmlParserInputPtr stream;
14959:
14960: if (ioread == NULL)
14961: return (NULL);
14962: if (ctxt == NULL)
14963: return (NULL);
14964:
14965: xmlCtxtReset(ctxt);
14966:
14967: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14968: XML_CHAR_ENCODING_NONE);
14969: if (input == NULL)
14970: return (NULL);
14971: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14972: if (stream == NULL) {
14973: xmlFreeParserInputBuffer(input);
14974: return (NULL);
14975: }
14976: inputPush(ctxt, stream);
14977: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14978: }
14979:
14980: #define bottom_parser
14981: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>