Annotation of embedaddon/libxml2/parser.c, revision 1.1.1.2
1.1 misho 1: /*
2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
4: *
5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
28: * See Copyright for the status of this software.
29: *
30: * daniel@veillard.com
31: */
32:
33: #define IN_LIBXML
34: #include "libxml.h"
35:
36: #if defined(WIN32) && !defined (__CYGWIN__)
37: #define XML_DIR_SEP '\\'
38: #else
39: #define XML_DIR_SEP '/'
40: #endif
41:
42: #include <stdlib.h>
43: #include <string.h>
44: #include <stdarg.h>
45: #include <libxml/xmlmemory.h>
46: #include <libxml/threads.h>
47: #include <libxml/globals.h>
48: #include <libxml/tree.h>
49: #include <libxml/parser.h>
50: #include <libxml/parserInternals.h>
51: #include <libxml/valid.h>
52: #include <libxml/entities.h>
53: #include <libxml/xmlerror.h>
54: #include <libxml/encoding.h>
55: #include <libxml/xmlIO.h>
56: #include <libxml/uri.h>
57: #ifdef LIBXML_CATALOG_ENABLED
58: #include <libxml/catalog.h>
59: #endif
60: #ifdef LIBXML_SCHEMAS_ENABLED
61: #include <libxml/xmlschemastypes.h>
62: #include <libxml/relaxng.h>
63: #endif
64: #ifdef HAVE_CTYPE_H
65: #include <ctype.h>
66: #endif
67: #ifdef HAVE_STDLIB_H
68: #include <stdlib.h>
69: #endif
70: #ifdef HAVE_SYS_STAT_H
71: #include <sys/stat.h>
72: #endif
73: #ifdef HAVE_FCNTL_H
74: #include <fcntl.h>
75: #endif
76: #ifdef HAVE_UNISTD_H
77: #include <unistd.h>
78: #endif
79: #ifdef HAVE_ZLIB_H
80: #include <zlib.h>
81: #endif
1.1.1.2 ! misho 82: #ifdef HAVE_LZMA_H
! 83: #include <lzma.h>
! 84: #endif
1.1 misho 85:
/*
 * Forward declaration of the generic fatal-error reporter; its definition
 * appears further down in this file.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

/*
 * Forward declaration of a file-local (static) helper; the definition is
 * not adjacent to this prototype.
 */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);
92:
93: /************************************************************************
94: * *
95: * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
96: * *
97: ************************************************************************/
98:
99: #define XML_PARSER_BIG_ENTITY 1000
100: #define XML_PARSER_LOT_ENTITY 5000
101:
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
108: #define XML_PARSER_NON_LINEAR 10
109:
110: /*
111: * xmlParserEntityCheck
112: *
113: * Function to check non-linear entity expansion behaviour
114: * This is here to detect and stop exponential linear entity expansion
115: * This is not a limitation of the parser but a safety
116: * boundary feature. It can be disabled with the XML_PARSE_HUGE
117: * parser option.
118: */
119: static int
120: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
121: xmlEntityPtr ent)
122: {
123: unsigned long consumed = 0;
124:
125: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
126: return (0);
127: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
128: return (1);
129: if (size != 0) {
130: /*
131: * Do the check based on the replacement size of the entity
132: */
133: if (size < XML_PARSER_BIG_ENTITY)
134: return(0);
135:
136: /*
137: * A limit on the amount of text data reasonably used
138: */
139: if (ctxt->input != NULL) {
140: consumed = ctxt->input->consumed +
141: (ctxt->input->cur - ctxt->input->base);
142: }
143: consumed += ctxt->sizeentities;
144:
145: if ((size < XML_PARSER_NON_LINEAR * consumed) &&
146: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
147: return (0);
148: } else if (ent != NULL) {
149: /*
150: * use the number of parsed entities in the replacement
151: */
152: size = ent->checked;
153:
154: /*
155: * The amount of data parsed counting entities size only once
156: */
157: if (ctxt->input != NULL) {
158: consumed = ctxt->input->consumed +
159: (ctxt->input->cur - ctxt->input->base);
160: }
161: consumed += ctxt->sizeentities;
162:
163: /*
164: * Check the density of entities for the amount of data
165: * knowing an entity reference will take at least 3 bytes
166: */
167: if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
168: return (0);
169: } else {
170: /*
171: * strange we got no data for checking just return
172: */
173: return (0);
174: }
175:
176: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
177: return (1);
178: }
179:
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
189:
190:
191:
192: #define SAX2 1
193: #define XML_PARSER_BIG_BUFFER_SIZE 300
194: #define XML_PARSER_BUFFER_SIZE 100
195: #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
196:
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
206:
207:
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */

/*
 * Forward declarations of file-local (static) helpers that are used
 * before the point where they are defined.
 */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
/* Only declared when legacy support is compiled in. */
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
233:
234: /************************************************************************
235: * *
236: * Some factorized error routines *
237: * *
238: ************************************************************************/
239:
240: /**
241: * xmlErrAttributeDup:
242: * @ctxt: an XML parser context
243: * @prefix: the attribute prefix
244: * @localname: the attribute localname
245: *
246: * Handle a redefinition of attribute error
247: */
248: static void
249: xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
250: const xmlChar * localname)
251: {
252: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253: (ctxt->instate == XML_PARSER_EOF))
254: return;
255: if (ctxt != NULL)
256: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
257:
258: if (prefix == NULL)
259: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
260: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
261: (const char *) localname, NULL, NULL, 0, 0,
262: "Attribute %s redefined\n", localname);
263: else
264: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
265: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
266: (const char *) prefix, (const char *) localname,
267: NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
268: localname);
269: if (ctxt != NULL) {
270: ctxt->wellFormed = 0;
271: if (ctxt->recovery == 0)
272: ctxt->disableSAX = 1;
273: }
274: }
275:
276: /**
277: * xmlFatalErr:
278: * @ctxt: an XML parser context
279: * @error: the error number
280: * @extra: extra information string
281: *
282: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
283: */
284: static void
285: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
286: {
287: const char *errmsg;
288:
289: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290: (ctxt->instate == XML_PARSER_EOF))
291: return;
292: switch (error) {
293: case XML_ERR_INVALID_HEX_CHARREF:
294: errmsg = "CharRef: invalid hexadecimal value\n";
295: break;
296: case XML_ERR_INVALID_DEC_CHARREF:
297: errmsg = "CharRef: invalid decimal value\n";
298: break;
299: case XML_ERR_INVALID_CHARREF:
300: errmsg = "CharRef: invalid value\n";
301: break;
302: case XML_ERR_INTERNAL_ERROR:
303: errmsg = "internal error";
304: break;
305: case XML_ERR_PEREF_AT_EOF:
306: errmsg = "PEReference at end of document\n";
307: break;
308: case XML_ERR_PEREF_IN_PROLOG:
309: errmsg = "PEReference in prolog\n";
310: break;
311: case XML_ERR_PEREF_IN_EPILOG:
312: errmsg = "PEReference in epilog\n";
313: break;
314: case XML_ERR_PEREF_NO_NAME:
315: errmsg = "PEReference: no name\n";
316: break;
317: case XML_ERR_PEREF_SEMICOL_MISSING:
318: errmsg = "PEReference: expecting ';'\n";
319: break;
320: case XML_ERR_ENTITY_LOOP:
321: errmsg = "Detected an entity reference loop\n";
322: break;
323: case XML_ERR_ENTITY_NOT_STARTED:
324: errmsg = "EntityValue: \" or ' expected\n";
325: break;
326: case XML_ERR_ENTITY_PE_INTERNAL:
327: errmsg = "PEReferences forbidden in internal subset\n";
328: break;
329: case XML_ERR_ENTITY_NOT_FINISHED:
330: errmsg = "EntityValue: \" or ' expected\n";
331: break;
332: case XML_ERR_ATTRIBUTE_NOT_STARTED:
333: errmsg = "AttValue: \" or ' expected\n";
334: break;
335: case XML_ERR_LT_IN_ATTRIBUTE:
336: errmsg = "Unescaped '<' not allowed in attributes values\n";
337: break;
338: case XML_ERR_LITERAL_NOT_STARTED:
339: errmsg = "SystemLiteral \" or ' expected\n";
340: break;
341: case XML_ERR_LITERAL_NOT_FINISHED:
342: errmsg = "Unfinished System or Public ID \" or ' expected\n";
343: break;
344: case XML_ERR_MISPLACED_CDATA_END:
345: errmsg = "Sequence ']]>' not allowed in content\n";
346: break;
347: case XML_ERR_URI_REQUIRED:
348: errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
349: break;
350: case XML_ERR_PUBID_REQUIRED:
351: errmsg = "PUBLIC, the Public Identifier is missing\n";
352: break;
353: case XML_ERR_HYPHEN_IN_COMMENT:
354: errmsg = "Comment must not contain '--' (double-hyphen)\n";
355: break;
356: case XML_ERR_PI_NOT_STARTED:
357: errmsg = "xmlParsePI : no target name\n";
358: break;
359: case XML_ERR_RESERVED_XML_NAME:
360: errmsg = "Invalid PI name\n";
361: break;
362: case XML_ERR_NOTATION_NOT_STARTED:
363: errmsg = "NOTATION: Name expected here\n";
364: break;
365: case XML_ERR_NOTATION_NOT_FINISHED:
366: errmsg = "'>' required to close NOTATION declaration\n";
367: break;
368: case XML_ERR_VALUE_REQUIRED:
369: errmsg = "Entity value required\n";
370: break;
371: case XML_ERR_URI_FRAGMENT:
372: errmsg = "Fragment not allowed";
373: break;
374: case XML_ERR_ATTLIST_NOT_STARTED:
375: errmsg = "'(' required to start ATTLIST enumeration\n";
376: break;
377: case XML_ERR_NMTOKEN_REQUIRED:
378: errmsg = "NmToken expected in ATTLIST enumeration\n";
379: break;
380: case XML_ERR_ATTLIST_NOT_FINISHED:
381: errmsg = "')' required to finish ATTLIST enumeration\n";
382: break;
383: case XML_ERR_MIXED_NOT_STARTED:
384: errmsg = "MixedContentDecl : '|' or ')*' expected\n";
385: break;
386: case XML_ERR_PCDATA_REQUIRED:
387: errmsg = "MixedContentDecl : '#PCDATA' expected\n";
388: break;
389: case XML_ERR_ELEMCONTENT_NOT_STARTED:
390: errmsg = "ContentDecl : Name or '(' expected\n";
391: break;
392: case XML_ERR_ELEMCONTENT_NOT_FINISHED:
393: errmsg = "ContentDecl : ',' '|' or ')' expected\n";
394: break;
395: case XML_ERR_PEREF_IN_INT_SUBSET:
396: errmsg =
397: "PEReference: forbidden within markup decl in internal subset\n";
398: break;
399: case XML_ERR_GT_REQUIRED:
400: errmsg = "expected '>'\n";
401: break;
402: case XML_ERR_CONDSEC_INVALID:
403: errmsg = "XML conditional section '[' expected\n";
404: break;
405: case XML_ERR_EXT_SUBSET_NOT_FINISHED:
406: errmsg = "Content error in the external subset\n";
407: break;
408: case XML_ERR_CONDSEC_INVALID_KEYWORD:
409: errmsg =
410: "conditional section INCLUDE or IGNORE keyword expected\n";
411: break;
412: case XML_ERR_CONDSEC_NOT_FINISHED:
413: errmsg = "XML conditional section not closed\n";
414: break;
415: case XML_ERR_XMLDECL_NOT_STARTED:
416: errmsg = "Text declaration '<?xml' required\n";
417: break;
418: case XML_ERR_XMLDECL_NOT_FINISHED:
419: errmsg = "parsing XML declaration: '?>' expected\n";
420: break;
421: case XML_ERR_EXT_ENTITY_STANDALONE:
422: errmsg = "external parsed entities cannot be standalone\n";
423: break;
424: case XML_ERR_ENTITYREF_SEMICOL_MISSING:
425: errmsg = "EntityRef: expecting ';'\n";
426: break;
427: case XML_ERR_DOCTYPE_NOT_FINISHED:
428: errmsg = "DOCTYPE improperly terminated\n";
429: break;
430: case XML_ERR_LTSLASH_REQUIRED:
431: errmsg = "EndTag: '</' not found\n";
432: break;
433: case XML_ERR_EQUAL_REQUIRED:
434: errmsg = "expected '='\n";
435: break;
436: case XML_ERR_STRING_NOT_CLOSED:
437: errmsg = "String not closed expecting \" or '\n";
438: break;
439: case XML_ERR_STRING_NOT_STARTED:
440: errmsg = "String not started expecting ' or \"\n";
441: break;
442: case XML_ERR_ENCODING_NAME:
443: errmsg = "Invalid XML encoding name\n";
444: break;
445: case XML_ERR_STANDALONE_VALUE:
446: errmsg = "standalone accepts only 'yes' or 'no'\n";
447: break;
448: case XML_ERR_DOCUMENT_EMPTY:
449: errmsg = "Document is empty\n";
450: break;
451: case XML_ERR_DOCUMENT_END:
452: errmsg = "Extra content at the end of the document\n";
453: break;
454: case XML_ERR_NOT_WELL_BALANCED:
455: errmsg = "chunk is not well balanced\n";
456: break;
457: case XML_ERR_EXTRA_CONTENT:
458: errmsg = "extra content at the end of well balanced chunk\n";
459: break;
460: case XML_ERR_VERSION_MISSING:
461: errmsg = "Malformed declaration expecting version\n";
462: break;
463: #if 0
464: case:
465: errmsg = "\n";
466: break;
467: #endif
468: default:
469: errmsg = "Unregistered error message\n";
470: }
471: if (ctxt != NULL)
472: ctxt->errNo = error;
473: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
474: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
475: info);
476: if (ctxt != NULL) {
477: ctxt->wellFormed = 0;
478: if (ctxt->recovery == 0)
479: ctxt->disableSAX = 1;
480: }
481: }
482:
483: /**
484: * xmlFatalErrMsg:
485: * @ctxt: an XML parser context
486: * @error: the error number
487: * @msg: the error message
488: *
489: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490: */
491: static void
492: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493: const char *msg)
494: {
495: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496: (ctxt->instate == XML_PARSER_EOF))
497: return;
498: if (ctxt != NULL)
499: ctxt->errNo = error;
500: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
501: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
502: if (ctxt != NULL) {
503: ctxt->wellFormed = 0;
504: if (ctxt->recovery == 0)
505: ctxt->disableSAX = 1;
506: }
507: }
508:
509: /**
510: * xmlWarningMsg:
511: * @ctxt: an XML parser context
512: * @error: the error number
513: * @msg: the error message
514: * @str1: extra data
515: * @str2: extra data
516: *
517: * Handle a warning.
518: */
519: static void
520: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521: const char *msg, const xmlChar *str1, const xmlChar *str2)
522: {
523: xmlStructuredErrorFunc schannel = NULL;
524:
525: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526: (ctxt->instate == XML_PARSER_EOF))
527: return;
528: if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529: (ctxt->sax->initialized == XML_SAX2_MAGIC))
530: schannel = ctxt->sax->serror;
531: if (ctxt != NULL) {
532: __xmlRaiseError(schannel,
533: (ctxt->sax) ? ctxt->sax->warning : NULL,
534: ctxt->userData,
535: ctxt, NULL, XML_FROM_PARSER, error,
536: XML_ERR_WARNING, NULL, 0,
537: (const char *) str1, (const char *) str2, NULL, 0, 0,
538: msg, (const char *) str1, (const char *) str2);
539: } else {
540: __xmlRaiseError(schannel, NULL, NULL,
541: ctxt, NULL, XML_FROM_PARSER, error,
542: XML_ERR_WARNING, NULL, 0,
543: (const char *) str1, (const char *) str2, NULL, 0, 0,
544: msg, (const char *) str1, (const char *) str2);
545: }
546: }
547:
548: /**
549: * xmlValidityError:
550: * @ctxt: an XML parser context
551: * @error: the error number
552: * @msg: the error message
553: * @str1: extra data
554: *
555: * Handle a validity error.
556: */
557: static void
558: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
559: const char *msg, const xmlChar *str1, const xmlChar *str2)
560: {
561: xmlStructuredErrorFunc schannel = NULL;
562:
563: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
564: (ctxt->instate == XML_PARSER_EOF))
565: return;
566: if (ctxt != NULL) {
567: ctxt->errNo = error;
568: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
569: schannel = ctxt->sax->serror;
570: }
571: if (ctxt != NULL) {
572: __xmlRaiseError(schannel,
573: ctxt->vctxt.error, ctxt->vctxt.userData,
574: ctxt, NULL, XML_FROM_DTD, error,
575: XML_ERR_ERROR, NULL, 0, (const char *) str1,
576: (const char *) str2, NULL, 0, 0,
577: msg, (const char *) str1, (const char *) str2);
578: ctxt->valid = 0;
579: } else {
580: __xmlRaiseError(schannel, NULL, NULL,
581: ctxt, NULL, XML_FROM_DTD, error,
582: XML_ERR_ERROR, NULL, 0, (const char *) str1,
583: (const char *) str2, NULL, 0, 0,
584: msg, (const char *) str1, (const char *) str2);
585: }
586: }
587:
588: /**
589: * xmlFatalErrMsgInt:
590: * @ctxt: an XML parser context
591: * @error: the error number
592: * @msg: the error message
593: * @val: an integer value
594: *
595: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596: */
597: static void
598: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599: const char *msg, int val)
600: {
601: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602: (ctxt->instate == XML_PARSER_EOF))
603: return;
604: if (ctxt != NULL)
605: ctxt->errNo = error;
606: __xmlRaiseError(NULL, NULL, NULL,
607: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608: NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
609: if (ctxt != NULL) {
610: ctxt->wellFormed = 0;
611: if (ctxt->recovery == 0)
612: ctxt->disableSAX = 1;
613: }
614: }
615:
616: /**
617: * xmlFatalErrMsgStrIntStr:
618: * @ctxt: an XML parser context
619: * @error: the error number
620: * @msg: the error message
621: * @str1: an string info
622: * @val: an integer value
623: * @str2: an string info
624: *
625: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626: */
627: static void
628: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629: const char *msg, const xmlChar *str1, int val,
630: const xmlChar *str2)
631: {
632: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633: (ctxt->instate == XML_PARSER_EOF))
634: return;
635: if (ctxt != NULL)
636: ctxt->errNo = error;
637: __xmlRaiseError(NULL, NULL, NULL,
638: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
639: NULL, 0, (const char *) str1, (const char *) str2,
640: NULL, val, 0, msg, str1, val, str2);
641: if (ctxt != NULL) {
642: ctxt->wellFormed = 0;
643: if (ctxt->recovery == 0)
644: ctxt->disableSAX = 1;
645: }
646: }
647:
648: /**
649: * xmlFatalErrMsgStr:
650: * @ctxt: an XML parser context
651: * @error: the error number
652: * @msg: the error message
653: * @val: a string value
654: *
655: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
656: */
657: static void
658: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
659: const char *msg, const xmlChar * val)
660: {
661: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
662: (ctxt->instate == XML_PARSER_EOF))
663: return;
664: if (ctxt != NULL)
665: ctxt->errNo = error;
666: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
667: XML_FROM_PARSER, error, XML_ERR_FATAL,
668: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
669: val);
670: if (ctxt != NULL) {
671: ctxt->wellFormed = 0;
672: if (ctxt->recovery == 0)
673: ctxt->disableSAX = 1;
674: }
675: }
676:
677: /**
678: * xmlErrMsgStr:
679: * @ctxt: an XML parser context
680: * @error: the error number
681: * @msg: the error message
682: * @val: a string value
683: *
684: * Handle a non fatal parser error
685: */
686: static void
687: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
688: const char *msg, const xmlChar * val)
689: {
690: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
691: (ctxt->instate == XML_PARSER_EOF))
692: return;
693: if (ctxt != NULL)
694: ctxt->errNo = error;
695: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
696: XML_FROM_PARSER, error, XML_ERR_ERROR,
697: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
698: val);
699: }
700:
701: /**
702: * xmlNsErr:
703: * @ctxt: an XML parser context
704: * @error: the error number
705: * @msg: the message
706: * @info1: extra information string
707: * @info2: extra information string
708: *
709: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
710: */
711: static void
712: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
713: const char *msg,
714: const xmlChar * info1, const xmlChar * info2,
715: const xmlChar * info3)
716: {
717: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
718: (ctxt->instate == XML_PARSER_EOF))
719: return;
720: if (ctxt != NULL)
721: ctxt->errNo = error;
722: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
723: XML_ERR_ERROR, NULL, 0, (const char *) info1,
724: (const char *) info2, (const char *) info3, 0, 0, msg,
725: info1, info2, info3);
726: if (ctxt != NULL)
727: ctxt->nsWellFormed = 0;
728: }
729:
730: /**
731: * xmlNsWarn
732: * @ctxt: an XML parser context
733: * @error: the error number
734: * @msg: the message
735: * @info1: extra information string
736: * @info2: extra information string
737: *
1.1.1.2 ! misho 738: * Handle a namespace warning error
1.1 misho 739: */
740: static void
741: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742: const char *msg,
743: const xmlChar * info1, const xmlChar * info2,
744: const xmlChar * info3)
745: {
746: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
747: (ctxt->instate == XML_PARSER_EOF))
748: return;
749: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
750: XML_ERR_WARNING, NULL, 0, (const char *) info1,
751: (const char *) info2, (const char *) info3, 0, 0, msg,
752: info1, info2, info3);
753: }
754:
755: /************************************************************************
756: * *
757: * Library wide options *
758: * *
759: ************************************************************************/
760:
761: /**
762: * xmlHasFeature:
763: * @feature: the feature to be examined
764: *
765: * Examines if the library has been compiled with a given feature.
766: *
767: * Returns a non-zero value if the feature exist, otherwise zero.
768: * Returns zero (0) if the feature does not exist or an unknown
769: * unknown feature is requested, non-zero otherwise.
770: */
771: int
772: xmlHasFeature(xmlFeature feature)
773: {
774: switch (feature) {
775: case XML_WITH_THREAD:
776: #ifdef LIBXML_THREAD_ENABLED
777: return(1);
778: #else
779: return(0);
780: #endif
781: case XML_WITH_TREE:
782: #ifdef LIBXML_TREE_ENABLED
783: return(1);
784: #else
785: return(0);
786: #endif
787: case XML_WITH_OUTPUT:
788: #ifdef LIBXML_OUTPUT_ENABLED
789: return(1);
790: #else
791: return(0);
792: #endif
793: case XML_WITH_PUSH:
794: #ifdef LIBXML_PUSH_ENABLED
795: return(1);
796: #else
797: return(0);
798: #endif
799: case XML_WITH_READER:
800: #ifdef LIBXML_READER_ENABLED
801: return(1);
802: #else
803: return(0);
804: #endif
805: case XML_WITH_PATTERN:
806: #ifdef LIBXML_PATTERN_ENABLED
807: return(1);
808: #else
809: return(0);
810: #endif
811: case XML_WITH_WRITER:
812: #ifdef LIBXML_WRITER_ENABLED
813: return(1);
814: #else
815: return(0);
816: #endif
817: case XML_WITH_SAX1:
818: #ifdef LIBXML_SAX1_ENABLED
819: return(1);
820: #else
821: return(0);
822: #endif
823: case XML_WITH_FTP:
824: #ifdef LIBXML_FTP_ENABLED
825: return(1);
826: #else
827: return(0);
828: #endif
829: case XML_WITH_HTTP:
830: #ifdef LIBXML_HTTP_ENABLED
831: return(1);
832: #else
833: return(0);
834: #endif
835: case XML_WITH_VALID:
836: #ifdef LIBXML_VALID_ENABLED
837: return(1);
838: #else
839: return(0);
840: #endif
841: case XML_WITH_HTML:
842: #ifdef LIBXML_HTML_ENABLED
843: return(1);
844: #else
845: return(0);
846: #endif
847: case XML_WITH_LEGACY:
848: #ifdef LIBXML_LEGACY_ENABLED
849: return(1);
850: #else
851: return(0);
852: #endif
853: case XML_WITH_C14N:
854: #ifdef LIBXML_C14N_ENABLED
855: return(1);
856: #else
857: return(0);
858: #endif
859: case XML_WITH_CATALOG:
860: #ifdef LIBXML_CATALOG_ENABLED
861: return(1);
862: #else
863: return(0);
864: #endif
865: case XML_WITH_XPATH:
866: #ifdef LIBXML_XPATH_ENABLED
867: return(1);
868: #else
869: return(0);
870: #endif
871: case XML_WITH_XPTR:
872: #ifdef LIBXML_XPTR_ENABLED
873: return(1);
874: #else
875: return(0);
876: #endif
877: case XML_WITH_XINCLUDE:
878: #ifdef LIBXML_XINCLUDE_ENABLED
879: return(1);
880: #else
881: return(0);
882: #endif
883: case XML_WITH_ICONV:
884: #ifdef LIBXML_ICONV_ENABLED
885: return(1);
886: #else
887: return(0);
888: #endif
889: case XML_WITH_ISO8859X:
890: #ifdef LIBXML_ISO8859X_ENABLED
891: return(1);
892: #else
893: return(0);
894: #endif
895: case XML_WITH_UNICODE:
896: #ifdef LIBXML_UNICODE_ENABLED
897: return(1);
898: #else
899: return(0);
900: #endif
901: case XML_WITH_REGEXP:
902: #ifdef LIBXML_REGEXP_ENABLED
903: return(1);
904: #else
905: return(0);
906: #endif
907: case XML_WITH_AUTOMATA:
908: #ifdef LIBXML_AUTOMATA_ENABLED
909: return(1);
910: #else
911: return(0);
912: #endif
913: case XML_WITH_EXPR:
914: #ifdef LIBXML_EXPR_ENABLED
915: return(1);
916: #else
917: return(0);
918: #endif
919: case XML_WITH_SCHEMAS:
920: #ifdef LIBXML_SCHEMAS_ENABLED
921: return(1);
922: #else
923: return(0);
924: #endif
925: case XML_WITH_SCHEMATRON:
926: #ifdef LIBXML_SCHEMATRON_ENABLED
927: return(1);
928: #else
929: return(0);
930: #endif
931: case XML_WITH_MODULES:
932: #ifdef LIBXML_MODULES_ENABLED
933: return(1);
934: #else
935: return(0);
936: #endif
937: case XML_WITH_DEBUG:
938: #ifdef LIBXML_DEBUG_ENABLED
939: return(1);
940: #else
941: return(0);
942: #endif
943: case XML_WITH_DEBUG_MEM:
944: #ifdef DEBUG_MEMORY_LOCATION
945: return(1);
946: #else
947: return(0);
948: #endif
949: case XML_WITH_DEBUG_RUN:
950: #ifdef LIBXML_DEBUG_RUNTIME
951: return(1);
952: #else
953: return(0);
954: #endif
955: case XML_WITH_ZLIB:
956: #ifdef LIBXML_ZLIB_ENABLED
957: return(1);
958: #else
959: return(0);
960: #endif
1.1.1.2 ! misho 961: case XML_WITH_LZMA:
! 962: #ifdef LIBXML_LZMA_ENABLED
! 963: return(1);
! 964: #else
! 965: return(0);
! 966: #endif
1.1 misho 967: case XML_WITH_ICU:
968: #ifdef LIBXML_ICU_ENABLED
969: return(1);
970: #else
971: return(0);
972: #endif
973: default:
974: break;
975: }
976: return(0);
977: }
978:
979: /************************************************************************
980: * *
981: * SAX2 defaulted attributes handling *
982: * *
983: ************************************************************************/
984:
985: /**
986: * xmlDetectSAX2:
987: * @ctxt: an XML parser context
988: *
989: * Do the SAX2 detection and specific intialization
990: */
991: static void
992: xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
993: if (ctxt == NULL) return;
994: #ifdef LIBXML_SAX1_ENABLED
995: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
996: ((ctxt->sax->startElementNs != NULL) ||
997: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
998: #else
999: ctxt->sax2 = 1;
1000: #endif /* LIBXML_SAX1_ENABLED */
1001:
1002: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1003: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1004: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1005: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1006: (ctxt->str_xml_ns == NULL)) {
1007: xmlErrMemory(ctxt, NULL);
1008: }
1009: }
1010:
/*
 * Storage for the defaulted attributes of one element.
 * NOTE(review): xmlAddDefAttrs allocates this structure with extra room
 * after it ((4 * 5) pointers for maxAttrs = 4), so values[] appears to be
 * used as an over-allocated trailing array with 5 slots per attribute --
 * confirm against the code filling it.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1018:
1019: /**
1020: * xmlAttrNormalizeSpace:
1021: * @src: the source string
1022: * @dst: the target string
1023: *
1024: * Normalize the space in non CDATA attribute values:
1025: * If the attribute type is not CDATA, then the XML processor MUST further
1026: * process the normalized attribute value by discarding any leading and
1027: * trailing space (#x20) characters, and by replacing sequences of space
1028: * (#x20) characters by a single space (#x20) character.
1029: * Note that the size of dst need to be at least src, and if one doesn't need
1030: * to preserve dst (and it doesn't come from a dictionary or read-only) then
1031: * passing src as dst is just fine.
1032: *
1033: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034: * is needed.
1035: */
1036: static xmlChar *
1037: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1038: {
1039: if ((src == NULL) || (dst == NULL))
1040: return(NULL);
1041:
1042: while (*src == 0x20) src++;
1043: while (*src != 0) {
1044: if (*src == 0x20) {
1045: while (*src == 0x20) src++;
1046: if (*src != 0)
1047: *dst++ = 0x20;
1048: } else {
1049: *dst++ = *src++;
1050: }
1051: }
1052: *dst = 0;
1053: if (dst == src)
1054: return(NULL);
1055: return(dst);
1056: }
1057:
1058: /**
1059: * xmlAttrNormalizeSpace2:
1060: * @src: the source string
1061: *
1062: * Normalize the space in non CDATA attribute values, a slightly more complex
1063: * front end to avoid allocation problems when running on attribute values
1064: * coming from the input.
1065: *
1066: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1067: * is needed.
1068: */
1069: static const xmlChar *
1070: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1071: {
1072: int i;
1073: int remove_head = 0;
1074: int need_realloc = 0;
1075: const xmlChar *cur;
1076:
1077: if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1078: return(NULL);
1079: i = *len;
1080: if (i <= 0)
1081: return(NULL);
1082:
1083: cur = src;
1084: while (*cur == 0x20) {
1085: cur++;
1086: remove_head++;
1087: }
1088: while (*cur != 0) {
1089: if (*cur == 0x20) {
1090: cur++;
1091: if ((*cur == 0x20) || (*cur == 0)) {
1092: need_realloc = 1;
1093: break;
1094: }
1095: } else
1096: cur++;
1097: }
1098: if (need_realloc) {
1099: xmlChar *ret;
1100:
1101: ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1102: if (ret == NULL) {
1103: xmlErrMemory(ctxt, NULL);
1104: return(NULL);
1105: }
1106: xmlAttrNormalizeSpace(ret, ret);
1107: *len = (int) strlen((const char *)ret);
1108: return(ret);
1109: } else if (remove_head) {
1110: *len -= remove_head;
1111: memmove(src, src + remove_head, 1 + *len);
1112: return(src);
1113: }
1114: return(NULL);
1115: }
1116:
1117: /**
1118: * xmlAddDefAttrs:
1119: * @ctxt: an XML parser context
1120: * @fullname: the element fullname
1121: * @fullattr: the attribute fullname
1122: * @value: the attribute value
1123: *
1124: * Add a defaulted attribute for an element
1125: */
1126: static void
1127: xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1128: const xmlChar *fullname,
1129: const xmlChar *fullattr,
1130: const xmlChar *value) {
1131: xmlDefAttrsPtr defaults;
1132: int len;
1133: const xmlChar *name;
1134: const xmlChar *prefix;
1135:
1136: /*
1137: * Allows to detect attribute redefinitions
1138: */
1139: if (ctxt->attsSpecial != NULL) {
1140: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1141: return;
1142: }
1143:
1144: if (ctxt->attsDefault == NULL) {
1145: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1146: if (ctxt->attsDefault == NULL)
1147: goto mem_error;
1148: }
1149:
1150: /*
1151: * split the element name into prefix:localname , the string found
1152: * are within the DTD and then not associated to namespace names.
1153: */
1154: name = xmlSplitQName3(fullname, &len);
1155: if (name == NULL) {
1156: name = xmlDictLookup(ctxt->dict, fullname, -1);
1157: prefix = NULL;
1158: } else {
1159: name = xmlDictLookup(ctxt->dict, name, -1);
1160: prefix = xmlDictLookup(ctxt->dict, fullname, len);
1161: }
1162:
1163: /*
1164: * make sure there is some storage
1165: */
1166: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1167: if (defaults == NULL) {
1168: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1169: (4 * 5) * sizeof(const xmlChar *));
1170: if (defaults == NULL)
1171: goto mem_error;
1172: defaults->nbAttrs = 0;
1173: defaults->maxAttrs = 4;
1174: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1175: defaults, NULL) < 0) {
1176: xmlFree(defaults);
1177: goto mem_error;
1178: }
1179: } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1180: xmlDefAttrsPtr temp;
1181:
1182: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1183: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1184: if (temp == NULL)
1185: goto mem_error;
1186: defaults = temp;
1187: defaults->maxAttrs *= 2;
1188: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1189: defaults, NULL) < 0) {
1190: xmlFree(defaults);
1191: goto mem_error;
1192: }
1193: }
1194:
1195: /*
1196: * Split the element name into prefix:localname , the string found
1197: * are within the DTD and hen not associated to namespace names.
1198: */
1199: name = xmlSplitQName3(fullattr, &len);
1200: if (name == NULL) {
1201: name = xmlDictLookup(ctxt->dict, fullattr, -1);
1202: prefix = NULL;
1203: } else {
1204: name = xmlDictLookup(ctxt->dict, name, -1);
1205: prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1206: }
1207:
1208: defaults->values[5 * defaults->nbAttrs] = name;
1209: defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1210: /* intern the string and precompute the end */
1211: len = xmlStrlen(value);
1212: value = xmlDictLookup(ctxt->dict, value, len);
1213: defaults->values[5 * defaults->nbAttrs + 2] = value;
1214: defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1215: if (ctxt->external)
1216: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1217: else
1218: defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1219: defaults->nbAttrs++;
1220:
1221: return;
1222:
1223: mem_error:
1224: xmlErrMemory(ctxt, NULL);
1225: return;
1226: }
1227:
1228: /**
1229: * xmlAddSpecialAttr:
1230: * @ctxt: an XML parser context
1231: * @fullname: the element fullname
1232: * @fullattr: the attribute fullname
1233: * @type: the attribute type
1234: *
1235: * Register this attribute type
1236: */
1237: static void
1238: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1239: const xmlChar *fullname,
1240: const xmlChar *fullattr,
1241: int type)
1242: {
1243: if (ctxt->attsSpecial == NULL) {
1244: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1245: if (ctxt->attsSpecial == NULL)
1246: goto mem_error;
1247: }
1248:
1249: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1250: return;
1251:
1252: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1253: (void *) (long) type);
1254: return;
1255:
1256: mem_error:
1257: xmlErrMemory(ctxt, NULL);
1258: return;
1259: }
1260:
1261: /**
1262: * xmlCleanSpecialAttrCallback:
1263: *
1264: * Removes CDATA attributes from the special attribute table
1265: */
1266: static void
1267: xmlCleanSpecialAttrCallback(void *payload, void *data,
1268: const xmlChar *fullname, const xmlChar *fullattr,
1269: const xmlChar *unused ATTRIBUTE_UNUSED) {
1270: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1271:
1272: if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1273: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1274: }
1275: }
1276:
1277: /**
1278: * xmlCleanSpecialAttr:
1279: * @ctxt: an XML parser context
1280: *
1281: * Trim the list of attributes defined to remove all those of type
1282: * CDATA as they are not special. This call should be done when finishing
1283: * to parse the DTD and before starting to parse the document root.
1284: */
1285: static void
1286: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1287: {
1288: if (ctxt->attsSpecial == NULL)
1289: return;
1290:
1291: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1292:
1293: if (xmlHashSize(ctxt->attsSpecial) == 0) {
1294: xmlHashFree(ctxt->attsSpecial, NULL);
1295: ctxt->attsSpecial = NULL;
1296: }
1297: return;
1298: }
1299:
1300: /**
1301: * xmlCheckLanguageID:
1302: * @lang: pointer to the string value
1303: *
1304: * Checks that the value conforms to the LanguageID production:
1305: *
1306: * NOTE: this is somewhat deprecated, those productions were removed from
1307: * the XML Second edition.
1308: *
1309: * [33] LanguageID ::= Langcode ('-' Subcode)*
1310: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1311: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1312: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1313: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1314: * [38] Subcode ::= ([a-z] | [A-Z])+
1315: *
1316: * The current REC reference the sucessors of RFC 1766, currently 5646
1317: *
1318: * http://www.rfc-editor.org/rfc/rfc5646.txt
1319: * langtag = language
1320: * ["-" script]
1321: * ["-" region]
1322: * *("-" variant)
1323: * *("-" extension)
1324: * ["-" privateuse]
1325: * language = 2*3ALPHA ; shortest ISO 639 code
1326: * ["-" extlang] ; sometimes followed by
1327: * ; extended language subtags
1328: * / 4ALPHA ; or reserved for future use
1329: * / 5*8ALPHA ; or registered language subtag
1330: *
1331: * extlang = 3ALPHA ; selected ISO 639 codes
1332: * *2("-" 3ALPHA) ; permanently reserved
1333: *
1334: * script = 4ALPHA ; ISO 15924 code
1335: *
1336: * region = 2ALPHA ; ISO 3166-1 code
1337: * / 3DIGIT ; UN M.49 code
1338: *
1339: * variant = 5*8alphanum ; registered variants
1340: * / (DIGIT 3alphanum)
1341: *
1342: * extension = singleton 1*("-" (2*8alphanum))
1343: *
1344: * ; Single alphanumerics
1345: * ; "x" reserved for private use
1346: * singleton = DIGIT ; 0 - 9
1347: * / %x41-57 ; A - W
1348: * / %x59-5A ; Y - Z
1349: * / %x61-77 ; a - w
1350: * / %x79-7A ; y - z
1351: *
1352: * it sounds right to still allow Irregular i-xxx IANA and user codes too
1353: * The parser below doesn't try to cope with extension or privateuse
1354: * that could be added but that's not interoperable anyway
1355: *
1356: * Returns 1 if correct 0 otherwise
1357: **/
1358: int
1359: xmlCheckLanguageID(const xmlChar * lang)
1360: {
1361: const xmlChar *cur = lang, *nxt;
1362:
1363: if (cur == NULL)
1364: return (0);
1365: if (((cur[0] == 'i') && (cur[1] == '-')) ||
1366: ((cur[0] == 'I') && (cur[1] == '-')) ||
1367: ((cur[0] == 'x') && (cur[1] == '-')) ||
1368: ((cur[0] == 'X') && (cur[1] == '-'))) {
1369: /*
1370: * Still allow IANA code and user code which were coming
1371: * from the previous version of the XML-1.0 specification
1372: * it's deprecated but we should not fail
1373: */
1374: cur += 2;
1375: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1376: ((cur[0] >= 'a') && (cur[0] <= 'z')))
1377: cur++;
1378: return(cur[0] == 0);
1379: }
1380: nxt = cur;
1381: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1382: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1383: nxt++;
1384: if (nxt - cur >= 4) {
1385: /*
1386: * Reserved
1387: */
1388: if ((nxt - cur > 8) || (nxt[0] != 0))
1389: return(0);
1390: return(1);
1391: }
1392: if (nxt - cur < 2)
1393: return(0);
1394: /* we got an ISO 639 code */
1395: if (nxt[0] == 0)
1396: return(1);
1397: if (nxt[0] != '-')
1398: return(0);
1399:
1400: nxt++;
1401: cur = nxt;
1402: /* now we can have extlang or script or region or variant */
1403: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1404: goto region_m49;
1405:
1406: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1407: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1408: nxt++;
1409: if (nxt - cur == 4)
1410: goto script;
1411: if (nxt - cur == 2)
1412: goto region;
1413: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1414: goto variant;
1415: if (nxt - cur != 3)
1416: return(0);
1417: /* we parsed an extlang */
1418: if (nxt[0] == 0)
1419: return(1);
1420: if (nxt[0] != '-')
1421: return(0);
1422:
1423: nxt++;
1424: cur = nxt;
1425: /* now we can have script or region or variant */
1426: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427: goto region_m49;
1428:
1429: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431: nxt++;
1432: if (nxt - cur == 2)
1433: goto region;
1434: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1435: goto variant;
1436: if (nxt - cur != 4)
1437: return(0);
1438: /* we parsed a script */
1439: script:
1440: if (nxt[0] == 0)
1441: return(1);
1442: if (nxt[0] != '-')
1443: return(0);
1444:
1445: nxt++;
1446: cur = nxt;
1447: /* now we can have region or variant */
1448: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1449: goto region_m49;
1450:
1451: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1452: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1453: nxt++;
1454:
1455: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456: goto variant;
1457: if (nxt - cur != 2)
1458: return(0);
1459: /* we parsed a region */
1460: region:
1461: if (nxt[0] == 0)
1462: return(1);
1463: if (nxt[0] != '-')
1464: return(0);
1465:
1466: nxt++;
1467: cur = nxt;
1468: /* now we can just have a variant */
1469: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471: nxt++;
1472:
1473: if ((nxt - cur < 5) || (nxt - cur > 8))
1474: return(0);
1475:
1476: /* we parsed a variant */
1477: variant:
1478: if (nxt[0] == 0)
1479: return(1);
1480: if (nxt[0] != '-')
1481: return(0);
1482: /* extensions and private use subtags not checked */
1483: return (1);
1484:
1485: region_m49:
1486: if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1487: ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1488: nxt += 3;
1489: goto region;
1490: }
1491: return(0);
1492: }
1493:
1494: /************************************************************************
1495: * *
1496: * Parser stacks related functions and macros *
1497: * *
1498: ************************************************************************/
1499:
1500: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1501: const xmlChar ** str);
1502:
1503: #ifdef SAX2
1504: /**
1505: * nsPush:
1506: * @ctxt: an XML parser context
1507: * @prefix: the namespace prefix or NULL
1508: * @URL: the namespace name
1509: *
1510: * Pushes a new parser namespace on top of the ns stack
1511: *
1512: * Returns -1 in case of error, -2 if the namespace should be discarded
1513: * and the index in the stack otherwise.
1514: */
1515: static int
1516: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1517: {
1518: if (ctxt->options & XML_PARSE_NSCLEAN) {
1519: int i;
1520: for (i = 0;i < ctxt->nsNr;i += 2) {
1521: if (ctxt->nsTab[i] == prefix) {
1522: /* in scope */
1523: if (ctxt->nsTab[i + 1] == URL)
1524: return(-2);
1525: /* out of scope keep it */
1526: break;
1527: }
1528: }
1529: }
1530: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1531: ctxt->nsMax = 10;
1532: ctxt->nsNr = 0;
1533: ctxt->nsTab = (const xmlChar **)
1534: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1535: if (ctxt->nsTab == NULL) {
1536: xmlErrMemory(ctxt, NULL);
1537: ctxt->nsMax = 0;
1538: return (-1);
1539: }
1540: } else if (ctxt->nsNr >= ctxt->nsMax) {
1541: const xmlChar ** tmp;
1542: ctxt->nsMax *= 2;
1543: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1544: ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1545: if (tmp == NULL) {
1546: xmlErrMemory(ctxt, NULL);
1547: ctxt->nsMax /= 2;
1548: return (-1);
1549: }
1550: ctxt->nsTab = tmp;
1551: }
1552: ctxt->nsTab[ctxt->nsNr++] = prefix;
1553: ctxt->nsTab[ctxt->nsNr++] = URL;
1554: return (ctxt->nsNr);
1555: }
1556: /**
1557: * nsPop:
1558: * @ctxt: an XML parser context
1559: * @nr: the number to pop
1560: *
1561: * Pops the top @nr parser prefix/namespace from the ns stack
1562: *
1563: * Returns the number of namespaces removed
1564: */
1565: static int
1566: nsPop(xmlParserCtxtPtr ctxt, int nr)
1567: {
1568: int i;
1569:
1570: if (ctxt->nsTab == NULL) return(0);
1571: if (ctxt->nsNr < nr) {
1572: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1573: nr = ctxt->nsNr;
1574: }
1575: if (ctxt->nsNr <= 0)
1576: return (0);
1577:
1578: for (i = 0;i < nr;i++) {
1579: ctxt->nsNr--;
1580: ctxt->nsTab[ctxt->nsNr] = NULL;
1581: }
1582: return(nr);
1583: }
1584: #endif
1585:
1586: static int
1587: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1588: const xmlChar **atts;
1589: int *attallocs;
1590: int maxatts;
1591:
1592: if (ctxt->atts == NULL) {
1593: maxatts = 55; /* allow for 10 attrs by default */
1594: atts = (const xmlChar **)
1595: xmlMalloc(maxatts * sizeof(xmlChar *));
1596: if (atts == NULL) goto mem_error;
1597: ctxt->atts = atts;
1598: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1599: if (attallocs == NULL) goto mem_error;
1600: ctxt->attallocs = attallocs;
1601: ctxt->maxatts = maxatts;
1602: } else if (nr + 5 > ctxt->maxatts) {
1603: maxatts = (nr + 5) * 2;
1604: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1605: maxatts * sizeof(const xmlChar *));
1606: if (atts == NULL) goto mem_error;
1607: ctxt->atts = atts;
1608: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1609: (maxatts / 5) * sizeof(int));
1610: if (attallocs == NULL) goto mem_error;
1611: ctxt->attallocs = attallocs;
1612: ctxt->maxatts = maxatts;
1613: }
1614: return(ctxt->maxatts);
1615: mem_error:
1616: xmlErrMemory(ctxt, NULL);
1617: return(-1);
1618: }
1619:
1620: /**
1621: * inputPush:
1622: * @ctxt: an XML parser context
1623: * @value: the parser input
1624: *
1625: * Pushes a new parser input on top of the input stack
1626: *
1627: * Returns -1 in case of error, the index in the stack otherwise
1628: */
1629: int
1630: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1631: {
1632: if ((ctxt == NULL) || (value == NULL))
1633: return(-1);
1634: if (ctxt->inputNr >= ctxt->inputMax) {
1635: ctxt->inputMax *= 2;
1636: ctxt->inputTab =
1637: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1638: ctxt->inputMax *
1639: sizeof(ctxt->inputTab[0]));
1640: if (ctxt->inputTab == NULL) {
1641: xmlErrMemory(ctxt, NULL);
1642: xmlFreeInputStream(value);
1643: ctxt->inputMax /= 2;
1644: value = NULL;
1645: return (-1);
1646: }
1647: }
1648: ctxt->inputTab[ctxt->inputNr] = value;
1649: ctxt->input = value;
1650: return (ctxt->inputNr++);
1651: }
1652: /**
1653: * inputPop:
1654: * @ctxt: an XML parser context
1655: *
1656: * Pops the top parser input from the input stack
1657: *
1658: * Returns the input just removed
1659: */
1660: xmlParserInputPtr
1661: inputPop(xmlParserCtxtPtr ctxt)
1662: {
1663: xmlParserInputPtr ret;
1664:
1665: if (ctxt == NULL)
1666: return(NULL);
1667: if (ctxt->inputNr <= 0)
1668: return (NULL);
1669: ctxt->inputNr--;
1670: if (ctxt->inputNr > 0)
1671: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1672: else
1673: ctxt->input = NULL;
1674: ret = ctxt->inputTab[ctxt->inputNr];
1675: ctxt->inputTab[ctxt->inputNr] = NULL;
1676: return (ret);
1677: }
1678: /**
1679: * nodePush:
1680: * @ctxt: an XML parser context
1681: * @value: the element node
1682: *
1683: * Pushes a new element node on top of the node stack
1684: *
1685: * Returns -1 in case of error, the index in the stack otherwise
1686: */
1687: int
1688: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1689: {
1690: if (ctxt == NULL) return(0);
1691: if (ctxt->nodeNr >= ctxt->nodeMax) {
1692: xmlNodePtr *tmp;
1693:
1694: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1695: ctxt->nodeMax * 2 *
1696: sizeof(ctxt->nodeTab[0]));
1697: if (tmp == NULL) {
1698: xmlErrMemory(ctxt, NULL);
1699: return (-1);
1700: }
1701: ctxt->nodeTab = tmp;
1702: ctxt->nodeMax *= 2;
1703: }
1704: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1705: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1706: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1707: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1708: xmlParserMaxDepth);
1709: ctxt->instate = XML_PARSER_EOF;
1710: return(-1);
1711: }
1712: ctxt->nodeTab[ctxt->nodeNr] = value;
1713: ctxt->node = value;
1714: return (ctxt->nodeNr++);
1715: }
1716:
1717: /**
1718: * nodePop:
1719: * @ctxt: an XML parser context
1720: *
1721: * Pops the top element node from the node stack
1722: *
1723: * Returns the node just removed
1724: */
1725: xmlNodePtr
1726: nodePop(xmlParserCtxtPtr ctxt)
1727: {
1728: xmlNodePtr ret;
1729:
1730: if (ctxt == NULL) return(NULL);
1731: if (ctxt->nodeNr <= 0)
1732: return (NULL);
1733: ctxt->nodeNr--;
1734: if (ctxt->nodeNr > 0)
1735: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1736: else
1737: ctxt->node = NULL;
1738: ret = ctxt->nodeTab[ctxt->nodeNr];
1739: ctxt->nodeTab[ctxt->nodeNr] = NULL;
1740: return (ret);
1741: }
1742:
1743: #ifdef LIBXML_PUSH_ENABLED
1744: /**
1745: * nameNsPush:
1746: * @ctxt: an XML parser context
1747: * @value: the element name
1748: * @prefix: the element prefix
1749: * @URI: the element namespace name
1750: *
1751: * Pushes a new element name/prefix/URL on top of the name stack
1752: *
1753: * Returns -1 in case of error, the index in the stack otherwise
1754: */
1755: static int
1756: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1757: const xmlChar *prefix, const xmlChar *URI, int nsNr)
1758: {
1759: if (ctxt->nameNr >= ctxt->nameMax) {
1760: const xmlChar * *tmp;
1761: void **tmp2;
1762: ctxt->nameMax *= 2;
1763: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1764: ctxt->nameMax *
1765: sizeof(ctxt->nameTab[0]));
1766: if (tmp == NULL) {
1767: ctxt->nameMax /= 2;
1768: goto mem_error;
1769: }
1770: ctxt->nameTab = tmp;
1771: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1772: ctxt->nameMax * 3 *
1773: sizeof(ctxt->pushTab[0]));
1774: if (tmp2 == NULL) {
1775: ctxt->nameMax /= 2;
1776: goto mem_error;
1777: }
1778: ctxt->pushTab = tmp2;
1779: }
1780: ctxt->nameTab[ctxt->nameNr] = value;
1781: ctxt->name = value;
1782: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1783: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1784: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1785: return (ctxt->nameNr++);
1786: mem_error:
1787: xmlErrMemory(ctxt, NULL);
1788: return (-1);
1789: }
1790: /**
1791: * nameNsPop:
1792: * @ctxt: an XML parser context
1793: *
1794: * Pops the top element/prefix/URI name from the name stack
1795: *
1796: * Returns the name just removed
1797: */
1798: static const xmlChar *
1799: nameNsPop(xmlParserCtxtPtr ctxt)
1800: {
1801: const xmlChar *ret;
1802:
1803: if (ctxt->nameNr <= 0)
1804: return (NULL);
1805: ctxt->nameNr--;
1806: if (ctxt->nameNr > 0)
1807: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1808: else
1809: ctxt->name = NULL;
1810: ret = ctxt->nameTab[ctxt->nameNr];
1811: ctxt->nameTab[ctxt->nameNr] = NULL;
1812: return (ret);
1813: }
1814: #endif /* LIBXML_PUSH_ENABLED */
1815:
1816: /**
1817: * namePush:
1818: * @ctxt: an XML parser context
1819: * @value: the element name
1820: *
1821: * Pushes a new element name on top of the name stack
1822: *
1823: * Returns -1 in case of error, the index in the stack otherwise
1824: */
1825: int
1826: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1827: {
1828: if (ctxt == NULL) return (-1);
1829:
1830: if (ctxt->nameNr >= ctxt->nameMax) {
1831: const xmlChar * *tmp;
1832: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1.1.1.2 ! misho 1833: ctxt->nameMax * 2 *
1.1 misho 1834: sizeof(ctxt->nameTab[0]));
1835: if (tmp == NULL) {
1836: goto mem_error;
1837: }
1838: ctxt->nameTab = tmp;
1.1.1.2 ! misho 1839: ctxt->nameMax *= 2;
1.1 misho 1840: }
1841: ctxt->nameTab[ctxt->nameNr] = value;
1842: ctxt->name = value;
1843: return (ctxt->nameNr++);
1844: mem_error:
1845: xmlErrMemory(ctxt, NULL);
1846: return (-1);
1847: }
1848: /**
1849: * namePop:
1850: * @ctxt: an XML parser context
1851: *
1852: * Pops the top element name from the name stack
1853: *
1854: * Returns the name just removed
1855: */
1856: const xmlChar *
1857: namePop(xmlParserCtxtPtr ctxt)
1858: {
1859: const xmlChar *ret;
1860:
1861: if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1862: return (NULL);
1863: ctxt->nameNr--;
1864: if (ctxt->nameNr > 0)
1865: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1866: else
1867: ctxt->name = NULL;
1868: ret = ctxt->nameTab[ctxt->nameNr];
1869: ctxt->nameTab[ctxt->nameNr] = NULL;
1870: return (ret);
1871: }
1872:
1873: static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1874: if (ctxt->spaceNr >= ctxt->spaceMax) {
1875: int *tmp;
1876:
1877: ctxt->spaceMax *= 2;
1878: tmp = (int *) xmlRealloc(ctxt->spaceTab,
1879: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1880: if (tmp == NULL) {
1881: xmlErrMemory(ctxt, NULL);
1882: ctxt->spaceMax /=2;
1883: return(-1);
1884: }
1885: ctxt->spaceTab = tmp;
1886: }
1887: ctxt->spaceTab[ctxt->spaceNr] = val;
1888: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1889: return(ctxt->spaceNr++);
1890: }
1891:
1892: static int spacePop(xmlParserCtxtPtr ctxt) {
1893: int ret;
1894: if (ctxt->spaceNr <= 0) return(0);
1895: ctxt->spaceNr--;
1896: if (ctxt->spaceNr > 0)
1897: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1898: else
1899: ctxt->space = &ctxt->spaceTab[0];
1900: ret = ctxt->spaceTab[ctxt->spaceNr];
1901: ctxt->spaceTab[ctxt->spaceNr] = -1;
1902: return(ret);
1903: }
1904:
1905: /*
1906: * Macros for accessing the content. Those should be used only by the parser,
1907: * and not exported.
1908: *
1909: * Dirty macros, i.e. one often need to make assumption on the context to
1910: * use them
1911: *
1912: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1913: * To be used with extreme caution since operations consuming
1914: * characters may move the input buffer to a different location !
1915: * CUR returns the current xmlChar value, i.e. an 8 bit value.
1916: * This should be used internally by the parser
1917: * only to compare to ASCII values, otherwise it would break when
1918: * running with UTF-8 encoding.
1919: * RAW same as CUR but in the input buffer, bypass any token
1920: * extraction that may have been done
1921: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1922: * to compare on ASCII based substring.
1923: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1924: * strings without newlines within the parser.
1925: * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1926: * defined char within the parser.
1927: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1928: *
1929: * NEXT Skip to the next character, this does the proper decoding
1930: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1931: * NEXTL(l) Skip the current unicode character of l xmlChars long.
1932: * CUR_CHAR(l) returns the current unicode character (int), set l
1933: * to the number of xmlChars used for the encoding [0-5].
1934: * CUR_SCHAR same but operate on a string instead of the context
1935: * COPY_BUF copy the current unicode char to the target buffer, increment
1936: * the index
1937: * GROW, SHRINK handling of input buffers
1938: */
1939:
/*
 * Raw accessors to the current input. They all expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes found at s, read as
 * unsigned chars, are c1 ... cn. The comparison stops at the first
 * mismatch. The argument s is evaluated several times, so it must not
 * have side effects; it is fully parenthesized so that the cast applies
 * to the whole expression passed and not only to its first operand.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1962:
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters accordingly. A '%' found at the new
 * position is handed to xmlParserHandlePEReference(); when the end of the
 * buffer is reached and it cannot be grown, the input is popped.
 * val is evaluated several times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but moving one xmlChar at a time so that the
 * line and column counters are kept right when newlines are skipped.
 * val is evaluated on each iteration; it is parenthesized so that a
 * compound expression is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1985:
1986: #define SHRINK if ((ctxt->progressive == 0) && \
1987: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1988: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1989: xmlSHRINK (ctxt);
1990:
1991: static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1992: xmlParserInputShrink(ctxt->input);
1993: if ((*ctxt->input->cur == 0) &&
1994: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1995: xmlPopInput(ctxt);
1996: }
1997:
1998: #define GROW if ((ctxt->progressive == 0) && \
1999: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2000: xmlGROW (ctxt);
2001:
2002: static void xmlGROW (xmlParserCtxtPtr ctxt) {
2003: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2004: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2005: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2006: xmlPopInput(ctxt);
2007: }
2008:
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting one column, and try to
 * grow the input when a 0 is reached. Expands to a brace-enclosed block.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping the line and column counters up to date. A '%' found at the new
 * position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input, l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i and
 * advance i: a plain store when l is 1, xmlCopyCharMultiByte() otherwise.
 * Expands to an unbraced if/else statement without its final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1)								\
	b[i++] = (xmlChar) v;						\
    else								\
	i += xmlCopyCharMultiByte(&b[i],v)
2035:
2036: /**
2037: * xmlSkipBlankChars:
2038: * @ctxt: the XML parser context
2039: *
2040: * skip all blanks character found at that point in the input streams.
2041: * It pops up finished entities in the process if allowable at that point.
2042: *
2043: * Returns the number of space chars skipped
2044: */
2045:
2046: int
2047: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2048: int res = 0;
2049:
2050: /*
2051: * It's Okay to use CUR/NEXT here since all the blanks are on
2052: * the ASCII range.
2053: */
2054: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2055: const xmlChar *cur;
2056: /*
2057: * if we are in the document content, go really fast
2058: */
2059: cur = ctxt->input->cur;
2060: while (IS_BLANK_CH(*cur)) {
2061: if (*cur == '\n') {
2062: ctxt->input->line++; ctxt->input->col = 1;
2063: }
2064: cur++;
2065: res++;
2066: if (*cur == 0) {
2067: ctxt->input->cur = cur;
2068: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2069: cur = ctxt->input->cur;
2070: }
2071: }
2072: ctxt->input->cur = cur;
2073: } else {
2074: int cur;
2075: do {
2076: cur = CUR;
2077: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2078: NEXT;
2079: cur = CUR;
2080: res++;
2081: }
2082: while ((cur == 0) && (ctxt->inputNr > 1) &&
2083: (ctxt->instate != XML_PARSER_COMMENT)) {
2084: xmlPopInput(ctxt);
2085: cur = CUR;
2086: }
2087: /*
2088: * Need to handle support of entities branching here
2089: */
2090: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2091: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2092: }
2093: return(res);
2094: }
2095:
2096: /************************************************************************
2097: * *
2098: * Commodity functions to handle entities *
2099: * *
2100: ************************************************************************/
2101:
2102: /**
2103: * xmlPopInput:
2104: * @ctxt: an XML parser context
2105: *
2106: * xmlPopInput: the current input pointed by ctxt->input came to an end
2107: * pop it and return the next char.
2108: *
2109: * Returns the current xmlChar in the parser context
2110: */
2111: xmlChar
2112: xmlPopInput(xmlParserCtxtPtr ctxt) {
2113: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2114: if (xmlParserDebugEntities)
2115: xmlGenericError(xmlGenericErrorContext,
2116: "Popping input %d\n", ctxt->inputNr);
2117: xmlFreeInputStream(inputPop(ctxt));
2118: if ((*ctxt->input->cur == 0) &&
2119: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2120: return(xmlPopInput(ctxt));
2121: return(CUR);
2122: }
2123:
2124: /**
2125: * xmlPushInput:
2126: * @ctxt: an XML parser context
2127: * @input: an XML parser input fragment (entity, XML fragment ...).
2128: *
2129: * xmlPushInput: switch to a new input stream which is stacked on top
2130: * of the previous one(s).
2131: * Returns -1 in case of error or the index in the input stack
2132: */
2133: int
2134: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2135: int ret;
2136: if (input == NULL) return(-1);
2137:
2138: if (xmlParserDebugEntities) {
2139: if ((ctxt->input != NULL) && (ctxt->input->filename))
2140: xmlGenericError(xmlGenericErrorContext,
2141: "%s(%d): ", ctxt->input->filename,
2142: ctxt->input->line);
2143: xmlGenericError(xmlGenericErrorContext,
2144: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2145: }
2146: ret = inputPush(ctxt, input);
2147: GROW;
2148: return(ret);
2149: }
2150:
2151: /**
2152: * xmlParseCharRef:
2153: * @ctxt: an XML parser context
2154: *
2155: * parse Reference declarations
2156: *
2157: * [66] CharRef ::= '&#' [0-9]+ ';' |
2158: * '&#x' [0-9a-fA-F]+ ';'
2159: *
2160: * [ WFC: Legal Character ]
2161: * Characters referred to using character references must match the
2162: * production for Char.
2163: *
2164: * Returns the value parsed (as an int), 0 in case of error
2165: */
2166: int
2167: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2168: unsigned int val = 0;
2169: int count = 0;
2170: unsigned int outofrange = 0;
2171:
2172: /*
2173: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2174: */
2175: if ((RAW == '&') && (NXT(1) == '#') &&
2176: (NXT(2) == 'x')) {
2177: SKIP(3);
2178: GROW;
2179: while (RAW != ';') { /* loop blocked by count */
2180: if (count++ > 20) {
2181: count = 0;
2182: GROW;
2183: }
2184: if ((RAW >= '0') && (RAW <= '9'))
2185: val = val * 16 + (CUR - '0');
2186: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2187: val = val * 16 + (CUR - 'a') + 10;
2188: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2189: val = val * 16 + (CUR - 'A') + 10;
2190: else {
2191: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2192: val = 0;
2193: break;
2194: }
2195: if (val > 0x10FFFF)
2196: outofrange = val;
2197:
2198: NEXT;
2199: count++;
2200: }
2201: if (RAW == ';') {
2202: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2203: ctxt->input->col++;
2204: ctxt->nbChars ++;
2205: ctxt->input->cur++;
2206: }
2207: } else if ((RAW == '&') && (NXT(1) == '#')) {
2208: SKIP(2);
2209: GROW;
2210: while (RAW != ';') { /* loop blocked by count */
2211: if (count++ > 20) {
2212: count = 0;
2213: GROW;
2214: }
2215: if ((RAW >= '0') && (RAW <= '9'))
2216: val = val * 10 + (CUR - '0');
2217: else {
2218: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2219: val = 0;
2220: break;
2221: }
2222: if (val > 0x10FFFF)
2223: outofrange = val;
2224:
2225: NEXT;
2226: count++;
2227: }
2228: if (RAW == ';') {
2229: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2230: ctxt->input->col++;
2231: ctxt->nbChars ++;
2232: ctxt->input->cur++;
2233: }
2234: } else {
2235: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2236: }
2237:
2238: /*
2239: * [ WFC: Legal Character ]
2240: * Characters referred to using character references must match the
2241: * production for Char.
2242: */
2243: if ((IS_CHAR(val) && (outofrange == 0))) {
2244: return(val);
2245: } else {
2246: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2247: "xmlParseCharRef: invalid xmlChar value %d\n",
2248: val);
2249: }
2250: return(0);
2251: }
2252:
2253: /**
2254: * xmlParseStringCharRef:
2255: * @ctxt: an XML parser context
2256: * @str: a pointer to an index in the string
2257: *
2258: * parse Reference declarations, variant parsing from a string rather
2259: * than an an input flow.
2260: *
2261: * [66] CharRef ::= '&#' [0-9]+ ';' |
2262: * '&#x' [0-9a-fA-F]+ ';'
2263: *
2264: * [ WFC: Legal Character ]
2265: * Characters referred to using character references must match the
2266: * production for Char.
2267: *
2268: * Returns the value parsed (as an int), 0 in case of error, str will be
2269: * updated to the current value of the index
2270: */
2271: static int
2272: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2273: const xmlChar *ptr;
2274: xmlChar cur;
2275: unsigned int val = 0;
2276: unsigned int outofrange = 0;
2277:
2278: if ((str == NULL) || (*str == NULL)) return(0);
2279: ptr = *str;
2280: cur = *ptr;
2281: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2282: ptr += 3;
2283: cur = *ptr;
2284: while (cur != ';') { /* Non input consuming loop */
2285: if ((cur >= '0') && (cur <= '9'))
2286: val = val * 16 + (cur - '0');
2287: else if ((cur >= 'a') && (cur <= 'f'))
2288: val = val * 16 + (cur - 'a') + 10;
2289: else if ((cur >= 'A') && (cur <= 'F'))
2290: val = val * 16 + (cur - 'A') + 10;
2291: else {
2292: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2293: val = 0;
2294: break;
2295: }
2296: if (val > 0x10FFFF)
2297: outofrange = val;
2298:
2299: ptr++;
2300: cur = *ptr;
2301: }
2302: if (cur == ';')
2303: ptr++;
2304: } else if ((cur == '&') && (ptr[1] == '#')){
2305: ptr += 2;
2306: cur = *ptr;
2307: while (cur != ';') { /* Non input consuming loops */
2308: if ((cur >= '0') && (cur <= '9'))
2309: val = val * 10 + (cur - '0');
2310: else {
2311: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2312: val = 0;
2313: break;
2314: }
2315: if (val > 0x10FFFF)
2316: outofrange = val;
2317:
2318: ptr++;
2319: cur = *ptr;
2320: }
2321: if (cur == ';')
2322: ptr++;
2323: } else {
2324: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2325: return(0);
2326: }
2327: *str = ptr;
2328:
2329: /*
2330: * [ WFC: Legal Character ]
2331: * Characters referred to using character references must match the
2332: * production for Char.
2333: */
2334: if ((IS_CHAR(val) && (outofrange == 0))) {
2335: return(val);
2336: } else {
2337: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2338: "xmlParseStringCharRef: invalid xmlChar value %d\n",
2339: val);
2340: }
2341: return(0);
2342: }
2343:
2344: /**
2345: * xmlNewBlanksWrapperInputStream:
2346: * @ctxt: an XML parser context
2347: * @entity: an Entity pointer
2348: *
2349: * Create a new input stream for wrapping
2350: * blanks around a PEReference
2351: *
2352: * Returns the new input stream or NULL
2353: */
2354:
2355: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2356:
2357: static xmlParserInputPtr
2358: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2359: xmlParserInputPtr input;
2360: xmlChar *buffer;
2361: size_t length;
2362: if (entity == NULL) {
2363: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2364: "xmlNewBlanksWrapperInputStream entity\n");
2365: return(NULL);
2366: }
2367: if (xmlParserDebugEntities)
2368: xmlGenericError(xmlGenericErrorContext,
2369: "new blanks wrapper for entity: %s\n", entity->name);
2370: input = xmlNewInputStream(ctxt);
2371: if (input == NULL) {
2372: return(NULL);
2373: }
2374: length = xmlStrlen(entity->name) + 5;
2375: buffer = xmlMallocAtomic(length);
2376: if (buffer == NULL) {
2377: xmlErrMemory(ctxt, NULL);
2378: xmlFree(input);
2379: return(NULL);
2380: }
2381: buffer [0] = ' ';
2382: buffer [1] = '%';
2383: buffer [length-3] = ';';
2384: buffer [length-2] = ' ';
2385: buffer [length-1] = 0;
2386: memcpy(buffer + 2, entity->name, length - 5);
2387: input->free = deallocblankswrapper;
2388: input->base = buffer;
2389: input->cur = buffer;
2390: input->length = length;
2391: input->end = &buffer[length];
2392: return(input);
2393: }
2394:
2395: /**
2396: * xmlParserHandlePEReference:
2397: * @ctxt: the parser context
2398: *
2399: * [69] PEReference ::= '%' Name ';'
2400: *
2401: * [ WFC: No Recursion ]
2402: * A parsed entity must not contain a recursive
2403: * reference to itself, either directly or indirectly.
2404: *
2405: * [ WFC: Entity Declared ]
2406: * In a document without any DTD, a document with only an internal DTD
2407: * subset which contains no parameter entity references, or a document
2408: * with "standalone='yes'", ... ... The declaration of a parameter
2409: * entity must precede any reference to it...
2410: *
2411: * [ VC: Entity Declared ]
2412: * In a document with an external subset or external parameter entities
2413: * with "standalone='no'", ... ... The declaration of a parameter entity
2414: * must precede any reference to it...
2415: *
2416: * [ WFC: In DTD ]
2417: * Parameter-entity references may only appear in the DTD.
2418: * NOTE: misleading but this is handled.
2419: *
2420: * A PEReference may have been detected in the current input stream
2421: * the handling is done accordingly to
2422: * http://www.w3.org/TR/REC-xml#entproc
2423: * i.e.
2424: * - Included in literal in entity values
2425: * - Included as Parameter Entity reference within DTDs
2426: */
2427: void
2428: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2429: const xmlChar *name;
2430: xmlEntityPtr entity = NULL;
2431: xmlParserInputPtr input;
2432:
2433: if (RAW != '%') return;
2434: switch(ctxt->instate) {
2435: case XML_PARSER_CDATA_SECTION:
2436: return;
2437: case XML_PARSER_COMMENT:
2438: return;
2439: case XML_PARSER_START_TAG:
2440: return;
2441: case XML_PARSER_END_TAG:
2442: return;
2443: case XML_PARSER_EOF:
2444: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2445: return;
2446: case XML_PARSER_PROLOG:
2447: case XML_PARSER_START:
2448: case XML_PARSER_MISC:
2449: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2450: return;
2451: case XML_PARSER_ENTITY_DECL:
2452: case XML_PARSER_CONTENT:
2453: case XML_PARSER_ATTRIBUTE_VALUE:
2454: case XML_PARSER_PI:
2455: case XML_PARSER_SYSTEM_LITERAL:
2456: case XML_PARSER_PUBLIC_LITERAL:
2457: /* we just ignore it there */
2458: return;
2459: case XML_PARSER_EPILOG:
2460: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2461: return;
2462: case XML_PARSER_ENTITY_VALUE:
2463: /*
2464: * NOTE: in the case of entity values, we don't do the
2465: * substitution here since we need the literal
2466: * entity value to be able to save the internal
2467: * subset of the document.
2468: * This will be handled by xmlStringDecodeEntities
2469: */
2470: return;
2471: case XML_PARSER_DTD:
2472: /*
2473: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2474: * In the internal DTD subset, parameter-entity references
2475: * can occur only where markup declarations can occur, not
2476: * within markup declarations.
2477: * In that case this is handled in xmlParseMarkupDecl
2478: */
2479: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2480: return;
2481: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2482: return;
2483: break;
2484: case XML_PARSER_IGNORE:
2485: return;
2486: }
2487:
2488: NEXT;
2489: name = xmlParseName(ctxt);
2490: if (xmlParserDebugEntities)
2491: xmlGenericError(xmlGenericErrorContext,
2492: "PEReference: %s\n", name);
2493: if (name == NULL) {
2494: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2495: } else {
2496: if (RAW == ';') {
2497: NEXT;
2498: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2499: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2500: if (entity == NULL) {
2501:
2502: /*
2503: * [ WFC: Entity Declared ]
2504: * In a document without any DTD, a document with only an
2505: * internal DTD subset which contains no parameter entity
2506: * references, or a document with "standalone='yes'", ...
2507: * ... The declaration of a parameter entity must precede
2508: * any reference to it...
2509: */
2510: if ((ctxt->standalone == 1) ||
2511: ((ctxt->hasExternalSubset == 0) &&
2512: (ctxt->hasPErefs == 0))) {
2513: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2514: "PEReference: %%%s; not found\n", name);
2515: } else {
2516: /*
2517: * [ VC: Entity Declared ]
2518: * In a document with an external subset or external
2519: * parameter entities with "standalone='no'", ...
2520: * ... The declaration of a parameter entity must precede
2521: * any reference to it...
2522: */
2523: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2524: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2525: "PEReference: %%%s; not found\n",
2526: name, NULL);
2527: } else
2528: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529: "PEReference: %%%s; not found\n",
2530: name, NULL);
2531: ctxt->valid = 0;
2532: }
2533: } else if (ctxt->input->free != deallocblankswrapper) {
2534: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2535: if (xmlPushInput(ctxt, input) < 0)
2536: return;
2537: } else {
2538: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2539: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2540: xmlChar start[4];
2541: xmlCharEncoding enc;
2542:
2543: /*
2544: * handle the extra spaces added before and after
2545: * c.f. http://www.w3.org/TR/REC-xml#as-PE
2546: * this is done independently.
2547: */
2548: input = xmlNewEntityInputStream(ctxt, entity);
2549: if (xmlPushInput(ctxt, input) < 0)
2550: return;
2551:
2552: /*
2553: * Get the 4 first bytes and decode the charset
2554: * if enc != XML_CHAR_ENCODING_NONE
2555: * plug some encoding conversion routines.
2556: * Note that, since we may have some non-UTF8
2557: * encoding (like UTF16, bug 135229), the 'length'
2558: * is not known, but we can calculate based upon
2559: * the amount of data in the buffer.
2560: */
2561: GROW
2562: if ((ctxt->input->end - ctxt->input->cur)>=4) {
2563: start[0] = RAW;
2564: start[1] = NXT(1);
2565: start[2] = NXT(2);
2566: start[3] = NXT(3);
2567: enc = xmlDetectCharEncoding(start, 4);
2568: if (enc != XML_CHAR_ENCODING_NONE) {
2569: xmlSwitchEncoding(ctxt, enc);
2570: }
2571: }
2572:
2573: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2574: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2575: (IS_BLANK_CH(NXT(5)))) {
2576: xmlParseTextDecl(ctxt);
2577: }
2578: } else {
2579: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2580: "PEReference: %s is not a parameter entity\n",
2581: name);
2582: }
2583: }
2584: } else {
2585: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2586: }
2587: }
2588: }
2589:
/*
 * Macro used to grow the current buffer.
 *
 * The size variable must be named <buffer>_size; it becomes twice its
 * previous value plus n, and the buffer is reallocated to that many
 * xmlChar. The enclosing function must provide a "mem_error" label,
 * which is jumped to when xmlRealloc() fails (buffer itself is left
 * pointing at the old block in that case).
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
									\
    buffer##_size = (buffer##_size * 2) + (n);				\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL)							\
        goto mem_error;							\
    buffer = tmp;							\
}
2602:
2603: /**
2604: * xmlStringLenDecodeEntities:
2605: * @ctxt: the parser context
2606: * @str: the input string
2607: * @len: the string length
2608: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2609: * @end: an end marker xmlChar, 0 if none
2610: * @end2: an end marker xmlChar, 0 if none
2611: * @end3: an end marker xmlChar, 0 if none
2612: *
2613: * Takes a entity string content and process to do the adequate substitutions.
2614: *
2615: * [67] Reference ::= EntityRef | CharRef
2616: *
2617: * [69] PEReference ::= '%' Name ';'
2618: *
2619: * Returns A newly allocated string with the substitution done. The caller
2620: * must deallocate it !
2621: */
2622: xmlChar *
2623: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2624: int what, xmlChar end, xmlChar end2, xmlChar end3) {
2625: xmlChar *buffer = NULL;
2626: int buffer_size = 0;
2627:
2628: xmlChar *current = NULL;
2629: xmlChar *rep = NULL;
2630: const xmlChar *last;
2631: xmlEntityPtr ent;
2632: int c,l;
2633: int nbchars = 0;
2634:
2635: if ((ctxt == NULL) || (str == NULL) || (len < 0))
2636: return(NULL);
2637: last = str + len;
2638:
2639: if (((ctxt->depth > 40) &&
2640: ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2641: (ctxt->depth > 1024)) {
2642: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2643: return(NULL);
2644: }
2645:
2646: /*
2647: * allocate a translation buffer.
2648: */
2649: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2650: buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2651: if (buffer == NULL) goto mem_error;
2652:
2653: /*
2654: * OK loop until we reach one of the ending char or a size limit.
2655: * we are operating on already parsed values.
2656: */
2657: if (str < last)
2658: c = CUR_SCHAR(str, l);
2659: else
2660: c = 0;
2661: while ((c != 0) && (c != end) && /* non input consuming loop */
2662: (c != end2) && (c != end3)) {
2663:
2664: if (c == 0) break;
2665: if ((c == '&') && (str[1] == '#')) {
2666: int val = xmlParseStringCharRef(ctxt, &str);
2667: if (val != 0) {
2668: COPY_BUF(0,buffer,nbchars,val);
2669: }
2670: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2671: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2672: }
2673: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2674: if (xmlParserDebugEntities)
2675: xmlGenericError(xmlGenericErrorContext,
2676: "String decoding Entity Reference: %.30s\n",
2677: str);
2678: ent = xmlParseStringEntityRef(ctxt, &str);
2679: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2680: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2681: goto int_error;
2682: if (ent != NULL)
2683: ctxt->nbentities += ent->checked;
2684: if ((ent != NULL) &&
2685: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2686: if (ent->content != NULL) {
2687: COPY_BUF(0,buffer,nbchars,ent->content[0]);
2688: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2689: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2690: }
2691: } else {
2692: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2693: "predefined entity has no content\n");
2694: }
2695: } else if ((ent != NULL) && (ent->content != NULL)) {
2696: ctxt->depth++;
2697: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2698: 0, 0, 0);
2699: ctxt->depth--;
2700:
2701: if (rep != NULL) {
2702: current = rep;
2703: while (*current != 0) { /* non input consuming loop */
2704: buffer[nbchars++] = *current++;
2705: if (nbchars >
2706: buffer_size - XML_PARSER_BUFFER_SIZE) {
2707: if (xmlParserEntityCheck(ctxt, nbchars, ent))
2708: goto int_error;
2709: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2710: }
2711: }
2712: xmlFree(rep);
2713: rep = NULL;
2714: }
2715: } else if (ent != NULL) {
2716: int i = xmlStrlen(ent->name);
2717: const xmlChar *cur = ent->name;
2718:
2719: buffer[nbchars++] = '&';
2720: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.1.1.2 ! misho 2721: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
1.1 misho 2722: }
2723: for (;i > 0;i--)
2724: buffer[nbchars++] = *cur++;
2725: buffer[nbchars++] = ';';
2726: }
2727: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2728: if (xmlParserDebugEntities)
2729: xmlGenericError(xmlGenericErrorContext,
2730: "String decoding PE Reference: %.30s\n", str);
2731: ent = xmlParseStringPEReference(ctxt, &str);
2732: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2733: goto int_error;
2734: if (ent != NULL)
2735: ctxt->nbentities += ent->checked;
2736: if (ent != NULL) {
2737: if (ent->content == NULL) {
2738: xmlLoadEntityContent(ctxt, ent);
2739: }
2740: ctxt->depth++;
2741: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2742: 0, 0, 0);
2743: ctxt->depth--;
2744: if (rep != NULL) {
2745: current = rep;
2746: while (*current != 0) { /* non input consuming loop */
2747: buffer[nbchars++] = *current++;
2748: if (nbchars >
2749: buffer_size - XML_PARSER_BUFFER_SIZE) {
2750: if (xmlParserEntityCheck(ctxt, nbchars, ent))
2751: goto int_error;
2752: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2753: }
2754: }
2755: xmlFree(rep);
2756: rep = NULL;
2757: }
2758: }
2759: } else {
2760: COPY_BUF(l,buffer,nbchars,c);
2761: str += l;
2762: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2763: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2764: }
2765: }
2766: if (str < last)
2767: c = CUR_SCHAR(str, l);
2768: else
2769: c = 0;
2770: }
2771: buffer[nbchars] = 0;
2772: return(buffer);
2773:
2774: mem_error:
2775: xmlErrMemory(ctxt, NULL);
2776: int_error:
2777: if (rep != NULL)
2778: xmlFree(rep);
2779: if (buffer != NULL)
2780: xmlFree(buffer);
2781: return(NULL);
2782: }
2783:
2784: /**
2785: * xmlStringDecodeEntities:
2786: * @ctxt: the parser context
2787: * @str: the input string
2788: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2789: * @end: an end marker xmlChar, 0 if none
2790: * @end2: an end marker xmlChar, 0 if none
2791: * @end3: an end marker xmlChar, 0 if none
2792: *
2793: * Takes a entity string content and process to do the adequate substitutions.
2794: *
2795: * [67] Reference ::= EntityRef | CharRef
2796: *
2797: * [69] PEReference ::= '%' Name ';'
2798: *
2799: * Returns A newly allocated string with the substitution done. The caller
2800: * must deallocate it !
2801: */
2802: xmlChar *
2803: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2804: xmlChar end, xmlChar end2, xmlChar end3) {
2805: if ((ctxt == NULL) || (str == NULL)) return(NULL);
2806: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2807: end, end2, end3));
2808: }
2809:
2810: /************************************************************************
2811: * *
2812: * Commodity functions, cleanup needed ? *
2813: * *
2814: ************************************************************************/
2815:
2816: /**
2817: * areBlanks:
2818: * @ctxt: an XML parser context
2819: * @str: a xmlChar *
2820: * @len: the size of @str
2821: * @blank_chars: we know the chars are blanks
2822: *
2823: * Is this a sequence of blank chars that one can ignore ?
2824: *
2825: * Returns 1 if ignorable 0 otherwise.
2826: */
2827:
2828: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2829: int blank_chars) {
2830: int i, ret;
2831: xmlNodePtr lastChild;
2832:
2833: /*
2834: * Don't spend time trying to differentiate them, the same callback is
2835: * used !
2836: */
2837: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2838: return(0);
2839:
2840: /*
2841: * Check for xml:space value.
2842: */
2843: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2844: (*(ctxt->space) == -2))
2845: return(0);
2846:
2847: /*
2848: * Check that the string is made of blanks
2849: */
2850: if (blank_chars == 0) {
2851: for (i = 0;i < len;i++)
2852: if (!(IS_BLANK_CH(str[i]))) return(0);
2853: }
2854:
2855: /*
2856: * Look if the element is mixed content in the DTD if available
2857: */
2858: if (ctxt->node == NULL) return(0);
2859: if (ctxt->myDoc != NULL) {
2860: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2861: if (ret == 0) return(1);
2862: if (ret == 1) return(0);
2863: }
2864:
2865: /*
2866: * Otherwise, heuristic :-\
2867: */
2868: if ((RAW != '<') && (RAW != 0xD)) return(0);
2869: if ((ctxt->node->children == NULL) &&
2870: (RAW == '<') && (NXT(1) == '/')) return(0);
2871:
2872: lastChild = xmlGetLastChild(ctxt->node);
2873: if (lastChild == NULL) {
2874: if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2875: (ctxt->node->content != NULL)) return(0);
2876: } else if (xmlNodeIsText(lastChild))
2877: return(0);
2878: else if ((ctxt->node->children != NULL) &&
2879: (xmlNodeIsText(ctxt->node->children)))
2880: return(0);
2881: return(1);
2882: }
2883:
2884: /************************************************************************
2885: * *
2886: * Extra stuff for namespace support *
2887: * Relates to http://www.w3.org/TR/WD-xml-names *
2888: * *
2889: ************************************************************************/
2890:
2891: /**
2892: * xmlSplitQName:
2893: * @ctxt: an XML parser context
2894: * @name: an XML parser context
2895: * @prefix: a xmlChar **
2896: *
2897: * parse an UTF8 encoded XML qualified name string
2898: *
2899: * [NS 5] QName ::= (Prefix ':')? LocalPart
2900: *
2901: * [NS 6] Prefix ::= NCName
2902: *
2903: * [NS 7] LocalPart ::= NCName
2904: *
2905: * Returns the local part, and prefix is updated
2906: * to get the Prefix if any.
2907: */
2908:
2909: xmlChar *
2910: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2911: xmlChar buf[XML_MAX_NAMELEN + 5];
2912: xmlChar *buffer = NULL;
2913: int len = 0;
2914: int max = XML_MAX_NAMELEN;
2915: xmlChar *ret = NULL;
2916: const xmlChar *cur = name;
2917: int c;
2918:
2919: if (prefix == NULL) return(NULL);
2920: *prefix = NULL;
2921:
2922: if (cur == NULL) return(NULL);
2923:
2924: #ifndef XML_XML_NAMESPACE
2925: /* xml: prefix is not really a namespace */
2926: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2927: (cur[2] == 'l') && (cur[3] == ':'))
2928: return(xmlStrdup(name));
2929: #endif
2930:
2931: /* nasty but well=formed */
2932: if (cur[0] == ':')
2933: return(xmlStrdup(name));
2934:
2935: c = *cur++;
2936: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2937: buf[len++] = c;
2938: c = *cur++;
2939: }
2940: if (len >= max) {
2941: /*
2942: * Okay someone managed to make a huge name, so he's ready to pay
2943: * for the processing speed.
2944: */
2945: max = len * 2;
2946:
2947: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2948: if (buffer == NULL) {
2949: xmlErrMemory(ctxt, NULL);
2950: return(NULL);
2951: }
2952: memcpy(buffer, buf, len);
2953: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2954: if (len + 10 > max) {
2955: xmlChar *tmp;
2956:
2957: max *= 2;
2958: tmp = (xmlChar *) xmlRealloc(buffer,
2959: max * sizeof(xmlChar));
2960: if (tmp == NULL) {
2961: xmlFree(buffer);
2962: xmlErrMemory(ctxt, NULL);
2963: return(NULL);
2964: }
2965: buffer = tmp;
2966: }
2967: buffer[len++] = c;
2968: c = *cur++;
2969: }
2970: buffer[len] = 0;
2971: }
2972:
2973: if ((c == ':') && (*cur == 0)) {
2974: if (buffer != NULL)
2975: xmlFree(buffer);
2976: *prefix = NULL;
2977: return(xmlStrdup(name));
2978: }
2979:
2980: if (buffer == NULL)
2981: ret = xmlStrndup(buf, len);
2982: else {
2983: ret = buffer;
2984: buffer = NULL;
2985: max = XML_MAX_NAMELEN;
2986: }
2987:
2988:
2989: if (c == ':') {
2990: c = *cur;
2991: *prefix = ret;
2992: if (c == 0) {
2993: return(xmlStrndup(BAD_CAST "", 0));
2994: }
2995: len = 0;
2996:
2997: /*
2998: * Check that the first character is proper to start
2999: * a new name
3000: */
3001: if (!(((c >= 0x61) && (c <= 0x7A)) ||
3002: ((c >= 0x41) && (c <= 0x5A)) ||
3003: (c == '_') || (c == ':'))) {
3004: int l;
3005: int first = CUR_SCHAR(cur, l);
3006:
3007: if (!IS_LETTER(first) && (first != '_')) {
3008: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3009: "Name %s is not XML Namespace compliant\n",
3010: name);
3011: }
3012: }
3013: cur++;
3014:
3015: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3016: buf[len++] = c;
3017: c = *cur++;
3018: }
3019: if (len >= max) {
3020: /*
3021: * Okay someone managed to make a huge name, so he's ready to pay
3022: * for the processing speed.
3023: */
3024: max = len * 2;
3025:
3026: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3027: if (buffer == NULL) {
3028: xmlErrMemory(ctxt, NULL);
3029: return(NULL);
3030: }
3031: memcpy(buffer, buf, len);
3032: while (c != 0) { /* tested bigname2.xml */
3033: if (len + 10 > max) {
3034: xmlChar *tmp;
3035:
3036: max *= 2;
3037: tmp = (xmlChar *) xmlRealloc(buffer,
3038: max * sizeof(xmlChar));
3039: if (tmp == NULL) {
3040: xmlErrMemory(ctxt, NULL);
3041: xmlFree(buffer);
3042: return(NULL);
3043: }
3044: buffer = tmp;
3045: }
3046: buffer[len++] = c;
3047: c = *cur++;
3048: }
3049: buffer[len] = 0;
3050: }
3051:
3052: if (buffer == NULL)
3053: ret = xmlStrndup(buf, len);
3054: else {
3055: ret = buffer;
3056: }
3057: }
3058:
3059: return(ret);
3060: }
3061:
3062: /************************************************************************
3063: * *
3064: * The parser itself *
3065: * Relates to http://www.w3.org/TR/REC-xml *
3066: * *
3067: ************************************************************************/
3068:
3069: /************************************************************************
3070: * *
3071: * Routines to parse Name, NCName and NmToken *
3072: * *
3073: ************************************************************************/
#ifdef DEBUG
/*
 * Counters only compiled in when DEBUG is defined. xmlParseNameComplex()
 * bumps nbParseNameComplex; presumably each of the others is bumped by
 * the parsing routine it is named after (verify against those routines).
 */
static unsigned long nbParseName = 0,
                     nbParseNmToken = 0,
                     nbParseNCName = 0,
                     nbParseNCNameComplex = 0,
                     nbParseNameComplex = 0,
                     nbParseStringName = 0;
#endif
3082:
3083: /*
3084: * The two following functions are related to the change of accepted
3085: * characters for Name and NmToken in the Revision 5 of XML-1.0
3086: * They correspond to the modified production [4] and the new production [4a]
3087: * changes in that revision. Also note that the macros used for the
3088: * productions Letter, Digit, CombiningChar and Extender are not needed
3089: * anymore.
3090: * We still keep compatibility to pre-revision5 parsing semantic if the
3091: * new XML_PARSE_OLD10 option is given to the parser.
3092: */
3093: static int
3094: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3095: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3096: /*
3097: * Use the new checks of production [4] [4a] amd [5] of the
3098: * Update 5 of XML-1.0
3099: */
3100: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3101: (((c >= 'a') && (c <= 'z')) ||
3102: ((c >= 'A') && (c <= 'Z')) ||
3103: (c == '_') || (c == ':') ||
3104: ((c >= 0xC0) && (c <= 0xD6)) ||
3105: ((c >= 0xD8) && (c <= 0xF6)) ||
3106: ((c >= 0xF8) && (c <= 0x2FF)) ||
3107: ((c >= 0x370) && (c <= 0x37D)) ||
3108: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3109: ((c >= 0x200C) && (c <= 0x200D)) ||
3110: ((c >= 0x2070) && (c <= 0x218F)) ||
3111: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3112: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3113: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3114: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3115: ((c >= 0x10000) && (c <= 0xEFFFF))))
3116: return(1);
3117: } else {
3118: if (IS_LETTER(c) || (c == '_') || (c == ':'))
3119: return(1);
3120: }
3121: return(0);
3122: }
3123:
3124: static int
3125: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3126: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3127: /*
3128: * Use the new checks of production [4] [4a] amd [5] of the
3129: * Update 5 of XML-1.0
3130: */
3131: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3132: (((c >= 'a') && (c <= 'z')) ||
3133: ((c >= 'A') && (c <= 'Z')) ||
3134: ((c >= '0') && (c <= '9')) || /* !start */
3135: (c == '_') || (c == ':') ||
3136: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3137: ((c >= 0xC0) && (c <= 0xD6)) ||
3138: ((c >= 0xD8) && (c <= 0xF6)) ||
3139: ((c >= 0xF8) && (c <= 0x2FF)) ||
3140: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3141: ((c >= 0x370) && (c <= 0x37D)) ||
3142: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143: ((c >= 0x200C) && (c <= 0x200D)) ||
3144: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3145: ((c >= 0x2070) && (c <= 0x218F)) ||
3146: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3147: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3148: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3149: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3150: ((c >= 0x10000) && (c <= 0xEFFFF))))
3151: return(1);
3152: } else {
3153: if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154: (c == '.') || (c == '-') ||
3155: (c == '_') || (c == ':') ||
3156: (IS_COMBINING(c)) ||
3157: (IS_EXTENDER(c)))
3158: return(1);
3159: }
3160: return(0);
3161: }
3162:
3163: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3164: int *len, int *alloc, int normalize);
3165:
/*
 * xmlParseNameComplex:
 * @ctxt: an XML parser context
 *
 * General Name parser; xmlParseName() falls back to it when its ASCII
 * accelerator does not apply. The input is read one character at a time
 * with CUR_CHAR and consumed with NEXTL, while len sums the per-character
 * lengths so that the name can be looked up relative to the final input
 * position.
 * Without XML_PARSE_OLD10 in ctxt->options the explicit code point ranges
 * are tested; with it the IS_LETTER / IS_DIGIT / IS_COMBINING /
 * IS_EXTENDER classification is used instead.
 *
 * Returns the xmlDictLookup() result for the name, or NULL when the
 * current character cannot start a name.
 */
3166: static const xmlChar *
3167: xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3168: int len = 0, l;
3169: int c;
3170: int count = 0;
3171:
3172: #ifdef DEBUG
3173: nbParseNameComplex++;
3174: #endif
3175:
3176: /*
3177: * Handler for more complex cases
3178: */
3179: GROW;
3180: c = CUR_CHAR(l);
3181: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3182: /*
3183: * Use the new checks of production [4] [4a] and [5] of the
3184: * Update 5 of XML-1.0
3185: */
3186: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187: (!(((c >= 'a') && (c <= 'z')) ||
3188: ((c >= 'A') && (c <= 'Z')) ||
3189: (c == '_') || (c == ':') ||
3190: ((c >= 0xC0) && (c <= 0xD6)) ||
3191: ((c >= 0xD8) && (c <= 0xF6)) ||
3192: ((c >= 0xF8) && (c <= 0x2FF)) ||
3193: ((c >= 0x370) && (c <= 0x37D)) ||
3194: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195: ((c >= 0x200C) && (c <= 0x200D)) ||
3196: ((c >= 0x2070) && (c <= 0x218F)) ||
3197: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3198: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3199: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3200: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3201: ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3202: return(NULL);
3203: }
3204: len += l;
3205: NEXTL(l);
3206: c = CUR_CHAR(l);
/* same ranges as above plus those marked "!start", which may not begin a name */
3207: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208: (((c >= 'a') && (c <= 'z')) ||
3209: ((c >= 'A') && (c <= 'Z')) ||
3210: ((c >= '0') && (c <= '9')) || /* !start */
3211: (c == '_') || (c == ':') ||
3212: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213: ((c >= 0xC0) && (c <= 0xD6)) ||
3214: ((c >= 0xD8) && (c <= 0xF6)) ||
3215: ((c >= 0xF8) && (c <= 0x2FF)) ||
3216: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217: ((c >= 0x370) && (c <= 0x37D)) ||
3218: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219: ((c >= 0x200C) && (c <= 0x200D)) ||
3220: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221: ((c >= 0x2070) && (c <= 0x218F)) ||
3222: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226: ((c >= 0x10000) && (c <= 0xEFFFF))
3227: )) {
3228: if (count++ > 100) { /* GROW is only invoked again after a run of characters */
3229: count = 0;
3230: GROW;
3231: }
3232: len += l;
3233: NEXTL(l);
3234: c = CUR_CHAR(l);
3235: }
3236: } else {
/* XML_PARSE_OLD10: classification through the IS_* character class macros */
3237: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238: (!IS_LETTER(c) && (c != '_') &&
3239: (c != ':'))) {
3240: return(NULL);
3241: }
3242: len += l;
3243: NEXTL(l);
3244: c = CUR_CHAR(l);
3245:
3246: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3247: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3248: (c == '.') || (c == '-') ||
3249: (c == '_') || (c == ':') ||
3250: (IS_COMBINING(c)) ||
3251: (IS_EXTENDER(c)))) {
3252: if (count++ > 100) {
3253: count = 0;
3254: GROW;
3255: }
3256: len += l;
3257: NEXTL(l);
3258: c = CUR_CHAR(l);
3259: }
3260: }
/*
 * When the cursor sits on the '\n' of a "\r\n" pair the name is taken from
 * one byte further back. NOTE(review): presumably CUR_CHAR stepped over the
 * '\r' without it being counted in len -- confirm in xmlCurrentChar().
 */
3261: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3262: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3263: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3264: }
3265:
3266: /**
3267:  * xmlParseName:
3268:  * @ctxt: an XML parser context
3269:  *
3270:  * Parse an XML Name at the current input position.
3271:  *
3272:  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3273:  *                  CombiningChar | Extender
3274:  *
3275:  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3276:  *
3277:  * [6] Names ::= Name (#x20 Name)*
3278:  *
3279:  * A byte-wise scan covers names built from ASCII letters, digits, '_',
3280:  * '-', ':' and '.' that end on a non-NUL ASCII byte; any other input is
3281:  * handed to xmlParseNameComplex().
3282:  *
3283:  * Returns the Name parsed or NULL
3284:  */
3285: const xmlChar *
3286: xmlParseName(xmlParserCtxtPtr ctxt) {
3287:     const xmlChar *scan;
3288:     const xmlChar *name;
3289:     int nbytes = 0;
3290:
3291:     GROW;
3292: #ifdef DEBUG
3293:     nbParseName++;
3294: #endif
3295:     scan = ctxt->input->cur;
3296:     /* the fast path needs an ASCII letter, '_' or ':' as first byte */
3297:     if (!(((*scan >= 0x61) && (*scan <= 0x7A)) ||
3298:           ((*scan >= 0x41) && (*scan <= 0x5A)) ||
3299:           (*scan == '_') || (*scan == ':')))
3300:         return(xmlParseNameComplex(ctxt));
3301:     /* step over every following ASCII name byte */
3302:     do {
3303:         scan++;
3304:     } while (((*scan >= 0x61) && (*scan <= 0x7A)) ||
3305:              ((*scan >= 0x41) && (*scan <= 0x5A)) ||
3306:              ((*scan >= 0x30) && (*scan <= 0x39)) ||
3307:              (*scan == '_') || (*scan == '-') ||
3308:              (*scan == ':') || (*scan == '.'));
3309:
3310:     /* a NUL or non-ASCII byte here means the scan result is discarded */
3311:     if (!((*scan > 0) && (*scan < 0x80)))
3312:         return(xmlParseNameComplex(ctxt));
3313:     nbytes = scan - ctxt->input->cur;
3314:     name = xmlDictLookup(ctxt->dict, ctxt->input->cur, nbytes);
3315:     ctxt->input->cur = scan;
3316:     ctxt->nbChars += nbytes;
3317:     ctxt->input->col += nbytes;
3318:     if (name == NULL)
3319:         xmlErrMemory(ctxt, NULL);
3320:     return(name);
3321: }
3322:
3323: /*
3324:  * xmlParseNCNameComplex: character-wise NCName parser, the fallback of
3325:  * xmlParseNCName(). @ctxt is the parser context. Returns the dictionary
3326:  * entry of the name, or NULL if the current character cannot start one.
3327:  */
3328: static const xmlChar *
3329: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3330:     int len = 0, l;
3331:     int c;
3332:     int sinceGrow = 0;
3333: #ifdef DEBUG
3334:     nbParseNCNameComplex++;
3335: #endif
3336:     GROW;
3337:     c = CUR_CHAR(l);
3338:     if ((c == ' ') || (c == '>') || (c == '/')) /* accelerators */
3339:         return(NULL);
3340:     if ((!xmlIsNameStartChar(ctxt, c)) || (c == ':'))
3341:         return(NULL);
3342:     /* a terminator, a non name character or ':' ends the NCName */
3343:     while (!((c == ' ') || (c == '>') || (c == '/') ||
3344:              (!xmlIsNameChar(ctxt, c)) || (c == ':'))) {
3345:         if (sinceGrow++ > 100) {
3346:             sinceGrow = 0;
3347:             GROW;
3348:         }
3349:         len += l;
3350:         NEXTL(l);
3351:         c = CUR_CHAR(l);
3352:     }
3353:     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3354: }
3355:
3356: /**
3357:  * xmlParseNCName:
3358:  * @ctxt: an XML parser context
3359:  *
3360:  * Parse an XML NCName, i.e. a name that contains no ':'.
3361:  *
3362:  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3363:  *                      CombiningChar | Extender
3364:  *
3365:  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3366:  *
3367:  * An ASCII-only byte scan is attempted first; input it cannot handle
3368:  * is delegated to xmlParseNCNameComplex().
3369:  *
3370:  * Returns the Name parsed or NULL
3371:  */
3372: static const xmlChar *
3373: xmlParseNCName(xmlParserCtxtPtr ctxt) {
3374:     const xmlChar *scan;
3375:     const xmlChar *name;
3376:     int nbytes = 0;
3377:
3378: #ifdef DEBUG
3379:     nbParseNCName++;
3380: #endif
3381:     scan = ctxt->input->cur;
3382:     /* first byte: ASCII letter or '_' */
3383:     if (!(((*scan >= 0x61) && (*scan <= 0x7A)) ||
3384:           ((*scan >= 0x41) && (*scan <= 0x5A)) ||
3385:           (*scan == '_')))
3386:         return(xmlParseNCNameComplex(ctxt));
3387:
3388:     /* following bytes: ASCII letters, digits, '_', '-' and '.' */
3389:     do {
3390:         scan++;
3391:     } while (((*scan >= 0x61) && (*scan <= 0x7A)) ||
3392:              ((*scan >= 0x41) && (*scan <= 0x5A)) ||
3393:              ((*scan >= 0x30) && (*scan <= 0x39)) ||
3394:              (*scan == '_') || (*scan == '-') ||
3395:              (*scan == '.'));
3396:
3397:     /* only a non-NUL ASCII terminator validates the scan */
3398:     if (!((*scan > 0) && (*scan < 0x80)))
3399:         return(xmlParseNCNameComplex(ctxt));
3400:     nbytes = scan - ctxt->input->cur;
3401:     name = xmlDictLookup(ctxt->dict, ctxt->input->cur, nbytes);
3402:     ctxt->input->cur = scan;
3403:     ctxt->nbChars += nbytes;
3404:     ctxt->input->col += nbytes;
3405:     if (name == NULL)
3406:         xmlErrMemory(ctxt, NULL);
3407:     return(name);
3408: }
3409:
3410: /**
3411:  * xmlParseNameAndCompare:
3412:  * @ctxt: an XML parser context
3413:  * @other: the name the input is expected to match
3414:  *
3415:  * Parse an XML name and compare it with @other (specialized for end tags).
3416:  * The input bytes are first compared directly with @other; the column
3417:  * counter is only advanced when that comparison succeeds, because the
3418:  * xmlParseName() fallback restarts from the unchanged input position.
3419:  *
3420:  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3421:  * and the name for mismatch
3422:  */
3423: static const xmlChar *
3424: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3425:     register const xmlChar *cmp = other;
3426:     register const xmlChar *in;
3427:     const xmlChar *ret;
3428:
3429:     GROW;
3430:     in = ctxt->input->cur;
3431:     while (*in != 0 && *in == *cmp) {
3432:         ++in;
3433:         ++cmp;
3434:     }
3435:     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3436:         /* success: account for the matched bytes exactly once */
3437:         ctxt->input->col += in - ctxt->input->cur;
3438:         ctxt->input->cur = in;
3439:         return (const xmlChar*) 1;
3440:     }
3441:     /* failure (or end of input buffer), check with full function */
3442:     ret = xmlParseName (ctxt);
3443:     /* strings coming from the dictionary, direct compare possible */
3444:     if (ret == other)
3445:         return (const xmlChar*) 1;
3446:     return ret;
3447: }
3448:
3449: /**
3450:  * xmlParseStringName:
3451:  * @ctxt: an XML parser context
3452:  * @str: a pointer to the string pointer (IN/OUT)
3453:  *
3454:  * Parse an XML name from the string *@str instead of the parser input.
3455:  *
3456:  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457:  *                  CombiningChar | Extender
3458:  *
3459:  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3460:  *
3461:  * [6] Names ::= Name (#x20 Name)*
3462:  *
3463:  * Returns the Name parsed, in a newly allocated buffer, or NULL. The @str
3464:  * pointer is updated to the current location in the string once the name
3465:  * has been scanned.
3466:  */
3467: static xmlChar *
3468: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3469:     xmlChar buf[XML_MAX_NAMELEN + 5];
3470:     xmlChar *buffer;
3471:     const xmlChar *cur = *str;
3472:     int len = 0, l;
3473:     int c;
3474:     int max;
3475:
3476: #ifdef DEBUG
3477:     nbParseStringName++;
3478: #endif
3479:     c = CUR_SCHAR(cur, l);
3480:     if (!xmlIsNameStartChar(ctxt, c))
3481:         return(NULL);
3482:     COPY_BUF(l,buf,len,c);
3483:     cur += l;
3484:     c = CUR_SCHAR(cur, l);
3485:     /* common case: the whole name fits in the on-stack buffer */
3486:     for (;;) {
3487:         if (!xmlIsNameChar(ctxt, c)) {
3488:             *str = cur;
3489:             return(xmlStrndup(buf, len));
3490:         }
3491:         COPY_BUF(l,buf,len,c);
3492:         cur += l;
3493:         c = CUR_SCHAR(cur, l);
3494:         if (len >= XML_MAX_NAMELEN) /* test bigentname.xml */
3495:             break;
3496:     }
3497:
3498:     /*
3499:      * Okay someone managed to make a huge name, so he's ready to pay
3500:      * for the processing speed: continue in a growable heap buffer.
3501:      */
3502:     max = len * 2;
3503:     buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3504:     if (buffer == NULL) {
3505:         xmlErrMemory(ctxt, NULL);
3506:         return(NULL);
3507:     }
3508:     memcpy(buffer, buf, len);
3509:     while (xmlIsNameChar(ctxt, c)) {
3510:         if (len + 10 > max) {
3511:             xmlChar *tmp;
3512:             max *= 2;
3513:             tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar));
3514:             if (tmp == NULL) {
3515:                 xmlErrMemory(ctxt, NULL);
3516:                 xmlFree(buffer);
3517:                 return(NULL);
3518:             }
3519:             buffer = tmp;
3520:         }
3521:         COPY_BUF(l,buffer,len,c);
3522:         cur += l;
3523:         c = CUR_SCHAR(cur, l);
3524:     }
3525:     buffer[len] = 0;
3526:     *str = cur;
3527:     return(buffer);
3528: }
3529:
3530: /**
3531:  * xmlParseNmtoken:
3532:  * @ctxt: an XML parser context
3533:  *
3534:  * Parse an XML Nmtoken from the parser input.
3535:  *
3536:  * [7] Nmtoken ::= (NameChar)+
3537:  *
3538:  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3539:  *
3540:  * Returns the Nmtoken parsed, in a newly allocated buffer, or NULL (no
3541:  * name character at the current position, or allocation failure).
3542:  */
3543: xmlChar *
3544: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3545:     xmlChar buf[XML_MAX_NAMELEN + 5];
3546:     xmlChar *buffer;
3547:     int len = 0, l;
3548:     int c;
3549:     int max;
3550:     int sinceGrow = 0;
3551:
3552: #ifdef DEBUG
3553:     nbParseNmToken++;
3554: #endif
3555:     GROW;
3556:     c = CUR_CHAR(l);
3557:     /* common case: the whole token fits in the on-stack buffer */
3558:     for (;;) {
3559:         if (!xmlIsNameChar(ctxt, c)) {
3560:             if (len == 0)
3561:                 return(NULL);
3562:             return(xmlStrndup(buf, len));
3563:         }
3564:         if (sinceGrow++ > 100) {
3565:             sinceGrow = 0;
3566:             GROW;
3567:         }
3568:         COPY_BUF(l,buf,len,c);
3569:         NEXTL(l);
3570:         c = CUR_CHAR(l);
3571:         if (len >= XML_MAX_NAMELEN)
3572:             break;
3573:     }
3574:
3575:     /*
3576:      * Okay someone managed to make a huge token, so he's ready to pay
3577:      * for the processing speed: continue in a growable heap buffer.
3578:      */
3579:     max = len * 2;
3580:     buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3581:     if (buffer == NULL) {
3582:         xmlErrMemory(ctxt, NULL);
3583:         return(NULL);
3584:     }
3585:     memcpy(buffer, buf, len);
3586:     while (xmlIsNameChar(ctxt, c)) {
3587:         if (sinceGrow++ > 100) {
3588:             sinceGrow = 0;
3589:             GROW;
3590:         }
3591:         if (len + 10 > max) {
3592:             xmlChar *tmp;
3593:             max *= 2;
3594:             tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar));
3595:             if (tmp == NULL) {
3596:                 xmlErrMemory(ctxt, NULL);
3597:                 xmlFree(buffer);
3598:                 return(NULL);
3599:             }
3600:             buffer = tmp;
3601:         }
3602:         COPY_BUF(l,buffer,len,c);
3603:         NEXTL(l);
3604:         c = CUR_CHAR(l);
3605:     }
3606:     buffer[len] = 0;
3607:     return(buffer);
3608: }
3609:
3610: /**
3611: * xmlParseEntityValue:
3612: * @ctxt: an XML parser context
3613: * @orig: if non-NULL store a copy of the original entity value
3614: *
3615: * parse a value for ENTITY declarations
3616: *
3617: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3618: * "'" ([^%&'] | PEReference | Reference)* "'"
3619: *
      * The raw literal is first copied into a heap buffer, then every '%'
      * and every '&' not followed by '#' in that copy is checked to start a
      * "name;" reference, and finally the copy is passed to
      * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF only.
      * On success, when @orig is non-NULL, *@orig receives the unsubstituted
      * buffer instead of it being freed here. When the closing quote is not
      * found the buffer is freed, *@orig is left untouched and NULL is
      * returned. ctxt->instate is set to XML_PARSER_ENTITY_VALUE and is not
      * reset by this function.
      *
3620: * Returns the EntityValue parsed with reference substituted or NULL
3621: */
3622:
3623: xmlChar *
3624: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3625: xmlChar *buf = NULL;
3626: int len = 0;
3627: int size = XML_PARSER_BUFFER_SIZE;
3628: int c, l;
3629: xmlChar stop;
3630: xmlChar *ret = NULL;
3631: const xmlChar *cur = NULL;
3632: xmlParserInputPtr input;
3633:
3634: if (RAW == '"') stop = '"'; /* the opening quote selects the closing delimiter */
3635: else if (RAW == '\'') stop = '\'';
3636: else {
3637: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3638: return(NULL);
3639: }
3640: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3641: if (buf == NULL) {
3642: xmlErrMemory(ctxt, NULL);
3643: return(NULL);
3644: }
3645:
3646: /*
3647: * The content of the entity definition is copied in a buffer.
3648: */
3649:
3650: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3651: input = ctxt->input; /* remembered so that only a quote in this input ends the literal */
3652: GROW;
3653: NEXT;
3654: c = CUR_CHAR(l);
3655: /*
3656: * NOTE: 4.4.5 Included in Literal
3657: * When a parameter entity reference appears in a literal entity
3658: * value, ... a single or double quote character in the replacement
3659: * text is always treated as a normal data character and will not
3660: * terminate the literal.
3661: * In practice it means we stop the loop only when back at parsing
3662: * the initial entity and the quote is found
3663: */
3664: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3665: (ctxt->input != input))) {
3666: if (len + 5 >= size) { /* double the buffer before it runs out of room */
3667: xmlChar *tmp;
3668:
3669: size *= 2;
3670: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3671: if (tmp == NULL) {
3672: xmlErrMemory(ctxt, NULL);
3673: xmlFree(buf);
3674: return(NULL);
3675: }
3676: buf = tmp;
3677: }
3678: COPY_BUF(l,buf,len,c);
3679: NEXTL(l);
3680: /*
3681: * Pop-up of finished entities.
3682: */
3683: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3684: xmlPopInput(ctxt);
3685:
3686: GROW;
3687: c = CUR_CHAR(l);
3688: if (c == 0) { /* nothing available: try to get more input once before giving up */
3689: GROW;
3690: c = CUR_CHAR(l);
3691: }
3692: }
3693: buf[len] = 0;
3694:
3695: /*
3696: * Raise problem w.r.t. '&' and '%' being used in non-entities
3697: * reference constructs. Note Charref will be handled in
3698: * xmlStringDecodeEntities()
3699: */
3700: cur = buf;
3701: while (*cur != 0) { /* non input consuming */
3702: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3703: xmlChar *name;
3704: xmlChar tmp = *cur; /* remember which of '%' / '&' introduced the reference */
3705:
3706: cur++;
3707: name = xmlParseStringName(ctxt, &cur);
3708: if ((name == NULL) || (*cur != ';')) {
3709: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3710: "EntityValue: '%c' forbidden except for entities references\n",
3711: tmp);
3712: }
3713: if ((tmp == '%') && (ctxt->inSubset == 1) &&
3714: (ctxt->inputNr == 1)) {
3715: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3716: }
3717: if (name != NULL)
3718: xmlFree(name);
3719: if (*cur == 0) /* do not step past the terminating 0 below */
3720: break;
3721: }
3722: cur++;
3723: }
3724:
3725: /*
3726: * Then PEReference entities are substituted.
3727: */
3728: if (c != stop) {
3729: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3730: xmlFree(buf);
3731: } else {
3732: NEXT;
3733: /*
3734: * NOTE: 4.4.7 Bypassed
3735: * When a general entity reference appears in the EntityValue in
3736: * an entity declaration, it is bypassed and left as is.
3737: * so XML_SUBSTITUTE_REF is not set here.
3738: */
3739: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3740: 0, 0, 0);
3741: if (orig != NULL)
3742: *orig = buf; /* handed to the caller instead of being freed */
3743: else
3744: xmlFree(buf);
3745: }
3746:
3747: return(ret);
3748: }
3749:
3750: /**
3751: * xmlParseAttValueComplex:
3752: * @ctxt: an XML parser context
3753: * @attlen: if non-NULL, receives the length of the resulting attribute value
3754: * @normalize: whether to apply the inner normalization
3755: *
3756: * parse a value for an attribute, this is the fallback function
3757: * of xmlParseAttValue() when the attribute parsing requires handling
3758: * of non-ASCII characters, or normalization compaction.
3759: * With @normalize set, leading blanks are skipped, runs of blanks collapse to one #x20 and trailing #x20 are trimmed.
3760: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761: */
3762: static xmlChar *
3763: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3764: xmlChar limit = 0;
3765: xmlChar *buf = NULL;
3766: xmlChar *rep = NULL;
3767: int len = 0;
3768: int buf_size = 0;
3769: int c, l, in_space = 0;
3770: xmlChar *current = NULL;
3771: xmlEntityPtr ent;
3772:
3773: if (NXT(0) == '"') {
3774: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3775: limit = '"';
3776: NEXT;
3777: } else if (NXT(0) == '\'') {
3778: limit = '\'';
3779: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3780: NEXT;
3781: } else {
3782: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3783: return(NULL);
3784: }
3785:
3786: /*
3787: * allocate a translation buffer.
3788: */
3789: buf_size = XML_PARSER_BUFFER_SIZE;
3790: buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3791: if (buf == NULL) goto mem_error;
3792:
3793: /*
3794: * OK loop until we reach one of the ending char or a size limit.
3795: */
3796: c = CUR_CHAR(l);
3797: while ((NXT(0) != limit) && /* checked */
3798: (IS_CHAR(c)) && (c != '<')) {
3799: if (c == 0) break;
3800: if (c == '&') {
3801: in_space = 0;
3802: if (NXT(1) == '#') {
3803: int val = xmlParseCharRef(ctxt);
3804:
3805: if (val == '&') {
3806: if (ctxt->replaceEntities) {
3807: if (len > buf_size - 10) {
3808: growBuffer(buf, 10);
3809: }
3810: buf[len++] = '&';
3811: } else {
3812: /*
3813: * The reparsing will be done in xmlStringGetNodeList()
3814: * called by the attribute() function in SAX.c
3815: */
3816: if (len > buf_size - 10) {
3817: growBuffer(buf, 10);
3818: }
3819: buf[len++] = '&';
3820: buf[len++] = '#';
3821: buf[len++] = '3';
3822: buf[len++] = '8';
3823: buf[len++] = ';';
3824: }
3825: } else if (val != 0) {
3826: if (len > buf_size - 10) {
3827: growBuffer(buf, 10);
3828: }
3829: len += xmlCopyChar(0, &buf[len], val);
3830: }
3831: } else {
3832: ent = xmlParseEntityRef(ctxt);
3833: ctxt->nbentities++;
3834: if (ent != NULL)
3835: ctxt->nbentities += ent->owner;
3836: if ((ent != NULL) &&
3837: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3838: if (len > buf_size - 10) {
3839: growBuffer(buf, 10);
3840: }
3841: if ((ctxt->replaceEntities == 0) &&
3842: (ent->content[0] == '&')) {
3843: buf[len++] = '&';
3844: buf[len++] = '#';
3845: buf[len++] = '3';
3846: buf[len++] = '8';
3847: buf[len++] = ';';
3848: } else {
3849: buf[len++] = ent->content[0];
3850: }
3851: } else if ((ent != NULL) &&
3852: (ctxt->replaceEntities != 0)) {
3853: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3854: rep = xmlStringDecodeEntities(ctxt, ent->content,
3855: XML_SUBSTITUTE_REF,
3856: 0, 0, 0);
3857: if (rep != NULL) {
3858: current = rep;
3859: while (*current != 0) { /* non input consuming */
3860: if ((*current == 0xD) || (*current == 0xA) ||
3861: (*current == 0x9)) {
3862: buf[len++] = 0x20;
3863: current++;
3864: } else
3865: buf[len++] = *current++;
3866: if (len > buf_size - 10) {
3867: growBuffer(buf, 10);
3868: }
3869: }
3870: xmlFree(rep);
3871: rep = NULL;
3872: }
3873: } else {
3874: if (len > buf_size - 10) {
3875: growBuffer(buf, 10);
3876: }
3877: if (ent->content != NULL)
3878: buf[len++] = ent->content[0];
3879: }
3880: } else if (ent != NULL) {
3881: int i = xmlStrlen(ent->name);
3882: const xmlChar *cur = ent->name;
3883:
3884: /*
3885: * This may look absurd but is needed to detect
3886: * entities problems
3887: */
3888: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3889: (ent->content != NULL)) {
3890: rep = xmlStringDecodeEntities(ctxt, ent->content,
3891: XML_SUBSTITUTE_REF, 0, 0, 0);
3892: if (rep != NULL) {
3893: xmlFree(rep);
3894: rep = NULL;
3895: }
3896: }
3897:
3898: /*
3899: * Just output the reference
3900: */
3901: buf[len++] = '&';
3902: while (len > buf_size - i - 10) {
3903: growBuffer(buf, i + 10);
3904: }
3905: for (;i > 0;i--)
3906: buf[len++] = *cur++;
3907: buf[len++] = ';';
3908: }
3909: }
3910: } else {
3911: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3912: if ((len != 0) || (!normalize)) {
3913: if ((!normalize) || (!in_space)) {
3914: COPY_BUF(l,buf,len,0x20);
3915: while (len > buf_size - 10) {
3916: growBuffer(buf, 10);
3917: }
3918: }
3919: in_space = 1;
3920: }
3921: } else {
3922: in_space = 0;
3923: COPY_BUF(l,buf,len,c);
3924: if (len > buf_size - 10) {
3925: growBuffer(buf, 10);
3926: }
3927: }
3928: NEXTL(l);
3929: }
3930: GROW;
3931: c = CUR_CHAR(l);
3932: }
3933: if ((in_space) && (normalize)) {
3934: while ((len > 0) && (buf[len - 1] == 0x20)) len--; /* the value may be all spaces (e.g. from an entity): never read buf[-1] */
3935: }
3936: buf[len] = 0;
3937: if (RAW == '<') {
3938: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3939: } else if (RAW != limit) {
3940: if ((c != 0) && (!IS_CHAR(c))) {
3941: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3942: "invalid character in attribute value\n");
3943: } else {
3944: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3945: "AttValue: ' expected\n");
3946: }
3947: } else
3948: NEXT;
3949: if (attlen != NULL) *attlen = len;
3950: return(buf);
3951:
3952: mem_error: /* NOTE(review): presumably also reached from growBuffer() on failure -- confirm in the macro */
3953: xmlErrMemory(ctxt, NULL);
3954: if (buf != NULL)
3955: xmlFree(buf);
3956: if (rep != NULL)
3957: xmlFree(rep);
3958: return(NULL);
3959: }
3960:
3961: /**
3962:  * xmlParseAttValue:
3963:  * @ctxt: an XML parser context
3964:  *
3965:  * Parse a value for an attribute; returns NULL at once when @ctxt or its
3966:  * input is NULL, otherwise defers to xmlParseAttValueInternal().
3967:  * Note: the parser won't do substitution of entities here, this
3968:  * will be handled later in xmlStringGetNodeList
3969:  *
3970:  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3971:  *                   "'" ([^<&'] | Reference)* "'"
3972:  *
3973:  * 3.3.3 Attribute-Value Normalization:
3974:  * Before the value of an attribute is passed to the application or
3975:  * checked for validity, the XML processor must normalize it as follows:
3976:  * - a character reference is processed by appending the referenced
3977:  *   character to the attribute value
3978:  * - an entity reference is processed by recursively processing the
3979:  *   replacement text of the entity
3980:  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3981:  *   appending #x20 to the normalized value, except that only a single
3982:  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3983:  *   parsed entity or the literal entity value of an internal parsed entity
3984:  * - other characters are processed by appending them to the normalized value
3985:  * If the declared value is not CDATA, then the XML processor must further
3986:  * process the normalized attribute value by discarding any leading and
3987:  * trailing space (#x20) characters, and by replacing sequences of space
3988:  * (#x20) characters by a single space (#x20) character.
3989:  * All attributes for which no declaration has been read should be treated
3990:  * by a non-validating parser as if declared CDATA.
3991:  *
3992:  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3993:  */
3994: xmlChar *
3995: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3996:     if ((ctxt != NULL) && (ctxt->input != NULL))
3997:         return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3998:     return(NULL);
3999: }
4000:
4001: /**
4002:  * xmlParseSystemLiteral:
4003:  * @ctxt: an XML parser context
4004:  *
4005:  * Parse an XML system literal. The parser state is switched to
4006:  * XML_PARSER_SYSTEM_LITERAL while the literal is scanned and is put back
4007:  * to its previous value afterwards.
4008:  *
4009:  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4010:  *
4011:  * Returns the SystemLiteral parsed or NULL. If the scan stops on anything
4012:  * but a valid character, an error is raised yet the text is still returned.
4013:  */
4014: xmlChar *
4015: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4016:     xmlChar *buf = NULL;
4017:     int len = 0;
4018:     int size = XML_PARSER_BUFFER_SIZE;
4019:     int cur, l;
4020:     xmlChar stop;
4021:     int state = ctxt->instate;
4022:     int count = 0;
4023:
4024:     SHRINK;
4025:     /* the opening quote also selects the closing delimiter */
4026:     stop = RAW;
4027:     if ((stop != '"') && (stop != '\'')) {
4028:         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4029:         return(NULL);
4030:     }
4031:     NEXT;
4032:
4033:     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4034:     if (buf == NULL) {
4035:         xmlErrMemory(ctxt, NULL);
4036:         return(NULL);
4037:     }
4038:     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4039:     cur = CUR_CHAR(l);
4040:     for (;;) {
4041:         if ((!IS_CHAR(cur)) || (cur == stop)) /* checked */
4042:             break;
4043:         if (len + 5 >= size) {
4044:             xmlChar *tmp;
4045:
4046:             size *= 2;
4047:             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4048:             if (tmp == NULL) {
4049:                 xmlFree(buf);
4050:                 xmlErrMemory(ctxt, NULL);
4051:                 ctxt->instate = (xmlParserInputState) state;
4052:                 return(NULL);
4053:             }
4054:             buf = tmp;
4055:         }
4056:         if (++count > 50) {
4057:             GROW;
4058:             count = 0;
4059:         }
4060:         COPY_BUF(l,buf,len,cur);
4061:         NEXTL(l);
4062:         cur = CUR_CHAR(l);
4063:         if (cur == 0) {
4064:             GROW;
4065:             SHRINK;
4066:             cur = CUR_CHAR(l);
4067:         }
4068:     }
4069:     buf[len] = 0;
4070:     ctxt->instate = (xmlParserInputState) state;
4071:     if (IS_CHAR(cur)) {
4072:         NEXT;
4073:     } else {
4074:         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4075:     }
4076:     return(buf);
4077: }
4078:
4079: /**
4080:  * xmlParsePubidLiteral:
4081:  * @ctxt: an XML parser context
4082:  *
4083:  * Parse an XML public identifier literal. The parser state is
4084:  * XML_PARSER_PUBLIC_LITERAL while the literal is scanned.
4085:  *
4086:  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4087:  *
4088:  * Returns the PubidLiteral parsed or NULL. If the scan stops on anything
4089:  * but the closing quote an error is raised, yet the text read so far is
4090:  * still returned.
4091:  */
4092: xmlChar *
4093: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4094:     xmlChar *buf = NULL;
4095:     int len = 0;
4096:     int size = XML_PARSER_BUFFER_SIZE;
4097:     xmlChar cur;
4098:     xmlChar stop;
4099:     int count = 0;
4100:     xmlParserInputState oldstate = ctxt->instate;
4101:
4102:     SHRINK;
4103:     /* the opening quote also selects the closing delimiter */
4104:     stop = RAW;
4105:     if ((stop != '"') && (stop != '\'')) {
4106:         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4107:         return(NULL);
4108:     }
4109:     NEXT;
4110:
4111:     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4112:     if (buf == NULL) {
4113:         xmlErrMemory(ctxt, NULL);
4114:         return(NULL);
4115:     }
4116:     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4117:     cur = CUR;
4118:     for (;;) {
4119:         if ((!(IS_PUBIDCHAR_CH(cur))) || (cur == stop)) /* checked */
4120:             break;
4121:         if (len + 1 >= size) {
4122:             xmlChar *tmp;
4123:             size *= 2;
4124:             tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4125:             if (tmp == NULL) {
4126:                 xmlErrMemory(ctxt, NULL);
4127:                 xmlFree(buf);
4128:                 return(NULL);
4129:             }
4130:             buf = tmp;
4131:         }
4132:         buf[len++] = cur;
4133:         if (++count > 50) {
4134:             GROW;
4135:             count = 0;
4136:         }
4137:         NEXT;
4138:         cur = CUR;
4139:         if (cur == 0) {
4140:             GROW;
4141:             SHRINK;
4142:             cur = CUR;
4143:         }
4144:     }
4145:     buf[len] = 0;
4146:     if (cur == stop) {
4147:         NEXT;
4148:     } else {
4149:         xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4150:     }
4151:     ctxt->instate = oldstate;
4152:     return(buf);
4153: }
4154:
/* Forward declaration; xmlParseCharData() below hands over to this fallback. */
4155: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4156:
4157: /*
4158: * used for the test in the inner loop of the char data testing
      * (indexed by byte value in xmlParseCharData(): a non-zero entry, equal
      * to the index itself, marks a byte the fast scan may step over; the
      * zero entries are the control characters other than 0x09, '&', '<',
      * ']' and every byte from 0x80 up)
4159: */
4160: static const unsigned char test_char_data[256] = {
4161: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4162: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4163: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4164: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4165: 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4166: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4167: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4168: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4169: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4170: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4171: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4172: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4173: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4174: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4175: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4176: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4177: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4178: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4179: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4180: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4181: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4182: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4183: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4184: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4185: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4186: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4187: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4188: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4189: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4190: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4191: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4192: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4193: };
4194:
4195: /**
4196: * xmlParseCharData:
4197: * @ctxt: an XML parser context
4198: * @cdata: int indicating whether we are within a CDATA section
4199: *
4200: * parse a CharData section.
4201: * if we are within a CDATA section ']]>' marks an end of section.
4202: *
4203: * The right angle bracket (>) may be represented using the string "&gt;",
4204: * and must, for compatibility, be escaped using "&gt;" or a character
4205: * reference when it appears in the string "]]>" in content, when that
4206: * string is not marking the end of a CDATA section.
4207: *
4208: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
      *
      * When @cdata is zero a fast path scans the input bytes directly and
      * passes them, without copying, to the SAX ignorableWhitespace or
      * characters callback; it returns at '<' or '&' and reports "]]>" as
      * XML_ERR_MISPLACED_CDATA_END. When @cdata is non-zero, or when the
      * fast path stops on a byte it does not handle, the work is handed to
      * xmlParseCharDataComplex().
4209: */
4210:
4211: void
4212: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4213: const xmlChar *in;
4214: int nbchar = 0;
4215: int line = ctxt->input->line; /* position to restore before the complex fallback */
4216: int col = ctxt->input->col;
4217: int ccol;
4218:
4219: SHRINK;
4220: GROW;
4221: /*
4222: * Accelerated common case where input don't need to be
4223: * modified before passing it to the handler.
4224: */
4225: if (!cdata) {
4226: in = ctxt->input->cur;
4227: do {
4228: get_more_space: /* skip a leading run of spaces and line feeds */
4229: while (*in == 0x20) { in++; ctxt->input->col++; }
4230: if (*in == 0xA) {
4231: do {
4232: ctxt->input->line++; ctxt->input->col = 1;
4233: in++;
4234: } while (*in == 0xA);
4235: goto get_more_space;
4236: }
4237: if (*in == '<') { /* only spaces and line feeds were skipped before this '<' */
4238: nbchar = in - ctxt->input->cur;
4239: if (nbchar > 0) {
4240: const xmlChar *tmp = ctxt->input->cur;
4241: ctxt->input->cur = in;
4242:
4243: if ((ctxt->sax != NULL) &&
4244: (ctxt->sax->ignorableWhitespace !=
4245: ctxt->sax->characters)) {
4246: if (areBlanks(ctxt, tmp, nbchar, 1)) {
4247: if (ctxt->sax->ignorableWhitespace != NULL)
4248: ctxt->sax->ignorableWhitespace(ctxt->userData,
4249: tmp, nbchar);
4250: } else {
4251: if (ctxt->sax->characters != NULL)
4252: ctxt->sax->characters(ctxt->userData,
4253: tmp, nbchar);
4254: if (*ctxt->space == -1)
4255: *ctxt->space = -2;
4256: }
4257: } else if ((ctxt->sax != NULL) &&
4258: (ctxt->sax->characters != NULL)) {
4259: ctxt->sax->characters(ctxt->userData,
4260: tmp, nbchar);
4261: }
4262: }
4263: return;
4264: }
4265:
4266: get_more: /* scan ordinary character data */
4267: ccol = ctxt->input->col;
4268: while (test_char_data[*in]) { /* table is non-zero for bytes that need no special handling */
4269: in++;
4270: ccol++;
4271: }
4272: ctxt->input->col = ccol;
4273: if (*in == 0xA) {
4274: do {
4275: ctxt->input->line++; ctxt->input->col = 1;
4276: in++;
4277: } while (*in == 0xA);
4278: goto get_more;
4279: }
4280: if (*in == ']') {
4281: if ((in[1] == ']') && (in[2] == '>')) {
4282: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4283: ctxt->input->cur = in;
4284: return;
4285: }
4286: in++; /* a ']' that does not start "]]>" is ordinary data */
4287: ctxt->input->col++;
4288: goto get_more;
4289: }
4290: nbchar = in - ctxt->input->cur;
4291: if (nbchar > 0) { /* deliver what has been scanned so far */
4292: if ((ctxt->sax != NULL) &&
4293: (ctxt->sax->ignorableWhitespace !=
4294: ctxt->sax->characters) &&
4295: (IS_BLANK_CH(*ctxt->input->cur))) {
4296: const xmlChar *tmp = ctxt->input->cur;
4297: ctxt->input->cur = in;
4298:
4299: if (areBlanks(ctxt, tmp, nbchar, 0)) {
4300: if (ctxt->sax->ignorableWhitespace != NULL)
4301: ctxt->sax->ignorableWhitespace(ctxt->userData,
4302: tmp, nbchar);
4303: } else {
4304: if (ctxt->sax->characters != NULL)
4305: ctxt->sax->characters(ctxt->userData,
4306: tmp, nbchar);
4307: if (*ctxt->space == -1)
4308: *ctxt->space = -2;
4309: }
4310: line = ctxt->input->line;
4311: col = ctxt->input->col;
4312: } else if (ctxt->sax != NULL) {
4313: if (ctxt->sax->characters != NULL)
4314: ctxt->sax->characters(ctxt->userData,
4315: ctxt->input->cur, nbchar);
4316: line = ctxt->input->line;
4317: col = ctxt->input->col;
4318: }
4319: /* something really bad happened in the SAX callback */
4320: if (ctxt->instate != XML_PARSER_CONTENT)
4321: return;
4322: }
4323: ctxt->input->cur = in;
4324: if (*in == 0xD) { /* CR LF: the CR is dropped, the LF stays in the pending data */
4325: in++;
4326: if (*in == 0xA) {
4327: ctxt->input->cur = in;
4328: in++;
4329: ctxt->input->line++; ctxt->input->col = 1;
4330: continue; /* while */
4331: }
4332: in--;
4333: }
4334: if (*in == '<') {
4335: return;
4336: }
4337: if (*in == '&') {
4338: return;
4339: }
4340: SHRINK;
4341: GROW;
4342: in = ctxt->input->cur;
4343: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4344: nbchar = 0;
4345: }
4346: ctxt->input->line = line; /* back to the position recorded at entry or after the last SAX delivery */
4347: ctxt->input->col = col;
4348: xmlParseCharDataComplex(ctxt, cdata);
4349: }
4350:
4351: /**
4352: * xmlParseCharDataComplex:
4353: * @ctxt: an XML parser context
4354: * @cdata: int indicating whether we are within a CDATA section
4355: *
4356: * parse a CharData section. This is the fallback function
4357: * of xmlParseCharData() when the parsing requires handling
4358: * of non-ASCII characters.
4359: */
4360: static void
4361: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4362: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4363: int nbchar = 0;
4364: int cur, l;
4365: int count = 0;
4366:
4367: SHRINK;
4368: GROW;
4369: cur = CUR_CHAR(l);
4370: while ((cur != '<') && /* checked */
4371: (cur != '&') &&
4372: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4373: if ((cur == ']') && (NXT(1) == ']') &&
4374: (NXT(2) == '>')) {
4375: if (cdata) break;
4376: else {
4377: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4378: }
4379: }
4380: COPY_BUF(l,buf,nbchar,cur);
4381: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4382: buf[nbchar] = 0;
4383:
4384: /*
4385: * OK the segment is to be consumed as chars.
4386: */
4387: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4388: if (areBlanks(ctxt, buf, nbchar, 0)) {
4389: if (ctxt->sax->ignorableWhitespace != NULL)
4390: ctxt->sax->ignorableWhitespace(ctxt->userData,
4391: buf, nbchar);
4392: } else {
4393: if (ctxt->sax->characters != NULL)
4394: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4395: if ((ctxt->sax->characters !=
4396: ctxt->sax->ignorableWhitespace) &&
4397: (*ctxt->space == -1))
4398: *ctxt->space = -2;
4399: }
4400: }
4401: nbchar = 0;
4402: /* something really bad happened in the SAX callback */
4403: if (ctxt->instate != XML_PARSER_CONTENT)
4404: return;
4405: }
4406: count++;
4407: if (count > 50) {
4408: GROW;
4409: count = 0;
4410: }
4411: NEXTL(l);
4412: cur = CUR_CHAR(l);
4413: }
4414: if (nbchar != 0) {
4415: buf[nbchar] = 0;
4416: /*
4417: * OK the segment is to be consumed as chars.
4418: */
4419: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4420: if (areBlanks(ctxt, buf, nbchar, 0)) {
4421: if (ctxt->sax->ignorableWhitespace != NULL)
4422: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4423: } else {
4424: if (ctxt->sax->characters != NULL)
4425: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4426: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4427: (*ctxt->space == -1))
4428: *ctxt->space = -2;
4429: }
4430: }
4431: }
4432: if ((cur != 0) && (!IS_CHAR(cur))) {
4433: /* Generate the error and skip the offending character */
4434: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4435: "PCDATA invalid Char value %d\n",
4436: cur);
4437: NEXTL(l);
4438: }
4439: }
4440:
4441: /**
4442: * xmlParseExternalID:
4443: * @ctxt: an XML parser context
4444: * @publicID: a xmlChar** receiving PubidLiteral
4445: * @strict: indicate whether we should restrict parsing to only
4446: * production [75], see NOTE below
4447: *
4448: * Parse an External ID or a Public ID
4449: *
4450: * NOTE: Productions [75] and [83] interact badly since [75] can generate
4451: * 'PUBLIC' S PubidLiteral S SystemLiteral
4452: *
4453: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4454: * | 'PUBLIC' S PubidLiteral S SystemLiteral
4455: *
4456: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4457: *
4458: * Returns the function returns SystemLiteral and in the second
4459: * case publicID receives PubidLiteral, is strict is off
4460: * it is possible to return NULL and have publicID set.
4461: */
4462:
4463: xmlChar *
4464: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4465: xmlChar *URI = NULL;
4466:
4467: SHRINK;
4468:
4469: *publicID = NULL;
4470: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4471: SKIP(6);
4472: if (!IS_BLANK_CH(CUR)) {
4473: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474: "Space required after 'SYSTEM'\n");
4475: }
4476: SKIP_BLANKS;
4477: URI = xmlParseSystemLiteral(ctxt);
4478: if (URI == NULL) {
4479: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4480: }
4481: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4482: SKIP(6);
4483: if (!IS_BLANK_CH(CUR)) {
4484: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4485: "Space required after 'PUBLIC'\n");
4486: }
4487: SKIP_BLANKS;
4488: *publicID = xmlParsePubidLiteral(ctxt);
4489: if (*publicID == NULL) {
4490: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4491: }
4492: if (strict) {
4493: /*
4494: * We don't handle [83] so "S SystemLiteral" is required.
4495: */
4496: if (!IS_BLANK_CH(CUR)) {
4497: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498: "Space required after the Public Identifier\n");
4499: }
4500: } else {
4501: /*
4502: * We handle [83] so we return immediately, if
4503: * "S SystemLiteral" is not detected. From a purely parsing
4504: * point of view that's a nice mess.
4505: */
4506: const xmlChar *ptr;
4507: GROW;
4508:
4509: ptr = CUR_PTR;
4510: if (!IS_BLANK_CH(*ptr)) return(NULL);
4511:
4512: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4513: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4514: }
4515: SKIP_BLANKS;
4516: URI = xmlParseSystemLiteral(ctxt);
4517: if (URI == NULL) {
4518: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4519: }
4520: }
4521: return(URI);
4522: }
4523:
4524: /**
4525: * xmlParseCommentComplex:
4526: * @ctxt: an XML parser context
4527: * @buf: the already parsed part of the buffer
4528: * @len: number of bytes filles in the buffer
4529: * @size: allocated size of the buffer
4530: *
4531: * Skip an XML (SGML) comment <!-- .... -->
4532: * The spec says that "For compatibility, the string "--" (double-hyphen)
4533: * must not occur within comments. "
4534: * This is the slow routine in case the accelerator for ascii didn't work
4535: *
4536: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4537: */
4538: static void
4539: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4540: int q, ql;
4541: int r, rl;
4542: int cur, l;
4543: int count = 0;
4544: int inputid;
4545:
4546: inputid = ctxt->input->id;
4547:
4548: if (buf == NULL) {
4549: len = 0;
4550: size = XML_PARSER_BUFFER_SIZE;
4551: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4552: if (buf == NULL) {
4553: xmlErrMemory(ctxt, NULL);
4554: return;
4555: }
4556: }
4557: GROW; /* Assure there's enough input data */
4558: q = CUR_CHAR(ql);
4559: if (q == 0)
4560: goto not_terminated;
4561: if (!IS_CHAR(q)) {
4562: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4563: "xmlParseComment: invalid xmlChar value %d\n",
4564: q);
4565: xmlFree (buf);
4566: return;
4567: }
4568: NEXTL(ql);
4569: r = CUR_CHAR(rl);
4570: if (r == 0)
4571: goto not_terminated;
4572: if (!IS_CHAR(r)) {
4573: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4574: "xmlParseComment: invalid xmlChar value %d\n",
4575: q);
4576: xmlFree (buf);
4577: return;
4578: }
4579: NEXTL(rl);
4580: cur = CUR_CHAR(l);
4581: if (cur == 0)
4582: goto not_terminated;
4583: while (IS_CHAR(cur) && /* checked */
4584: ((cur != '>') ||
4585: (r != '-') || (q != '-'))) {
4586: if ((r == '-') && (q == '-')) {
4587: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4588: }
4589: if (len + 5 >= size) {
4590: xmlChar *new_buf;
4591: size *= 2;
4592: new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4593: if (new_buf == NULL) {
4594: xmlFree (buf);
4595: xmlErrMemory(ctxt, NULL);
4596: return;
4597: }
4598: buf = new_buf;
4599: }
4600: COPY_BUF(ql,buf,len,q);
4601: q = r;
4602: ql = rl;
4603: r = cur;
4604: rl = l;
4605:
4606: count++;
4607: if (count > 50) {
4608: GROW;
4609: count = 0;
4610: }
4611: NEXTL(l);
4612: cur = CUR_CHAR(l);
4613: if (cur == 0) {
4614: SHRINK;
4615: GROW;
4616: cur = CUR_CHAR(l);
4617: }
4618: }
4619: buf[len] = 0;
4620: if (cur == 0) {
4621: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4622: "Comment not terminated \n<!--%.50s\n", buf);
4623: } else if (!IS_CHAR(cur)) {
4624: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4625: "xmlParseComment: invalid xmlChar value %d\n",
4626: cur);
4627: } else {
4628: if (inputid != ctxt->input->id) {
4629: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4630: "Comment doesn't start and stop in the same entity\n");
4631: }
4632: NEXT;
4633: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4634: (!ctxt->disableSAX))
4635: ctxt->sax->comment(ctxt->userData, buf);
4636: }
4637: xmlFree(buf);
4638: return;
4639: not_terminated:
4640: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4641: "Comment not terminated\n", NULL);
4642: xmlFree(buf);
4643: return;
4644: }
4645:
4646: /**
4647: * xmlParseComment:
4648: * @ctxt: an XML parser context
4649: *
4650: * Skip an XML (SGML) comment <!-- .... -->
4651: * The spec says that "For compatibility, the string "--" (double-hyphen)
4652: * must not occur within comments. "
4653: *
 * Does nothing unless the input is positioned on the four bytes "<!--".
 * The parser state is switched to XML_PARSER_COMMENT for the duration of
 * the call and put back to its previous value on every exit path.
 *
 * The body of this function is a fast path working directly on the input
 * bytes: it accepts bytes in 0x20..0x7F, tabs and line feeds. Runs of such
 * bytes are appended to a heap buffer, but only if a SAX comment handler
 * is installed. On "-->" the buffer (or "" if nothing was stored) is
 * passed to the handler unless SAX is disabled. Any other byte makes the
 * function give up and call xmlParseCommentComplex() with what has been
 * accumulated so far; that function releases the buffer.
 *
4654: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4655: */
4656: void
4657: xmlParseComment(xmlParserCtxtPtr ctxt) {
4658: xmlChar *buf = NULL;
4659: int size = XML_PARSER_BUFFER_SIZE;
4660: int len = 0;
4661: xmlParserInputState state;
4662: const xmlChar *in;
4663: int nbchar = 0, ccol;
4664: int inputid;
4665:
4666: /*
4667: * Check that there is a comment right here.
4668: */
4669: if ((RAW != '<') || (NXT(1) != '!') ||
4670: (NXT(2) != '-') || (NXT(3) != '-')) return;
4671: state = ctxt->instate;
4672: ctxt->instate = XML_PARSER_COMMENT;
/* remember the input id to detect a comment ending in another entity */
4673: inputid = ctxt->input->id;
4674: SKIP(4);
4675: SHRINK;
4676: GROW;
4677:
4678: /*
4679: * Accelerated common case where input don't need to be
4680: * modified before passing it to the handler.
4681: */
4682: in = ctxt->input->cur;
4683: do {
/* line feeds: only the line/column counters need updating */
4684: if (*in == 0xA) {
4685: do {
4686: ctxt->input->line++; ctxt->input->col = 1;
4687: in++;
4688: } while (*in == 0xA);
4689: }
4690: get_more:
4691: ccol = ctxt->input->col;
/* scan bytes in 0x20..0x7F other than '-', as well as tabs */
4692: while (((*in > '-') && (*in <= 0x7F)) ||
4693: ((*in >= 0x20) && (*in < '-')) ||
4694: (*in == 0x09)) {
4695: in++;
4696: ccol++;
4697: }
4698: ctxt->input->col = ccol;
4699: if (*in == 0xA) {
4700: do {
4701: ctxt->input->line++; ctxt->input->col = 1;
4702: in++;
4703: } while (*in == 0xA);
4704: goto get_more;
4705: }
/* number of bytes scanned since the last committed input position */
4706: nbchar = in - ctxt->input->cur;
4707: /*
4708: * save current set of data
4709: */
4710: if (nbchar > 0) {
4711: if ((ctxt->sax != NULL) &&
4712: (ctxt->sax->comment != NULL)) {
4713: if (buf == NULL) {
/*
 * First chunk: when "--" follows immediately the buffer is sized to
 * hold just this chunk plus the terminator, otherwise spare room of
 * XML_PARSER_BUFFER_SIZE bytes is added.
 */
4714: if ((*in == '-') && (in[1] == '-'))
4715: size = nbchar + 1;
4716: else
4717: size = XML_PARSER_BUFFER_SIZE + nbchar;
4718: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4719: if (buf == NULL) {
4720: xmlErrMemory(ctxt, NULL);
4721: ctxt->instate = state;
4722: return;
4723: }
4724: len = 0;
4725: } else if (len + nbchar + 1 >= size) {
4726: xmlChar *new_buf;
4727: size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4728: new_buf = (xmlChar *) xmlRealloc(buf,
4729: size * sizeof(xmlChar));
4730: if (new_buf == NULL) {
4731: xmlFree (buf);
4732: xmlErrMemory(ctxt, NULL);
4733: ctxt->instate = state;
4734: return;
4735: }
4736: buf = new_buf;
4737: }
4738: memcpy(&buf[len], ctxt->input->cur, nbchar);
4739: len += nbchar;
4740: buf[len] = 0;
4741: }
4742: }
/* commit: everything up to 'in' has been consumed */
4743: ctxt->input->cur = in;
4744: if (*in == 0xA) {
4745: in++;
4746: ctxt->input->line++; ctxt->input->col = 1;
4747: }
/* a CR followed by LF counts as one line end; a lone CR is left at 'in' */
4748: if (*in == 0xD) {
4749: in++;
4750: if (*in == 0xA) {
4751: ctxt->input->cur = in;
4752: in++;
4753: ctxt->input->line++; ctxt->input->col = 1;
4754: continue; /* while */
4755: }
4756: in--;
4757: }
/* SHRINK/GROW may change ctxt->input->cur, hence 'in' is reloaded below */
4758: SHRINK;
4759: GROW;
4760: in = ctxt->input->cur;
4761: if (*in == '-') {
4762: if (in[1] == '-') {
4763: if (in[2] == '>') {
/* "-->" found: end of the comment */
4764: if (ctxt->input->id != inputid) {
4765: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4766: "comment doesn't start and stop in the same entity\n");
4767: }
4768: SKIP(3);
4769: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4770: (!ctxt->disableSAX)) {
4771: if (buf != NULL)
4772: ctxt->sax->comment(ctxt->userData, buf);
4773: else
4774: ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4775: }
4776: if (buf != NULL)
4777: xmlFree(buf);
4778: ctxt->instate = state;
4779: return;
4780: }
/* "--" not followed by '>': report it, then step over both hyphens */
1.1.1.2 ! misho 4781: if (buf != NULL) {
! 4782: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
! 4783: "Double hyphen within comment: "
! 4784: "<!--%.50s\n",
1.1 misho 4785: buf);
1.1.1.2 ! misho 4786: } else
! 4787: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
! 4788: "Double hyphen within comment\n", NULL);
1.1 misho 4789: in++;
4790: ctxt->input->col++;
4791: }
/* step over the (remaining) hyphen and resume scanning */
4792: in++;
4793: ctxt->input->col++;
4794: goto get_more;
4795: }
4796: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* the fast path cannot handle this byte: finish with the slow routine */
4797: xmlParseCommentComplex(ctxt, buf, len, size);
4798: ctxt->instate = state;
4799: return;
4800: }
4801:
4802:
4803: /**
4804: * xmlParsePITarget:
4805: * @ctxt: an XML parser context
4806: *
4807: * parse the name of a PI
4808: *
4809: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4810: *
4811: * Returns the PITarget name or NULL
4812: */
4813:
4814: const xmlChar *
4815: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4816: const xmlChar *name;
4817:
4818: name = xmlParseName(ctxt);
4819: if ((name != NULL) &&
4820: ((name[0] == 'x') || (name[0] == 'X')) &&
4821: ((name[1] == 'm') || (name[1] == 'M')) &&
4822: ((name[2] == 'l') || (name[2] == 'L'))) {
4823: int i;
4824: if ((name[0] == 'x') && (name[1] == 'm') &&
4825: (name[2] == 'l') && (name[3] == 0)) {
4826: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4827: "XML declaration allowed only at the start of the document\n");
4828: return(name);
4829: } else if (name[3] == 0) {
4830: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4831: return(name);
4832: }
4833: for (i = 0;;i++) {
4834: if (xmlW3CPIs[i] == NULL) break;
4835: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4836: return(name);
4837: }
4838: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4839: "xmlParsePITarget: invalid name prefix 'xml'\n",
4840: NULL, NULL);
4841: }
4842: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4843: xmlNsErr(ctxt, XML_NS_ERR_COLON,
4844: "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4845: }
4846: return(name);
4847: }
4848:
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes) with optional blanks around each part and nothing after the
 * closing quote. A syntax error produces an XML_WAR_CATALOG_PI warning,
 * except for a missing '=' which is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;

    /* the quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4910:
4911: /**
4912: * xmlParsePI:
4913: * @ctxt: an XML parser context
4914: *
4915: * parse an XML Processing Instruction.
4916: *
4917: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4918: *
4919: * The processing is transfered to SAX once parsed.
4920: */
4921:
4922: void
4923: xmlParsePI(xmlParserCtxtPtr ctxt) {
4924: xmlChar *buf = NULL;
4925: int len = 0;
4926: int size = XML_PARSER_BUFFER_SIZE;
4927: int cur, l;
4928: const xmlChar *target;
4929: xmlParserInputState state;
4930: int count = 0;
4931:
4932: if ((RAW == '<') && (NXT(1) == '?')) {
4933: xmlParserInputPtr input = ctxt->input;
4934: state = ctxt->instate;
4935: ctxt->instate = XML_PARSER_PI;
4936: /*
4937: * this is a Processing Instruction.
4938: */
4939: SKIP(2);
4940: SHRINK;
4941:
4942: /*
4943: * Parse the target name and check for special support like
4944: * namespace.
4945: */
4946: target = xmlParsePITarget(ctxt);
4947: if (target != NULL) {
4948: if ((RAW == '?') && (NXT(1) == '>')) {
4949: if (input != ctxt->input) {
4950: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4951: "PI declaration doesn't start and stop in the same entity\n");
4952: }
4953: SKIP(2);
4954:
4955: /*
4956: * SAX: PI detected.
4957: */
4958: if ((ctxt->sax) && (!ctxt->disableSAX) &&
4959: (ctxt->sax->processingInstruction != NULL))
4960: ctxt->sax->processingInstruction(ctxt->userData,
4961: target, NULL);
1.1.1.2 ! misho 4962: if (ctxt->instate != XML_PARSER_EOF)
! 4963: ctxt->instate = state;
1.1 misho 4964: return;
4965: }
4966: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4967: if (buf == NULL) {
4968: xmlErrMemory(ctxt, NULL);
4969: ctxt->instate = state;
4970: return;
4971: }
4972: cur = CUR;
4973: if (!IS_BLANK(cur)) {
4974: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4975: "ParsePI: PI %s space expected\n", target);
4976: }
4977: SKIP_BLANKS;
4978: cur = CUR_CHAR(l);
4979: while (IS_CHAR(cur) && /* checked */
4980: ((cur != '?') || (NXT(1) != '>'))) {
4981: if (len + 5 >= size) {
4982: xmlChar *tmp;
4983:
4984: size *= 2;
4985: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4986: if (tmp == NULL) {
4987: xmlErrMemory(ctxt, NULL);
4988: xmlFree(buf);
4989: ctxt->instate = state;
4990: return;
4991: }
4992: buf = tmp;
4993: }
4994: count++;
4995: if (count > 50) {
4996: GROW;
4997: count = 0;
4998: }
4999: COPY_BUF(l,buf,len,cur);
5000: NEXTL(l);
5001: cur = CUR_CHAR(l);
5002: if (cur == 0) {
5003: SHRINK;
5004: GROW;
5005: cur = CUR_CHAR(l);
5006: }
5007: }
5008: buf[len] = 0;
5009: if (cur != '?') {
5010: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5011: "ParsePI: PI %s never end ...\n", target);
5012: } else {
5013: if (input != ctxt->input) {
5014: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5015: "PI declaration doesn't start and stop in the same entity\n");
5016: }
5017: SKIP(2);
5018:
5019: #ifdef LIBXML_CATALOG_ENABLED
5020: if (((state == XML_PARSER_MISC) ||
5021: (state == XML_PARSER_START)) &&
5022: (xmlStrEqual(target, XML_CATALOG_PI))) {
5023: xmlCatalogAllow allow = xmlCatalogGetDefaults();
5024: if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5025: (allow == XML_CATA_ALLOW_ALL))
5026: xmlParseCatalogPI(ctxt, buf);
5027: }
5028: #endif
5029:
5030:
5031: /*
5032: * SAX: PI detected.
5033: */
5034: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5035: (ctxt->sax->processingInstruction != NULL))
5036: ctxt->sax->processingInstruction(ctxt->userData,
5037: target, buf);
5038: }
5039: xmlFree(buf);
5040: } else {
5041: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5042: }
1.1.1.2 ! misho 5043: if (ctxt->instate != XML_PARSER_EOF)
! 5044: ctxt->instate = state;
1.1 misho 5045: }
5046: }
5047:
5048: /**
5049: * xmlParseNotationDecl:
5050: * @ctxt: an XML parser context
5051: *
5052: * parse a notation declaration
5053: *
5054: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5055: *
5056: * Hence there is actually 3 choices:
5057: * 'PUBLIC' S PubidLiteral
5058: * 'PUBLIC' S PubidLiteral S SystemLiteral
5059: * and 'SYSTEM' S SystemLiteral
5060: *
5061: * See the NOTE on xmlParseExternalID().
5062: */
5063:
5064: void
5065: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5066: const xmlChar *name;
5067: xmlChar *Pubid;
5068: xmlChar *Systemid;
5069:
5070: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5071: xmlParserInputPtr input = ctxt->input;
5072: SHRINK;
5073: SKIP(10);
5074: if (!IS_BLANK_CH(CUR)) {
5075: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076: "Space required after '<!NOTATION'\n");
5077: return;
5078: }
5079: SKIP_BLANKS;
5080:
5081: name = xmlParseName(ctxt);
5082: if (name == NULL) {
5083: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5084: return;
5085: }
5086: if (!IS_BLANK_CH(CUR)) {
5087: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5088: "Space required after the NOTATION name'\n");
5089: return;
5090: }
5091: if (xmlStrchr(name, ':') != NULL) {
5092: xmlNsErr(ctxt, XML_NS_ERR_COLON,
5093: "colon are forbidden from notation names '%s'\n",
5094: name, NULL, NULL);
5095: }
5096: SKIP_BLANKS;
5097:
5098: /*
5099: * Parse the IDs.
5100: */
5101: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5102: SKIP_BLANKS;
5103:
5104: if (RAW == '>') {
5105: if (input != ctxt->input) {
5106: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5107: "Notation declaration doesn't start and stop in the same entity\n");
5108: }
5109: NEXT;
5110: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5111: (ctxt->sax->notationDecl != NULL))
5112: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5113: } else {
5114: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5115: }
5116: if (Systemid != NULL) xmlFree(Systemid);
5117: if (Pubid != NULL) xmlFree(Pubid);
5118: }
5119: }
5120:
5121: /**
5122: * xmlParseEntityDecl:
5123: * @ctxt: an XML parser context
5124: *
5125: * parse <!ENTITY declarations
5126: *
5127: * [70] EntityDecl ::= GEDecl | PEDecl
5128: *
5129: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5130: *
5131: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5132: *
5133: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5134: *
5135: * [74] PEDef ::= EntityValue | ExternalID
5136: *
5137: * [76] NDataDecl ::= S 'NDATA' S Name
5138: *
5139: * [ VC: Notation Declared ]
5140: * The Name must match the declared name of a notation.
5141: */
5142:
5143: void
5144: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5145: const xmlChar *name = NULL;
5146: xmlChar *value = NULL;
5147: xmlChar *URI = NULL, *literal = NULL;
5148: const xmlChar *ndata = NULL;
5149: int isParameter = 0;
5150: xmlChar *orig = NULL;
5151: int skipped;
5152:
5153: /* GROW; done in the caller */
5154: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5155: xmlParserInputPtr input = ctxt->input;
5156: SHRINK;
5157: SKIP(8);
5158: skipped = SKIP_BLANKS;
5159: if (skipped == 0) {
5160: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5161: "Space required after '<!ENTITY'\n");
5162: }
5163:
5164: if (RAW == '%') {
5165: NEXT;
5166: skipped = SKIP_BLANKS;
5167: if (skipped == 0) {
5168: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5169: "Space required after '%'\n");
5170: }
5171: isParameter = 1;
5172: }
5173:
5174: name = xmlParseName(ctxt);
5175: if (name == NULL) {
5176: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5177: "xmlParseEntityDecl: no name\n");
5178: return;
5179: }
5180: if (xmlStrchr(name, ':') != NULL) {
5181: xmlNsErr(ctxt, XML_NS_ERR_COLON,
5182: "colon are forbidden from entities names '%s'\n",
5183: name, NULL, NULL);
5184: }
5185: skipped = SKIP_BLANKS;
5186: if (skipped == 0) {
5187: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5188: "Space required after the entity name\n");
5189: }
5190:
5191: ctxt->instate = XML_PARSER_ENTITY_DECL;
5192: /*
5193: * handle the various case of definitions...
5194: */
5195: if (isParameter) {
5196: if ((RAW == '"') || (RAW == '\'')) {
5197: value = xmlParseEntityValue(ctxt, &orig);
5198: if (value) {
5199: if ((ctxt->sax != NULL) &&
5200: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5201: ctxt->sax->entityDecl(ctxt->userData, name,
5202: XML_INTERNAL_PARAMETER_ENTITY,
5203: NULL, NULL, value);
5204: }
5205: } else {
5206: URI = xmlParseExternalID(ctxt, &literal, 1);
5207: if ((URI == NULL) && (literal == NULL)) {
5208: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5209: }
5210: if (URI) {
5211: xmlURIPtr uri;
5212:
5213: uri = xmlParseURI((const char *) URI);
5214: if (uri == NULL) {
5215: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5216: "Invalid URI: %s\n", URI);
5217: /*
5218: * This really ought to be a well formedness error
5219: * but the XML Core WG decided otherwise c.f. issue
5220: * E26 of the XML erratas.
5221: */
5222: } else {
5223: if (uri->fragment != NULL) {
5224: /*
5225: * Okay this is foolish to block those but not
5226: * invalid URIs.
5227: */
5228: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5229: } else {
5230: if ((ctxt->sax != NULL) &&
5231: (!ctxt->disableSAX) &&
5232: (ctxt->sax->entityDecl != NULL))
5233: ctxt->sax->entityDecl(ctxt->userData, name,
5234: XML_EXTERNAL_PARAMETER_ENTITY,
5235: literal, URI, NULL);
5236: }
5237: xmlFreeURI(uri);
5238: }
5239: }
5240: }
5241: } else {
5242: if ((RAW == '"') || (RAW == '\'')) {
5243: value = xmlParseEntityValue(ctxt, &orig);
5244: if ((ctxt->sax != NULL) &&
5245: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5246: ctxt->sax->entityDecl(ctxt->userData, name,
5247: XML_INTERNAL_GENERAL_ENTITY,
5248: NULL, NULL, value);
5249: /*
5250: * For expat compatibility in SAX mode.
5251: */
5252: if ((ctxt->myDoc == NULL) ||
5253: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5254: if (ctxt->myDoc == NULL) {
5255: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5256: if (ctxt->myDoc == NULL) {
5257: xmlErrMemory(ctxt, "New Doc failed");
5258: return;
5259: }
5260: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5261: }
5262: if (ctxt->myDoc->intSubset == NULL)
5263: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5264: BAD_CAST "fake", NULL, NULL);
5265:
5266: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5267: NULL, NULL, value);
5268: }
5269: } else {
5270: URI = xmlParseExternalID(ctxt, &literal, 1);
5271: if ((URI == NULL) && (literal == NULL)) {
5272: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5273: }
5274: if (URI) {
5275: xmlURIPtr uri;
5276:
5277: uri = xmlParseURI((const char *)URI);
5278: if (uri == NULL) {
5279: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5280: "Invalid URI: %s\n", URI);
5281: /*
5282: * This really ought to be a well formedness error
5283: * but the XML Core WG decided otherwise c.f. issue
5284: * E26 of the XML erratas.
5285: */
5286: } else {
5287: if (uri->fragment != NULL) {
5288: /*
5289: * Okay this is foolish to block those but not
5290: * invalid URIs.
5291: */
5292: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5293: }
5294: xmlFreeURI(uri);
5295: }
5296: }
5297: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5298: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5299: "Space required before 'NDATA'\n");
5300: }
5301: SKIP_BLANKS;
5302: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5303: SKIP(5);
5304: if (!IS_BLANK_CH(CUR)) {
5305: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5306: "Space required after 'NDATA'\n");
5307: }
5308: SKIP_BLANKS;
5309: ndata = xmlParseName(ctxt);
5310: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5311: (ctxt->sax->unparsedEntityDecl != NULL))
5312: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5313: literal, URI, ndata);
5314: } else {
5315: if ((ctxt->sax != NULL) &&
5316: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5317: ctxt->sax->entityDecl(ctxt->userData, name,
5318: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5319: literal, URI, NULL);
5320: /*
5321: * For expat compatibility in SAX mode.
5322: * assuming the entity repalcement was asked for
5323: */
5324: if ((ctxt->replaceEntities != 0) &&
5325: ((ctxt->myDoc == NULL) ||
5326: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5327: if (ctxt->myDoc == NULL) {
5328: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5329: if (ctxt->myDoc == NULL) {
5330: xmlErrMemory(ctxt, "New Doc failed");
5331: return;
5332: }
5333: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5334: }
5335:
5336: if (ctxt->myDoc->intSubset == NULL)
5337: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5338: BAD_CAST "fake", NULL, NULL);
5339: xmlSAX2EntityDecl(ctxt, name,
5340: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5341: literal, URI, NULL);
5342: }
5343: }
5344: }
5345: }
5346: SKIP_BLANKS;
5347: if (RAW != '>') {
5348: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5349: "xmlParseEntityDecl: entity %s not terminated\n", name);
5350: } else {
5351: if (input != ctxt->input) {
5352: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5353: "Entity declaration doesn't start and stop in the same entity\n");
5354: }
5355: NEXT;
5356: }
5357: if (orig != NULL) {
5358: /*
5359: * Ugly mechanism to save the raw entity value.
5360: */
5361: xmlEntityPtr cur = NULL;
5362:
5363: if (isParameter) {
5364: if ((ctxt->sax != NULL) &&
5365: (ctxt->sax->getParameterEntity != NULL))
5366: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5367: } else {
5368: if ((ctxt->sax != NULL) &&
5369: (ctxt->sax->getEntity != NULL))
5370: cur = ctxt->sax->getEntity(ctxt->userData, name);
5371: if ((cur == NULL) && (ctxt->userData==ctxt)) {
5372: cur = xmlSAX2GetEntity(ctxt, name);
5373: }
5374: }
5375: if (cur != NULL) {
5376: if (cur->orig != NULL)
5377: xmlFree(orig);
5378: else
5379: cur->orig = orig;
5380: } else
5381: xmlFree(orig);
5382: }
5383: if (value != NULL) xmlFree(value);
5384: if (URI != NULL) xmlFree(URI);
5385: if (literal != NULL) xmlFree(literal);
5386: }
5387: }
5388:
5389: /**
5390: * xmlParseDefaultDecl:
5391: * @ctxt: an XML parser context
5392: * @value: Receive a possible fixed default value for the attribute
5393: *
5394: * Parse an attribute default declaration
5395: *
5396: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5397: *
5398: * [ VC: Required Attribute ]
5399: * if the default declaration is the keyword #REQUIRED, then the
5400: * attribute must be specified for all elements of the type in the
5401: * attribute-list declaration.
5402: *
5403: * [ VC: Attribute Default Legal ]
5404: * The declared default value must meet the lexical constraints of
5405: * the declared attribute type c.f. xmlValidateAttributeDecl()
5406: *
5407: * [ VC: Fixed Attribute Default ]
5408: * if an attribute has a default value declared with the #FIXED
5409: * keyword, instances of that attribute must match the default value.
5410: *
5411: * [ WFC: No < in Attribute Values ]
5412: * handled in xmlParseAttValue()
5413: *
5414: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5415: * or XML_ATTRIBUTE_FIXED.
5416: */
5417:
5418: int
5419: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5420: int val;
5421: xmlChar *ret;
5422:
5423: *value = NULL;
5424: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5425: SKIP(9);
5426: return(XML_ATTRIBUTE_REQUIRED);
5427: }
5428: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5429: SKIP(8);
5430: return(XML_ATTRIBUTE_IMPLIED);
5431: }
5432: val = XML_ATTRIBUTE_NONE;
5433: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5434: SKIP(6);
5435: val = XML_ATTRIBUTE_FIXED;
5436: if (!IS_BLANK_CH(CUR)) {
5437: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438: "Space required after '#FIXED'\n");
5439: }
5440: SKIP_BLANKS;
5441: }
5442: ret = xmlParseAttValue(ctxt);
5443: ctxt->instate = XML_PARSER_DTD;
5444: if (ret == NULL) {
5445: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5446: "Attribute default value declaration error\n");
5447: } else
5448: *value = ret;
5449: return(val);
5450: }
5451:
5452: /**
5453: * xmlParseNotationType:
5454: * @ctxt: an XML parser context
5455: *
5456: * parse an Notation attribute type.
5457: *
5458: * Note: the leading 'NOTATION' S part has already being parsed...
5459: *
5460: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5461: *
5462: * [ VC: Notation Attributes ]
5463: * Values of this type must match one of the notation names included
5464: * in the declaration; all notation names in the declaration must be declared.
5465: *
5466: * Returns: the notation attribute tree built while parsing
5467: */
5468:
5469: xmlEnumerationPtr
5470: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5471: const xmlChar *name;
5472: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5473:
5474: if (RAW != '(') {
5475: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5476: return(NULL);
5477: }
5478: SHRINK;
5479: do {
5480: NEXT;
5481: SKIP_BLANKS;
5482: name = xmlParseName(ctxt);
5483: if (name == NULL) {
5484: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5485: "Name expected in NOTATION declaration\n");
5486: xmlFreeEnumeration(ret);
5487: return(NULL);
5488: }
5489: tmp = ret;
5490: while (tmp != NULL) {
5491: if (xmlStrEqual(name, tmp->name)) {
5492: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5493: "standalone: attribute notation value token %s duplicated\n",
5494: name, NULL);
5495: if (!xmlDictOwns(ctxt->dict, name))
5496: xmlFree((xmlChar *) name);
5497: break;
5498: }
5499: tmp = tmp->next;
5500: }
5501: if (tmp == NULL) {
5502: cur = xmlCreateEnumeration(name);
5503: if (cur == NULL) {
5504: xmlFreeEnumeration(ret);
5505: return(NULL);
5506: }
5507: if (last == NULL) ret = last = cur;
5508: else {
5509: last->next = cur;
5510: last = cur;
5511: }
5512: }
5513: SKIP_BLANKS;
5514: } while (RAW == '|');
5515: if (RAW != ')') {
5516: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5517: xmlFreeEnumeration(ret);
5518: return(NULL);
5519: }
5520: NEXT;
5521: return(ret);
5522: }
5523:
5524: /**
5525: * xmlParseEnumerationType:
5526: * @ctxt: an XML parser context
5527: *
5528: * parse an Enumeration attribute type.
5529: *
5530: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5531: *
5532: * [ VC: Enumeration ]
5533: * Values of this type must match one of the Nmtoken tokens in
5534: * the declaration
5535: *
5536: * Returns: the enumeration attribute tree built while parsing
5537: */
5538:
5539: xmlEnumerationPtr
5540: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5541: xmlChar *name;
5542: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5543:
5544: if (RAW != '(') {
5545: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5546: return(NULL);
5547: }
5548: SHRINK;
5549: do {
5550: NEXT;
5551: SKIP_BLANKS;
5552: name = xmlParseNmtoken(ctxt);
5553: if (name == NULL) {
5554: xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5555: return(ret);
5556: }
5557: tmp = ret;
5558: while (tmp != NULL) {
5559: if (xmlStrEqual(name, tmp->name)) {
5560: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5561: "standalone: attribute enumeration value token %s duplicated\n",
5562: name, NULL);
5563: if (!xmlDictOwns(ctxt->dict, name))
5564: xmlFree(name);
5565: break;
5566: }
5567: tmp = tmp->next;
5568: }
5569: if (tmp == NULL) {
5570: cur = xmlCreateEnumeration(name);
5571: if (!xmlDictOwns(ctxt->dict, name))
5572: xmlFree(name);
5573: if (cur == NULL) {
5574: xmlFreeEnumeration(ret);
5575: return(NULL);
5576: }
5577: if (last == NULL) ret = last = cur;
5578: else {
5579: last->next = cur;
5580: last = cur;
5581: }
5582: }
5583: SKIP_BLANKS;
5584: } while (RAW == '|');
5585: if (RAW != ')') {
5586: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5587: return(ret);
5588: }
5589: NEXT;
5590: return(ret);
5591: }
5592:
5593: /**
5594: * xmlParseEnumeratedType:
5595: * @ctxt: an XML parser context
5596: * @tree: the enumeration tree built while parsing
5597: *
5598: * parse an Enumerated attribute type.
5599: *
5600: * [57] EnumeratedType ::= NotationType | Enumeration
5601: *
5602: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5603: *
5604: *
5605: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5606: */
5607:
5608: int
5609: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5610: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5611: SKIP(8);
5612: if (!IS_BLANK_CH(CUR)) {
5613: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5614: "Space required after 'NOTATION'\n");
5615: return(0);
5616: }
5617: SKIP_BLANKS;
5618: *tree = xmlParseNotationType(ctxt);
5619: if (*tree == NULL) return(0);
5620: return(XML_ATTRIBUTE_NOTATION);
5621: }
5622: *tree = xmlParseEnumerationType(ctxt);
5623: if (*tree == NULL) return(0);
5624: return(XML_ATTRIBUTE_ENUMERATION);
5625: }
5626:
5627: /**
5628: * xmlParseAttributeType:
5629: * @ctxt: an XML parser context
5630: * @tree: the enumeration tree built while parsing
5631: *
5632: * parse the Attribute list def for an element
5633: *
5634: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5635: *
5636: * [55] StringType ::= 'CDATA'
5637: *
5638: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5639: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5640: *
5641: * Validity constraints for attribute values syntax are checked in
5642: * xmlValidateAttributeValue()
5643: *
5644: * [ VC: ID ]
5645: * Values of type ID must match the Name production. A name must not
5646: * appear more than once in an XML document as a value of this type;
5647: * i.e., ID values must uniquely identify the elements which bear them.
5648: *
5649: * [ VC: One ID per Element Type ]
5650: * No element type may have more than one ID attribute specified.
5651: *
5652: * [ VC: ID Attribute Default ]
5653: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5654: *
5655: * [ VC: IDREF ]
5656: * Values of type IDREF must match the Name production, and values
5657: * of type IDREFS must match Names; each IDREF Name must match the value
5658: * of an ID attribute on some element in the XML document; i.e. IDREF
5659: * values must match the value of some ID attribute.
5660: *
5661: * [ VC: Entity Name ]
5662: * Values of type ENTITY must match the Name production, values
5663: * of type ENTITIES must match Names; each Entity Name must match the
5664: * name of an unparsed entity declared in the DTD.
5665: *
5666: * [ VC: Name Token ]
5667: * Values of type NMTOKEN must match the Nmtoken production; values
5668: * of type NMTOKENS must match Nmtokens.
5669: *
5670: * Returns the attribute type
5671: */
5672: int
5673: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5674: SHRINK;
5675: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5676: SKIP(5);
5677: return(XML_ATTRIBUTE_CDATA);
5678: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5679: SKIP(6);
5680: return(XML_ATTRIBUTE_IDREFS);
5681: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5682: SKIP(5);
5683: return(XML_ATTRIBUTE_IDREF);
5684: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5685: SKIP(2);
5686: return(XML_ATTRIBUTE_ID);
5687: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5688: SKIP(6);
5689: return(XML_ATTRIBUTE_ENTITY);
5690: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5691: SKIP(8);
5692: return(XML_ATTRIBUTE_ENTITIES);
5693: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5694: SKIP(8);
5695: return(XML_ATTRIBUTE_NMTOKENS);
5696: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5697: SKIP(7);
5698: return(XML_ATTRIBUTE_NMTOKEN);
5699: }
5700: return(xmlParseEnumeratedType(ctxt, tree));
5701: }
5702:
5703: /**
5704: * xmlParseAttributeListDecl:
5705: * @ctxt: an XML parser context
5706: *
5707: * : parse the Attribute list def for an element
5708: *
5709: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5710: *
5711: * [53] AttDef ::= S Name S AttType S DefaultDecl
5712: *
5713: */
5714: void
5715: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5716: const xmlChar *elemName;
5717: const xmlChar *attrName;
5718: xmlEnumerationPtr tree;
5719:
5720: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5721: xmlParserInputPtr input = ctxt->input;
5722:
5723: SKIP(9);
5724: if (!IS_BLANK_CH(CUR)) {
5725: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5726: "Space required after '<!ATTLIST'\n");
5727: }
5728: SKIP_BLANKS;
5729: elemName = xmlParseName(ctxt);
5730: if (elemName == NULL) {
5731: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5732: "ATTLIST: no name for Element\n");
5733: return;
5734: }
5735: SKIP_BLANKS;
5736: GROW;
5737: while (RAW != '>') {
5738: const xmlChar *check = CUR_PTR;
5739: int type;
5740: int def;
5741: xmlChar *defaultValue = NULL;
5742:
5743: GROW;
5744: tree = NULL;
5745: attrName = xmlParseName(ctxt);
5746: if (attrName == NULL) {
5747: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5748: "ATTLIST: no name for Attribute\n");
5749: break;
5750: }
5751: GROW;
5752: if (!IS_BLANK_CH(CUR)) {
5753: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5754: "Space required after the attribute name\n");
5755: break;
5756: }
5757: SKIP_BLANKS;
5758:
5759: type = xmlParseAttributeType(ctxt, &tree);
5760: if (type <= 0) {
5761: break;
5762: }
5763:
5764: GROW;
5765: if (!IS_BLANK_CH(CUR)) {
5766: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5767: "Space required after the attribute type\n");
5768: if (tree != NULL)
5769: xmlFreeEnumeration(tree);
5770: break;
5771: }
5772: SKIP_BLANKS;
5773:
5774: def = xmlParseDefaultDecl(ctxt, &defaultValue);
5775: if (def <= 0) {
5776: if (defaultValue != NULL)
5777: xmlFree(defaultValue);
5778: if (tree != NULL)
5779: xmlFreeEnumeration(tree);
5780: break;
5781: }
5782: if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5783: xmlAttrNormalizeSpace(defaultValue, defaultValue);
5784:
5785: GROW;
5786: if (RAW != '>') {
5787: if (!IS_BLANK_CH(CUR)) {
5788: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5789: "Space required after the attribute default value\n");
5790: if (defaultValue != NULL)
5791: xmlFree(defaultValue);
5792: if (tree != NULL)
5793: xmlFreeEnumeration(tree);
5794: break;
5795: }
5796: SKIP_BLANKS;
5797: }
5798: if (check == CUR_PTR) {
5799: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5800: "in xmlParseAttributeListDecl\n");
5801: if (defaultValue != NULL)
5802: xmlFree(defaultValue);
5803: if (tree != NULL)
5804: xmlFreeEnumeration(tree);
5805: break;
5806: }
5807: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5808: (ctxt->sax->attributeDecl != NULL))
5809: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5810: type, def, defaultValue, tree);
5811: else if (tree != NULL)
5812: xmlFreeEnumeration(tree);
5813:
5814: if ((ctxt->sax2) && (defaultValue != NULL) &&
5815: (def != XML_ATTRIBUTE_IMPLIED) &&
5816: (def != XML_ATTRIBUTE_REQUIRED)) {
5817: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5818: }
5819: if (ctxt->sax2) {
5820: xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5821: }
5822: if (defaultValue != NULL)
5823: xmlFree(defaultValue);
5824: GROW;
5825: }
5826: if (RAW == '>') {
5827: if (input != ctxt->input) {
5828: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5829: "Attribute list declaration doesn't start and stop in the same entity\n",
5830: NULL, NULL);
5831: }
5832: NEXT;
5833: }
5834: }
5835: }
5836:
5837: /**
5838: * xmlParseElementMixedContentDecl:
5839: * @ctxt: an XML parser context
5840: * @inputchk: the input used for the current entity, needed for boundary checks
5841: *
5842: * parse the declaration for a Mixed Element content
5843: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5844: *
5845: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5846: * '(' S? '#PCDATA' S? ')'
5847: *
5848: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5849: *
5850: * [ VC: No Duplicate Types ]
5851: * The same name must not appear more than once in a single
5852: * mixed-content declaration.
5853: *
5854: * returns: the list of the xmlElementContentPtr describing the element choices
5855: */
5856: xmlElementContentPtr
5857: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5858: xmlElementContentPtr ret = NULL, cur = NULL, n;
5859: const xmlChar *elem = NULL;
5860:
5861: GROW;
5862: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5863: SKIP(7);
5864: SKIP_BLANKS;
5865: SHRINK;
5866: if (RAW == ')') {
5867: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5868: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869: "Element content declaration doesn't start and stop in the same entity\n",
5870: NULL, NULL);
5871: }
5872: NEXT;
5873: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5874: if (ret == NULL)
5875: return(NULL);
5876: if (RAW == '*') {
5877: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5878: NEXT;
5879: }
5880: return(ret);
5881: }
5882: if ((RAW == '(') || (RAW == '|')) {
5883: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5884: if (ret == NULL) return(NULL);
5885: }
5886: while (RAW == '|') {
5887: NEXT;
5888: if (elem == NULL) {
5889: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5890: if (ret == NULL) return(NULL);
5891: ret->c1 = cur;
5892: if (cur != NULL)
5893: cur->parent = ret;
5894: cur = ret;
5895: } else {
5896: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5897: if (n == NULL) return(NULL);
5898: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5899: if (n->c1 != NULL)
5900: n->c1->parent = n;
5901: cur->c2 = n;
5902: if (n != NULL)
5903: n->parent = cur;
5904: cur = n;
5905: }
5906: SKIP_BLANKS;
5907: elem = xmlParseName(ctxt);
5908: if (elem == NULL) {
5909: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5910: "xmlParseElementMixedContentDecl : Name expected\n");
5911: xmlFreeDocElementContent(ctxt->myDoc, cur);
5912: return(NULL);
5913: }
5914: SKIP_BLANKS;
5915: GROW;
5916: }
5917: if ((RAW == ')') && (NXT(1) == '*')) {
5918: if (elem != NULL) {
5919: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5920: XML_ELEMENT_CONTENT_ELEMENT);
5921: if (cur->c2 != NULL)
5922: cur->c2->parent = cur;
5923: }
5924: if (ret != NULL)
5925: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5926: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5927: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5928: "Element content declaration doesn't start and stop in the same entity\n",
5929: NULL, NULL);
5930: }
5931: SKIP(2);
5932: } else {
5933: xmlFreeDocElementContent(ctxt->myDoc, ret);
5934: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5935: return(NULL);
5936: }
5937:
5938: } else {
5939: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5940: }
5941: return(ret);
5942: }
5943:
5944: /**
5945: * xmlParseElementChildrenContentDeclPriv:
5946: * @ctxt: an XML parser context
5947: * @inputchk: the input used for the current entity, needed for boundary checks
5948: * @depth: the level of recursion
5949: *
5950: * parse the declaration for a Mixed Element content
5951: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5952: *
5953: *
5954: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5955: *
5956: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5957: *
5958: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5959: *
5960: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5961: *
5962: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5963: * TODO Parameter-entity replacement text must be properly nested
5964: * with parenthesized groups. That is to say, if either of the
5965: * opening or closing parentheses in a choice, seq, or Mixed
5966: * construct is contained in the replacement text for a parameter
5967: * entity, both must be contained in the same replacement text. For
5968: * interoperability, if a parameter-entity reference appears in a
5969: * choice, seq, or Mixed construct, its replacement text should not
5970: * be empty, and neither the first nor last non-blank character of
5971: * the replacement text should be a connector (| or ,).
5972: *
5973: * Returns the tree of xmlElementContentPtr describing the element
5974: * hierarchy.
5975: */
5976: static xmlElementContentPtr
5977: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5978: int depth) {
5979: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5980: const xmlChar *elem;
5981: xmlChar type = 0;
5982:
5983: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5984: (depth > 2048)) {
5985: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5986: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5987: depth);
5988: return(NULL);
5989: }
5990: SKIP_BLANKS;
5991: GROW;
5992: if (RAW == '(') {
5993: int inputid = ctxt->input->id;
5994:
5995: /* Recurse on first child */
5996: NEXT;
5997: SKIP_BLANKS;
5998: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5999: depth + 1);
6000: SKIP_BLANKS;
6001: GROW;
6002: } else {
6003: elem = xmlParseName(ctxt);
6004: if (elem == NULL) {
6005: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6006: return(NULL);
6007: }
6008: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6009: if (cur == NULL) {
6010: xmlErrMemory(ctxt, NULL);
6011: return(NULL);
6012: }
6013: GROW;
6014: if (RAW == '?') {
6015: cur->ocur = XML_ELEMENT_CONTENT_OPT;
6016: NEXT;
6017: } else if (RAW == '*') {
6018: cur->ocur = XML_ELEMENT_CONTENT_MULT;
6019: NEXT;
6020: } else if (RAW == '+') {
6021: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6022: NEXT;
6023: } else {
6024: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6025: }
6026: GROW;
6027: }
6028: SKIP_BLANKS;
6029: SHRINK;
6030: while (RAW != ')') {
6031: /*
6032: * Each loop we parse one separator and one element.
6033: */
6034: if (RAW == ',') {
6035: if (type == 0) type = CUR;
6036:
6037: /*
6038: * Detect "Name | Name , Name" error
6039: */
6040: else if (type != CUR) {
6041: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6042: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6043: type);
6044: if ((last != NULL) && (last != ret))
6045: xmlFreeDocElementContent(ctxt->myDoc, last);
6046: if (ret != NULL)
6047: xmlFreeDocElementContent(ctxt->myDoc, ret);
6048: return(NULL);
6049: }
6050: NEXT;
6051:
6052: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6053: if (op == NULL) {
6054: if ((last != NULL) && (last != ret))
6055: xmlFreeDocElementContent(ctxt->myDoc, last);
6056: xmlFreeDocElementContent(ctxt->myDoc, ret);
6057: return(NULL);
6058: }
6059: if (last == NULL) {
6060: op->c1 = ret;
6061: if (ret != NULL)
6062: ret->parent = op;
6063: ret = cur = op;
6064: } else {
6065: cur->c2 = op;
6066: if (op != NULL)
6067: op->parent = cur;
6068: op->c1 = last;
6069: if (last != NULL)
6070: last->parent = op;
6071: cur =op;
6072: last = NULL;
6073: }
6074: } else if (RAW == '|') {
6075: if (type == 0) type = CUR;
6076:
6077: /*
6078: * Detect "Name , Name | Name" error
6079: */
6080: else if (type != CUR) {
6081: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6082: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6083: type);
6084: if ((last != NULL) && (last != ret))
6085: xmlFreeDocElementContent(ctxt->myDoc, last);
6086: if (ret != NULL)
6087: xmlFreeDocElementContent(ctxt->myDoc, ret);
6088: return(NULL);
6089: }
6090: NEXT;
6091:
6092: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6093: if (op == NULL) {
6094: if ((last != NULL) && (last != ret))
6095: xmlFreeDocElementContent(ctxt->myDoc, last);
6096: if (ret != NULL)
6097: xmlFreeDocElementContent(ctxt->myDoc, ret);
6098: return(NULL);
6099: }
6100: if (last == NULL) {
6101: op->c1 = ret;
6102: if (ret != NULL)
6103: ret->parent = op;
6104: ret = cur = op;
6105: } else {
6106: cur->c2 = op;
6107: if (op != NULL)
6108: op->parent = cur;
6109: op->c1 = last;
6110: if (last != NULL)
6111: last->parent = op;
6112: cur =op;
6113: last = NULL;
6114: }
6115: } else {
6116: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6117: if ((last != NULL) && (last != ret))
6118: xmlFreeDocElementContent(ctxt->myDoc, last);
6119: if (ret != NULL)
6120: xmlFreeDocElementContent(ctxt->myDoc, ret);
6121: return(NULL);
6122: }
6123: GROW;
6124: SKIP_BLANKS;
6125: GROW;
6126: if (RAW == '(') {
6127: int inputid = ctxt->input->id;
6128: /* Recurse on second child */
6129: NEXT;
6130: SKIP_BLANKS;
6131: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6132: depth + 1);
6133: SKIP_BLANKS;
6134: } else {
6135: elem = xmlParseName(ctxt);
6136: if (elem == NULL) {
6137: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6138: if (ret != NULL)
6139: xmlFreeDocElementContent(ctxt->myDoc, ret);
6140: return(NULL);
6141: }
6142: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6143: if (last == NULL) {
6144: if (ret != NULL)
6145: xmlFreeDocElementContent(ctxt->myDoc, ret);
6146: return(NULL);
6147: }
6148: if (RAW == '?') {
6149: last->ocur = XML_ELEMENT_CONTENT_OPT;
6150: NEXT;
6151: } else if (RAW == '*') {
6152: last->ocur = XML_ELEMENT_CONTENT_MULT;
6153: NEXT;
6154: } else if (RAW == '+') {
6155: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6156: NEXT;
6157: } else {
6158: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6159: }
6160: }
6161: SKIP_BLANKS;
6162: GROW;
6163: }
6164: if ((cur != NULL) && (last != NULL)) {
6165: cur->c2 = last;
6166: if (last != NULL)
6167: last->parent = cur;
6168: }
6169: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6170: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6171: "Element content declaration doesn't start and stop in the same entity\n",
6172: NULL, NULL);
6173: }
6174: NEXT;
6175: if (RAW == '?') {
6176: if (ret != NULL) {
6177: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6178: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6179: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6180: else
6181: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6182: }
6183: NEXT;
6184: } else if (RAW == '*') {
6185: if (ret != NULL) {
6186: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6187: cur = ret;
6188: /*
6189: * Some normalization:
6190: * (a | b* | c?)* == (a | b | c)*
6191: */
6192: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6193: if ((cur->c1 != NULL) &&
6194: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6195: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6196: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6197: if ((cur->c2 != NULL) &&
6198: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6199: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6200: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6201: cur = cur->c2;
6202: }
6203: }
6204: NEXT;
6205: } else if (RAW == '+') {
6206: if (ret != NULL) {
6207: int found = 0;
6208:
6209: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6211: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6212: else
6213: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6214: /*
6215: * Some normalization:
6216: * (a | b*)+ == (a | b)*
6217: * (a | b?)+ == (a | b)*
6218: */
6219: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6220: if ((cur->c1 != NULL) &&
6221: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6222: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6223: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6224: found = 1;
6225: }
6226: if ((cur->c2 != NULL) &&
6227: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6228: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6229: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6230: found = 1;
6231: }
6232: cur = cur->c2;
6233: }
6234: if (found)
6235: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6236: }
6237: NEXT;
6238: }
6239: return(ret);
6240: }
6241:
6242: /**
6243: * xmlParseElementChildrenContentDecl:
6244: * @ctxt: an XML parser context
6245: * @inputchk: the input used for the current entity, needed for boundary checks
6246: *
6247: * parse the declaration for a Mixed Element content
6248: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6249: *
6250: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251: *
6252: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253: *
6254: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255: *
6256: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257: *
6258: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259: * TODO Parameter-entity replacement text must be properly nested
6260: * with parenthesized groups. That is to say, if either of the
6261: * opening or closing parentheses in a choice, seq, or Mixed
6262: * construct is contained in the replacement text for a parameter
6263: * entity, both must be contained in the same replacement text. For
6264: * interoperability, if a parameter-entity reference appears in a
6265: * choice, seq, or Mixed construct, its replacement text should not
6266: * be empty, and neither the first nor last non-blank character of
6267: * the replacement text should be a connector (| or ,).
6268: *
6269: * Returns the tree of xmlElementContentPtr describing the element
6270: * hierarchy.
6271: */
6272: xmlElementContentPtr
6273: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6274: /* stub left for API/ABI compat */
6275: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6276: }
6277:
6278: /**
6279: * xmlParseElementContentDecl:
6280: * @ctxt: an XML parser context
6281: * @name: the name of the element being defined.
6282: * @result: the Element Content pointer will be stored here if any
6283: *
6284: * parse the declaration for an Element content either Mixed or Children,
6285: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6286: *
6287: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6288: *
6289: * returns: the type of element content XML_ELEMENT_TYPE_xxx
6290: */
6291:
6292: int
6293: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6294: xmlElementContentPtr *result) {
6295:
6296: xmlElementContentPtr tree = NULL;
6297: int inputid = ctxt->input->id;
6298: int res;
6299:
6300: *result = NULL;
6301:
6302: if (RAW != '(') {
6303: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6304: "xmlParseElementContentDecl : %s '(' expected\n", name);
6305: return(-1);
6306: }
6307: NEXT;
6308: GROW;
6309: SKIP_BLANKS;
6310: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6311: tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6312: res = XML_ELEMENT_TYPE_MIXED;
6313: } else {
6314: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6315: res = XML_ELEMENT_TYPE_ELEMENT;
6316: }
6317: SKIP_BLANKS;
6318: *result = tree;
6319: return(res);
6320: }
6321:
6322: /**
6323: * xmlParseElementDecl:
6324: * @ctxt: an XML parser context
6325: *
6326: * parse an Element declaration.
6327: *
6328: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6329: *
6330: * [ VC: Unique Element Type Declaration ]
6331: * No element type may be declared more than once
6332: *
6333: * Returns the type of the element, or -1 in case of error
6334: */
6335: int
6336: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6337: const xmlChar *name;
6338: int ret = -1;
6339: xmlElementContentPtr content = NULL;
6340:
6341: /* GROW; done in the caller */
6342: if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6343: xmlParserInputPtr input = ctxt->input;
6344:
6345: SKIP(9);
6346: if (!IS_BLANK_CH(CUR)) {
6347: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6348: "Space required after 'ELEMENT'\n");
6349: }
6350: SKIP_BLANKS;
6351: name = xmlParseName(ctxt);
6352: if (name == NULL) {
6353: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6354: "xmlParseElementDecl: no name for Element\n");
6355: return(-1);
6356: }
6357: while ((RAW == 0) && (ctxt->inputNr > 1))
6358: xmlPopInput(ctxt);
6359: if (!IS_BLANK_CH(CUR)) {
6360: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6361: "Space required after the element name\n");
6362: }
6363: SKIP_BLANKS;
6364: if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6365: SKIP(5);
6366: /*
6367: * Element must always be empty.
6368: */
6369: ret = XML_ELEMENT_TYPE_EMPTY;
6370: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6371: (NXT(2) == 'Y')) {
6372: SKIP(3);
6373: /*
6374: * Element is a generic container.
6375: */
6376: ret = XML_ELEMENT_TYPE_ANY;
6377: } else if (RAW == '(') {
6378: ret = xmlParseElementContentDecl(ctxt, name, &content);
6379: } else {
6380: /*
6381: * [ WFC: PEs in Internal Subset ] error handling.
6382: */
6383: if ((RAW == '%') && (ctxt->external == 0) &&
6384: (ctxt->inputNr == 1)) {
6385: xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6386: "PEReference: forbidden within markup decl in internal subset\n");
6387: } else {
6388: xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6389: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6390: }
6391: return(-1);
6392: }
6393:
6394: SKIP_BLANKS;
6395: /*
6396: * Pop-up of finished entities.
6397: */
6398: while ((RAW == 0) && (ctxt->inputNr > 1))
6399: xmlPopInput(ctxt);
6400: SKIP_BLANKS;
6401:
6402: if (RAW != '>') {
6403: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6404: if (content != NULL) {
6405: xmlFreeDocElementContent(ctxt->myDoc, content);
6406: }
6407: } else {
6408: if (input != ctxt->input) {
6409: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410: "Element declaration doesn't start and stop in the same entity\n");
6411: }
6412:
6413: NEXT;
6414: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6415: (ctxt->sax->elementDecl != NULL)) {
6416: if (content != NULL)
6417: content->parent = NULL;
6418: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6419: content);
6420: if ((content != NULL) && (content->parent == NULL)) {
6421: /*
6422: * this is a trick: if xmlAddElementDecl is called,
6423: * instead of copying the full tree it is plugged directly
6424: * if called from the parser. Avoid duplicating the
6425: * interfaces or change the API/ABI
6426: */
6427: xmlFreeDocElementContent(ctxt->myDoc, content);
6428: }
6429: } else if (content != NULL) {
6430: xmlFreeDocElementContent(ctxt->myDoc, content);
6431: }
6432: }
6433: }
6434: return(ret);
6435: }
6436:
6437: /**
6438: * xmlParseConditionalSections
6439: * @ctxt: an XML parser context
6440: *
6441: * [61] conditionalSect ::= includeSect | ignoreSect
6442: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6443: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6444: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6445: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6446: */
6447:
6448: static void
6449: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6450: int id = ctxt->input->id;
6451:
6452: SKIP(3);
6453: SKIP_BLANKS;
6454: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6455: SKIP(7);
6456: SKIP_BLANKS;
6457: if (RAW != '[') {
6458: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6459: } else {
6460: if (ctxt->input->id != id) {
6461: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6462: "All markup of the conditional section is not in the same entity\n",
6463: NULL, NULL);
6464: }
6465: NEXT;
6466: }
6467: if (xmlParserDebugEntities) {
6468: if ((ctxt->input != NULL) && (ctxt->input->filename))
6469: xmlGenericError(xmlGenericErrorContext,
6470: "%s(%d): ", ctxt->input->filename,
6471: ctxt->input->line);
6472: xmlGenericError(xmlGenericErrorContext,
6473: "Entering INCLUDE Conditional Section\n");
6474: }
6475:
6476: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6477: (NXT(2) != '>'))) {
6478: const xmlChar *check = CUR_PTR;
6479: unsigned int cons = ctxt->input->consumed;
6480:
6481: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6482: xmlParseConditionalSections(ctxt);
6483: } else if (IS_BLANK_CH(CUR)) {
6484: NEXT;
6485: } else if (RAW == '%') {
6486: xmlParsePEReference(ctxt);
6487: } else
6488: xmlParseMarkupDecl(ctxt);
6489:
6490: /*
6491: * Pop-up of finished entities.
6492: */
6493: while ((RAW == 0) && (ctxt->inputNr > 1))
6494: xmlPopInput(ctxt);
6495:
6496: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6497: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6498: break;
6499: }
6500: }
6501: if (xmlParserDebugEntities) {
6502: if ((ctxt->input != NULL) && (ctxt->input->filename))
6503: xmlGenericError(xmlGenericErrorContext,
6504: "%s(%d): ", ctxt->input->filename,
6505: ctxt->input->line);
6506: xmlGenericError(xmlGenericErrorContext,
6507: "Leaving INCLUDE Conditional Section\n");
6508: }
6509:
6510: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6511: int state;
6512: xmlParserInputState instate;
6513: int depth = 0;
6514:
6515: SKIP(6);
6516: SKIP_BLANKS;
6517: if (RAW != '[') {
6518: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6519: } else {
6520: if (ctxt->input->id != id) {
6521: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6522: "All markup of the conditional section is not in the same entity\n",
6523: NULL, NULL);
6524: }
6525: NEXT;
6526: }
6527: if (xmlParserDebugEntities) {
6528: if ((ctxt->input != NULL) && (ctxt->input->filename))
6529: xmlGenericError(xmlGenericErrorContext,
6530: "%s(%d): ", ctxt->input->filename,
6531: ctxt->input->line);
6532: xmlGenericError(xmlGenericErrorContext,
6533: "Entering IGNORE Conditional Section\n");
6534: }
6535:
6536: /*
6537: * Parse up to the end of the conditional section
6538: * But disable SAX event generating DTD building in the meantime
6539: */
6540: state = ctxt->disableSAX;
6541: instate = ctxt->instate;
6542: if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6543: ctxt->instate = XML_PARSER_IGNORE;
6544:
6545: while ((depth >= 0) && (RAW != 0)) {
6546: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6547: depth++;
6548: SKIP(3);
6549: continue;
6550: }
6551: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6552: if (--depth >= 0) SKIP(3);
6553: continue;
6554: }
6555: NEXT;
6556: continue;
6557: }
6558:
6559: ctxt->disableSAX = state;
6560: ctxt->instate = instate;
6561:
6562: if (xmlParserDebugEntities) {
6563: if ((ctxt->input != NULL) && (ctxt->input->filename))
6564: xmlGenericError(xmlGenericErrorContext,
6565: "%s(%d): ", ctxt->input->filename,
6566: ctxt->input->line);
6567: xmlGenericError(xmlGenericErrorContext,
6568: "Leaving IGNORE Conditional Section\n");
6569: }
6570:
6571: } else {
6572: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6573: }
6574:
6575: if (RAW == 0)
6576: SHRINK;
6577:
6578: if (RAW == 0) {
6579: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6580: } else {
6581: if (ctxt->input->id != id) {
6582: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6583: "All markup of the conditional section is not in the same entity\n",
6584: NULL, NULL);
6585: }
6586: SKIP(3);
6587: }
6588: }
6589:
6590: /**
6591: * xmlParseMarkupDecl:
6592: * @ctxt: an XML parser context
6593: *
6594: * parse Markup declarations
6595: *
6596: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6597: * NotationDecl | PI | Comment
6598: *
6599: * [ VC: Proper Declaration/PE Nesting ]
6600: * Parameter-entity replacement text must be properly nested with
6601: * markup declarations. That is to say, if either the first character
6602: * or the last character of a markup declaration (markupdecl above) is
6603: * contained in the replacement text for a parameter-entity reference,
6604: * both must be contained in the same replacement text.
6605: *
6606: * [ WFC: PEs in Internal Subset ]
6607: * In the internal DTD subset, parameter-entity references can occur
6608: * only where markup declarations can occur, not within markup declarations.
6609: * (This does not apply to references that occur in external parameter
6610: * entities or to the external subset.)
6611: */
6612: void
6613: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6614: GROW;
6615: if (CUR == '<') {
6616: if (NXT(1) == '!') {
6617: switch (NXT(2)) {
6618: case 'E':
6619: if (NXT(3) == 'L')
6620: xmlParseElementDecl(ctxt);
6621: else if (NXT(3) == 'N')
6622: xmlParseEntityDecl(ctxt);
6623: break;
6624: case 'A':
6625: xmlParseAttributeListDecl(ctxt);
6626: break;
6627: case 'N':
6628: xmlParseNotationDecl(ctxt);
6629: break;
6630: case '-':
6631: xmlParseComment(ctxt);
6632: break;
6633: default:
6634: /* there is an error but it will be detected later */
6635: break;
6636: }
6637: } else if (NXT(1) == '?') {
6638: xmlParsePI(ctxt);
6639: }
6640: }
6641: /*
6642: * This is only for internal subset. On external entities,
6643: * the replacement is done before parsing stage
6644: */
6645: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6646: xmlParsePEReference(ctxt);
6647:
6648: /*
6649: * Conditional sections are allowed from entities included
6650: * by PE References in the internal subset.
6651: */
6652: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6653: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6654: xmlParseConditionalSections(ctxt);
6655: }
6656: }
6657:
6658: ctxt->instate = XML_PARSER_DTD;
6659: }
6660:
6661: /**
6662: * xmlParseTextDecl:
6663: * @ctxt: an XML parser context
6664: *
6665: * parse an XML declaration header for external entities
6666: *
6667: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6668: */
6669:
6670: void
6671: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6672: xmlChar *version;
6673: const xmlChar *encoding;
6674:
6675: /*
6676: * We know that '<?xml' is here.
6677: */
6678: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6679: SKIP(5);
6680: } else {
6681: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6682: return;
6683: }
6684:
6685: if (!IS_BLANK_CH(CUR)) {
6686: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6687: "Space needed after '<?xml'\n");
6688: }
6689: SKIP_BLANKS;
6690:
6691: /*
6692: * We may have the VersionInfo here.
6693: */
6694: version = xmlParseVersionInfo(ctxt);
6695: if (version == NULL)
6696: version = xmlCharStrdup(XML_DEFAULT_VERSION);
6697: else {
6698: if (!IS_BLANK_CH(CUR)) {
6699: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6700: "Space needed here\n");
6701: }
6702: }
6703: ctxt->input->version = version;
6704:
6705: /*
6706: * We must have the encoding declaration
6707: */
6708: encoding = xmlParseEncodingDecl(ctxt);
6709: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6710: /*
6711: * The XML REC instructs us to stop parsing right here
6712: */
6713: return;
6714: }
6715: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6716: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6717: "Missing encoding in text declaration\n");
6718: }
6719:
6720: SKIP_BLANKS;
6721: if ((RAW == '?') && (NXT(1) == '>')) {
6722: SKIP(2);
6723: } else if (RAW == '>') {
6724: /* Deprecated old WD ... */
6725: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6726: NEXT;
6727: } else {
6728: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6729: MOVETO_ENDTAG(CUR_PTR);
6730: NEXT;
6731: }
6732: }
6733:
6734: /**
6735: * xmlParseExternalSubset:
6736: * @ctxt: an XML parser context
6737: * @ExternalID: the external identifier
6738: * @SystemID: the system identifier (or URL)
6739: *
6740: * parse Markup declarations from an external subset
6741: *
6742: * [30] extSubset ::= textDecl? extSubsetDecl
6743: *
6744: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6745: */
6746: void
6747: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6748: const xmlChar *SystemID) {
6749: xmlDetectSAX2(ctxt);
6750: GROW;
6751:
6752: if ((ctxt->encoding == NULL) &&
6753: (ctxt->input->end - ctxt->input->cur >= 4)) {
6754: xmlChar start[4];
6755: xmlCharEncoding enc;
6756:
6757: start[0] = RAW;
6758: start[1] = NXT(1);
6759: start[2] = NXT(2);
6760: start[3] = NXT(3);
6761: enc = xmlDetectCharEncoding(start, 4);
6762: if (enc != XML_CHAR_ENCODING_NONE)
6763: xmlSwitchEncoding(ctxt, enc);
6764: }
6765:
6766: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6767: xmlParseTextDecl(ctxt);
6768: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6769: /*
6770: * The XML REC instructs us to stop parsing right here
6771: */
6772: ctxt->instate = XML_PARSER_EOF;
6773: return;
6774: }
6775: }
6776: if (ctxt->myDoc == NULL) {
6777: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6778: if (ctxt->myDoc == NULL) {
6779: xmlErrMemory(ctxt, "New Doc failed");
6780: return;
6781: }
6782: ctxt->myDoc->properties = XML_DOC_INTERNAL;
6783: }
6784: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6785: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6786:
6787: ctxt->instate = XML_PARSER_DTD;
6788: ctxt->external = 1;
6789: while (((RAW == '<') && (NXT(1) == '?')) ||
6790: ((RAW == '<') && (NXT(1) == '!')) ||
6791: (RAW == '%') || IS_BLANK_CH(CUR)) {
6792: const xmlChar *check = CUR_PTR;
6793: unsigned int cons = ctxt->input->consumed;
6794:
6795: GROW;
6796: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6797: xmlParseConditionalSections(ctxt);
6798: } else if (IS_BLANK_CH(CUR)) {
6799: NEXT;
6800: } else if (RAW == '%') {
6801: xmlParsePEReference(ctxt);
6802: } else
6803: xmlParseMarkupDecl(ctxt);
6804:
6805: /*
6806: * Pop-up of finished entities.
6807: */
6808: while ((RAW == 0) && (ctxt->inputNr > 1))
6809: xmlPopInput(ctxt);
6810:
6811: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6812: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6813: break;
6814: }
6815: }
6816:
6817: if (RAW != 0) {
6818: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6819: }
6820:
6821: }
6822:
6823: /**
6824: * xmlParseReference:
6825: * @ctxt: an XML parser context
6826: *
6827: * parse and handle entity references in content, depending on the SAX
6828: * interface, this may end-up in a call to character() if this is a
6829: * CharRef, a predefined entity, if there is no reference() callback.
6830: * or if the parser was asked to switch to that mode.
6831: *
6832: * [67] Reference ::= EntityRef | CharRef
 *
 * Summary of the control flow implemented below:
 *  - nothing is done unless the current character is '&';
 *  - "&#...;" is decoded with xmlParseCharRef() and delivered through the
 *    SAX characters() callback, or through reference() when ctxt->charset
 *    is not UTF-8 and the value does not fit in 8 bits;
 *  - otherwise the entity is looked up with xmlParseEntityRef(); the
 *    function returns if none is found or if the document is no longer
 *    well formed; predefined entities are delivered as characters();
 *  - on the first reference (ent->checked == 0) the entity content is
 *    parsed and the resulting node list may be attached to the entity;
 *  - finally the content is handed over: through reference(), by
 *    re-parsing the content to replay SAX callbacks, or by linking or
 *    copying the entity children under ctxt->node.
 * The function returns no value; errors are reported on the context.
6833: */
6834: void
6835: xmlParseReference(xmlParserCtxtPtr ctxt) {
6836: xmlEntityPtr ent;
6837: xmlChar *val;
6838: int was_checked;
6839: xmlNodePtr list = NULL;
6840: xmlParserErrors ret = XML_ERR_OK;
6841:
6842:
6843: if (RAW != '&')
6844: return;
6845:
6846: /*
6847: * Simple case of a CharRef
6848: */
6849: if (NXT(1) == '#') {
6850: int i = 0;
6851: xmlChar out[10];
/* character following "&#", read before xmlParseCharRef() is called;
 * used below to choose between the "#x%X" and "#%d" output forms */
6852: int hex = NXT(2);
6853: int value = xmlParseCharRef(ctxt);
6854:
6855: if (value == 0)
6856: return;
6857: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6858: /*
6859: * So we are using non-UTF-8 buffers
6860: * Check that the char fit on 8bits, if not
6861: * generate a CharRef.
6862: */
6863: if (value <= 0xFF) {
6864: out[0] = value;
6865: out[1] = 0;
6866: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6867: (!ctxt->disableSAX))
6868: ctxt->sax->characters(ctxt->userData, out, 1);
6869: } else {
6870: if ((hex == 'x') || (hex == 'X'))
6871: snprintf((char *)out, sizeof(out), "#x%X", value);
6872: else
6873: snprintf((char *)out, sizeof(out), "#%d", value);
6874: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6875: (!ctxt->disableSAX))
6876: ctxt->sax->reference(ctxt->userData, out);
6877: }
6878: } else {
6879: /*
6880: * Just encode the value in UTF-8
6881: */
6882: COPY_BUF(0 ,out, i, value);
6883: out[i] = 0;
6884: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6885: (!ctxt->disableSAX))
6886: ctxt->sax->characters(ctxt->userData, out, i);
6887: }
6888: return;
6889: }
6890:
6891: /*
6892: * We are seeing an entity reference
6893: */
6894: ent = xmlParseEntityRef(ctxt);
6895: if (ent == NULL) return;
6896: if (!ctxt->wellFormed)
6897: return;
/* snapshot of ent->checked taken before the first-reference parsing
 * below may modify it; tested later to decide whether the content has
 * to be parsed again to generate SAX callbacks */
6898: was_checked = ent->checked;
6899:
6900: /* special case of predefined entities */
6901: if ((ent->name == NULL) ||
6902: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6903: val = ent->content;
6904: if (val == NULL) return;
6905: /*
6906: * inline the entity.
6907: */
6908: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6909: (!ctxt->disableSAX))
6910: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6911: return;
6912: }
6913:
6914: /*
6915: * The first reference to the entity trigger a parsing phase
6916: * where the ent->children is filled with the result from
6917: * the parsing.
6918: */
6919: if (ent->checked == 0) {
6920: unsigned long oldnbent = ctxt->nbentities;
6921:
6922: /*
6923: * This is a bit hackish but this seems the best
6924: * way to make sure both SAX and DOM entity support
6925: * behaves okay.
6926: */
6927: void *user_data;
6928: if (ctxt->userData == ctxt)
6929: user_data = NULL;
6930: else
6931: user_data = ctxt->userData;
6932:
6933: /*
6934: * Check that this entity is well formed
6935: * 4.3.2: An internal general parsed entity is well-formed
6936: * if its replacement text matches the production labeled
6937: * content.
6938: */
6939: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6940: ctxt->depth++;
6941: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6942: user_data, &list);
6943: ctxt->depth--;
6944:
6945: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6946: ctxt->depth++;
6947: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6948: user_data, ctxt->depth, ent->URI,
6949: ent->ExternalID, &list);
6950: ctxt->depth--;
6951: } else {
6952: ret = XML_ERR_ENTITY_PE_INTERNAL;
6953: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6954: "invalid entity type found\n", NULL);
6955: }
6956:
6957: /*
6958: * Store the number of entities needing parsing for this entity
6959: * content and do checkings
6960: */
6961: ent->checked = ctxt->nbentities - oldnbent;
6962: if (ret == XML_ERR_ENTITY_LOOP) {
6963: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6964: xmlFreeNodeList(list);
6965: return;
6966: }
6967: if (xmlParserEntityCheck(ctxt, 0, ent)) {
6968: xmlFreeNodeList(list);
6969: return;
6970: }
6971:
/* Successful parse with content: attach the node list to the entity
 * (only when the entity has no children yet), otherwise free it. */
6972: if ((ret == XML_ERR_OK) && (list != NULL)) {
6973: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6974: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6975: (ent->children == NULL)) {
6976: ent->children = list;
6977: if (ctxt->replaceEntities) {
6978: /*
6979: * Prune it directly in the generated document
6980: * except for single text nodes.
6981: */
6982: if (((list->type == XML_TEXT_NODE) &&
6983: (list->next == NULL)) ||
6984: (ctxt->parseMode == XML_PARSE_READER)) {
6985: list->parent = (xmlNodePtr) ent;
6986: list = NULL;
6987: ent->owner = 1;
6988: } else {
6989: ent->owner = 0;
6990: while (list != NULL) {
6991: list->parent = (xmlNodePtr) ctxt->node;
6992: list->doc = ctxt->myDoc;
6993: if (list->next == NULL)
6994: ent->last = list;
6995: list = list->next;
6996: }
6997: list = ent->children;
6998: #ifdef LIBXML_LEGACY_ENABLED
6999: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7000: xmlAddEntityReference(ent, list, NULL);
7001: #endif /* LIBXML_LEGACY_ENABLED */
7002: }
7003: } else {
7004: ent->owner = 1;
7005: while (list != NULL) {
7006: list->parent = (xmlNodePtr) ent;
1.1.1.2 ! misho 7007: xmlSetTreeDoc(list, ent->doc);
1.1 misho 7008: if (list->next == NULL)
7009: ent->last = list;
7010: list = list->next;
7011: }
7012: }
7013: } else {
7014: xmlFreeNodeList(list);
7015: list = NULL;
7016: }
7017: } else if ((ret != XML_ERR_OK) &&
7018: (ret != XML_WAR_UNDECLARED_ENTITY)) {
7019: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7020: "Entity '%s' failed to parse\n", ent->name);
7021: } else if (list != NULL) {
7022: xmlFreeNodeList(list);
7023: list = NULL;
7024: }
/* keep ent->checked non-zero so that the next reference does not take
 * the first-reference path again */
7025: if (ent->checked == 0)
7026: ent->checked = 1;
7027: } else if (ent->checked != 1) {
/* entity already parsed once: add the count stored in ent->checked
 * to the running ctxt->nbentities counter */
7028: ctxt->nbentities += ent->checked;
7029: }
7030:
7031: /*
7032: * Now that the entity content has been gathered
7033: * provide it to the application, this can take different forms based
7034: * on the parsing modes.
7035: */
7036: if (ent->children == NULL) {
7037: /*
7038: * Probably running in SAX mode and the callbacks don't
7039: * build the entity content. So unless we already went
7040: * though parsing for first checking go though the entity
7041: * content to generate callbacks associated to the entity
7042: */
7043: if (was_checked != 0) {
7044: void *user_data;
7045: /*
7046: * This is a bit hackish but this seems the best
7047: * way to make sure both SAX and DOM entity support
7048: * behaves okay.
7049: */
7050: if (ctxt->userData == ctxt)
7051: user_data = NULL;
7052: else
7053: user_data = ctxt->userData;
7054:
7055: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7056: ctxt->depth++;
7057: ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7058: ent->content, user_data, NULL);
7059: ctxt->depth--;
7060: } else if (ent->etype ==
7061: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7062: ctxt->depth++;
7063: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7064: ctxt->sax, user_data, ctxt->depth,
7065: ent->URI, ent->ExternalID, NULL);
7066: ctxt->depth--;
7067: } else {
7068: ret = XML_ERR_ENTITY_PE_INTERNAL;
7069: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7070: "invalid entity type found\n", NULL);
7071: }
7072: if (ret == XML_ERR_ENTITY_LOOP) {
7073: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7074: return;
7075: }
7076: }
7077: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7078: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7079: /*
7080: * Entity reference callback comes second, it's somewhat
7081: * superfluous but a compatibility to historical behaviour
7082: */
7083: ctxt->sax->reference(ctxt->userData, ent->name);
7084: }
7085: return;
7086: }
7087:
7088: /*
7089: * If we didn't get any children for the entity being built
7090: */
/* NOTE(review): this point is only reached when ent->children is not
 * NULL (the NULL case returned above); the wording of the comment above
 * does not seem to match the condition below - confirm. */
7091: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7092: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7093: /*
7094: * Create a node.
7095: */
7096: ctxt->sax->reference(ctxt->userData, ent->name);
7097: return;
7098: }
7099:
7100: if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7101: /*
7102: * There is a problem on the handling of _private for entities
7103: * (bug 155816): Should we copy the content of the field from
7104: * the entity (possibly overwriting some value set by the user
7105: * when a copy is created), should we leave it alone, or should
7106: * we try to take care of different situations? The problem
7107: * is exacerbated by the usage of this field by the xmlReader.
7108: * To fix this bug, we look at _private on the created node
7109: * and, if it's NULL, we copy in whatever was in the entity.
7110: * If it's not NULL we leave it alone. This is somewhat of a
7111: * hack - maybe we should have further tests to determine
7112: * what to do.
7113: */
7114: if ((ctxt->node != NULL) && (ent->children != NULL)) {
7115: /*
7116: * Seems we are generating the DOM content, do
7117: * a simple tree copy for all references except the first
7118: * In the first occurrence list contains the replacement.
7119: * progressive == 2 means we are operating on the Reader
7120: * and since nodes are discarded we must copy all the time.
7121: */
/* Case 1: the entity does not own its children (or reader mode):
 * append a copy of every child, up to ent->last, under ctxt->node. */
7122: if (((list == NULL) && (ent->owner == 0)) ||
7123: (ctxt->parseMode == XML_PARSE_READER)) {
7124: xmlNodePtr nw = NULL, cur, firstChild = NULL;
7125:
7126: /*
7127: * when operating on a reader, the entities definitions
7128: * are always owning the entities subtree.
7129: if (ctxt->parseMode == XML_PARSE_READER)
7130: ent->owner = 1;
7131: */
7132:
7133: cur = ent->children;
7134: while (cur != NULL) {
7135: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7136: if (nw != NULL) {
7137: if (nw->_private == NULL)
7138: nw->_private = cur->_private;
7139: if (firstChild == NULL){
7140: firstChild = nw;
7141: }
7142: nw = xmlAddChild(ctxt->node, nw);
7143: }
7144: if (cur == ent->last) {
7145: /*
7146: * needed to detect some strange empty
7147: * node cases in the reader tests
7148: */
7149: if ((ctxt->parseMode == XML_PARSE_READER) &&
7150: (nw != NULL) &&
7151: (nw->type == XML_ELEMENT_NODE) &&
7152: (nw->children == NULL))
7153: nw->extra = 1;
7154:
7155: break;
7156: }
7157: cur = cur->next;
7158: }
7159: #ifdef LIBXML_LEGACY_ENABLED
7160: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7161: xmlAddEntityReference(ent, firstChild, nw);
7162: #endif /* LIBXML_LEGACY_ENABLED */
/* Case 2: the entity owns its children and this is not the first
 * occurrence (list == NULL): the original nodes are moved under
 * ctxt->node and copies of them become the entity's children. */
7163: } else if (list == NULL) {
7164: xmlNodePtr nw = NULL, cur, next, last,
7165: firstChild = NULL;
7166: /*
7167: * Copy the entity child list and make it the new
7168: * entity child list. The goal is to make sure any
7169: * ID or REF referenced will be the one from the
7170: * document content and not the entity copy.
7171: */
7172: cur = ent->children;
7173: ent->children = NULL;
7174: last = ent->last;
7175: ent->last = NULL;
7176: while (cur != NULL) {
7177: next = cur->next;
7178: cur->next = NULL;
7179: cur->parent = NULL;
7180: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7181: if (nw != NULL) {
7182: if (nw->_private == NULL)
7183: nw->_private = cur->_private;
/* NOTE(review): firstChild records the original node (cur) here,
 * whereas the branch above records the copy (nw) - confirm intended. */
7184: if (firstChild == NULL){
7185: firstChild = cur;
7186: }
7187: xmlAddChild((xmlNodePtr) ent, nw);
7188: xmlAddChild(ctxt->node, cur);
7189: }
7190: if (cur == last)
7191: break;
7192: cur = next;
7193: }
7194: if (ent->owner == 0)
7195: ent->owner = 1;
7196: #ifdef LIBXML_LEGACY_ENABLED
7197: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7198: xmlAddEntityReference(ent, firstChild, nw);
7199: #endif /* LIBXML_LEGACY_ENABLED */
/* Case 3: first occurrence (list != NULL): the freshly parsed children
 * are linked directly under ctxt->node. */
7200: } else {
7201: const xmlChar *nbktext;
7202:
7203: /*
7204: * the name change is to avoid coalescing of the
7205: * node with a possible previous text one which
7206: * would make ent->children a dangling pointer
7207: */
7208: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7209: -1);
7210: if (ent->children->type == XML_TEXT_NODE)
7211: ent->children->name = nbktext;
7212: if ((ent->last != ent->children) &&
7213: (ent->last->type == XML_TEXT_NODE))
7214: ent->last->name = nbktext;
7215: xmlAddChildList(ctxt->node, ent->children);
7216: }
7217:
7218: /*
7219: * This is to avoid a nasty side effect, see
7220: * characters() in SAX.c
7221: */
7222: ctxt->nodemem = 0;
7223: ctxt->nodelen = 0;
7224: return;
7225: }
7226: }
7227: }
7228:
7229: /**
7230: * xmlParseEntityRef:
7231: * @ctxt: an XML parser context
7232: *
7233: * parse ENTITY references declarations
7234: *
7235: * [68] EntityRef ::= '&' Name ';'
7236: *
7237: * [ WFC: Entity Declared ]
7238: * In a document without any DTD, a document with only an internal DTD
7239: * subset which contains no parameter entity references, or a document
7240: * with "standalone='yes'", the Name given in the entity reference
7241: * must match that in an entity declaration, except that well-formed
7242: * documents need not declare any of the following entities: amp, lt,
7243: * gt, apos, quot. The declaration of a parameter entity must precede
7244: * any reference to it. Similarly, the declaration of a general entity
7245: * must precede any reference to it which appears in a default value in an
7246: * attribute-list declaration. Note that if entities are declared in the
7247: * external subset or in external parameter entities, a non-validating
7248: * processor is not obligated to read and process their declarations;
7249: * for such documents, the rule that an entity must be declared is a
7250: * well-formedness constraint only if standalone='yes'.
7251: *
7252: * [ WFC: Parsed Entity ]
7253: * An entity reference must not contain the name of an unparsed entity
7254: *
7255: * Returns the xmlEntityPtr if found, or NULL otherwise.
7256: */
7257: xmlEntityPtr
7258: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7259: const xmlChar *name;
7260: xmlEntityPtr ent = NULL;
7261:
7262: GROW;
7263:
7264: if (RAW != '&')
7265: return(NULL);
7266: NEXT;
7267: name = xmlParseName(ctxt);
7268: if (name == NULL) {
7269: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7270: "xmlParseEntityRef: no name\n");
7271: return(NULL);
7272: }
7273: if (RAW != ';') {
7274: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7275: return(NULL);
7276: }
7277: NEXT;
7278:
7279: /*
7280: * Predefined entites override any extra definition
7281: */
7282: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7283: ent = xmlGetPredefinedEntity(name);
7284: if (ent != NULL)
7285: return(ent);
7286: }
7287:
7288: /*
7289: * Increate the number of entity references parsed
7290: */
7291: ctxt->nbentities++;
7292:
7293: /*
7294: * Ask first SAX for entity resolution, otherwise try the
7295: * entities which may have stored in the parser context.
7296: */
7297: if (ctxt->sax != NULL) {
7298: if (ctxt->sax->getEntity != NULL)
7299: ent = ctxt->sax->getEntity(ctxt->userData, name);
7300: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7301: (ctxt->options & XML_PARSE_OLDSAX))
7302: ent = xmlGetPredefinedEntity(name);
7303: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7304: (ctxt->userData==ctxt)) {
7305: ent = xmlSAX2GetEntity(ctxt, name);
7306: }
7307: }
7308: /*
7309: * [ WFC: Entity Declared ]
7310: * In a document without any DTD, a document with only an
7311: * internal DTD subset which contains no parameter entity
7312: * references, or a document with "standalone='yes'", the
7313: * Name given in the entity reference must match that in an
7314: * entity declaration, except that well-formed documents
7315: * need not declare any of the following entities: amp, lt,
7316: * gt, apos, quot.
7317: * The declaration of a parameter entity must precede any
7318: * reference to it.
7319: * Similarly, the declaration of a general entity must
7320: * precede any reference to it which appears in a default
7321: * value in an attribute-list declaration. Note that if
7322: * entities are declared in the external subset or in
7323: * external parameter entities, a non-validating processor
7324: * is not obligated to read and process their declarations;
7325: * for such documents, the rule that an entity must be
7326: * declared is a well-formedness constraint only if
7327: * standalone='yes'.
7328: */
7329: if (ent == NULL) {
7330: if ((ctxt->standalone == 1) ||
7331: ((ctxt->hasExternalSubset == 0) &&
7332: (ctxt->hasPErefs == 0))) {
7333: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334: "Entity '%s' not defined\n", name);
7335: } else {
7336: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7337: "Entity '%s' not defined\n", name);
7338: if ((ctxt->inSubset == 0) &&
7339: (ctxt->sax != NULL) &&
7340: (ctxt->sax->reference != NULL)) {
7341: ctxt->sax->reference(ctxt->userData, name);
7342: }
7343: }
7344: ctxt->valid = 0;
7345: }
7346:
7347: /*
7348: * [ WFC: Parsed Entity ]
7349: * An entity reference must not contain the name of an
7350: * unparsed entity
7351: */
7352: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7353: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7354: "Entity reference to unparsed entity %s\n", name);
7355: }
7356:
7357: /*
7358: * [ WFC: No External Entity References ]
7359: * Attribute values cannot contain direct or indirect
7360: * entity references to external entities.
7361: */
7362: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7363: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7364: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7365: "Attribute references external entity '%s'\n", name);
7366: }
7367: /*
7368: * [ WFC: No < in Attribute Values ]
7369: * The replacement text of any entity referred to directly or
7370: * indirectly in an attribute value (other than "<") must
7371: * not contain a <.
7372: */
7373: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7374: (ent != NULL) && (ent->content != NULL) &&
7375: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7376: (xmlStrchr(ent->content, '<'))) {
7377: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7378: "'<' in entity '%s' is not allowed in attributes values\n", name);
7379: }
7380:
7381: /*
7382: * Internal check, no parameter entities here ...
7383: */
7384: else {
7385: switch (ent->etype) {
7386: case XML_INTERNAL_PARAMETER_ENTITY:
7387: case XML_EXTERNAL_PARAMETER_ENTITY:
7388: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7389: "Attempt to reference the parameter entity '%s'\n",
7390: name);
7391: break;
7392: default:
7393: break;
7394: }
7395: }
7396:
7397: /*
7398: * [ WFC: No Recursion ]
7399: * A parsed entity must not contain a recursive reference
7400: * to itself, either directly or indirectly.
7401: * Done somewhere else
7402: */
7403: return(ent);
7404: }
7405:
7406: /**
7407: * xmlParseStringEntityRef:
7408: * @ctxt: an XML parser context
7409: * @str: a pointer to an index in the string
7410: *
7411: * parse ENTITY references declarations, but this version parses it from
7412: * a string value.
7413: *
7414: * [68] EntityRef ::= '&' Name ';'
7415: *
7416: * [ WFC: Entity Declared ]
7417: * In a document without any DTD, a document with only an internal DTD
7418: * subset which contains no parameter entity references, or a document
7419: * with "standalone='yes'", the Name given in the entity reference
7420: * must match that in an entity declaration, except that well-formed
7421: * documents need not declare any of the following entities: amp, lt,
7422: * gt, apos, quot. The declaration of a parameter entity must precede
7423: * any reference to it. Similarly, the declaration of a general entity
7424: * must precede any reference to it which appears in a default value in an
7425: * attribute-list declaration. Note that if entities are declared in the
7426: * external subset or in external parameter entities, a non-validating
7427: * processor is not obligated to read and process their declarations;
7428: * for such documents, the rule that an entity must be declared is a
7429: * well-formedness constraint only if standalone='yes'.
7430: *
7431: * [ WFC: Parsed Entity ]
7432: * An entity reference must not contain the name of an unparsed entity
7433: *
7434: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7435: * is updated to the current location in the string.
7436: */
7437: static xmlEntityPtr
7438: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7439: xmlChar *name;
7440: const xmlChar *ptr;
7441: xmlChar cur;
7442: xmlEntityPtr ent = NULL;
7443:
7444: if ((str == NULL) || (*str == NULL))
7445: return(NULL);
7446: ptr = *str;
7447: cur = *ptr;
7448: if (cur != '&')
7449: return(NULL);
7450:
7451: ptr++;
7452: name = xmlParseStringName(ctxt, &ptr);
7453: if (name == NULL) {
7454: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7455: "xmlParseStringEntityRef: no name\n");
7456: *str = ptr;
7457: return(NULL);
7458: }
7459: if (*ptr != ';') {
7460: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7461: xmlFree(name);
7462: *str = ptr;
7463: return(NULL);
7464: }
7465: ptr++;
7466:
7467:
7468: /*
7469: * Predefined entites override any extra definition
7470: */
7471: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7472: ent = xmlGetPredefinedEntity(name);
7473: if (ent != NULL) {
7474: xmlFree(name);
7475: *str = ptr;
7476: return(ent);
7477: }
7478: }
7479:
7480: /*
7481: * Increate the number of entity references parsed
7482: */
7483: ctxt->nbentities++;
7484:
7485: /*
7486: * Ask first SAX for entity resolution, otherwise try the
7487: * entities which may have stored in the parser context.
7488: */
7489: if (ctxt->sax != NULL) {
7490: if (ctxt->sax->getEntity != NULL)
7491: ent = ctxt->sax->getEntity(ctxt->userData, name);
7492: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7493: ent = xmlGetPredefinedEntity(name);
7494: if ((ent == NULL) && (ctxt->userData==ctxt)) {
7495: ent = xmlSAX2GetEntity(ctxt, name);
7496: }
7497: }
7498:
7499: /*
7500: * [ WFC: Entity Declared ]
7501: * In a document without any DTD, a document with only an
7502: * internal DTD subset which contains no parameter entity
7503: * references, or a document with "standalone='yes'", the
7504: * Name given in the entity reference must match that in an
7505: * entity declaration, except that well-formed documents
7506: * need not declare any of the following entities: amp, lt,
7507: * gt, apos, quot.
7508: * The declaration of a parameter entity must precede any
7509: * reference to it.
7510: * Similarly, the declaration of a general entity must
7511: * precede any reference to it which appears in a default
7512: * value in an attribute-list declaration. Note that if
7513: * entities are declared in the external subset or in
7514: * external parameter entities, a non-validating processor
7515: * is not obligated to read and process their declarations;
7516: * for such documents, the rule that an entity must be
7517: * declared is a well-formedness constraint only if
7518: * standalone='yes'.
7519: */
7520: if (ent == NULL) {
7521: if ((ctxt->standalone == 1) ||
7522: ((ctxt->hasExternalSubset == 0) &&
7523: (ctxt->hasPErefs == 0))) {
7524: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7525: "Entity '%s' not defined\n", name);
7526: } else {
7527: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7528: "Entity '%s' not defined\n",
7529: name);
7530: }
7531: /* TODO ? check regressions ctxt->valid = 0; */
7532: }
7533:
7534: /*
7535: * [ WFC: Parsed Entity ]
7536: * An entity reference must not contain the name of an
7537: * unparsed entity
7538: */
7539: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7540: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7541: "Entity reference to unparsed entity %s\n", name);
7542: }
7543:
7544: /*
7545: * [ WFC: No External Entity References ]
7546: * Attribute values cannot contain direct or indirect
7547: * entity references to external entities.
7548: */
7549: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7550: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7551: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7552: "Attribute references external entity '%s'\n", name);
7553: }
7554: /*
7555: * [ WFC: No < in Attribute Values ]
7556: * The replacement text of any entity referred to directly or
7557: * indirectly in an attribute value (other than "<") must
7558: * not contain a <.
7559: */
7560: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561: (ent != NULL) && (ent->content != NULL) &&
7562: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7563: (xmlStrchr(ent->content, '<'))) {
7564: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7565: "'<' in entity '%s' is not allowed in attributes values\n",
7566: name);
7567: }
7568:
7569: /*
7570: * Internal check, no parameter entities here ...
7571: */
7572: else {
7573: switch (ent->etype) {
7574: case XML_INTERNAL_PARAMETER_ENTITY:
7575: case XML_EXTERNAL_PARAMETER_ENTITY:
7576: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7577: "Attempt to reference the parameter entity '%s'\n",
7578: name);
7579: break;
7580: default:
7581: break;
7582: }
7583: }
7584:
7585: /*
7586: * [ WFC: No Recursion ]
7587: * A parsed entity must not contain a recursive reference
7588: * to itself, either directly or indirectly.
7589: * Done somewhere else
7590: */
7591:
7592: xmlFree(name);
7593: *str = ptr;
7594: return(ent);
7595: }
7596:
7597: /**
7598: * xmlParsePEReference:
7599: * @ctxt: an XML parser context
7600: *
7601: * parse PEReference declarations
7602: * The entity content is handled directly by pushing it's content as
7603: * a new input stream.
7604: *
7605: * [69] PEReference ::= '%' Name ';'
7606: *
7607: * [ WFC: No Recursion ]
7608: * A parsed entity must not contain a recursive
7609: * reference to itself, either directly or indirectly.
7610: *
7611: * [ WFC: Entity Declared ]
7612: * In a document without any DTD, a document with only an internal DTD
7613: * subset which contains no parameter entity references, or a document
7614: * with "standalone='yes'", ... ... The declaration of a parameter
7615: * entity must precede any reference to it...
7616: *
7617: * [ VC: Entity Declared ]
7618: * In a document with an external subset or external parameter entities
7619: * with "standalone='no'", ... ... The declaration of a parameter entity
7620: * must precede any reference to it...
7621: *
7622: * [ WFC: In DTD ]
7623: * Parameter-entity references may only appear in the DTD.
7624: * NOTE: misleading but this is handled.
7625: */
7626: void
7627: xmlParsePEReference(xmlParserCtxtPtr ctxt)
7628: {
7629: const xmlChar *name;
7630: xmlEntityPtr entity = NULL;
7631: xmlParserInputPtr input;
7632:
7633: if (RAW != '%')
7634: return;
7635: NEXT;
7636: name = xmlParseName(ctxt);
7637: if (name == NULL) {
7638: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7639: "xmlParsePEReference: no name\n");
7640: return;
7641: }
7642: if (RAW != ';') {
7643: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7644: return;
7645: }
7646:
7647: NEXT;
7648:
7649: /*
7650: * Increate the number of entity references parsed
7651: */
7652: ctxt->nbentities++;
7653:
7654: /*
7655: * Request the entity from SAX
7656: */
7657: if ((ctxt->sax != NULL) &&
7658: (ctxt->sax->getParameterEntity != NULL))
7659: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7660: name);
7661: if (entity == NULL) {
7662: /*
7663: * [ WFC: Entity Declared ]
7664: * In a document without any DTD, a document with only an
7665: * internal DTD subset which contains no parameter entity
7666: * references, or a document with "standalone='yes'", ...
7667: * ... The declaration of a parameter entity must precede
7668: * any reference to it...
7669: */
7670: if ((ctxt->standalone == 1) ||
7671: ((ctxt->hasExternalSubset == 0) &&
7672: (ctxt->hasPErefs == 0))) {
7673: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7674: "PEReference: %%%s; not found\n",
7675: name);
7676: } else {
7677: /*
7678: * [ VC: Entity Declared ]
7679: * In a document with an external subset or external
7680: * parameter entities with "standalone='no'", ...
7681: * ... The declaration of a parameter entity must
7682: * precede any reference to it...
7683: */
7684: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7685: "PEReference: %%%s; not found\n",
7686: name, NULL);
7687: ctxt->valid = 0;
7688: }
7689: } else {
7690: /*
7691: * Internal checking in case the entity quest barfed
7692: */
7693: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7694: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7695: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7696: "Internal: %%%s; is not a parameter entity\n",
7697: name, NULL);
7698: } else if (ctxt->input->free != deallocblankswrapper) {
7699: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7700: if (xmlPushInput(ctxt, input) < 0)
7701: return;
7702: } else {
7703: /*
7704: * TODO !!!
7705: * handle the extra spaces added before and after
7706: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7707: */
7708: input = xmlNewEntityInputStream(ctxt, entity);
7709: if (xmlPushInput(ctxt, input) < 0)
7710: return;
7711: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7712: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7713: (IS_BLANK_CH(NXT(5)))) {
7714: xmlParseTextDecl(ctxt);
7715: if (ctxt->errNo ==
7716: XML_ERR_UNSUPPORTED_ENCODING) {
7717: /*
7718: * The XML REC instructs us to stop parsing
7719: * right here
7720: */
7721: ctxt->instate = XML_PARSER_EOF;
7722: return;
7723: }
7724: }
7725: }
7726: }
7727: ctxt->hasPErefs = 1;
7728: }
7729:
7730: /**
7731: * xmlLoadEntityContent:
7732: * @ctxt: an XML parser context
7733: * @entity: an unloaded system entity
7734: *
7735: * Load the original content of the given system entity from the
7736: * ExternalID/SystemID given. This is to be used for Included in Literal
7737: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7738: *
7739: * Returns 0 in case of success and -1 in case of failure
7740: */
7741: static int
7742: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7743: xmlParserInputPtr input;
7744: xmlBufferPtr buf;
7745: int l, c;
7746: int count = 0;
7747:
7748: if ((ctxt == NULL) || (entity == NULL) ||
7749: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7750: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7751: (entity->content != NULL)) {
7752: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7753: "xmlLoadEntityContent parameter error");
7754: return(-1);
7755: }
7756:
7757: if (xmlParserDebugEntities)
7758: xmlGenericError(xmlGenericErrorContext,
7759: "Reading %s entity content input\n", entity->name);
7760:
7761: buf = xmlBufferCreate();
7762: if (buf == NULL) {
7763: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7764: "xmlLoadEntityContent parameter error");
7765: return(-1);
7766: }
7767:
7768: input = xmlNewEntityInputStream(ctxt, entity);
7769: if (input == NULL) {
7770: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7771: "xmlLoadEntityContent input error");
7772: xmlBufferFree(buf);
7773: return(-1);
7774: }
7775:
7776: /*
7777: * Push the entity as the current input, read char by char
7778: * saving to the buffer until the end of the entity or an error
7779: */
7780: if (xmlPushInput(ctxt, input) < 0) {
7781: xmlBufferFree(buf);
7782: return(-1);
7783: }
7784:
7785: GROW;
7786: c = CUR_CHAR(l);
7787: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7788: (IS_CHAR(c))) {
7789: xmlBufferAdd(buf, ctxt->input->cur, l);
7790: if (count++ > 100) {
7791: count = 0;
7792: GROW;
7793: }
7794: NEXTL(l);
7795: c = CUR_CHAR(l);
7796: }
7797:
7798: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7799: xmlPopInput(ctxt);
7800: } else if (!IS_CHAR(c)) {
7801: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7802: "xmlLoadEntityContent: invalid char value %d\n",
7803: c);
7804: xmlBufferFree(buf);
7805: return(-1);
7806: }
7807: entity->content = buf->content;
7808: buf->content = NULL;
7809: xmlBufferFree(buf);
7810:
7811: return(0);
7812: }
7813:
7814: /**
7815: * xmlParseStringPEReference:
7816: * @ctxt: an XML parser context
7817: * @str: a pointer to an index in the string
7818: *
7819: * parse PEReference declarations
7820: *
7821: * [69] PEReference ::= '%' Name ';'
7822: *
7823: * [ WFC: No Recursion ]
7824: * A parsed entity must not contain a recursive
7825: * reference to itself, either directly or indirectly.
7826: *
7827: * [ WFC: Entity Declared ]
7828: * In a document without any DTD, a document with only an internal DTD
7829: * subset which contains no parameter entity references, or a document
7830: * with "standalone='yes'", ... ... The declaration of a parameter
7831: * entity must precede any reference to it...
7832: *
7833: * [ VC: Entity Declared ]
7834: * In a document with an external subset or external parameter entities
7835: * with "standalone='no'", ... ... The declaration of a parameter entity
7836: * must precede any reference to it...
7837: *
7838: * [ WFC: In DTD ]
7839: * Parameter-entity references may only appear in the DTD.
7840: * NOTE: misleading but this is handled.
7841: *
7842: * Returns the string of the entity content.
7843: * str is updated to the current value of the index
7844: */
7845: static xmlEntityPtr
7846: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7847: const xmlChar *ptr;
7848: xmlChar cur;
7849: xmlChar *name;
7850: xmlEntityPtr entity = NULL;
7851:
7852: if ((str == NULL) || (*str == NULL)) return(NULL);
7853: ptr = *str;
7854: cur = *ptr;
7855: if (cur != '%')
7856: return(NULL);
7857: ptr++;
7858: name = xmlParseStringName(ctxt, &ptr);
7859: if (name == NULL) {
7860: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7861: "xmlParseStringPEReference: no name\n");
7862: *str = ptr;
7863: return(NULL);
7864: }
7865: cur = *ptr;
7866: if (cur != ';') {
7867: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7868: xmlFree(name);
7869: *str = ptr;
7870: return(NULL);
7871: }
7872: ptr++;
7873:
7874: /*
7875: * Increate the number of entity references parsed
7876: */
7877: ctxt->nbentities++;
7878:
7879: /*
7880: * Request the entity from SAX
7881: */
7882: if ((ctxt->sax != NULL) &&
7883: (ctxt->sax->getParameterEntity != NULL))
7884: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7885: name);
7886: if (entity == NULL) {
7887: /*
7888: * [ WFC: Entity Declared ]
7889: * In a document without any DTD, a document with only an
7890: * internal DTD subset which contains no parameter entity
7891: * references, or a document with "standalone='yes'", ...
7892: * ... The declaration of a parameter entity must precede
7893: * any reference to it...
7894: */
7895: if ((ctxt->standalone == 1) ||
7896: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7897: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7898: "PEReference: %%%s; not found\n", name);
7899: } else {
7900: /*
7901: * [ VC: Entity Declared ]
7902: * In a document with an external subset or external
7903: * parameter entities with "standalone='no'", ...
7904: * ... The declaration of a parameter entity must
7905: * precede any reference to it...
7906: */
7907: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7908: "PEReference: %%%s; not found\n",
7909: name, NULL);
7910: ctxt->valid = 0;
7911: }
7912: } else {
7913: /*
7914: * Internal checking in case the entity quest barfed
7915: */
7916: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7917: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7918: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7919: "%%%s; is not a parameter entity\n",
7920: name, NULL);
7921: }
7922: }
7923: ctxt->hasPErefs = 1;
7924: xmlFree(name);
7925: *str = ptr;
7926: return(entity);
7927: }
7928:
7929: /**
7930: * xmlParseDocTypeDecl:
7931: * @ctxt: an XML parser context
7932: *
7933: * parse a DOCTYPE declaration
7934: *
7935: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7936: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7937: *
7938: * [ VC: Root Element Type ]
7939: * The Name in the document type declaration must match the element
7940: * type of the root element.
7941: */
7942:
7943: void
7944: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7945: const xmlChar *name = NULL;
7946: xmlChar *ExternalID = NULL;
7947: xmlChar *URI = NULL;
7948:
7949: /*
7950: * We know that '<!DOCTYPE' has been detected.
7951: */
7952: SKIP(9);
7953:
7954: SKIP_BLANKS;
7955:
7956: /*
7957: * Parse the DOCTYPE name.
7958: */
7959: name = xmlParseName(ctxt);
7960: if (name == NULL) {
7961: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7962: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7963: }
7964: ctxt->intSubName = name;
7965:
7966: SKIP_BLANKS;
7967:
7968: /*
7969: * Check for SystemID and ExternalID
7970: */
7971: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7972:
7973: if ((URI != NULL) || (ExternalID != NULL)) {
7974: ctxt->hasExternalSubset = 1;
7975: }
7976: ctxt->extSubURI = URI;
7977: ctxt->extSubSystem = ExternalID;
7978:
7979: SKIP_BLANKS;
7980:
7981: /*
7982: * Create and update the internal subset.
7983: */
7984: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7985: (!ctxt->disableSAX))
7986: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7987:
7988: /*
7989: * Is there any internal subset declarations ?
7990: * they are handled separately in xmlParseInternalSubset()
7991: */
7992: if (RAW == '[')
7993: return;
7994:
7995: /*
7996: * We should be at the end of the DOCTYPE declaration.
7997: */
7998: if (RAW != '>') {
7999: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8000: }
8001: NEXT;
8002: }
8003:
8004: /**
8005: * xmlParseInternalSubset:
8006: * @ctxt: an XML parser context
8007: *
8008: * parse the internal subset declaration
8009: *
8010: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8011: */
8012:
8013: static void
8014: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8015: /*
8016: * Is there any DTD definition ?
8017: */
8018: if (RAW == '[') {
8019: ctxt->instate = XML_PARSER_DTD;
8020: NEXT;
8021: /*
8022: * Parse the succession of Markup declarations and
8023: * PEReferences.
8024: * Subsequence (markupdecl | PEReference | S)*
8025: */
8026: while (RAW != ']') {
8027: const xmlChar *check = CUR_PTR;
8028: unsigned int cons = ctxt->input->consumed;
8029:
8030: SKIP_BLANKS;
8031: xmlParseMarkupDecl(ctxt);
8032: xmlParsePEReference(ctxt);
8033:
8034: /*
8035: * Pop-up of finished entities.
8036: */
8037: while ((RAW == 0) && (ctxt->inputNr > 1))
8038: xmlPopInput(ctxt);
8039:
8040: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8041: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8042: "xmlParseInternalSubset: error detected in Markup declaration\n");
8043: break;
8044: }
8045: }
8046: if (RAW == ']') {
8047: NEXT;
8048: SKIP_BLANKS;
8049: }
8050: }
8051:
8052: /*
8053: * We should be at the end of the DOCTYPE declaration.
8054: */
8055: if (RAW != '>') {
8056: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8057: }
8058: NEXT;
8059: }
8060:
8061: #ifdef LIBXML_SAX1_ENABLED
8062: /**
8063: * xmlParseAttribute:
8064: * @ctxt: an XML parser context
8065: * @value: a xmlChar ** used to store the value of the attribute
8066: *
8067: * parse an attribute
8068: *
8069: * [41] Attribute ::= Name Eq AttValue
8070: *
8071: * [ WFC: No External Entity References ]
8072: * Attribute values cannot contain direct or indirect entity references
8073: * to external entities.
8074: *
8075: * [ WFC: No < in Attribute Values ]
8076: * The replacement text of any entity referred to directly or indirectly in
8077: * an attribute value (other than "<") must not contain a <.
8078: *
8079: * [ VC: Attribute Value Type ]
8080: * The attribute must have been declared; the value must be of the type
8081: * declared for it.
8082: *
8083: * [25] Eq ::= S? '=' S?
8084: *
8085: * With namespace:
8086: *
8087: * [NS 11] Attribute ::= QName Eq AttValue
8088: *
8089: * Also the case QName == xmlns:??? is handled independently as a namespace
8090: * definition.
8091: *
8092: * Returns the attribute name, and the value in *value.
8093: */
8094:
8095: const xmlChar *
8096: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8097: const xmlChar *name;
8098: xmlChar *val;
8099:
8100: *value = NULL;
8101: GROW;
8102: name = xmlParseName(ctxt);
8103: if (name == NULL) {
8104: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8105: "error parsing attribute name\n");
8106: return(NULL);
8107: }
8108:
8109: /*
8110: * read the value
8111: */
8112: SKIP_BLANKS;
8113: if (RAW == '=') {
8114: NEXT;
8115: SKIP_BLANKS;
8116: val = xmlParseAttValue(ctxt);
8117: ctxt->instate = XML_PARSER_CONTENT;
8118: } else {
8119: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8120: "Specification mandate value for attribute %s\n", name);
8121: return(NULL);
8122: }
8123:
8124: /*
8125: * Check that xml:lang conforms to the specification
8126: * No more registered as an error, just generate a warning now
8127: * since this was deprecated in XML second edition
8128: */
8129: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8130: if (!xmlCheckLanguageID(val)) {
8131: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8132: "Malformed value for xml:lang : %s\n",
8133: val, NULL);
8134: }
8135: }
8136:
8137: /*
8138: * Check that xml:space conforms to the specification
8139: */
8140: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8141: if (xmlStrEqual(val, BAD_CAST "default"))
8142: *(ctxt->space) = 0;
8143: else if (xmlStrEqual(val, BAD_CAST "preserve"))
8144: *(ctxt->space) = 1;
8145: else {
8146: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8147: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8148: val, NULL);
8149: }
8150: }
8151:
8152: *value = val;
8153: return(name);
8154: }
8155:
8156: /**
8157: * xmlParseStartTag:
8158: * @ctxt: an XML parser context
8159: *
8160: * parse a start of tag either for rule element or
8161: * EmptyElement. In both case we don't parse the tag closing chars.
8162: *
8163: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8164: *
8165: * [ WFC: Unique Att Spec ]
8166: * No attribute name may appear more than once in the same start-tag or
8167: * empty-element tag.
8168: *
8169: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8170: *
8171: * [ WFC: Unique Att Spec ]
8172: * No attribute name may appear more than once in the same start-tag or
8173: * empty-element tag.
8174: *
8175: * With namespace:
8176: *
8177: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8178: *
8179: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8180: *
8181: * Returns the element name parsed
8182: */
8183:
8184: const xmlChar *
8185: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8186: const xmlChar *name;
8187: const xmlChar *attname;
8188: xmlChar *attvalue;
8189: const xmlChar **atts = ctxt->atts;
8190: int nbatts = 0;
8191: int maxatts = ctxt->maxatts;
8192: int i;
8193:
8194: if (RAW != '<') return(NULL);
8195: NEXT1;
8196:
8197: name = xmlParseName(ctxt);
8198: if (name == NULL) {
8199: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8200: "xmlParseStartTag: invalid element name\n");
8201: return(NULL);
8202: }
8203:
8204: /*
8205: * Now parse the attributes, it ends up with the ending
8206: *
8207: * (S Attribute)* S?
8208: */
8209: SKIP_BLANKS;
8210: GROW;
8211:
8212: while ((RAW != '>') &&
8213: ((RAW != '/') || (NXT(1) != '>')) &&
8214: (IS_BYTE_CHAR(RAW))) {
8215: const xmlChar *q = CUR_PTR;
8216: unsigned int cons = ctxt->input->consumed;
8217:
8218: attname = xmlParseAttribute(ctxt, &attvalue);
8219: if ((attname != NULL) && (attvalue != NULL)) {
8220: /*
8221: * [ WFC: Unique Att Spec ]
8222: * No attribute name may appear more than once in the same
8223: * start-tag or empty-element tag.
8224: */
8225: for (i = 0; i < nbatts;i += 2) {
8226: if (xmlStrEqual(atts[i], attname)) {
8227: xmlErrAttributeDup(ctxt, NULL, attname);
8228: xmlFree(attvalue);
8229: goto failed;
8230: }
8231: }
8232: /*
8233: * Add the pair to atts
8234: */
8235: if (atts == NULL) {
8236: maxatts = 22; /* allow for 10 attrs by default */
8237: atts = (const xmlChar **)
8238: xmlMalloc(maxatts * sizeof(xmlChar *));
8239: if (atts == NULL) {
8240: xmlErrMemory(ctxt, NULL);
8241: if (attvalue != NULL)
8242: xmlFree(attvalue);
8243: goto failed;
8244: }
8245: ctxt->atts = atts;
8246: ctxt->maxatts = maxatts;
8247: } else if (nbatts + 4 > maxatts) {
8248: const xmlChar **n;
8249:
8250: maxatts *= 2;
8251: n = (const xmlChar **) xmlRealloc((void *) atts,
8252: maxatts * sizeof(const xmlChar *));
8253: if (n == NULL) {
8254: xmlErrMemory(ctxt, NULL);
8255: if (attvalue != NULL)
8256: xmlFree(attvalue);
8257: goto failed;
8258: }
8259: atts = n;
8260: ctxt->atts = atts;
8261: ctxt->maxatts = maxatts;
8262: }
8263: atts[nbatts++] = attname;
8264: atts[nbatts++] = attvalue;
8265: atts[nbatts] = NULL;
8266: atts[nbatts + 1] = NULL;
8267: } else {
8268: if (attvalue != NULL)
8269: xmlFree(attvalue);
8270: }
8271:
8272: failed:
8273:
8274: GROW
8275: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8276: break;
8277: if (!IS_BLANK_CH(RAW)) {
8278: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8279: "attributes construct error\n");
8280: }
8281: SKIP_BLANKS;
8282: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8283: (attname == NULL) && (attvalue == NULL)) {
8284: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8285: "xmlParseStartTag: problem parsing attributes\n");
8286: break;
8287: }
8288: SHRINK;
8289: GROW;
8290: }
8291:
8292: /*
8293: * SAX: Start of Element !
8294: */
8295: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8296: (!ctxt->disableSAX)) {
8297: if (nbatts > 0)
8298: ctxt->sax->startElement(ctxt->userData, name, atts);
8299: else
8300: ctxt->sax->startElement(ctxt->userData, name, NULL);
8301: }
8302:
8303: if (atts != NULL) {
8304: /* Free only the content strings */
8305: for (i = 1;i < nbatts;i+=2)
8306: if (atts[i] != NULL)
8307: xmlFree((xmlChar *) atts[i]);
8308: }
8309: return(name);
8310: }
8311:
8312: /**
8313: * xmlParseEndTag1:
8314: * @ctxt: an XML parser context
8315: * @line: line of the start tag
8316: * @nsNr: number of namespaces on the start tag
8317: *
8318: * parse an end of tag
8319: *
8320: * [42] ETag ::= '</' Name S? '>'
8321: *
8322: * With namespace
8323: *
8324: * [NS 9] ETag ::= '</' QName S? '>'
8325: */
8326:
8327: static void
8328: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8329: const xmlChar *name;
8330:
8331: GROW;
8332: if ((RAW != '<') || (NXT(1) != '/')) {
8333: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8334: "xmlParseEndTag: '</' not found\n");
8335: return;
8336: }
8337: SKIP(2);
8338:
8339: name = xmlParseNameAndCompare(ctxt,ctxt->name);
8340:
8341: /*
8342: * We should definitely be at the ending "S? '>'" part
8343: */
8344: GROW;
8345: SKIP_BLANKS;
8346: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8347: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8348: } else
8349: NEXT1;
8350:
8351: /*
8352: * [ WFC: Element Type Match ]
8353: * The Name in an element's end-tag must match the element type in the
8354: * start-tag.
8355: *
8356: */
8357: if (name != (xmlChar*)1) {
8358: if (name == NULL) name = BAD_CAST "unparseable";
8359: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8360: "Opening and ending tag mismatch: %s line %d and %s\n",
8361: ctxt->name, line, name);
8362: }
8363:
8364: /*
8365: * SAX: End of Tag
8366: */
8367: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8368: (!ctxt->disableSAX))
8369: ctxt->sax->endElement(ctxt->userData, ctxt->name);
8370:
8371: namePop(ctxt);
8372: spacePop(ctxt);
8373: return;
8374: }
8375:
8376: /**
8377: * xmlParseEndTag:
8378: * @ctxt: an XML parser context
8379: *
8380: * parse an end of tag
8381: *
8382: * [42] ETag ::= '</' Name S? '>'
8383: *
8384: * With namespace
8385: *
8386: * [NS 9] ETag ::= '</' QName S? '>'
8387: */
8388:
8389: void
8390: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8391: xmlParseEndTag1(ctxt, 0);
8392: }
8393: #endif /* LIBXML_SAX1_ENABLED */
8394:
8395: /************************************************************************
8396: * *
8397: * SAX 2 specific operations *
8398: * *
8399: ************************************************************************/
8400:
8401: /*
8402: * xmlGetNamespace:
8403: * @ctxt: an XML parser context
8404: * @prefix: the prefix to lookup
8405: *
8406: * Lookup the namespace name for the @prefix (which ca be NULL)
8407: * The prefix must come from the @ctxt->dict dictionnary
8408: *
8409: * Returns the namespace name or NULL if not bound
8410: */
8411: static const xmlChar *
8412: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8413: int i;
8414:
8415: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8416: for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8417: if (ctxt->nsTab[i] == prefix) {
8418: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8419: return(NULL);
8420: return(ctxt->nsTab[i + 1]);
8421: }
8422: return(NULL);
8423: }
8424:
8425: /**
8426: * xmlParseQName:
8427: * @ctxt: an XML parser context
8428: * @prefix: pointer to store the prefix part
8429: *
8430: * parse an XML Namespace QName
8431: *
8432: * [6] QName ::= (Prefix ':')? LocalPart
8433: * [7] Prefix ::= NCName
8434: * [8] LocalPart ::= NCName
8435: *
8436: * Returns the Name parsed or NULL
8437: */
8438:
8439: static const xmlChar *
8440: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8441: const xmlChar *l, *p;
8442:
8443: GROW;
8444:
8445: l = xmlParseNCName(ctxt);
8446: if (l == NULL) {
8447: if (CUR == ':') {
8448: l = xmlParseName(ctxt);
8449: if (l != NULL) {
8450: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8451: "Failed to parse QName '%s'\n", l, NULL, NULL);
8452: *prefix = NULL;
8453: return(l);
8454: }
8455: }
8456: return(NULL);
8457: }
8458: if (CUR == ':') {
8459: NEXT;
8460: p = l;
8461: l = xmlParseNCName(ctxt);
8462: if (l == NULL) {
8463: xmlChar *tmp;
8464:
8465: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8466: "Failed to parse QName '%s:'\n", p, NULL, NULL);
8467: l = xmlParseNmtoken(ctxt);
8468: if (l == NULL)
8469: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8470: else {
8471: tmp = xmlBuildQName(l, p, NULL, 0);
8472: xmlFree((char *)l);
8473: }
8474: p = xmlDictLookup(ctxt->dict, tmp, -1);
8475: if (tmp != NULL) xmlFree(tmp);
8476: *prefix = NULL;
8477: return(p);
8478: }
8479: if (CUR == ':') {
8480: xmlChar *tmp;
8481:
8482: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8483: "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8484: NEXT;
8485: tmp = (xmlChar *) xmlParseName(ctxt);
8486: if (tmp != NULL) {
8487: tmp = xmlBuildQName(tmp, l, NULL, 0);
8488: l = xmlDictLookup(ctxt->dict, tmp, -1);
8489: if (tmp != NULL) xmlFree(tmp);
8490: *prefix = p;
8491: return(l);
8492: }
8493: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8494: l = xmlDictLookup(ctxt->dict, tmp, -1);
8495: if (tmp != NULL) xmlFree(tmp);
8496: *prefix = p;
8497: return(l);
8498: }
8499: *prefix = p;
8500: } else
8501: *prefix = NULL;
8502: return(l);
8503: }
8504:
8505: /**
8506: * xmlParseQNameAndCompare:
8507: * @ctxt: an XML parser context
8508: * @name: the localname
8509: * @prefix: the prefix, if any.
8510: *
8511: * parse an XML name and compares for match
8512: * (specialized for endtag parsing)
8513: *
8514: * Returns NULL for an illegal name, (xmlChar*) 1 for success
8515: * and the name for mismatch
8516: */
8517:
8518: static const xmlChar *
8519: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8520: xmlChar const *prefix) {
8521: const xmlChar *cmp;
8522: const xmlChar *in;
8523: const xmlChar *ret;
8524: const xmlChar *prefix2;
8525:
8526: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8527:
8528: GROW;
8529: in = ctxt->input->cur;
8530:
8531: cmp = prefix;
8532: while (*in != 0 && *in == *cmp) {
8533: ++in;
8534: ++cmp;
8535: }
8536: if ((*cmp == 0) && (*in == ':')) {
8537: in++;
8538: cmp = name;
8539: while (*in != 0 && *in == *cmp) {
8540: ++in;
8541: ++cmp;
8542: }
8543: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8544: /* success */
8545: ctxt->input->cur = in;
8546: return((const xmlChar*) 1);
8547: }
8548: }
8549: /*
8550: * all strings coms from the dictionary, equality can be done directly
8551: */
8552: ret = xmlParseQName (ctxt, &prefix2);
8553: if ((ret == name) && (prefix == prefix2))
8554: return((const xmlChar*) 1);
8555: return ret;
8556: }
8557:
8558: /**
8559: * xmlParseAttValueInternal:
8560: * @ctxt: an XML parser context
8561: * @len: attribute len result
8562: * @alloc: whether the attribute was reallocated as a new string
8563: * @normalize: if 1 then further non-CDATA normalization must be done
8564: *
8565: * parse a value for an attribute.
8566: * NOTE: if no normalization is needed, the routine will return pointers
8567: * directly from the data buffer.
8568: *
8569: * 3.3.3 Attribute-Value Normalization:
8570: * Before the value of an attribute is passed to the application or
8571: * checked for validity, the XML processor must normalize it as follows:
8572: * - a character reference is processed by appending the referenced
8573: * character to the attribute value
8574: * - an entity reference is processed by recursively processing the
8575: * replacement text of the entity
8576: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8577: * appending #x20 to the normalized value, except that only a single
8578: * #x20 is appended for a "#xD#xA" sequence that is part of an external
8579: * parsed entity or the literal entity value of an internal parsed entity
8580: * - other characters are processed by appending them to the normalized value
8581: * If the declared value is not CDATA, then the XML processor must further
8582: * process the normalized attribute value by discarding any leading and
8583: * trailing space (#x20) characters, and by replacing sequences of space
8584: * (#x20) characters by a single space (#x20) character.
8585: * All attributes for which no declaration has been read should be treated
8586: * by a non-validating parser as if declared CDATA.
8587: *
8588: * Returns the AttValue parsed or NULL. The value has to be freed by the
8589: * caller if it was copied, this can be detected by val[*len] == 0.
8590: */
8591:
8592: static xmlChar *
8593: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8594: int normalize)
8595: {
8596: xmlChar limit = 0;
8597: const xmlChar *in = NULL, *start, *end, *last;
8598: xmlChar *ret = NULL;
8599:
8600: GROW;
8601: in = (xmlChar *) CUR_PTR;
8602: if (*in != '"' && *in != '\'') {
8603: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8604: return (NULL);
8605: }
8606: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8607:
8608: /*
8609: * try to handle in this routine the most common case where no
8610: * allocation of a new string is required and where content is
8611: * pure ASCII.
8612: */
8613: limit = *in++;
8614: end = ctxt->input->end;
8615: start = in;
8616: if (in >= end) {
8617: const xmlChar *oldbase = ctxt->input->base;
8618: GROW;
8619: if (oldbase != ctxt->input->base) {
8620: long delta = ctxt->input->base - oldbase;
8621: start = start + delta;
8622: in = in + delta;
8623: }
8624: end = ctxt->input->end;
8625: }
8626: if (normalize) {
8627: /*
8628: * Skip any leading spaces
8629: */
8630: while ((in < end) && (*in != limit) &&
8631: ((*in == 0x20) || (*in == 0x9) ||
8632: (*in == 0xA) || (*in == 0xD))) {
8633: in++;
8634: start = in;
8635: if (in >= end) {
8636: const xmlChar *oldbase = ctxt->input->base;
8637: GROW;
8638: if (oldbase != ctxt->input->base) {
8639: long delta = ctxt->input->base - oldbase;
8640: start = start + delta;
8641: in = in + delta;
8642: }
8643: end = ctxt->input->end;
8644: }
8645: }
8646: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8647: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8648: if ((*in++ == 0x20) && (*in == 0x20)) break;
8649: if (in >= end) {
8650: const xmlChar *oldbase = ctxt->input->base;
8651: GROW;
8652: if (oldbase != ctxt->input->base) {
8653: long delta = ctxt->input->base - oldbase;
8654: start = start + delta;
8655: in = in + delta;
8656: }
8657: end = ctxt->input->end;
8658: }
8659: }
8660: last = in;
8661: /*
8662: * skip the trailing blanks
8663: */
8664: while ((last[-1] == 0x20) && (last > start)) last--;
8665: while ((in < end) && (*in != limit) &&
8666: ((*in == 0x20) || (*in == 0x9) ||
8667: (*in == 0xA) || (*in == 0xD))) {
8668: in++;
8669: if (in >= end) {
8670: const xmlChar *oldbase = ctxt->input->base;
8671: GROW;
8672: if (oldbase != ctxt->input->base) {
8673: long delta = ctxt->input->base - oldbase;
8674: start = start + delta;
8675: in = in + delta;
8676: last = last + delta;
8677: }
8678: end = ctxt->input->end;
8679: }
8680: }
8681: if (*in != limit) goto need_complex;
8682: } else {
8683: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8684: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8685: in++;
8686: if (in >= end) {
8687: const xmlChar *oldbase = ctxt->input->base;
8688: GROW;
8689: if (oldbase != ctxt->input->base) {
8690: long delta = ctxt->input->base - oldbase;
8691: start = start + delta;
8692: in = in + delta;
8693: }
8694: end = ctxt->input->end;
8695: }
8696: }
8697: last = in;
8698: if (*in != limit) goto need_complex;
8699: }
8700: in++;
8701: if (len != NULL) {
8702: *len = last - start;
8703: ret = (xmlChar *) start;
8704: } else {
8705: if (alloc) *alloc = 1;
8706: ret = xmlStrndup(start, last - start);
8707: }
8708: CUR_PTR = in;
8709: if (alloc) *alloc = 0;
8710: return ret;
8711: need_complex:
8712: if (alloc) *alloc = 1;
8713: return xmlParseAttValueComplex(ctxt, len, normalize);
8714: }
8715:
8716: /**
8717: * xmlParseAttribute2:
8718: * @ctxt: an XML parser context
8719: * @pref: the element prefix
8720: * @elem: the element name
8721: * @prefix: a xmlChar ** used to store the value of the attribute prefix
8722: * @value: a xmlChar ** used to store the value of the attribute
8723: * @len: an int * to save the length of the attribute
8724: * @alloc: an int * to indicate if the attribute was allocated
8725: *
8726: * parse an attribute in the new SAX2 framework.
8727: *
8728: * Returns the attribute name, and the value in *value, .
8729: */
8730:
8731: static const xmlChar *
8732: xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8733: const xmlChar * pref, const xmlChar * elem,
8734: const xmlChar ** prefix, xmlChar ** value,
8735: int *len, int *alloc)
8736: {
8737: const xmlChar *name;
8738: xmlChar *val, *internal_val = NULL;
8739: int normalize = 0;
8740:
8741: *value = NULL;
8742: GROW;
8743: name = xmlParseQName(ctxt, prefix);
8744: if (name == NULL) {
8745: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8746: "error parsing attribute name\n");
8747: return (NULL);
8748: }
8749:
8750: /*
8751: * get the type if needed
8752: */
8753: if (ctxt->attsSpecial != NULL) {
8754: int type;
8755:
8756: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8757: pref, elem, *prefix, name);
8758: if (type != 0)
8759: normalize = 1;
8760: }
8761:
8762: /*
8763: * read the value
8764: */
8765: SKIP_BLANKS;
8766: if (RAW == '=') {
8767: NEXT;
8768: SKIP_BLANKS;
8769: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8770: if (normalize) {
8771: /*
8772: * Sometimes a second normalisation pass for spaces is needed
8773: * but that only happens if charrefs or entities refernces
8774: * have been used in the attribute value, i.e. the attribute
8775: * value have been extracted in an allocated string already.
8776: */
8777: if (*alloc) {
8778: const xmlChar *val2;
8779:
8780: val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8781: if ((val2 != NULL) && (val2 != val)) {
8782: xmlFree(val);
8783: val = (xmlChar *) val2;
8784: }
8785: }
8786: }
8787: ctxt->instate = XML_PARSER_CONTENT;
8788: } else {
8789: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8790: "Specification mandate value for attribute %s\n",
8791: name);
8792: return (NULL);
8793: }
8794:
8795: if (*prefix == ctxt->str_xml) {
8796: /*
8797: * Check that xml:lang conforms to the specification
8798: * No more registered as an error, just generate a warning now
8799: * since this was deprecated in XML second edition
8800: */
8801: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8802: internal_val = xmlStrndup(val, *len);
8803: if (!xmlCheckLanguageID(internal_val)) {
8804: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8805: "Malformed value for xml:lang : %s\n",
8806: internal_val, NULL);
8807: }
8808: }
8809:
8810: /*
8811: * Check that xml:space conforms to the specification
8812: */
8813: if (xmlStrEqual(name, BAD_CAST "space")) {
8814: internal_val = xmlStrndup(val, *len);
8815: if (xmlStrEqual(internal_val, BAD_CAST "default"))
8816: *(ctxt->space) = 0;
8817: else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8818: *(ctxt->space) = 1;
8819: else {
8820: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8821: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8822: internal_val, NULL);
8823: }
8824: }
8825: if (internal_val) {
8826: xmlFree(internal_val);
8827: }
8828: }
8829:
8830: *value = val;
8831: return (name);
8832: }
8833: /**
8834: * xmlParseStartTag2:
8835: * @ctxt: an XML parser context
8836: *
8837: * parse a start of tag either for rule element or
8838: * EmptyElement. In both case we don't parse the tag closing chars.
8839: * This routine is called when running SAX2 parsing
8840: *
8841: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8842: *
8843: * [ WFC: Unique Att Spec ]
8844: * No attribute name may appear more than once in the same start-tag or
8845: * empty-element tag.
8846: *
8847: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8848: *
8849: * [ WFC: Unique Att Spec ]
8850: * No attribute name may appear more than once in the same start-tag or
8851: * empty-element tag.
8852: *
8853: * With namespace:
8854: *
8855: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8856: *
8857: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8858: *
8859: * Returns the element name parsed
8860: */
8861:
8862: static const xmlChar *
8863: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8864: const xmlChar **URI, int *tlen) {
8865: const xmlChar *localname;
8866: const xmlChar *prefix;
8867: const xmlChar *attname;
8868: const xmlChar *aprefix;
8869: const xmlChar *nsname;
8870: xmlChar *attvalue;
8871: const xmlChar **atts = ctxt->atts;
8872: int maxatts = ctxt->maxatts;
8873: int nratts, nbatts, nbdef;
8874: int i, j, nbNs, attval, oldline, oldcol;
8875: const xmlChar *base;
8876: unsigned long cur;
8877: int nsNr = ctxt->nsNr;
8878:
8879: if (RAW != '<') return(NULL);
8880: NEXT1;
8881:
8882: /*
8883: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8884: * point since the attribute values may be stored as pointers to
8885: * the buffer and calling SHRINK would destroy them !
8886: * The Shrinking is only possible once the full set of attribute
8887: * callbacks have been done.
8888: */
8889: reparse:
8890: SHRINK;
8891: base = ctxt->input->base;
8892: cur = ctxt->input->cur - ctxt->input->base;
8893: oldline = ctxt->input->line;
8894: oldcol = ctxt->input->col;
8895: nbatts = 0;
8896: nratts = 0;
8897: nbdef = 0;
8898: nbNs = 0;
8899: attval = 0;
8900: /* Forget any namespaces added during an earlier parse of this element. */
8901: ctxt->nsNr = nsNr;
8902:
8903: localname = xmlParseQName(ctxt, &prefix);
8904: if (localname == NULL) {
8905: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8906: "StartTag: invalid element name\n");
8907: return(NULL);
8908: }
8909: *tlen = ctxt->input->cur - ctxt->input->base - cur;
8910:
8911: /*
8912: * Now parse the attributes, it ends up with the ending
8913: *
8914: * (S Attribute)* S?
8915: */
8916: SKIP_BLANKS;
8917: GROW;
8918: if (ctxt->input->base != base) goto base_changed;
8919:
8920: while ((RAW != '>') &&
8921: ((RAW != '/') || (NXT(1) != '>')) &&
8922: (IS_BYTE_CHAR(RAW))) {
8923: const xmlChar *q = CUR_PTR;
8924: unsigned int cons = ctxt->input->consumed;
8925: int len = -1, alloc = 0;
8926:
8927: attname = xmlParseAttribute2(ctxt, prefix, localname,
8928: &aprefix, &attvalue, &len, &alloc);
8929: if (ctxt->input->base != base) {
8930: if ((attvalue != NULL) && (alloc != 0))
8931: xmlFree(attvalue);
8932: attvalue = NULL;
8933: goto base_changed;
8934: }
8935: if ((attname != NULL) && (attvalue != NULL)) {
8936: if (len < 0) len = xmlStrlen(attvalue);
8937: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8938: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8939: xmlURIPtr uri;
8940:
8941: if (*URL != 0) {
8942: uri = xmlParseURI((const char *) URL);
8943: if (uri == NULL) {
8944: xmlNsErr(ctxt, XML_WAR_NS_URI,
8945: "xmlns: '%s' is not a valid URI\n",
8946: URL, NULL, NULL);
8947: } else {
8948: if (uri->scheme == NULL) {
8949: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8950: "xmlns: URI %s is not absolute\n",
8951: URL, NULL, NULL);
8952: }
8953: xmlFreeURI(uri);
8954: }
8955: if (URL == ctxt->str_xml_ns) {
8956: if (attname != ctxt->str_xml) {
8957: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8958: "xml namespace URI cannot be the default namespace\n",
8959: NULL, NULL, NULL);
8960: }
8961: goto skip_default_ns;
8962: }
8963: if ((len == 29) &&
8964: (xmlStrEqual(URL,
8965: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8966: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8967: "reuse of the xmlns namespace name is forbidden\n",
8968: NULL, NULL, NULL);
8969: goto skip_default_ns;
8970: }
8971: }
8972: /*
8973: * check that it's not a defined namespace
8974: */
8975: for (j = 1;j <= nbNs;j++)
8976: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8977: break;
8978: if (j <= nbNs)
8979: xmlErrAttributeDup(ctxt, NULL, attname);
8980: else
8981: if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8982: skip_default_ns:
8983: if (alloc != 0) xmlFree(attvalue);
8984: SKIP_BLANKS;
8985: continue;
8986: }
8987: if (aprefix == ctxt->str_xmlns) {
8988: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8989: xmlURIPtr uri;
8990:
8991: if (attname == ctxt->str_xml) {
8992: if (URL != ctxt->str_xml_ns) {
8993: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8994: "xml namespace prefix mapped to wrong URI\n",
8995: NULL, NULL, NULL);
8996: }
8997: /*
8998: * Do not keep a namespace definition node
8999: */
9000: goto skip_ns;
9001: }
9002: if (URL == ctxt->str_xml_ns) {
9003: if (attname != ctxt->str_xml) {
9004: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9005: "xml namespace URI mapped to wrong prefix\n",
9006: NULL, NULL, NULL);
9007: }
9008: goto skip_ns;
9009: }
9010: if (attname == ctxt->str_xmlns) {
9011: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9012: "redefinition of the xmlns prefix is forbidden\n",
9013: NULL, NULL, NULL);
9014: goto skip_ns;
9015: }
9016: if ((len == 29) &&
9017: (xmlStrEqual(URL,
9018: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9019: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9020: "reuse of the xmlns namespace name is forbidden\n",
9021: NULL, NULL, NULL);
9022: goto skip_ns;
9023: }
9024: if ((URL == NULL) || (URL[0] == 0)) {
9025: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9026: "xmlns:%s: Empty XML namespace is not allowed\n",
9027: attname, NULL, NULL);
9028: goto skip_ns;
9029: } else {
9030: uri = xmlParseURI((const char *) URL);
9031: if (uri == NULL) {
9032: xmlNsErr(ctxt, XML_WAR_NS_URI,
9033: "xmlns:%s: '%s' is not a valid URI\n",
9034: attname, URL, NULL);
9035: } else {
9036: if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9037: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9038: "xmlns:%s: URI %s is not absolute\n",
9039: attname, URL, NULL);
9040: }
9041: xmlFreeURI(uri);
9042: }
9043: }
9044:
9045: /*
9046: * check that it's not a defined namespace
9047: */
9048: for (j = 1;j <= nbNs;j++)
9049: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9050: break;
9051: if (j <= nbNs)
9052: xmlErrAttributeDup(ctxt, aprefix, attname);
9053: else
9054: if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9055: skip_ns:
9056: if (alloc != 0) xmlFree(attvalue);
9057: SKIP_BLANKS;
9058: if (ctxt->input->base != base) goto base_changed;
9059: continue;
9060: }
9061:
9062: /*
9063: * Add the pair to atts
9064: */
9065: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9066: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9067: if (attvalue[len] == 0)
9068: xmlFree(attvalue);
9069: goto failed;
9070: }
9071: maxatts = ctxt->maxatts;
9072: atts = ctxt->atts;
9073: }
9074: ctxt->attallocs[nratts++] = alloc;
9075: atts[nbatts++] = attname;
9076: atts[nbatts++] = aprefix;
9077: atts[nbatts++] = NULL; /* the URI will be fetched later */
9078: atts[nbatts++] = attvalue;
9079: attvalue += len;
9080: atts[nbatts++] = attvalue;
9081: /*
9082: * tag if some deallocation is needed
9083: */
9084: if (alloc != 0) attval = 1;
9085: } else {
9086: if ((attvalue != NULL) && (attvalue[len] == 0))
9087: xmlFree(attvalue);
9088: }
9089:
9090: failed:
9091:
9092: GROW
9093: if (ctxt->input->base != base) goto base_changed;
9094: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9095: break;
9096: if (!IS_BLANK_CH(RAW)) {
9097: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9098: "attributes construct error\n");
9099: break;
9100: }
9101: SKIP_BLANKS;
9102: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9103: (attname == NULL) && (attvalue == NULL)) {
9104: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9105: "xmlParseStartTag: problem parsing attributes\n");
9106: break;
9107: }
9108: GROW;
9109: if (ctxt->input->base != base) goto base_changed;
9110: }
9111:
9112: /*
9113: * The attributes defaulting
9114: */
9115: if (ctxt->attsDefault != NULL) {
9116: xmlDefAttrsPtr defaults;
9117:
9118: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9119: if (defaults != NULL) {
9120: for (i = 0;i < defaults->nbAttrs;i++) {
9121: attname = defaults->values[5 * i];
9122: aprefix = defaults->values[5 * i + 1];
9123:
9124: /*
9125: * special work for namespaces defaulted defs
9126: */
9127: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9128: /*
9129: * check that it's not a defined namespace
9130: */
9131: for (j = 1;j <= nbNs;j++)
9132: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9133: break;
9134: if (j <= nbNs) continue;
9135:
9136: nsname = xmlGetNamespace(ctxt, NULL);
9137: if (nsname != defaults->values[5 * i + 2]) {
9138: if (nsPush(ctxt, NULL,
9139: defaults->values[5 * i + 2]) > 0)
9140: nbNs++;
9141: }
9142: } else if (aprefix == ctxt->str_xmlns) {
9143: /*
9144: * check that it's not a defined namespace
9145: */
9146: for (j = 1;j <= nbNs;j++)
9147: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9148: break;
9149: if (j <= nbNs) continue;
9150:
9151: nsname = xmlGetNamespace(ctxt, attname);
9152: if (nsname != defaults->values[2]) {
9153: if (nsPush(ctxt, attname,
9154: defaults->values[5 * i + 2]) > 0)
9155: nbNs++;
9156: }
9157: } else {
9158: /*
9159: * check that it's not a defined attribute
9160: */
9161: for (j = 0;j < nbatts;j+=5) {
9162: if ((attname == atts[j]) && (aprefix == atts[j+1]))
9163: break;
9164: }
9165: if (j < nbatts) continue;
9166:
9167: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9168: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9169: return(NULL);
9170: }
9171: maxatts = ctxt->maxatts;
9172: atts = ctxt->atts;
9173: }
9174: atts[nbatts++] = attname;
9175: atts[nbatts++] = aprefix;
9176: if (aprefix == NULL)
9177: atts[nbatts++] = NULL;
9178: else
9179: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9180: atts[nbatts++] = defaults->values[5 * i + 2];
9181: atts[nbatts++] = defaults->values[5 * i + 3];
9182: if ((ctxt->standalone == 1) &&
9183: (defaults->values[5 * i + 4] != NULL)) {
9184: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9185: "standalone: attribute %s on %s defaulted from external subset\n",
9186: attname, localname);
9187: }
9188: nbdef++;
9189: }
9190: }
9191: }
9192: }
9193:
9194: /*
9195: * The attributes checkings
9196: */
9197: for (i = 0; i < nbatts;i += 5) {
9198: /*
9199: * The default namespace does not apply to attribute names.
9200: */
9201: if (atts[i + 1] != NULL) {
9202: nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9203: if (nsname == NULL) {
9204: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9205: "Namespace prefix %s for %s on %s is not defined\n",
9206: atts[i + 1], atts[i], localname);
9207: }
9208: atts[i + 2] = nsname;
9209: } else
9210: nsname = NULL;
9211: /*
9212: * [ WFC: Unique Att Spec ]
9213: * No attribute name may appear more than once in the same
9214: * start-tag or empty-element tag.
9215: * As extended by the Namespace in XML REC.
9216: */
9217: for (j = 0; j < i;j += 5) {
9218: if (atts[i] == atts[j]) {
9219: if (atts[i+1] == atts[j+1]) {
9220: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9221: break;
9222: }
9223: if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9224: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9225: "Namespaced Attribute %s in '%s' redefined\n",
9226: atts[i], nsname, NULL);
9227: break;
9228: }
9229: }
9230: }
9231: }
9232:
9233: nsname = xmlGetNamespace(ctxt, prefix);
9234: if ((prefix != NULL) && (nsname == NULL)) {
9235: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9236: "Namespace prefix %s on %s is not defined\n",
9237: prefix, localname, NULL);
9238: }
9239: *pref = prefix;
9240: *URI = nsname;
9241:
9242: /*
9243: * SAX: Start of Element !
9244: */
9245: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9246: (!ctxt->disableSAX)) {
9247: if (nbNs > 0)
9248: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9249: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9250: nbatts / 5, nbdef, atts);
9251: else
9252: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9253: nsname, 0, NULL, nbatts / 5, nbdef, atts);
9254: }
9255:
9256: /*
9257: * Free up attribute allocated strings if needed
9258: */
9259: if (attval != 0) {
9260: for (i = 3,j = 0; j < nratts;i += 5,j++)
9261: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9262: xmlFree((xmlChar *) atts[i]);
9263: }
9264:
9265: return(localname);
9266:
9267: base_changed:
9268: /*
9269: * the attribute strings are valid iif the base didn't changed
9270: */
9271: if (attval != 0) {
9272: for (i = 3,j = 0; j < nratts;i += 5,j++)
9273: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9274: xmlFree((xmlChar *) atts[i]);
9275: }
9276: ctxt->input->cur = ctxt->input->base + cur;
9277: ctxt->input->line = oldline;
9278: ctxt->input->col = oldcol;
9279: if (ctxt->wellFormed == 1) {
9280: goto reparse;
9281: }
9282: return(NULL);
9283: }
9284:
9285: /**
9286: * xmlParseEndTag2:
9287: * @ctxt: an XML parser context
9288: * @line: line of the start tag
9289: * @nsNr: number of namespaces on the start tag
9290: *
9291: * parse an end of tag
9292: *
9293: * [42] ETag ::= '</' Name S? '>'
9294: *
9295: * With namespace
9296: *
9297: * [NS 9] ETag ::= '</' QName S? '>'
9298: */
9299:
9300: static void
9301: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9302: const xmlChar *URI, int line, int nsNr, int tlen) {
9303: const xmlChar *name;
9304:
9305: GROW;
9306: if ((RAW != '<') || (NXT(1) != '/')) {
9307: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9308: return;
9309: }
9310: SKIP(2);
9311:
9312: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9313: if (ctxt->input->cur[tlen] == '>') {
9314: ctxt->input->cur += tlen + 1;
9315: goto done;
9316: }
9317: ctxt->input->cur += tlen;
9318: name = (xmlChar*)1;
9319: } else {
9320: if (prefix == NULL)
9321: name = xmlParseNameAndCompare(ctxt, ctxt->name);
9322: else
9323: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9324: }
9325:
9326: /*
9327: * We should definitely be at the ending "S? '>'" part
9328: */
9329: GROW;
9330: SKIP_BLANKS;
9331: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9332: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9333: } else
9334: NEXT1;
9335:
9336: /*
9337: * [ WFC: Element Type Match ]
9338: * The Name in an element's end-tag must match the element type in the
9339: * start-tag.
9340: *
9341: */
9342: if (name != (xmlChar*)1) {
9343: if (name == NULL) name = BAD_CAST "unparseable";
9344: if ((line == 0) && (ctxt->node != NULL))
9345: line = ctxt->node->line;
9346: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9347: "Opening and ending tag mismatch: %s line %d and %s\n",
9348: ctxt->name, line, name);
9349: }
9350:
9351: /*
9352: * SAX: End of Tag
9353: */
9354: done:
9355: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9356: (!ctxt->disableSAX))
9357: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9358:
9359: spacePop(ctxt);
9360: if (nsNr != 0)
9361: nsPop(ctxt, nsNr);
9362: return;
9363: }
9364:
9365: /**
9366: * xmlParseCDSect:
9367: * @ctxt: an XML parser context
9368: *
9369: * Parse escaped pure raw content.
9370: *
9371: * [18] CDSect ::= CDStart CData CDEnd
9372: *
9373: * [19] CDStart ::= '<![CDATA['
9374: *
9375: * [20] Data ::= (Char* - (Char* ']]>' Char*))
9376: *
9377: * [21] CDEnd ::= ']]>'
9378: */
9379: void
9380: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9381: xmlChar *buf = NULL;
9382: int len = 0;
9383: int size = XML_PARSER_BUFFER_SIZE;
9384: int r, rl;
9385: int s, sl;
9386: int cur, l;
9387: int count = 0;
9388:
9389: /* Check 2.6.0 was NXT(0) not RAW */
9390: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9391: SKIP(9);
9392: } else
9393: return;
9394:
9395: ctxt->instate = XML_PARSER_CDATA_SECTION;
9396: r = CUR_CHAR(rl);
9397: if (!IS_CHAR(r)) {
9398: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9399: ctxt->instate = XML_PARSER_CONTENT;
9400: return;
9401: }
9402: NEXTL(rl);
9403: s = CUR_CHAR(sl);
9404: if (!IS_CHAR(s)) {
9405: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9406: ctxt->instate = XML_PARSER_CONTENT;
9407: return;
9408: }
9409: NEXTL(sl);
9410: cur = CUR_CHAR(l);
9411: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9412: if (buf == NULL) {
9413: xmlErrMemory(ctxt, NULL);
9414: return;
9415: }
9416: while (IS_CHAR(cur) &&
9417: ((r != ']') || (s != ']') || (cur != '>'))) {
9418: if (len + 5 >= size) {
9419: xmlChar *tmp;
9420:
9421: size *= 2;
9422: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9423: if (tmp == NULL) {
9424: xmlFree(buf);
9425: xmlErrMemory(ctxt, NULL);
9426: return;
9427: }
9428: buf = tmp;
9429: }
9430: COPY_BUF(rl,buf,len,r);
9431: r = s;
9432: rl = sl;
9433: s = cur;
9434: sl = l;
9435: count++;
9436: if (count > 50) {
9437: GROW;
9438: count = 0;
9439: }
9440: NEXTL(l);
9441: cur = CUR_CHAR(l);
9442: }
9443: buf[len] = 0;
9444: ctxt->instate = XML_PARSER_CONTENT;
9445: if (cur != '>') {
9446: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9447: "CData section not finished\n%.50s\n", buf);
9448: xmlFree(buf);
9449: return;
9450: }
9451: NEXTL(l);
9452:
9453: /*
9454: * OK the buffer is to be consumed as cdata.
9455: */
9456: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9457: if (ctxt->sax->cdataBlock != NULL)
9458: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9459: else if (ctxt->sax->characters != NULL)
9460: ctxt->sax->characters(ctxt->userData, buf, len);
9461: }
9462: xmlFree(buf);
9463: }
9464:
9465: /**
9466: * xmlParseContent:
9467: * @ctxt: an XML parser context
9468: *
9469: * Parse a content:
9470: *
9471: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9472: */
9473:
9474: void
9475: xmlParseContent(xmlParserCtxtPtr ctxt) {
9476: GROW;
9477: while ((RAW != 0) &&
9478: ((RAW != '<') || (NXT(1) != '/')) &&
9479: (ctxt->instate != XML_PARSER_EOF)) {
9480: const xmlChar *test = CUR_PTR;
9481: unsigned int cons = ctxt->input->consumed;
9482: const xmlChar *cur = ctxt->input->cur;
9483:
9484: /*
9485: * First case : a Processing Instruction.
9486: */
9487: if ((*cur == '<') && (cur[1] == '?')) {
9488: xmlParsePI(ctxt);
9489: }
9490:
9491: /*
9492: * Second case : a CDSection
9493: */
9494: /* 2.6.0 test was *cur not RAW */
9495: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9496: xmlParseCDSect(ctxt);
9497: }
9498:
9499: /*
9500: * Third case : a comment
9501: */
9502: else if ((*cur == '<') && (NXT(1) == '!') &&
9503: (NXT(2) == '-') && (NXT(3) == '-')) {
9504: xmlParseComment(ctxt);
9505: ctxt->instate = XML_PARSER_CONTENT;
9506: }
9507:
9508: /*
9509: * Fourth case : a sub-element.
9510: */
9511: else if (*cur == '<') {
9512: xmlParseElement(ctxt);
9513: }
9514:
9515: /*
9516: * Fifth case : a reference. If if has not been resolved,
9517: * parsing returns it's Name, create the node
9518: */
9519:
9520: else if (*cur == '&') {
9521: xmlParseReference(ctxt);
9522: }
9523:
9524: /*
9525: * Last case, text. Note that References are handled directly.
9526: */
9527: else {
9528: xmlParseCharData(ctxt, 0);
9529: }
9530:
9531: GROW;
9532: /*
9533: * Pop-up of finished entities.
9534: */
9535: while ((RAW == 0) && (ctxt->inputNr > 1))
9536: xmlPopInput(ctxt);
9537: SHRINK;
9538:
9539: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9540: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9541: "detected an error in element content\n");
9542: ctxt->instate = XML_PARSER_EOF;
9543: break;
9544: }
9545: }
9546: }
9547:
9548: /**
9549: * xmlParseElement:
9550: * @ctxt: an XML parser context
9551: *
9552: * parse an XML element, this is highly recursive
9553: *
9554: * [39] element ::= EmptyElemTag | STag content ETag
9555: *
9556: * [ WFC: Element Type Match ]
9557: * The Name in an element's end-tag must match the element type in the
9558: * start-tag.
9559: *
9560: */
9561:
9562: void
9563: xmlParseElement(xmlParserCtxtPtr ctxt) {
9564: const xmlChar *name;
9565: const xmlChar *prefix = NULL;
9566: const xmlChar *URI = NULL;
9567: xmlParserNodeInfo node_info;
1.1.1.2 ! misho 9568: int line, tlen = 0;
1.1 misho 9569: xmlNodePtr ret;
9570: int nsNr = ctxt->nsNr;
9571:
9572: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9573: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9574: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9575: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9576: xmlParserMaxDepth);
9577: ctxt->instate = XML_PARSER_EOF;
9578: return;
9579: }
9580:
9581: /* Capture start position */
9582: if (ctxt->record_info) {
9583: node_info.begin_pos = ctxt->input->consumed +
9584: (CUR_PTR - ctxt->input->base);
9585: node_info.begin_line = ctxt->input->line;
9586: }
9587:
9588: if (ctxt->spaceNr == 0)
9589: spacePush(ctxt, -1);
9590: else if (*ctxt->space == -2)
9591: spacePush(ctxt, -1);
9592: else
9593: spacePush(ctxt, *ctxt->space);
9594:
9595: line = ctxt->input->line;
9596: #ifdef LIBXML_SAX1_ENABLED
9597: if (ctxt->sax2)
9598: #endif /* LIBXML_SAX1_ENABLED */
9599: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9600: #ifdef LIBXML_SAX1_ENABLED
9601: else
9602: name = xmlParseStartTag(ctxt);
9603: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 ! misho 9604: if (ctxt->instate == XML_PARSER_EOF)
! 9605: return;
1.1 misho 9606: if (name == NULL) {
9607: spacePop(ctxt);
9608: return;
9609: }
9610: namePush(ctxt, name);
9611: ret = ctxt->node;
9612:
9613: #ifdef LIBXML_VALID_ENABLED
9614: /*
9615: * [ VC: Root Element Type ]
9616: * The Name in the document type declaration must match the element
9617: * type of the root element.
9618: */
9619: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9620: ctxt->node && (ctxt->node == ctxt->myDoc->children))
9621: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9622: #endif /* LIBXML_VALID_ENABLED */
9623:
9624: /*
9625: * Check for an Empty Element.
9626: */
9627: if ((RAW == '/') && (NXT(1) == '>')) {
9628: SKIP(2);
9629: if (ctxt->sax2) {
9630: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9631: (!ctxt->disableSAX))
9632: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9633: #ifdef LIBXML_SAX1_ENABLED
9634: } else {
9635: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9636: (!ctxt->disableSAX))
9637: ctxt->sax->endElement(ctxt->userData, name);
9638: #endif /* LIBXML_SAX1_ENABLED */
9639: }
9640: namePop(ctxt);
9641: spacePop(ctxt);
9642: if (nsNr != ctxt->nsNr)
9643: nsPop(ctxt, ctxt->nsNr - nsNr);
9644: if ( ret != NULL && ctxt->record_info ) {
9645: node_info.end_pos = ctxt->input->consumed +
9646: (CUR_PTR - ctxt->input->base);
9647: node_info.end_line = ctxt->input->line;
9648: node_info.node = ret;
9649: xmlParserAddNodeInfo(ctxt, &node_info);
9650: }
9651: return;
9652: }
9653: if (RAW == '>') {
9654: NEXT1;
9655: } else {
9656: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9657: "Couldn't find end of Start Tag %s line %d\n",
9658: name, line, NULL);
9659:
9660: /*
9661: * end of parsing of this node.
9662: */
9663: nodePop(ctxt);
9664: namePop(ctxt);
9665: spacePop(ctxt);
9666: if (nsNr != ctxt->nsNr)
9667: nsPop(ctxt, ctxt->nsNr - nsNr);
9668:
9669: /*
9670: * Capture end position and add node
9671: */
9672: if ( ret != NULL && ctxt->record_info ) {
9673: node_info.end_pos = ctxt->input->consumed +
9674: (CUR_PTR - ctxt->input->base);
9675: node_info.end_line = ctxt->input->line;
9676: node_info.node = ret;
9677: xmlParserAddNodeInfo(ctxt, &node_info);
9678: }
9679: return;
9680: }
9681:
9682: /*
9683: * Parse the content of the element:
9684: */
9685: xmlParseContent(ctxt);
9686: if (!IS_BYTE_CHAR(RAW)) {
9687: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9688: "Premature end of data in tag %s line %d\n",
9689: name, line, NULL);
9690:
9691: /*
9692: * end of parsing of this node.
9693: */
9694: nodePop(ctxt);
9695: namePop(ctxt);
9696: spacePop(ctxt);
9697: if (nsNr != ctxt->nsNr)
9698: nsPop(ctxt, ctxt->nsNr - nsNr);
9699: return;
9700: }
9701:
9702: /*
9703: * parse the end of tag: '</' should be here.
9704: */
9705: if (ctxt->sax2) {
9706: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9707: namePop(ctxt);
9708: }
9709: #ifdef LIBXML_SAX1_ENABLED
9710: else
9711: xmlParseEndTag1(ctxt, line);
9712: #endif /* LIBXML_SAX1_ENABLED */
9713:
9714: /*
9715: * Capture end position and add node
9716: */
9717: if ( ret != NULL && ctxt->record_info ) {
9718: node_info.end_pos = ctxt->input->consumed +
9719: (CUR_PTR - ctxt->input->base);
9720: node_info.end_line = ctxt->input->line;
9721: node_info.node = ret;
9722: xmlParserAddNodeInfo(ctxt, &node_info);
9723: }
9724: }
9725:
9726: /**
9727: * xmlParseVersionNum:
9728: * @ctxt: an XML parser context
9729: *
9730: * parse the XML version value.
9731: *
9732: * [26] VersionNum ::= '1.' [0-9]+
9733: *
9734: * In practice allow [0-9].[0-9]+ at that level
9735: *
9736: * Returns the string giving the XML version number, or NULL
9737: */
9738: xmlChar *
9739: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9740: xmlChar *buf = NULL;
9741: int len = 0;
9742: int size = 10;
9743: xmlChar cur;
9744:
9745: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9746: if (buf == NULL) {
9747: xmlErrMemory(ctxt, NULL);
9748: return(NULL);
9749: }
9750: cur = CUR;
9751: if (!((cur >= '0') && (cur <= '9'))) {
9752: xmlFree(buf);
9753: return(NULL);
9754: }
9755: buf[len++] = cur;
9756: NEXT;
9757: cur=CUR;
9758: if (cur != '.') {
9759: xmlFree(buf);
9760: return(NULL);
9761: }
9762: buf[len++] = cur;
9763: NEXT;
9764: cur=CUR;
9765: while ((cur >= '0') && (cur <= '9')) {
9766: if (len + 1 >= size) {
9767: xmlChar *tmp;
9768:
9769: size *= 2;
9770: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9771: if (tmp == NULL) {
9772: xmlFree(buf);
9773: xmlErrMemory(ctxt, NULL);
9774: return(NULL);
9775: }
9776: buf = tmp;
9777: }
9778: buf[len++] = cur;
9779: NEXT;
9780: cur=CUR;
9781: }
9782: buf[len] = 0;
9783: return(buf);
9784: }
9785:
9786: /**
9787: * xmlParseVersionInfo:
9788: * @ctxt: an XML parser context
9789: *
9790: * parse the XML version.
9791: *
9792: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9793: *
9794: * [25] Eq ::= S? '=' S?
9795: *
9796: * Returns the version string, e.g. "1.0"
9797: */
9798:
9799: xmlChar *
9800: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9801: xmlChar *version = NULL;
9802:
9803: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9804: SKIP(7);
9805: SKIP_BLANKS;
9806: if (RAW != '=') {
9807: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9808: return(NULL);
9809: }
9810: NEXT;
9811: SKIP_BLANKS;
9812: if (RAW == '"') {
9813: NEXT;
9814: version = xmlParseVersionNum(ctxt);
9815: if (RAW != '"') {
9816: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9817: } else
9818: NEXT;
9819: } else if (RAW == '\''){
9820: NEXT;
9821: version = xmlParseVersionNum(ctxt);
9822: if (RAW != '\'') {
9823: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9824: } else
9825: NEXT;
9826: } else {
9827: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9828: }
9829: }
9830: return(version);
9831: }
9832:
9833: /**
9834: * xmlParseEncName:
9835: * @ctxt: an XML parser context
9836: *
9837: * parse the XML encoding name
9838: *
9839: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9840: *
9841: * Returns the encoding name value or NULL
9842: */
9843: xmlChar *
9844: xmlParseEncName(xmlParserCtxtPtr ctxt) {
9845: xmlChar *buf = NULL;
9846: int len = 0;
9847: int size = 10;
9848: xmlChar cur;
9849:
9850: cur = CUR;
9851: if (((cur >= 'a') && (cur <= 'z')) ||
9852: ((cur >= 'A') && (cur <= 'Z'))) {
9853: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9854: if (buf == NULL) {
9855: xmlErrMemory(ctxt, NULL);
9856: return(NULL);
9857: }
9858:
9859: buf[len++] = cur;
9860: NEXT;
9861: cur = CUR;
9862: while (((cur >= 'a') && (cur <= 'z')) ||
9863: ((cur >= 'A') && (cur <= 'Z')) ||
9864: ((cur >= '0') && (cur <= '9')) ||
9865: (cur == '.') || (cur == '_') ||
9866: (cur == '-')) {
9867: if (len + 1 >= size) {
9868: xmlChar *tmp;
9869:
9870: size *= 2;
9871: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9872: if (tmp == NULL) {
9873: xmlErrMemory(ctxt, NULL);
9874: xmlFree(buf);
9875: return(NULL);
9876: }
9877: buf = tmp;
9878: }
9879: buf[len++] = cur;
9880: NEXT;
9881: cur = CUR;
9882: if (cur == 0) {
9883: SHRINK;
9884: GROW;
9885: cur = CUR;
9886: }
9887: }
9888: buf[len] = 0;
9889: } else {
9890: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9891: }
9892: return(buf);
9893: }
9894:
9895: /**
9896: * xmlParseEncodingDecl:
9897: * @ctxt: an XML parser context
9898: *
9899: * parse the XML encoding declaration
9900: *
9901: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9902: *
9903: * this setups the conversion filters.
9904: *
9905: * Returns the encoding value or NULL
9906: */
9907:
9908: const xmlChar *
9909: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9910: xmlChar *encoding = NULL;
9911:
9912: SKIP_BLANKS;
9913: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9914: SKIP(8);
9915: SKIP_BLANKS;
9916: if (RAW != '=') {
9917: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9918: return(NULL);
9919: }
9920: NEXT;
9921: SKIP_BLANKS;
9922: if (RAW == '"') {
9923: NEXT;
9924: encoding = xmlParseEncName(ctxt);
9925: if (RAW != '"') {
9926: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9927: } else
9928: NEXT;
9929: } else if (RAW == '\''){
9930: NEXT;
9931: encoding = xmlParseEncName(ctxt);
9932: if (RAW != '\'') {
9933: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9934: } else
9935: NEXT;
9936: } else {
9937: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9938: }
1.1.1.2 ! misho 9939:
! 9940: /*
! 9941: * Non standard parsing, allowing the user to ignore encoding
! 9942: */
! 9943: if (ctxt->options & XML_PARSE_IGNORE_ENC)
! 9944: return(encoding);
! 9945:
1.1 misho 9946: /*
9947: * UTF-16 encoding stwich has already taken place at this stage,
9948: * more over the little-endian/big-endian selection is already done
9949: */
9950: if ((encoding != NULL) &&
9951: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9952: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9953: /*
9954: * If no encoding was passed to the parser, that we are
9955: * using UTF-16 and no decoder is present i.e. the
9956: * document is apparently UTF-8 compatible, then raise an
9957: * encoding mismatch fatal error
9958: */
9959: if ((ctxt->encoding == NULL) &&
9960: (ctxt->input->buf != NULL) &&
9961: (ctxt->input->buf->encoder == NULL)) {
9962: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9963: "Document labelled UTF-16 but has UTF-8 content\n");
9964: }
9965: if (ctxt->encoding != NULL)
9966: xmlFree((xmlChar *) ctxt->encoding);
9967: ctxt->encoding = encoding;
9968: }
9969: /*
9970: * UTF-8 encoding is handled natively
9971: */
9972: else if ((encoding != NULL) &&
9973: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9974: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9975: if (ctxt->encoding != NULL)
9976: xmlFree((xmlChar *) ctxt->encoding);
9977: ctxt->encoding = encoding;
9978: }
9979: else if (encoding != NULL) {
9980: xmlCharEncodingHandlerPtr handler;
9981:
9982: if (ctxt->input->encoding != NULL)
9983: xmlFree((xmlChar *) ctxt->input->encoding);
9984: ctxt->input->encoding = encoding;
9985:
9986: handler = xmlFindCharEncodingHandler((const char *) encoding);
9987: if (handler != NULL) {
9988: xmlSwitchToEncoding(ctxt, handler);
9989: } else {
9990: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9991: "Unsupported encoding %s\n", encoding);
9992: return(NULL);
9993: }
9994: }
9995: }
9996: return(encoding);
9997: }
9998:
9999: /**
10000: * xmlParseSDDecl:
10001: * @ctxt: an XML parser context
10002: *
10003: * parse the XML standalone declaration
10004: *
10005: * [32] SDDecl ::= S 'standalone' Eq
10006: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10007: *
10008: * [ VC: Standalone Document Declaration ]
10009: * TODO The standalone document declaration must have the value "no"
10010: * if any external markup declarations contain declarations of:
10011: * - attributes with default values, if elements to which these
10012: * attributes apply appear in the document without specifications
10013: * of values for these attributes, or
10014: * - entities (other than amp, lt, gt, apos, quot), if references
10015: * to those entities appear in the document, or
10016: * - attributes with values subject to normalization, where the
10017: * attribute appears in the document with a value which will change
10018: * as a result of normalization, or
10019: * - element types with element content, if white space occurs directly
10020: * within any instance of those types.
10021: *
10022: * Returns:
10023: * 1 if standalone="yes"
10024: * 0 if standalone="no"
10025: * -2 if standalone attribute is missing or invalid
10026: * (A standalone value of -2 means that the XML declaration was found,
10027: * but no value was specified for the standalone attribute).
10028: */
10029:
10030: int
10031: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10032: int standalone = -2;
10033:
10034: SKIP_BLANKS;
10035: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10036: SKIP(10);
10037: SKIP_BLANKS;
10038: if (RAW != '=') {
10039: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10040: return(standalone);
10041: }
10042: NEXT;
10043: SKIP_BLANKS;
10044: if (RAW == '\''){
10045: NEXT;
10046: if ((RAW == 'n') && (NXT(1) == 'o')) {
10047: standalone = 0;
10048: SKIP(2);
10049: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10050: (NXT(2) == 's')) {
10051: standalone = 1;
10052: SKIP(3);
10053: } else {
10054: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10055: }
10056: if (RAW != '\'') {
10057: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10058: } else
10059: NEXT;
10060: } else if (RAW == '"'){
10061: NEXT;
10062: if ((RAW == 'n') && (NXT(1) == 'o')) {
10063: standalone = 0;
10064: SKIP(2);
10065: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10066: (NXT(2) == 's')) {
10067: standalone = 1;
10068: SKIP(3);
10069: } else {
10070: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10071: }
10072: if (RAW != '"') {
10073: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10074: } else
10075: NEXT;
10076: } else {
10077: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10078: }
10079: }
10080: return(standalone);
10081: }
10082:
10083: /**
10084: * xmlParseXMLDecl:
10085: * @ctxt: an XML parser context
10086: *
10087: * parse an XML declaration header
10088: *
10089: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10090: */
10091:
10092: void
10093: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10094: xmlChar *version;
10095:
10096: /*
10097: * This value for standalone indicates that the document has an
10098: * XML declaration but it does not have a standalone attribute.
10099: * It will be overwritten later if a standalone attribute is found.
10100: */
10101: ctxt->input->standalone = -2;
10102:
10103: /*
10104: * We know that '<?xml' is here.
10105: */
10106: SKIP(5);
10107:
10108: if (!IS_BLANK_CH(RAW)) {
10109: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10110: "Blank needed after '<?xml'\n");
10111: }
10112: SKIP_BLANKS;
10113:
10114: /*
10115: * We must have the VersionInfo here.
10116: */
10117: version = xmlParseVersionInfo(ctxt);
10118: if (version == NULL) {
10119: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10120: } else {
10121: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10122: /*
10123: * Changed here for XML-1.0 5th edition
10124: */
10125: if (ctxt->options & XML_PARSE_OLD10) {
10126: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10127: "Unsupported version '%s'\n",
10128: version);
10129: } else {
10130: if ((version[0] == '1') && ((version[1] == '.'))) {
10131: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10132: "Unsupported version '%s'\n",
10133: version, NULL);
10134: } else {
10135: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10136: "Unsupported version '%s'\n",
10137: version);
10138: }
10139: }
10140: }
10141: if (ctxt->version != NULL)
10142: xmlFree((void *) ctxt->version);
10143: ctxt->version = version;
10144: }
10145:
10146: /*
10147: * We may have the encoding declaration
10148: */
10149: if (!IS_BLANK_CH(RAW)) {
10150: if ((RAW == '?') && (NXT(1) == '>')) {
10151: SKIP(2);
10152: return;
10153: }
10154: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10155: }
10156: xmlParseEncodingDecl(ctxt);
10157: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10158: /*
10159: * The XML REC instructs us to stop parsing right here
10160: */
10161: return;
10162: }
10163:
10164: /*
10165: * We may have the standalone status.
10166: */
10167: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10168: if ((RAW == '?') && (NXT(1) == '>')) {
10169: SKIP(2);
10170: return;
10171: }
10172: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10173: }
10174:
10175: /*
10176: * We can grow the input buffer freely at that point
10177: */
10178: GROW;
10179:
10180: SKIP_BLANKS;
10181: ctxt->input->standalone = xmlParseSDDecl(ctxt);
10182:
10183: SKIP_BLANKS;
10184: if ((RAW == '?') && (NXT(1) == '>')) {
10185: SKIP(2);
10186: } else if (RAW == '>') {
10187: /* Deprecated old WD ... */
10188: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10189: NEXT;
10190: } else {
10191: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10192: MOVETO_ENDTAG(CUR_PTR);
10193: NEXT;
10194: }
10195: }
10196:
10197: /**
10198: * xmlParseMisc:
10199: * @ctxt: an XML parser context
10200: *
10201: * parse an XML Misc* optional field.
10202: *
10203: * [27] Misc ::= Comment | PI | S
10204: */
10205:
10206: void
10207: xmlParseMisc(xmlParserCtxtPtr ctxt) {
10208: while (((RAW == '<') && (NXT(1) == '?')) ||
10209: (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10210: IS_BLANK_CH(CUR)) {
10211: if ((RAW == '<') && (NXT(1) == '?')) {
10212: xmlParsePI(ctxt);
10213: } else if (IS_BLANK_CH(CUR)) {
10214: NEXT;
10215: } else
10216: xmlParseComment(ctxt);
10217: }
10218: }
10219:
10220: /**
10221: * xmlParseDocument:
10222: * @ctxt: an XML parser context
10223: *
10224: * parse an XML document (and build a tree if using the standard SAX
10225: * interface).
10226: *
10227: * [1] document ::= prolog element Misc*
10228: *
10229: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10230: *
10231: * Returns 0, -1 in case of error. the parser context is augmented
10232: * as a result of the parsing.
10233: */
10234:
10235: int
10236: xmlParseDocument(xmlParserCtxtPtr ctxt) {
10237: xmlChar start[4];
10238: xmlCharEncoding enc;
10239:
10240: xmlInitParser();
10241:
10242: if ((ctxt == NULL) || (ctxt->input == NULL))
10243: return(-1);
10244:
10245: GROW;
10246:
10247: /*
10248: * SAX: detecting the level.
10249: */
10250: xmlDetectSAX2(ctxt);
10251:
10252: /*
10253: * SAX: beginning of the document processing.
10254: */
10255: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10256: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10257:
10258: if ((ctxt->encoding == NULL) &&
10259: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10260: /*
10261: * Get the 4 first bytes and decode the charset
10262: * if enc != XML_CHAR_ENCODING_NONE
10263: * plug some encoding conversion routines.
10264: */
10265: start[0] = RAW;
10266: start[1] = NXT(1);
10267: start[2] = NXT(2);
10268: start[3] = NXT(3);
10269: enc = xmlDetectCharEncoding(&start[0], 4);
10270: if (enc != XML_CHAR_ENCODING_NONE) {
10271: xmlSwitchEncoding(ctxt, enc);
10272: }
10273: }
10274:
10275:
10276: if (CUR == 0) {
10277: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10278: }
10279:
10280: /*
10281: * Check for the XMLDecl in the Prolog.
10282: * do not GROW here to avoid the detected encoder to decode more
10283: * than just the first line, unless the amount of data is really
10284: * too small to hold "<?xml version="1.0" encoding="foo"
10285: */
10286: if ((ctxt->input->end - ctxt->input->cur) < 35) {
10287: GROW;
10288: }
10289: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10290:
10291: /*
10292: * Note that we will switch encoding on the fly.
10293: */
10294: xmlParseXMLDecl(ctxt);
10295: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10296: /*
10297: * The XML REC instructs us to stop parsing right here
10298: */
10299: return(-1);
10300: }
10301: ctxt->standalone = ctxt->input->standalone;
10302: SKIP_BLANKS;
10303: } else {
10304: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10305: }
10306: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10307: ctxt->sax->startDocument(ctxt->userData);
10308:
10309: /*
10310: * The Misc part of the Prolog
10311: */
10312: GROW;
10313: xmlParseMisc(ctxt);
10314:
10315: /*
10316: * Then possibly doc type declaration(s) and more Misc
10317: * (doctypedecl Misc*)?
10318: */
10319: GROW;
10320: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10321:
10322: ctxt->inSubset = 1;
10323: xmlParseDocTypeDecl(ctxt);
10324: if (RAW == '[') {
10325: ctxt->instate = XML_PARSER_DTD;
10326: xmlParseInternalSubset(ctxt);
10327: }
10328:
10329: /*
10330: * Create and update the external subset.
10331: */
10332: ctxt->inSubset = 2;
10333: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10334: (!ctxt->disableSAX))
10335: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10336: ctxt->extSubSystem, ctxt->extSubURI);
10337: ctxt->inSubset = 0;
10338:
10339: xmlCleanSpecialAttr(ctxt);
10340:
10341: ctxt->instate = XML_PARSER_PROLOG;
10342: xmlParseMisc(ctxt);
10343: }
10344:
10345: /*
10346: * Time to start parsing the tree itself
10347: */
10348: GROW;
10349: if (RAW != '<') {
10350: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10351: "Start tag expected, '<' not found\n");
10352: } else {
10353: ctxt->instate = XML_PARSER_CONTENT;
10354: xmlParseElement(ctxt);
10355: ctxt->instate = XML_PARSER_EPILOG;
10356:
10357:
10358: /*
10359: * The Misc part at the end
10360: */
10361: xmlParseMisc(ctxt);
10362:
10363: if (RAW != 0) {
10364: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10365: }
10366: ctxt->instate = XML_PARSER_EOF;
10367: }
10368:
10369: /*
10370: * SAX: end of the document processing.
10371: */
10372: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10373: ctxt->sax->endDocument(ctxt->userData);
10374:
10375: /*
10376: * Remove locally kept entity definitions if the tree was not built
10377: */
10378: if ((ctxt->myDoc != NULL) &&
10379: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10380: xmlFreeDoc(ctxt->myDoc);
10381: ctxt->myDoc = NULL;
10382: }
10383:
10384: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10385: ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10386: if (ctxt->valid)
10387: ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10388: if (ctxt->nsWellFormed)
10389: ctxt->myDoc->properties |= XML_DOC_NSVALID;
10390: if (ctxt->options & XML_PARSE_OLD10)
10391: ctxt->myDoc->properties |= XML_DOC_OLD10;
10392: }
10393: if (! ctxt->wellFormed) {
10394: ctxt->valid = 0;
10395: return(-1);
10396: }
10397: return(0);
10398: }
10399:
10400: /**
10401: * xmlParseExtParsedEnt:
10402: * @ctxt: an XML parser context
10403: *
10404: * parse a general parsed entity
10405: * An external general parsed entity is well-formed if it matches the
10406: * production labeled extParsedEnt.
10407: *
10408: * [78] extParsedEnt ::= TextDecl? content
10409: *
10410: * Returns 0, -1 in case of error. the parser context is augmented
10411: * as a result of the parsing.
10412: */
10413:
10414: int
10415: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10416: xmlChar start[4];
10417: xmlCharEncoding enc;
10418:
10419: if ((ctxt == NULL) || (ctxt->input == NULL))
10420: return(-1);
10421:
10422: xmlDefaultSAXHandlerInit();
10423:
10424: xmlDetectSAX2(ctxt);
10425:
10426: GROW;
10427:
10428: /*
10429: * SAX: beginning of the document processing.
10430: */
10431: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10432: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10433:
10434: /*
10435: * Get the 4 first bytes and decode the charset
10436: * if enc != XML_CHAR_ENCODING_NONE
10437: * plug some encoding conversion routines.
10438: */
10439: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10440: start[0] = RAW;
10441: start[1] = NXT(1);
10442: start[2] = NXT(2);
10443: start[3] = NXT(3);
10444: enc = xmlDetectCharEncoding(start, 4);
10445: if (enc != XML_CHAR_ENCODING_NONE) {
10446: xmlSwitchEncoding(ctxt, enc);
10447: }
10448: }
10449:
10450:
10451: if (CUR == 0) {
10452: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10453: }
10454:
10455: /*
10456: * Check for the XMLDecl in the Prolog.
10457: */
10458: GROW;
10459: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10460:
10461: /*
10462: * Note that we will switch encoding on the fly.
10463: */
10464: xmlParseXMLDecl(ctxt);
10465: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10466: /*
10467: * The XML REC instructs us to stop parsing right here
10468: */
10469: return(-1);
10470: }
10471: SKIP_BLANKS;
10472: } else {
10473: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10474: }
10475: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10476: ctxt->sax->startDocument(ctxt->userData);
10477:
10478: /*
10479: * Doing validity checking on chunk doesn't make sense
10480: */
10481: ctxt->instate = XML_PARSER_CONTENT;
10482: ctxt->validate = 0;
10483: ctxt->loadsubset = 0;
10484: ctxt->depth = 0;
10485:
10486: xmlParseContent(ctxt);
10487:
10488: if ((RAW == '<') && (NXT(1) == '/')) {
10489: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10490: } else if (RAW != 0) {
10491: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10492: }
10493:
10494: /*
10495: * SAX: end of the document processing.
10496: */
10497: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10498: ctxt->sax->endDocument(ctxt->userData);
10499:
10500: if (! ctxt->wellFormed) return(-1);
10501: return(0);
10502: }
10503:
10504: #ifdef LIBXML_PUSH_ENABLED
10505: /************************************************************************
10506: * *
10507: * Progressive parsing interfaces *
10508: * *
10509: ************************************************************************/
10510:
10511: /**
10512: * xmlParseLookupSequence:
10513: * @ctxt: an XML parser context
10514: * @first: the first char to lookup
10515: * @next: the next char to lookup or zero
10516: * @third: the next char to lookup or zero
10517: *
10518: * Try to find if a sequence (first, next, third) or just (first next) or
10519: * (first) is available in the input stream.
10520: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10521: * to avoid rescanning sequences of bytes, it DOES change the state of the
10522: * parser, do not use liberally.
10523: *
10524: * Returns the index to the current parsing point if the full sequence
10525: * is available, -1 otherwise.
10526: */
10527: static int
10528: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10529: xmlChar next, xmlChar third) {
10530: int base, len;
10531: xmlParserInputPtr in;
10532: const xmlChar *buf;
10533:
10534: in = ctxt->input;
10535: if (in == NULL) return(-1);
10536: base = in->cur - in->base;
10537: if (base < 0) return(-1);
10538: if (ctxt->checkIndex > base)
10539: base = ctxt->checkIndex;
10540: if (in->buf == NULL) {
10541: buf = in->base;
10542: len = in->length;
10543: } else {
10544: buf = in->buf->buffer->content;
10545: len = in->buf->buffer->use;
10546: }
10547: /* take into account the sequence length */
10548: if (third) len -= 2;
10549: else if (next) len --;
10550: for (;base < len;base++) {
10551: if (buf[base] == first) {
10552: if (third != 0) {
10553: if ((buf[base + 1] != next) ||
10554: (buf[base + 2] != third)) continue;
10555: } else if (next != 0) {
10556: if (buf[base + 1] != next) continue;
10557: }
10558: ctxt->checkIndex = 0;
10559: #ifdef DEBUG_PUSH
10560: if (next == 0)
10561: xmlGenericError(xmlGenericErrorContext,
10562: "PP: lookup '%c' found at %d\n",
10563: first, base);
10564: else if (third == 0)
10565: xmlGenericError(xmlGenericErrorContext,
10566: "PP: lookup '%c%c' found at %d\n",
10567: first, next, base);
10568: else
10569: xmlGenericError(xmlGenericErrorContext,
10570: "PP: lookup '%c%c%c' found at %d\n",
10571: first, next, third, base);
10572: #endif
10573: return(base - (in->cur - in->base));
10574: }
10575: }
10576: ctxt->checkIndex = base;
10577: #ifdef DEBUG_PUSH
10578: if (next == 0)
10579: xmlGenericError(xmlGenericErrorContext,
10580: "PP: lookup '%c' failed\n", first);
10581: else if (third == 0)
10582: xmlGenericError(xmlGenericErrorContext,
10583: "PP: lookup '%c%c' failed\n", first, next);
10584: else
10585: xmlGenericError(xmlGenericErrorContext,
10586: "PP: lookup '%c%c%c' failed\n", first, next, third);
10587: #endif
10588: return(-1);
10589: }
10590:
10591: /**
10592: * xmlParseGetLasts:
10593: * @ctxt: an XML parser context
10594: * @lastlt: pointer to store the last '<' from the input
10595: * @lastgt: pointer to store the last '>' from the input
10596: *
10597: * Lookup the last < and > in the current chunk
10598: */
10599: static void
10600: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10601: const xmlChar **lastgt) {
10602: const xmlChar *tmp;
10603:
10604: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10605: xmlGenericError(xmlGenericErrorContext,
10606: "Internal error: xmlParseGetLasts\n");
10607: return;
10608: }
10609: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10610: tmp = ctxt->input->end;
10611: tmp--;
10612: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10613: if (tmp < ctxt->input->base) {
10614: *lastlt = NULL;
10615: *lastgt = NULL;
10616: } else {
10617: *lastlt = tmp;
10618: tmp++;
10619: while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10620: if (*tmp == '\'') {
10621: tmp++;
10622: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10623: if (tmp < ctxt->input->end) tmp++;
10624: } else if (*tmp == '"') {
10625: tmp++;
10626: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10627: if (tmp < ctxt->input->end) tmp++;
10628: } else
10629: tmp++;
10630: }
10631: if (tmp < ctxt->input->end)
10632: *lastgt = tmp;
10633: else {
10634: tmp = *lastlt;
10635: tmp--;
10636: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10637: if (tmp >= ctxt->input->base)
10638: *lastgt = tmp;
10639: else
10640: *lastgt = NULL;
10641: }
10642: }
10643: } else {
10644: *lastlt = NULL;
10645: *lastgt = NULL;
10646: }
10647: }
10648: /**
10649: * xmlCheckCdataPush:
10650: * @cur: pointer to the bock of characters
10651: * @len: length of the block in bytes
10652: *
10653: * Check that the block of characters is okay as SCdata content [20]
10654: *
10655: * Returns the number of bytes to pass if okay, a negative index where an
10656: * UTF-8 error occured otherwise
10657: */
10658: static int
10659: xmlCheckCdataPush(const xmlChar *utf, int len) {
10660: int ix;
10661: unsigned char c;
10662: int codepoint;
10663:
10664: if ((utf == NULL) || (len <= 0))
10665: return(0);
10666:
10667: for (ix = 0; ix < len;) { /* string is 0-terminated */
10668: c = utf[ix];
10669: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10670: if (c >= 0x20)
10671: ix++;
10672: else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10673: ix++;
10674: else
10675: return(-ix);
10676: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10677: if (ix + 2 > len) return(ix);
10678: if ((utf[ix+1] & 0xc0 ) != 0x80)
10679: return(-ix);
10680: codepoint = (utf[ix] & 0x1f) << 6;
10681: codepoint |= utf[ix+1] & 0x3f;
10682: if (!xmlIsCharQ(codepoint))
10683: return(-ix);
10684: ix += 2;
10685: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10686: if (ix + 3 > len) return(ix);
10687: if (((utf[ix+1] & 0xc0) != 0x80) ||
10688: ((utf[ix+2] & 0xc0) != 0x80))
10689: return(-ix);
10690: codepoint = (utf[ix] & 0xf) << 12;
10691: codepoint |= (utf[ix+1] & 0x3f) << 6;
10692: codepoint |= utf[ix+2] & 0x3f;
10693: if (!xmlIsCharQ(codepoint))
10694: return(-ix);
10695: ix += 3;
10696: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10697: if (ix + 4 > len) return(ix);
10698: if (((utf[ix+1] & 0xc0) != 0x80) ||
10699: ((utf[ix+2] & 0xc0) != 0x80) ||
10700: ((utf[ix+3] & 0xc0) != 0x80))
10701: return(-ix);
10702: codepoint = (utf[ix] & 0x7) << 18;
10703: codepoint |= (utf[ix+1] & 0x3f) << 12;
10704: codepoint |= (utf[ix+2] & 0x3f) << 6;
10705: codepoint |= utf[ix+3] & 0x3f;
10706: if (!xmlIsCharQ(codepoint))
10707: return(-ix);
10708: ix += 4;
10709: } else /* unknown encoding */
10710: return(-ix);
10711: }
10712: return(ix);
10713: }
10714:
10715: /**
10716: * xmlParseTryOrFinish:
10717: * @ctxt: an XML parser context
10718: * @terminate: last chunk indicator
10719: *
10720: * Try to progress on parsing
10721: *
10722: * Returns zero if no parsing was possible
10723: */
10724: static int
10725: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10726: int ret = 0;
10727: int avail, tlen;
10728: xmlChar cur, next;
10729: const xmlChar *lastlt, *lastgt;
10730:
10731: if (ctxt->input == NULL)
10732: return(0);
10733:
10734: #ifdef DEBUG_PUSH
10735: switch (ctxt->instate) {
10736: case XML_PARSER_EOF:
10737: xmlGenericError(xmlGenericErrorContext,
10738: "PP: try EOF\n"); break;
10739: case XML_PARSER_START:
10740: xmlGenericError(xmlGenericErrorContext,
10741: "PP: try START\n"); break;
10742: case XML_PARSER_MISC:
10743: xmlGenericError(xmlGenericErrorContext,
10744: "PP: try MISC\n");break;
10745: case XML_PARSER_COMMENT:
10746: xmlGenericError(xmlGenericErrorContext,
10747: "PP: try COMMENT\n");break;
10748: case XML_PARSER_PROLOG:
10749: xmlGenericError(xmlGenericErrorContext,
10750: "PP: try PROLOG\n");break;
10751: case XML_PARSER_START_TAG:
10752: xmlGenericError(xmlGenericErrorContext,
10753: "PP: try START_TAG\n");break;
10754: case XML_PARSER_CONTENT:
10755: xmlGenericError(xmlGenericErrorContext,
10756: "PP: try CONTENT\n");break;
10757: case XML_PARSER_CDATA_SECTION:
10758: xmlGenericError(xmlGenericErrorContext,
10759: "PP: try CDATA_SECTION\n");break;
10760: case XML_PARSER_END_TAG:
10761: xmlGenericError(xmlGenericErrorContext,
10762: "PP: try END_TAG\n");break;
10763: case XML_PARSER_ENTITY_DECL:
10764: xmlGenericError(xmlGenericErrorContext,
10765: "PP: try ENTITY_DECL\n");break;
10766: case XML_PARSER_ENTITY_VALUE:
10767: xmlGenericError(xmlGenericErrorContext,
10768: "PP: try ENTITY_VALUE\n");break;
10769: case XML_PARSER_ATTRIBUTE_VALUE:
10770: xmlGenericError(xmlGenericErrorContext,
10771: "PP: try ATTRIBUTE_VALUE\n");break;
10772: case XML_PARSER_DTD:
10773: xmlGenericError(xmlGenericErrorContext,
10774: "PP: try DTD\n");break;
10775: case XML_PARSER_EPILOG:
10776: xmlGenericError(xmlGenericErrorContext,
10777: "PP: try EPILOG\n");break;
10778: case XML_PARSER_PI:
10779: xmlGenericError(xmlGenericErrorContext,
10780: "PP: try PI\n");break;
10781: case XML_PARSER_IGNORE:
10782: xmlGenericError(xmlGenericErrorContext,
10783: "PP: try IGNORE\n");break;
10784: }
10785: #endif
10786:
10787: if ((ctxt->input != NULL) &&
10788: (ctxt->input->cur - ctxt->input->base > 4096)) {
10789: xmlSHRINK(ctxt);
10790: ctxt->checkIndex = 0;
10791: }
10792: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10793:
10794: while (1) {
10795: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10796: return(0);
10797:
10798:
10799: /*
10800: * Pop-up of finished entities.
10801: */
10802: while ((RAW == 0) && (ctxt->inputNr > 1))
10803: xmlPopInput(ctxt);
10804:
10805: if (ctxt->input == NULL) break;
10806: if (ctxt->input->buf == NULL)
10807: avail = ctxt->input->length -
10808: (ctxt->input->cur - ctxt->input->base);
10809: else {
10810: /*
10811: * If we are operating on converted input, try to flush
10812: * remainng chars to avoid them stalling in the non-converted
10813: * buffer.
10814: */
10815: if ((ctxt->input->buf->raw != NULL) &&
10816: (ctxt->input->buf->raw->use > 0)) {
10817: int base = ctxt->input->base -
10818: ctxt->input->buf->buffer->content;
10819: int current = ctxt->input->cur - ctxt->input->base;
10820:
10821: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10822: ctxt->input->base = ctxt->input->buf->buffer->content + base;
10823: ctxt->input->cur = ctxt->input->base + current;
10824: ctxt->input->end =
10825: &ctxt->input->buf->buffer->content[
10826: ctxt->input->buf->buffer->use];
10827: }
10828: avail = ctxt->input->buf->buffer->use -
10829: (ctxt->input->cur - ctxt->input->base);
10830: }
10831: if (avail < 1)
10832: goto done;
10833: switch (ctxt->instate) {
10834: case XML_PARSER_EOF:
10835: /*
10836: * Document parsing is done !
10837: */
10838: goto done;
10839: case XML_PARSER_START:
10840: if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10841: xmlChar start[4];
10842: xmlCharEncoding enc;
10843:
10844: /*
10845: * Very first chars read from the document flow.
10846: */
10847: if (avail < 4)
10848: goto done;
10849:
10850: /*
10851: * Get the 4 first bytes and decode the charset
10852: * if enc != XML_CHAR_ENCODING_NONE
10853: * plug some encoding conversion routines,
10854: * else xmlSwitchEncoding will set to (default)
10855: * UTF8.
10856: */
10857: start[0] = RAW;
10858: start[1] = NXT(1);
10859: start[2] = NXT(2);
10860: start[3] = NXT(3);
10861: enc = xmlDetectCharEncoding(start, 4);
10862: xmlSwitchEncoding(ctxt, enc);
10863: break;
10864: }
10865:
10866: if (avail < 2)
10867: goto done;
10868: cur = ctxt->input->cur[0];
10869: next = ctxt->input->cur[1];
10870: if (cur == 0) {
10871: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10872: ctxt->sax->setDocumentLocator(ctxt->userData,
10873: &xmlDefaultSAXLocator);
10874: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10875: ctxt->instate = XML_PARSER_EOF;
10876: #ifdef DEBUG_PUSH
10877: xmlGenericError(xmlGenericErrorContext,
10878: "PP: entering EOF\n");
10879: #endif
10880: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10881: ctxt->sax->endDocument(ctxt->userData);
10882: goto done;
10883: }
10884: if ((cur == '<') && (next == '?')) {
10885: /* PI or XML decl */
10886: if (avail < 5) return(ret);
10887: if ((!terminate) &&
10888: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10889: return(ret);
10890: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10891: ctxt->sax->setDocumentLocator(ctxt->userData,
10892: &xmlDefaultSAXLocator);
10893: if ((ctxt->input->cur[2] == 'x') &&
10894: (ctxt->input->cur[3] == 'm') &&
10895: (ctxt->input->cur[4] == 'l') &&
10896: (IS_BLANK_CH(ctxt->input->cur[5]))) {
10897: ret += 5;
10898: #ifdef DEBUG_PUSH
10899: xmlGenericError(xmlGenericErrorContext,
10900: "PP: Parsing XML Decl\n");
10901: #endif
10902: xmlParseXMLDecl(ctxt);
10903: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10904: /*
10905: * The XML REC instructs us to stop parsing right
10906: * here
10907: */
10908: ctxt->instate = XML_PARSER_EOF;
10909: return(0);
10910: }
10911: ctxt->standalone = ctxt->input->standalone;
10912: if ((ctxt->encoding == NULL) &&
10913: (ctxt->input->encoding != NULL))
10914: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10915: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10916: (!ctxt->disableSAX))
10917: ctxt->sax->startDocument(ctxt->userData);
10918: ctxt->instate = XML_PARSER_MISC;
10919: #ifdef DEBUG_PUSH
10920: xmlGenericError(xmlGenericErrorContext,
10921: "PP: entering MISC\n");
10922: #endif
10923: } else {
10924: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10926: (!ctxt->disableSAX))
10927: ctxt->sax->startDocument(ctxt->userData);
10928: ctxt->instate = XML_PARSER_MISC;
10929: #ifdef DEBUG_PUSH
10930: xmlGenericError(xmlGenericErrorContext,
10931: "PP: entering MISC\n");
10932: #endif
10933: }
10934: } else {
10935: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10936: ctxt->sax->setDocumentLocator(ctxt->userData,
10937: &xmlDefaultSAXLocator);
10938: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10939: if (ctxt->version == NULL) {
10940: xmlErrMemory(ctxt, NULL);
10941: break;
10942: }
10943: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10944: (!ctxt->disableSAX))
10945: ctxt->sax->startDocument(ctxt->userData);
10946: ctxt->instate = XML_PARSER_MISC;
10947: #ifdef DEBUG_PUSH
10948: xmlGenericError(xmlGenericErrorContext,
10949: "PP: entering MISC\n");
10950: #endif
10951: }
10952: break;
10953: case XML_PARSER_START_TAG: {
10954: const xmlChar *name;
10955: const xmlChar *prefix = NULL;
10956: const xmlChar *URI = NULL;
10957: int nsNr = ctxt->nsNr;
10958:
10959: if ((avail < 2) && (ctxt->inputNr == 1))
10960: goto done;
10961: cur = ctxt->input->cur[0];
10962: if (cur != '<') {
10963: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10964: ctxt->instate = XML_PARSER_EOF;
10965: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10966: ctxt->sax->endDocument(ctxt->userData);
10967: goto done;
10968: }
10969: if (!terminate) {
10970: if (ctxt->progressive) {
10971: /* > can be found unescaped in attribute values */
10972: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10973: goto done;
10974: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10975: goto done;
10976: }
10977: }
10978: if (ctxt->spaceNr == 0)
10979: spacePush(ctxt, -1);
10980: else if (*ctxt->space == -2)
10981: spacePush(ctxt, -1);
10982: else
10983: spacePush(ctxt, *ctxt->space);
10984: #ifdef LIBXML_SAX1_ENABLED
10985: if (ctxt->sax2)
10986: #endif /* LIBXML_SAX1_ENABLED */
10987: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10988: #ifdef LIBXML_SAX1_ENABLED
10989: else
10990: name = xmlParseStartTag(ctxt);
10991: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 ! misho 10992: if (ctxt->instate == XML_PARSER_EOF)
! 10993: goto done;
1.1 misho 10994: if (name == NULL) {
10995: spacePop(ctxt);
10996: ctxt->instate = XML_PARSER_EOF;
10997: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10998: ctxt->sax->endDocument(ctxt->userData);
10999: goto done;
11000: }
11001: #ifdef LIBXML_VALID_ENABLED
11002: /*
11003: * [ VC: Root Element Type ]
11004: * The Name in the document type declaration must match
11005: * the element type of the root element.
11006: */
11007: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11008: ctxt->node && (ctxt->node == ctxt->myDoc->children))
11009: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11010: #endif /* LIBXML_VALID_ENABLED */
11011:
11012: /*
11013: * Check for an Empty Element.
11014: */
11015: if ((RAW == '/') && (NXT(1) == '>')) {
11016: SKIP(2);
11017:
11018: if (ctxt->sax2) {
11019: if ((ctxt->sax != NULL) &&
11020: (ctxt->sax->endElementNs != NULL) &&
11021: (!ctxt->disableSAX))
11022: ctxt->sax->endElementNs(ctxt->userData, name,
11023: prefix, URI);
11024: if (ctxt->nsNr - nsNr > 0)
11025: nsPop(ctxt, ctxt->nsNr - nsNr);
11026: #ifdef LIBXML_SAX1_ENABLED
11027: } else {
11028: if ((ctxt->sax != NULL) &&
11029: (ctxt->sax->endElement != NULL) &&
11030: (!ctxt->disableSAX))
11031: ctxt->sax->endElement(ctxt->userData, name);
11032: #endif /* LIBXML_SAX1_ENABLED */
11033: }
11034: spacePop(ctxt);
11035: if (ctxt->nameNr == 0) {
11036: ctxt->instate = XML_PARSER_EPILOG;
11037: } else {
11038: ctxt->instate = XML_PARSER_CONTENT;
11039: }
11040: break;
11041: }
11042: if (RAW == '>') {
11043: NEXT;
11044: } else {
11045: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11046: "Couldn't find end of Start Tag %s\n",
11047: name);
11048: nodePop(ctxt);
11049: spacePop(ctxt);
11050: }
11051: if (ctxt->sax2)
11052: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11053: #ifdef LIBXML_SAX1_ENABLED
11054: else
11055: namePush(ctxt, name);
11056: #endif /* LIBXML_SAX1_ENABLED */
11057:
11058: ctxt->instate = XML_PARSER_CONTENT;
11059: break;
11060: }
11061: case XML_PARSER_CONTENT: {
11062: const xmlChar *test;
11063: unsigned int cons;
11064: if ((avail < 2) && (ctxt->inputNr == 1))
11065: goto done;
11066: cur = ctxt->input->cur[0];
11067: next = ctxt->input->cur[1];
11068:
11069: test = CUR_PTR;
11070: cons = ctxt->input->consumed;
11071: if ((cur == '<') && (next == '/')) {
11072: ctxt->instate = XML_PARSER_END_TAG;
11073: break;
11074: } else if ((cur == '<') && (next == '?')) {
11075: if ((!terminate) &&
11076: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11077: goto done;
11078: xmlParsePI(ctxt);
11079: } else if ((cur == '<') && (next != '!')) {
11080: ctxt->instate = XML_PARSER_START_TAG;
11081: break;
11082: } else if ((cur == '<') && (next == '!') &&
11083: (ctxt->input->cur[2] == '-') &&
11084: (ctxt->input->cur[3] == '-')) {
11085: int term;
11086:
11087: if (avail < 4)
11088: goto done;
11089: ctxt->input->cur += 4;
11090: term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11091: ctxt->input->cur -= 4;
11092: if ((!terminate) && (term < 0))
11093: goto done;
11094: xmlParseComment(ctxt);
11095: ctxt->instate = XML_PARSER_CONTENT;
11096: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11097: (ctxt->input->cur[2] == '[') &&
11098: (ctxt->input->cur[3] == 'C') &&
11099: (ctxt->input->cur[4] == 'D') &&
11100: (ctxt->input->cur[5] == 'A') &&
11101: (ctxt->input->cur[6] == 'T') &&
11102: (ctxt->input->cur[7] == 'A') &&
11103: (ctxt->input->cur[8] == '[')) {
11104: SKIP(9);
11105: ctxt->instate = XML_PARSER_CDATA_SECTION;
11106: break;
11107: } else if ((cur == '<') && (next == '!') &&
11108: (avail < 9)) {
11109: goto done;
11110: } else if (cur == '&') {
11111: if ((!terminate) &&
11112: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11113: goto done;
11114: xmlParseReference(ctxt);
11115: } else {
11116: /* TODO Avoid the extra copy, handle directly !!! */
11117: /*
11118: * Goal of the following test is:
11119: * - minimize calls to the SAX 'character' callback
11120: * when they are mergeable
11121: * - handle an problem for isBlank when we only parse
11122: * a sequence of blank chars and the next one is
11123: * not available to check against '<' presence.
11124: * - tries to homogenize the differences in SAX
11125: * callbacks between the push and pull versions
11126: * of the parser.
11127: */
11128: if ((ctxt->inputNr == 1) &&
11129: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11130: if (!terminate) {
11131: if (ctxt->progressive) {
11132: if ((lastlt == NULL) ||
11133: (ctxt->input->cur > lastlt))
11134: goto done;
11135: } else if (xmlParseLookupSequence(ctxt,
11136: '<', 0, 0) < 0) {
11137: goto done;
11138: }
11139: }
11140: }
11141: ctxt->checkIndex = 0;
11142: xmlParseCharData(ctxt, 0);
11143: }
11144: /*
11145: * Pop-up of finished entities.
11146: */
11147: while ((RAW == 0) && (ctxt->inputNr > 1))
11148: xmlPopInput(ctxt);
11149: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11150: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11151: "detected an error in element content\n");
11152: ctxt->instate = XML_PARSER_EOF;
11153: break;
11154: }
11155: break;
11156: }
11157: case XML_PARSER_END_TAG:
11158: if (avail < 2)
11159: goto done;
11160: if (!terminate) {
11161: if (ctxt->progressive) {
11162: /* > can be found unescaped in attribute values */
11163: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11164: goto done;
11165: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11166: goto done;
11167: }
11168: }
11169: if (ctxt->sax2) {
11170: xmlParseEndTag2(ctxt,
11171: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11172: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11173: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11174: nameNsPop(ctxt);
11175: }
11176: #ifdef LIBXML_SAX1_ENABLED
11177: else
11178: xmlParseEndTag1(ctxt, 0);
11179: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 ! misho 11180: if (ctxt->instate == XML_PARSER_EOF) {
! 11181: /* Nothing */
! 11182: } else if (ctxt->nameNr == 0) {
1.1 misho 11183: ctxt->instate = XML_PARSER_EPILOG;
11184: } else {
11185: ctxt->instate = XML_PARSER_CONTENT;
11186: }
11187: break;
11188: case XML_PARSER_CDATA_SECTION: {
11189: /*
11190: * The Push mode need to have the SAX callback for
11191: * cdataBlock merge back contiguous callbacks.
11192: */
11193: int base;
11194:
11195: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11196: if (base < 0) {
11197: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11198: int tmp;
11199:
11200: tmp = xmlCheckCdataPush(ctxt->input->cur,
11201: XML_PARSER_BIG_BUFFER_SIZE);
11202: if (tmp < 0) {
11203: tmp = -tmp;
11204: ctxt->input->cur += tmp;
11205: goto encoding_error;
11206: }
11207: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11208: if (ctxt->sax->cdataBlock != NULL)
11209: ctxt->sax->cdataBlock(ctxt->userData,
11210: ctxt->input->cur, tmp);
11211: else if (ctxt->sax->characters != NULL)
11212: ctxt->sax->characters(ctxt->userData,
11213: ctxt->input->cur, tmp);
11214: }
11215: SKIPL(tmp);
11216: ctxt->checkIndex = 0;
11217: }
11218: goto done;
11219: } else {
11220: int tmp;
11221:
11222: tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11223: if ((tmp < 0) || (tmp != base)) {
11224: tmp = -tmp;
11225: ctxt->input->cur += tmp;
11226: goto encoding_error;
11227: }
11228: if ((ctxt->sax != NULL) && (base == 0) &&
11229: (ctxt->sax->cdataBlock != NULL) &&
11230: (!ctxt->disableSAX)) {
11231: /*
11232: * Special case to provide identical behaviour
11233: * between pull and push parsers on empty CDATA
11234: * sections
11235: */
11236: if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11237: (!strncmp((const char *)&ctxt->input->cur[-9],
11238: "<![CDATA[", 9)))
11239: ctxt->sax->cdataBlock(ctxt->userData,
11240: BAD_CAST "", 0);
11241: } else if ((ctxt->sax != NULL) && (base > 0) &&
11242: (!ctxt->disableSAX)) {
11243: if (ctxt->sax->cdataBlock != NULL)
11244: ctxt->sax->cdataBlock(ctxt->userData,
11245: ctxt->input->cur, base);
11246: else if (ctxt->sax->characters != NULL)
11247: ctxt->sax->characters(ctxt->userData,
11248: ctxt->input->cur, base);
11249: }
11250: SKIPL(base + 3);
11251: ctxt->checkIndex = 0;
11252: ctxt->instate = XML_PARSER_CONTENT;
11253: #ifdef DEBUG_PUSH
11254: xmlGenericError(xmlGenericErrorContext,
11255: "PP: entering CONTENT\n");
11256: #endif
11257: }
11258: break;
11259: }
11260: case XML_PARSER_MISC:
11261: SKIP_BLANKS;
11262: if (ctxt->input->buf == NULL)
11263: avail = ctxt->input->length -
11264: (ctxt->input->cur - ctxt->input->base);
11265: else
11266: avail = ctxt->input->buf->buffer->use -
11267: (ctxt->input->cur - ctxt->input->base);
11268: if (avail < 2)
11269: goto done;
11270: cur = ctxt->input->cur[0];
11271: next = ctxt->input->cur[1];
11272: if ((cur == '<') && (next == '?')) {
11273: if ((!terminate) &&
11274: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11275: goto done;
11276: #ifdef DEBUG_PUSH
11277: xmlGenericError(xmlGenericErrorContext,
11278: "PP: Parsing PI\n");
11279: #endif
11280: xmlParsePI(ctxt);
11281: ctxt->checkIndex = 0;
11282: } else if ((cur == '<') && (next == '!') &&
11283: (ctxt->input->cur[2] == '-') &&
11284: (ctxt->input->cur[3] == '-')) {
11285: if ((!terminate) &&
11286: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11287: goto done;
11288: #ifdef DEBUG_PUSH
11289: xmlGenericError(xmlGenericErrorContext,
11290: "PP: Parsing Comment\n");
11291: #endif
11292: xmlParseComment(ctxt);
11293: ctxt->instate = XML_PARSER_MISC;
11294: ctxt->checkIndex = 0;
11295: } else if ((cur == '<') && (next == '!') &&
11296: (ctxt->input->cur[2] == 'D') &&
11297: (ctxt->input->cur[3] == 'O') &&
11298: (ctxt->input->cur[4] == 'C') &&
11299: (ctxt->input->cur[5] == 'T') &&
11300: (ctxt->input->cur[6] == 'Y') &&
11301: (ctxt->input->cur[7] == 'P') &&
11302: (ctxt->input->cur[8] == 'E')) {
11303: if ((!terminate) &&
11304: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11305: goto done;
11306: #ifdef DEBUG_PUSH
11307: xmlGenericError(xmlGenericErrorContext,
11308: "PP: Parsing internal subset\n");
11309: #endif
11310: ctxt->inSubset = 1;
11311: xmlParseDocTypeDecl(ctxt);
11312: if (RAW == '[') {
11313: ctxt->instate = XML_PARSER_DTD;
11314: #ifdef DEBUG_PUSH
11315: xmlGenericError(xmlGenericErrorContext,
11316: "PP: entering DTD\n");
11317: #endif
11318: } else {
11319: /*
11320: * Create and update the external subset.
11321: */
11322: ctxt->inSubset = 2;
11323: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11324: (ctxt->sax->externalSubset != NULL))
11325: ctxt->sax->externalSubset(ctxt->userData,
11326: ctxt->intSubName, ctxt->extSubSystem,
11327: ctxt->extSubURI);
11328: ctxt->inSubset = 0;
11329: xmlCleanSpecialAttr(ctxt);
11330: ctxt->instate = XML_PARSER_PROLOG;
11331: #ifdef DEBUG_PUSH
11332: xmlGenericError(xmlGenericErrorContext,
11333: "PP: entering PROLOG\n");
11334: #endif
11335: }
11336: } else if ((cur == '<') && (next == '!') &&
11337: (avail < 9)) {
11338: goto done;
11339: } else {
11340: ctxt->instate = XML_PARSER_START_TAG;
11341: ctxt->progressive = 1;
11342: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11343: #ifdef DEBUG_PUSH
11344: xmlGenericError(xmlGenericErrorContext,
11345: "PP: entering START_TAG\n");
11346: #endif
11347: }
11348: break;
11349: case XML_PARSER_PROLOG:
11350: SKIP_BLANKS;
11351: if (ctxt->input->buf == NULL)
11352: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11353: else
11354: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11355: if (avail < 2)
11356: goto done;
11357: cur = ctxt->input->cur[0];
11358: next = ctxt->input->cur[1];
11359: if ((cur == '<') && (next == '?')) {
11360: if ((!terminate) &&
11361: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11362: goto done;
11363: #ifdef DEBUG_PUSH
11364: xmlGenericError(xmlGenericErrorContext,
11365: "PP: Parsing PI\n");
11366: #endif
11367: xmlParsePI(ctxt);
11368: } else if ((cur == '<') && (next == '!') &&
11369: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11370: if ((!terminate) &&
11371: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11372: goto done;
11373: #ifdef DEBUG_PUSH
11374: xmlGenericError(xmlGenericErrorContext,
11375: "PP: Parsing Comment\n");
11376: #endif
11377: xmlParseComment(ctxt);
11378: ctxt->instate = XML_PARSER_PROLOG;
11379: } else if ((cur == '<') && (next == '!') &&
11380: (avail < 4)) {
11381: goto done;
11382: } else {
11383: ctxt->instate = XML_PARSER_START_TAG;
11384: if (ctxt->progressive == 0)
11385: ctxt->progressive = 1;
11386: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11387: #ifdef DEBUG_PUSH
11388: xmlGenericError(xmlGenericErrorContext,
11389: "PP: entering START_TAG\n");
11390: #endif
11391: }
11392: break;
11393: case XML_PARSER_EPILOG:
11394: SKIP_BLANKS;
11395: if (ctxt->input->buf == NULL)
11396: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11397: else
11398: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11399: if (avail < 2)
11400: goto done;
11401: cur = ctxt->input->cur[0];
11402: next = ctxt->input->cur[1];
11403: if ((cur == '<') && (next == '?')) {
11404: if ((!terminate) &&
11405: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11406: goto done;
11407: #ifdef DEBUG_PUSH
11408: xmlGenericError(xmlGenericErrorContext,
11409: "PP: Parsing PI\n");
11410: #endif
11411: xmlParsePI(ctxt);
11412: ctxt->instate = XML_PARSER_EPILOG;
11413: } else if ((cur == '<') && (next == '!') &&
11414: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11415: if ((!terminate) &&
11416: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11417: goto done;
11418: #ifdef DEBUG_PUSH
11419: xmlGenericError(xmlGenericErrorContext,
11420: "PP: Parsing Comment\n");
11421: #endif
11422: xmlParseComment(ctxt);
11423: ctxt->instate = XML_PARSER_EPILOG;
11424: } else if ((cur == '<') && (next == '!') &&
11425: (avail < 4)) {
11426: goto done;
11427: } else {
11428: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11429: ctxt->instate = XML_PARSER_EOF;
11430: #ifdef DEBUG_PUSH
11431: xmlGenericError(xmlGenericErrorContext,
11432: "PP: entering EOF\n");
11433: #endif
11434: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11435: ctxt->sax->endDocument(ctxt->userData);
11436: goto done;
11437: }
11438: break;
11439: case XML_PARSER_DTD: {
11440: /*
11441: * Sorry but progressive parsing of the internal subset
11442: * is not expected to be supported. We first check that
11443: * the full content of the internal subset is available and
11444: * the parsing is launched only at that point.
11445: * Internal subset ends up with "']' S? '>'" in an unescaped
11446: * section and not in a ']]>' sequence which are conditional
11447: * sections (whoever argued to keep that crap in XML deserve
11448: * a place in hell !).
11449: */
11450: int base, i;
11451: xmlChar *buf;
11452: xmlChar quote = 0;
11453:
11454: base = ctxt->input->cur - ctxt->input->base;
11455: if (base < 0) return(0);
11456: if (ctxt->checkIndex > base)
11457: base = ctxt->checkIndex;
11458: buf = ctxt->input->buf->buffer->content;
11459: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11460: base++) {
11461: if (quote != 0) {
11462: if (buf[base] == quote)
11463: quote = 0;
11464: continue;
11465: }
11466: if ((quote == 0) && (buf[base] == '<')) {
11467: int found = 0;
11468: /* special handling of comments */
11469: if (((unsigned int) base + 4 <
11470: ctxt->input->buf->buffer->use) &&
11471: (buf[base + 1] == '!') &&
11472: (buf[base + 2] == '-') &&
11473: (buf[base + 3] == '-')) {
11474: for (;(unsigned int) base + 3 <
11475: ctxt->input->buf->buffer->use; base++) {
11476: if ((buf[base] == '-') &&
11477: (buf[base + 1] == '-') &&
11478: (buf[base + 2] == '>')) {
11479: found = 1;
11480: base += 2;
11481: break;
11482: }
11483: }
11484: if (!found) {
11485: #if 0
11486: fprintf(stderr, "unfinished comment\n");
11487: #endif
11488: break; /* for */
11489: }
11490: continue;
11491: }
11492: }
11493: if (buf[base] == '"') {
11494: quote = '"';
11495: continue;
11496: }
11497: if (buf[base] == '\'') {
11498: quote = '\'';
11499: continue;
11500: }
11501: if (buf[base] == ']') {
11502: #if 0
11503: fprintf(stderr, "%c%c%c%c: ", buf[base],
11504: buf[base + 1], buf[base + 2], buf[base + 3]);
11505: #endif
11506: if ((unsigned int) base +1 >=
11507: ctxt->input->buf->buffer->use)
11508: break;
11509: if (buf[base + 1] == ']') {
11510: /* conditional crap, skip both ']' ! */
11511: base++;
11512: continue;
11513: }
11514: for (i = 1;
11515: (unsigned int) base + i < ctxt->input->buf->buffer->use;
11516: i++) {
11517: if (buf[base + i] == '>') {
11518: #if 0
11519: fprintf(stderr, "found\n");
11520: #endif
11521: goto found_end_int_subset;
11522: }
11523: if (!IS_BLANK_CH(buf[base + i])) {
11524: #if 0
11525: fprintf(stderr, "not found\n");
11526: #endif
11527: goto not_end_of_int_subset;
11528: }
11529: }
11530: #if 0
11531: fprintf(stderr, "end of stream\n");
11532: #endif
11533: break;
11534:
11535: }
11536: not_end_of_int_subset:
11537: continue; /* for */
11538: }
11539: /*
11540: * We didn't find the end of the internal subset
11541: */
11542: #ifdef DEBUG_PUSH
11543: if (next == 0)
11544: xmlGenericError(xmlGenericErrorContext,
11545: "PP: lookup of int subset end filed\n");
11546: #endif
11547: goto done;
11548:
11549: found_end_int_subset:
11550: xmlParseInternalSubset(ctxt);
11551: ctxt->inSubset = 2;
11552: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11553: (ctxt->sax->externalSubset != NULL))
11554: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11555: ctxt->extSubSystem, ctxt->extSubURI);
11556: ctxt->inSubset = 0;
11557: xmlCleanSpecialAttr(ctxt);
11558: ctxt->instate = XML_PARSER_PROLOG;
11559: ctxt->checkIndex = 0;
11560: #ifdef DEBUG_PUSH
11561: xmlGenericError(xmlGenericErrorContext,
11562: "PP: entering PROLOG\n");
11563: #endif
11564: break;
11565: }
11566: case XML_PARSER_COMMENT:
11567: xmlGenericError(xmlGenericErrorContext,
11568: "PP: internal error, state == COMMENT\n");
11569: ctxt->instate = XML_PARSER_CONTENT;
11570: #ifdef DEBUG_PUSH
11571: xmlGenericError(xmlGenericErrorContext,
11572: "PP: entering CONTENT\n");
11573: #endif
11574: break;
11575: case XML_PARSER_IGNORE:
11576: xmlGenericError(xmlGenericErrorContext,
11577: "PP: internal error, state == IGNORE");
11578: ctxt->instate = XML_PARSER_DTD;
11579: #ifdef DEBUG_PUSH
11580: xmlGenericError(xmlGenericErrorContext,
11581: "PP: entering DTD\n");
11582: #endif
11583: break;
11584: case XML_PARSER_PI:
11585: xmlGenericError(xmlGenericErrorContext,
11586: "PP: internal error, state == PI\n");
11587: ctxt->instate = XML_PARSER_CONTENT;
11588: #ifdef DEBUG_PUSH
11589: xmlGenericError(xmlGenericErrorContext,
11590: "PP: entering CONTENT\n");
11591: #endif
11592: break;
11593: case XML_PARSER_ENTITY_DECL:
11594: xmlGenericError(xmlGenericErrorContext,
11595: "PP: internal error, state == ENTITY_DECL\n");
11596: ctxt->instate = XML_PARSER_DTD;
11597: #ifdef DEBUG_PUSH
11598: xmlGenericError(xmlGenericErrorContext,
11599: "PP: entering DTD\n");
11600: #endif
11601: break;
11602: case XML_PARSER_ENTITY_VALUE:
11603: xmlGenericError(xmlGenericErrorContext,
11604: "PP: internal error, state == ENTITY_VALUE\n");
11605: ctxt->instate = XML_PARSER_CONTENT;
11606: #ifdef DEBUG_PUSH
11607: xmlGenericError(xmlGenericErrorContext,
11608: "PP: entering DTD\n");
11609: #endif
11610: break;
11611: case XML_PARSER_ATTRIBUTE_VALUE:
11612: xmlGenericError(xmlGenericErrorContext,
11613: "PP: internal error, state == ATTRIBUTE_VALUE\n");
11614: ctxt->instate = XML_PARSER_START_TAG;
11615: #ifdef DEBUG_PUSH
11616: xmlGenericError(xmlGenericErrorContext,
11617: "PP: entering START_TAG\n");
11618: #endif
11619: break;
11620: case XML_PARSER_SYSTEM_LITERAL:
11621: xmlGenericError(xmlGenericErrorContext,
11622: "PP: internal error, state == SYSTEM_LITERAL\n");
11623: ctxt->instate = XML_PARSER_START_TAG;
11624: #ifdef DEBUG_PUSH
11625: xmlGenericError(xmlGenericErrorContext,
11626: "PP: entering START_TAG\n");
11627: #endif
11628: break;
11629: case XML_PARSER_PUBLIC_LITERAL:
11630: xmlGenericError(xmlGenericErrorContext,
11631: "PP: internal error, state == PUBLIC_LITERAL\n");
11632: ctxt->instate = XML_PARSER_START_TAG;
11633: #ifdef DEBUG_PUSH
11634: xmlGenericError(xmlGenericErrorContext,
11635: "PP: entering START_TAG\n");
11636: #endif
11637: break;
11638: }
11639: }
11640: done:
11641: #ifdef DEBUG_PUSH
11642: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11643: #endif
11644: return(ret);
11645: encoding_error:
11646: {
11647: char buffer[150];
11648:
11649: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11650: ctxt->input->cur[0], ctxt->input->cur[1],
11651: ctxt->input->cur[2], ctxt->input->cur[3]);
11652: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11653: "Input is not proper UTF-8, indicate encoding !\n%s",
11654: BAD_CAST buffer, NULL);
11655: }
11656: return(0);
11657: }
11658:
11659: /**
11660: * xmlParseChunk:
11661: * @ctxt: an XML parser context
11662: * @chunk: an char array
11663: * @size: the size in byte of the chunk
11664: * @terminate: last chunk indicator
11665: *
11666: * Parse a Chunk of memory
11667: *
11668: * Returns zero if no error, the xmlParserErrors otherwise.
11669: */
11670: int
11671: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11672: int terminate) {
11673: int end_in_lf = 0;
11674: int remain = 0;
11675:
11676: if (ctxt == NULL)
11677: return(XML_ERR_INTERNAL_ERROR);
11678: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11679: return(ctxt->errNo);
11680: if (ctxt->instate == XML_PARSER_START)
11681: xmlDetectSAX2(ctxt);
11682: if ((size > 0) && (chunk != NULL) && (!terminate) &&
11683: (chunk[size - 1] == '\r')) {
11684: end_in_lf = 1;
11685: size--;
11686: }
11687:
11688: xmldecl_done:
11689:
11690: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11691: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11692: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11693: int cur = ctxt->input->cur - ctxt->input->base;
11694: int res;
11695:
11696: /*
11697: * Specific handling if we autodetected an encoding, we should not
11698: * push more than the first line ... which depend on the encoding
11699: * And only push the rest once the final encoding was detected
11700: */
11701: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11702: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11703: unsigned int len = 45;
11704:
11705: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11706: BAD_CAST "UTF-16")) ||
11707: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11708: BAD_CAST "UTF16")))
11709: len = 90;
11710: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11711: BAD_CAST "UCS-4")) ||
11712: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11713: BAD_CAST "UCS4")))
11714: len = 180;
11715:
11716: if (ctxt->input->buf->rawconsumed < len)
11717: len -= ctxt->input->buf->rawconsumed;
11718:
11719: /*
11720: * Change size for reading the initial declaration only
11721: * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11722: * will blindly copy extra bytes from memory.
11723: */
11724: if ((unsigned int) size > len) {
11725: remain = size - len;
11726: size = len;
11727: } else {
11728: remain = 0;
11729: }
11730: }
11731: res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11732: if (res < 0) {
11733: ctxt->errNo = XML_PARSER_EOF;
11734: ctxt->disableSAX = 1;
11735: return (XML_PARSER_EOF);
11736: }
11737: ctxt->input->base = ctxt->input->buf->buffer->content + base;
11738: ctxt->input->cur = ctxt->input->base + cur;
11739: ctxt->input->end =
11740: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11741: #ifdef DEBUG_PUSH
11742: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11743: #endif
11744:
11745: } else if (ctxt->instate != XML_PARSER_EOF) {
11746: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11747: xmlParserInputBufferPtr in = ctxt->input->buf;
11748: if ((in->encoder != NULL) && (in->buffer != NULL) &&
11749: (in->raw != NULL)) {
11750: int nbchars;
11751:
11752: nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11753: if (nbchars < 0) {
11754: /* TODO 2.6.0 */
11755: xmlGenericError(xmlGenericErrorContext,
11756: "xmlParseChunk: encoder error\n");
11757: return(XML_ERR_INVALID_ENCODING);
11758: }
11759: }
11760: }
11761: }
11762: if (remain != 0)
11763: xmlParseTryOrFinish(ctxt, 0);
11764: else
11765: xmlParseTryOrFinish(ctxt, terminate);
11766: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11767: return(ctxt->errNo);
11768:
11769: if (remain != 0) {
11770: chunk += size;
11771: size = remain;
11772: remain = 0;
11773: goto xmldecl_done;
11774: }
11775: if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11776: (ctxt->input->buf != NULL)) {
11777: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11778: }
11779: if (terminate) {
11780: /*
11781: * Check for termination
11782: */
11783: int avail = 0;
11784:
11785: if (ctxt->input != NULL) {
11786: if (ctxt->input->buf == NULL)
11787: avail = ctxt->input->length -
11788: (ctxt->input->cur - ctxt->input->base);
11789: else
11790: avail = ctxt->input->buf->buffer->use -
11791: (ctxt->input->cur - ctxt->input->base);
11792: }
11793:
11794: if ((ctxt->instate != XML_PARSER_EOF) &&
11795: (ctxt->instate != XML_PARSER_EPILOG)) {
11796: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11797: }
11798: if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11799: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11800: }
11801: if (ctxt->instate != XML_PARSER_EOF) {
11802: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11803: ctxt->sax->endDocument(ctxt->userData);
11804: }
11805: ctxt->instate = XML_PARSER_EOF;
11806: }
11807: return((xmlParserErrors) ctxt->errNo);
11808: }
11809:
11810: /************************************************************************
11811: * *
11812: * I/O front end functions to the parser *
11813: * *
11814: ************************************************************************/
11815:
11816: /**
11817: * xmlCreatePushParserCtxt:
11818: * @sax: a SAX handler
11819: * @user_data: The user data returned on SAX callbacks
11820: * @chunk: a pointer to an array of chars
11821: * @size: number of chars in the array
11822: * @filename: an optional file name or URI
11823: *
11824: * Create a parser context for using the XML parser in push mode.
11825: * If @chunk and @size are non-NULL, the data is used to detect
11826: * the encoding. The remaining characters will be parsed so they
11827: * don't need to be fed in again through xmlParseChunk.
11828: * To allow content encoding detection, @size should be >= 4
11829: * The value of @filename is used for fetching external entities
11830: * and error/warning reports.
11831: *
11832: * Returns the new parser context or NULL
11833: */
11834:
11835: xmlParserCtxtPtr
11836: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11837: const char *chunk, int size, const char *filename) {
11838: xmlParserCtxtPtr ctxt;
11839: xmlParserInputPtr inputStream;
11840: xmlParserInputBufferPtr buf;
11841: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11842:
11843: /*
11844: * plug some encoding conversion routines
11845: */
11846: if ((chunk != NULL) && (size >= 4))
11847: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11848:
11849: buf = xmlAllocParserInputBuffer(enc);
11850: if (buf == NULL) return(NULL);
11851:
11852: ctxt = xmlNewParserCtxt();
11853: if (ctxt == NULL) {
11854: xmlErrMemory(NULL, "creating parser: out of memory\n");
11855: xmlFreeParserInputBuffer(buf);
11856: return(NULL);
11857: }
11858: ctxt->dictNames = 1;
11859: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11860: if (ctxt->pushTab == NULL) {
11861: xmlErrMemory(ctxt, NULL);
11862: xmlFreeParserInputBuffer(buf);
11863: xmlFreeParserCtxt(ctxt);
11864: return(NULL);
11865: }
11866: if (sax != NULL) {
11867: #ifdef LIBXML_SAX1_ENABLED
11868: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11869: #endif /* LIBXML_SAX1_ENABLED */
11870: xmlFree(ctxt->sax);
11871: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11872: if (ctxt->sax == NULL) {
11873: xmlErrMemory(ctxt, NULL);
11874: xmlFreeParserInputBuffer(buf);
11875: xmlFreeParserCtxt(ctxt);
11876: return(NULL);
11877: }
11878: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11879: if (sax->initialized == XML_SAX2_MAGIC)
11880: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11881: else
11882: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11883: if (user_data != NULL)
11884: ctxt->userData = user_data;
11885: }
11886: if (filename == NULL) {
11887: ctxt->directory = NULL;
11888: } else {
11889: ctxt->directory = xmlParserGetDirectory(filename);
11890: }
11891:
11892: inputStream = xmlNewInputStream(ctxt);
11893: if (inputStream == NULL) {
11894: xmlFreeParserCtxt(ctxt);
11895: xmlFreeParserInputBuffer(buf);
11896: return(NULL);
11897: }
11898:
11899: if (filename == NULL)
11900: inputStream->filename = NULL;
11901: else {
11902: inputStream->filename = (char *)
11903: xmlCanonicPath((const xmlChar *) filename);
11904: if (inputStream->filename == NULL) {
11905: xmlFreeParserCtxt(ctxt);
11906: xmlFreeParserInputBuffer(buf);
11907: return(NULL);
11908: }
11909: }
11910: inputStream->buf = buf;
11911: inputStream->base = inputStream->buf->buffer->content;
11912: inputStream->cur = inputStream->buf->buffer->content;
11913: inputStream->end =
11914: &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11915:
11916: inputPush(ctxt, inputStream);
11917:
11918: /*
11919: * If the caller didn't provide an initial 'chunk' for determining
11920: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11921: * that it can be automatically determined later
11922: */
11923: if ((size == 0) || (chunk == NULL)) {
11924: ctxt->charset = XML_CHAR_ENCODING_NONE;
11925: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11926: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11927: int cur = ctxt->input->cur - ctxt->input->base;
11928:
11929: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11930:
11931: ctxt->input->base = ctxt->input->buf->buffer->content + base;
11932: ctxt->input->cur = ctxt->input->base + cur;
11933: ctxt->input->end =
11934: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11935: #ifdef DEBUG_PUSH
11936: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11937: #endif
11938: }
11939:
11940: if (enc != XML_CHAR_ENCODING_NONE) {
11941: xmlSwitchEncoding(ctxt, enc);
11942: }
11943:
11944: return(ctxt);
11945: }
11946: #endif /* LIBXML_PUSH_ENABLED */
11947:
11948: /**
11949: * xmlStopParser:
11950: * @ctxt: an XML parser context
11951: *
11952: * Blocks further parser processing
11953: */
11954: void
11955: xmlStopParser(xmlParserCtxtPtr ctxt) {
11956: if (ctxt == NULL)
11957: return;
11958: ctxt->instate = XML_PARSER_EOF;
11959: ctxt->disableSAX = 1;
11960: if (ctxt->input != NULL) {
11961: ctxt->input->cur = BAD_CAST"";
11962: ctxt->input->base = ctxt->input->cur;
11963: }
11964: }
11965:
11966: /**
11967: * xmlCreateIOParserCtxt:
11968: * @sax: a SAX handler
11969: * @user_data: The user data returned on SAX callbacks
11970: * @ioread: an I/O read function
11971: * @ioclose: an I/O close function
11972: * @ioctx: an I/O handler
11973: * @enc: the charset encoding if known
11974: *
11975: * Create a parser context for using the XML parser with an existing
11976: * I/O stream
11977: *
11978: * Returns the new parser context or NULL
11979: */
11980: xmlParserCtxtPtr
11981: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11982: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11983: void *ioctx, xmlCharEncoding enc) {
11984: xmlParserCtxtPtr ctxt;
11985: xmlParserInputPtr inputStream;
11986: xmlParserInputBufferPtr buf;
1.1.1.2 ! misho 11987:
1.1 misho 11988: if (ioread == NULL) return(NULL);
11989:
11990: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
1.1.1.2 ! misho 11991: if (buf == NULL) {
! 11992: if (ioclose != NULL)
! 11993: ioclose(ioctx);
! 11994: return (NULL);
! 11995: }
1.1 misho 11996:
11997: ctxt = xmlNewParserCtxt();
11998: if (ctxt == NULL) {
11999: xmlFreeParserInputBuffer(buf);
12000: return(NULL);
12001: }
12002: if (sax != NULL) {
12003: #ifdef LIBXML_SAX1_ENABLED
12004: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12005: #endif /* LIBXML_SAX1_ENABLED */
12006: xmlFree(ctxt->sax);
12007: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12008: if (ctxt->sax == NULL) {
12009: xmlErrMemory(ctxt, NULL);
12010: xmlFreeParserCtxt(ctxt);
12011: return(NULL);
12012: }
12013: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12014: if (sax->initialized == XML_SAX2_MAGIC)
12015: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12016: else
12017: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12018: if (user_data != NULL)
12019: ctxt->userData = user_data;
1.1.1.2 ! misho 12020: }
1.1 misho 12021:
12022: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12023: if (inputStream == NULL) {
12024: xmlFreeParserCtxt(ctxt);
12025: return(NULL);
12026: }
12027: inputPush(ctxt, inputStream);
12028:
12029: return(ctxt);
12030: }
12031:
12032: #ifdef LIBXML_VALID_ENABLED
12033: /************************************************************************
12034: * *
12035: * Front ends when parsing a DTD *
12036: * *
12037: ************************************************************************/
12038:
12039: /**
12040: * xmlIOParseDTD:
12041: * @sax: the SAX handler block or NULL
12042: * @input: an Input Buffer
12043: * @enc: the charset encoding if known
12044: *
12045: * Load and parse a DTD
12046: *
12047: * Returns the resulting xmlDtdPtr or NULL in case of error.
12048: * @input will be freed by the function in any case.
12049: */
12050:
12051: xmlDtdPtr
12052: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12053: xmlCharEncoding enc) {
12054: xmlDtdPtr ret = NULL;
12055: xmlParserCtxtPtr ctxt;
12056: xmlParserInputPtr pinput = NULL;
12057: xmlChar start[4];
12058:
12059: if (input == NULL)
12060: return(NULL);
12061:
12062: ctxt = xmlNewParserCtxt();
12063: if (ctxt == NULL) {
12064: xmlFreeParserInputBuffer(input);
12065: return(NULL);
12066: }
12067:
12068: /*
12069: * Set-up the SAX context
12070: */
12071: if (sax != NULL) {
12072: if (ctxt->sax != NULL)
12073: xmlFree(ctxt->sax);
12074: ctxt->sax = sax;
12075: ctxt->userData = ctxt;
12076: }
12077: xmlDetectSAX2(ctxt);
12078:
12079: /*
12080: * generate a parser input from the I/O handler
12081: */
12082:
12083: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12084: if (pinput == NULL) {
12085: if (sax != NULL) ctxt->sax = NULL;
12086: xmlFreeParserInputBuffer(input);
12087: xmlFreeParserCtxt(ctxt);
12088: return(NULL);
12089: }
12090:
12091: /*
12092: * plug some encoding conversion routines here.
12093: */
12094: if (xmlPushInput(ctxt, pinput) < 0) {
12095: if (sax != NULL) ctxt->sax = NULL;
12096: xmlFreeParserCtxt(ctxt);
12097: return(NULL);
12098: }
12099: if (enc != XML_CHAR_ENCODING_NONE) {
12100: xmlSwitchEncoding(ctxt, enc);
12101: }
12102:
12103: pinput->filename = NULL;
12104: pinput->line = 1;
12105: pinput->col = 1;
12106: pinput->base = ctxt->input->cur;
12107: pinput->cur = ctxt->input->cur;
12108: pinput->free = NULL;
12109:
12110: /*
12111: * let's parse that entity knowing it's an external subset.
12112: */
12113: ctxt->inSubset = 2;
12114: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12115: if (ctxt->myDoc == NULL) {
12116: xmlErrMemory(ctxt, "New Doc failed");
12117: return(NULL);
12118: }
12119: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12120: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12121: BAD_CAST "none", BAD_CAST "none");
12122:
12123: if ((enc == XML_CHAR_ENCODING_NONE) &&
12124: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12125: /*
12126: * Get the 4 first bytes and decode the charset
12127: * if enc != XML_CHAR_ENCODING_NONE
12128: * plug some encoding conversion routines.
12129: */
12130: start[0] = RAW;
12131: start[1] = NXT(1);
12132: start[2] = NXT(2);
12133: start[3] = NXT(3);
12134: enc = xmlDetectCharEncoding(start, 4);
12135: if (enc != XML_CHAR_ENCODING_NONE) {
12136: xmlSwitchEncoding(ctxt, enc);
12137: }
12138: }
12139:
12140: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12141:
12142: if (ctxt->myDoc != NULL) {
12143: if (ctxt->wellFormed) {
12144: ret = ctxt->myDoc->extSubset;
12145: ctxt->myDoc->extSubset = NULL;
12146: if (ret != NULL) {
12147: xmlNodePtr tmp;
12148:
12149: ret->doc = NULL;
12150: tmp = ret->children;
12151: while (tmp != NULL) {
12152: tmp->doc = NULL;
12153: tmp = tmp->next;
12154: }
12155: }
12156: } else {
12157: ret = NULL;
12158: }
12159: xmlFreeDoc(ctxt->myDoc);
12160: ctxt->myDoc = NULL;
12161: }
12162: if (sax != NULL) ctxt->sax = NULL;
12163: xmlFreeParserCtxt(ctxt);
12164:
12165: return(ret);
12166: }
12167:
12168: /**
12169: * xmlSAXParseDTD:
12170: * @sax: the SAX handler block
12171: * @ExternalID: a NAME* containing the External ID of the DTD
12172: * @SystemID: a NAME* containing the URL to the DTD
12173: *
12174: * Load and parse an external subset.
12175: *
12176: * Returns the resulting xmlDtdPtr or NULL in case of error.
12177: */
12178:
12179: xmlDtdPtr
12180: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12181: const xmlChar *SystemID) {
12182: xmlDtdPtr ret = NULL;
12183: xmlParserCtxtPtr ctxt;
12184: xmlParserInputPtr input = NULL;
12185: xmlCharEncoding enc;
12186: xmlChar* systemIdCanonic;
12187:
12188: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12189:
12190: ctxt = xmlNewParserCtxt();
12191: if (ctxt == NULL) {
12192: return(NULL);
12193: }
12194:
12195: /*
12196: * Set-up the SAX context
12197: */
12198: if (sax != NULL) {
12199: if (ctxt->sax != NULL)
12200: xmlFree(ctxt->sax);
12201: ctxt->sax = sax;
12202: ctxt->userData = ctxt;
12203: }
12204:
12205: /*
12206: * Canonicalise the system ID
12207: */
12208: systemIdCanonic = xmlCanonicPath(SystemID);
12209: if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12210: xmlFreeParserCtxt(ctxt);
12211: return(NULL);
12212: }
12213:
12214: /*
12215: * Ask the Entity resolver to load the damn thing
12216: */
12217:
12218: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12219: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12220: systemIdCanonic);
12221: if (input == NULL) {
12222: if (sax != NULL) ctxt->sax = NULL;
12223: xmlFreeParserCtxt(ctxt);
12224: if (systemIdCanonic != NULL)
12225: xmlFree(systemIdCanonic);
12226: return(NULL);
12227: }
12228:
12229: /*
12230: * plug some encoding conversion routines here.
12231: */
12232: if (xmlPushInput(ctxt, input) < 0) {
12233: if (sax != NULL) ctxt->sax = NULL;
12234: xmlFreeParserCtxt(ctxt);
12235: if (systemIdCanonic != NULL)
12236: xmlFree(systemIdCanonic);
12237: return(NULL);
12238: }
12239: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12240: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12241: xmlSwitchEncoding(ctxt, enc);
12242: }
12243:
12244: if (input->filename == NULL)
12245: input->filename = (char *) systemIdCanonic;
12246: else
12247: xmlFree(systemIdCanonic);
12248: input->line = 1;
12249: input->col = 1;
12250: input->base = ctxt->input->cur;
12251: input->cur = ctxt->input->cur;
12252: input->free = NULL;
12253:
12254: /*
12255: * let's parse that entity knowing it's an external subset.
12256: */
12257: ctxt->inSubset = 2;
12258: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12259: if (ctxt->myDoc == NULL) {
12260: xmlErrMemory(ctxt, "New Doc failed");
12261: if (sax != NULL) ctxt->sax = NULL;
12262: xmlFreeParserCtxt(ctxt);
12263: return(NULL);
12264: }
12265: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12266: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12267: ExternalID, SystemID);
12268: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12269:
12270: if (ctxt->myDoc != NULL) {
12271: if (ctxt->wellFormed) {
12272: ret = ctxt->myDoc->extSubset;
12273: ctxt->myDoc->extSubset = NULL;
12274: if (ret != NULL) {
12275: xmlNodePtr tmp;
12276:
12277: ret->doc = NULL;
12278: tmp = ret->children;
12279: while (tmp != NULL) {
12280: tmp->doc = NULL;
12281: tmp = tmp->next;
12282: }
12283: }
12284: } else {
12285: ret = NULL;
12286: }
12287: xmlFreeDoc(ctxt->myDoc);
12288: ctxt->myDoc = NULL;
12289: }
12290: if (sax != NULL) ctxt->sax = NULL;
12291: xmlFreeParserCtxt(ctxt);
12292:
12293: return(ret);
12294: }
12295:
12296:
12297: /**
12298: * xmlParseDTD:
12299: * @ExternalID: a NAME* containing the External ID of the DTD
12300: * @SystemID: a NAME* containing the URL to the DTD
12301: *
12302: * Load and parse an external subset.
12303: *
12304: * Returns the resulting xmlDtdPtr or NULL in case of error.
12305: */
12306:
12307: xmlDtdPtr
12308: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12309: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12310: }
12311: #endif /* LIBXML_VALID_ENABLED */
12312:
12313: /************************************************************************
12314: * *
12315: * Front ends when parsing an Entity *
12316: * *
12317: ************************************************************************/
12318:
12319: /**
12320: * xmlParseCtxtExternalEntity:
12321: * @ctx: the existing parsing context
12322: * @URL: the URL for the entity to load
12323: * @ID: the System ID for the entity to load
12324: * @lst: the return value for the set of parsed nodes
12325: *
12326: * Parse an external general entity within an existing parsing context
12327: * An external general parsed entity is well-formed if it matches the
12328: * production labeled extParsedEnt.
12329: *
12330: * [78] extParsedEnt ::= TextDecl? content
12331: *
12332: * Returns 0 if the entity is well formed, -1 in case of args problem and
12333: * the parser error code otherwise
12334: */
12335:
12336: int
12337: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12338: const xmlChar *ID, xmlNodePtr *lst) {
12339: xmlParserCtxtPtr ctxt;
12340: xmlDocPtr newDoc;
12341: xmlNodePtr newRoot;
12342: xmlSAXHandlerPtr oldsax = NULL;
12343: int ret = 0;
12344: xmlChar start[4];
12345: xmlCharEncoding enc;
12346:
12347: if (ctx == NULL) return(-1);
12348:
12349: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12350: (ctx->depth > 1024)) {
12351: return(XML_ERR_ENTITY_LOOP);
12352: }
12353:
12354: if (lst != NULL)
12355: *lst = NULL;
12356: if ((URL == NULL) && (ID == NULL))
12357: return(-1);
12358: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12359: return(-1);
12360:
12361: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12362: if (ctxt == NULL) {
12363: return(-1);
12364: }
12365:
12366: oldsax = ctxt->sax;
12367: ctxt->sax = ctx->sax;
12368: xmlDetectSAX2(ctxt);
12369: newDoc = xmlNewDoc(BAD_CAST "1.0");
12370: if (newDoc == NULL) {
12371: xmlFreeParserCtxt(ctxt);
12372: return(-1);
12373: }
12374: newDoc->properties = XML_DOC_INTERNAL;
12375: if (ctx->myDoc->dict) {
12376: newDoc->dict = ctx->myDoc->dict;
12377: xmlDictReference(newDoc->dict);
12378: }
12379: if (ctx->myDoc != NULL) {
12380: newDoc->intSubset = ctx->myDoc->intSubset;
12381: newDoc->extSubset = ctx->myDoc->extSubset;
12382: }
12383: if (ctx->myDoc->URL != NULL) {
12384: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12385: }
12386: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12387: if (newRoot == NULL) {
12388: ctxt->sax = oldsax;
12389: xmlFreeParserCtxt(ctxt);
12390: newDoc->intSubset = NULL;
12391: newDoc->extSubset = NULL;
12392: xmlFreeDoc(newDoc);
12393: return(-1);
12394: }
12395: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12396: nodePush(ctxt, newDoc->children);
12397: if (ctx->myDoc == NULL) {
12398: ctxt->myDoc = newDoc;
12399: } else {
12400: ctxt->myDoc = ctx->myDoc;
12401: newDoc->children->doc = ctx->myDoc;
12402: }
12403:
12404: /*
12405: * Get the 4 first bytes and decode the charset
12406: * if enc != XML_CHAR_ENCODING_NONE
12407: * plug some encoding conversion routines.
12408: */
12409: GROW
12410: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12411: start[0] = RAW;
12412: start[1] = NXT(1);
12413: start[2] = NXT(2);
12414: start[3] = NXT(3);
12415: enc = xmlDetectCharEncoding(start, 4);
12416: if (enc != XML_CHAR_ENCODING_NONE) {
12417: xmlSwitchEncoding(ctxt, enc);
12418: }
12419: }
12420:
12421: /*
12422: * Parse a possible text declaration first
12423: */
12424: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12425: xmlParseTextDecl(ctxt);
12426: /*
12427: * An XML-1.0 document can't reference an entity not XML-1.0
12428: */
12429: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12430: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12431: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12432: "Version mismatch between document and entity\n");
12433: }
12434: }
12435:
12436: /*
1.1.1.2 ! misho 12437: * If the user provided its own SAX callbacks then reuse the
! 12438: * useData callback field, otherwise the expected setup in a
! 12439: * DOM builder is to have userData == ctxt
! 12440: */
! 12441: if (ctx->userData == ctx)
! 12442: ctxt->userData = ctxt;
! 12443: else
! 12444: ctxt->userData = ctx->userData;
! 12445:
! 12446: /*
1.1 misho 12447: * Doing validity checking on chunk doesn't make sense
12448: */
12449: ctxt->instate = XML_PARSER_CONTENT;
12450: ctxt->validate = ctx->validate;
12451: ctxt->valid = ctx->valid;
12452: ctxt->loadsubset = ctx->loadsubset;
12453: ctxt->depth = ctx->depth + 1;
12454: ctxt->replaceEntities = ctx->replaceEntities;
12455: if (ctxt->validate) {
12456: ctxt->vctxt.error = ctx->vctxt.error;
12457: ctxt->vctxt.warning = ctx->vctxt.warning;
12458: } else {
12459: ctxt->vctxt.error = NULL;
12460: ctxt->vctxt.warning = NULL;
12461: }
12462: ctxt->vctxt.nodeTab = NULL;
12463: ctxt->vctxt.nodeNr = 0;
12464: ctxt->vctxt.nodeMax = 0;
12465: ctxt->vctxt.node = NULL;
12466: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12467: ctxt->dict = ctx->dict;
12468: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12469: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12470: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12471: ctxt->dictNames = ctx->dictNames;
12472: ctxt->attsDefault = ctx->attsDefault;
12473: ctxt->attsSpecial = ctx->attsSpecial;
12474: ctxt->linenumbers = ctx->linenumbers;
12475:
12476: xmlParseContent(ctxt);
12477:
12478: ctx->validate = ctxt->validate;
12479: ctx->valid = ctxt->valid;
12480: if ((RAW == '<') && (NXT(1) == '/')) {
12481: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12482: } else if (RAW != 0) {
12483: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12484: }
12485: if (ctxt->node != newDoc->children) {
12486: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12487: }
12488:
12489: if (!ctxt->wellFormed) {
12490: if (ctxt->errNo == 0)
12491: ret = 1;
12492: else
12493: ret = ctxt->errNo;
12494: } else {
12495: if (lst != NULL) {
12496: xmlNodePtr cur;
12497:
12498: /*
12499: * Return the newly created nodeset after unlinking it from
12500: * they pseudo parent.
12501: */
12502: cur = newDoc->children->children;
12503: *lst = cur;
12504: while (cur != NULL) {
12505: cur->parent = NULL;
12506: cur = cur->next;
12507: }
12508: newDoc->children->children = NULL;
12509: }
12510: ret = 0;
12511: }
12512: ctxt->sax = oldsax;
12513: ctxt->dict = NULL;
12514: ctxt->attsDefault = NULL;
12515: ctxt->attsSpecial = NULL;
12516: xmlFreeParserCtxt(ctxt);
12517: newDoc->intSubset = NULL;
12518: newDoc->extSubset = NULL;
12519: xmlFreeDoc(newDoc);
12520:
12521: return(ret);
12522: }
12523:
12524: /**
12525: * xmlParseExternalEntityPrivate:
12526: * @doc: the document the chunk pertains to
12527: * @oldctxt: the previous parser context if available
12528: * @sax: the SAX handler bloc (possibly NULL)
12529: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12530: * @depth: Used for loop detection, use 0
12531: * @URL: the URL for the entity to load
12532: * @ID: the System ID for the entity to load
12533: * @list: the return value for the set of parsed nodes
12534: *
12535: * Private version of xmlParseExternalEntity()
12536: *
12537: * Returns 0 if the entity is well formed, -1 in case of args problem and
12538: * the parser error code otherwise
12539: */
12540:
12541: static xmlParserErrors
12542: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12543: xmlSAXHandlerPtr sax,
12544: void *user_data, int depth, const xmlChar *URL,
12545: const xmlChar *ID, xmlNodePtr *list) {
12546: xmlParserCtxtPtr ctxt;
12547: xmlDocPtr newDoc;
12548: xmlNodePtr newRoot;
12549: xmlSAXHandlerPtr oldsax = NULL;
12550: xmlParserErrors ret = XML_ERR_OK;
12551: xmlChar start[4];
12552: xmlCharEncoding enc;
12553:
12554: if (((depth > 40) &&
12555: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12556: (depth > 1024)) {
12557: return(XML_ERR_ENTITY_LOOP);
12558: }
12559:
12560: if (list != NULL)
12561: *list = NULL;
12562: if ((URL == NULL) && (ID == NULL))
12563: return(XML_ERR_INTERNAL_ERROR);
12564: if (doc == NULL)
12565: return(XML_ERR_INTERNAL_ERROR);
12566:
12567:
12568: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12569: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12570: ctxt->userData = ctxt;
12571: if (oldctxt != NULL) {
12572: ctxt->_private = oldctxt->_private;
12573: ctxt->loadsubset = oldctxt->loadsubset;
12574: ctxt->validate = oldctxt->validate;
12575: ctxt->external = oldctxt->external;
12576: ctxt->record_info = oldctxt->record_info;
12577: ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12578: ctxt->node_seq.length = oldctxt->node_seq.length;
12579: ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12580: } else {
12581: /*
12582: * Doing validity checking on chunk without context
12583: * doesn't make sense
12584: */
12585: ctxt->_private = NULL;
12586: ctxt->validate = 0;
12587: ctxt->external = 2;
12588: ctxt->loadsubset = 0;
12589: }
12590: if (sax != NULL) {
12591: oldsax = ctxt->sax;
12592: ctxt->sax = sax;
12593: if (user_data != NULL)
12594: ctxt->userData = user_data;
12595: }
12596: xmlDetectSAX2(ctxt);
12597: newDoc = xmlNewDoc(BAD_CAST "1.0");
12598: if (newDoc == NULL) {
12599: ctxt->node_seq.maximum = 0;
12600: ctxt->node_seq.length = 0;
12601: ctxt->node_seq.buffer = NULL;
12602: xmlFreeParserCtxt(ctxt);
12603: return(XML_ERR_INTERNAL_ERROR);
12604: }
12605: newDoc->properties = XML_DOC_INTERNAL;
12606: newDoc->intSubset = doc->intSubset;
12607: newDoc->extSubset = doc->extSubset;
12608: newDoc->dict = doc->dict;
12609: xmlDictReference(newDoc->dict);
12610:
12611: if (doc->URL != NULL) {
12612: newDoc->URL = xmlStrdup(doc->URL);
12613: }
12614: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12615: if (newRoot == NULL) {
12616: if (sax != NULL)
12617: ctxt->sax = oldsax;
12618: ctxt->node_seq.maximum = 0;
12619: ctxt->node_seq.length = 0;
12620: ctxt->node_seq.buffer = NULL;
12621: xmlFreeParserCtxt(ctxt);
12622: newDoc->intSubset = NULL;
12623: newDoc->extSubset = NULL;
12624: xmlFreeDoc(newDoc);
12625: return(XML_ERR_INTERNAL_ERROR);
12626: }
12627: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12628: nodePush(ctxt, newDoc->children);
12629: ctxt->myDoc = doc;
12630: newRoot->doc = doc;
12631:
12632: /*
12633: * Get the 4 first bytes and decode the charset
12634: * if enc != XML_CHAR_ENCODING_NONE
12635: * plug some encoding conversion routines.
12636: */
12637: GROW;
12638: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12639: start[0] = RAW;
12640: start[1] = NXT(1);
12641: start[2] = NXT(2);
12642: start[3] = NXT(3);
12643: enc = xmlDetectCharEncoding(start, 4);
12644: if (enc != XML_CHAR_ENCODING_NONE) {
12645: xmlSwitchEncoding(ctxt, enc);
12646: }
12647: }
12648:
12649: /*
12650: * Parse a possible text declaration first
12651: */
12652: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12653: xmlParseTextDecl(ctxt);
12654: }
12655:
12656: ctxt->instate = XML_PARSER_CONTENT;
12657: ctxt->depth = depth;
12658:
12659: xmlParseContent(ctxt);
12660:
12661: if ((RAW == '<') && (NXT(1) == '/')) {
12662: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12663: } else if (RAW != 0) {
12664: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12665: }
12666: if (ctxt->node != newDoc->children) {
12667: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12668: }
12669:
12670: if (!ctxt->wellFormed) {
12671: if (ctxt->errNo == 0)
12672: ret = XML_ERR_INTERNAL_ERROR;
12673: else
12674: ret = (xmlParserErrors)ctxt->errNo;
12675: } else {
12676: if (list != NULL) {
12677: xmlNodePtr cur;
12678:
12679: /*
12680: * Return the newly created nodeset after unlinking it from
12681: * they pseudo parent.
12682: */
12683: cur = newDoc->children->children;
12684: *list = cur;
12685: while (cur != NULL) {
12686: cur->parent = NULL;
12687: cur = cur->next;
12688: }
12689: newDoc->children->children = NULL;
12690: }
12691: ret = XML_ERR_OK;
12692: }
12693:
12694: /*
12695: * Record in the parent context the number of entities replacement
12696: * done when parsing that reference.
12697: */
12698: if (oldctxt != NULL)
12699: oldctxt->nbentities += ctxt->nbentities;
12700:
12701: /*
12702: * Also record the size of the entity parsed
12703: */
12704: if (ctxt->input != NULL) {
12705: oldctxt->sizeentities += ctxt->input->consumed;
12706: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12707: }
12708: /*
12709: * And record the last error if any
12710: */
12711: if (ctxt->lastError.code != XML_ERR_OK)
12712: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12713:
12714: if (sax != NULL)
12715: ctxt->sax = oldsax;
12716: oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12717: oldctxt->node_seq.length = ctxt->node_seq.length;
12718: oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12719: ctxt->node_seq.maximum = 0;
12720: ctxt->node_seq.length = 0;
12721: ctxt->node_seq.buffer = NULL;
12722: xmlFreeParserCtxt(ctxt);
12723: newDoc->intSubset = NULL;
12724: newDoc->extSubset = NULL;
12725: xmlFreeDoc(newDoc);
12726:
12727: return(ret);
12728: }
12729:
12730: #ifdef LIBXML_SAX1_ENABLED
12731: /**
12732: * xmlParseExternalEntity:
12733: * @doc: the document the chunk pertains to
12734: * @sax: the SAX handler bloc (possibly NULL)
12735: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12736: * @depth: Used for loop detection, use 0
12737: * @URL: the URL for the entity to load
12738: * @ID: the System ID for the entity to load
12739: * @lst: the return value for the set of parsed nodes
12740: *
12741: * Parse an external general entity
12742: * An external general parsed entity is well-formed if it matches the
12743: * production labeled extParsedEnt.
12744: *
12745: * [78] extParsedEnt ::= TextDecl? content
12746: *
12747: * Returns 0 if the entity is well formed, -1 in case of args problem and
12748: * the parser error code otherwise
12749: */
12750:
12751: int
12752: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12753: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12754: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12755: ID, lst));
12756: }
12757:
12758: /**
12759: * xmlParseBalancedChunkMemory:
12760: * @doc: the document the chunk pertains to
12761: * @sax: the SAX handler bloc (possibly NULL)
12762: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12763: * @depth: Used for loop detection, use 0
12764: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12765: * @lst: the return value for the set of parsed nodes
12766: *
12767: * Parse a well-balanced chunk of an XML document
12768: * called by the parser
12769: * The allowed sequence for the Well Balanced Chunk is the one defined by
12770: * the content production in the XML grammar:
12771: *
12772: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12773: *
12774: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12775: * the parser error code otherwise
12776: */
12777:
12778: int
12779: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12780: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12781: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12782: depth, string, lst, 0 );
12783: }
12784: #endif /* LIBXML_SAX1_ENABLED */
12785:
12786: /**
12787: * xmlParseBalancedChunkMemoryInternal:
12788: * @oldctxt: the existing parsing context
12789: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12790: * @user_data: the user data field for the parser context
12791: * @lst: the return value for the set of parsed nodes
12792: *
12793: *
12794: * Parse a well-balanced chunk of an XML document
12795: * called by the parser
12796: * The allowed sequence for the Well Balanced Chunk is the one defined by
12797: * the content production in the XML grammar:
12798: *
12799: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12800: *
12801: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12802: * error code otherwise
12803: *
12804: * In case recover is set to 1, the nodelist will not be empty even if
12805: * the parsed chunk is not well balanced.
12806: */
12807: static xmlParserErrors
12808: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12809: const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12810: xmlParserCtxtPtr ctxt;
12811: xmlDocPtr newDoc = NULL;
12812: xmlNodePtr newRoot;
12813: xmlSAXHandlerPtr oldsax = NULL;
12814: xmlNodePtr content = NULL;
12815: xmlNodePtr last = NULL;
12816: int size;
12817: xmlParserErrors ret = XML_ERR_OK;
12818: #ifdef SAX2
12819: int i;
12820: #endif
12821:
12822: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12823: (oldctxt->depth > 1024)) {
12824: return(XML_ERR_ENTITY_LOOP);
12825: }
12826:
12827:
12828: if (lst != NULL)
12829: *lst = NULL;
12830: if (string == NULL)
12831: return(XML_ERR_INTERNAL_ERROR);
12832:
12833: size = xmlStrlen(string);
12834:
12835: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12836: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12837: if (user_data != NULL)
12838: ctxt->userData = user_data;
12839: else
12840: ctxt->userData = ctxt;
12841: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12842: ctxt->dict = oldctxt->dict;
12843: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12844: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12845: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12846:
12847: #ifdef SAX2
12848: /* propagate namespaces down the entity */
12849: for (i = 0;i < oldctxt->nsNr;i += 2) {
12850: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12851: }
12852: #endif
12853:
12854: oldsax = ctxt->sax;
12855: ctxt->sax = oldctxt->sax;
12856: xmlDetectSAX2(ctxt);
12857: ctxt->replaceEntities = oldctxt->replaceEntities;
12858: ctxt->options = oldctxt->options;
12859:
12860: ctxt->_private = oldctxt->_private;
12861: if (oldctxt->myDoc == NULL) {
12862: newDoc = xmlNewDoc(BAD_CAST "1.0");
12863: if (newDoc == NULL) {
12864: ctxt->sax = oldsax;
12865: ctxt->dict = NULL;
12866: xmlFreeParserCtxt(ctxt);
12867: return(XML_ERR_INTERNAL_ERROR);
12868: }
12869: newDoc->properties = XML_DOC_INTERNAL;
12870: newDoc->dict = ctxt->dict;
12871: xmlDictReference(newDoc->dict);
12872: ctxt->myDoc = newDoc;
12873: } else {
12874: ctxt->myDoc = oldctxt->myDoc;
12875: content = ctxt->myDoc->children;
12876: last = ctxt->myDoc->last;
12877: }
12878: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12879: if (newRoot == NULL) {
12880: ctxt->sax = oldsax;
12881: ctxt->dict = NULL;
12882: xmlFreeParserCtxt(ctxt);
12883: if (newDoc != NULL) {
12884: xmlFreeDoc(newDoc);
12885: }
12886: return(XML_ERR_INTERNAL_ERROR);
12887: }
12888: ctxt->myDoc->children = NULL;
12889: ctxt->myDoc->last = NULL;
12890: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12891: nodePush(ctxt, ctxt->myDoc->children);
12892: ctxt->instate = XML_PARSER_CONTENT;
12893: ctxt->depth = oldctxt->depth + 1;
12894:
12895: ctxt->validate = 0;
12896: ctxt->loadsubset = oldctxt->loadsubset;
12897: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12898: /*
12899: * ID/IDREF registration will be done in xmlValidateElement below
12900: */
12901: ctxt->loadsubset |= XML_SKIP_IDS;
12902: }
12903: ctxt->dictNames = oldctxt->dictNames;
12904: ctxt->attsDefault = oldctxt->attsDefault;
12905: ctxt->attsSpecial = oldctxt->attsSpecial;
12906:
12907: xmlParseContent(ctxt);
12908: if ((RAW == '<') && (NXT(1) == '/')) {
12909: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12910: } else if (RAW != 0) {
12911: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12912: }
12913: if (ctxt->node != ctxt->myDoc->children) {
12914: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12915: }
12916:
12917: if (!ctxt->wellFormed) {
12918: if (ctxt->errNo == 0)
12919: ret = XML_ERR_INTERNAL_ERROR;
12920: else
12921: ret = (xmlParserErrors)ctxt->errNo;
12922: } else {
12923: ret = XML_ERR_OK;
12924: }
12925:
12926: if ((lst != NULL) && (ret == XML_ERR_OK)) {
12927: xmlNodePtr cur;
12928:
12929: /*
12930: * Return the newly created nodeset after unlinking it from
12931: * they pseudo parent.
12932: */
12933: cur = ctxt->myDoc->children->children;
12934: *lst = cur;
12935: while (cur != NULL) {
12936: #ifdef LIBXML_VALID_ENABLED
12937: if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12938: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12939: (cur->type == XML_ELEMENT_NODE)) {
12940: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12941: oldctxt->myDoc, cur);
12942: }
12943: #endif /* LIBXML_VALID_ENABLED */
12944: cur->parent = NULL;
12945: cur = cur->next;
12946: }
12947: ctxt->myDoc->children->children = NULL;
12948: }
12949: if (ctxt->myDoc != NULL) {
12950: xmlFreeNode(ctxt->myDoc->children);
12951: ctxt->myDoc->children = content;
12952: ctxt->myDoc->last = last;
12953: }
12954:
12955: /*
12956: * Record in the parent context the number of entities replacement
12957: * done when parsing that reference.
12958: */
12959: if (oldctxt != NULL)
12960: oldctxt->nbentities += ctxt->nbentities;
12961:
12962: /*
12963: * Also record the last error if any
12964: */
12965: if (ctxt->lastError.code != XML_ERR_OK)
12966: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12967:
12968: ctxt->sax = oldsax;
12969: ctxt->dict = NULL;
12970: ctxt->attsDefault = NULL;
12971: ctxt->attsSpecial = NULL;
12972: xmlFreeParserCtxt(ctxt);
12973: if (newDoc != NULL) {
12974: xmlFreeDoc(newDoc);
12975: }
12976:
12977: return(ret);
12978: }
12979:
12980: /**
12981: * xmlParseInNodeContext:
12982: * @node: the context node
12983: * @data: the input string
12984: * @datalen: the input string length in bytes
12985: * @options: a combination of xmlParserOption
12986: * @lst: the return value for the set of parsed nodes
12987: *
12988: * Parse a well-balanced chunk of an XML document
12989: * within the context (DTD, namespaces, etc ...) of the given node.
12990: *
12991: * The allowed sequence for the data is a Well Balanced Chunk defined by
12992: * the content production in the XML grammar:
12993: *
12994: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12995: *
12996: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12997: * error code otherwise
12998: */
12999: xmlParserErrors
13000: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13001: int options, xmlNodePtr *lst) {
13002: #ifdef SAX2
13003: xmlParserCtxtPtr ctxt;
13004: xmlDocPtr doc = NULL;
13005: xmlNodePtr fake, cur;
13006: int nsnr = 0;
13007:
13008: xmlParserErrors ret = XML_ERR_OK;
13009:
13010: /*
13011: * check all input parameters, grab the document
13012: */
13013: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13014: return(XML_ERR_INTERNAL_ERROR);
13015: switch (node->type) {
13016: case XML_ELEMENT_NODE:
13017: case XML_ATTRIBUTE_NODE:
13018: case XML_TEXT_NODE:
13019: case XML_CDATA_SECTION_NODE:
13020: case XML_ENTITY_REF_NODE:
13021: case XML_PI_NODE:
13022: case XML_COMMENT_NODE:
13023: case XML_DOCUMENT_NODE:
13024: case XML_HTML_DOCUMENT_NODE:
13025: break;
13026: default:
13027: return(XML_ERR_INTERNAL_ERROR);
13028:
13029: }
13030: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13031: (node->type != XML_DOCUMENT_NODE) &&
13032: (node->type != XML_HTML_DOCUMENT_NODE))
13033: node = node->parent;
13034: if (node == NULL)
13035: return(XML_ERR_INTERNAL_ERROR);
13036: if (node->type == XML_ELEMENT_NODE)
13037: doc = node->doc;
13038: else
13039: doc = (xmlDocPtr) node;
13040: if (doc == NULL)
13041: return(XML_ERR_INTERNAL_ERROR);
13042:
13043: /*
13044: * allocate a context and set-up everything not related to the
13045: * node position in the tree
13046: */
13047: if (doc->type == XML_DOCUMENT_NODE)
13048: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13049: #ifdef LIBXML_HTML_ENABLED
13050: else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13051: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13052: /*
13053: * When parsing in context, it makes no sense to add implied
13054: * elements like html/body/etc...
13055: */
13056: options |= HTML_PARSE_NOIMPLIED;
13057: }
13058: #endif
13059: else
13060: return(XML_ERR_INTERNAL_ERROR);
13061:
13062: if (ctxt == NULL)
13063: return(XML_ERR_NO_MEMORY);
13064:
13065: /*
13066: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13067: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13068: * we must wait until the last moment to free the original one.
13069: */
13070: if (doc->dict != NULL) {
13071: if (ctxt->dict != NULL)
13072: xmlDictFree(ctxt->dict);
13073: ctxt->dict = doc->dict;
13074: } else
13075: options |= XML_PARSE_NODICT;
13076:
13077: if (doc->encoding != NULL) {
13078: xmlCharEncodingHandlerPtr hdlr;
13079:
13080: if (ctxt->encoding != NULL)
13081: xmlFree((xmlChar *) ctxt->encoding);
13082: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13083:
13084: hdlr = xmlFindCharEncodingHandler(doc->encoding);
13085: if (hdlr != NULL) {
13086: xmlSwitchToEncoding(ctxt, hdlr);
13087: } else {
13088: return(XML_ERR_UNSUPPORTED_ENCODING);
13089: }
13090: }
13091:
13092: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13093: xmlDetectSAX2(ctxt);
13094: ctxt->myDoc = doc;
13095:
13096: fake = xmlNewComment(NULL);
13097: if (fake == NULL) {
13098: xmlFreeParserCtxt(ctxt);
13099: return(XML_ERR_NO_MEMORY);
13100: }
13101: xmlAddChild(node, fake);
13102:
13103: if (node->type == XML_ELEMENT_NODE) {
13104: nodePush(ctxt, node);
13105: /*
13106: * initialize the SAX2 namespaces stack
13107: */
13108: cur = node;
13109: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13110: xmlNsPtr ns = cur->nsDef;
13111: const xmlChar *iprefix, *ihref;
13112:
13113: while (ns != NULL) {
13114: if (ctxt->dict) {
13115: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13116: ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13117: } else {
13118: iprefix = ns->prefix;
13119: ihref = ns->href;
13120: }
13121:
13122: if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13123: nsPush(ctxt, iprefix, ihref);
13124: nsnr++;
13125: }
13126: ns = ns->next;
13127: }
13128: cur = cur->parent;
13129: }
13130: ctxt->instate = XML_PARSER_CONTENT;
13131: }
13132:
13133: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13134: /*
13135: * ID/IDREF registration will be done in xmlValidateElement below
13136: */
13137: ctxt->loadsubset |= XML_SKIP_IDS;
13138: }
13139:
13140: #ifdef LIBXML_HTML_ENABLED
13141: if (doc->type == XML_HTML_DOCUMENT_NODE)
13142: __htmlParseContent(ctxt);
13143: else
13144: #endif
13145: xmlParseContent(ctxt);
13146:
13147: nsPop(ctxt, nsnr);
13148: if ((RAW == '<') && (NXT(1) == '/')) {
13149: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13150: } else if (RAW != 0) {
13151: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13152: }
13153: if ((ctxt->node != NULL) && (ctxt->node != node)) {
13154: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13155: ctxt->wellFormed = 0;
13156: }
13157:
13158: if (!ctxt->wellFormed) {
13159: if (ctxt->errNo == 0)
13160: ret = XML_ERR_INTERNAL_ERROR;
13161: else
13162: ret = (xmlParserErrors)ctxt->errNo;
13163: } else {
13164: ret = XML_ERR_OK;
13165: }
13166:
13167: /*
13168: * Return the newly created nodeset after unlinking it from
13169: * the pseudo sibling.
13170: */
13171:
13172: cur = fake->next;
13173: fake->next = NULL;
13174: node->last = fake;
13175:
13176: if (cur != NULL) {
13177: cur->prev = NULL;
13178: }
13179:
13180: *lst = cur;
13181:
13182: while (cur != NULL) {
13183: cur->parent = NULL;
13184: cur = cur->next;
13185: }
13186:
13187: xmlUnlinkNode(fake);
13188: xmlFreeNode(fake);
13189:
13190:
13191: if (ret != XML_ERR_OK) {
13192: xmlFreeNodeList(*lst);
13193: *lst = NULL;
13194: }
13195:
13196: if (doc->dict != NULL)
13197: ctxt->dict = NULL;
13198: xmlFreeParserCtxt(ctxt);
13199:
13200: return(ret);
13201: #else /* !SAX2 */
13202: return(XML_ERR_INTERNAL_ERROR);
13203: #endif
13204: }
13205:
13206: #ifdef LIBXML_SAX1_ENABLED
13207: /**
13208: * xmlParseBalancedChunkMemoryRecover:
13209: * @doc: the document the chunk pertains to
13210: * @sax: the SAX handler bloc (possibly NULL)
13211: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13212: * @depth: Used for loop detection, use 0
13213: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13214: * @lst: the return value for the set of parsed nodes
13215: * @recover: return nodes even if the data is broken (use 0)
13216: *
13217: *
13218: * Parse a well-balanced chunk of an XML document
13219: * called by the parser
13220: * The allowed sequence for the Well Balanced Chunk is the one defined by
13221: * the content production in the XML grammar:
13222: *
13223: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13224: *
13225: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13226: * the parser error code otherwise
13227: *
13228: * In case recover is set to 1, the nodelist will not be empty even if
13229: * the parsed chunk is not well balanced, assuming the parsing succeeded to
13230: * some extent.
13231: */
13232: int
13233: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13234: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13235: int recover) {
13236: xmlParserCtxtPtr ctxt;
13237: xmlDocPtr newDoc;
13238: xmlSAXHandlerPtr oldsax = NULL;
13239: xmlNodePtr content, newRoot;
13240: int size;
13241: int ret = 0;
13242:
13243: if (depth > 40) {
13244: return(XML_ERR_ENTITY_LOOP);
13245: }
13246:
13247:
13248: if (lst != NULL)
13249: *lst = NULL;
13250: if (string == NULL)
13251: return(-1);
13252:
13253: size = xmlStrlen(string);
13254:
13255: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13256: if (ctxt == NULL) return(-1);
13257: ctxt->userData = ctxt;
13258: if (sax != NULL) {
13259: oldsax = ctxt->sax;
13260: ctxt->sax = sax;
13261: if (user_data != NULL)
13262: ctxt->userData = user_data;
13263: }
13264: newDoc = xmlNewDoc(BAD_CAST "1.0");
13265: if (newDoc == NULL) {
13266: xmlFreeParserCtxt(ctxt);
13267: return(-1);
13268: }
13269: newDoc->properties = XML_DOC_INTERNAL;
13270: if ((doc != NULL) && (doc->dict != NULL)) {
13271: xmlDictFree(ctxt->dict);
13272: ctxt->dict = doc->dict;
13273: xmlDictReference(ctxt->dict);
13274: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13275: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13276: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13277: ctxt->dictNames = 1;
13278: } else {
13279: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13280: }
13281: if (doc != NULL) {
13282: newDoc->intSubset = doc->intSubset;
13283: newDoc->extSubset = doc->extSubset;
13284: }
13285: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13286: if (newRoot == NULL) {
13287: if (sax != NULL)
13288: ctxt->sax = oldsax;
13289: xmlFreeParserCtxt(ctxt);
13290: newDoc->intSubset = NULL;
13291: newDoc->extSubset = NULL;
13292: xmlFreeDoc(newDoc);
13293: return(-1);
13294: }
13295: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13296: nodePush(ctxt, newRoot);
13297: if (doc == NULL) {
13298: ctxt->myDoc = newDoc;
13299: } else {
13300: ctxt->myDoc = newDoc;
13301: newDoc->children->doc = doc;
13302: /* Ensure that doc has XML spec namespace */
13303: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13304: newDoc->oldNs = doc->oldNs;
13305: }
13306: ctxt->instate = XML_PARSER_CONTENT;
13307: ctxt->depth = depth;
13308:
13309: /*
13310: * Doing validity checking on chunk doesn't make sense
13311: */
13312: ctxt->validate = 0;
13313: ctxt->loadsubset = 0;
13314: xmlDetectSAX2(ctxt);
13315:
13316: if ( doc != NULL ){
13317: content = doc->children;
13318: doc->children = NULL;
13319: xmlParseContent(ctxt);
13320: doc->children = content;
13321: }
13322: else {
13323: xmlParseContent(ctxt);
13324: }
13325: if ((RAW == '<') && (NXT(1) == '/')) {
13326: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13327: } else if (RAW != 0) {
13328: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13329: }
13330: if (ctxt->node != newDoc->children) {
13331: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13332: }
13333:
13334: if (!ctxt->wellFormed) {
13335: if (ctxt->errNo == 0)
13336: ret = 1;
13337: else
13338: ret = ctxt->errNo;
13339: } else {
13340: ret = 0;
13341: }
13342:
13343: if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13344: xmlNodePtr cur;
13345:
13346: /*
13347: * Return the newly created nodeset after unlinking it from
13348: * they pseudo parent.
13349: */
13350: cur = newDoc->children->children;
13351: *lst = cur;
13352: while (cur != NULL) {
13353: xmlSetTreeDoc(cur, doc);
13354: cur->parent = NULL;
13355: cur = cur->next;
13356: }
13357: newDoc->children->children = NULL;
13358: }
13359:
13360: if (sax != NULL)
13361: ctxt->sax = oldsax;
13362: xmlFreeParserCtxt(ctxt);
13363: newDoc->intSubset = NULL;
13364: newDoc->extSubset = NULL;
13365: newDoc->oldNs = NULL;
13366: xmlFreeDoc(newDoc);
13367:
13368: return(ret);
13369: }
13370:
13371: /**
13372: * xmlSAXParseEntity:
13373: * @sax: the SAX handler block
13374: * @filename: the filename
13375: *
13376: * parse an XML external entity out of context and build a tree.
13377: * It use the given SAX function block to handle the parsing callback.
13378: * If sax is NULL, fallback to the default DOM tree building routines.
13379: *
13380: * [78] extParsedEnt ::= TextDecl? content
13381: *
13382: * This correspond to a "Well Balanced" chunk
13383: *
13384: * Returns the resulting document tree
13385: */
13386:
13387: xmlDocPtr
13388: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13389: xmlDocPtr ret;
13390: xmlParserCtxtPtr ctxt;
13391:
13392: ctxt = xmlCreateFileParserCtxt(filename);
13393: if (ctxt == NULL) {
13394: return(NULL);
13395: }
13396: if (sax != NULL) {
13397: if (ctxt->sax != NULL)
13398: xmlFree(ctxt->sax);
13399: ctxt->sax = sax;
13400: ctxt->userData = NULL;
13401: }
13402:
13403: xmlParseExtParsedEnt(ctxt);
13404:
13405: if (ctxt->wellFormed)
13406: ret = ctxt->myDoc;
13407: else {
13408: ret = NULL;
13409: xmlFreeDoc(ctxt->myDoc);
13410: ctxt->myDoc = NULL;
13411: }
13412: if (sax != NULL)
13413: ctxt->sax = NULL;
13414: xmlFreeParserCtxt(ctxt);
13415:
13416: return(ret);
13417: }
13418:
13419: /**
13420: * xmlParseEntity:
13421: * @filename: the filename
13422: *
13423: * parse an XML external entity out of context and build a tree.
13424: *
13425: * [78] extParsedEnt ::= TextDecl? content
13426: *
13427: * This correspond to a "Well Balanced" chunk
13428: *
13429: * Returns the resulting document tree
13430: */
13431:
13432: xmlDocPtr
13433: xmlParseEntity(const char *filename) {
13434: return(xmlSAXParseEntity(NULL, filename));
13435: }
13436: #endif /* LIBXML_SAX1_ENABLED */
13437:
13438: /**
13439: * xmlCreateEntityParserCtxtInternal:
13440: * @URL: the entity URL
13441: * @ID: the entity PUBLIC ID
13442: * @base: a possible base for the target URI
13443: * @pctx: parser context used to set options on new context
13444: *
13445: * Create a parser context for an external entity
13446: * Automatic support for ZLIB/Compress compressed document is provided
13447: * by default if found at compile-time.
13448: *
13449: * Returns the new parser context or NULL
13450: */
13451: static xmlParserCtxtPtr
13452: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13453: const xmlChar *base, xmlParserCtxtPtr pctx) {
13454: xmlParserCtxtPtr ctxt;
13455: xmlParserInputPtr inputStream;
13456: char *directory = NULL;
13457: xmlChar *uri;
13458:
13459: ctxt = xmlNewParserCtxt();
13460: if (ctxt == NULL) {
13461: return(NULL);
13462: }
13463:
13464: if (pctx != NULL) {
13465: ctxt->options = pctx->options;
13466: ctxt->_private = pctx->_private;
13467: }
13468:
13469: uri = xmlBuildURI(URL, base);
13470:
13471: if (uri == NULL) {
13472: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13473: if (inputStream == NULL) {
13474: xmlFreeParserCtxt(ctxt);
13475: return(NULL);
13476: }
13477:
13478: inputPush(ctxt, inputStream);
13479:
13480: if ((ctxt->directory == NULL) && (directory == NULL))
13481: directory = xmlParserGetDirectory((char *)URL);
13482: if ((ctxt->directory == NULL) && (directory != NULL))
13483: ctxt->directory = directory;
13484: } else {
13485: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13486: if (inputStream == NULL) {
13487: xmlFree(uri);
13488: xmlFreeParserCtxt(ctxt);
13489: return(NULL);
13490: }
13491:
13492: inputPush(ctxt, inputStream);
13493:
13494: if ((ctxt->directory == NULL) && (directory == NULL))
13495: directory = xmlParserGetDirectory((char *)uri);
13496: if ((ctxt->directory == NULL) && (directory != NULL))
13497: ctxt->directory = directory;
13498: xmlFree(uri);
13499: }
13500: return(ctxt);
13501: }
13502:
13503: /**
13504: * xmlCreateEntityParserCtxt:
13505: * @URL: the entity URL
13506: * @ID: the entity PUBLIC ID
13507: * @base: a possible base for the target URI
13508: *
13509: * Create a parser context for an external entity
13510: * Automatic support for ZLIB/Compress compressed document is provided
13511: * by default if found at compile-time.
13512: *
13513: * Returns the new parser context or NULL
13514: */
13515: xmlParserCtxtPtr
13516: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13517: const xmlChar *base) {
13518: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13519:
13520: }
13521:
13522: /************************************************************************
13523: * *
13524: * Front ends when parsing from a file *
13525: * *
13526: ************************************************************************/
13527:
13528: /**
13529: * xmlCreateURLParserCtxt:
13530: * @filename: the filename or URL
13531: * @options: a combination of xmlParserOption
13532: *
13533: * Create a parser context for a file or URL content.
13534: * Automatic support for ZLIB/Compress compressed document is provided
13535: * by default if found at compile-time and for file accesses
13536: *
13537: * Returns the new parser context or NULL
13538: */
13539: xmlParserCtxtPtr
13540: xmlCreateURLParserCtxt(const char *filename, int options)
13541: {
13542: xmlParserCtxtPtr ctxt;
13543: xmlParserInputPtr inputStream;
13544: char *directory = NULL;
13545:
13546: ctxt = xmlNewParserCtxt();
13547: if (ctxt == NULL) {
13548: xmlErrMemory(NULL, "cannot allocate parser context");
13549: return(NULL);
13550: }
13551:
13552: if (options)
13553: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13554: ctxt->linenumbers = 1;
13555:
13556: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13557: if (inputStream == NULL) {
13558: xmlFreeParserCtxt(ctxt);
13559: return(NULL);
13560: }
13561:
13562: inputPush(ctxt, inputStream);
13563: if ((ctxt->directory == NULL) && (directory == NULL))
13564: directory = xmlParserGetDirectory(filename);
13565: if ((ctxt->directory == NULL) && (directory != NULL))
13566: ctxt->directory = directory;
13567:
13568: return(ctxt);
13569: }
13570:
13571: /**
13572: * xmlCreateFileParserCtxt:
13573: * @filename: the filename
13574: *
13575: * Create a parser context for a file content.
13576: * Automatic support for ZLIB/Compress compressed document is provided
13577: * by default if found at compile-time.
13578: *
13579: * Returns the new parser context or NULL
13580: */
13581: xmlParserCtxtPtr
13582: xmlCreateFileParserCtxt(const char *filename)
13583: {
13584: return(xmlCreateURLParserCtxt(filename, 0));
13585: }
13586:
13587: #ifdef LIBXML_SAX1_ENABLED
13588: /**
13589: * xmlSAXParseFileWithData:
13590: * @sax: the SAX handler block
13591: * @filename: the filename
13592: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13593: * documents
13594: * @data: the userdata
13595: *
13596: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13597: * compressed document is provided by default if found at compile-time.
13598: * It use the given SAX function block to handle the parsing callback.
13599: * If sax is NULL, fallback to the default DOM tree building routines.
13600: *
13601: * User data (void *) is stored within the parser context in the
13602: * context's _private member, so it is available nearly everywhere in libxml
13603: *
13604: * Returns the resulting document tree
13605: */
13606:
13607: xmlDocPtr
13608: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13609: int recovery, void *data) {
13610: xmlDocPtr ret;
13611: xmlParserCtxtPtr ctxt;
13612:
13613: xmlInitParser();
13614:
13615: ctxt = xmlCreateFileParserCtxt(filename);
13616: if (ctxt == NULL) {
13617: return(NULL);
13618: }
13619: if (sax != NULL) {
13620: if (ctxt->sax != NULL)
13621: xmlFree(ctxt->sax);
13622: ctxt->sax = sax;
13623: }
13624: xmlDetectSAX2(ctxt);
13625: if (data!=NULL) {
13626: ctxt->_private = data;
13627: }
13628:
13629: if (ctxt->directory == NULL)
13630: ctxt->directory = xmlParserGetDirectory(filename);
13631:
13632: ctxt->recovery = recovery;
13633:
13634: xmlParseDocument(ctxt);
13635:
13636: if ((ctxt->wellFormed) || recovery) {
13637: ret = ctxt->myDoc;
13638: if (ret != NULL) {
13639: if (ctxt->input->buf->compressed > 0)
13640: ret->compression = 9;
13641: else
13642: ret->compression = ctxt->input->buf->compressed;
13643: }
13644: }
13645: else {
13646: ret = NULL;
13647: xmlFreeDoc(ctxt->myDoc);
13648: ctxt->myDoc = NULL;
13649: }
13650: if (sax != NULL)
13651: ctxt->sax = NULL;
13652: xmlFreeParserCtxt(ctxt);
13653:
13654: return(ret);
13655: }
13656:
13657: /**
13658: * xmlSAXParseFile:
13659: * @sax: the SAX handler block
13660: * @filename: the filename
13661: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13662: * documents
13663: *
13664: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13665: * compressed document is provided by default if found at compile-time.
13666: * It use the given SAX function block to handle the parsing callback.
13667: * If sax is NULL, fallback to the default DOM tree building routines.
13668: *
13669: * Returns the resulting document tree
13670: */
13671:
13672: xmlDocPtr
13673: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13674: int recovery) {
13675: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13676: }
13677:
13678: /**
13679: * xmlRecoverDoc:
13680: * @cur: a pointer to an array of xmlChar
13681: *
13682: * parse an XML in-memory document and build a tree.
13683: * In the case the document is not Well Formed, a attempt to build a
13684: * tree is tried anyway
13685: *
13686: * Returns the resulting document tree or NULL in case of failure
13687: */
13688:
13689: xmlDocPtr
13690: xmlRecoverDoc(const xmlChar *cur) {
13691: return(xmlSAXParseDoc(NULL, cur, 1));
13692: }
13693:
13694: /**
13695: * xmlParseFile:
13696: * @filename: the filename
13697: *
13698: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13699: * compressed document is provided by default if found at compile-time.
13700: *
13701: * Returns the resulting document tree if the file was wellformed,
13702: * NULL otherwise.
13703: */
13704:
13705: xmlDocPtr
13706: xmlParseFile(const char *filename) {
13707: return(xmlSAXParseFile(NULL, filename, 0));
13708: }
13709:
13710: /**
13711: * xmlRecoverFile:
13712: * @filename: the filename
13713: *
13714: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13715: * compressed document is provided by default if found at compile-time.
13716: * In the case the document is not Well Formed, it attempts to build
13717: * a tree anyway
13718: *
13719: * Returns the resulting document tree or NULL in case of failure
13720: */
13721:
13722: xmlDocPtr
13723: xmlRecoverFile(const char *filename) {
13724: return(xmlSAXParseFile(NULL, filename, 1));
13725: }
13726:
13727:
13728: /**
13729: * xmlSetupParserForBuffer:
13730: * @ctxt: an XML parser context
13731: * @buffer: a xmlChar * buffer
13732: * @filename: a file name
13733: *
13734: * Setup the parser context to parse a new buffer; Clears any prior
13735: * contents from the parser context. The buffer parameter must not be
13736: * NULL, but the filename parameter can be
13737: */
13738: void
13739: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13740: const char* filename)
13741: {
13742: xmlParserInputPtr input;
13743:
13744: if ((ctxt == NULL) || (buffer == NULL))
13745: return;
13746:
13747: input = xmlNewInputStream(ctxt);
13748: if (input == NULL) {
13749: xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13750: xmlClearParserCtxt(ctxt);
13751: return;
13752: }
13753:
13754: xmlClearParserCtxt(ctxt);
13755: if (filename != NULL)
13756: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13757: input->base = buffer;
13758: input->cur = buffer;
13759: input->end = &buffer[xmlStrlen(buffer)];
13760: inputPush(ctxt, input);
13761: }
13762:
13763: /**
13764: * xmlSAXUserParseFile:
13765: * @sax: a SAX handler
13766: * @user_data: The user data returned on SAX callbacks
13767: * @filename: a file name
13768: *
13769: * parse an XML file and call the given SAX handler routines.
13770: * Automatic support for ZLIB/Compress compressed document is provided
13771: *
13772: * Returns 0 in case of success or a error number otherwise
13773: */
13774: int
13775: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13776: const char *filename) {
13777: int ret = 0;
13778: xmlParserCtxtPtr ctxt;
13779:
13780: ctxt = xmlCreateFileParserCtxt(filename);
13781: if (ctxt == NULL) return -1;
13782: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13783: xmlFree(ctxt->sax);
13784: ctxt->sax = sax;
13785: xmlDetectSAX2(ctxt);
13786:
13787: if (user_data != NULL)
13788: ctxt->userData = user_data;
13789:
13790: xmlParseDocument(ctxt);
13791:
13792: if (ctxt->wellFormed)
13793: ret = 0;
13794: else {
13795: if (ctxt->errNo != 0)
13796: ret = ctxt->errNo;
13797: else
13798: ret = -1;
13799: }
13800: if (sax != NULL)
13801: ctxt->sax = NULL;
13802: if (ctxt->myDoc != NULL) {
13803: xmlFreeDoc(ctxt->myDoc);
13804: ctxt->myDoc = NULL;
13805: }
13806: xmlFreeParserCtxt(ctxt);
13807:
13808: return ret;
13809: }
13810: #endif /* LIBXML_SAX1_ENABLED */
13811:
13812: /************************************************************************
13813: * *
13814: * Front ends when parsing from memory *
13815: * *
13816: ************************************************************************/
13817:
13818: /**
13819: * xmlCreateMemoryParserCtxt:
13820: * @buffer: a pointer to a char array
13821: * @size: the size of the array
13822: *
13823: * Create a parser context for an XML in-memory document.
13824: *
13825: * Returns the new parser context or NULL
13826: */
13827: xmlParserCtxtPtr
13828: xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13829: xmlParserCtxtPtr ctxt;
13830: xmlParserInputPtr input;
13831: xmlParserInputBufferPtr buf;
13832:
13833: if (buffer == NULL)
13834: return(NULL);
13835: if (size <= 0)
13836: return(NULL);
13837:
13838: ctxt = xmlNewParserCtxt();
13839: if (ctxt == NULL)
13840: return(NULL);
13841:
13842: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13843: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13844: if (buf == NULL) {
13845: xmlFreeParserCtxt(ctxt);
13846: return(NULL);
13847: }
13848:
13849: input = xmlNewInputStream(ctxt);
13850: if (input == NULL) {
13851: xmlFreeParserInputBuffer(buf);
13852: xmlFreeParserCtxt(ctxt);
13853: return(NULL);
13854: }
13855:
13856: input->filename = NULL;
13857: input->buf = buf;
13858: input->base = input->buf->buffer->content;
13859: input->cur = input->buf->buffer->content;
13860: input->end = &input->buf->buffer->content[input->buf->buffer->use];
13861:
13862: inputPush(ctxt, input);
13863: return(ctxt);
13864: }
13865:
13866: #ifdef LIBXML_SAX1_ENABLED
13867: /**
13868: * xmlSAXParseMemoryWithData:
13869: * @sax: the SAX handler block
13870: * @buffer: an pointer to a char array
13871: * @size: the size of the array
13872: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13873: * documents
13874: * @data: the userdata
13875: *
13876: * parse an XML in-memory block and use the given SAX function block
13877: * to handle the parsing callback. If sax is NULL, fallback to the default
13878: * DOM tree building routines.
13879: *
13880: * User data (void *) is stored within the parser context in the
13881: * context's _private member, so it is available nearly everywhere in libxml
13882: *
13883: * Returns the resulting document tree
13884: */
13885:
13886: xmlDocPtr
13887: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13888: int size, int recovery, void *data) {
13889: xmlDocPtr ret;
13890: xmlParserCtxtPtr ctxt;
13891:
13892: xmlInitParser();
13893:
13894: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13895: if (ctxt == NULL) return(NULL);
13896: if (sax != NULL) {
13897: if (ctxt->sax != NULL)
13898: xmlFree(ctxt->sax);
13899: ctxt->sax = sax;
13900: }
13901: xmlDetectSAX2(ctxt);
13902: if (data!=NULL) {
13903: ctxt->_private=data;
13904: }
13905:
13906: ctxt->recovery = recovery;
13907:
13908: xmlParseDocument(ctxt);
13909:
13910: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13911: else {
13912: ret = NULL;
13913: xmlFreeDoc(ctxt->myDoc);
13914: ctxt->myDoc = NULL;
13915: }
13916: if (sax != NULL)
13917: ctxt->sax = NULL;
13918: xmlFreeParserCtxt(ctxt);
13919:
13920: return(ret);
13921: }
13922:
13923: /**
13924: * xmlSAXParseMemory:
13925: * @sax: the SAX handler block
13926: * @buffer: an pointer to a char array
13927: * @size: the size of the array
13928: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13929: * documents
13930: *
13931: * parse an XML in-memory block and use the given SAX function block
13932: * to handle the parsing callback. If sax is NULL, fallback to the default
13933: * DOM tree building routines.
13934: *
13935: * Returns the resulting document tree
13936: */
13937: xmlDocPtr
13938: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13939: int size, int recovery) {
13940: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13941: }
13942:
13943: /**
13944: * xmlParseMemory:
13945: * @buffer: an pointer to a char array
13946: * @size: the size of the array
13947: *
13948: * parse an XML in-memory block and build a tree.
13949: *
13950: * Returns the resulting document tree
13951: */
13952:
13953: xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13954: return(xmlSAXParseMemory(NULL, buffer, size, 0));
13955: }
13956:
13957: /**
13958: * xmlRecoverMemory:
13959: * @buffer: an pointer to a char array
13960: * @size: the size of the array
13961: *
13962: * parse an XML in-memory block and build a tree.
13963: * In the case the document is not Well Formed, an attempt to
13964: * build a tree is tried anyway
13965: *
13966: * Returns the resulting document tree or NULL in case of error
13967: */
13968:
13969: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13970: return(xmlSAXParseMemory(NULL, buffer, size, 1));
13971: }
13972:
13973: /**
13974: * xmlSAXUserParseMemory:
13975: * @sax: a SAX handler
13976: * @user_data: The user data returned on SAX callbacks
13977: * @buffer: an in-memory XML document input
13978: * @size: the length of the XML document in bytes
13979: *
13980: * A better SAX parsing routine.
13981: * parse an XML in-memory buffer and call the given SAX handler routines.
13982: *
13983: * Returns 0 in case of success or a error number otherwise
13984: */
13985: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13986: const char *buffer, int size) {
13987: int ret = 0;
13988: xmlParserCtxtPtr ctxt;
13989:
13990: xmlInitParser();
13991:
13992: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13993: if (ctxt == NULL) return -1;
13994: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13995: xmlFree(ctxt->sax);
13996: ctxt->sax = sax;
13997: xmlDetectSAX2(ctxt);
13998:
13999: if (user_data != NULL)
14000: ctxt->userData = user_data;
14001:
14002: xmlParseDocument(ctxt);
14003:
14004: if (ctxt->wellFormed)
14005: ret = 0;
14006: else {
14007: if (ctxt->errNo != 0)
14008: ret = ctxt->errNo;
14009: else
14010: ret = -1;
14011: }
14012: if (sax != NULL)
14013: ctxt->sax = NULL;
14014: if (ctxt->myDoc != NULL) {
14015: xmlFreeDoc(ctxt->myDoc);
14016: ctxt->myDoc = NULL;
14017: }
14018: xmlFreeParserCtxt(ctxt);
14019:
14020: return ret;
14021: }
14022: #endif /* LIBXML_SAX1_ENABLED */
14023:
14024: /**
14025: * xmlCreateDocParserCtxt:
14026: * @cur: a pointer to an array of xmlChar
14027: *
14028: * Creates a parser context for an XML in-memory document.
14029: *
14030: * Returns the new parser context or NULL
14031: */
14032: xmlParserCtxtPtr
14033: xmlCreateDocParserCtxt(const xmlChar *cur) {
14034: int len;
14035:
14036: if (cur == NULL)
14037: return(NULL);
14038: len = xmlStrlen(cur);
14039: return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14040: }
14041:
14042: #ifdef LIBXML_SAX1_ENABLED
14043: /**
14044: * xmlSAXParseDoc:
14045: * @sax: the SAX handler block
14046: * @cur: a pointer to an array of xmlChar
14047: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14048: * documents
14049: *
14050: * parse an XML in-memory document and build a tree.
14051: * It use the given SAX function block to handle the parsing callback.
14052: * If sax is NULL, fallback to the default DOM tree building routines.
14053: *
14054: * Returns the resulting document tree
14055: */
14056:
14057: xmlDocPtr
14058: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14059: xmlDocPtr ret;
14060: xmlParserCtxtPtr ctxt;
14061: xmlSAXHandlerPtr oldsax = NULL;
14062:
14063: if (cur == NULL) return(NULL);
14064:
14065:
14066: ctxt = xmlCreateDocParserCtxt(cur);
14067: if (ctxt == NULL) return(NULL);
14068: if (sax != NULL) {
14069: oldsax = ctxt->sax;
14070: ctxt->sax = sax;
14071: ctxt->userData = NULL;
14072: }
14073: xmlDetectSAX2(ctxt);
14074:
14075: xmlParseDocument(ctxt);
14076: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14077: else {
14078: ret = NULL;
14079: xmlFreeDoc(ctxt->myDoc);
14080: ctxt->myDoc = NULL;
14081: }
14082: if (sax != NULL)
14083: ctxt->sax = oldsax;
14084: xmlFreeParserCtxt(ctxt);
14085:
14086: return(ret);
14087: }
14088:
14089: /**
14090: * xmlParseDoc:
14091: * @cur: a pointer to an array of xmlChar
14092: *
14093: * parse an XML in-memory document and build a tree.
14094: *
14095: * Returns the resulting document tree
14096: */
14097:
14098: xmlDocPtr
14099: xmlParseDoc(const xmlChar *cur) {
14100: return(xmlSAXParseDoc(NULL, cur, 0));
14101: }
14102: #endif /* LIBXML_SAX1_ENABLED */
14103:
14104: #ifdef LIBXML_LEGACY_ENABLED
14105: /************************************************************************
14106: * *
14107: * Specific function to keep track of entities references *
14108: * and used by the XSLT debugger *
14109: * *
14110: ************************************************************************/
14111:
14112: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14113:
14114: /**
14115: * xmlAddEntityReference:
14116: * @ent : A valid entity
14117: * @firstNode : A valid first node for children of entity
14118: * @lastNode : A valid last node of children entity
14119: *
14120: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14121: */
14122: static void
14123: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14124: xmlNodePtr lastNode)
14125: {
14126: if (xmlEntityRefFunc != NULL) {
14127: (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14128: }
14129: }
14130:
14131:
14132: /**
14133: * xmlSetEntityReferenceFunc:
14134: * @func: A valid function
14135: *
14136: * Set the function to call call back when a xml reference has been made
14137: */
14138: void
14139: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14140: {
14141: xmlEntityRefFunc = func;
14142: }
14143: #endif /* LIBXML_LEGACY_ENABLED */
14144:
14145: /************************************************************************
14146: * *
14147: * Miscellaneous *
14148: * *
14149: ************************************************************************/
14150:
14151: #ifdef LIBXML_XPATH_ENABLED
14152: #include <libxml/xpath.h>
14153: #endif
14154:
14155: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14156: static int xmlParserInitialized = 0;
14157:
14158: /**
14159: * xmlInitParser:
14160: *
14161: * Initialization function for the XML parser.
14162: * This is not reentrant. Call once before processing in case of
14163: * use in multithreaded programs.
14164: */
14165:
14166: void
14167: xmlInitParser(void) {
14168: if (xmlParserInitialized != 0)
14169: return;
14170:
14171: #ifdef LIBXML_THREAD_ENABLED
14172: __xmlGlobalInitMutexLock();
14173: if (xmlParserInitialized == 0) {
14174: #endif
14175: xmlInitThreads();
14176: xmlInitGlobals();
14177: if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14178: (xmlGenericError == NULL))
14179: initGenericErrorDefaultFunc(NULL);
14180: xmlInitMemory();
1.1.1.2 ! misho 14181: xmlInitializeDict();
1.1 misho 14182: xmlInitCharEncodingHandlers();
14183: xmlDefaultSAXHandlerInit();
14184: xmlRegisterDefaultInputCallbacks();
14185: #ifdef LIBXML_OUTPUT_ENABLED
14186: xmlRegisterDefaultOutputCallbacks();
14187: #endif /* LIBXML_OUTPUT_ENABLED */
14188: #ifdef LIBXML_HTML_ENABLED
14189: htmlInitAutoClose();
14190: htmlDefaultSAXHandlerInit();
14191: #endif
14192: #ifdef LIBXML_XPATH_ENABLED
14193: xmlXPathInit();
14194: #endif
14195: xmlParserInitialized = 1;
14196: #ifdef LIBXML_THREAD_ENABLED
14197: }
14198: __xmlGlobalInitMutexUnlock();
14199: #endif
14200: }
14201:
14202: /**
14203: * xmlCleanupParser:
14204: *
14205: * This function name is somewhat misleading. It does not clean up
14206: * parser state, it cleans up memory allocated by the library itself.
14207: * It is a cleanup function for the XML library. It tries to reclaim all
14208: * related global memory allocated for the library processing.
14209: * It doesn't deallocate any document related memory. One should
14210: * call xmlCleanupParser() only when the process has finished using
14211: * the library and all XML/HTML documents built with it.
14212: * See also xmlInitParser() which has the opposite function of preparing
14213: * the library for operations.
14214: *
14215: * WARNING: if your application is multithreaded or has plugin support
14216: * calling this may crash the application if another thread or
14217: * a plugin is still using libxml2. It's sometimes very hard to
14218: * guess if libxml2 is in use in the application, some libraries
14219: * or plugins may use it without notice. In case of doubt abstain
14220: * from calling this function or do it just before calling exit()
14221: * to avoid leak reports from valgrind !
14222: */
14223:
14224: void
14225: xmlCleanupParser(void) {
14226: if (!xmlParserInitialized)
14227: return;
14228:
14229: xmlCleanupCharEncodingHandlers();
14230: #ifdef LIBXML_CATALOG_ENABLED
14231: xmlCatalogCleanup();
14232: #endif
14233: xmlDictCleanup();
14234: xmlCleanupInputCallbacks();
14235: #ifdef LIBXML_OUTPUT_ENABLED
14236: xmlCleanupOutputCallbacks();
14237: #endif
14238: #ifdef LIBXML_SCHEMAS_ENABLED
14239: xmlSchemaCleanupTypes();
14240: xmlRelaxNGCleanupTypes();
14241: #endif
14242: xmlCleanupGlobals();
14243: xmlResetLastError();
14244: xmlCleanupThreads(); /* must be last if called not from the main thread */
14245: xmlCleanupMemory();
14246: xmlParserInitialized = 0;
14247: }
14248:
14249: /************************************************************************
14250: * *
14251: * New set (2.6.0) of simpler and more flexible APIs *
14252: * *
14253: ************************************************************************/
14254:
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Release @str with xmlFree() unless it is NULL or it is owned by the
 * dictionary held in the variable "dict" of the current scope.
 * The expansion is a bare "if" statement ending in its own semicolon.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (!xmlDictOwns(dict, (const xmlChar *)(str)))))	\
	    xmlFree((char *)(str));
14266:
14267: /**
14268: * xmlCtxtReset:
14269: * @ctxt: an XML parser context
14270: *
14271: * Reset a parser context
14272: */
14273: void
14274: xmlCtxtReset(xmlParserCtxtPtr ctxt)
14275: {
14276: xmlParserInputPtr input;
14277: xmlDictPtr dict;
14278:
14279: if (ctxt == NULL)
14280: return;
14281:
14282: dict = ctxt->dict;
14283:
14284: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14285: xmlFreeInputStream(input);
14286: }
14287: ctxt->inputNr = 0;
14288: ctxt->input = NULL;
14289:
14290: ctxt->spaceNr = 0;
14291: if (ctxt->spaceTab != NULL) {
14292: ctxt->spaceTab[0] = -1;
14293: ctxt->space = &ctxt->spaceTab[0];
14294: } else {
14295: ctxt->space = NULL;
14296: }
14297:
14298:
14299: ctxt->nodeNr = 0;
14300: ctxt->node = NULL;
14301:
14302: ctxt->nameNr = 0;
14303: ctxt->name = NULL;
14304:
14305: DICT_FREE(ctxt->version);
14306: ctxt->version = NULL;
14307: DICT_FREE(ctxt->encoding);
14308: ctxt->encoding = NULL;
14309: DICT_FREE(ctxt->directory);
14310: ctxt->directory = NULL;
14311: DICT_FREE(ctxt->extSubURI);
14312: ctxt->extSubURI = NULL;
14313: DICT_FREE(ctxt->extSubSystem);
14314: ctxt->extSubSystem = NULL;
14315: if (ctxt->myDoc != NULL)
14316: xmlFreeDoc(ctxt->myDoc);
14317: ctxt->myDoc = NULL;
14318:
14319: ctxt->standalone = -1;
14320: ctxt->hasExternalSubset = 0;
14321: ctxt->hasPErefs = 0;
14322: ctxt->html = 0;
14323: ctxt->external = 0;
14324: ctxt->instate = XML_PARSER_START;
14325: ctxt->token = 0;
14326:
14327: ctxt->wellFormed = 1;
14328: ctxt->nsWellFormed = 1;
14329: ctxt->disableSAX = 0;
14330: ctxt->valid = 1;
14331: #if 0
14332: ctxt->vctxt.userData = ctxt;
14333: ctxt->vctxt.error = xmlParserValidityError;
14334: ctxt->vctxt.warning = xmlParserValidityWarning;
14335: #endif
14336: ctxt->record_info = 0;
14337: ctxt->nbChars = 0;
14338: ctxt->checkIndex = 0;
14339: ctxt->inSubset = 0;
14340: ctxt->errNo = XML_ERR_OK;
14341: ctxt->depth = 0;
14342: ctxt->charset = XML_CHAR_ENCODING_UTF8;
14343: ctxt->catalogs = NULL;
14344: ctxt->nbentities = 0;
14345: ctxt->sizeentities = 0;
14346: xmlInitNodeInfoSeq(&ctxt->node_seq);
14347:
14348: if (ctxt->attsDefault != NULL) {
14349: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14350: ctxt->attsDefault = NULL;
14351: }
14352: if (ctxt->attsSpecial != NULL) {
14353: xmlHashFree(ctxt->attsSpecial, NULL);
14354: ctxt->attsSpecial = NULL;
14355: }
14356:
14357: #ifdef LIBXML_CATALOG_ENABLED
14358: if (ctxt->catalogs != NULL)
14359: xmlCatalogFreeLocal(ctxt->catalogs);
14360: #endif
14361: if (ctxt->lastError.code != XML_ERR_OK)
14362: xmlResetError(&ctxt->lastError);
14363: }
14364:
14365: /**
14366: * xmlCtxtResetPush:
14367: * @ctxt: an XML parser context
14368: * @chunk: a pointer to an array of chars
14369: * @size: number of chars in the array
14370: * @filename: an optional file name or URI
14371: * @encoding: the document encoding, or NULL
14372: *
14373: * Reset a push parser context
14374: *
14375: * Returns 0 in case of success and 1 in case of error
14376: */
14377: int
14378: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14379: int size, const char *filename, const char *encoding)
14380: {
14381: xmlParserInputPtr inputStream;
14382: xmlParserInputBufferPtr buf;
14383: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14384:
14385: if (ctxt == NULL)
14386: return(1);
14387:
14388: if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14389: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14390:
14391: buf = xmlAllocParserInputBuffer(enc);
14392: if (buf == NULL)
14393: return(1);
14394:
14395: if (ctxt == NULL) {
14396: xmlFreeParserInputBuffer(buf);
14397: return(1);
14398: }
14399:
14400: xmlCtxtReset(ctxt);
14401:
14402: if (ctxt->pushTab == NULL) {
14403: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14404: sizeof(xmlChar *));
14405: if (ctxt->pushTab == NULL) {
14406: xmlErrMemory(ctxt, NULL);
14407: xmlFreeParserInputBuffer(buf);
14408: return(1);
14409: }
14410: }
14411:
14412: if (filename == NULL) {
14413: ctxt->directory = NULL;
14414: } else {
14415: ctxt->directory = xmlParserGetDirectory(filename);
14416: }
14417:
14418: inputStream = xmlNewInputStream(ctxt);
14419: if (inputStream == NULL) {
14420: xmlFreeParserInputBuffer(buf);
14421: return(1);
14422: }
14423:
14424: if (filename == NULL)
14425: inputStream->filename = NULL;
14426: else
14427: inputStream->filename = (char *)
14428: xmlCanonicPath((const xmlChar *) filename);
14429: inputStream->buf = buf;
14430: inputStream->base = inputStream->buf->buffer->content;
14431: inputStream->cur = inputStream->buf->buffer->content;
14432: inputStream->end =
14433: &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14434:
14435: inputPush(ctxt, inputStream);
14436:
14437: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14438: (ctxt->input->buf != NULL)) {
14439: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14440: int cur = ctxt->input->cur - ctxt->input->base;
14441:
14442: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14443:
14444: ctxt->input->base = ctxt->input->buf->buffer->content + base;
14445: ctxt->input->cur = ctxt->input->base + cur;
14446: ctxt->input->end =
14447: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14448: use];
14449: #ifdef DEBUG_PUSH
14450: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14451: #endif
14452: }
14453:
14454: if (encoding != NULL) {
14455: xmlCharEncodingHandlerPtr hdlr;
14456:
14457: if (ctxt->encoding != NULL)
14458: xmlFree((xmlChar *) ctxt->encoding);
14459: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14460:
14461: hdlr = xmlFindCharEncodingHandler(encoding);
14462: if (hdlr != NULL) {
14463: xmlSwitchToEncoding(ctxt, hdlr);
14464: } else {
14465: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14466: "Unsupported encoding %s\n", BAD_CAST encoding);
14467: }
14468: } else if (enc != XML_CHAR_ENCODING_NONE) {
14469: xmlSwitchEncoding(ctxt, enc);
14470: }
14471:
14472: return(0);
14473: }
14474:
14475:
14476: /**
14477: * xmlCtxtUseOptionsInternal:
14478: * @ctxt: an XML parser context
14479: * @options: a combination of xmlParserOption
14480: * @encoding: the user provided encoding to use
14481: *
14482: * Applies the options to the parser context
14483: *
14484: * Returns 0 in case of success, the set of unknown or unimplemented options
14485: * in case of error.
14486: */
14487: static int
14488: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14489: {
14490: if (ctxt == NULL)
14491: return(-1);
14492: if (encoding != NULL) {
14493: if (ctxt->encoding != NULL)
14494: xmlFree((xmlChar *) ctxt->encoding);
14495: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14496: }
14497: if (options & XML_PARSE_RECOVER) {
14498: ctxt->recovery = 1;
14499: options -= XML_PARSE_RECOVER;
14500: ctxt->options |= XML_PARSE_RECOVER;
14501: } else
14502: ctxt->recovery = 0;
14503: if (options & XML_PARSE_DTDLOAD) {
14504: ctxt->loadsubset = XML_DETECT_IDS;
14505: options -= XML_PARSE_DTDLOAD;
14506: ctxt->options |= XML_PARSE_DTDLOAD;
14507: } else
14508: ctxt->loadsubset = 0;
14509: if (options & XML_PARSE_DTDATTR) {
14510: ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14511: options -= XML_PARSE_DTDATTR;
14512: ctxt->options |= XML_PARSE_DTDATTR;
14513: }
14514: if (options & XML_PARSE_NOENT) {
14515: ctxt->replaceEntities = 1;
14516: /* ctxt->loadsubset |= XML_DETECT_IDS; */
14517: options -= XML_PARSE_NOENT;
14518: ctxt->options |= XML_PARSE_NOENT;
14519: } else
14520: ctxt->replaceEntities = 0;
14521: if (options & XML_PARSE_PEDANTIC) {
14522: ctxt->pedantic = 1;
14523: options -= XML_PARSE_PEDANTIC;
14524: ctxt->options |= XML_PARSE_PEDANTIC;
14525: } else
14526: ctxt->pedantic = 0;
14527: if (options & XML_PARSE_NOBLANKS) {
14528: ctxt->keepBlanks = 0;
14529: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14530: options -= XML_PARSE_NOBLANKS;
14531: ctxt->options |= XML_PARSE_NOBLANKS;
14532: } else
14533: ctxt->keepBlanks = 1;
14534: if (options & XML_PARSE_DTDVALID) {
14535: ctxt->validate = 1;
14536: if (options & XML_PARSE_NOWARNING)
14537: ctxt->vctxt.warning = NULL;
14538: if (options & XML_PARSE_NOERROR)
14539: ctxt->vctxt.error = NULL;
14540: options -= XML_PARSE_DTDVALID;
14541: ctxt->options |= XML_PARSE_DTDVALID;
14542: } else
14543: ctxt->validate = 0;
14544: if (options & XML_PARSE_NOWARNING) {
14545: ctxt->sax->warning = NULL;
14546: options -= XML_PARSE_NOWARNING;
14547: }
14548: if (options & XML_PARSE_NOERROR) {
14549: ctxt->sax->error = NULL;
14550: ctxt->sax->fatalError = NULL;
14551: options -= XML_PARSE_NOERROR;
14552: }
14553: #ifdef LIBXML_SAX1_ENABLED
14554: if (options & XML_PARSE_SAX1) {
14555: ctxt->sax->startElement = xmlSAX2StartElement;
14556: ctxt->sax->endElement = xmlSAX2EndElement;
14557: ctxt->sax->startElementNs = NULL;
14558: ctxt->sax->endElementNs = NULL;
14559: ctxt->sax->initialized = 1;
14560: options -= XML_PARSE_SAX1;
14561: ctxt->options |= XML_PARSE_SAX1;
14562: }
14563: #endif /* LIBXML_SAX1_ENABLED */
14564: if (options & XML_PARSE_NODICT) {
14565: ctxt->dictNames = 0;
14566: options -= XML_PARSE_NODICT;
14567: ctxt->options |= XML_PARSE_NODICT;
14568: } else {
14569: ctxt->dictNames = 1;
14570: }
14571: if (options & XML_PARSE_NOCDATA) {
14572: ctxt->sax->cdataBlock = NULL;
14573: options -= XML_PARSE_NOCDATA;
14574: ctxt->options |= XML_PARSE_NOCDATA;
14575: }
14576: if (options & XML_PARSE_NSCLEAN) {
14577: ctxt->options |= XML_PARSE_NSCLEAN;
14578: options -= XML_PARSE_NSCLEAN;
14579: }
14580: if (options & XML_PARSE_NONET) {
14581: ctxt->options |= XML_PARSE_NONET;
14582: options -= XML_PARSE_NONET;
14583: }
14584: if (options & XML_PARSE_COMPACT) {
14585: ctxt->options |= XML_PARSE_COMPACT;
14586: options -= XML_PARSE_COMPACT;
14587: }
14588: if (options & XML_PARSE_OLD10) {
14589: ctxt->options |= XML_PARSE_OLD10;
14590: options -= XML_PARSE_OLD10;
14591: }
14592: if (options & XML_PARSE_NOBASEFIX) {
14593: ctxt->options |= XML_PARSE_NOBASEFIX;
14594: options -= XML_PARSE_NOBASEFIX;
14595: }
14596: if (options & XML_PARSE_HUGE) {
14597: ctxt->options |= XML_PARSE_HUGE;
14598: options -= XML_PARSE_HUGE;
14599: }
14600: if (options & XML_PARSE_OLDSAX) {
14601: ctxt->options |= XML_PARSE_OLDSAX;
14602: options -= XML_PARSE_OLDSAX;
14603: }
1.1.1.2 ! misho 14604: if (options & XML_PARSE_IGNORE_ENC) {
! 14605: ctxt->options |= XML_PARSE_IGNORE_ENC;
! 14606: options -= XML_PARSE_IGNORE_ENC;
! 14607: }
1.1 misho 14608: ctxt->linenumbers = 1;
14609: return (options);
14610: }
14611:
14612: /**
14613: * xmlCtxtUseOptions:
14614: * @ctxt: an XML parser context
14615: * @options: a combination of xmlParserOption
14616: *
14617: * Applies the options to the parser context
14618: *
14619: * Returns 0 in case of success, the set of unknown or unimplemented options
14620: * in case of error.
14621: */
14622: int
14623: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14624: {
14625: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14626: }
14627:
14628: /**
14629: * xmlDoRead:
14630: * @ctxt: an XML parser context
14631: * @URL: the base URL to use for the document
14632: * @encoding: the document encoding, or NULL
14633: * @options: a combination of xmlParserOption
14634: * @reuse: keep the context for reuse
14635: *
14636: * Common front-end for the xmlRead functions
14637: *
14638: * Returns the resulting document tree or NULL
14639: */
14640: static xmlDocPtr
14641: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14642: int options, int reuse)
14643: {
14644: xmlDocPtr ret;
14645:
14646: xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14647: if (encoding != NULL) {
14648: xmlCharEncodingHandlerPtr hdlr;
14649:
14650: hdlr = xmlFindCharEncodingHandler(encoding);
14651: if (hdlr != NULL)
14652: xmlSwitchToEncoding(ctxt, hdlr);
14653: }
14654: if ((URL != NULL) && (ctxt->input != NULL) &&
14655: (ctxt->input->filename == NULL))
14656: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14657: xmlParseDocument(ctxt);
14658: if ((ctxt->wellFormed) || ctxt->recovery)
14659: ret = ctxt->myDoc;
14660: else {
14661: ret = NULL;
14662: if (ctxt->myDoc != NULL) {
14663: xmlFreeDoc(ctxt->myDoc);
14664: }
14665: }
14666: ctxt->myDoc = NULL;
14667: if (!reuse) {
14668: xmlFreeParserCtxt(ctxt);
14669: }
14670:
14671: return (ret);
14672: }
14673:
14674: /**
14675: * xmlReadDoc:
14676: * @cur: a pointer to a zero terminated string
14677: * @URL: the base URL to use for the document
14678: * @encoding: the document encoding, or NULL
14679: * @options: a combination of xmlParserOption
14680: *
14681: * parse an XML in-memory document and build a tree.
14682: *
14683: * Returns the resulting document tree
14684: */
14685: xmlDocPtr
14686: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14687: {
14688: xmlParserCtxtPtr ctxt;
14689:
14690: if (cur == NULL)
14691: return (NULL);
14692:
14693: ctxt = xmlCreateDocParserCtxt(cur);
14694: if (ctxt == NULL)
14695: return (NULL);
14696: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14697: }
14698:
14699: /**
14700: * xmlReadFile:
14701: * @filename: a file or URL
14702: * @encoding: the document encoding, or NULL
14703: * @options: a combination of xmlParserOption
14704: *
14705: * parse an XML file from the filesystem or the network.
14706: *
14707: * Returns the resulting document tree
14708: */
14709: xmlDocPtr
14710: xmlReadFile(const char *filename, const char *encoding, int options)
14711: {
14712: xmlParserCtxtPtr ctxt;
14713:
14714: ctxt = xmlCreateURLParserCtxt(filename, options);
14715: if (ctxt == NULL)
14716: return (NULL);
14717: return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14718: }
14719:
14720: /**
14721: * xmlReadMemory:
14722: * @buffer: a pointer to a char array
14723: * @size: the size of the array
14724: * @URL: the base URL to use for the document
14725: * @encoding: the document encoding, or NULL
14726: * @options: a combination of xmlParserOption
14727: *
14728: * parse an XML in-memory document and build a tree.
14729: *
14730: * Returns the resulting document tree
14731: */
14732: xmlDocPtr
14733: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14734: {
14735: xmlParserCtxtPtr ctxt;
14736:
14737: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14738: if (ctxt == NULL)
14739: return (NULL);
14740: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14741: }
14742:
14743: /**
14744: * xmlReadFd:
14745: * @fd: an open file descriptor
14746: * @URL: the base URL to use for the document
14747: * @encoding: the document encoding, or NULL
14748: * @options: a combination of xmlParserOption
14749: *
14750: * parse an XML from a file descriptor and build a tree.
14751: * NOTE that the file descriptor will not be closed when the
14752: * reader is closed or reset.
14753: *
14754: * Returns the resulting document tree
14755: */
14756: xmlDocPtr
14757: xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14758: {
14759: xmlParserCtxtPtr ctxt;
14760: xmlParserInputBufferPtr input;
14761: xmlParserInputPtr stream;
14762:
14763: if (fd < 0)
14764: return (NULL);
14765:
14766: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14767: if (input == NULL)
14768: return (NULL);
14769: input->closecallback = NULL;
14770: ctxt = xmlNewParserCtxt();
14771: if (ctxt == NULL) {
14772: xmlFreeParserInputBuffer(input);
14773: return (NULL);
14774: }
14775: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14776: if (stream == NULL) {
14777: xmlFreeParserInputBuffer(input);
14778: xmlFreeParserCtxt(ctxt);
14779: return (NULL);
14780: }
14781: inputPush(ctxt, stream);
14782: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14783: }
14784:
14785: /**
14786: * xmlReadIO:
14787: * @ioread: an I/O read function
14788: * @ioclose: an I/O close function
14789: * @ioctx: an I/O handler
14790: * @URL: the base URL to use for the document
14791: * @encoding: the document encoding, or NULL
14792: * @options: a combination of xmlParserOption
14793: *
14794: * parse an XML document from I/O functions and source and build a tree.
1.1.1.2 ! misho 14795: *
1.1 misho 14796: * Returns the resulting document tree
14797: */
14798: xmlDocPtr
14799: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14800: void *ioctx, const char *URL, const char *encoding, int options)
14801: {
14802: xmlParserCtxtPtr ctxt;
14803: xmlParserInputBufferPtr input;
14804: xmlParserInputPtr stream;
14805:
14806: if (ioread == NULL)
14807: return (NULL);
14808:
14809: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14810: XML_CHAR_ENCODING_NONE);
1.1.1.2 ! misho 14811: if (input == NULL) {
! 14812: if (ioclose != NULL)
! 14813: ioclose(ioctx);
1.1 misho 14814: return (NULL);
1.1.1.2 ! misho 14815: }
1.1 misho 14816: ctxt = xmlNewParserCtxt();
14817: if (ctxt == NULL) {
14818: xmlFreeParserInputBuffer(input);
14819: return (NULL);
14820: }
14821: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14822: if (stream == NULL) {
14823: xmlFreeParserInputBuffer(input);
14824: xmlFreeParserCtxt(ctxt);
14825: return (NULL);
14826: }
14827: inputPush(ctxt, stream);
14828: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14829: }
14830:
14831: /**
14832: * xmlCtxtReadDoc:
14833: * @ctxt: an XML parser context
14834: * @cur: a pointer to a zero terminated string
14835: * @URL: the base URL to use for the document
14836: * @encoding: the document encoding, or NULL
14837: * @options: a combination of xmlParserOption
14838: *
14839: * parse an XML in-memory document and build a tree.
14840: * This reuses the existing @ctxt parser context
1.1.1.2 ! misho 14841: *
1.1 misho 14842: * Returns the resulting document tree
14843: */
14844: xmlDocPtr
14845: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14846: const char *URL, const char *encoding, int options)
14847: {
14848: xmlParserInputPtr stream;
14849:
14850: if (cur == NULL)
14851: return (NULL);
14852: if (ctxt == NULL)
14853: return (NULL);
14854:
14855: xmlCtxtReset(ctxt);
14856:
14857: stream = xmlNewStringInputStream(ctxt, cur);
14858: if (stream == NULL) {
14859: return (NULL);
14860: }
14861: inputPush(ctxt, stream);
14862: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14863: }
14864:
14865: /**
14866: * xmlCtxtReadFile:
14867: * @ctxt: an XML parser context
14868: * @filename: a file or URL
14869: * @encoding: the document encoding, or NULL
14870: * @options: a combination of xmlParserOption
14871: *
14872: * parse an XML file from the filesystem or the network.
14873: * This reuses the existing @ctxt parser context
14874: *
14875: * Returns the resulting document tree
14876: */
14877: xmlDocPtr
14878: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14879: const char *encoding, int options)
14880: {
14881: xmlParserInputPtr stream;
14882:
14883: if (filename == NULL)
14884: return (NULL);
14885: if (ctxt == NULL)
14886: return (NULL);
14887:
14888: xmlCtxtReset(ctxt);
14889:
14890: stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14891: if (stream == NULL) {
14892: return (NULL);
14893: }
14894: inputPush(ctxt, stream);
14895: return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14896: }
14897:
14898: /**
14899: * xmlCtxtReadMemory:
14900: * @ctxt: an XML parser context
14901: * @buffer: a pointer to a char array
14902: * @size: the size of the array
14903: * @URL: the base URL to use for the document
14904: * @encoding: the document encoding, or NULL
14905: * @options: a combination of xmlParserOption
14906: *
14907: * parse an XML in-memory document and build a tree.
14908: * This reuses the existing @ctxt parser context
14909: *
14910: * Returns the resulting document tree
14911: */
14912: xmlDocPtr
14913: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14914: const char *URL, const char *encoding, int options)
14915: {
14916: xmlParserInputBufferPtr input;
14917: xmlParserInputPtr stream;
14918:
14919: if (ctxt == NULL)
14920: return (NULL);
14921: if (buffer == NULL)
14922: return (NULL);
14923:
14924: xmlCtxtReset(ctxt);
14925:
14926: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14927: if (input == NULL) {
14928: return(NULL);
14929: }
14930:
14931: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14932: if (stream == NULL) {
14933: xmlFreeParserInputBuffer(input);
14934: return(NULL);
14935: }
14936:
14937: inputPush(ctxt, stream);
14938: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14939: }
14940:
14941: /**
14942: * xmlCtxtReadFd:
14943: * @ctxt: an XML parser context
14944: * @fd: an open file descriptor
14945: * @URL: the base URL to use for the document
14946: * @encoding: the document encoding, or NULL
14947: * @options: a combination of xmlParserOption
14948: *
14949: * parse an XML from a file descriptor and build a tree.
14950: * This reuses the existing @ctxt parser context
14951: * NOTE that the file descriptor will not be closed when the
14952: * reader is closed or reset.
14953: *
14954: * Returns the resulting document tree
14955: */
14956: xmlDocPtr
14957: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14958: const char *URL, const char *encoding, int options)
14959: {
14960: xmlParserInputBufferPtr input;
14961: xmlParserInputPtr stream;
14962:
14963: if (fd < 0)
14964: return (NULL);
14965: if (ctxt == NULL)
14966: return (NULL);
14967:
14968: xmlCtxtReset(ctxt);
14969:
14970:
14971: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14972: if (input == NULL)
14973: return (NULL);
14974: input->closecallback = NULL;
14975: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14976: if (stream == NULL) {
14977: xmlFreeParserInputBuffer(input);
14978: return (NULL);
14979: }
14980: inputPush(ctxt, stream);
14981: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14982: }
14983:
14984: /**
14985: * xmlCtxtReadIO:
14986: * @ctxt: an XML parser context
14987: * @ioread: an I/O read function
14988: * @ioclose: an I/O close function
14989: * @ioctx: an I/O handler
14990: * @URL: the base URL to use for the document
14991: * @encoding: the document encoding, or NULL
14992: * @options: a combination of xmlParserOption
14993: *
14994: * parse an XML document from I/O functions and source and build a tree.
14995: * This reuses the existing @ctxt parser context
1.1.1.2 ! misho 14996: *
1.1 misho 14997: * Returns the resulting document tree
14998: */
14999: xmlDocPtr
15000: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15001: xmlInputCloseCallback ioclose, void *ioctx,
15002: const char *URL,
15003: const char *encoding, int options)
15004: {
15005: xmlParserInputBufferPtr input;
15006: xmlParserInputPtr stream;
15007:
15008: if (ioread == NULL)
15009: return (NULL);
15010: if (ctxt == NULL)
15011: return (NULL);
15012:
15013: xmlCtxtReset(ctxt);
15014:
15015: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15016: XML_CHAR_ENCODING_NONE);
1.1.1.2 ! misho 15017: if (input == NULL) {
! 15018: if (ioclose != NULL)
! 15019: ioclose(ioctx);
1.1 misho 15020: return (NULL);
1.1.1.2 ! misho 15021: }
1.1 misho 15022: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15023: if (stream == NULL) {
15024: xmlFreeParserInputBuffer(input);
15025: return (NULL);
15026: }
15027: inputPush(ctxt, stream);
15028: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15029: }
15030:
15031: #define bottom_parser
15032: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>