Annotation of embedaddon/libxml2/parser.c, revision 1.1.1.2.2.1
1.1 misho 1: /*
2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
4: *
5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
28: * See Copyright for the status of this software.
29: *
30: * daniel@veillard.com
31: */
32:
33: #define IN_LIBXML
34: #include "libxml.h"
35:
36: #if defined(WIN32) && !defined (__CYGWIN__)
37: #define XML_DIR_SEP '\\'
38: #else
39: #define XML_DIR_SEP '/'
40: #endif
41:
42: #include <stdlib.h>
1.1.1.2.2.1! misho 43: #include <limits.h>
1.1 misho 44: #include <string.h>
45: #include <stdarg.h>
46: #include <libxml/xmlmemory.h>
47: #include <libxml/threads.h>
48: #include <libxml/globals.h>
49: #include <libxml/tree.h>
50: #include <libxml/parser.h>
51: #include <libxml/parserInternals.h>
52: #include <libxml/valid.h>
53: #include <libxml/entities.h>
54: #include <libxml/xmlerror.h>
55: #include <libxml/encoding.h>
56: #include <libxml/xmlIO.h>
57: #include <libxml/uri.h>
58: #ifdef LIBXML_CATALOG_ENABLED
59: #include <libxml/catalog.h>
60: #endif
61: #ifdef LIBXML_SCHEMAS_ENABLED
62: #include <libxml/xmlschemastypes.h>
63: #include <libxml/relaxng.h>
64: #endif
65: #ifdef HAVE_CTYPE_H
66: #include <ctype.h>
67: #endif
68: #ifdef HAVE_STDLIB_H
69: #include <stdlib.h>
70: #endif
71: #ifdef HAVE_SYS_STAT_H
72: #include <sys/stat.h>
73: #endif
74: #ifdef HAVE_FCNTL_H
75: #include <fcntl.h>
76: #endif
77: #ifdef HAVE_UNISTD_H
78: #include <unistd.h>
79: #endif
80: #ifdef HAVE_ZLIB_H
81: #include <zlib.h>
82: #endif
1.1.1.2 misho 83: #ifdef HAVE_LZMA_H
84: #include <lzma.h>
85: #endif
1.1 misho 86:
/*
 * Forward declarations of file-local helpers that are used before their
 * definitions.
 */

/* Generic fatal error reporter; defined further down in this file. */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

/* Declared static, so its definition lives in this translation unit. */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);
93:
94: /************************************************************************
95: * *
96: * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
97: * *
98: ************************************************************************/
99:
/*
 * Entity size thresholds. XML_PARSER_BIG_ENTITY is the size below which
 * xmlParserEntityCheck() does not bother checking an entity.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
110:
111: /*
112: * xmlParserEntityCheck
113: *
114: * Function to check non-linear entity expansion behaviour
115: * This is here to detect and stop exponential linear entity expansion
116: * This is not a limitation of the parser but a safety
117: * boundary feature. It can be disabled with the XML_PARSE_HUGE
118: * parser option.
119: */
120: static int
1.1.1.2.2.1! misho 121: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
! 122: xmlEntityPtr ent, size_t replacement)
1.1 misho 123: {
1.1.1.2.2.1! misho 124: size_t consumed = 0;
1.1 misho 125:
126: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
127: return (0);
128: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
129: return (1);
1.1.1.2.2.1! misho 130: if (replacement != 0) {
! 131: if (replacement < XML_MAX_TEXT_LENGTH)
! 132: return(0);
! 133:
! 134: /*
! 135: * If the volume of entity copy reaches 10 times the
! 136: * amount of parsed data and over the large text threshold
! 137: * then that's very likely to be an abuse.
! 138: */
! 139: if (ctxt->input != NULL) {
! 140: consumed = ctxt->input->consumed +
! 141: (ctxt->input->cur - ctxt->input->base);
! 142: }
! 143: consumed += ctxt->sizeentities;
! 144:
! 145: if (replacement < XML_PARSER_NON_LINEAR * consumed)
! 146: return(0);
! 147: } else if (size != 0) {
1.1 misho 148: /*
149: * Do the check based on the replacement size of the entity
150: */
151: if (size < XML_PARSER_BIG_ENTITY)
152: return(0);
153:
154: /*
155: * A limit on the amount of text data reasonably used
156: */
157: if (ctxt->input != NULL) {
158: consumed = ctxt->input->consumed +
159: (ctxt->input->cur - ctxt->input->base);
160: }
161: consumed += ctxt->sizeentities;
162:
163: if ((size < XML_PARSER_NON_LINEAR * consumed) &&
164: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
165: return (0);
166: } else if (ent != NULL) {
167: /*
168: * use the number of parsed entities in the replacement
169: */
170: size = ent->checked;
171:
172: /*
173: * The amount of data parsed counting entities size only once
174: */
175: if (ctxt->input != NULL) {
176: consumed = ctxt->input->consumed +
177: (ctxt->input->cur - ctxt->input->base);
178: }
179: consumed += ctxt->sizeentities;
180:
181: /*
182: * Check the density of entities for the amount of data
183: * knowing an entity reference will take at least 3 bytes
184: */
185: if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
186: return (0);
187: } else {
188: /*
189: * strange we got no data for checking just return
190: */
191: return (0);
192: }
193: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
194: return (1);
195: }
196:
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * This is a non-static global, initialized to 256.
 */
unsigned int xmlParserMaxDepth = 256;
206:
207:
208:
/* Compile-time switch and buffer size constants used by this file. */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
/* Marker string for SAX compatibility mode documents. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
213:
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
223:
224:
/*
 * Forward declarations of file-local (static) routines referenced before
 * their definitions.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
250:
251: /************************************************************************
252: * *
253: * Some factorized error routines *
254: * *
255: ************************************************************************/
256:
257: /**
258: * xmlErrAttributeDup:
259: * @ctxt: an XML parser context
260: * @prefix: the attribute prefix
261: * @localname: the attribute localname
262: *
263: * Handle a redefinition of attribute error
264: */
265: static void
266: xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
267: const xmlChar * localname)
268: {
269: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
270: (ctxt->instate == XML_PARSER_EOF))
271: return;
272: if (ctxt != NULL)
273: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
274:
275: if (prefix == NULL)
276: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
277: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
278: (const char *) localname, NULL, NULL, 0, 0,
279: "Attribute %s redefined\n", localname);
280: else
281: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
282: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
283: (const char *) prefix, (const char *) localname,
284: NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
285: localname);
286: if (ctxt != NULL) {
287: ctxt->wellFormed = 0;
288: if (ctxt->recovery == 0)
289: ctxt->disableSAX = 1;
290: }
291: }
292:
293: /**
294: * xmlFatalErr:
295: * @ctxt: an XML parser context
296: * @error: the error number
297: * @extra: extra information string
298: *
299: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
300: */
301: static void
302: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
303: {
304: const char *errmsg;
305:
306: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
307: (ctxt->instate == XML_PARSER_EOF))
308: return;
309: switch (error) {
310: case XML_ERR_INVALID_HEX_CHARREF:
311: errmsg = "CharRef: invalid hexadecimal value\n";
312: break;
313: case XML_ERR_INVALID_DEC_CHARREF:
314: errmsg = "CharRef: invalid decimal value\n";
315: break;
316: case XML_ERR_INVALID_CHARREF:
317: errmsg = "CharRef: invalid value\n";
318: break;
319: case XML_ERR_INTERNAL_ERROR:
320: errmsg = "internal error";
321: break;
322: case XML_ERR_PEREF_AT_EOF:
323: errmsg = "PEReference at end of document\n";
324: break;
325: case XML_ERR_PEREF_IN_PROLOG:
326: errmsg = "PEReference in prolog\n";
327: break;
328: case XML_ERR_PEREF_IN_EPILOG:
329: errmsg = "PEReference in epilog\n";
330: break;
331: case XML_ERR_PEREF_NO_NAME:
332: errmsg = "PEReference: no name\n";
333: break;
334: case XML_ERR_PEREF_SEMICOL_MISSING:
335: errmsg = "PEReference: expecting ';'\n";
336: break;
337: case XML_ERR_ENTITY_LOOP:
338: errmsg = "Detected an entity reference loop\n";
339: break;
340: case XML_ERR_ENTITY_NOT_STARTED:
341: errmsg = "EntityValue: \" or ' expected\n";
342: break;
343: case XML_ERR_ENTITY_PE_INTERNAL:
344: errmsg = "PEReferences forbidden in internal subset\n";
345: break;
346: case XML_ERR_ENTITY_NOT_FINISHED:
347: errmsg = "EntityValue: \" or ' expected\n";
348: break;
349: case XML_ERR_ATTRIBUTE_NOT_STARTED:
350: errmsg = "AttValue: \" or ' expected\n";
351: break;
352: case XML_ERR_LT_IN_ATTRIBUTE:
353: errmsg = "Unescaped '<' not allowed in attributes values\n";
354: break;
355: case XML_ERR_LITERAL_NOT_STARTED:
356: errmsg = "SystemLiteral \" or ' expected\n";
357: break;
358: case XML_ERR_LITERAL_NOT_FINISHED:
359: errmsg = "Unfinished System or Public ID \" or ' expected\n";
360: break;
361: case XML_ERR_MISPLACED_CDATA_END:
362: errmsg = "Sequence ']]>' not allowed in content\n";
363: break;
364: case XML_ERR_URI_REQUIRED:
365: errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
366: break;
367: case XML_ERR_PUBID_REQUIRED:
368: errmsg = "PUBLIC, the Public Identifier is missing\n";
369: break;
370: case XML_ERR_HYPHEN_IN_COMMENT:
371: errmsg = "Comment must not contain '--' (double-hyphen)\n";
372: break;
373: case XML_ERR_PI_NOT_STARTED:
374: errmsg = "xmlParsePI : no target name\n";
375: break;
376: case XML_ERR_RESERVED_XML_NAME:
377: errmsg = "Invalid PI name\n";
378: break;
379: case XML_ERR_NOTATION_NOT_STARTED:
380: errmsg = "NOTATION: Name expected here\n";
381: break;
382: case XML_ERR_NOTATION_NOT_FINISHED:
383: errmsg = "'>' required to close NOTATION declaration\n";
384: break;
385: case XML_ERR_VALUE_REQUIRED:
386: errmsg = "Entity value required\n";
387: break;
388: case XML_ERR_URI_FRAGMENT:
389: errmsg = "Fragment not allowed";
390: break;
391: case XML_ERR_ATTLIST_NOT_STARTED:
392: errmsg = "'(' required to start ATTLIST enumeration\n";
393: break;
394: case XML_ERR_NMTOKEN_REQUIRED:
395: errmsg = "NmToken expected in ATTLIST enumeration\n";
396: break;
397: case XML_ERR_ATTLIST_NOT_FINISHED:
398: errmsg = "')' required to finish ATTLIST enumeration\n";
399: break;
400: case XML_ERR_MIXED_NOT_STARTED:
401: errmsg = "MixedContentDecl : '|' or ')*' expected\n";
402: break;
403: case XML_ERR_PCDATA_REQUIRED:
404: errmsg = "MixedContentDecl : '#PCDATA' expected\n";
405: break;
406: case XML_ERR_ELEMCONTENT_NOT_STARTED:
407: errmsg = "ContentDecl : Name or '(' expected\n";
408: break;
409: case XML_ERR_ELEMCONTENT_NOT_FINISHED:
410: errmsg = "ContentDecl : ',' '|' or ')' expected\n";
411: break;
412: case XML_ERR_PEREF_IN_INT_SUBSET:
413: errmsg =
414: "PEReference: forbidden within markup decl in internal subset\n";
415: break;
416: case XML_ERR_GT_REQUIRED:
417: errmsg = "expected '>'\n";
418: break;
419: case XML_ERR_CONDSEC_INVALID:
420: errmsg = "XML conditional section '[' expected\n";
421: break;
422: case XML_ERR_EXT_SUBSET_NOT_FINISHED:
423: errmsg = "Content error in the external subset\n";
424: break;
425: case XML_ERR_CONDSEC_INVALID_KEYWORD:
426: errmsg =
427: "conditional section INCLUDE or IGNORE keyword expected\n";
428: break;
429: case XML_ERR_CONDSEC_NOT_FINISHED:
430: errmsg = "XML conditional section not closed\n";
431: break;
432: case XML_ERR_XMLDECL_NOT_STARTED:
433: errmsg = "Text declaration '<?xml' required\n";
434: break;
435: case XML_ERR_XMLDECL_NOT_FINISHED:
436: errmsg = "parsing XML declaration: '?>' expected\n";
437: break;
438: case XML_ERR_EXT_ENTITY_STANDALONE:
439: errmsg = "external parsed entities cannot be standalone\n";
440: break;
441: case XML_ERR_ENTITYREF_SEMICOL_MISSING:
442: errmsg = "EntityRef: expecting ';'\n";
443: break;
444: case XML_ERR_DOCTYPE_NOT_FINISHED:
445: errmsg = "DOCTYPE improperly terminated\n";
446: break;
447: case XML_ERR_LTSLASH_REQUIRED:
448: errmsg = "EndTag: '</' not found\n";
449: break;
450: case XML_ERR_EQUAL_REQUIRED:
451: errmsg = "expected '='\n";
452: break;
453: case XML_ERR_STRING_NOT_CLOSED:
454: errmsg = "String not closed expecting \" or '\n";
455: break;
456: case XML_ERR_STRING_NOT_STARTED:
457: errmsg = "String not started expecting ' or \"\n";
458: break;
459: case XML_ERR_ENCODING_NAME:
460: errmsg = "Invalid XML encoding name\n";
461: break;
462: case XML_ERR_STANDALONE_VALUE:
463: errmsg = "standalone accepts only 'yes' or 'no'\n";
464: break;
465: case XML_ERR_DOCUMENT_EMPTY:
466: errmsg = "Document is empty\n";
467: break;
468: case XML_ERR_DOCUMENT_END:
469: errmsg = "Extra content at the end of the document\n";
470: break;
471: case XML_ERR_NOT_WELL_BALANCED:
472: errmsg = "chunk is not well balanced\n";
473: break;
474: case XML_ERR_EXTRA_CONTENT:
475: errmsg = "extra content at the end of well balanced chunk\n";
476: break;
477: case XML_ERR_VERSION_MISSING:
478: errmsg = "Malformed declaration expecting version\n";
479: break;
480: #if 0
481: case:
482: errmsg = "\n";
483: break;
484: #endif
485: default:
486: errmsg = "Unregistered error message\n";
487: }
488: if (ctxt != NULL)
489: ctxt->errNo = error;
490: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
491: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
492: info);
493: if (ctxt != NULL) {
494: ctxt->wellFormed = 0;
495: if (ctxt->recovery == 0)
496: ctxt->disableSAX = 1;
497: }
498: }
499:
500: /**
501: * xmlFatalErrMsg:
502: * @ctxt: an XML parser context
503: * @error: the error number
504: * @msg: the error message
505: *
506: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
507: */
508: static void
509: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
510: const char *msg)
511: {
512: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513: (ctxt->instate == XML_PARSER_EOF))
514: return;
515: if (ctxt != NULL)
516: ctxt->errNo = error;
517: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
518: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
519: if (ctxt != NULL) {
520: ctxt->wellFormed = 0;
521: if (ctxt->recovery == 0)
522: ctxt->disableSAX = 1;
523: }
524: }
525:
526: /**
527: * xmlWarningMsg:
528: * @ctxt: an XML parser context
529: * @error: the error number
530: * @msg: the error message
531: * @str1: extra data
532: * @str2: extra data
533: *
534: * Handle a warning.
535: */
536: static void
537: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
538: const char *msg, const xmlChar *str1, const xmlChar *str2)
539: {
540: xmlStructuredErrorFunc schannel = NULL;
541:
542: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
543: (ctxt->instate == XML_PARSER_EOF))
544: return;
545: if ((ctxt != NULL) && (ctxt->sax != NULL) &&
546: (ctxt->sax->initialized == XML_SAX2_MAGIC))
547: schannel = ctxt->sax->serror;
548: if (ctxt != NULL) {
549: __xmlRaiseError(schannel,
550: (ctxt->sax) ? ctxt->sax->warning : NULL,
551: ctxt->userData,
552: ctxt, NULL, XML_FROM_PARSER, error,
553: XML_ERR_WARNING, NULL, 0,
554: (const char *) str1, (const char *) str2, NULL, 0, 0,
555: msg, (const char *) str1, (const char *) str2);
556: } else {
557: __xmlRaiseError(schannel, NULL, NULL,
558: ctxt, NULL, XML_FROM_PARSER, error,
559: XML_ERR_WARNING, NULL, 0,
560: (const char *) str1, (const char *) str2, NULL, 0, 0,
561: msg, (const char *) str1, (const char *) str2);
562: }
563: }
564:
565: /**
566: * xmlValidityError:
567: * @ctxt: an XML parser context
568: * @error: the error number
569: * @msg: the error message
570: * @str1: extra data
571: *
572: * Handle a validity error.
573: */
574: static void
575: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
576: const char *msg, const xmlChar *str1, const xmlChar *str2)
577: {
578: xmlStructuredErrorFunc schannel = NULL;
579:
580: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
581: (ctxt->instate == XML_PARSER_EOF))
582: return;
583: if (ctxt != NULL) {
584: ctxt->errNo = error;
585: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
586: schannel = ctxt->sax->serror;
587: }
588: if (ctxt != NULL) {
589: __xmlRaiseError(schannel,
590: ctxt->vctxt.error, ctxt->vctxt.userData,
591: ctxt, NULL, XML_FROM_DTD, error,
592: XML_ERR_ERROR, NULL, 0, (const char *) str1,
593: (const char *) str2, NULL, 0, 0,
594: msg, (const char *) str1, (const char *) str2);
595: ctxt->valid = 0;
596: } else {
597: __xmlRaiseError(schannel, NULL, NULL,
598: ctxt, NULL, XML_FROM_DTD, error,
599: XML_ERR_ERROR, NULL, 0, (const char *) str1,
600: (const char *) str2, NULL, 0, 0,
601: msg, (const char *) str1, (const char *) str2);
602: }
603: }
604:
605: /**
606: * xmlFatalErrMsgInt:
607: * @ctxt: an XML parser context
608: * @error: the error number
609: * @msg: the error message
610: * @val: an integer value
611: *
612: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
613: */
614: static void
615: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
616: const char *msg, int val)
617: {
618: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
619: (ctxt->instate == XML_PARSER_EOF))
620: return;
621: if (ctxt != NULL)
622: ctxt->errNo = error;
623: __xmlRaiseError(NULL, NULL, NULL,
624: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
625: NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
626: if (ctxt != NULL) {
627: ctxt->wellFormed = 0;
628: if (ctxt->recovery == 0)
629: ctxt->disableSAX = 1;
630: }
631: }
632:
633: /**
634: * xmlFatalErrMsgStrIntStr:
635: * @ctxt: an XML parser context
636: * @error: the error number
637: * @msg: the error message
638: * @str1: an string info
639: * @val: an integer value
640: * @str2: an string info
641: *
642: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
643: */
644: static void
645: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
646: const char *msg, const xmlChar *str1, int val,
647: const xmlChar *str2)
648: {
649: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
650: (ctxt->instate == XML_PARSER_EOF))
651: return;
652: if (ctxt != NULL)
653: ctxt->errNo = error;
654: __xmlRaiseError(NULL, NULL, NULL,
655: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
656: NULL, 0, (const char *) str1, (const char *) str2,
657: NULL, val, 0, msg, str1, val, str2);
658: if (ctxt != NULL) {
659: ctxt->wellFormed = 0;
660: if (ctxt->recovery == 0)
661: ctxt->disableSAX = 1;
662: }
663: }
664:
665: /**
666: * xmlFatalErrMsgStr:
667: * @ctxt: an XML parser context
668: * @error: the error number
669: * @msg: the error message
670: * @val: a string value
671: *
672: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
673: */
674: static void
675: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
676: const char *msg, const xmlChar * val)
677: {
678: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679: (ctxt->instate == XML_PARSER_EOF))
680: return;
681: if (ctxt != NULL)
682: ctxt->errNo = error;
683: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
684: XML_FROM_PARSER, error, XML_ERR_FATAL,
685: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
686: val);
687: if (ctxt != NULL) {
688: ctxt->wellFormed = 0;
689: if (ctxt->recovery == 0)
690: ctxt->disableSAX = 1;
691: }
692: }
693:
694: /**
695: * xmlErrMsgStr:
696: * @ctxt: an XML parser context
697: * @error: the error number
698: * @msg: the error message
699: * @val: a string value
700: *
701: * Handle a non fatal parser error
702: */
703: static void
704: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
705: const char *msg, const xmlChar * val)
706: {
707: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
708: (ctxt->instate == XML_PARSER_EOF))
709: return;
710: if (ctxt != NULL)
711: ctxt->errNo = error;
712: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
713: XML_FROM_PARSER, error, XML_ERR_ERROR,
714: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
715: val);
716: }
717:
718: /**
719: * xmlNsErr:
720: * @ctxt: an XML parser context
721: * @error: the error number
722: * @msg: the message
723: * @info1: extra information string
724: * @info2: extra information string
725: *
726: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
727: */
728: static void
729: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
730: const char *msg,
731: const xmlChar * info1, const xmlChar * info2,
732: const xmlChar * info3)
733: {
734: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735: (ctxt->instate == XML_PARSER_EOF))
736: return;
737: if (ctxt != NULL)
738: ctxt->errNo = error;
739: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
740: XML_ERR_ERROR, NULL, 0, (const char *) info1,
741: (const char *) info2, (const char *) info3, 0, 0, msg,
742: info1, info2, info3);
743: if (ctxt != NULL)
744: ctxt->nsWellFormed = 0;
745: }
746:
747: /**
748: * xmlNsWarn
749: * @ctxt: an XML parser context
750: * @error: the error number
751: * @msg: the message
752: * @info1: extra information string
753: * @info2: extra information string
754: *
1.1.1.2 misho 755: * Handle a namespace warning error
1.1 misho 756: */
757: static void
758: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
759: const char *msg,
760: const xmlChar * info1, const xmlChar * info2,
761: const xmlChar * info3)
762: {
763: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764: (ctxt->instate == XML_PARSER_EOF))
765: return;
766: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
767: XML_ERR_WARNING, NULL, 0, (const char *) info1,
768: (const char *) info2, (const char *) info3, 0, 0, msg,
769: info1, info2, info3);
770: }
771:
772: /************************************************************************
773: * *
774: * Library wide options *
775: * *
776: ************************************************************************/
777:
778: /**
779: * xmlHasFeature:
780: * @feature: the feature to be examined
781: *
782: * Examines if the library has been compiled with a given feature.
783: *
784: * Returns a non-zero value if the feature exist, otherwise zero.
785: * Returns zero (0) if the feature does not exist or an unknown
786: * unknown feature is requested, non-zero otherwise.
787: */
788: int
789: xmlHasFeature(xmlFeature feature)
790: {
791: switch (feature) {
792: case XML_WITH_THREAD:
793: #ifdef LIBXML_THREAD_ENABLED
794: return(1);
795: #else
796: return(0);
797: #endif
798: case XML_WITH_TREE:
799: #ifdef LIBXML_TREE_ENABLED
800: return(1);
801: #else
802: return(0);
803: #endif
804: case XML_WITH_OUTPUT:
805: #ifdef LIBXML_OUTPUT_ENABLED
806: return(1);
807: #else
808: return(0);
809: #endif
810: case XML_WITH_PUSH:
811: #ifdef LIBXML_PUSH_ENABLED
812: return(1);
813: #else
814: return(0);
815: #endif
816: case XML_WITH_READER:
817: #ifdef LIBXML_READER_ENABLED
818: return(1);
819: #else
820: return(0);
821: #endif
822: case XML_WITH_PATTERN:
823: #ifdef LIBXML_PATTERN_ENABLED
824: return(1);
825: #else
826: return(0);
827: #endif
828: case XML_WITH_WRITER:
829: #ifdef LIBXML_WRITER_ENABLED
830: return(1);
831: #else
832: return(0);
833: #endif
834: case XML_WITH_SAX1:
835: #ifdef LIBXML_SAX1_ENABLED
836: return(1);
837: #else
838: return(0);
839: #endif
840: case XML_WITH_FTP:
841: #ifdef LIBXML_FTP_ENABLED
842: return(1);
843: #else
844: return(0);
845: #endif
846: case XML_WITH_HTTP:
847: #ifdef LIBXML_HTTP_ENABLED
848: return(1);
849: #else
850: return(0);
851: #endif
852: case XML_WITH_VALID:
853: #ifdef LIBXML_VALID_ENABLED
854: return(1);
855: #else
856: return(0);
857: #endif
858: case XML_WITH_HTML:
859: #ifdef LIBXML_HTML_ENABLED
860: return(1);
861: #else
862: return(0);
863: #endif
864: case XML_WITH_LEGACY:
865: #ifdef LIBXML_LEGACY_ENABLED
866: return(1);
867: #else
868: return(0);
869: #endif
870: case XML_WITH_C14N:
871: #ifdef LIBXML_C14N_ENABLED
872: return(1);
873: #else
874: return(0);
875: #endif
876: case XML_WITH_CATALOG:
877: #ifdef LIBXML_CATALOG_ENABLED
878: return(1);
879: #else
880: return(0);
881: #endif
882: case XML_WITH_XPATH:
883: #ifdef LIBXML_XPATH_ENABLED
884: return(1);
885: #else
886: return(0);
887: #endif
888: case XML_WITH_XPTR:
889: #ifdef LIBXML_XPTR_ENABLED
890: return(1);
891: #else
892: return(0);
893: #endif
894: case XML_WITH_XINCLUDE:
895: #ifdef LIBXML_XINCLUDE_ENABLED
896: return(1);
897: #else
898: return(0);
899: #endif
900: case XML_WITH_ICONV:
901: #ifdef LIBXML_ICONV_ENABLED
902: return(1);
903: #else
904: return(0);
905: #endif
906: case XML_WITH_ISO8859X:
907: #ifdef LIBXML_ISO8859X_ENABLED
908: return(1);
909: #else
910: return(0);
911: #endif
912: case XML_WITH_UNICODE:
913: #ifdef LIBXML_UNICODE_ENABLED
914: return(1);
915: #else
916: return(0);
917: #endif
918: case XML_WITH_REGEXP:
919: #ifdef LIBXML_REGEXP_ENABLED
920: return(1);
921: #else
922: return(0);
923: #endif
924: case XML_WITH_AUTOMATA:
925: #ifdef LIBXML_AUTOMATA_ENABLED
926: return(1);
927: #else
928: return(0);
929: #endif
930: case XML_WITH_EXPR:
931: #ifdef LIBXML_EXPR_ENABLED
932: return(1);
933: #else
934: return(0);
935: #endif
936: case XML_WITH_SCHEMAS:
937: #ifdef LIBXML_SCHEMAS_ENABLED
938: return(1);
939: #else
940: return(0);
941: #endif
942: case XML_WITH_SCHEMATRON:
943: #ifdef LIBXML_SCHEMATRON_ENABLED
944: return(1);
945: #else
946: return(0);
947: #endif
948: case XML_WITH_MODULES:
949: #ifdef LIBXML_MODULES_ENABLED
950: return(1);
951: #else
952: return(0);
953: #endif
954: case XML_WITH_DEBUG:
955: #ifdef LIBXML_DEBUG_ENABLED
956: return(1);
957: #else
958: return(0);
959: #endif
960: case XML_WITH_DEBUG_MEM:
961: #ifdef DEBUG_MEMORY_LOCATION
962: return(1);
963: #else
964: return(0);
965: #endif
966: case XML_WITH_DEBUG_RUN:
967: #ifdef LIBXML_DEBUG_RUNTIME
968: return(1);
969: #else
970: return(0);
971: #endif
972: case XML_WITH_ZLIB:
973: #ifdef LIBXML_ZLIB_ENABLED
974: return(1);
975: #else
976: return(0);
977: #endif
1.1.1.2 misho 978: case XML_WITH_LZMA:
979: #ifdef LIBXML_LZMA_ENABLED
980: return(1);
981: #else
982: return(0);
983: #endif
1.1 misho 984: case XML_WITH_ICU:
985: #ifdef LIBXML_ICU_ENABLED
986: return(1);
987: #else
988: return(0);
989: #endif
990: default:
991: break;
992: }
993: return(0);
994: }
995:
996: /************************************************************************
997: * *
998: * SAX2 defaulted attributes handling *
999: * *
1000: ************************************************************************/
1001:
1002: /**
1003: * xmlDetectSAX2:
1004: * @ctxt: an XML parser context
1005: *
1006: * Do the SAX2 detection and specific intialization
1007: */
1008: static void
1009: xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1010: if (ctxt == NULL) return;
1011: #ifdef LIBXML_SAX1_ENABLED
1012: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1013: ((ctxt->sax->startElementNs != NULL) ||
1014: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1015: #else
1016: ctxt->sax2 = 1;
1017: #endif /* LIBXML_SAX1_ENABLED */
1018:
1019: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1020: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1021: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1022: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1023: (ctxt->str_xml_ns == NULL)) {
1024: xmlErrMemory(ctxt, NULL);
1025: }
1026: }
1027:
/*
 * Per-element record of defaulted attributes.
 * NOTE(review): values[] is declared with 5 slots, one group of
 * localname/prefix/values/external per the field comment; presumably the
 * structure is over-allocated to hold maxAttrs such groups - confirm
 * against xmlAddDefAttrs.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1035:
1036: /**
1037: * xmlAttrNormalizeSpace:
1038: * @src: the source string
1039: * @dst: the target string
1040: *
1041: * Normalize the space in non CDATA attribute values:
1042: * If the attribute type is not CDATA, then the XML processor MUST further
1043: * process the normalized attribute value by discarding any leading and
1044: * trailing space (#x20) characters, and by replacing sequences of space
1045: * (#x20) characters by a single space (#x20) character.
1046: * Note that the size of dst need to be at least src, and if one doesn't need
1047: * to preserve dst (and it doesn't come from a dictionary or read-only) then
1048: * passing src as dst is just fine.
1049: *
1050: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1051: * is needed.
1052: */
1053: static xmlChar *
1054: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1055: {
1056: if ((src == NULL) || (dst == NULL))
1057: return(NULL);
1058:
1059: while (*src == 0x20) src++;
1060: while (*src != 0) {
1061: if (*src == 0x20) {
1062: while (*src == 0x20) src++;
1063: if (*src != 0)
1064: *dst++ = 0x20;
1065: } else {
1066: *dst++ = *src++;
1067: }
1068: }
1069: *dst = 0;
1070: if (dst == src)
1071: return(NULL);
1072: return(dst);
1073: }
1074:
1075: /**
1076: * xmlAttrNormalizeSpace2:
1077: * @src: the source string
1078: *
1079: * Normalize the space in non CDATA attribute values, a slightly more complex
1080: * front end to avoid allocation problems when running on attribute values
1081: * coming from the input.
1082: *
1083: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1084: * is needed.
1085: */
1086: static const xmlChar *
1087: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1088: {
1089: int i;
1090: int remove_head = 0;
1091: int need_realloc = 0;
1092: const xmlChar *cur;
1093:
1094: if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1095: return(NULL);
1096: i = *len;
1097: if (i <= 0)
1098: return(NULL);
1099:
1100: cur = src;
1101: while (*cur == 0x20) {
1102: cur++;
1103: remove_head++;
1104: }
1105: while (*cur != 0) {
1106: if (*cur == 0x20) {
1107: cur++;
1108: if ((*cur == 0x20) || (*cur == 0)) {
1109: need_realloc = 1;
1110: break;
1111: }
1112: } else
1113: cur++;
1114: }
1115: if (need_realloc) {
1116: xmlChar *ret;
1117:
1118: ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1119: if (ret == NULL) {
1120: xmlErrMemory(ctxt, NULL);
1121: return(NULL);
1122: }
1123: xmlAttrNormalizeSpace(ret, ret);
1124: *len = (int) strlen((const char *)ret);
1125: return(ret);
1126: } else if (remove_head) {
1127: *len -= remove_head;
1128: memmove(src, src + remove_head, 1 + *len);
1129: return(src);
1130: }
1131: return(NULL);
1132: }
1133:
1134: /**
1135: * xmlAddDefAttrs:
1136: * @ctxt: an XML parser context
1137: * @fullname: the element fullname
1138: * @fullattr: the attribute fullname
1139: * @value: the attribute value
1140: *
1141: * Add a defaulted attribute for an element
1142: */
1143: static void
1144: xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1145: const xmlChar *fullname,
1146: const xmlChar *fullattr,
1147: const xmlChar *value) {
1148: xmlDefAttrsPtr defaults;
1149: int len;
1150: const xmlChar *name;
1151: const xmlChar *prefix;
1152:
1153: /*
1154: * Allows to detect attribute redefinitions
1155: */
1156: if (ctxt->attsSpecial != NULL) {
1157: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1158: return;
1159: }
1160:
1161: if (ctxt->attsDefault == NULL) {
1162: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1163: if (ctxt->attsDefault == NULL)
1164: goto mem_error;
1165: }
1166:
1167: /*
1168: * split the element name into prefix:localname , the string found
1169: * are within the DTD and then not associated to namespace names.
1170: */
1171: name = xmlSplitQName3(fullname, &len);
1172: if (name == NULL) {
1173: name = xmlDictLookup(ctxt->dict, fullname, -1);
1174: prefix = NULL;
1175: } else {
1176: name = xmlDictLookup(ctxt->dict, name, -1);
1177: prefix = xmlDictLookup(ctxt->dict, fullname, len);
1178: }
1179:
1180: /*
1181: * make sure there is some storage
1182: */
1183: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1184: if (defaults == NULL) {
1185: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1186: (4 * 5) * sizeof(const xmlChar *));
1187: if (defaults == NULL)
1188: goto mem_error;
1189: defaults->nbAttrs = 0;
1190: defaults->maxAttrs = 4;
1191: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1192: defaults, NULL) < 0) {
1193: xmlFree(defaults);
1194: goto mem_error;
1195: }
1196: } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1197: xmlDefAttrsPtr temp;
1198:
1199: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1200: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1201: if (temp == NULL)
1202: goto mem_error;
1203: defaults = temp;
1204: defaults->maxAttrs *= 2;
1205: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1206: defaults, NULL) < 0) {
1207: xmlFree(defaults);
1208: goto mem_error;
1209: }
1210: }
1211:
1212: /*
1213: * Split the element name into prefix:localname , the string found
1214: * are within the DTD and hen not associated to namespace names.
1215: */
1216: name = xmlSplitQName3(fullattr, &len);
1217: if (name == NULL) {
1218: name = xmlDictLookup(ctxt->dict, fullattr, -1);
1219: prefix = NULL;
1220: } else {
1221: name = xmlDictLookup(ctxt->dict, name, -1);
1222: prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1223: }
1224:
1225: defaults->values[5 * defaults->nbAttrs] = name;
1226: defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1227: /* intern the string and precompute the end */
1228: len = xmlStrlen(value);
1229: value = xmlDictLookup(ctxt->dict, value, len);
1230: defaults->values[5 * defaults->nbAttrs + 2] = value;
1231: defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1232: if (ctxt->external)
1233: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1234: else
1235: defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1236: defaults->nbAttrs++;
1237:
1238: return;
1239:
1240: mem_error:
1241: xmlErrMemory(ctxt, NULL);
1242: return;
1243: }
1244:
1245: /**
1246: * xmlAddSpecialAttr:
1247: * @ctxt: an XML parser context
1248: * @fullname: the element fullname
1249: * @fullattr: the attribute fullname
1250: * @type: the attribute type
1251: *
1252: * Register this attribute type
1253: */
1254: static void
1255: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1256: const xmlChar *fullname,
1257: const xmlChar *fullattr,
1258: int type)
1259: {
1260: if (ctxt->attsSpecial == NULL) {
1261: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1262: if (ctxt->attsSpecial == NULL)
1263: goto mem_error;
1264: }
1265:
1266: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1267: return;
1268:
1269: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1270: (void *) (long) type);
1271: return;
1272:
1273: mem_error:
1274: xmlErrMemory(ctxt, NULL);
1275: return;
1276: }
1277:
1278: /**
1279: * xmlCleanSpecialAttrCallback:
1280: *
1281: * Removes CDATA attributes from the special attribute table
1282: */
1283: static void
1284: xmlCleanSpecialAttrCallback(void *payload, void *data,
1285: const xmlChar *fullname, const xmlChar *fullattr,
1286: const xmlChar *unused ATTRIBUTE_UNUSED) {
1287: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1288:
1289: if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1290: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1291: }
1292: }
1293:
1294: /**
1295: * xmlCleanSpecialAttr:
1296: * @ctxt: an XML parser context
1297: *
1298: * Trim the list of attributes defined to remove all those of type
1299: * CDATA as they are not special. This call should be done when finishing
1300: * to parse the DTD and before starting to parse the document root.
1301: */
1302: static void
1303: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1304: {
1305: if (ctxt->attsSpecial == NULL)
1306: return;
1307:
1308: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1309:
1310: if (xmlHashSize(ctxt->attsSpecial) == 0) {
1311: xmlHashFree(ctxt->attsSpecial, NULL);
1312: ctxt->attsSpecial = NULL;
1313: }
1314: return;
1315: }
1316:
1317: /**
1318: * xmlCheckLanguageID:
1319: * @lang: pointer to the string value
1320: *
1321: * Checks that the value conforms to the LanguageID production:
1322: *
1323: * NOTE: this is somewhat deprecated, those productions were removed from
1324: * the XML Second edition.
1325: *
1326: * [33] LanguageID ::= Langcode ('-' Subcode)*
1327: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1328: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1329: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1330: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1331: * [38] Subcode ::= ([a-z] | [A-Z])+
1332: *
1333: * The current REC reference the sucessors of RFC 1766, currently 5646
1334: *
1335: * http://www.rfc-editor.org/rfc/rfc5646.txt
1336: * langtag = language
1337: * ["-" script]
1338: * ["-" region]
1339: * *("-" variant)
1340: * *("-" extension)
1341: * ["-" privateuse]
1342: * language = 2*3ALPHA ; shortest ISO 639 code
1343: * ["-" extlang] ; sometimes followed by
1344: * ; extended language subtags
1345: * / 4ALPHA ; or reserved for future use
1346: * / 5*8ALPHA ; or registered language subtag
1347: *
1348: * extlang = 3ALPHA ; selected ISO 639 codes
1349: * *2("-" 3ALPHA) ; permanently reserved
1350: *
1351: * script = 4ALPHA ; ISO 15924 code
1352: *
1353: * region = 2ALPHA ; ISO 3166-1 code
1354: * / 3DIGIT ; UN M.49 code
1355: *
1356: * variant = 5*8alphanum ; registered variants
1357: * / (DIGIT 3alphanum)
1358: *
1359: * extension = singleton 1*("-" (2*8alphanum))
1360: *
1361: * ; Single alphanumerics
1362: * ; "x" reserved for private use
1363: * singleton = DIGIT ; 0 - 9
1364: * / %x41-57 ; A - W
1365: * / %x59-5A ; Y - Z
1366: * / %x61-77 ; a - w
1367: * / %x79-7A ; y - z
1368: *
1369: * it sounds right to still allow Irregular i-xxx IANA and user codes too
1370: * The parser below doesn't try to cope with extension or privateuse
1371: * that could be added but that's not interoperable anyway
1372: *
1373: * Returns 1 if correct 0 otherwise
1374: **/
1375: int
1376: xmlCheckLanguageID(const xmlChar * lang)
1377: {
1378: const xmlChar *cur = lang, *nxt;
1379:
1380: if (cur == NULL)
1381: return (0);
1382: if (((cur[0] == 'i') && (cur[1] == '-')) ||
1383: ((cur[0] == 'I') && (cur[1] == '-')) ||
1384: ((cur[0] == 'x') && (cur[1] == '-')) ||
1385: ((cur[0] == 'X') && (cur[1] == '-'))) {
1386: /*
1387: * Still allow IANA code and user code which were coming
1388: * from the previous version of the XML-1.0 specification
1389: * it's deprecated but we should not fail
1390: */
1391: cur += 2;
1392: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1393: ((cur[0] >= 'a') && (cur[0] <= 'z')))
1394: cur++;
1395: return(cur[0] == 0);
1396: }
1397: nxt = cur;
1398: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1399: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1400: nxt++;
1401: if (nxt - cur >= 4) {
1402: /*
1403: * Reserved
1404: */
1405: if ((nxt - cur > 8) || (nxt[0] != 0))
1406: return(0);
1407: return(1);
1408: }
1409: if (nxt - cur < 2)
1410: return(0);
1411: /* we got an ISO 639 code */
1412: if (nxt[0] == 0)
1413: return(1);
1414: if (nxt[0] != '-')
1415: return(0);
1416:
1417: nxt++;
1418: cur = nxt;
1419: /* now we can have extlang or script or region or variant */
1420: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1421: goto region_m49;
1422:
1423: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1424: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1425: nxt++;
1426: if (nxt - cur == 4)
1427: goto script;
1428: if (nxt - cur == 2)
1429: goto region;
1430: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1431: goto variant;
1432: if (nxt - cur != 3)
1433: return(0);
1434: /* we parsed an extlang */
1435: if (nxt[0] == 0)
1436: return(1);
1437: if (nxt[0] != '-')
1438: return(0);
1439:
1440: nxt++;
1441: cur = nxt;
1442: /* now we can have script or region or variant */
1443: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1444: goto region_m49;
1445:
1446: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1447: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1448: nxt++;
1449: if (nxt - cur == 2)
1450: goto region;
1451: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1452: goto variant;
1453: if (nxt - cur != 4)
1454: return(0);
1455: /* we parsed a script */
1456: script:
1457: if (nxt[0] == 0)
1458: return(1);
1459: if (nxt[0] != '-')
1460: return(0);
1461:
1462: nxt++;
1463: cur = nxt;
1464: /* now we can have region or variant */
1465: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466: goto region_m49;
1467:
1468: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470: nxt++;
1471:
1472: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1473: goto variant;
1474: if (nxt - cur != 2)
1475: return(0);
1476: /* we parsed a region */
1477: region:
1478: if (nxt[0] == 0)
1479: return(1);
1480: if (nxt[0] != '-')
1481: return(0);
1482:
1483: nxt++;
1484: cur = nxt;
1485: /* now we can just have a variant */
1486: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1487: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1488: nxt++;
1489:
1490: if ((nxt - cur < 5) || (nxt - cur > 8))
1491: return(0);
1492:
1493: /* we parsed a variant */
1494: variant:
1495: if (nxt[0] == 0)
1496: return(1);
1497: if (nxt[0] != '-')
1498: return(0);
1499: /* extensions and private use subtags not checked */
1500: return (1);
1501:
1502: region_m49:
1503: if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1504: ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1505: nxt += 3;
1506: goto region;
1507: }
1508: return(0);
1509: }
1510:
1511: /************************************************************************
1512: * *
1513: * Parser stacks related functions and macros *
1514: * *
1515: ************************************************************************/
1516:
1517: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1518: const xmlChar ** str);
1519:
1520: #ifdef SAX2
1521: /**
1522: * nsPush:
1523: * @ctxt: an XML parser context
1524: * @prefix: the namespace prefix or NULL
1525: * @URL: the namespace name
1526: *
1527: * Pushes a new parser namespace on top of the ns stack
1528: *
1529: * Returns -1 in case of error, -2 if the namespace should be discarded
1530: * and the index in the stack otherwise.
1531: */
1532: static int
1533: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1534: {
1535: if (ctxt->options & XML_PARSE_NSCLEAN) {
1536: int i;
1537: for (i = 0;i < ctxt->nsNr;i += 2) {
1538: if (ctxt->nsTab[i] == prefix) {
1539: /* in scope */
1540: if (ctxt->nsTab[i + 1] == URL)
1541: return(-2);
1542: /* out of scope keep it */
1543: break;
1544: }
1545: }
1546: }
1547: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1548: ctxt->nsMax = 10;
1549: ctxt->nsNr = 0;
1550: ctxt->nsTab = (const xmlChar **)
1551: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1552: if (ctxt->nsTab == NULL) {
1553: xmlErrMemory(ctxt, NULL);
1554: ctxt->nsMax = 0;
1555: return (-1);
1556: }
1557: } else if (ctxt->nsNr >= ctxt->nsMax) {
1558: const xmlChar ** tmp;
1559: ctxt->nsMax *= 2;
1560: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1561: ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1562: if (tmp == NULL) {
1563: xmlErrMemory(ctxt, NULL);
1564: ctxt->nsMax /= 2;
1565: return (-1);
1566: }
1567: ctxt->nsTab = tmp;
1568: }
1569: ctxt->nsTab[ctxt->nsNr++] = prefix;
1570: ctxt->nsTab[ctxt->nsNr++] = URL;
1571: return (ctxt->nsNr);
1572: }
1573: /**
1574: * nsPop:
1575: * @ctxt: an XML parser context
1576: * @nr: the number to pop
1577: *
1578: * Pops the top @nr parser prefix/namespace from the ns stack
1579: *
1580: * Returns the number of namespaces removed
1581: */
1582: static int
1583: nsPop(xmlParserCtxtPtr ctxt, int nr)
1584: {
1585: int i;
1586:
1587: if (ctxt->nsTab == NULL) return(0);
1588: if (ctxt->nsNr < nr) {
1589: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1590: nr = ctxt->nsNr;
1591: }
1592: if (ctxt->nsNr <= 0)
1593: return (0);
1594:
1595: for (i = 0;i < nr;i++) {
1596: ctxt->nsNr--;
1597: ctxt->nsTab[ctxt->nsNr] = NULL;
1598: }
1599: return(nr);
1600: }
1601: #endif
1602:
1603: static int
1604: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1605: const xmlChar **atts;
1606: int *attallocs;
1607: int maxatts;
1608:
1609: if (ctxt->atts == NULL) {
1610: maxatts = 55; /* allow for 10 attrs by default */
1611: atts = (const xmlChar **)
1612: xmlMalloc(maxatts * sizeof(xmlChar *));
1613: if (atts == NULL) goto mem_error;
1614: ctxt->atts = atts;
1615: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1616: if (attallocs == NULL) goto mem_error;
1617: ctxt->attallocs = attallocs;
1618: ctxt->maxatts = maxatts;
1619: } else if (nr + 5 > ctxt->maxatts) {
1620: maxatts = (nr + 5) * 2;
1621: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1622: maxatts * sizeof(const xmlChar *));
1623: if (atts == NULL) goto mem_error;
1624: ctxt->atts = atts;
1625: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1626: (maxatts / 5) * sizeof(int));
1627: if (attallocs == NULL) goto mem_error;
1628: ctxt->attallocs = attallocs;
1629: ctxt->maxatts = maxatts;
1630: }
1631: return(ctxt->maxatts);
1632: mem_error:
1633: xmlErrMemory(ctxt, NULL);
1634: return(-1);
1635: }
1636:
1637: /**
1638: * inputPush:
1639: * @ctxt: an XML parser context
1640: * @value: the parser input
1641: *
1642: * Pushes a new parser input on top of the input stack
1643: *
1644: * Returns -1 in case of error, the index in the stack otherwise
1645: */
1646: int
1647: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1648: {
1649: if ((ctxt == NULL) || (value == NULL))
1650: return(-1);
1651: if (ctxt->inputNr >= ctxt->inputMax) {
1652: ctxt->inputMax *= 2;
1653: ctxt->inputTab =
1654: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1655: ctxt->inputMax *
1656: sizeof(ctxt->inputTab[0]));
1657: if (ctxt->inputTab == NULL) {
1658: xmlErrMemory(ctxt, NULL);
1659: xmlFreeInputStream(value);
1660: ctxt->inputMax /= 2;
1661: value = NULL;
1662: return (-1);
1663: }
1664: }
1665: ctxt->inputTab[ctxt->inputNr] = value;
1666: ctxt->input = value;
1667: return (ctxt->inputNr++);
1668: }
1669: /**
1670: * inputPop:
1671: * @ctxt: an XML parser context
1672: *
1673: * Pops the top parser input from the input stack
1674: *
1675: * Returns the input just removed
1676: */
1677: xmlParserInputPtr
1678: inputPop(xmlParserCtxtPtr ctxt)
1679: {
1680: xmlParserInputPtr ret;
1681:
1682: if (ctxt == NULL)
1683: return(NULL);
1684: if (ctxt->inputNr <= 0)
1685: return (NULL);
1686: ctxt->inputNr--;
1687: if (ctxt->inputNr > 0)
1688: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1689: else
1690: ctxt->input = NULL;
1691: ret = ctxt->inputTab[ctxt->inputNr];
1692: ctxt->inputTab[ctxt->inputNr] = NULL;
1693: return (ret);
1694: }
1695: /**
1696: * nodePush:
1697: * @ctxt: an XML parser context
1698: * @value: the element node
1699: *
1700: * Pushes a new element node on top of the node stack
1701: *
1702: * Returns -1 in case of error, the index in the stack otherwise
1703: */
1704: int
1705: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1706: {
1707: if (ctxt == NULL) return(0);
1708: if (ctxt->nodeNr >= ctxt->nodeMax) {
1709: xmlNodePtr *tmp;
1710:
1711: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1712: ctxt->nodeMax * 2 *
1713: sizeof(ctxt->nodeTab[0]));
1714: if (tmp == NULL) {
1715: xmlErrMemory(ctxt, NULL);
1716: return (-1);
1717: }
1718: ctxt->nodeTab = tmp;
1719: ctxt->nodeMax *= 2;
1720: }
1721: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1722: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1723: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1724: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1725: xmlParserMaxDepth);
1726: ctxt->instate = XML_PARSER_EOF;
1727: return(-1);
1728: }
1729: ctxt->nodeTab[ctxt->nodeNr] = value;
1730: ctxt->node = value;
1731: return (ctxt->nodeNr++);
1732: }
1733:
1734: /**
1735: * nodePop:
1736: * @ctxt: an XML parser context
1737: *
1738: * Pops the top element node from the node stack
1739: *
1740: * Returns the node just removed
1741: */
1742: xmlNodePtr
1743: nodePop(xmlParserCtxtPtr ctxt)
1744: {
1745: xmlNodePtr ret;
1746:
1747: if (ctxt == NULL) return(NULL);
1748: if (ctxt->nodeNr <= 0)
1749: return (NULL);
1750: ctxt->nodeNr--;
1751: if (ctxt->nodeNr > 0)
1752: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1753: else
1754: ctxt->node = NULL;
1755: ret = ctxt->nodeTab[ctxt->nodeNr];
1756: ctxt->nodeTab[ctxt->nodeNr] = NULL;
1757: return (ret);
1758: }
1759:
1760: #ifdef LIBXML_PUSH_ENABLED
1761: /**
1762: * nameNsPush:
1763: * @ctxt: an XML parser context
1764: * @value: the element name
1765: * @prefix: the element prefix
1766: * @URI: the element namespace name
1767: *
1768: * Pushes a new element name/prefix/URL on top of the name stack
1769: *
1770: * Returns -1 in case of error, the index in the stack otherwise
1771: */
1772: static int
1773: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1774: const xmlChar *prefix, const xmlChar *URI, int nsNr)
1775: {
1776: if (ctxt->nameNr >= ctxt->nameMax) {
1777: const xmlChar * *tmp;
1778: void **tmp2;
1779: ctxt->nameMax *= 2;
1780: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1781: ctxt->nameMax *
1782: sizeof(ctxt->nameTab[0]));
1783: if (tmp == NULL) {
1784: ctxt->nameMax /= 2;
1785: goto mem_error;
1786: }
1787: ctxt->nameTab = tmp;
1788: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1789: ctxt->nameMax * 3 *
1790: sizeof(ctxt->pushTab[0]));
1791: if (tmp2 == NULL) {
1792: ctxt->nameMax /= 2;
1793: goto mem_error;
1794: }
1795: ctxt->pushTab = tmp2;
1796: }
1797: ctxt->nameTab[ctxt->nameNr] = value;
1798: ctxt->name = value;
1799: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1800: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1801: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1802: return (ctxt->nameNr++);
1803: mem_error:
1804: xmlErrMemory(ctxt, NULL);
1805: return (-1);
1806: }
1807: /**
1808: * nameNsPop:
1809: * @ctxt: an XML parser context
1810: *
1811: * Pops the top element/prefix/URI name from the name stack
1812: *
1813: * Returns the name just removed
1814: */
1815: static const xmlChar *
1816: nameNsPop(xmlParserCtxtPtr ctxt)
1817: {
1818: const xmlChar *ret;
1819:
1820: if (ctxt->nameNr <= 0)
1821: return (NULL);
1822: ctxt->nameNr--;
1823: if (ctxt->nameNr > 0)
1824: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1825: else
1826: ctxt->name = NULL;
1827: ret = ctxt->nameTab[ctxt->nameNr];
1828: ctxt->nameTab[ctxt->nameNr] = NULL;
1829: return (ret);
1830: }
1831: #endif /* LIBXML_PUSH_ENABLED */
1832:
1833: /**
1834: * namePush:
1835: * @ctxt: an XML parser context
1836: * @value: the element name
1837: *
1838: * Pushes a new element name on top of the name stack
1839: *
1840: * Returns -1 in case of error, the index in the stack otherwise
1841: */
1842: int
1843: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1844: {
1845: if (ctxt == NULL) return (-1);
1846:
1847: if (ctxt->nameNr >= ctxt->nameMax) {
1848: const xmlChar * *tmp;
1849: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1.1.1.2 misho 1850: ctxt->nameMax * 2 *
1.1 misho 1851: sizeof(ctxt->nameTab[0]));
1852: if (tmp == NULL) {
1853: goto mem_error;
1854: }
1855: ctxt->nameTab = tmp;
1.1.1.2 misho 1856: ctxt->nameMax *= 2;
1.1 misho 1857: }
1858: ctxt->nameTab[ctxt->nameNr] = value;
1859: ctxt->name = value;
1860: return (ctxt->nameNr++);
1861: mem_error:
1862: xmlErrMemory(ctxt, NULL);
1863: return (-1);
1864: }
1865: /**
1866: * namePop:
1867: * @ctxt: an XML parser context
1868: *
1869: * Pops the top element name from the name stack
1870: *
1871: * Returns the name just removed
1872: */
1873: const xmlChar *
1874: namePop(xmlParserCtxtPtr ctxt)
1875: {
1876: const xmlChar *ret;
1877:
1878: if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1879: return (NULL);
1880: ctxt->nameNr--;
1881: if (ctxt->nameNr > 0)
1882: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1883: else
1884: ctxt->name = NULL;
1885: ret = ctxt->nameTab[ctxt->nameNr];
1886: ctxt->nameTab[ctxt->nameNr] = NULL;
1887: return (ret);
1888: }
1889:
1890: static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1891: if (ctxt->spaceNr >= ctxt->spaceMax) {
1892: int *tmp;
1893:
1894: ctxt->spaceMax *= 2;
1895: tmp = (int *) xmlRealloc(ctxt->spaceTab,
1896: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1897: if (tmp == NULL) {
1898: xmlErrMemory(ctxt, NULL);
1899: ctxt->spaceMax /=2;
1900: return(-1);
1901: }
1902: ctxt->spaceTab = tmp;
1903: }
1904: ctxt->spaceTab[ctxt->spaceNr] = val;
1905: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1906: return(ctxt->spaceNr++);
1907: }
1908:
1909: static int spacePop(xmlParserCtxtPtr ctxt) {
1910: int ret;
1911: if (ctxt->spaceNr <= 0) return(0);
1912: ctxt->spaceNr--;
1913: if (ctxt->spaceNr > 0)
1914: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1915: else
1916: ctxt->space = &ctxt->spaceTab[0];
1917: ret = ctxt->spaceTab[ctxt->spaceNr];
1918: ctxt->spaceTab[ctxt->spaceNr] = -1;
1919: return(ret);
1920: }
1921:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR; both read the current input buffer directly
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of them expect a variable named ctxt to be in scope.
 */

#define RAW      (*ctxt->input->cur)
#define CUR      (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR  ctxt->input->cur
1961:
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as unsigned
 * chars, are c1 ... cn.  Each macro builds on the previous one, so the
 * comparison stops at the first byte that differs.
 */
#define CMP4( s, c1, c2, c3, c4 )                                       \
  ( ((unsigned char *) s)[ 0 ] == c1 &&                                 \
    ((unsigned char *) s)[ 1 ] == c2 &&                                 \
    ((unsigned char *) s)[ 2 ] == c3 &&                                 \
    ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 )                                   \
  ( CMP4( s, c1, c2, c3, c4 ) &&                                        \
    ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 )                               \
  ( CMP5( s, c1, c2, c3, c4, c5 ) &&                                    \
    ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 )                           \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) &&                                \
    ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 )                       \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) &&                            \
    ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 )                   \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) &&                        \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 )             \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) &&                    \
    ((unsigned char *) s)[ 9 ] == c10 )
1979:
/*
 * SKIP(val): advance the input by val xmlChars, updating the character
 * count and the column but not the line number.  Afterwards a '%' triggers
 * parameter entity handling, and an exhausted input which cannot be grown
 * is popped.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * newlines update the line number and reset the column.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<val; skipl++) {                                  \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++;                                        \
            ctxt->input->col = 1;                                       \
        } else                                                          \
            ctxt->input->col++;                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
2002:
2003: #define SHRINK if ((ctxt->progressive == 0) && \
2004: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2005: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2006: xmlSHRINK (ctxt);
2007:
2008: static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2009: xmlParserInputShrink(ctxt->input);
2010: if ((*ctxt->input->cur == 0) &&
2011: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2012: xmlPopInput(ctxt);
2013: }
2014:
2015: #define GROW if ((ctxt->progressive == 0) && \
2016: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2017: xmlGROW (ctxt);
2018:
2019: static void xmlGROW (xmlParserCtxtPtr ctxt) {
2020: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2021: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2022: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2023: xmlPopInput(ctxt);
2024: }
2025:
/* skip blanks, resp. one character, through the parser functions */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar; the column and character count are
 * advanced, the line number is not.  Expands to a brace block, not to a
 * do { } while (0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long, with
 * line/column tracking, then handle a parameter entity reference if the
 * input now points at a '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    } else                                                              \
        ctxt->input->col++;                                             \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

/* current character of the context, resp. of the string s; l gets its size */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v, l xmlChars long, to buffer b
 * at index i and advance i.  Expands to a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
2052:
2053: /**
2054: * xmlSkipBlankChars:
2055: * @ctxt: the XML parser context
2056: *
2057: * skip all blanks character found at that point in the input streams.
2058: * It pops up finished entities in the process if allowable at that point.
2059: *
2060: * Returns the number of space chars skipped
2061: */
2062:
2063: int
2064: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2065: int res = 0;
2066:
2067: /*
2068: * It's Okay to use CUR/NEXT here since all the blanks are on
2069: * the ASCII range.
2070: */
2071: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2072: const xmlChar *cur;
2073: /*
2074: * if we are in the document content, go really fast
2075: */
2076: cur = ctxt->input->cur;
2077: while (IS_BLANK_CH(*cur)) {
2078: if (*cur == '\n') {
2079: ctxt->input->line++; ctxt->input->col = 1;
2080: }
2081: cur++;
2082: res++;
2083: if (*cur == 0) {
2084: ctxt->input->cur = cur;
2085: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086: cur = ctxt->input->cur;
2087: }
2088: }
2089: ctxt->input->cur = cur;
2090: } else {
2091: int cur;
2092: do {
2093: cur = CUR;
2094: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2095: NEXT;
2096: cur = CUR;
2097: res++;
2098: }
2099: while ((cur == 0) && (ctxt->inputNr > 1) &&
2100: (ctxt->instate != XML_PARSER_COMMENT)) {
2101: xmlPopInput(ctxt);
2102: cur = CUR;
2103: }
2104: /*
2105: * Need to handle support of entities branching here
2106: */
2107: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2108: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2109: }
2110: return(res);
2111: }
2112:
2113: /************************************************************************
2114: * *
2115: * Commodity functions to handle entities *
2116: * *
2117: ************************************************************************/
2118:
2119: /**
2120: * xmlPopInput:
2121: * @ctxt: an XML parser context
2122: *
2123: * xmlPopInput: the current input pointed by ctxt->input came to an end
2124: * pop it and return the next char.
2125: *
2126: * Returns the current xmlChar in the parser context
2127: */
2128: xmlChar
2129: xmlPopInput(xmlParserCtxtPtr ctxt) {
2130: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2131: if (xmlParserDebugEntities)
2132: xmlGenericError(xmlGenericErrorContext,
2133: "Popping input %d\n", ctxt->inputNr);
2134: xmlFreeInputStream(inputPop(ctxt));
2135: if ((*ctxt->input->cur == 0) &&
2136: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2137: return(xmlPopInput(ctxt));
2138: return(CUR);
2139: }
2140:
2141: /**
2142: * xmlPushInput:
2143: * @ctxt: an XML parser context
2144: * @input: an XML parser input fragment (entity, XML fragment ...).
2145: *
2146: * xmlPushInput: switch to a new input stream which is stacked on top
2147: * of the previous one(s).
2148: * Returns -1 in case of error or the index in the input stack
2149: */
2150: int
2151: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2152: int ret;
2153: if (input == NULL) return(-1);
2154:
2155: if (xmlParserDebugEntities) {
2156: if ((ctxt->input != NULL) && (ctxt->input->filename))
2157: xmlGenericError(xmlGenericErrorContext,
2158: "%s(%d): ", ctxt->input->filename,
2159: ctxt->input->line);
2160: xmlGenericError(xmlGenericErrorContext,
2161: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2162: }
2163: ret = inputPush(ctxt, input);
2164: GROW;
2165: return(ret);
2166: }
2167:
2168: /**
2169: * xmlParseCharRef:
2170: * @ctxt: an XML parser context
2171: *
2172: * parse Reference declarations
2173: *
2174: * [66] CharRef ::= '&#' [0-9]+ ';' |
2175: * '&#x' [0-9a-fA-F]+ ';'
2176: *
2177: * [ WFC: Legal Character ]
2178: * Characters referred to using character references must match the
2179: * production for Char.
2180: *
2181: * Returns the value parsed (as an int), 0 in case of error
2182: */
2183: int
2184: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2185: unsigned int val = 0;
2186: int count = 0;
2187: unsigned int outofrange = 0;
2188:
2189: /*
2190: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2191: */
2192: if ((RAW == '&') && (NXT(1) == '#') &&
2193: (NXT(2) == 'x')) {
2194: SKIP(3);
2195: GROW;
2196: while (RAW != ';') { /* loop blocked by count */
2197: if (count++ > 20) {
2198: count = 0;
2199: GROW;
2200: }
2201: if ((RAW >= '0') && (RAW <= '9'))
2202: val = val * 16 + (CUR - '0');
2203: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2204: val = val * 16 + (CUR - 'a') + 10;
2205: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2206: val = val * 16 + (CUR - 'A') + 10;
2207: else {
2208: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2209: val = 0;
2210: break;
2211: }
2212: if (val > 0x10FFFF)
2213: outofrange = val;
2214:
2215: NEXT;
2216: count++;
2217: }
2218: if (RAW == ';') {
2219: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2220: ctxt->input->col++;
2221: ctxt->nbChars ++;
2222: ctxt->input->cur++;
2223: }
2224: } else if ((RAW == '&') && (NXT(1) == '#')) {
2225: SKIP(2);
2226: GROW;
2227: while (RAW != ';') { /* loop blocked by count */
2228: if (count++ > 20) {
2229: count = 0;
2230: GROW;
2231: }
2232: if ((RAW >= '0') && (RAW <= '9'))
2233: val = val * 10 + (CUR - '0');
2234: else {
2235: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2236: val = 0;
2237: break;
2238: }
2239: if (val > 0x10FFFF)
2240: outofrange = val;
2241:
2242: NEXT;
2243: count++;
2244: }
2245: if (RAW == ';') {
2246: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2247: ctxt->input->col++;
2248: ctxt->nbChars ++;
2249: ctxt->input->cur++;
2250: }
2251: } else {
2252: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2253: }
2254:
2255: /*
2256: * [ WFC: Legal Character ]
2257: * Characters referred to using character references must match the
2258: * production for Char.
2259: */
2260: if ((IS_CHAR(val) && (outofrange == 0))) {
2261: return(val);
2262: } else {
2263: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2264: "xmlParseCharRef: invalid xmlChar value %d\n",
2265: val);
2266: }
2267: return(0);
2268: }
2269:
2270: /**
2271: * xmlParseStringCharRef:
2272: * @ctxt: an XML parser context
2273: * @str: a pointer to an index in the string
2274: *
2275: * parse Reference declarations, variant parsing from a string rather
2276: * than an an input flow.
2277: *
2278: * [66] CharRef ::= '&#' [0-9]+ ';' |
2279: * '&#x' [0-9a-fA-F]+ ';'
2280: *
2281: * [ WFC: Legal Character ]
2282: * Characters referred to using character references must match the
2283: * production for Char.
2284: *
2285: * Returns the value parsed (as an int), 0 in case of error, str will be
2286: * updated to the current value of the index
2287: */
2288: static int
2289: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2290: const xmlChar *ptr;
2291: xmlChar cur;
2292: unsigned int val = 0;
2293: unsigned int outofrange = 0;
2294:
2295: if ((str == NULL) || (*str == NULL)) return(0);
2296: ptr = *str;
2297: cur = *ptr;
2298: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2299: ptr += 3;
2300: cur = *ptr;
2301: while (cur != ';') { /* Non input consuming loop */
2302: if ((cur >= '0') && (cur <= '9'))
2303: val = val * 16 + (cur - '0');
2304: else if ((cur >= 'a') && (cur <= 'f'))
2305: val = val * 16 + (cur - 'a') + 10;
2306: else if ((cur >= 'A') && (cur <= 'F'))
2307: val = val * 16 + (cur - 'A') + 10;
2308: else {
2309: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2310: val = 0;
2311: break;
2312: }
2313: if (val > 0x10FFFF)
2314: outofrange = val;
2315:
2316: ptr++;
2317: cur = *ptr;
2318: }
2319: if (cur == ';')
2320: ptr++;
2321: } else if ((cur == '&') && (ptr[1] == '#')){
2322: ptr += 2;
2323: cur = *ptr;
2324: while (cur != ';') { /* Non input consuming loops */
2325: if ((cur >= '0') && (cur <= '9'))
2326: val = val * 10 + (cur - '0');
2327: else {
2328: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2329: val = 0;
2330: break;
2331: }
2332: if (val > 0x10FFFF)
2333: outofrange = val;
2334:
2335: ptr++;
2336: cur = *ptr;
2337: }
2338: if (cur == ';')
2339: ptr++;
2340: } else {
2341: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2342: return(0);
2343: }
2344: *str = ptr;
2345:
2346: /*
2347: * [ WFC: Legal Character ]
2348: * Characters referred to using character references must match the
2349: * production for Char.
2350: */
2351: if ((IS_CHAR(val) && (outofrange == 0))) {
2352: return(val);
2353: } else {
2354: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2355: "xmlParseStringCharRef: invalid xmlChar value %d\n",
2356: val);
2357: }
2358: return(0);
2359: }
2360:
2361: /**
2362: * xmlNewBlanksWrapperInputStream:
2363: * @ctxt: an XML parser context
2364: * @entity: an Entity pointer
2365: *
2366: * Create a new input stream for wrapping
2367: * blanks around a PEReference
2368: *
2369: * Returns the new input stream or NULL
2370: */
2371:
2372: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2373:
2374: static xmlParserInputPtr
2375: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2376: xmlParserInputPtr input;
2377: xmlChar *buffer;
2378: size_t length;
2379: if (entity == NULL) {
2380: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2381: "xmlNewBlanksWrapperInputStream entity\n");
2382: return(NULL);
2383: }
2384: if (xmlParserDebugEntities)
2385: xmlGenericError(xmlGenericErrorContext,
2386: "new blanks wrapper for entity: %s\n", entity->name);
2387: input = xmlNewInputStream(ctxt);
2388: if (input == NULL) {
2389: return(NULL);
2390: }
2391: length = xmlStrlen(entity->name) + 5;
2392: buffer = xmlMallocAtomic(length);
2393: if (buffer == NULL) {
2394: xmlErrMemory(ctxt, NULL);
2395: xmlFree(input);
2396: return(NULL);
2397: }
2398: buffer [0] = ' ';
2399: buffer [1] = '%';
2400: buffer [length-3] = ';';
2401: buffer [length-2] = ' ';
2402: buffer [length-1] = 0;
2403: memcpy(buffer + 2, entity->name, length - 5);
2404: input->free = deallocblankswrapper;
2405: input->base = buffer;
2406: input->cur = buffer;
2407: input->length = length;
2408: input->end = &buffer[length];
2409: return(input);
2410: }
2411:
2412: /**
2413: * xmlParserHandlePEReference:
2414: * @ctxt: the parser context
2415: *
2416: * [69] PEReference ::= '%' Name ';'
2417: *
2418: * [ WFC: No Recursion ]
2419: * A parsed entity must not contain a recursive
2420: * reference to itself, either directly or indirectly.
2421: *
2422: * [ WFC: Entity Declared ]
2423: * In a document without any DTD, a document with only an internal DTD
2424: * subset which contains no parameter entity references, or a document
2425: * with "standalone='yes'", ... ... The declaration of a parameter
2426: * entity must precede any reference to it...
2427: *
2428: * [ VC: Entity Declared ]
2429: * In a document with an external subset or external parameter entities
2430: * with "standalone='no'", ... ... The declaration of a parameter entity
2431: * must precede any reference to it...
2432: *
2433: * [ WFC: In DTD ]
2434: * Parameter-entity references may only appear in the DTD.
2435: * NOTE: misleading but this is handled.
2436: *
2437: * A PEReference may have been detected in the current input stream
2438: * the handling is done accordingly to
2439: * http://www.w3.org/TR/REC-xml#entproc
2440: * i.e.
2441: * - Included in literal in entity values
2442: * - Included as Parameter Entity reference within DTDs
2443: */
2444: void
2445: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2446: const xmlChar *name;
2447: xmlEntityPtr entity = NULL;
2448: xmlParserInputPtr input;
2449:
2450: if (RAW != '%') return;
2451: switch(ctxt->instate) {
2452: case XML_PARSER_CDATA_SECTION:
2453: return;
2454: case XML_PARSER_COMMENT:
2455: return;
2456: case XML_PARSER_START_TAG:
2457: return;
2458: case XML_PARSER_END_TAG:
2459: return;
2460: case XML_PARSER_EOF:
2461: xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2462: return;
2463: case XML_PARSER_PROLOG:
2464: case XML_PARSER_START:
2465: case XML_PARSER_MISC:
2466: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2467: return;
2468: case XML_PARSER_ENTITY_DECL:
2469: case XML_PARSER_CONTENT:
2470: case XML_PARSER_ATTRIBUTE_VALUE:
2471: case XML_PARSER_PI:
2472: case XML_PARSER_SYSTEM_LITERAL:
2473: case XML_PARSER_PUBLIC_LITERAL:
2474: /* we just ignore it there */
2475: return;
2476: case XML_PARSER_EPILOG:
2477: xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2478: return;
2479: case XML_PARSER_ENTITY_VALUE:
2480: /*
2481: * NOTE: in the case of entity values, we don't do the
2482: * substitution here since we need the literal
2483: * entity value to be able to save the internal
2484: * subset of the document.
2485: * This will be handled by xmlStringDecodeEntities
2486: */
2487: return;
2488: case XML_PARSER_DTD:
2489: /*
2490: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2491: * In the internal DTD subset, parameter-entity references
2492: * can occur only where markup declarations can occur, not
2493: * within markup declarations.
2494: * In that case this is handled in xmlParseMarkupDecl
2495: */
2496: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2497: return;
2498: if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2499: return;
2500: break;
2501: case XML_PARSER_IGNORE:
2502: return;
2503: }
2504:
2505: NEXT;
2506: name = xmlParseName(ctxt);
2507: if (xmlParserDebugEntities)
2508: xmlGenericError(xmlGenericErrorContext,
2509: "PEReference: %s\n", name);
2510: if (name == NULL) {
2511: xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2512: } else {
2513: if (RAW == ';') {
2514: NEXT;
2515: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2516: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2517: if (entity == NULL) {
2518:
2519: /*
2520: * [ WFC: Entity Declared ]
2521: * In a document without any DTD, a document with only an
2522: * internal DTD subset which contains no parameter entity
2523: * references, or a document with "standalone='yes'", ...
2524: * ... The declaration of a parameter entity must precede
2525: * any reference to it...
2526: */
2527: if ((ctxt->standalone == 1) ||
2528: ((ctxt->hasExternalSubset == 0) &&
2529: (ctxt->hasPErefs == 0))) {
2530: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2531: "PEReference: %%%s; not found\n", name);
2532: } else {
2533: /*
2534: * [ VC: Entity Declared ]
2535: * In a document with an external subset or external
2536: * parameter entities with "standalone='no'", ...
2537: * ... The declaration of a parameter entity must precede
2538: * any reference to it...
2539: */
2540: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2541: xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2542: "PEReference: %%%s; not found\n",
2543: name, NULL);
2544: } else
2545: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2546: "PEReference: %%%s; not found\n",
2547: name, NULL);
2548: ctxt->valid = 0;
2549: }
2550: } else if (ctxt->input->free != deallocblankswrapper) {
2551: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2552: if (xmlPushInput(ctxt, input) < 0)
2553: return;
2554: } else {
2555: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2556: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2557: xmlChar start[4];
2558: xmlCharEncoding enc;
2559:
2560: /*
2561: * handle the extra spaces added before and after
2562: * c.f. http://www.w3.org/TR/REC-xml#as-PE
2563: * this is done independently.
2564: */
2565: input = xmlNewEntityInputStream(ctxt, entity);
2566: if (xmlPushInput(ctxt, input) < 0)
2567: return;
2568:
2569: /*
2570: * Get the 4 first bytes and decode the charset
2571: * if enc != XML_CHAR_ENCODING_NONE
2572: * plug some encoding conversion routines.
2573: * Note that, since we may have some non-UTF8
2574: * encoding (like UTF16, bug 135229), the 'length'
2575: * is not known, but we can calculate based upon
2576: * the amount of data in the buffer.
2577: */
2578: GROW
2579: if ((ctxt->input->end - ctxt->input->cur)>=4) {
2580: start[0] = RAW;
2581: start[1] = NXT(1);
2582: start[2] = NXT(2);
2583: start[3] = NXT(3);
2584: enc = xmlDetectCharEncoding(start, 4);
2585: if (enc != XML_CHAR_ENCODING_NONE) {
2586: xmlSwitchEncoding(ctxt, enc);
2587: }
2588: }
2589:
2590: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2591: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2592: (IS_BLANK_CH(NXT(5)))) {
2593: xmlParseTextDecl(ctxt);
2594: }
2595: } else {
2596: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2597: "PEReference: %s is not a parameter entity\n",
2598: name);
2599: }
2600: }
2601: } else {
2602: xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2603: }
2604: }
2605: }
2606:
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * reached when the new size overflows or when xmlRealloc fails, in which
 * case buffer and buffer##_size are left untouched.
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2621:
2622: /**
2623: * xmlStringLenDecodeEntities:
2624: * @ctxt: the parser context
2625: * @str: the input string
2626: * @len: the string length
2627: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2628: * @end: an end marker xmlChar, 0 if none
2629: * @end2: an end marker xmlChar, 0 if none
2630: * @end3: an end marker xmlChar, 0 if none
2631: *
2632: * Takes a entity string content and process to do the adequate substitutions.
2633: *
2634: * [67] Reference ::= EntityRef | CharRef
2635: *
2636: * [69] PEReference ::= '%' Name ';'
2637: *
2638: * Returns A newly allocated string with the substitution done. The caller
2639: * must deallocate it !
2640: */
2641: xmlChar *
2642: xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2643: int what, xmlChar end, xmlChar end2, xmlChar end3) {
2644: xmlChar *buffer = NULL;
1.1.1.2.2.1! misho 2645: size_t buffer_size = 0;
! 2646: size_t nbchars = 0;
1.1 misho 2647:
2648: xmlChar *current = NULL;
2649: xmlChar *rep = NULL;
2650: const xmlChar *last;
2651: xmlEntityPtr ent;
2652: int c,l;
2653:
2654: if ((ctxt == NULL) || (str == NULL) || (len < 0))
2655: return(NULL);
2656: last = str + len;
2657:
2658: if (((ctxt->depth > 40) &&
2659: ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2660: (ctxt->depth > 1024)) {
2661: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2662: return(NULL);
2663: }
2664:
2665: /*
2666: * allocate a translation buffer.
2667: */
2668: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.1.1.2.2.1! misho 2669: buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
1.1 misho 2670: if (buffer == NULL) goto mem_error;
2671:
2672: /*
2673: * OK loop until we reach one of the ending char or a size limit.
2674: * we are operating on already parsed values.
2675: */
2676: if (str < last)
2677: c = CUR_SCHAR(str, l);
2678: else
2679: c = 0;
2680: while ((c != 0) && (c != end) && /* non input consuming loop */
2681: (c != end2) && (c != end3)) {
2682:
2683: if (c == 0) break;
2684: if ((c == '&') && (str[1] == '#')) {
2685: int val = xmlParseStringCharRef(ctxt, &str);
2686: if (val != 0) {
2687: COPY_BUF(0,buffer,nbchars,val);
2688: }
1.1.1.2.2.1! misho 2689: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
1.1 misho 2690: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2691: }
2692: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2693: if (xmlParserDebugEntities)
2694: xmlGenericError(xmlGenericErrorContext,
2695: "String decoding Entity Reference: %.30s\n",
2696: str);
2697: ent = xmlParseStringEntityRef(ctxt, &str);
2698: if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2699: (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2700: goto int_error;
2701: if (ent != NULL)
2702: ctxt->nbentities += ent->checked;
2703: if ((ent != NULL) &&
2704: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2705: if (ent->content != NULL) {
2706: COPY_BUF(0,buffer,nbchars,ent->content[0]);
1.1.1.2.2.1! misho 2707: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
1.1 misho 2708: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2709: }
2710: } else {
2711: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712: "predefined entity has no content\n");
2713: }
2714: } else if ((ent != NULL) && (ent->content != NULL)) {
2715: ctxt->depth++;
2716: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2717: 0, 0, 0);
2718: ctxt->depth--;
2719:
2720: if (rep != NULL) {
2721: current = rep;
2722: while (*current != 0) { /* non input consuming loop */
2723: buffer[nbchars++] = *current++;
1.1.1.2.2.1! misho 2724: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
! 2725: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
1.1 misho 2726: goto int_error;
2727: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2728: }
2729: }
2730: xmlFree(rep);
2731: rep = NULL;
2732: }
2733: } else if (ent != NULL) {
2734: int i = xmlStrlen(ent->name);
2735: const xmlChar *cur = ent->name;
2736:
2737: buffer[nbchars++] = '&';
1.1.1.2.2.1! misho 2738: if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
1.1.1.2 misho 2739: growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
1.1 misho 2740: }
2741: for (;i > 0;i--)
2742: buffer[nbchars++] = *cur++;
2743: buffer[nbchars++] = ';';
2744: }
2745: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2746: if (xmlParserDebugEntities)
2747: xmlGenericError(xmlGenericErrorContext,
2748: "String decoding PE Reference: %.30s\n", str);
2749: ent = xmlParseStringPEReference(ctxt, &str);
2750: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2751: goto int_error;
2752: if (ent != NULL)
2753: ctxt->nbentities += ent->checked;
2754: if (ent != NULL) {
2755: if (ent->content == NULL) {
2756: xmlLoadEntityContent(ctxt, ent);
2757: }
2758: ctxt->depth++;
2759: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2760: 0, 0, 0);
2761: ctxt->depth--;
2762: if (rep != NULL) {
2763: current = rep;
2764: while (*current != 0) { /* non input consuming loop */
2765: buffer[nbchars++] = *current++;
1.1.1.2.2.1! misho 2766: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
! 2767: if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
1.1 misho 2768: goto int_error;
2769: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2770: }
2771: }
2772: xmlFree(rep);
2773: rep = NULL;
2774: }
2775: }
2776: } else {
2777: COPY_BUF(l,buffer,nbchars,c);
2778: str += l;
1.1.1.2.2.1! misho 2779: if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
! 2780: growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
1.1 misho 2781: }
2782: }
2783: if (str < last)
2784: c = CUR_SCHAR(str, l);
2785: else
2786: c = 0;
2787: }
2788: buffer[nbchars] = 0;
2789: return(buffer);
2790:
2791: mem_error:
2792: xmlErrMemory(ctxt, NULL);
2793: int_error:
2794: if (rep != NULL)
2795: xmlFree(rep);
2796: if (buffer != NULL)
2797: xmlFree(buffer);
2798: return(NULL);
2799: }
2800:
2801: /**
2802: * xmlStringDecodeEntities:
2803: * @ctxt: the parser context
2804: * @str: the input string
2805: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2806: * @end: an end marker xmlChar, 0 if none
2807: * @end2: an end marker xmlChar, 0 if none
2808: * @end3: an end marker xmlChar, 0 if none
2809: *
2810: * Takes a entity string content and process to do the adequate substitutions.
2811: *
2812: * [67] Reference ::= EntityRef | CharRef
2813: *
2814: * [69] PEReference ::= '%' Name ';'
2815: *
2816: * Returns A newly allocated string with the substitution done. The caller
2817: * must deallocate it !
2818: */
2819: xmlChar *
2820: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2821: xmlChar end, xmlChar end2, xmlChar end3) {
2822: if ((ctxt == NULL) || (str == NULL)) return(NULL);
2823: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2824: end, end2, end3));
2825: }
2826:
2827: /************************************************************************
2828: * *
2829: * Commodity functions, cleanup needed ? *
2830: * *
2831: ************************************************************************/
2832:
2833: /**
2834: * areBlanks:
2835: * @ctxt: an XML parser context
2836: * @str: a xmlChar *
2837: * @len: the size of @str
2838: * @blank_chars: we know the chars are blanks
2839: *
2840: * Is this a sequence of blank chars that one can ignore ?
2841: *
2842: * Returns 1 if ignorable 0 otherwise.
2843: */
2844:
2845: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2846: int blank_chars) {
2847: int i, ret;
2848: xmlNodePtr lastChild;
2849:
2850: /*
2851: * Don't spend time trying to differentiate them, the same callback is
2852: * used !
2853: */
2854: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2855: return(0);
2856:
2857: /*
2858: * Check for xml:space value.
2859: */
2860: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2861: (*(ctxt->space) == -2))
2862: return(0);
2863:
2864: /*
2865: * Check that the string is made of blanks
2866: */
2867: if (blank_chars == 0) {
2868: for (i = 0;i < len;i++)
2869: if (!(IS_BLANK_CH(str[i]))) return(0);
2870: }
2871:
2872: /*
2873: * Look if the element is mixed content in the DTD if available
2874: */
2875: if (ctxt->node == NULL) return(0);
2876: if (ctxt->myDoc != NULL) {
2877: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2878: if (ret == 0) return(1);
2879: if (ret == 1) return(0);
2880: }
2881:
2882: /*
2883: * Otherwise, heuristic :-\
2884: */
2885: if ((RAW != '<') && (RAW != 0xD)) return(0);
2886: if ((ctxt->node->children == NULL) &&
2887: (RAW == '<') && (NXT(1) == '/')) return(0);
2888:
2889: lastChild = xmlGetLastChild(ctxt->node);
2890: if (lastChild == NULL) {
2891: if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2892: (ctxt->node->content != NULL)) return(0);
2893: } else if (xmlNodeIsText(lastChild))
2894: return(0);
2895: else if ((ctxt->node->children != NULL) &&
2896: (xmlNodeIsText(ctxt->node->children)))
2897: return(0);
2898: return(1);
2899: }
2900:
2901: /************************************************************************
2902: * *
2903: * Extra stuff for namespace support *
2904: * Relates to http://www.w3.org/TR/WD-xml-names *
2905: * *
2906: ************************************************************************/
2907:
2908: /**
2909: * xmlSplitQName:
2910: * @ctxt: an XML parser context
2911: * @name: an XML parser context
2912: * @prefix: a xmlChar **
2913: *
2914: * parse an UTF8 encoded XML qualified name string
2915: *
2916: * [NS 5] QName ::= (Prefix ':')? LocalPart
2917: *
2918: * [NS 6] Prefix ::= NCName
2919: *
2920: * [NS 7] LocalPart ::= NCName
2921: *
2922: * Returns the local part, and prefix is updated
2923: * to get the Prefix if any.
2924: */
2925:
2926: xmlChar *
2927: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2928: xmlChar buf[XML_MAX_NAMELEN + 5];
2929: xmlChar *buffer = NULL;
2930: int len = 0;
2931: int max = XML_MAX_NAMELEN;
2932: xmlChar *ret = NULL;
2933: const xmlChar *cur = name;
2934: int c;
2935:
2936: if (prefix == NULL) return(NULL);
2937: *prefix = NULL;
2938:
2939: if (cur == NULL) return(NULL);
2940:
2941: #ifndef XML_XML_NAMESPACE
2942: /* xml: prefix is not really a namespace */
2943: if ((cur[0] == 'x') && (cur[1] == 'm') &&
2944: (cur[2] == 'l') && (cur[3] == ':'))
2945: return(xmlStrdup(name));
2946: #endif
2947:
2948: /* nasty but well=formed */
2949: if (cur[0] == ':')
2950: return(xmlStrdup(name));
2951:
2952: c = *cur++;
2953: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2954: buf[len++] = c;
2955: c = *cur++;
2956: }
2957: if (len >= max) {
2958: /*
2959: * Okay someone managed to make a huge name, so he's ready to pay
2960: * for the processing speed.
2961: */
2962: max = len * 2;
2963:
2964: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2965: if (buffer == NULL) {
2966: xmlErrMemory(ctxt, NULL);
2967: return(NULL);
2968: }
2969: memcpy(buffer, buf, len);
2970: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2971: if (len + 10 > max) {
2972: xmlChar *tmp;
2973:
2974: max *= 2;
2975: tmp = (xmlChar *) xmlRealloc(buffer,
2976: max * sizeof(xmlChar));
2977: if (tmp == NULL) {
2978: xmlFree(buffer);
2979: xmlErrMemory(ctxt, NULL);
2980: return(NULL);
2981: }
2982: buffer = tmp;
2983: }
2984: buffer[len++] = c;
2985: c = *cur++;
2986: }
2987: buffer[len] = 0;
2988: }
2989:
2990: if ((c == ':') && (*cur == 0)) {
2991: if (buffer != NULL)
2992: xmlFree(buffer);
2993: *prefix = NULL;
2994: return(xmlStrdup(name));
2995: }
2996:
2997: if (buffer == NULL)
2998: ret = xmlStrndup(buf, len);
2999: else {
3000: ret = buffer;
3001: buffer = NULL;
3002: max = XML_MAX_NAMELEN;
3003: }
3004:
3005:
3006: if (c == ':') {
3007: c = *cur;
3008: *prefix = ret;
3009: if (c == 0) {
3010: return(xmlStrndup(BAD_CAST "", 0));
3011: }
3012: len = 0;
3013:
3014: /*
3015: * Check that the first character is proper to start
3016: * a new name
3017: */
3018: if (!(((c >= 0x61) && (c <= 0x7A)) ||
3019: ((c >= 0x41) && (c <= 0x5A)) ||
3020: (c == '_') || (c == ':'))) {
3021: int l;
3022: int first = CUR_SCHAR(cur, l);
3023:
3024: if (!IS_LETTER(first) && (first != '_')) {
3025: xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3026: "Name %s is not XML Namespace compliant\n",
3027: name);
3028: }
3029: }
3030: cur++;
3031:
3032: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3033: buf[len++] = c;
3034: c = *cur++;
3035: }
3036: if (len >= max) {
3037: /*
3038: * Okay someone managed to make a huge name, so he's ready to pay
3039: * for the processing speed.
3040: */
3041: max = len * 2;
3042:
3043: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3044: if (buffer == NULL) {
3045: xmlErrMemory(ctxt, NULL);
3046: return(NULL);
3047: }
3048: memcpy(buffer, buf, len);
3049: while (c != 0) { /* tested bigname2.xml */
3050: if (len + 10 > max) {
3051: xmlChar *tmp;
3052:
3053: max *= 2;
3054: tmp = (xmlChar *) xmlRealloc(buffer,
3055: max * sizeof(xmlChar));
3056: if (tmp == NULL) {
3057: xmlErrMemory(ctxt, NULL);
3058: xmlFree(buffer);
3059: return(NULL);
3060: }
3061: buffer = tmp;
3062: }
3063: buffer[len++] = c;
3064: c = *cur++;
3065: }
3066: buffer[len] = 0;
3067: }
3068:
3069: if (buffer == NULL)
3070: ret = xmlStrndup(buf, len);
3071: else {
3072: ret = buffer;
3073: }
3074: }
3075:
3076: return(ret);
3077: }
3078:
3079: /************************************************************************
3080: * *
3081: * The parser itself *
3082: * Relates to http://www.w3.org/TR/REC-xml *
3083: * *
3084: ************************************************************************/
3085:
3086: /************************************************************************
3087: * *
3088: * Routines to parse Name, NCName and NmToken *
3089: * *
3090: ************************************************************************/
#ifdef DEBUG
/*
 * Statistics counters, only compiled in when DEBUG is defined.
 * Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3099:
3100: /*
3101: * The two following functions are related to the change of accepted
3102: * characters for Name and NmToken in the Revision 5 of XML-1.0
3103: * They correspond to the modified production [4] and the new production [4a]
3104: * changes in that revision. Also note that the macros used for the
3105: * productions Letter, Digit, CombiningChar and Extender are not needed
3106: * anymore.
3107: * We still keep compatibility to pre-revision5 parsing semantic if the
3108: * new XML_PARSE_OLD10 option is given to the parser.
3109: */
3110: static int
3111: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3112: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3113: /*
3114: * Use the new checks of production [4] [4a] amd [5] of the
3115: * Update 5 of XML-1.0
3116: */
3117: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3118: (((c >= 'a') && (c <= 'z')) ||
3119: ((c >= 'A') && (c <= 'Z')) ||
3120: (c == '_') || (c == ':') ||
3121: ((c >= 0xC0) && (c <= 0xD6)) ||
3122: ((c >= 0xD8) && (c <= 0xF6)) ||
3123: ((c >= 0xF8) && (c <= 0x2FF)) ||
3124: ((c >= 0x370) && (c <= 0x37D)) ||
3125: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3126: ((c >= 0x200C) && (c <= 0x200D)) ||
3127: ((c >= 0x2070) && (c <= 0x218F)) ||
3128: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132: ((c >= 0x10000) && (c <= 0xEFFFF))))
3133: return(1);
3134: } else {
3135: if (IS_LETTER(c) || (c == '_') || (c == ':'))
3136: return(1);
3137: }
3138: return(0);
3139: }
3140:
3141: static int
3142: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3143: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3144: /*
3145: * Use the new checks of production [4] [4a] amd [5] of the
3146: * Update 5 of XML-1.0
3147: */
3148: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3149: (((c >= 'a') && (c <= 'z')) ||
3150: ((c >= 'A') && (c <= 'Z')) ||
3151: ((c >= '0') && (c <= '9')) || /* !start */
3152: (c == '_') || (c == ':') ||
3153: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3154: ((c >= 0xC0) && (c <= 0xD6)) ||
3155: ((c >= 0xD8) && (c <= 0xF6)) ||
3156: ((c >= 0xF8) && (c <= 0x2FF)) ||
3157: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3158: ((c >= 0x370) && (c <= 0x37D)) ||
3159: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3160: ((c >= 0x200C) && (c <= 0x200D)) ||
3161: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3162: ((c >= 0x2070) && (c <= 0x218F)) ||
3163: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3164: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3165: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3166: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3167: ((c >= 0x10000) && (c <= 0xEFFFF))))
3168: return(1);
3169: } else {
3170: if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3171: (c == '.') || (c == '-') ||
3172: (c == '_') || (c == ':') ||
3173: (IS_COMBINING(c)) ||
3174: (IS_EXTENDER(c)))
3175: return(1);
3176: }
3177: return(0);
3178: }
3179:
3180: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3181: int *len, int *alloc, int normalize);
3182:
3183: static const xmlChar *
3184: xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3185: int len = 0, l;
3186: int c;
3187: int count = 0;
3188:
3189: #ifdef DEBUG
3190: nbParseNameComplex++;
3191: #endif
3192:
3193: /*
3194: * Handler for more complex cases
3195: */
3196: GROW;
3197: c = CUR_CHAR(l);
3198: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3199: /*
3200: * Use the new checks of production [4] [4a] amd [5] of the
3201: * Update 5 of XML-1.0
3202: */
3203: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3204: (!(((c >= 'a') && (c <= 'z')) ||
3205: ((c >= 'A') && (c <= 'Z')) ||
3206: (c == '_') || (c == ':') ||
3207: ((c >= 0xC0) && (c <= 0xD6)) ||
3208: ((c >= 0xD8) && (c <= 0xF6)) ||
3209: ((c >= 0xF8) && (c <= 0x2FF)) ||
3210: ((c >= 0x370) && (c <= 0x37D)) ||
3211: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3212: ((c >= 0x200C) && (c <= 0x200D)) ||
3213: ((c >= 0x2070) && (c <= 0x218F)) ||
3214: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3215: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3216: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3217: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3218: ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3219: return(NULL);
3220: }
3221: len += l;
3222: NEXTL(l);
3223: c = CUR_CHAR(l);
3224: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3225: (((c >= 'a') && (c <= 'z')) ||
3226: ((c >= 'A') && (c <= 'Z')) ||
3227: ((c >= '0') && (c <= '9')) || /* !start */
3228: (c == '_') || (c == ':') ||
3229: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3230: ((c >= 0xC0) && (c <= 0xD6)) ||
3231: ((c >= 0xD8) && (c <= 0xF6)) ||
3232: ((c >= 0xF8) && (c <= 0x2FF)) ||
3233: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3234: ((c >= 0x370) && (c <= 0x37D)) ||
3235: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236: ((c >= 0x200C) && (c <= 0x200D)) ||
3237: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3238: ((c >= 0x2070) && (c <= 0x218F)) ||
3239: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3240: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3241: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3242: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3243: ((c >= 0x10000) && (c <= 0xEFFFF))
3244: )) {
3245: if (count++ > 100) {
3246: count = 0;
3247: GROW;
3248: }
3249: len += l;
3250: NEXTL(l);
3251: c = CUR_CHAR(l);
3252: }
3253: } else {
3254: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3255: (!IS_LETTER(c) && (c != '_') &&
3256: (c != ':'))) {
3257: return(NULL);
3258: }
3259: len += l;
3260: NEXTL(l);
3261: c = CUR_CHAR(l);
3262:
3263: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3264: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3265: (c == '.') || (c == '-') ||
3266: (c == '_') || (c == ':') ||
3267: (IS_COMBINING(c)) ||
3268: (IS_EXTENDER(c)))) {
3269: if (count++ > 100) {
3270: count = 0;
3271: GROW;
3272: }
3273: len += l;
3274: NEXTL(l);
3275: c = CUR_CHAR(l);
3276: }
3277: }
3278: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3279: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3280: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3281: }
3282:
3283: /**
3284: * xmlParseName:
3285: * @ctxt: an XML parser context
3286: *
3287: * parse an XML name.
3288: *
3289: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3290: * CombiningChar | Extender
3291: *
3292: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3293: *
3294: * [6] Names ::= Name (#x20 Name)*
3295: *
3296: * Returns the Name parsed or NULL
3297: */
3298:
3299: const xmlChar *
3300: xmlParseName(xmlParserCtxtPtr ctxt) {
3301: const xmlChar *in;
3302: const xmlChar *ret;
3303: int count = 0;
3304:
3305: GROW;
3306:
3307: #ifdef DEBUG
3308: nbParseName++;
3309: #endif
3310:
3311: /*
3312: * Accelerator for simple ASCII names
3313: */
3314: in = ctxt->input->cur;
3315: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3316: ((*in >= 0x41) && (*in <= 0x5A)) ||
3317: (*in == '_') || (*in == ':')) {
3318: in++;
3319: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3320: ((*in >= 0x41) && (*in <= 0x5A)) ||
3321: ((*in >= 0x30) && (*in <= 0x39)) ||
3322: (*in == '_') || (*in == '-') ||
3323: (*in == ':') || (*in == '.'))
3324: in++;
3325: if ((*in > 0) && (*in < 0x80)) {
3326: count = in - ctxt->input->cur;
3327: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3328: ctxt->input->cur = in;
3329: ctxt->nbChars += count;
3330: ctxt->input->col += count;
3331: if (ret == NULL)
3332: xmlErrMemory(ctxt, NULL);
3333: return(ret);
3334: }
3335: }
3336: /* accelerator for special cases */
3337: return(xmlParseNameComplex(ctxt));
3338: }
3339:
3340: static const xmlChar *
3341: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3342: int len = 0, l;
3343: int c;
3344: int count = 0;
3345:
3346: #ifdef DEBUG
3347: nbParseNCNameComplex++;
3348: #endif
3349:
3350: /*
3351: * Handler for more complex cases
3352: */
3353: GROW;
3354: c = CUR_CHAR(l);
3355: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3356: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3357: return(NULL);
3358: }
3359:
3360: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3361: (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3362: if (count++ > 100) {
3363: count = 0;
3364: GROW;
3365: }
3366: len += l;
3367: NEXTL(l);
3368: c = CUR_CHAR(l);
3369: }
3370: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3371: }
3372:
3373: /**
3374: * xmlParseNCName:
3375: * @ctxt: an XML parser context
3376: * @len: lenght of the string parsed
3377: *
3378: * parse an XML name.
3379: *
3380: * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3381: * CombiningChar | Extender
3382: *
3383: * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3384: *
3385: * Returns the Name parsed or NULL
3386: */
3387:
3388: static const xmlChar *
3389: xmlParseNCName(xmlParserCtxtPtr ctxt) {
3390: const xmlChar *in;
3391: const xmlChar *ret;
3392: int count = 0;
3393:
3394: #ifdef DEBUG
3395: nbParseNCName++;
3396: #endif
3397:
3398: /*
3399: * Accelerator for simple ASCII names
3400: */
3401: in = ctxt->input->cur;
3402: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3403: ((*in >= 0x41) && (*in <= 0x5A)) ||
3404: (*in == '_')) {
3405: in++;
3406: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3407: ((*in >= 0x41) && (*in <= 0x5A)) ||
3408: ((*in >= 0x30) && (*in <= 0x39)) ||
3409: (*in == '_') || (*in == '-') ||
3410: (*in == '.'))
3411: in++;
3412: if ((*in > 0) && (*in < 0x80)) {
3413: count = in - ctxt->input->cur;
3414: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3415: ctxt->input->cur = in;
3416: ctxt->nbChars += count;
3417: ctxt->input->col += count;
3418: if (ret == NULL) {
3419: xmlErrMemory(ctxt, NULL);
3420: }
3421: return(ret);
3422: }
3423: }
3424: return(xmlParseNCNameComplex(ctxt));
3425: }
3426:
3427: /**
3428: * xmlParseNameAndCompare:
3429: * @ctxt: an XML parser context
3430: *
3431: * parse an XML name and compares for match
3432: * (specialized for endtag parsing)
3433: *
3434: * Returns NULL for an illegal name, (xmlChar*) 1 for success
3435: * and the name for mismatch
3436: */
3437:
3438: static const xmlChar *
3439: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3440: register const xmlChar *cmp = other;
3441: register const xmlChar *in;
3442: const xmlChar *ret;
3443:
3444: GROW;
3445:
3446: in = ctxt->input->cur;
3447: while (*in != 0 && *in == *cmp) {
3448: ++in;
3449: ++cmp;
3450: ctxt->input->col++;
3451: }
3452: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3453: /* success */
3454: ctxt->input->cur = in;
3455: return (const xmlChar*) 1;
3456: }
3457: /* failure (or end of input buffer), check with full function */
3458: ret = xmlParseName (ctxt);
3459: /* strings coming from the dictionnary direct compare possible */
3460: if (ret == other) {
3461: return (const xmlChar*) 1;
3462: }
3463: return ret;
3464: }
3465:
3466: /**
3467: * xmlParseStringName:
3468: * @ctxt: an XML parser context
3469: * @str: a pointer to the string pointer (IN/OUT)
3470: *
3471: * parse an XML name.
3472: *
3473: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3474: * CombiningChar | Extender
3475: *
3476: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3477: *
3478: * [6] Names ::= Name (#x20 Name)*
3479: *
3480: * Returns the Name parsed or NULL. The @str pointer
3481: * is updated to the current location in the string.
3482: */
3483:
3484: static xmlChar *
3485: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3486: xmlChar buf[XML_MAX_NAMELEN + 5];
3487: const xmlChar *cur = *str;
3488: int len = 0, l;
3489: int c;
3490:
3491: #ifdef DEBUG
3492: nbParseStringName++;
3493: #endif
3494:
3495: c = CUR_SCHAR(cur, l);
3496: if (!xmlIsNameStartChar(ctxt, c)) {
3497: return(NULL);
3498: }
3499:
3500: COPY_BUF(l,buf,len,c);
3501: cur += l;
3502: c = CUR_SCHAR(cur, l);
3503: while (xmlIsNameChar(ctxt, c)) {
3504: COPY_BUF(l,buf,len,c);
3505: cur += l;
3506: c = CUR_SCHAR(cur, l);
3507: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3508: /*
3509: * Okay someone managed to make a huge name, so he's ready to pay
3510: * for the processing speed.
3511: */
3512: xmlChar *buffer;
3513: int max = len * 2;
3514:
3515: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3516: if (buffer == NULL) {
3517: xmlErrMemory(ctxt, NULL);
3518: return(NULL);
3519: }
3520: memcpy(buffer, buf, len);
3521: while (xmlIsNameChar(ctxt, c)) {
3522: if (len + 10 > max) {
3523: xmlChar *tmp;
3524: max *= 2;
3525: tmp = (xmlChar *) xmlRealloc(buffer,
3526: max * sizeof(xmlChar));
3527: if (tmp == NULL) {
3528: xmlErrMemory(ctxt, NULL);
3529: xmlFree(buffer);
3530: return(NULL);
3531: }
3532: buffer = tmp;
3533: }
3534: COPY_BUF(l,buffer,len,c);
3535: cur += l;
3536: c = CUR_SCHAR(cur, l);
3537: }
3538: buffer[len] = 0;
3539: *str = cur;
3540: return(buffer);
3541: }
3542: }
3543: *str = cur;
3544: return(xmlStrndup(buf, len));
3545: }
3546:
3547: /**
3548: * xmlParseNmtoken:
3549: * @ctxt: an XML parser context
3550: *
3551: * parse an XML Nmtoken.
3552: *
3553: * [7] Nmtoken ::= (NameChar)+
3554: *
3555: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3556: *
3557: * Returns the Nmtoken parsed or NULL
3558: */
3559:
3560: xmlChar *
3561: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3562: xmlChar buf[XML_MAX_NAMELEN + 5];
3563: int len = 0, l;
3564: int c;
3565: int count = 0;
3566:
3567: #ifdef DEBUG
3568: nbParseNmToken++;
3569: #endif
3570:
3571: GROW;
3572: c = CUR_CHAR(l);
3573:
3574: while (xmlIsNameChar(ctxt, c)) {
3575: if (count++ > 100) {
3576: count = 0;
3577: GROW;
3578: }
3579: COPY_BUF(l,buf,len,c);
3580: NEXTL(l);
3581: c = CUR_CHAR(l);
3582: if (len >= XML_MAX_NAMELEN) {
3583: /*
3584: * Okay someone managed to make a huge token, so he's ready to pay
3585: * for the processing speed.
3586: */
3587: xmlChar *buffer;
3588: int max = len * 2;
3589:
3590: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3591: if (buffer == NULL) {
3592: xmlErrMemory(ctxt, NULL);
3593: return(NULL);
3594: }
3595: memcpy(buffer, buf, len);
3596: while (xmlIsNameChar(ctxt, c)) {
3597: if (count++ > 100) {
3598: count = 0;
3599: GROW;
3600: }
3601: if (len + 10 > max) {
3602: xmlChar *tmp;
3603:
3604: max *= 2;
3605: tmp = (xmlChar *) xmlRealloc(buffer,
3606: max * sizeof(xmlChar));
3607: if (tmp == NULL) {
3608: xmlErrMemory(ctxt, NULL);
3609: xmlFree(buffer);
3610: return(NULL);
3611: }
3612: buffer = tmp;
3613: }
3614: COPY_BUF(l,buffer,len,c);
3615: NEXTL(l);
3616: c = CUR_CHAR(l);
3617: }
3618: buffer[len] = 0;
3619: return(buffer);
3620: }
3621: }
3622: if (len == 0)
3623: return(NULL);
3624: return(xmlStrndup(buf, len));
3625: }
3626:
3627: /**
3628: * xmlParseEntityValue:
3629: * @ctxt: an XML parser context
3630: * @orig: if non-NULL, receives on success the buffer holding the value
 *        as read from the input (before PE substitution); in that case
 *        the buffer is not freed here
3631: *
3632: * parse a value for ENTITY declarations
3633: *
3634: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3635: * "'" ([^%&'] | PEReference | Reference)* "'"
3636: *
3637: * Returns the EntityValue parsed with reference substituted or NULL
 * (NULL when the value does not start with a quote, on allocation
 * failure, or when the closing quote is not found)
3638: */
3639:
3640: xmlChar *
3641: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3642: xmlChar *buf = NULL;
3643: int len = 0;
3644: int size = XML_PARSER_BUFFER_SIZE;
3645: int c, l;
3646: xmlChar stop;
3647: xmlChar *ret = NULL;
3648: const xmlChar *cur = NULL;
3649: xmlParserInputPtr input;
3650:
/* the opening quote selects the character closing the literal */
3651: if (RAW == '"') stop = '"';
3652: else if (RAW == '\'') stop = '\'';
3653: else {
3654: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3655: return(NULL);
3656: }
3657: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3658: if (buf == NULL) {
3659: xmlErrMemory(ctxt, NULL);
3660: return(NULL);
3661: }
3662:
3663: /*
3664: * The content of the entity definition is copied in a buffer.
3665: */
3666:
/* NOTE(review): instate is not restored anywhere in this function -
 * confirm that callers reset it */
3667: ctxt->instate = XML_PARSER_ENTITY_VALUE;
/* remember the input holding the opening quote, see the NOTE below */
3668: input = ctxt->input;
3669: GROW;
3670: NEXT;
3671: c = CUR_CHAR(l);
3672: /*
3673: * NOTE: 4.4.5 Included in Literal
3674: * When a parameter entity reference appears in a literal entity
3675: * value, ... a single or double quote character in the replacement
3676: * text is always treated as a normal data character and will not
3677: * terminate the literal.
3678: * In practice it means we stop the loop only when back at parsing
3679: * the initial entity and the quote is found
3680: */
3681: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3682: (ctxt->input != input))) {
/* double the buffer when fewer than 6 bytes remain; presumably room
 * for one multi-byte character plus the terminator */
3683: if (len + 5 >= size) {
3684: xmlChar *tmp;
3685:
3686: size *= 2;
3687: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3688: if (tmp == NULL) {
3689: xmlErrMemory(ctxt, NULL);
3690: xmlFree(buf);
3691: return(NULL);
3692: }
3693: buf = tmp;
3694: }
3695: COPY_BUF(l,buf,len,c);
3696: NEXTL(l);
3697: /*
3698: * Pop-up of finished entities.
3699: */
3700: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3701: xmlPopInput(ctxt);
3702:
3703: GROW;
3704: c = CUR_CHAR(l);
/* a zero character may only mean more data has to be loaded: retry once */
3705: if (c == 0) {
3706: GROW;
3707: c = CUR_CHAR(l);
3708: }
3709: }
3710: buf[len] = 0;
3711:
3712: /*
3713: * Raise problem w.r.t. '&' and '%' being used in non-entities
3714: * reference constructs. Note Charref will be handled in
3715: * xmlStringDecodeEntities()
3716: */
3717: cur = buf;
3718: while (*cur != 0) { /* non input consuming */
3719: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3720: xmlChar *name;
3721: xmlChar tmp = *cur;
3722:
3723: cur++;
/* a well formed reference is a name immediately followed by ';' */
3724: name = xmlParseStringName(ctxt, &cur);
3725: if ((name == NULL) || (*cur != ';')) {
3726: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3727: "EntityValue: '%c' forbidden except for entities references\n",
3728: tmp);
3729: }
/* a '%' found while inSubset is 1 and only one input is on the stack is
 * reported as XML_ERR_ENTITY_PE_INTERNAL */
3730: if ((tmp == '%') && (ctxt->inSubset == 1) &&
3731: (ctxt->inputNr == 1)) {
3732: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3733: }
3734: if (name != NULL)
3735: xmlFree(name);
/* do not step past the terminator with the cur++ below */
3736: if (*cur == 0)
3737: break;
3738: }
3739: cur++;
3740: }
3741:
3742: /*
3743: * Then PEReference entities are substituted.
3744: */
/* the copy loop stopped on something else than the closing quote: the
 * literal is unfinished, the buffer is dropped and NULL is returned */
3745: if (c != stop) {
3746: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3747: xmlFree(buf);
3748: } else {
3749: NEXT;
3750: /*
3751: * NOTE: 4.4.7 Bypassed
3752: * When a general entity reference appears in the EntityValue in
3753: * an entity declaration, it is bypassed and left as is.
3754: * so XML_SUBSTITUTE_REF is not set here.
3755: */
3756: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3757: 0, 0, 0);
/* hand the raw buffer over through @orig, or release it */
3758: if (orig != NULL)
3759: *orig = buf;
3760: else
3761: xmlFree(buf);
3762: }
3763:
3764: return(ret);
3765: }
3766:
3767: /**
3768: * xmlParseAttValueComplex:
3769: * @ctxt: an XML parser context
3770: * @attlen: if non-NULL, receives the length in bytes of the returned value
3771: * @normalize: whether to apply the inner normalization
3772: *
3773: * parse a value for an attribute, this is the fallback function
3774: * of xmlParseAttValue() when the attribute parsing requires handling
3775: * of non-ASCII characters, or normalization compaction.
3776: *
3777: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3778: */
3779: static xmlChar *
3780: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3781: xmlChar limit = 0;
3782: xmlChar *buf = NULL;
3783: xmlChar *rep = NULL;
1.1.1.2.2.1! misho 3784: size_t len = 0;
! 3785: size_t buf_size = 0;
1.1 misho 3786: int c, l, in_space = 0;
3787: xmlChar *current = NULL;
3788: xmlEntityPtr ent;
3789:
/* the opening quote selects the character closing the value */
3790: if (NXT(0) == '"') {
3791: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3792: limit = '"';
3793: NEXT;
3794: } else if (NXT(0) == '\'') {
3795: limit = '\'';
3796: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3797: NEXT;
3798: } else {
3799: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3800: return(NULL);
3801: }
3802:
3803: /*
3804: * allocate a translation buffer.
3805: */
/* NOTE(review): growBuffer() used below is a macro defined outside this
 * function; presumably it enlarges buf, updates buf_size and jumps to
 * mem_error on failure - confirm against its definition */
3806: buf_size = XML_PARSER_BUFFER_SIZE;
1.1.1.2.2.1! misho 3807: buf = (xmlChar *) xmlMallocAtomic(buf_size);
1.1 misho 3808: if (buf == NULL) goto mem_error;
3809:
3810: /*
3811: * OK loop until we reach one of the ending char or a size limit.
3812: */
3813: c = CUR_CHAR(l);
3814: while ((NXT(0) != limit) && /* checked */
3815: (IS_CHAR(c)) && (c != '<')) {
3816: if (c == 0) break;
3817: if (c == '&') {
3818: in_space = 0;
/* "&#...": character reference */
3819: if (NXT(1) == '#') {
3820: int val = xmlParseCharRef(ctxt);
3821:
/* a referenced '&' is stored literally when entities are replaced,
 * otherwise it is kept escaped as "&#38;" */
3822: if (val == '&') {
3823: if (ctxt->replaceEntities) {
1.1.1.2.2.1! misho 3824: if (len + 10 > buf_size) {
1.1 misho 3825: growBuffer(buf, 10);
3826: }
3827: buf[len++] = '&';
3828: } else {
3829: /*
3830: * The reparsing will be done in xmlStringGetNodeList()
3831: * called by the attribute() function in SAX.c
3832: */
1.1.1.2.2.1! misho 3833: if (len + 10 > buf_size) {
1.1 misho 3834: growBuffer(buf, 10);
3835: }
3836: buf[len++] = '&';
3837: buf[len++] = '#';
3838: buf[len++] = '3';
3839: buf[len++] = '8';
3840: buf[len++] = ';';
3841: }
3842: } else if (val != 0) {
1.1.1.2.2.1! misho 3843: if (len + 10 > buf_size) {
1.1 misho 3844: growBuffer(buf, 10);
3845: }
3846: len += xmlCopyChar(0, &buf[len], val);
3847: }
3848: } else {
/* "&name;": general entity reference */
3849: ent = xmlParseEntityRef(ctxt);
3850: ctxt->nbentities++;
3851: if (ent != NULL)
3852: ctxt->nbentities += ent->owner;
/* predefined entity: its single character is copied, except that '&'
 * stays escaped as "&#38;" when entities are not replaced */
3853: if ((ent != NULL) &&
3854: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.1.1.2.2.1! misho 3855: if (len + 10 > buf_size) {
1.1 misho 3856: growBuffer(buf, 10);
3857: }
3858: if ((ctxt->replaceEntities == 0) &&
3859: (ent->content[0] == '&')) {
3860: buf[len++] = '&';
3861: buf[len++] = '#';
3862: buf[len++] = '3';
3863: buf[len++] = '8';
3864: buf[len++] = ';';
3865: } else {
3866: buf[len++] = ent->content[0];
3867: }
3868: } else if ((ent != NULL) &&
3869: (ctxt->replaceEntities != 0)) {
/* entity replacement requested: the decoded content is appended with
 * CR, LF and TAB each turned into a space */
3870: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3871: rep = xmlStringDecodeEntities(ctxt, ent->content,
3872: XML_SUBSTITUTE_REF,
3873: 0, 0, 0);
3874: if (rep != NULL) {
3875: current = rep;
3876: while (*current != 0) { /* non input consuming */
3877: if ((*current == 0xD) || (*current == 0xA) ||
3878: (*current == 0x9)) {
3879: buf[len++] = 0x20;
3880: current++;
3881: } else
3882: buf[len++] = *current++;
1.1.1.2.2.1! misho 3883: if (len + 10 > buf_size) {
1.1 misho 3884: growBuffer(buf, 10);
3885: }
3886: }
3887: xmlFree(rep);
3888: rep = NULL;
3889: }
3890: } else {
1.1.1.2.2.1! misho 3891: if (len + 10 > buf_size) {
1.1 misho 3892: growBuffer(buf, 10);
3893: }
3894: if (ent->content != NULL)
3895: buf[len++] = ent->content[0];
3896: }
3897: } else if (ent != NULL) {
/* no replacement: the reference itself ("&name;") is written out */
3898: int i = xmlStrlen(ent->name);
3899: const xmlChar *cur = ent->name;
3900:
3901: /*
3902: * This may look absurd but is needed to detect
3903: * entities problems
3904: */
3905: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3906: (ent->content != NULL)) {
3907: rep = xmlStringDecodeEntities(ctxt, ent->content,
3908: XML_SUBSTITUTE_REF, 0, 0, 0);
3909: if (rep != NULL) {
3910: xmlFree(rep);
3911: rep = NULL;
3912: }
3913: }
3914:
3915: /*
3916: * Just output the reference
3917: */
/* NOTE(review): the '&' is stored before the capacity check below; this
 * relies on the slack kept by the previous checks */
3918: buf[len++] = '&';
1.1.1.2.2.1! misho 3919: while (len + i + 10 > buf_size) {
1.1 misho 3920: growBuffer(buf, i + 10);
3921: }
3922: for (;i > 0;i--)
3923: buf[len++] = *cur++;
3924: buf[len++] = ';';
3925: }
3926: }
3927: } else {
/* plain character: every white space character is stored as #x20; when
 * normalizing, leading white space (len == 0) is dropped and in_space
 * collapses a run into a single space */
3928: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3929: if ((len != 0) || (!normalize)) {
3930: if ((!normalize) || (!in_space)) {
3931: COPY_BUF(l,buf,len,0x20);
1.1.1.2.2.1! misho 3932: while (len + 10 > buf_size) {
1.1 misho 3933: growBuffer(buf, 10);
3934: }
3935: }
3936: in_space = 1;
3937: }
3938: } else {
3939: in_space = 0;
3940: COPY_BUF(l,buf,len,c);
1.1.1.2.2.1! misho 3941: if (len + 10 > buf_size) {
1.1 misho 3942: growBuffer(buf, 10);
3943: }
3944: }
3945: NEXTL(l);
3946: }
3947: GROW;
3948: c = CUR_CHAR(l);
3949: }
/* when normalizing, trailing spaces are removed; the len > 0 test keeps
 * the index from going below the start of the buffer */
3950: if ((in_space) && (normalize)) {
1.1.1.2.2.1! misho 3951: while ((len > 0) && (buf[len - 1] == 0x20)) len--;
1.1 misho 3952: }
3953: buf[len] = 0;
/* report why the loop stopped if it was not on the closing quote; the
 * value gathered so far is still returned in those cases */
3954: if (RAW == '<') {
3955: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3956: } else if (RAW != limit) {
3957: if ((c != 0) && (!IS_CHAR(c))) {
3958: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3959: "invalid character in attribute value\n");
3960: } else {
3961: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3962: "AttValue: ' expected\n");
3963: }
3964: } else
3965: NEXT;
1.1.1.2.2.1! misho 3966:
! 3967: /*
! 3968: * There we potentially risk an overflow, don't allow attribute value of
! 3969: * length more than INT_MAX it is a very reasonable assumption !
 * (len is a size_t but is reported to the caller through an int)
! 3970: */
! 3971: if (len >= INT_MAX) {
! 3972: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
! 3973: "AttValue lenght too long\n");
/* NOTE(review): this path also reports a memory error through mem_error */
! 3974: goto mem_error;
! 3975: }
! 3976:
! 3977: if (attlen != NULL) *attlen = (int) len;
1.1 misho 3978: return(buf);
3979:
/* common failure exit: report a memory error and release both buffers */
3980: mem_error:
3981: xmlErrMemory(ctxt, NULL);
3982: if (buf != NULL)
3983: xmlFree(buf);
3984: if (rep != NULL)
3985: xmlFree(rep);
3986: return(NULL);
3987: }
3988:
3989: /**
3990: * xmlParseAttValue:
3991: * @ctxt: an XML parser context
3992: *
3993: * parse a value for an attribute
3994: * Note: the parser won't do substitution of entities here, this
3995: * will be handled later in xmlStringGetNodeList
3996: *
3997: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3998: * "'" ([^<&'] | Reference)* "'"
3999: *
4000: * 3.3.3 Attribute-Value Normalization:
4001: * Before the value of an attribute is passed to the application or
4002: * checked for validity, the XML processor must normalize it as follows:
4003: * - a character reference is processed by appending the referenced
4004: * character to the attribute value
4005: * - an entity reference is processed by recursively processing the
4006: * replacement text of the entity
4007: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4008: * appending #x20 to the normalized value, except that only a single
4009: * #x20 is appended for a "#xD#xA" sequence that is part of an external
4010: * parsed entity or the literal entity value of an internal parsed entity
4011: * - other characters are processed by appending them to the normalized value
4012: * If the declared value is not CDATA, then the XML processor must further
4013: * process the normalized attribute value by discarding any leading and
4014: * trailing space (#x20) characters, and by replacing sequences of space
4015: * (#x20) characters by a single space (#x20) character.
4016: * All attributes for which no declaration has been read should be treated
4017: * by a non-validating parser as if declared CDATA.
4018: *
4019: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4020: */
4021:
4022:
4023: xmlChar *
4024: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4025: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4026: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4027: }
4028:
4029: /**
4030: * xmlParseSystemLiteral:
4031: * @ctxt: an XML parser context
4032: *
4033: * parse an XML Literal
4034: *
4035: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4036: *
4037: * Returns the SystemLiteral parsed or NULL
4038: */
4039:
4040: xmlChar *
4041: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4042: xmlChar *buf = NULL;
4043: int len = 0;
4044: int size = XML_PARSER_BUFFER_SIZE;
4045: int cur, l;
4046: xmlChar stop;
4047: int state = ctxt->instate;
4048: int count = 0;
4049:
4050: SHRINK;
4051: if (RAW == '"') {
4052: NEXT;
4053: stop = '"';
4054: } else if (RAW == '\'') {
4055: NEXT;
4056: stop = '\'';
4057: } else {
4058: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4059: return(NULL);
4060: }
4061:
4062: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4063: if (buf == NULL) {
4064: xmlErrMemory(ctxt, NULL);
4065: return(NULL);
4066: }
4067: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4068: cur = CUR_CHAR(l);
4069: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4070: if (len + 5 >= size) {
4071: xmlChar *tmp;
4072:
4073: size *= 2;
4074: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4075: if (tmp == NULL) {
4076: xmlFree(buf);
4077: xmlErrMemory(ctxt, NULL);
4078: ctxt->instate = (xmlParserInputState) state;
4079: return(NULL);
4080: }
4081: buf = tmp;
4082: }
4083: count++;
4084: if (count > 50) {
4085: GROW;
4086: count = 0;
4087: }
4088: COPY_BUF(l,buf,len,cur);
4089: NEXTL(l);
4090: cur = CUR_CHAR(l);
4091: if (cur == 0) {
4092: GROW;
4093: SHRINK;
4094: cur = CUR_CHAR(l);
4095: }
4096: }
4097: buf[len] = 0;
4098: ctxt->instate = (xmlParserInputState) state;
4099: if (!IS_CHAR(cur)) {
4100: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4101: } else {
4102: NEXT;
4103: }
4104: return(buf);
4105: }
4106:
4107: /**
4108: * xmlParsePubidLiteral:
4109: * @ctxt: an XML parser context
4110: *
4111: * parse an XML public literal
4112: *
4113: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4114: *
4115: * Returns the PubidLiteral parsed or NULL.
4116: */
4117:
4118: xmlChar *
4119: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4120: xmlChar *buf = NULL;
4121: int len = 0;
4122: int size = XML_PARSER_BUFFER_SIZE;
4123: xmlChar cur;
4124: xmlChar stop;
4125: int count = 0;
4126: xmlParserInputState oldstate = ctxt->instate;
4127:
4128: SHRINK;
4129: if (RAW == '"') {
4130: NEXT;
4131: stop = '"';
4132: } else if (RAW == '\'') {
4133: NEXT;
4134: stop = '\'';
4135: } else {
4136: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4137: return(NULL);
4138: }
4139: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4140: if (buf == NULL) {
4141: xmlErrMemory(ctxt, NULL);
4142: return(NULL);
4143: }
4144: ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4145: cur = CUR;
4146: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4147: if (len + 1 >= size) {
4148: xmlChar *tmp;
4149:
4150: size *= 2;
4151: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4152: if (tmp == NULL) {
4153: xmlErrMemory(ctxt, NULL);
4154: xmlFree(buf);
4155: return(NULL);
4156: }
4157: buf = tmp;
4158: }
4159: buf[len++] = cur;
4160: count++;
4161: if (count > 50) {
4162: GROW;
4163: count = 0;
4164: }
4165: NEXT;
4166: cur = CUR;
4167: if (cur == 0) {
4168: GROW;
4169: SHRINK;
4170: cur = CUR;
4171: }
4172: }
4173: buf[len] = 0;
4174: if (cur != stop) {
4175: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4176: } else {
4177: NEXT;
4178: }
4179: ctxt->instate = oldstate;
4180: return(buf);
4181: }
4182:
4183: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4184:
4185: /*
4186: * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry is non-zero (it holds the byte value
 * itself) for TAB (0x09) and for 0x20..0x7F except '&' (0x26),
 * '<' (0x3C) and ']' (0x5D). Every other entry is zero: the remaining
 * control characters, including LF and CR, and all bytes >= 0x80.
4187: */
4188: static const unsigned char test_char_data[256] = {
4189: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4190: 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4191: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4192: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4193: 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4194: 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4195: 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4196: 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4197: 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4198: 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4199: 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4200: 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4201: 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4202: 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4203: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4204: 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4205: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4206: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4207: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4208: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4209: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4210: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4211: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4212: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4213: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4214: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4215: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4216: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4217: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4218: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4219: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4220: 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4221: };
4222:
4223: /**
4224: * xmlParseCharData:
4225: * @ctxt: an XML parser context
4226: * @cdata: int indicating whether we are within a CDATA section
4227: *
4228: * parse a CharData section.
4229: * if we are within a CDATA section ']]>' marks an end of section.
4230: *
4231: * The right angle bracket (>) may be represented using the string "&gt;",
4232: * and must, for compatibility, be escaped using "&gt;" or a character
4233: * reference when it appears in the string "]]>" in content, when that
4234: * string is not marking the end of a CDATA section.
4235: *
4236: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero, runs of ASCII text are handed to the SAX
 * callbacks straight from the input buffer; everything else is left to
 * xmlParseCharDataComplex().
4237: */
4238:
4239: void
4240: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4241: const xmlChar *in;
4242: int nbchar = 0;
/* line/col hold the position matching input->cur; they are refreshed after
 * each delivery and written back before calling the complex routine */
4243: int line = ctxt->input->line;
4244: int col = ctxt->input->col;
4245: int ccol;
4246:
4247: SHRINK;
4248: GROW;
4249: /*
4250: * Accelerated common case where input don't need to be
4251: * modified before passing it to the handler.
4252: */
4253: if (!cdata) {
4254: in = ctxt->input->cur;
4255: do {
/* step 1: skip a leading run of spaces and line feeds */
4256: get_more_space:
4257: while (*in == 0x20) { in++; ctxt->input->col++; }
4258: if (*in == 0xA) {
4259: do {
4260: ctxt->input->line++; ctxt->input->col = 1;
4261: in++;
4262: } while (*in == 0xA);
4263: goto get_more_space;
4264: }
/* the run is directly followed by markup: deliver it, through
 * ignorableWhitespace() if areBlanks() accepts it, else characters() */
4265: if (*in == '<') {
4266: nbchar = in - ctxt->input->cur;
4267: if (nbchar > 0) {
4268: const xmlChar *tmp = ctxt->input->cur;
4269: ctxt->input->cur = in;
4270:
4271: if ((ctxt->sax != NULL) &&
4272: (ctxt->sax->ignorableWhitespace !=
4273: ctxt->sax->characters)) {
4274: if (areBlanks(ctxt, tmp, nbchar, 1)) {
4275: if (ctxt->sax->ignorableWhitespace != NULL)
4276: ctxt->sax->ignorableWhitespace(ctxt->userData,
4277: tmp, nbchar);
4278: } else {
4279: if (ctxt->sax->characters != NULL)
4280: ctxt->sax->characters(ctxt->userData,
4281: tmp, nbchar);
4282: if (*ctxt->space == -1)
4283: *ctxt->space = -2;
4284: }
4285: } else if ((ctxt->sax != NULL) &&
4286: (ctxt->sax->characters != NULL)) {
4287: ctxt->sax->characters(ctxt->userData,
4288: tmp, nbchar);
4289: }
4290: }
4291: return;
4292: }
4293:
/* step 2: skip every byte flagged in test_char_data, counting columns */
4294: get_more:
4295: ccol = ctxt->input->col;
4296: while (test_char_data[*in]) {
4297: in++;
4298: ccol++;
4299: }
4300: ctxt->input->col = ccol;
4301: if (*in == 0xA) {
4302: do {
4303: ctxt->input->line++; ctxt->input->col = 1;
4304: in++;
4305: } while (*in == 0xA);
4306: goto get_more;
4307: }
/* a ']' is plain data unless it starts "]]>", which is an error here */
4308: if (*in == ']') {
4309: if ((in[1] == ']') && (in[2] == '>')) {
4310: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4311: ctxt->input->cur = in;
4312: return;
4313: }
4314: in++;
4315: ctxt->input->col++;
4316: goto get_more;
4317: }
/* step 3: deliver the text scanned so far */
4318: nbchar = in - ctxt->input->cur;
4319: if (nbchar > 0) {
4320: if ((ctxt->sax != NULL) &&
4321: (ctxt->sax->ignorableWhitespace !=
4322: ctxt->sax->characters) &&
4323: (IS_BLANK_CH(*ctxt->input->cur))) {
4324: const xmlChar *tmp = ctxt->input->cur;
4325: ctxt->input->cur = in;
4326:
4327: if (areBlanks(ctxt, tmp, nbchar, 0)) {
4328: if (ctxt->sax->ignorableWhitespace != NULL)
4329: ctxt->sax->ignorableWhitespace(ctxt->userData,
4330: tmp, nbchar);
4331: } else {
4332: if (ctxt->sax->characters != NULL)
4333: ctxt->sax->characters(ctxt->userData,
4334: tmp, nbchar);
4335: if (*ctxt->space == -1)
4336: *ctxt->space = -2;
4337: }
4338: line = ctxt->input->line;
4339: col = ctxt->input->col;
4340: } else if (ctxt->sax != NULL) {
4341: if (ctxt->sax->characters != NULL)
4342: ctxt->sax->characters(ctxt->userData,
4343: ctxt->input->cur, nbchar);
4344: line = ctxt->input->line;
4345: col = ctxt->input->col;
4346: }
4347: /* something really bad happened in the SAX callback */
4348: if (ctxt->instate != XML_PARSER_CONTENT)
4349: return;
4350: }
4351: ctxt->input->cur = in;
/* a "\r\n" pair: cur is left on the '\n' and the scan goes on after it;
 * a lone '\r' is left for the complex routine */
4352: if (*in == 0xD) {
4353: in++;
4354: if (*in == 0xA) {
4355: ctxt->input->cur = in;
4356: in++;
4357: ctxt->input->line++; ctxt->input->col = 1;
4358: continue; /* while */
4359: }
4360: in--;
4361: }
/* markup or a reference ends the character data */
4362: if (*in == '<') {
4363: return;
4364: }
4365: if (*in == '&') {
4366: return;
4367: }
/* reload the input and go on while the next byte is printable ASCII or TAB */
4368: SHRINK;
4369: GROW;
4370: in = ctxt->input->cur;
4371: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4372: nbchar = 0;
4373: }
/* put back the position saved at entry or after the last delivery
 * (presumably because the fast scan may have moved line/col past
 * input->cur), then let the generic routine take over */
4374: ctxt->input->line = line;
4375: ctxt->input->col = col;
4376: xmlParseCharDataComplex(ctxt, cdata);
4377: }
4378:
4379: /**
4380: * xmlParseCharDataComplex:
4381: * @ctxt: an XML parser context
4382: * @cdata: int indicating whether we are within a CDATA section
4383: *
4384: * parse a CharData section.this is the fallback function
4385: * of xmlParseCharData() when the parsing requires handling
4386: * of non-ASCII characters.
4387: */
4388: static void
4389: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4390: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4391: int nbchar = 0;
4392: int cur, l;
4393: int count = 0;
4394:
4395: SHRINK;
4396: GROW;
4397: cur = CUR_CHAR(l);
4398: while ((cur != '<') && /* checked */
4399: (cur != '&') &&
4400: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4401: if ((cur == ']') && (NXT(1) == ']') &&
4402: (NXT(2) == '>')) {
4403: if (cdata) break;
4404: else {
4405: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4406: }
4407: }
4408: COPY_BUF(l,buf,nbchar,cur);
4409: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4410: buf[nbchar] = 0;
4411:
4412: /*
4413: * OK the segment is to be consumed as chars.
4414: */
4415: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4416: if (areBlanks(ctxt, buf, nbchar, 0)) {
4417: if (ctxt->sax->ignorableWhitespace != NULL)
4418: ctxt->sax->ignorableWhitespace(ctxt->userData,
4419: buf, nbchar);
4420: } else {
4421: if (ctxt->sax->characters != NULL)
4422: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4423: if ((ctxt->sax->characters !=
4424: ctxt->sax->ignorableWhitespace) &&
4425: (*ctxt->space == -1))
4426: *ctxt->space = -2;
4427: }
4428: }
4429: nbchar = 0;
4430: /* something really bad happened in the SAX callback */
4431: if (ctxt->instate != XML_PARSER_CONTENT)
4432: return;
4433: }
4434: count++;
4435: if (count > 50) {
4436: GROW;
4437: count = 0;
4438: }
4439: NEXTL(l);
4440: cur = CUR_CHAR(l);
4441: }
4442: if (nbchar != 0) {
4443: buf[nbchar] = 0;
4444: /*
4445: * OK the segment is to be consumed as chars.
4446: */
4447: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4448: if (areBlanks(ctxt, buf, nbchar, 0)) {
4449: if (ctxt->sax->ignorableWhitespace != NULL)
4450: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4451: } else {
4452: if (ctxt->sax->characters != NULL)
4453: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4454: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4455: (*ctxt->space == -1))
4456: *ctxt->space = -2;
4457: }
4458: }
4459: }
4460: if ((cur != 0) && (!IS_CHAR(cur))) {
4461: /* Generate the error and skip the offending character */
4462: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4463: "PCDATA invalid Char value %d\n",
4464: cur);
4465: NEXTL(l);
4466: }
4467: }
4468:
4469: /**
4470: * xmlParseExternalID:
4471: * @ctxt: an XML parser context
4472: * @publicID: a xmlChar** receiving PubidLiteral
4473: * @strict: indicate whether we should restrict parsing to only
4474: * production [75], see NOTE below
4475: *
4476: * Parse an External ID or a Public ID
4477: *
4478: * NOTE: Productions [75] and [83] interact badly since [75] can generate
4479: * 'PUBLIC' S PubidLiteral S SystemLiteral
4480: *
4481: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4482: * | 'PUBLIC' S PubidLiteral S SystemLiteral
4483: *
4484: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4485: *
4486: * Returns the function returns SystemLiteral and in the second
4487: * case publicID receives PubidLiteral, is strict is off
4488: * it is possible to return NULL and have publicID set.
4489: */
4490:
4491: xmlChar *
4492: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4493: xmlChar *URI = NULL;
4494:
4495: SHRINK;
4496:
4497: *publicID = NULL;
4498: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4499: SKIP(6);
4500: if (!IS_BLANK_CH(CUR)) {
4501: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4502: "Space required after 'SYSTEM'\n");
4503: }
4504: SKIP_BLANKS;
4505: URI = xmlParseSystemLiteral(ctxt);
4506: if (URI == NULL) {
4507: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4508: }
4509: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4510: SKIP(6);
4511: if (!IS_BLANK_CH(CUR)) {
4512: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4513: "Space required after 'PUBLIC'\n");
4514: }
4515: SKIP_BLANKS;
4516: *publicID = xmlParsePubidLiteral(ctxt);
4517: if (*publicID == NULL) {
4518: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4519: }
4520: if (strict) {
4521: /*
4522: * We don't handle [83] so "S SystemLiteral" is required.
4523: */
4524: if (!IS_BLANK_CH(CUR)) {
4525: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4526: "Space required after the Public Identifier\n");
4527: }
4528: } else {
4529: /*
4530: * We handle [83] so we return immediately, if
4531: * "S SystemLiteral" is not detected. From a purely parsing
4532: * point of view that's a nice mess.
4533: */
4534: const xmlChar *ptr;
4535: GROW;
4536:
4537: ptr = CUR_PTR;
4538: if (!IS_BLANK_CH(*ptr)) return(NULL);
4539:
4540: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4541: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4542: }
4543: SKIP_BLANKS;
4544: URI = xmlParseSystemLiteral(ctxt);
4545: if (URI == NULL) {
4546: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4547: }
4548: }
4549: return(URI);
4550: }
4551:
4552: /**
4553: * xmlParseCommentComplex:
4554: * @ctxt: an XML parser context
4555: * @buf: the already parsed part of the buffer
4556: * @len: number of bytes filles in the buffer
4557: * @size: allocated size of the buffer
4558: *
4559: * Skip an XML (SGML) comment <!-- .... -->
4560: * The spec says that "For compatibility, the string "--" (double-hyphen)
4561: * must not occur within comments. "
4562: * This is the slow routine in case the accelerator for ascii didn't work
4563: *
4564: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4565: */
4566: static void
4567: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4568: int q, ql;
4569: int r, rl;
4570: int cur, l;
4571: int count = 0;
4572: int inputid;
4573:
4574: inputid = ctxt->input->id;
4575:
4576: if (buf == NULL) {
4577: len = 0;
4578: size = XML_PARSER_BUFFER_SIZE;
4579: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4580: if (buf == NULL) {
4581: xmlErrMemory(ctxt, NULL);
4582: return;
4583: }
4584: }
4585: GROW; /* Assure there's enough input data */
4586: q = CUR_CHAR(ql);
4587: if (q == 0)
4588: goto not_terminated;
4589: if (!IS_CHAR(q)) {
4590: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4591: "xmlParseComment: invalid xmlChar value %d\n",
4592: q);
4593: xmlFree (buf);
4594: return;
4595: }
4596: NEXTL(ql);
4597: r = CUR_CHAR(rl);
4598: if (r == 0)
4599: goto not_terminated;
4600: if (!IS_CHAR(r)) {
4601: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4602: "xmlParseComment: invalid xmlChar value %d\n",
4603: q);
4604: xmlFree (buf);
4605: return;
4606: }
4607: NEXTL(rl);
4608: cur = CUR_CHAR(l);
4609: if (cur == 0)
4610: goto not_terminated;
4611: while (IS_CHAR(cur) && /* checked */
4612: ((cur != '>') ||
4613: (r != '-') || (q != '-'))) {
4614: if ((r == '-') && (q == '-')) {
4615: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4616: }
4617: if (len + 5 >= size) {
4618: xmlChar *new_buf;
4619: size *= 2;
4620: new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4621: if (new_buf == NULL) {
4622: xmlFree (buf);
4623: xmlErrMemory(ctxt, NULL);
4624: return;
4625: }
4626: buf = new_buf;
4627: }
4628: COPY_BUF(ql,buf,len,q);
4629: q = r;
4630: ql = rl;
4631: r = cur;
4632: rl = l;
4633:
4634: count++;
4635: if (count > 50) {
4636: GROW;
4637: count = 0;
4638: }
4639: NEXTL(l);
4640: cur = CUR_CHAR(l);
4641: if (cur == 0) {
4642: SHRINK;
4643: GROW;
4644: cur = CUR_CHAR(l);
4645: }
4646: }
4647: buf[len] = 0;
4648: if (cur == 0) {
4649: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4650: "Comment not terminated \n<!--%.50s\n", buf);
4651: } else if (!IS_CHAR(cur)) {
4652: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4653: "xmlParseComment: invalid xmlChar value %d\n",
4654: cur);
4655: } else {
4656: if (inputid != ctxt->input->id) {
4657: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4658: "Comment doesn't start and stop in the same entity\n");
4659: }
4660: NEXT;
4661: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4662: (!ctxt->disableSAX))
4663: ctxt->sax->comment(ctxt->userData, buf);
4664: }
4665: xmlFree(buf);
4666: return;
4667: not_terminated:
4668: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4669: "Comment not terminated\n", NULL);
4670: xmlFree(buf);
4671: return;
4672: }
4673:
4674: /**
4675: * xmlParseComment:
4676: * @ctxt: an XML parser context
4677: *
4678: * Parse an XML (SGML) comment <!-- .... --> and report its content
* through the SAX comment callback when one is installed.
4679: * The spec says that "For compatibility, the string "--" (double-hyphen)
4680: * must not occur within comments. "
4681: *
* Returns immediately, leaving the parser state untouched, when the
* input does not start with "<!--". Otherwise the state is switched to
* XML_PARSER_COMMENT for the duration of the call and the saved state
* is restored on every exit path. A fast loop consumes plain ASCII
* directly from the input buffer; anything else is handed over to
* xmlParseCommentComplex() together with what was collected so far.
*
4682: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4683: */
4684: void
4685: xmlParseComment(xmlParserCtxtPtr ctxt) {
4686: xmlChar *buf = NULL;
4687: int size = XML_PARSER_BUFFER_SIZE;
4688: int len = 0;
4689: xmlParserInputState state;
4690: const xmlChar *in;
4691: int nbchar = 0, ccol;
4692: int inputid;
4693:
4694: /*
4695: * Check that there is a comment right here.
4696: */
4697: if ((RAW != '<') || (NXT(1) != '!') ||
4698: (NXT(2) != '-') || (NXT(3) != '-')) return;
/* remember the caller's state; it is put back before every return below */
4699: state = ctxt->instate;
4700: ctxt->instate = XML_PARSER_COMMENT;
/* used at the closing "-->" to detect a comment spanning two inputs */
4701: inputid = ctxt->input->id;
4702: SKIP(4);
4703: SHRINK;
4704: GROW;
4705:
4706: /*
4707: * Accelerated common case where input don't need to be
4708: * modified before passing it to the handler.
4709: */
4710: in = ctxt->input->cur;
4711: do {
4712: if (*in == 0xA) {
4713: do {
4714: ctxt->input->line++; ctxt->input->col = 1;
4715: in++;
4716: } while (*in == 0xA);
4717: }
4718: get_more:
4719: ccol = ctxt->input->col;
/* skip bytes in 0x20..0x7F other than '-', plus TAB, counting columns */
4720: while (((*in > '-') && (*in <= 0x7F)) ||
4721: ((*in >= 0x20) && (*in < '-')) ||
4722: (*in == 0x09)) {
4723: in++;
4724: ccol++;
4725: }
4726: ctxt->input->col = ccol;
4727: if (*in == 0xA) {
4728: do {
4729: ctxt->input->line++; ctxt->input->col = 1;
4730: in++;
4731: } while (*in == 0xA);
4732: goto get_more;
4733: }
4734: nbchar = in - ctxt->input->cur;
4735: /*
4736: * save current set of data
4737: */
4738: if (nbchar > 0) {
/* the text is only accumulated when a comment callback will receive it */
4739: if ((ctxt->sax != NULL) &&
4740: (ctxt->sax->comment != NULL)) {
4741: if (buf == NULL) {
/* a "--" follows: size the buffer for this run plus the terminator only */
4742: if ((*in == '-') && (in[1] == '-'))
4743: size = nbchar + 1;
4744: else
4745: size = XML_PARSER_BUFFER_SIZE + nbchar;
4746: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4747: if (buf == NULL) {
4748: xmlErrMemory(ctxt, NULL);
4749: ctxt->instate = state;
4750: return;
4751: }
4752: len = 0;
4753: } else if (len + nbchar + 1 >= size) {
4754: xmlChar *new_buf;
4755: size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4756: new_buf = (xmlChar *) xmlRealloc(buf,
4757: size * sizeof(xmlChar));
4758: if (new_buf == NULL) {
4759: xmlFree (buf);
4760: xmlErrMemory(ctxt, NULL);
4761: ctxt->instate = state;
4762: return;
4763: }
4764: buf = new_buf;
4765: }
4766: memcpy(&buf[len], ctxt->input->cur, nbchar);
4767: len += nbchar;
4768: buf[len] = 0;
4769: }
4770: }
/* commit the scanned run: the input cursor now sits on the stopping byte */
4771: ctxt->input->cur = in;
4772: if (*in == 0xA) {
4773: in++;
4774: ctxt->input->line++; ctxt->input->col = 1;
4775: }
/* a CR LF pair: step the cursor onto the LF and restart the loop there */
4776: if (*in == 0xD) {
4777: in++;
4778: if (*in == 0xA) {
4779: ctxt->input->cur = in;
4780: in++;
4781: ctxt->input->line++; ctxt->input->col = 1;
4782: continue; /* while */
4783: }
4784: in--;
4785: }
4786: SHRINK;
4787: GROW;
/* reload the scan pointer from the context after SHRINK/GROW */
4788: in = ctxt->input->cur;
4789: if (*in == '-') {
4790: if (in[1] == '-') {
4791: if (in[2] == '>') {
/* "-->" found: the comment is complete */
4792: if (ctxt->input->id != inputid) {
4793: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4794: "comment doesn't start and stop in the same entity\n");
4795: }
4796: SKIP(3);
4797: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4798: (!ctxt->disableSAX)) {
4799: if (buf != NULL)
4800: ctxt->sax->comment(ctxt->userData, buf);
4801: else
4802: ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4803: }
4804: if (buf != NULL)
4805: xmlFree(buf);
4806: ctxt->instate = state;
4807: return;
4808: }
/* "--" not followed by '>': report it, then keep scanning past the hyphens */
1.1.1.2 misho 4809: if (buf != NULL) {
4810: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4811: "Double hyphen within comment: "
4812: "<!--%.50s\n",
1.1 misho 4813: buf);
1.1.1.2 misho 4814: } else
4815: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4816: "Double hyphen within comment\n", NULL);
1.1 misho 4817: in++;
4818: ctxt->input->col++;
4819: }
4820: in++;
4821: ctxt->input->col++;
4822: goto get_more;
4823: }
4824: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* the fast loop met a byte it does not handle: continue in the slow routine */
4825: xmlParseCommentComplex(ctxt, buf, len, size);
4826: ctxt->instate = state;
4827: return;
4828: }
4829:
4830:
4831: /**
4832: * xmlParsePITarget:
4833: * @ctxt: an XML parser context
4834: *
4835: * parse the name of a PI
4836: *
4837: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4838: *
4839: * Returns the PITarget name or NULL
4840: */
4841:
4842: const xmlChar *
4843: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4844: const xmlChar *name;
4845:
4846: name = xmlParseName(ctxt);
4847: if ((name != NULL) &&
4848: ((name[0] == 'x') || (name[0] == 'X')) &&
4849: ((name[1] == 'm') || (name[1] == 'M')) &&
4850: ((name[2] == 'l') || (name[2] == 'L'))) {
4851: int i;
4852: if ((name[0] == 'x') && (name[1] == 'm') &&
4853: (name[2] == 'l') && (name[3] == 0)) {
4854: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4855: "XML declaration allowed only at the start of the document\n");
4856: return(name);
4857: } else if (name[3] == 0) {
4858: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4859: return(name);
4860: }
4861: for (i = 0;;i++) {
4862: if (xmlW3CPIs[i] == NULL) break;
4863: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4864: return(name);
4865: }
4866: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4867: "xmlParsePITarget: invalid name prefix 'xml'\n",
4868: NULL, NULL);
4869: }
4870: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4871: xmlNsErr(ctxt, XML_NS_ERR_COLON,
4872: "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4873: }
4874: return(name);
4875: }
4876:
4877: #ifdef LIBXML_CATALOG_ENABLED
4878: /**
4879: * xmlParseCatalogPI:
4880: * @ctxt: an XML parser context
4881: * @catalog: the PI value string
4882: *
4883: * parse an XML Catalog Processing Instruction.
4884: *
4885: * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4886: *
4887: * Occurs only if allowed by the user and if happening in the Misc
4888: * part of the document before any doctype informations
4889: * This will add the given catalog to the parsing context in order
4890: * to be used if there is a resolution need further down in the document
4891: */
4892:
4893: static void
4894: xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4895: xmlChar *URL = NULL;
4896: const xmlChar *tmp, *base;
4897: xmlChar marker;
4898:
4899: tmp = catalog;
4900: while (IS_BLANK_CH(*tmp)) tmp++;
4901: if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4902: goto error;
4903: tmp += 7;
4904: while (IS_BLANK_CH(*tmp)) tmp++;
4905: if (*tmp != '=') {
4906: return;
4907: }
4908: tmp++;
4909: while (IS_BLANK_CH(*tmp)) tmp++;
4910: marker = *tmp;
4911: if ((marker != '\'') && (marker != '"'))
4912: goto error;
4913: tmp++;
4914: base = tmp;
4915: while ((*tmp != 0) && (*tmp != marker)) tmp++;
4916: if (*tmp == 0)
4917: goto error;
4918: URL = xmlStrndup(base, tmp - base);
4919: tmp++;
4920: while (IS_BLANK_CH(*tmp)) tmp++;
4921: if (*tmp != 0)
4922: goto error;
4923:
4924: if (URL != NULL) {
4925: ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4926: xmlFree(URL);
4927: }
4928: return;
4929:
4930: error:
4931: xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4932: "Catalog PI syntax error: %s\n",
4933: catalog, NULL);
4934: if (URL != NULL)
4935: xmlFree(URL);
4936: }
4937: #endif
4938:
4939: /**
4940: * xmlParsePI:
4941: * @ctxt: an XML parser context
4942: *
4943: * parse an XML Processing Instruction.
4944: *
4945: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4946: *
4947: * The processing is transfered to SAX once parsed.
4948: */
4949:
4950: void
4951: xmlParsePI(xmlParserCtxtPtr ctxt) {
4952: xmlChar *buf = NULL;
4953: int len = 0;
4954: int size = XML_PARSER_BUFFER_SIZE;
4955: int cur, l;
4956: const xmlChar *target;
4957: xmlParserInputState state;
4958: int count = 0;
4959:
4960: if ((RAW == '<') && (NXT(1) == '?')) {
4961: xmlParserInputPtr input = ctxt->input;
4962: state = ctxt->instate;
4963: ctxt->instate = XML_PARSER_PI;
4964: /*
4965: * this is a Processing Instruction.
4966: */
4967: SKIP(2);
4968: SHRINK;
4969:
4970: /*
4971: * Parse the target name and check for special support like
4972: * namespace.
4973: */
4974: target = xmlParsePITarget(ctxt);
4975: if (target != NULL) {
4976: if ((RAW == '?') && (NXT(1) == '>')) {
4977: if (input != ctxt->input) {
4978: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4979: "PI declaration doesn't start and stop in the same entity\n");
4980: }
4981: SKIP(2);
4982:
4983: /*
4984: * SAX: PI detected.
4985: */
4986: if ((ctxt->sax) && (!ctxt->disableSAX) &&
4987: (ctxt->sax->processingInstruction != NULL))
4988: ctxt->sax->processingInstruction(ctxt->userData,
4989: target, NULL);
1.1.1.2 misho 4990: if (ctxt->instate != XML_PARSER_EOF)
4991: ctxt->instate = state;
1.1 misho 4992: return;
4993: }
4994: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4995: if (buf == NULL) {
4996: xmlErrMemory(ctxt, NULL);
4997: ctxt->instate = state;
4998: return;
4999: }
5000: cur = CUR;
5001: if (!IS_BLANK(cur)) {
5002: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5003: "ParsePI: PI %s space expected\n", target);
5004: }
5005: SKIP_BLANKS;
5006: cur = CUR_CHAR(l);
5007: while (IS_CHAR(cur) && /* checked */
5008: ((cur != '?') || (NXT(1) != '>'))) {
5009: if (len + 5 >= size) {
5010: xmlChar *tmp;
5011:
5012: size *= 2;
5013: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
5014: if (tmp == NULL) {
5015: xmlErrMemory(ctxt, NULL);
5016: xmlFree(buf);
5017: ctxt->instate = state;
5018: return;
5019: }
5020: buf = tmp;
5021: }
5022: count++;
5023: if (count > 50) {
5024: GROW;
5025: count = 0;
5026: }
5027: COPY_BUF(l,buf,len,cur);
5028: NEXTL(l);
5029: cur = CUR_CHAR(l);
5030: if (cur == 0) {
5031: SHRINK;
5032: GROW;
5033: cur = CUR_CHAR(l);
5034: }
5035: }
5036: buf[len] = 0;
5037: if (cur != '?') {
5038: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5039: "ParsePI: PI %s never end ...\n", target);
5040: } else {
5041: if (input != ctxt->input) {
5042: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5043: "PI declaration doesn't start and stop in the same entity\n");
5044: }
5045: SKIP(2);
5046:
5047: #ifdef LIBXML_CATALOG_ENABLED
5048: if (((state == XML_PARSER_MISC) ||
5049: (state == XML_PARSER_START)) &&
5050: (xmlStrEqual(target, XML_CATALOG_PI))) {
5051: xmlCatalogAllow allow = xmlCatalogGetDefaults();
5052: if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5053: (allow == XML_CATA_ALLOW_ALL))
5054: xmlParseCatalogPI(ctxt, buf);
5055: }
5056: #endif
5057:
5058:
5059: /*
5060: * SAX: PI detected.
5061: */
5062: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5063: (ctxt->sax->processingInstruction != NULL))
5064: ctxt->sax->processingInstruction(ctxt->userData,
5065: target, buf);
5066: }
5067: xmlFree(buf);
5068: } else {
5069: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5070: }
1.1.1.2 misho 5071: if (ctxt->instate != XML_PARSER_EOF)
5072: ctxt->instate = state;
1.1 misho 5073: }
5074: }
5075:
5076: /**
5077: * xmlParseNotationDecl:
5078: * @ctxt: an XML parser context
5079: *
5080: * parse a notation declaration
5081: *
5082: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5083: *
5084: * Hence there is actually 3 choices:
5085: * 'PUBLIC' S PubidLiteral
5086: * 'PUBLIC' S PubidLiteral S SystemLiteral
5087: * and 'SYSTEM' S SystemLiteral
5088: *
5089: * See the NOTE on xmlParseExternalID().
5090: */
5091:
5092: void
5093: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5094: const xmlChar *name;
5095: xmlChar *Pubid;
5096: xmlChar *Systemid;
5097:
5098: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5099: xmlParserInputPtr input = ctxt->input;
5100: SHRINK;
5101: SKIP(10);
5102: if (!IS_BLANK_CH(CUR)) {
5103: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5104: "Space required after '<!NOTATION'\n");
5105: return;
5106: }
5107: SKIP_BLANKS;
5108:
5109: name = xmlParseName(ctxt);
5110: if (name == NULL) {
5111: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5112: return;
5113: }
5114: if (!IS_BLANK_CH(CUR)) {
5115: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5116: "Space required after the NOTATION name'\n");
5117: return;
5118: }
5119: if (xmlStrchr(name, ':') != NULL) {
5120: xmlNsErr(ctxt, XML_NS_ERR_COLON,
5121: "colon are forbidden from notation names '%s'\n",
5122: name, NULL, NULL);
5123: }
5124: SKIP_BLANKS;
5125:
5126: /*
5127: * Parse the IDs.
5128: */
5129: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5130: SKIP_BLANKS;
5131:
5132: if (RAW == '>') {
5133: if (input != ctxt->input) {
5134: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5135: "Notation declaration doesn't start and stop in the same entity\n");
5136: }
5137: NEXT;
5138: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5139: (ctxt->sax->notationDecl != NULL))
5140: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5141: } else {
5142: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5143: }
5144: if (Systemid != NULL) xmlFree(Systemid);
5145: if (Pubid != NULL) xmlFree(Pubid);
5146: }
5147: }
5148:
5149: /**
5150: * xmlParseEntityDecl:
5151: * @ctxt: an XML parser context
5152: *
5153: * parse <!ENTITY declarations
5154: *
5155: * [70] EntityDecl ::= GEDecl | PEDecl
5156: *
5157: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5158: *
5159: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5160: *
5161: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5162: *
5163: * [74] PEDef ::= EntityValue | ExternalID
5164: *
5165: * [76] NDataDecl ::= S 'NDATA' S Name
5166: *
5167: * [ VC: Notation Declared ]
5168: * The Name must match the declared name of a notation.
5169: */
5170:
5171: void
5172: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5173: const xmlChar *name = NULL;
5174: xmlChar *value = NULL;
5175: xmlChar *URI = NULL, *literal = NULL;
5176: const xmlChar *ndata = NULL;
5177: int isParameter = 0;
5178: xmlChar *orig = NULL;
5179: int skipped;
5180:
5181: /* GROW; done in the caller */
5182: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5183: xmlParserInputPtr input = ctxt->input;
5184: SHRINK;
5185: SKIP(8);
5186: skipped = SKIP_BLANKS;
5187: if (skipped == 0) {
5188: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5189: "Space required after '<!ENTITY'\n");
5190: }
5191:
5192: if (RAW == '%') {
5193: NEXT;
5194: skipped = SKIP_BLANKS;
5195: if (skipped == 0) {
5196: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5197: "Space required after '%'\n");
5198: }
5199: isParameter = 1;
5200: }
5201:
5202: name = xmlParseName(ctxt);
5203: if (name == NULL) {
5204: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5205: "xmlParseEntityDecl: no name\n");
5206: return;
5207: }
5208: if (xmlStrchr(name, ':') != NULL) {
5209: xmlNsErr(ctxt, XML_NS_ERR_COLON,
5210: "colon are forbidden from entities names '%s'\n",
5211: name, NULL, NULL);
5212: }
5213: skipped = SKIP_BLANKS;
5214: if (skipped == 0) {
5215: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5216: "Space required after the entity name\n");
5217: }
5218:
5219: ctxt->instate = XML_PARSER_ENTITY_DECL;
5220: /*
5221: * handle the various case of definitions...
5222: */
5223: if (isParameter) {
5224: if ((RAW == '"') || (RAW == '\'')) {
5225: value = xmlParseEntityValue(ctxt, &orig);
5226: if (value) {
5227: if ((ctxt->sax != NULL) &&
5228: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5229: ctxt->sax->entityDecl(ctxt->userData, name,
5230: XML_INTERNAL_PARAMETER_ENTITY,
5231: NULL, NULL, value);
5232: }
5233: } else {
5234: URI = xmlParseExternalID(ctxt, &literal, 1);
5235: if ((URI == NULL) && (literal == NULL)) {
5236: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5237: }
5238: if (URI) {
5239: xmlURIPtr uri;
5240:
5241: uri = xmlParseURI((const char *) URI);
5242: if (uri == NULL) {
5243: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5244: "Invalid URI: %s\n", URI);
5245: /*
5246: * This really ought to be a well formedness error
5247: * but the XML Core WG decided otherwise c.f. issue
5248: * E26 of the XML erratas.
5249: */
5250: } else {
5251: if (uri->fragment != NULL) {
5252: /*
5253: * Okay this is foolish to block those but not
5254: * invalid URIs.
5255: */
5256: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5257: } else {
5258: if ((ctxt->sax != NULL) &&
5259: (!ctxt->disableSAX) &&
5260: (ctxt->sax->entityDecl != NULL))
5261: ctxt->sax->entityDecl(ctxt->userData, name,
5262: XML_EXTERNAL_PARAMETER_ENTITY,
5263: literal, URI, NULL);
5264: }
5265: xmlFreeURI(uri);
5266: }
5267: }
5268: }
5269: } else {
5270: if ((RAW == '"') || (RAW == '\'')) {
5271: value = xmlParseEntityValue(ctxt, &orig);
5272: if ((ctxt->sax != NULL) &&
5273: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5274: ctxt->sax->entityDecl(ctxt->userData, name,
5275: XML_INTERNAL_GENERAL_ENTITY,
5276: NULL, NULL, value);
5277: /*
5278: * For expat compatibility in SAX mode.
5279: */
5280: if ((ctxt->myDoc == NULL) ||
5281: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5282: if (ctxt->myDoc == NULL) {
5283: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5284: if (ctxt->myDoc == NULL) {
5285: xmlErrMemory(ctxt, "New Doc failed");
5286: return;
5287: }
5288: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5289: }
5290: if (ctxt->myDoc->intSubset == NULL)
5291: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5292: BAD_CAST "fake", NULL, NULL);
5293:
5294: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5295: NULL, NULL, value);
5296: }
5297: } else {
5298: URI = xmlParseExternalID(ctxt, &literal, 1);
5299: if ((URI == NULL) && (literal == NULL)) {
5300: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5301: }
5302: if (URI) {
5303: xmlURIPtr uri;
5304:
5305: uri = xmlParseURI((const char *)URI);
5306: if (uri == NULL) {
5307: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5308: "Invalid URI: %s\n", URI);
5309: /*
5310: * This really ought to be a well formedness error
5311: * but the XML Core WG decided otherwise c.f. issue
5312: * E26 of the XML erratas.
5313: */
5314: } else {
5315: if (uri->fragment != NULL) {
5316: /*
5317: * Okay this is foolish to block those but not
5318: * invalid URIs.
5319: */
5320: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5321: }
5322: xmlFreeURI(uri);
5323: }
5324: }
5325: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5326: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5327: "Space required before 'NDATA'\n");
5328: }
5329: SKIP_BLANKS;
5330: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5331: SKIP(5);
5332: if (!IS_BLANK_CH(CUR)) {
5333: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5334: "Space required after 'NDATA'\n");
5335: }
5336: SKIP_BLANKS;
5337: ndata = xmlParseName(ctxt);
5338: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5339: (ctxt->sax->unparsedEntityDecl != NULL))
5340: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5341: literal, URI, ndata);
5342: } else {
5343: if ((ctxt->sax != NULL) &&
5344: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5345: ctxt->sax->entityDecl(ctxt->userData, name,
5346: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5347: literal, URI, NULL);
5348: /*
5349: * For expat compatibility in SAX mode.
5350: * assuming the entity repalcement was asked for
5351: */
5352: if ((ctxt->replaceEntities != 0) &&
5353: ((ctxt->myDoc == NULL) ||
5354: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5355: if (ctxt->myDoc == NULL) {
5356: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5357: if (ctxt->myDoc == NULL) {
5358: xmlErrMemory(ctxt, "New Doc failed");
5359: return;
5360: }
5361: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5362: }
5363:
5364: if (ctxt->myDoc->intSubset == NULL)
5365: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5366: BAD_CAST "fake", NULL, NULL);
5367: xmlSAX2EntityDecl(ctxt, name,
5368: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5369: literal, URI, NULL);
5370: }
5371: }
5372: }
5373: }
5374: SKIP_BLANKS;
5375: if (RAW != '>') {
5376: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5377: "xmlParseEntityDecl: entity %s not terminated\n", name);
5378: } else {
5379: if (input != ctxt->input) {
5380: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5381: "Entity declaration doesn't start and stop in the same entity\n");
5382: }
5383: NEXT;
5384: }
5385: if (orig != NULL) {
5386: /*
5387: * Ugly mechanism to save the raw entity value.
5388: */
5389: xmlEntityPtr cur = NULL;
5390:
5391: if (isParameter) {
5392: if ((ctxt->sax != NULL) &&
5393: (ctxt->sax->getParameterEntity != NULL))
5394: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5395: } else {
5396: if ((ctxt->sax != NULL) &&
5397: (ctxt->sax->getEntity != NULL))
5398: cur = ctxt->sax->getEntity(ctxt->userData, name);
5399: if ((cur == NULL) && (ctxt->userData==ctxt)) {
5400: cur = xmlSAX2GetEntity(ctxt, name);
5401: }
5402: }
5403: if (cur != NULL) {
5404: if (cur->orig != NULL)
5405: xmlFree(orig);
5406: else
5407: cur->orig = orig;
5408: } else
5409: xmlFree(orig);
5410: }
5411: if (value != NULL) xmlFree(value);
5412: if (URI != NULL) xmlFree(URI);
5413: if (literal != NULL) xmlFree(literal);
5414: }
5415: }
5416:
5417: /**
5418: * xmlParseDefaultDecl:
5419: * @ctxt: an XML parser context
5420: * @value: Receive a possible fixed default value for the attribute
5421: *
5422: * Parse an attribute default declaration
5423: *
5424: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5425: *
5426: * [ VC: Required Attribute ]
5427: * if the default declaration is the keyword #REQUIRED, then the
5428: * attribute must be specified for all elements of the type in the
5429: * attribute-list declaration.
5430: *
5431: * [ VC: Attribute Default Legal ]
5432: * The declared default value must meet the lexical constraints of
5433: * the declared attribute type c.f. xmlValidateAttributeDecl()
5434: *
5435: * [ VC: Fixed Attribute Default ]
5436: * if an attribute has a default value declared with the #FIXED
5437: * keyword, instances of that attribute must match the default value.
5438: *
5439: * [ WFC: No < in Attribute Values ]
5440: * handled in xmlParseAttValue()
5441: *
5442: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5443: * or XML_ATTRIBUTE_FIXED.
5444: */
5445:
5446: int
5447: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5448: int val;
5449: xmlChar *ret;
5450:
5451: *value = NULL;
5452: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5453: SKIP(9);
5454: return(XML_ATTRIBUTE_REQUIRED);
5455: }
5456: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5457: SKIP(8);
5458: return(XML_ATTRIBUTE_IMPLIED);
5459: }
5460: val = XML_ATTRIBUTE_NONE;
5461: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5462: SKIP(6);
5463: val = XML_ATTRIBUTE_FIXED;
5464: if (!IS_BLANK_CH(CUR)) {
5465: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5466: "Space required after '#FIXED'\n");
5467: }
5468: SKIP_BLANKS;
5469: }
5470: ret = xmlParseAttValue(ctxt);
5471: ctxt->instate = XML_PARSER_DTD;
5472: if (ret == NULL) {
5473: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5474: "Attribute default value declaration error\n");
5475: } else
5476: *value = ret;
5477: return(val);
5478: }
5479:
5480: /**
5481: * xmlParseNotationType:
5482: * @ctxt: an XML parser context
5483: *
5484: * parse an Notation attribute type.
5485: *
5486: * Note: the leading 'NOTATION' S part has already being parsed...
5487: *
5488: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5489: *
5490: * [ VC: Notation Attributes ]
5491: * Values of this type must match one of the notation names included
5492: * in the declaration; all notation names in the declaration must be declared.
5493: *
5494: * Returns: the notation attribute tree built while parsing
5495: */
5496:
5497: xmlEnumerationPtr
5498: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5499: const xmlChar *name;
5500: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5501:
5502: if (RAW != '(') {
5503: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5504: return(NULL);
5505: }
5506: SHRINK;
5507: do {
5508: NEXT;
5509: SKIP_BLANKS;
5510: name = xmlParseName(ctxt);
5511: if (name == NULL) {
5512: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5513: "Name expected in NOTATION declaration\n");
5514: xmlFreeEnumeration(ret);
5515: return(NULL);
5516: }
5517: tmp = ret;
5518: while (tmp != NULL) {
5519: if (xmlStrEqual(name, tmp->name)) {
5520: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5521: "standalone: attribute notation value token %s duplicated\n",
5522: name, NULL);
5523: if (!xmlDictOwns(ctxt->dict, name))
5524: xmlFree((xmlChar *) name);
5525: break;
5526: }
5527: tmp = tmp->next;
5528: }
5529: if (tmp == NULL) {
5530: cur = xmlCreateEnumeration(name);
5531: if (cur == NULL) {
5532: xmlFreeEnumeration(ret);
5533: return(NULL);
5534: }
5535: if (last == NULL) ret = last = cur;
5536: else {
5537: last->next = cur;
5538: last = cur;
5539: }
5540: }
5541: SKIP_BLANKS;
5542: } while (RAW == '|');
5543: if (RAW != ')') {
5544: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5545: xmlFreeEnumeration(ret);
5546: return(NULL);
5547: }
5548: NEXT;
5549: return(ret);
5550: }
5551:
5552: /**
5553: * xmlParseEnumerationType:
5554: * @ctxt: an XML parser context
5555: *
5556: * parse an Enumeration attribute type.
5557: *
5558: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5559: *
5560: * [ VC: Enumeration ]
5561: * Values of this type must match one of the Nmtoken tokens in
5562: * the declaration
5563: *
5564: * Returns: the enumeration attribute tree built while parsing
5565: */
5566:
5567: xmlEnumerationPtr
5568: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5569: xmlChar *name;
5570: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5571:
5572: if (RAW != '(') {
5573: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5574: return(NULL);
5575: }
5576: SHRINK;
5577: do {
5578: NEXT;
5579: SKIP_BLANKS;
5580: name = xmlParseNmtoken(ctxt);
5581: if (name == NULL) {
5582: xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5583: return(ret);
5584: }
5585: tmp = ret;
5586: while (tmp != NULL) {
5587: if (xmlStrEqual(name, tmp->name)) {
5588: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5589: "standalone: attribute enumeration value token %s duplicated\n",
5590: name, NULL);
5591: if (!xmlDictOwns(ctxt->dict, name))
5592: xmlFree(name);
5593: break;
5594: }
5595: tmp = tmp->next;
5596: }
5597: if (tmp == NULL) {
5598: cur = xmlCreateEnumeration(name);
5599: if (!xmlDictOwns(ctxt->dict, name))
5600: xmlFree(name);
5601: if (cur == NULL) {
5602: xmlFreeEnumeration(ret);
5603: return(NULL);
5604: }
5605: if (last == NULL) ret = last = cur;
5606: else {
5607: last->next = cur;
5608: last = cur;
5609: }
5610: }
5611: SKIP_BLANKS;
5612: } while (RAW == '|');
5613: if (RAW != ')') {
5614: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5615: return(ret);
5616: }
5617: NEXT;
5618: return(ret);
5619: }
5620:
5621: /**
5622: * xmlParseEnumeratedType:
5623: * @ctxt: an XML parser context
5624: * @tree: the enumeration tree built while parsing
5625: *
5626: * parse an Enumerated attribute type.
5627: *
5628: * [57] EnumeratedType ::= NotationType | Enumeration
5629: *
5630: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5631: *
5632: *
5633: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5634: */
5635:
5636: int
5637: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5638: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5639: SKIP(8);
5640: if (!IS_BLANK_CH(CUR)) {
5641: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5642: "Space required after 'NOTATION'\n");
5643: return(0);
5644: }
5645: SKIP_BLANKS;
5646: *tree = xmlParseNotationType(ctxt);
5647: if (*tree == NULL) return(0);
5648: return(XML_ATTRIBUTE_NOTATION);
5649: }
5650: *tree = xmlParseEnumerationType(ctxt);
5651: if (*tree == NULL) return(0);
5652: return(XML_ATTRIBUTE_ENUMERATION);
5653: }
5654:
5655: /**
5656: * xmlParseAttributeType:
5657: * @ctxt: an XML parser context
5658: * @tree: the enumeration tree built while parsing
5659: *
5660: * parse the Attribute list def for an element
5661: *
5662: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5663: *
5664: * [55] StringType ::= 'CDATA'
5665: *
5666: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5667: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5668: *
5669: * Validity constraints for attribute values syntax are checked in
5670: * xmlValidateAttributeValue()
5671: *
5672: * [ VC: ID ]
5673: * Values of type ID must match the Name production. A name must not
5674: * appear more than once in an XML document as a value of this type;
5675: * i.e., ID values must uniquely identify the elements which bear them.
5676: *
5677: * [ VC: One ID per Element Type ]
5678: * No element type may have more than one ID attribute specified.
5679: *
5680: * [ VC: ID Attribute Default ]
5681: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5682: *
5683: * [ VC: IDREF ]
5684: * Values of type IDREF must match the Name production, and values
5685: * of type IDREFS must match Names; each IDREF Name must match the value
5686: * of an ID attribute on some element in the XML document; i.e. IDREF
5687: * values must match the value of some ID attribute.
5688: *
5689: * [ VC: Entity Name ]
5690: * Values of type ENTITY must match the Name production, values
5691: * of type ENTITIES must match Names; each Entity Name must match the
5692: * name of an unparsed entity declared in the DTD.
5693: *
5694: * [ VC: Name Token ]
5695: * Values of type NMTOKEN must match the Nmtoken production; values
5696: * of type NMTOKENS must match Nmtokens.
5697: *
5698: * Returns the attribute type
5699: */
5700: int
5701: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5702: SHRINK;
5703: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5704: SKIP(5);
5705: return(XML_ATTRIBUTE_CDATA);
5706: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5707: SKIP(6);
5708: return(XML_ATTRIBUTE_IDREFS);
5709: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5710: SKIP(5);
5711: return(XML_ATTRIBUTE_IDREF);
5712: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5713: SKIP(2);
5714: return(XML_ATTRIBUTE_ID);
5715: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5716: SKIP(6);
5717: return(XML_ATTRIBUTE_ENTITY);
5718: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5719: SKIP(8);
5720: return(XML_ATTRIBUTE_ENTITIES);
5721: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5722: SKIP(8);
5723: return(XML_ATTRIBUTE_NMTOKENS);
5724: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5725: SKIP(7);
5726: return(XML_ATTRIBUTE_NMTOKEN);
5727: }
5728: return(xmlParseEnumeratedType(ctxt, tree));
5729: }
5730:
5731: /**
5732: * xmlParseAttributeListDecl:
5733: * @ctxt: an XML parser context
5734: *
5735: * : parse the Attribute list def for an element
5736: *
5737: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5738: *
5739: * [53] AttDef ::= S Name S AttType S DefaultDecl
5740: *
5741: */
5742: void
5743: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5744: const xmlChar *elemName;
5745: const xmlChar *attrName;
5746: xmlEnumerationPtr tree;
5747:
5748: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5749: xmlParserInputPtr input = ctxt->input;
5750:
5751: SKIP(9);
5752: if (!IS_BLANK_CH(CUR)) {
5753: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5754: "Space required after '<!ATTLIST'\n");
5755: }
5756: SKIP_BLANKS;
5757: elemName = xmlParseName(ctxt);
5758: if (elemName == NULL) {
5759: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5760: "ATTLIST: no name for Element\n");
5761: return;
5762: }
5763: SKIP_BLANKS;
5764: GROW;
5765: while (RAW != '>') {
5766: const xmlChar *check = CUR_PTR;
5767: int type;
5768: int def;
5769: xmlChar *defaultValue = NULL;
5770:
5771: GROW;
5772: tree = NULL;
5773: attrName = xmlParseName(ctxt);
5774: if (attrName == NULL) {
5775: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5776: "ATTLIST: no name for Attribute\n");
5777: break;
5778: }
5779: GROW;
5780: if (!IS_BLANK_CH(CUR)) {
5781: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5782: "Space required after the attribute name\n");
5783: break;
5784: }
5785: SKIP_BLANKS;
5786:
5787: type = xmlParseAttributeType(ctxt, &tree);
5788: if (type <= 0) {
5789: break;
5790: }
5791:
5792: GROW;
5793: if (!IS_BLANK_CH(CUR)) {
5794: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5795: "Space required after the attribute type\n");
5796: if (tree != NULL)
5797: xmlFreeEnumeration(tree);
5798: break;
5799: }
5800: SKIP_BLANKS;
5801:
5802: def = xmlParseDefaultDecl(ctxt, &defaultValue);
5803: if (def <= 0) {
5804: if (defaultValue != NULL)
5805: xmlFree(defaultValue);
5806: if (tree != NULL)
5807: xmlFreeEnumeration(tree);
5808: break;
5809: }
5810: if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5811: xmlAttrNormalizeSpace(defaultValue, defaultValue);
5812:
5813: GROW;
5814: if (RAW != '>') {
5815: if (!IS_BLANK_CH(CUR)) {
5816: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817: "Space required after the attribute default value\n");
5818: if (defaultValue != NULL)
5819: xmlFree(defaultValue);
5820: if (tree != NULL)
5821: xmlFreeEnumeration(tree);
5822: break;
5823: }
5824: SKIP_BLANKS;
5825: }
5826: if (check == CUR_PTR) {
5827: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5828: "in xmlParseAttributeListDecl\n");
5829: if (defaultValue != NULL)
5830: xmlFree(defaultValue);
5831: if (tree != NULL)
5832: xmlFreeEnumeration(tree);
5833: break;
5834: }
5835: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5836: (ctxt->sax->attributeDecl != NULL))
5837: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5838: type, def, defaultValue, tree);
5839: else if (tree != NULL)
5840: xmlFreeEnumeration(tree);
5841:
5842: if ((ctxt->sax2) && (defaultValue != NULL) &&
5843: (def != XML_ATTRIBUTE_IMPLIED) &&
5844: (def != XML_ATTRIBUTE_REQUIRED)) {
5845: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5846: }
5847: if (ctxt->sax2) {
5848: xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5849: }
5850: if (defaultValue != NULL)
5851: xmlFree(defaultValue);
5852: GROW;
5853: }
5854: if (RAW == '>') {
5855: if (input != ctxt->input) {
5856: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857: "Attribute list declaration doesn't start and stop in the same entity\n",
5858: NULL, NULL);
5859: }
5860: NEXT;
5861: }
5862: }
5863: }
5864:
5865: /**
5866: * xmlParseElementMixedContentDecl:
5867: * @ctxt: an XML parser context
5868: * @inputchk: the input used for the current entity, needed for boundary checks
5869: *
5870: * parse the declaration for a Mixed Element content
5871: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5872: *
5873: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5874: * '(' S? '#PCDATA' S? ')'
5875: *
5876: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5877: *
5878: * [ VC: No Duplicate Types ]
5879: * The same name must not appear more than once in a single
5880: * mixed-content declaration.
5881: *
5882: * returns: the list of the xmlElementContentPtr describing the element choices
5883: */
5884: xmlElementContentPtr
5885: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5886: xmlElementContentPtr ret = NULL, cur = NULL, n;
5887: const xmlChar *elem = NULL;
5888:
5889: GROW;
5890: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5891: SKIP(7);
5892: SKIP_BLANKS;
5893: SHRINK;
5894: if (RAW == ')') {
5895: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5896: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5897: "Element content declaration doesn't start and stop in the same entity\n",
5898: NULL, NULL);
5899: }
5900: NEXT;
5901: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5902: if (ret == NULL)
5903: return(NULL);
5904: if (RAW == '*') {
5905: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5906: NEXT;
5907: }
5908: return(ret);
5909: }
5910: if ((RAW == '(') || (RAW == '|')) {
5911: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5912: if (ret == NULL) return(NULL);
5913: }
5914: while (RAW == '|') {
5915: NEXT;
5916: if (elem == NULL) {
5917: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5918: if (ret == NULL) return(NULL);
5919: ret->c1 = cur;
5920: if (cur != NULL)
5921: cur->parent = ret;
5922: cur = ret;
5923: } else {
5924: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5925: if (n == NULL) return(NULL);
5926: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5927: if (n->c1 != NULL)
5928: n->c1->parent = n;
5929: cur->c2 = n;
5930: if (n != NULL)
5931: n->parent = cur;
5932: cur = n;
5933: }
5934: SKIP_BLANKS;
5935: elem = xmlParseName(ctxt);
5936: if (elem == NULL) {
5937: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5938: "xmlParseElementMixedContentDecl : Name expected\n");
5939: xmlFreeDocElementContent(ctxt->myDoc, cur);
5940: return(NULL);
5941: }
5942: SKIP_BLANKS;
5943: GROW;
5944: }
5945: if ((RAW == ')') && (NXT(1) == '*')) {
5946: if (elem != NULL) {
5947: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5948: XML_ELEMENT_CONTENT_ELEMENT);
5949: if (cur->c2 != NULL)
5950: cur->c2->parent = cur;
5951: }
5952: if (ret != NULL)
5953: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5954: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5955: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5956: "Element content declaration doesn't start and stop in the same entity\n",
5957: NULL, NULL);
5958: }
5959: SKIP(2);
5960: } else {
5961: xmlFreeDocElementContent(ctxt->myDoc, ret);
5962: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5963: return(NULL);
5964: }
5965:
5966: } else {
5967: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5968: }
5969: return(ret);
5970: }
5971:
5972: /**
5973: * xmlParseElementChildrenContentDeclPriv:
5974: * @ctxt: an XML parser context
5975: * @inputchk: the input used for the current entity, needed for boundary checks
5976: * @depth: the level of recursion
5977: *
5978: * parse the declaration for a Mixed Element content
5979: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5980: *
5981: *
5982: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5983: *
5984: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5985: *
5986: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5987: *
5988: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5989: *
5990: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5991: * TODO Parameter-entity replacement text must be properly nested
5992: * with parenthesized groups. That is to say, if either of the
5993: * opening or closing parentheses in a choice, seq, or Mixed
5994: * construct is contained in the replacement text for a parameter
5995: * entity, both must be contained in the same replacement text. For
5996: * interoperability, if a parameter-entity reference appears in a
5997: * choice, seq, or Mixed construct, its replacement text should not
5998: * be empty, and neither the first nor last non-blank character of
5999: * the replacement text should be a connector (| or ,).
6000: *
6001: * Returns the tree of xmlElementContentPtr describing the element
6002: * hierarchy.
6003: */
6004: static xmlElementContentPtr
6005: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6006: int depth) {
6007: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6008: const xmlChar *elem;
6009: xmlChar type = 0;
6010:
6011: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6012: (depth > 2048)) {
6013: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6014: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6015: depth);
6016: return(NULL);
6017: }
6018: SKIP_BLANKS;
6019: GROW;
6020: if (RAW == '(') {
6021: int inputid = ctxt->input->id;
6022:
6023: /* Recurse on first child */
6024: NEXT;
6025: SKIP_BLANKS;
6026: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6027: depth + 1);
6028: SKIP_BLANKS;
6029: GROW;
6030: } else {
6031: elem = xmlParseName(ctxt);
6032: if (elem == NULL) {
6033: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6034: return(NULL);
6035: }
6036: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6037: if (cur == NULL) {
6038: xmlErrMemory(ctxt, NULL);
6039: return(NULL);
6040: }
6041: GROW;
6042: if (RAW == '?') {
6043: cur->ocur = XML_ELEMENT_CONTENT_OPT;
6044: NEXT;
6045: } else if (RAW == '*') {
6046: cur->ocur = XML_ELEMENT_CONTENT_MULT;
6047: NEXT;
6048: } else if (RAW == '+') {
6049: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6050: NEXT;
6051: } else {
6052: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6053: }
6054: GROW;
6055: }
6056: SKIP_BLANKS;
6057: SHRINK;
6058: while (RAW != ')') {
6059: /*
6060: * Each loop we parse one separator and one element.
6061: */
6062: if (RAW == ',') {
6063: if (type == 0) type = CUR;
6064:
6065: /*
6066: * Detect "Name | Name , Name" error
6067: */
6068: else if (type != CUR) {
6069: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6070: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6071: type);
6072: if ((last != NULL) && (last != ret))
6073: xmlFreeDocElementContent(ctxt->myDoc, last);
6074: if (ret != NULL)
6075: xmlFreeDocElementContent(ctxt->myDoc, ret);
6076: return(NULL);
6077: }
6078: NEXT;
6079:
6080: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6081: if (op == NULL) {
6082: if ((last != NULL) && (last != ret))
6083: xmlFreeDocElementContent(ctxt->myDoc, last);
6084: xmlFreeDocElementContent(ctxt->myDoc, ret);
6085: return(NULL);
6086: }
6087: if (last == NULL) {
6088: op->c1 = ret;
6089: if (ret != NULL)
6090: ret->parent = op;
6091: ret = cur = op;
6092: } else {
6093: cur->c2 = op;
6094: if (op != NULL)
6095: op->parent = cur;
6096: op->c1 = last;
6097: if (last != NULL)
6098: last->parent = op;
6099: cur =op;
6100: last = NULL;
6101: }
6102: } else if (RAW == '|') {
6103: if (type == 0) type = CUR;
6104:
6105: /*
6106: * Detect "Name , Name | Name" error
6107: */
6108: else if (type != CUR) {
6109: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6110: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6111: type);
6112: if ((last != NULL) && (last != ret))
6113: xmlFreeDocElementContent(ctxt->myDoc, last);
6114: if (ret != NULL)
6115: xmlFreeDocElementContent(ctxt->myDoc, ret);
6116: return(NULL);
6117: }
6118: NEXT;
6119:
6120: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6121: if (op == NULL) {
6122: if ((last != NULL) && (last != ret))
6123: xmlFreeDocElementContent(ctxt->myDoc, last);
6124: if (ret != NULL)
6125: xmlFreeDocElementContent(ctxt->myDoc, ret);
6126: return(NULL);
6127: }
6128: if (last == NULL) {
6129: op->c1 = ret;
6130: if (ret != NULL)
6131: ret->parent = op;
6132: ret = cur = op;
6133: } else {
6134: cur->c2 = op;
6135: if (op != NULL)
6136: op->parent = cur;
6137: op->c1 = last;
6138: if (last != NULL)
6139: last->parent = op;
6140: cur =op;
6141: last = NULL;
6142: }
6143: } else {
6144: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6145: if ((last != NULL) && (last != ret))
6146: xmlFreeDocElementContent(ctxt->myDoc, last);
6147: if (ret != NULL)
6148: xmlFreeDocElementContent(ctxt->myDoc, ret);
6149: return(NULL);
6150: }
6151: GROW;
6152: SKIP_BLANKS;
6153: GROW;
6154: if (RAW == '(') {
6155: int inputid = ctxt->input->id;
6156: /* Recurse on second child */
6157: NEXT;
6158: SKIP_BLANKS;
6159: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6160: depth + 1);
6161: SKIP_BLANKS;
6162: } else {
6163: elem = xmlParseName(ctxt);
6164: if (elem == NULL) {
6165: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6166: if (ret != NULL)
6167: xmlFreeDocElementContent(ctxt->myDoc, ret);
6168: return(NULL);
6169: }
6170: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6171: if (last == NULL) {
6172: if (ret != NULL)
6173: xmlFreeDocElementContent(ctxt->myDoc, ret);
6174: return(NULL);
6175: }
6176: if (RAW == '?') {
6177: last->ocur = XML_ELEMENT_CONTENT_OPT;
6178: NEXT;
6179: } else if (RAW == '*') {
6180: last->ocur = XML_ELEMENT_CONTENT_MULT;
6181: NEXT;
6182: } else if (RAW == '+') {
6183: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6184: NEXT;
6185: } else {
6186: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6187: }
6188: }
6189: SKIP_BLANKS;
6190: GROW;
6191: }
6192: if ((cur != NULL) && (last != NULL)) {
6193: cur->c2 = last;
6194: if (last != NULL)
6195: last->parent = cur;
6196: }
6197: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6198: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6199: "Element content declaration doesn't start and stop in the same entity\n",
6200: NULL, NULL);
6201: }
6202: NEXT;
6203: if (RAW == '?') {
6204: if (ret != NULL) {
6205: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6206: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6207: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6208: else
6209: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6210: }
6211: NEXT;
6212: } else if (RAW == '*') {
6213: if (ret != NULL) {
6214: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6215: cur = ret;
6216: /*
6217: * Some normalization:
6218: * (a | b* | c?)* == (a | b | c)*
6219: */
6220: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6221: if ((cur->c1 != NULL) &&
6222: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6223: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6224: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6225: if ((cur->c2 != NULL) &&
6226: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6227: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6228: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6229: cur = cur->c2;
6230: }
6231: }
6232: NEXT;
6233: } else if (RAW == '+') {
6234: if (ret != NULL) {
6235: int found = 0;
6236:
6237: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6238: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6239: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6240: else
6241: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6242: /*
6243: * Some normalization:
6244: * (a | b*)+ == (a | b)*
6245: * (a | b?)+ == (a | b)*
6246: */
6247: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6248: if ((cur->c1 != NULL) &&
6249: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6250: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6251: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6252: found = 1;
6253: }
6254: if ((cur->c2 != NULL) &&
6255: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6256: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6257: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6258: found = 1;
6259: }
6260: cur = cur->c2;
6261: }
6262: if (found)
6263: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6264: }
6265: NEXT;
6266: }
6267: return(ret);
6268: }
6269:
6270: /**
6271: * xmlParseElementChildrenContentDecl:
6272: * @ctxt: an XML parser context
6273: * @inputchk: the input used for the current entity, needed for boundary checks
6274: *
6275: * parse the declaration for a Mixed Element content
6276: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6277: *
6278: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6279: *
6280: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6281: *
6282: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6283: *
6284: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6285: *
6286: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6287: * TODO Parameter-entity replacement text must be properly nested
6288: * with parenthesized groups. That is to say, if either of the
6289: * opening or closing parentheses in a choice, seq, or Mixed
6290: * construct is contained in the replacement text for a parameter
6291: * entity, both must be contained in the same replacement text. For
6292: * interoperability, if a parameter-entity reference appears in a
6293: * choice, seq, or Mixed construct, its replacement text should not
6294: * be empty, and neither the first nor last non-blank character of
6295: * the replacement text should be a connector (| or ,).
6296: *
6297: * Returns the tree of xmlElementContentPtr describing the element
6298: * hierarchy.
6299: */
6300: xmlElementContentPtr
6301: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6302: /* stub left for API/ABI compat */
6303: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6304: }
6305:
6306: /**
6307: * xmlParseElementContentDecl:
6308: * @ctxt: an XML parser context
6309: * @name: the name of the element being defined.
6310: * @result: the Element Content pointer will be stored here if any
6311: *
6312: * parse the declaration for an Element content either Mixed or Children,
6313: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6314: *
6315: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6316: *
6317: * returns: the type of element content XML_ELEMENT_TYPE_xxx
6318: */
6319:
6320: int
6321: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6322: xmlElementContentPtr *result) {
6323:
6324: xmlElementContentPtr tree = NULL;
6325: int inputid = ctxt->input->id;
6326: int res;
6327:
6328: *result = NULL;
6329:
6330: if (RAW != '(') {
6331: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6332: "xmlParseElementContentDecl : %s '(' expected\n", name);
6333: return(-1);
6334: }
6335: NEXT;
6336: GROW;
6337: SKIP_BLANKS;
6338: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6339: tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6340: res = XML_ELEMENT_TYPE_MIXED;
6341: } else {
6342: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6343: res = XML_ELEMENT_TYPE_ELEMENT;
6344: }
6345: SKIP_BLANKS;
6346: *result = tree;
6347: return(res);
6348: }
6349:
6350: /**
6351: * xmlParseElementDecl:
6352: * @ctxt: an XML parser context
6353: *
6354: * parse an Element declaration.
6355: *
6356: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6357: *
6358: * [ VC: Unique Element Type Declaration ]
6359: * No element type may be declared more than once
6360: *
6361: * Returns the type of the element, or -1 in case of error
6362: */
6363: int
6364: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6365: const xmlChar *name;
6366: int ret = -1;
6367: xmlElementContentPtr content = NULL;
6368:
6369: /* GROW; done in the caller */
6370: if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6371: xmlParserInputPtr input = ctxt->input;
6372:
6373: SKIP(9);
6374: if (!IS_BLANK_CH(CUR)) {
6375: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6376: "Space required after 'ELEMENT'\n");
6377: }
6378: SKIP_BLANKS;
6379: name = xmlParseName(ctxt);
6380: if (name == NULL) {
6381: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6382: "xmlParseElementDecl: no name for Element\n");
6383: return(-1);
6384: }
6385: while ((RAW == 0) && (ctxt->inputNr > 1))
6386: xmlPopInput(ctxt);
6387: if (!IS_BLANK_CH(CUR)) {
6388: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6389: "Space required after the element name\n");
6390: }
6391: SKIP_BLANKS;
6392: if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6393: SKIP(5);
6394: /*
6395: * Element must always be empty.
6396: */
6397: ret = XML_ELEMENT_TYPE_EMPTY;
6398: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6399: (NXT(2) == 'Y')) {
6400: SKIP(3);
6401: /*
6402: * Element is a generic container.
6403: */
6404: ret = XML_ELEMENT_TYPE_ANY;
6405: } else if (RAW == '(') {
6406: ret = xmlParseElementContentDecl(ctxt, name, &content);
6407: } else {
6408: /*
6409: * [ WFC: PEs in Internal Subset ] error handling.
6410: */
6411: if ((RAW == '%') && (ctxt->external == 0) &&
6412: (ctxt->inputNr == 1)) {
6413: xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6414: "PEReference: forbidden within markup decl in internal subset\n");
6415: } else {
6416: xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6417: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6418: }
6419: return(-1);
6420: }
6421:
6422: SKIP_BLANKS;
6423: /*
6424: * Pop-up of finished entities.
6425: */
6426: while ((RAW == 0) && (ctxt->inputNr > 1))
6427: xmlPopInput(ctxt);
6428: SKIP_BLANKS;
6429:
6430: if (RAW != '>') {
6431: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6432: if (content != NULL) {
6433: xmlFreeDocElementContent(ctxt->myDoc, content);
6434: }
6435: } else {
6436: if (input != ctxt->input) {
6437: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6438: "Element declaration doesn't start and stop in the same entity\n");
6439: }
6440:
6441: NEXT;
6442: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6443: (ctxt->sax->elementDecl != NULL)) {
6444: if (content != NULL)
6445: content->parent = NULL;
6446: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6447: content);
6448: if ((content != NULL) && (content->parent == NULL)) {
6449: /*
6450: * this is a trick: if xmlAddElementDecl is called,
6451: * instead of copying the full tree it is plugged directly
6452: * if called from the parser. Avoid duplicating the
6453: * interfaces or change the API/ABI
6454: */
6455: xmlFreeDocElementContent(ctxt->myDoc, content);
6456: }
6457: } else if (content != NULL) {
6458: xmlFreeDocElementContent(ctxt->myDoc, content);
6459: }
6460: }
6461: }
6462: return(ret);
6463: }
6464:
6465: /**
6466: * xmlParseConditionalSections
6467: * @ctxt: an XML parser context
6468: *
6469: * [61] conditionalSect ::= includeSect | ignoreSect
6470: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6471: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6472: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6473: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6474: */
6475:
6476: static void
6477: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6478: int id = ctxt->input->id;
6479:
6480: SKIP(3);
6481: SKIP_BLANKS;
6482: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6483: SKIP(7);
6484: SKIP_BLANKS;
6485: if (RAW != '[') {
6486: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6487: } else {
6488: if (ctxt->input->id != id) {
6489: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6490: "All markup of the conditional section is not in the same entity\n",
6491: NULL, NULL);
6492: }
6493: NEXT;
6494: }
6495: if (xmlParserDebugEntities) {
6496: if ((ctxt->input != NULL) && (ctxt->input->filename))
6497: xmlGenericError(xmlGenericErrorContext,
6498: "%s(%d): ", ctxt->input->filename,
6499: ctxt->input->line);
6500: xmlGenericError(xmlGenericErrorContext,
6501: "Entering INCLUDE Conditional Section\n");
6502: }
6503:
6504: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6505: (NXT(2) != '>'))) {
6506: const xmlChar *check = CUR_PTR;
6507: unsigned int cons = ctxt->input->consumed;
6508:
6509: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6510: xmlParseConditionalSections(ctxt);
6511: } else if (IS_BLANK_CH(CUR)) {
6512: NEXT;
6513: } else if (RAW == '%') {
6514: xmlParsePEReference(ctxt);
6515: } else
6516: xmlParseMarkupDecl(ctxt);
6517:
6518: /*
6519: * Pop-up of finished entities.
6520: */
6521: while ((RAW == 0) && (ctxt->inputNr > 1))
6522: xmlPopInput(ctxt);
6523:
6524: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6525: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6526: break;
6527: }
6528: }
6529: if (xmlParserDebugEntities) {
6530: if ((ctxt->input != NULL) && (ctxt->input->filename))
6531: xmlGenericError(xmlGenericErrorContext,
6532: "%s(%d): ", ctxt->input->filename,
6533: ctxt->input->line);
6534: xmlGenericError(xmlGenericErrorContext,
6535: "Leaving INCLUDE Conditional Section\n");
6536: }
6537:
6538: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6539: int state;
6540: xmlParserInputState instate;
6541: int depth = 0;
6542:
6543: SKIP(6);
6544: SKIP_BLANKS;
6545: if (RAW != '[') {
6546: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6547: } else {
6548: if (ctxt->input->id != id) {
6549: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6550: "All markup of the conditional section is not in the same entity\n",
6551: NULL, NULL);
6552: }
6553: NEXT;
6554: }
6555: if (xmlParserDebugEntities) {
6556: if ((ctxt->input != NULL) && (ctxt->input->filename))
6557: xmlGenericError(xmlGenericErrorContext,
6558: "%s(%d): ", ctxt->input->filename,
6559: ctxt->input->line);
6560: xmlGenericError(xmlGenericErrorContext,
6561: "Entering IGNORE Conditional Section\n");
6562: }
6563:
6564: /*
6565: * Parse up to the end of the conditional section
6566: * But disable SAX event generating DTD building in the meantime
6567: */
6568: state = ctxt->disableSAX;
6569: instate = ctxt->instate;
6570: if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6571: ctxt->instate = XML_PARSER_IGNORE;
6572:
6573: while ((depth >= 0) && (RAW != 0)) {
6574: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6575: depth++;
6576: SKIP(3);
6577: continue;
6578: }
6579: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6580: if (--depth >= 0) SKIP(3);
6581: continue;
6582: }
6583: NEXT;
6584: continue;
6585: }
6586:
6587: ctxt->disableSAX = state;
6588: ctxt->instate = instate;
6589:
6590: if (xmlParserDebugEntities) {
6591: if ((ctxt->input != NULL) && (ctxt->input->filename))
6592: xmlGenericError(xmlGenericErrorContext,
6593: "%s(%d): ", ctxt->input->filename,
6594: ctxt->input->line);
6595: xmlGenericError(xmlGenericErrorContext,
6596: "Leaving IGNORE Conditional Section\n");
6597: }
6598:
6599: } else {
6600: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6601: }
6602:
6603: if (RAW == 0)
6604: SHRINK;
6605:
6606: if (RAW == 0) {
6607: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6608: } else {
6609: if (ctxt->input->id != id) {
6610: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6611: "All markup of the conditional section is not in the same entity\n",
6612: NULL, NULL);
6613: }
6614: SKIP(3);
6615: }
6616: }
6617:
6618: /**
6619: * xmlParseMarkupDecl:
6620: * @ctxt: an XML parser context
6621: *
6622: * parse Markup declarations
6623: *
6624: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6625: * NotationDecl | PI | Comment
6626: *
6627: * [ VC: Proper Declaration/PE Nesting ]
6628: * Parameter-entity replacement text must be properly nested with
6629: * markup declarations. That is to say, if either the first character
6630: * or the last character of a markup declaration (markupdecl above) is
6631: * contained in the replacement text for a parameter-entity reference,
6632: * both must be contained in the same replacement text.
6633: *
6634: * [ WFC: PEs in Internal Subset ]
6635: * In the internal DTD subset, parameter-entity references can occur
6636: * only where markup declarations can occur, not within markup declarations.
6637: * (This does not apply to references that occur in external parameter
6638: * entities or to the external subset.)
6639: */
6640: void
6641: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6642: GROW;
6643: if (CUR == '<') {
6644: if (NXT(1) == '!') {
6645: switch (NXT(2)) {
6646: case 'E':
6647: if (NXT(3) == 'L')
6648: xmlParseElementDecl(ctxt);
6649: else if (NXT(3) == 'N')
6650: xmlParseEntityDecl(ctxt);
6651: break;
6652: case 'A':
6653: xmlParseAttributeListDecl(ctxt);
6654: break;
6655: case 'N':
6656: xmlParseNotationDecl(ctxt);
6657: break;
6658: case '-':
6659: xmlParseComment(ctxt);
6660: break;
6661: default:
6662: /* there is an error but it will be detected later */
6663: break;
6664: }
6665: } else if (NXT(1) == '?') {
6666: xmlParsePI(ctxt);
6667: }
6668: }
6669: /*
6670: * This is only for internal subset. On external entities,
6671: * the replacement is done before parsing stage
6672: */
6673: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6674: xmlParsePEReference(ctxt);
6675:
6676: /*
6677: * Conditional sections are allowed from entities included
6678: * by PE References in the internal subset.
6679: */
6680: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6681: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6682: xmlParseConditionalSections(ctxt);
6683: }
6684: }
6685:
6686: ctxt->instate = XML_PARSER_DTD;
6687: }
6688:
6689: /**
6690: * xmlParseTextDecl:
6691: * @ctxt: an XML parser context
6692: *
6693: * parse an XML declaration header for external entities
6694: *
6695: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6696: */
6697:
6698: void
6699: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6700: xmlChar *version;
6701: const xmlChar *encoding;
6702:
6703: /*
6704: * We know that '<?xml' is here.
6705: */
6706: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6707: SKIP(5);
6708: } else {
6709: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6710: return;
6711: }
6712:
6713: if (!IS_BLANK_CH(CUR)) {
6714: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6715: "Space needed after '<?xml'\n");
6716: }
6717: SKIP_BLANKS;
6718:
6719: /*
6720: * We may have the VersionInfo here.
6721: */
6722: version = xmlParseVersionInfo(ctxt);
6723: if (version == NULL)
6724: version = xmlCharStrdup(XML_DEFAULT_VERSION);
6725: else {
6726: if (!IS_BLANK_CH(CUR)) {
6727: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6728: "Space needed here\n");
6729: }
6730: }
6731: ctxt->input->version = version;
6732:
6733: /*
6734: * We must have the encoding declaration
6735: */
6736: encoding = xmlParseEncodingDecl(ctxt);
6737: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6738: /*
6739: * The XML REC instructs us to stop parsing right here
6740: */
6741: return;
6742: }
6743: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6744: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6745: "Missing encoding in text declaration\n");
6746: }
6747:
6748: SKIP_BLANKS;
6749: if ((RAW == '?') && (NXT(1) == '>')) {
6750: SKIP(2);
6751: } else if (RAW == '>') {
6752: /* Deprecated old WD ... */
6753: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6754: NEXT;
6755: } else {
6756: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6757: MOVETO_ENDTAG(CUR_PTR);
6758: NEXT;
6759: }
6760: }
6761:
6762: /**
6763: * xmlParseExternalSubset:
6764: * @ctxt: an XML parser context
6765: * @ExternalID: the external identifier
6766: * @SystemID: the system identifier (or URL)
6767: *
6768: * parse Markup declarations from an external subset
6769: *
6770: * [30] extSubset ::= textDecl? extSubsetDecl
6771: *
6772: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6773: */
6774: void
6775: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6776: const xmlChar *SystemID) {
6777: xmlDetectSAX2(ctxt);
6778: GROW;
6779:
6780: if ((ctxt->encoding == NULL) &&
6781: (ctxt->input->end - ctxt->input->cur >= 4)) {
6782: xmlChar start[4];
6783: xmlCharEncoding enc;
6784:
6785: start[0] = RAW;
6786: start[1] = NXT(1);
6787: start[2] = NXT(2);
6788: start[3] = NXT(3);
6789: enc = xmlDetectCharEncoding(start, 4);
6790: if (enc != XML_CHAR_ENCODING_NONE)
6791: xmlSwitchEncoding(ctxt, enc);
6792: }
6793:
6794: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6795: xmlParseTextDecl(ctxt);
6796: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6797: /*
6798: * The XML REC instructs us to stop parsing right here
6799: */
6800: ctxt->instate = XML_PARSER_EOF;
6801: return;
6802: }
6803: }
6804: if (ctxt->myDoc == NULL) {
6805: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6806: if (ctxt->myDoc == NULL) {
6807: xmlErrMemory(ctxt, "New Doc failed");
6808: return;
6809: }
6810: ctxt->myDoc->properties = XML_DOC_INTERNAL;
6811: }
6812: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6813: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6814:
6815: ctxt->instate = XML_PARSER_DTD;
6816: ctxt->external = 1;
6817: while (((RAW == '<') && (NXT(1) == '?')) ||
6818: ((RAW == '<') && (NXT(1) == '!')) ||
6819: (RAW == '%') || IS_BLANK_CH(CUR)) {
6820: const xmlChar *check = CUR_PTR;
6821: unsigned int cons = ctxt->input->consumed;
6822:
6823: GROW;
6824: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6825: xmlParseConditionalSections(ctxt);
6826: } else if (IS_BLANK_CH(CUR)) {
6827: NEXT;
6828: } else if (RAW == '%') {
6829: xmlParsePEReference(ctxt);
6830: } else
6831: xmlParseMarkupDecl(ctxt);
6832:
6833: /*
6834: * Pop-up of finished entities.
6835: */
6836: while ((RAW == 0) && (ctxt->inputNr > 1))
6837: xmlPopInput(ctxt);
6838:
6839: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6840: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6841: break;
6842: }
6843: }
6844:
6845: if (RAW != 0) {
6846: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6847: }
6848:
6849: }
6850:
6851: /**
6852: * xmlParseReference:
6853: * @ctxt: an XML parser context
6854: *
6855: * parse and handle entity references in content, depending on the SAX
6856: * interface, this may end-up in a call to character() if this is a
6857: * CharRef, a predefined entity, if there is no reference() callback.
6858: * or if the parser was asked to switch to that mode.
6859: *
6860: * [67] Reference ::= EntityRef | CharRef
6861: */
6862: void
6863: xmlParseReference(xmlParserCtxtPtr ctxt) {
6864: xmlEntityPtr ent;
6865: xmlChar *val;
6866: int was_checked;
6867: xmlNodePtr list = NULL;
6868: xmlParserErrors ret = XML_ERR_OK;
6869:
6870:
6871: if (RAW != '&')
6872: return;
6873:
6874: /*
6875: * Simple case of a CharRef
6876: */
6877: if (NXT(1) == '#') {
6878: int i = 0;
6879: xmlChar out[10];
6880: int hex = NXT(2);
6881: int value = xmlParseCharRef(ctxt);
6882:
6883: if (value == 0)
6884: return;
6885: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6886: /*
6887: * So we are using non-UTF-8 buffers
6888: * Check that the char fit on 8bits, if not
6889: * generate a CharRef.
6890: */
6891: if (value <= 0xFF) {
6892: out[0] = value;
6893: out[1] = 0;
6894: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6895: (!ctxt->disableSAX))
6896: ctxt->sax->characters(ctxt->userData, out, 1);
6897: } else {
6898: if ((hex == 'x') || (hex == 'X'))
6899: snprintf((char *)out, sizeof(out), "#x%X", value);
6900: else
6901: snprintf((char *)out, sizeof(out), "#%d", value);
6902: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6903: (!ctxt->disableSAX))
6904: ctxt->sax->reference(ctxt->userData, out);
6905: }
6906: } else {
6907: /*
6908: * Just encode the value in UTF-8
6909: */
6910: COPY_BUF(0 ,out, i, value);
6911: out[i] = 0;
6912: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6913: (!ctxt->disableSAX))
6914: ctxt->sax->characters(ctxt->userData, out, i);
6915: }
6916: return;
6917: }
6918:
6919: /*
6920: * We are seeing an entity reference
6921: */
6922: ent = xmlParseEntityRef(ctxt);
6923: if (ent == NULL) return;
6924: if (!ctxt->wellFormed)
6925: return;
6926: was_checked = ent->checked;
6927:
6928: /* special case of predefined entities */
6929: if ((ent->name == NULL) ||
6930: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6931: val = ent->content;
6932: if (val == NULL) return;
6933: /*
6934: * inline the entity.
6935: */
6936: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6937: (!ctxt->disableSAX))
6938: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6939: return;
6940: }
6941:
6942: /*
6943: * The first reference to the entity trigger a parsing phase
6944: * where the ent->children is filled with the result from
6945: * the parsing.
6946: */
6947: if (ent->checked == 0) {
6948: unsigned long oldnbent = ctxt->nbentities;
6949:
6950: /*
6951: * This is a bit hackish but this seems the best
6952: * way to make sure both SAX and DOM entity support
6953: * behaves okay.
6954: */
6955: void *user_data;
6956: if (ctxt->userData == ctxt)
6957: user_data = NULL;
6958: else
6959: user_data = ctxt->userData;
6960:
6961: /*
6962: * Check that this entity is well formed
6963: * 4.3.2: An internal general parsed entity is well-formed
6964: * if its replacement text matches the production labeled
6965: * content.
6966: */
6967: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6968: ctxt->depth++;
6969: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6970: user_data, &list);
6971: ctxt->depth--;
6972:
6973: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6974: ctxt->depth++;
6975: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6976: user_data, ctxt->depth, ent->URI,
6977: ent->ExternalID, &list);
6978: ctxt->depth--;
6979: } else {
6980: ret = XML_ERR_ENTITY_PE_INTERNAL;
6981: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6982: "invalid entity type found\n", NULL);
6983: }
6984:
6985: /*
6986: * Store the number of entities needing parsing for this entity
6987: * content and do checkings
6988: */
6989: ent->checked = ctxt->nbentities - oldnbent;
6990: if (ret == XML_ERR_ENTITY_LOOP) {
6991: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6992: xmlFreeNodeList(list);
6993: return;
6994: }
1.1.1.2.2.1! misho 6995: if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
1.1 misho 6996: xmlFreeNodeList(list);
6997: return;
6998: }
6999:
7000: if ((ret == XML_ERR_OK) && (list != NULL)) {
7001: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7002: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7003: (ent->children == NULL)) {
7004: ent->children = list;
7005: if (ctxt->replaceEntities) {
7006: /*
7007: * Prune it directly in the generated document
7008: * except for single text nodes.
7009: */
7010: if (((list->type == XML_TEXT_NODE) &&
7011: (list->next == NULL)) ||
7012: (ctxt->parseMode == XML_PARSE_READER)) {
7013: list->parent = (xmlNodePtr) ent;
7014: list = NULL;
7015: ent->owner = 1;
7016: } else {
7017: ent->owner = 0;
7018: while (list != NULL) {
7019: list->parent = (xmlNodePtr) ctxt->node;
7020: list->doc = ctxt->myDoc;
7021: if (list->next == NULL)
7022: ent->last = list;
7023: list = list->next;
7024: }
7025: list = ent->children;
7026: #ifdef LIBXML_LEGACY_ENABLED
7027: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7028: xmlAddEntityReference(ent, list, NULL);
7029: #endif /* LIBXML_LEGACY_ENABLED */
7030: }
7031: } else {
7032: ent->owner = 1;
7033: while (list != NULL) {
7034: list->parent = (xmlNodePtr) ent;
1.1.1.2 misho 7035: xmlSetTreeDoc(list, ent->doc);
1.1 misho 7036: if (list->next == NULL)
7037: ent->last = list;
7038: list = list->next;
7039: }
7040: }
7041: } else {
7042: xmlFreeNodeList(list);
7043: list = NULL;
7044: }
7045: } else if ((ret != XML_ERR_OK) &&
7046: (ret != XML_WAR_UNDECLARED_ENTITY)) {
7047: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7048: "Entity '%s' failed to parse\n", ent->name);
7049: } else if (list != NULL) {
7050: xmlFreeNodeList(list);
7051: list = NULL;
7052: }
7053: if (ent->checked == 0)
7054: ent->checked = 1;
7055: } else if (ent->checked != 1) {
7056: ctxt->nbentities += ent->checked;
7057: }
7058:
7059: /*
7060: * Now that the entity content has been gathered
7061: * provide it to the application, this can take different forms based
7062: * on the parsing modes.
7063: */
7064: if (ent->children == NULL) {
7065: /*
7066: * Probably running in SAX mode and the callbacks don't
7067: * build the entity content. So unless we already went
7068: * though parsing for first checking go though the entity
7069: * content to generate callbacks associated to the entity
7070: */
7071: if (was_checked != 0) {
7072: void *user_data;
7073: /*
7074: * This is a bit hackish but this seems the best
7075: * way to make sure both SAX and DOM entity support
7076: * behaves okay.
7077: */
7078: if (ctxt->userData == ctxt)
7079: user_data = NULL;
7080: else
7081: user_data = ctxt->userData;
7082:
7083: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7084: ctxt->depth++;
7085: ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7086: ent->content, user_data, NULL);
7087: ctxt->depth--;
7088: } else if (ent->etype ==
7089: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7090: ctxt->depth++;
7091: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7092: ctxt->sax, user_data, ctxt->depth,
7093: ent->URI, ent->ExternalID, NULL);
7094: ctxt->depth--;
7095: } else {
7096: ret = XML_ERR_ENTITY_PE_INTERNAL;
7097: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7098: "invalid entity type found\n", NULL);
7099: }
7100: if (ret == XML_ERR_ENTITY_LOOP) {
7101: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7102: return;
7103: }
7104: }
7105: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7106: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7107: /*
7108: * Entity reference callback comes second, it's somewhat
7109: * superfluous but a compatibility to historical behaviour
7110: */
7111: ctxt->sax->reference(ctxt->userData, ent->name);
7112: }
7113: return;
7114: }
7115:
7116: /*
7117: * If we didn't get any children for the entity being built
7118: */
7119: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7120: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7121: /*
7122: * Create a node.
7123: */
7124: ctxt->sax->reference(ctxt->userData, ent->name);
7125: return;
7126: }
7127:
7128: if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7129: /*
7130: * There is a problem on the handling of _private for entities
7131: * (bug 155816): Should we copy the content of the field from
7132: * the entity (possibly overwriting some value set by the user
7133: * when a copy is created), should we leave it alone, or should
7134: * we try to take care of different situations? The problem
7135: * is exacerbated by the usage of this field by the xmlReader.
7136: * To fix this bug, we look at _private on the created node
7137: * and, if it's NULL, we copy in whatever was in the entity.
7138: * If it's not NULL we leave it alone. This is somewhat of a
7139: * hack - maybe we should have further tests to determine
7140: * what to do.
7141: */
7142: if ((ctxt->node != NULL) && (ent->children != NULL)) {
7143: /*
7144: * Seems we are generating the DOM content, do
7145: * a simple tree copy for all references except the first
7146: * In the first occurrence list contains the replacement.
7147: * progressive == 2 means we are operating on the Reader
7148: * and since nodes are discarded we must copy all the time.
7149: */
7150: if (((list == NULL) && (ent->owner == 0)) ||
7151: (ctxt->parseMode == XML_PARSE_READER)) {
7152: xmlNodePtr nw = NULL, cur, firstChild = NULL;
7153:
7154: /*
1.1.1.2.2.1! misho 7155: * We are copying here, make sure there is no abuse
! 7156: */
! 7157: ctxt->sizeentcopy += ent->length;
! 7158: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
! 7159: return;
! 7160:
! 7161: /*
1.1 misho 7162: * when operating on a reader, the entities definitions
7163: * are always owning the entities subtree.
7164: if (ctxt->parseMode == XML_PARSE_READER)
7165: ent->owner = 1;
7166: */
7167:
7168: cur = ent->children;
7169: while (cur != NULL) {
7170: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7171: if (nw != NULL) {
7172: if (nw->_private == NULL)
7173: nw->_private = cur->_private;
7174: if (firstChild == NULL){
7175: firstChild = nw;
7176: }
7177: nw = xmlAddChild(ctxt->node, nw);
7178: }
7179: if (cur == ent->last) {
7180: /*
7181: * needed to detect some strange empty
7182: * node cases in the reader tests
7183: */
7184: if ((ctxt->parseMode == XML_PARSE_READER) &&
7185: (nw != NULL) &&
7186: (nw->type == XML_ELEMENT_NODE) &&
7187: (nw->children == NULL))
7188: nw->extra = 1;
7189:
7190: break;
7191: }
7192: cur = cur->next;
7193: }
7194: #ifdef LIBXML_LEGACY_ENABLED
7195: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7196: xmlAddEntityReference(ent, firstChild, nw);
7197: #endif /* LIBXML_LEGACY_ENABLED */
7198: } else if (list == NULL) {
7199: xmlNodePtr nw = NULL, cur, next, last,
7200: firstChild = NULL;
1.1.1.2.2.1! misho 7201:
! 7202: /*
! 7203: * We are copying here, make sure there is no abuse
! 7204: */
! 7205: ctxt->sizeentcopy += ent->length;
! 7206: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
! 7207: return;
! 7208:
1.1 misho 7209: /*
7210: * Copy the entity child list and make it the new
7211: * entity child list. The goal is to make sure any
7212: * ID or REF referenced will be the one from the
7213: * document content and not the entity copy.
7214: */
7215: cur = ent->children;
7216: ent->children = NULL;
7217: last = ent->last;
7218: ent->last = NULL;
7219: while (cur != NULL) {
7220: next = cur->next;
7221: cur->next = NULL;
7222: cur->parent = NULL;
7223: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7224: if (nw != NULL) {
7225: if (nw->_private == NULL)
7226: nw->_private = cur->_private;
7227: if (firstChild == NULL){
7228: firstChild = cur;
7229: }
7230: xmlAddChild((xmlNodePtr) ent, nw);
7231: xmlAddChild(ctxt->node, cur);
7232: }
7233: if (cur == last)
7234: break;
7235: cur = next;
7236: }
7237: if (ent->owner == 0)
7238: ent->owner = 1;
7239: #ifdef LIBXML_LEGACY_ENABLED
7240: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7241: xmlAddEntityReference(ent, firstChild, nw);
7242: #endif /* LIBXML_LEGACY_ENABLED */
7243: } else {
7244: const xmlChar *nbktext;
7245:
7246: /*
7247: * the name change is to avoid coalescing of the
7248: * node with a possible previous text one which
7249: * would make ent->children a dangling pointer
7250: */
7251: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7252: -1);
7253: if (ent->children->type == XML_TEXT_NODE)
7254: ent->children->name = nbktext;
7255: if ((ent->last != ent->children) &&
7256: (ent->last->type == XML_TEXT_NODE))
7257: ent->last->name = nbktext;
7258: xmlAddChildList(ctxt->node, ent->children);
7259: }
7260:
7261: /*
7262: * This is to avoid a nasty side effect, see
7263: * characters() in SAX.c
7264: */
7265: ctxt->nodemem = 0;
7266: ctxt->nodelen = 0;
7267: return;
7268: }
7269: }
7270: }
7271:
7272: /**
7273: * xmlParseEntityRef:
7274: * @ctxt: an XML parser context
7275: *
7276: * parse ENTITY references declarations
7277: *
7278: * [68] EntityRef ::= '&' Name ';'
7279: *
7280: * [ WFC: Entity Declared ]
7281: * In a document without any DTD, a document with only an internal DTD
7282: * subset which contains no parameter entity references, or a document
7283: * with "standalone='yes'", the Name given in the entity reference
7284: * must match that in an entity declaration, except that well-formed
7285: * documents need not declare any of the following entities: amp, lt,
7286: * gt, apos, quot. The declaration of a parameter entity must precede
7287: * any reference to it. Similarly, the declaration of a general entity
7288: * must precede any reference to it which appears in a default value in an
7289: * attribute-list declaration. Note that if entities are declared in the
7290: * external subset or in external parameter entities, a non-validating
7291: * processor is not obligated to read and process their declarations;
7292: * for such documents, the rule that an entity must be declared is a
7293: * well-formedness constraint only if standalone='yes'.
7294: *
7295: * [ WFC: Parsed Entity ]
7296: * An entity reference must not contain the name of an unparsed entity
7297: *
7298: * Returns the xmlEntityPtr if found, or NULL otherwise.
7299: */
7300: xmlEntityPtr
7301: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7302: const xmlChar *name;
7303: xmlEntityPtr ent = NULL;
7304:
7305: GROW;
7306:
7307: if (RAW != '&')
7308: return(NULL);
7309: NEXT;
7310: name = xmlParseName(ctxt);
7311: if (name == NULL) {
7312: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7313: "xmlParseEntityRef: no name\n");
7314: return(NULL);
7315: }
7316: if (RAW != ';') {
7317: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7318: return(NULL);
7319: }
7320: NEXT;
7321:
7322: /*
7323: * Predefined entites override any extra definition
7324: */
7325: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7326: ent = xmlGetPredefinedEntity(name);
7327: if (ent != NULL)
7328: return(ent);
7329: }
7330:
7331: /*
7332: * Increate the number of entity references parsed
7333: */
7334: ctxt->nbentities++;
7335:
7336: /*
7337: * Ask first SAX for entity resolution, otherwise try the
7338: * entities which may have stored in the parser context.
7339: */
7340: if (ctxt->sax != NULL) {
7341: if (ctxt->sax->getEntity != NULL)
7342: ent = ctxt->sax->getEntity(ctxt->userData, name);
7343: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7344: (ctxt->options & XML_PARSE_OLDSAX))
7345: ent = xmlGetPredefinedEntity(name);
7346: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7347: (ctxt->userData==ctxt)) {
7348: ent = xmlSAX2GetEntity(ctxt, name);
7349: }
7350: }
7351: /*
7352: * [ WFC: Entity Declared ]
7353: * In a document without any DTD, a document with only an
7354: * internal DTD subset which contains no parameter entity
7355: * references, or a document with "standalone='yes'", the
7356: * Name given in the entity reference must match that in an
7357: * entity declaration, except that well-formed documents
7358: * need not declare any of the following entities: amp, lt,
7359: * gt, apos, quot.
7360: * The declaration of a parameter entity must precede any
7361: * reference to it.
7362: * Similarly, the declaration of a general entity must
7363: * precede any reference to it which appears in a default
7364: * value in an attribute-list declaration. Note that if
7365: * entities are declared in the external subset or in
7366: * external parameter entities, a non-validating processor
7367: * is not obligated to read and process their declarations;
7368: * for such documents, the rule that an entity must be
7369: * declared is a well-formedness constraint only if
7370: * standalone='yes'.
7371: */
7372: if (ent == NULL) {
7373: if ((ctxt->standalone == 1) ||
7374: ((ctxt->hasExternalSubset == 0) &&
7375: (ctxt->hasPErefs == 0))) {
7376: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7377: "Entity '%s' not defined\n", name);
7378: } else {
7379: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7380: "Entity '%s' not defined\n", name);
7381: if ((ctxt->inSubset == 0) &&
7382: (ctxt->sax != NULL) &&
7383: (ctxt->sax->reference != NULL)) {
7384: ctxt->sax->reference(ctxt->userData, name);
7385: }
7386: }
7387: ctxt->valid = 0;
7388: }
7389:
7390: /*
7391: * [ WFC: Parsed Entity ]
7392: * An entity reference must not contain the name of an
7393: * unparsed entity
7394: */
7395: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7396: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7397: "Entity reference to unparsed entity %s\n", name);
7398: }
7399:
7400: /*
7401: * [ WFC: No External Entity References ]
7402: * Attribute values cannot contain direct or indirect
7403: * entity references to external entities.
7404: */
7405: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7406: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7407: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7408: "Attribute references external entity '%s'\n", name);
7409: }
7410: /*
7411: * [ WFC: No < in Attribute Values ]
7412: * The replacement text of any entity referred to directly or
7413: * indirectly in an attribute value (other than "<") must
7414: * not contain a <.
7415: */
7416: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7417: (ent != NULL) && (ent->content != NULL) &&
7418: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7419: (xmlStrchr(ent->content, '<'))) {
7420: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7421: "'<' in entity '%s' is not allowed in attributes values\n", name);
7422: }
7423:
7424: /*
7425: * Internal check, no parameter entities here ...
7426: */
7427: else {
7428: switch (ent->etype) {
7429: case XML_INTERNAL_PARAMETER_ENTITY:
7430: case XML_EXTERNAL_PARAMETER_ENTITY:
7431: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7432: "Attempt to reference the parameter entity '%s'\n",
7433: name);
7434: break;
7435: default:
7436: break;
7437: }
7438: }
7439:
7440: /*
7441: * [ WFC: No Recursion ]
7442: * A parsed entity must not contain a recursive reference
7443: * to itself, either directly or indirectly.
7444: * Done somewhere else
7445: */
7446: return(ent);
7447: }
7448:
7449: /**
7450: * xmlParseStringEntityRef:
7451: * @ctxt: an XML parser context
7452: * @str: a pointer to an index in the string
7453: *
7454: * parse ENTITY references declarations, but this version parses it from
7455: * a string value.
7456: *
7457: * [68] EntityRef ::= '&' Name ';'
7458: *
7459: * [ WFC: Entity Declared ]
7460: * In a document without any DTD, a document with only an internal DTD
7461: * subset which contains no parameter entity references, or a document
7462: * with "standalone='yes'", the Name given in the entity reference
7463: * must match that in an entity declaration, except that well-formed
7464: * documents need not declare any of the following entities: amp, lt,
7465: * gt, apos, quot. The declaration of a parameter entity must precede
7466: * any reference to it. Similarly, the declaration of a general entity
7467: * must precede any reference to it which appears in a default value in an
7468: * attribute-list declaration. Note that if entities are declared in the
7469: * external subset or in external parameter entities, a non-validating
7470: * processor is not obligated to read and process their declarations;
7471: * for such documents, the rule that an entity must be declared is a
7472: * well-formedness constraint only if standalone='yes'.
7473: *
7474: * [ WFC: Parsed Entity ]
7475: * An entity reference must not contain the name of an unparsed entity
7476: *
7477: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7478: * is updated to the current location in the string.
7479: */
7480: static xmlEntityPtr
7481: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7482: xmlChar *name;
7483: const xmlChar *ptr;
7484: xmlChar cur;
7485: xmlEntityPtr ent = NULL;
7486:
7487: if ((str == NULL) || (*str == NULL))
7488: return(NULL);
7489: ptr = *str;
7490: cur = *ptr;
7491: if (cur != '&')
7492: return(NULL);
7493:
7494: ptr++;
7495: name = xmlParseStringName(ctxt, &ptr);
7496: if (name == NULL) {
7497: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7498: "xmlParseStringEntityRef: no name\n");
7499: *str = ptr;
7500: return(NULL);
7501: }
7502: if (*ptr != ';') {
7503: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504: xmlFree(name);
7505: *str = ptr;
7506: return(NULL);
7507: }
7508: ptr++;
7509:
7510:
7511: /*
7512: * Predefined entites override any extra definition
7513: */
7514: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7515: ent = xmlGetPredefinedEntity(name);
7516: if (ent != NULL) {
7517: xmlFree(name);
7518: *str = ptr;
7519: return(ent);
7520: }
7521: }
7522:
7523: /*
7524: * Increate the number of entity references parsed
7525: */
7526: ctxt->nbentities++;
7527:
7528: /*
7529: * Ask first SAX for entity resolution, otherwise try the
7530: * entities which may have stored in the parser context.
7531: */
7532: if (ctxt->sax != NULL) {
7533: if (ctxt->sax->getEntity != NULL)
7534: ent = ctxt->sax->getEntity(ctxt->userData, name);
7535: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7536: ent = xmlGetPredefinedEntity(name);
7537: if ((ent == NULL) && (ctxt->userData==ctxt)) {
7538: ent = xmlSAX2GetEntity(ctxt, name);
7539: }
7540: }
7541:
7542: /*
7543: * [ WFC: Entity Declared ]
7544: * In a document without any DTD, a document with only an
7545: * internal DTD subset which contains no parameter entity
7546: * references, or a document with "standalone='yes'", the
7547: * Name given in the entity reference must match that in an
7548: * entity declaration, except that well-formed documents
7549: * need not declare any of the following entities: amp, lt,
7550: * gt, apos, quot.
7551: * The declaration of a parameter entity must precede any
7552: * reference to it.
7553: * Similarly, the declaration of a general entity must
7554: * precede any reference to it which appears in a default
7555: * value in an attribute-list declaration. Note that if
7556: * entities are declared in the external subset or in
7557: * external parameter entities, a non-validating processor
7558: * is not obligated to read and process their declarations;
7559: * for such documents, the rule that an entity must be
7560: * declared is a well-formedness constraint only if
7561: * standalone='yes'.
7562: */
7563: if (ent == NULL) {
7564: if ((ctxt->standalone == 1) ||
7565: ((ctxt->hasExternalSubset == 0) &&
7566: (ctxt->hasPErefs == 0))) {
7567: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7568: "Entity '%s' not defined\n", name);
7569: } else {
7570: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7571: "Entity '%s' not defined\n",
7572: name);
7573: }
7574: /* TODO ? check regressions ctxt->valid = 0; */
7575: }
7576:
7577: /*
7578: * [ WFC: Parsed Entity ]
7579: * An entity reference must not contain the name of an
7580: * unparsed entity
7581: */
7582: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7583: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7584: "Entity reference to unparsed entity %s\n", name);
7585: }
7586:
7587: /*
7588: * [ WFC: No External Entity References ]
7589: * Attribute values cannot contain direct or indirect
7590: * entity references to external entities.
7591: */
7592: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7593: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7594: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7595: "Attribute references external entity '%s'\n", name);
7596: }
7597: /*
7598: * [ WFC: No < in Attribute Values ]
7599: * The replacement text of any entity referred to directly or
7600: * indirectly in an attribute value (other than "<") must
7601: * not contain a <.
7602: */
7603: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7604: (ent != NULL) && (ent->content != NULL) &&
7605: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7606: (xmlStrchr(ent->content, '<'))) {
7607: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7608: "'<' in entity '%s' is not allowed in attributes values\n",
7609: name);
7610: }
7611:
7612: /*
7613: * Internal check, no parameter entities here ...
7614: */
7615: else {
7616: switch (ent->etype) {
7617: case XML_INTERNAL_PARAMETER_ENTITY:
7618: case XML_EXTERNAL_PARAMETER_ENTITY:
7619: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7620: "Attempt to reference the parameter entity '%s'\n",
7621: name);
7622: break;
7623: default:
7624: break;
7625: }
7626: }
7627:
7628: /*
7629: * [ WFC: No Recursion ]
7630: * A parsed entity must not contain a recursive reference
7631: * to itself, either directly or indirectly.
7632: * Done somewhere else
7633: */
7634:
7635: xmlFree(name);
7636: *str = ptr;
7637: return(ent);
7638: }
7639:
7640: /**
7641: * xmlParsePEReference:
7642: * @ctxt: an XML parser context
7643: *
7644: * parse PEReference declarations
7645: * The entity content is handled directly by pushing it's content as
7646: * a new input stream.
7647: *
7648: * [69] PEReference ::= '%' Name ';'
7649: *
7650: * [ WFC: No Recursion ]
7651: * A parsed entity must not contain a recursive
7652: * reference to itself, either directly or indirectly.
7653: *
7654: * [ WFC: Entity Declared ]
7655: * In a document without any DTD, a document with only an internal DTD
7656: * subset which contains no parameter entity references, or a document
7657: * with "standalone='yes'", ... ... The declaration of a parameter
7658: * entity must precede any reference to it...
7659: *
7660: * [ VC: Entity Declared ]
7661: * In a document with an external subset or external parameter entities
7662: * with "standalone='no'", ... ... The declaration of a parameter entity
7663: * must precede any reference to it...
7664: *
7665: * [ WFC: In DTD ]
7666: * Parameter-entity references may only appear in the DTD.
7667: * NOTE: misleading but this is handled.
7668: */
7669: void
7670: xmlParsePEReference(xmlParserCtxtPtr ctxt)
7671: {
7672: const xmlChar *name;
7673: xmlEntityPtr entity = NULL;
7674: xmlParserInputPtr input;
7675:
7676: if (RAW != '%')
7677: return;
7678: NEXT;
7679: name = xmlParseName(ctxt);
7680: if (name == NULL) {
7681: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7682: "xmlParsePEReference: no name\n");
7683: return;
7684: }
7685: if (RAW != ';') {
7686: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7687: return;
7688: }
7689:
7690: NEXT;
7691:
7692: /*
7693: * Increate the number of entity references parsed
7694: */
7695: ctxt->nbentities++;
7696:
7697: /*
7698: * Request the entity from SAX
7699: */
7700: if ((ctxt->sax != NULL) &&
7701: (ctxt->sax->getParameterEntity != NULL))
7702: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7703: name);
7704: if (entity == NULL) {
7705: /*
7706: * [ WFC: Entity Declared ]
7707: * In a document without any DTD, a document with only an
7708: * internal DTD subset which contains no parameter entity
7709: * references, or a document with "standalone='yes'", ...
7710: * ... The declaration of a parameter entity must precede
7711: * any reference to it...
7712: */
7713: if ((ctxt->standalone == 1) ||
7714: ((ctxt->hasExternalSubset == 0) &&
7715: (ctxt->hasPErefs == 0))) {
7716: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7717: "PEReference: %%%s; not found\n",
7718: name);
7719: } else {
7720: /*
7721: * [ VC: Entity Declared ]
7722: * In a document with an external subset or external
7723: * parameter entities with "standalone='no'", ...
7724: * ... The declaration of a parameter entity must
7725: * precede any reference to it...
7726: */
7727: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7728: "PEReference: %%%s; not found\n",
7729: name, NULL);
7730: ctxt->valid = 0;
7731: }
7732: } else {
7733: /*
7734: * Internal checking in case the entity quest barfed
7735: */
7736: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7737: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7738: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7739: "Internal: %%%s; is not a parameter entity\n",
7740: name, NULL);
7741: } else if (ctxt->input->free != deallocblankswrapper) {
7742: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7743: if (xmlPushInput(ctxt, input) < 0)
7744: return;
7745: } else {
7746: /*
7747: * TODO !!!
7748: * handle the extra spaces added before and after
7749: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7750: */
7751: input = xmlNewEntityInputStream(ctxt, entity);
7752: if (xmlPushInput(ctxt, input) < 0)
7753: return;
7754: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7755: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7756: (IS_BLANK_CH(NXT(5)))) {
7757: xmlParseTextDecl(ctxt);
7758: if (ctxt->errNo ==
7759: XML_ERR_UNSUPPORTED_ENCODING) {
7760: /*
7761: * The XML REC instructs us to stop parsing
7762: * right here
7763: */
7764: ctxt->instate = XML_PARSER_EOF;
7765: return;
7766: }
7767: }
7768: }
7769: }
7770: ctxt->hasPErefs = 1;
7771: }
7772:
7773: /**
7774: * xmlLoadEntityContent:
7775: * @ctxt: an XML parser context
7776: * @entity: an unloaded system entity
7777: *
7778: * Load the original content of the given system entity from the
7779: * ExternalID/SystemID given. This is to be used for Included in Literal
7780: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7781: *
7782: * Returns 0 in case of success and -1 in case of failure
7783: */
7784: static int
7785: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7786: xmlParserInputPtr input;
7787: xmlBufferPtr buf;
7788: int l, c;
7789: int count = 0;
7790:
7791: if ((ctxt == NULL) || (entity == NULL) ||
7792: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7793: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7794: (entity->content != NULL)) {
7795: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7796: "xmlLoadEntityContent parameter error");
7797: return(-1);
7798: }
7799:
7800: if (xmlParserDebugEntities)
7801: xmlGenericError(xmlGenericErrorContext,
7802: "Reading %s entity content input\n", entity->name);
7803:
7804: buf = xmlBufferCreate();
7805: if (buf == NULL) {
7806: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7807: "xmlLoadEntityContent parameter error");
7808: return(-1);
7809: }
7810:
7811: input = xmlNewEntityInputStream(ctxt, entity);
7812: if (input == NULL) {
7813: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7814: "xmlLoadEntityContent input error");
7815: xmlBufferFree(buf);
7816: return(-1);
7817: }
7818:
7819: /*
7820: * Push the entity as the current input, read char by char
7821: * saving to the buffer until the end of the entity or an error
7822: */
7823: if (xmlPushInput(ctxt, input) < 0) {
7824: xmlBufferFree(buf);
7825: return(-1);
7826: }
7827:
7828: GROW;
7829: c = CUR_CHAR(l);
7830: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7831: (IS_CHAR(c))) {
7832: xmlBufferAdd(buf, ctxt->input->cur, l);
7833: if (count++ > 100) {
7834: count = 0;
7835: GROW;
7836: }
7837: NEXTL(l);
7838: c = CUR_CHAR(l);
7839: }
7840:
7841: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7842: xmlPopInput(ctxt);
7843: } else if (!IS_CHAR(c)) {
7844: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7845: "xmlLoadEntityContent: invalid char value %d\n",
7846: c);
7847: xmlBufferFree(buf);
7848: return(-1);
7849: }
7850: entity->content = buf->content;
7851: buf->content = NULL;
7852: xmlBufferFree(buf);
7853:
7854: return(0);
7855: }
7856:
7857: /**
7858: * xmlParseStringPEReference:
7859: * @ctxt: an XML parser context
7860: * @str: a pointer to an index in the string
7861: *
7862: * parse PEReference declarations
7863: *
7864: * [69] PEReference ::= '%' Name ';'
7865: *
7866: * [ WFC: No Recursion ]
7867: * A parsed entity must not contain a recursive
7868: * reference to itself, either directly or indirectly.
7869: *
7870: * [ WFC: Entity Declared ]
7871: * In a document without any DTD, a document with only an internal DTD
7872: * subset which contains no parameter entity references, or a document
7873: * with "standalone='yes'", ... ... The declaration of a parameter
7874: * entity must precede any reference to it...
7875: *
7876: * [ VC: Entity Declared ]
7877: * In a document with an external subset or external parameter entities
7878: * with "standalone='no'", ... ... The declaration of a parameter entity
7879: * must precede any reference to it...
7880: *
7881: * [ WFC: In DTD ]
7882: * Parameter-entity references may only appear in the DTD.
7883: * NOTE: misleading but this is handled.
7884: *
7885: * Returns the string of the entity content.
7886: * str is updated to the current value of the index
7887: */
7888: static xmlEntityPtr
7889: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7890: const xmlChar *ptr;
7891: xmlChar cur;
7892: xmlChar *name;
7893: xmlEntityPtr entity = NULL;
7894:
7895: if ((str == NULL) || (*str == NULL)) return(NULL);
7896: ptr = *str;
7897: cur = *ptr;
7898: if (cur != '%')
7899: return(NULL);
7900: ptr++;
7901: name = xmlParseStringName(ctxt, &ptr);
7902: if (name == NULL) {
7903: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7904: "xmlParseStringPEReference: no name\n");
7905: *str = ptr;
7906: return(NULL);
7907: }
7908: cur = *ptr;
7909: if (cur != ';') {
7910: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7911: xmlFree(name);
7912: *str = ptr;
7913: return(NULL);
7914: }
7915: ptr++;
7916:
7917: /*
7918: * Increate the number of entity references parsed
7919: */
7920: ctxt->nbentities++;
7921:
7922: /*
7923: * Request the entity from SAX
7924: */
7925: if ((ctxt->sax != NULL) &&
7926: (ctxt->sax->getParameterEntity != NULL))
7927: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7928: name);
7929: if (entity == NULL) {
7930: /*
7931: * [ WFC: Entity Declared ]
7932: * In a document without any DTD, a document with only an
7933: * internal DTD subset which contains no parameter entity
7934: * references, or a document with "standalone='yes'", ...
7935: * ... The declaration of a parameter entity must precede
7936: * any reference to it...
7937: */
7938: if ((ctxt->standalone == 1) ||
7939: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7940: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7941: "PEReference: %%%s; not found\n", name);
7942: } else {
7943: /*
7944: * [ VC: Entity Declared ]
7945: * In a document with an external subset or external
7946: * parameter entities with "standalone='no'", ...
7947: * ... The declaration of a parameter entity must
7948: * precede any reference to it...
7949: */
7950: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7951: "PEReference: %%%s; not found\n",
7952: name, NULL);
7953: ctxt->valid = 0;
7954: }
7955: } else {
7956: /*
7957: * Internal checking in case the entity quest barfed
7958: */
7959: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7960: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7961: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7962: "%%%s; is not a parameter entity\n",
7963: name, NULL);
7964: }
7965: }
7966: ctxt->hasPErefs = 1;
7967: xmlFree(name);
7968: *str = ptr;
7969: return(entity);
7970: }
7971:
7972: /**
7973: * xmlParseDocTypeDecl:
7974: * @ctxt: an XML parser context
7975: *
7976: * parse a DOCTYPE declaration
7977: *
7978: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7979: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7980: *
7981: * [ VC: Root Element Type ]
7982: * The Name in the document type declaration must match the element
7983: * type of the root element.
7984: */
7985:
7986: void
7987: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7988: const xmlChar *name = NULL;
7989: xmlChar *ExternalID = NULL;
7990: xmlChar *URI = NULL;
7991:
7992: /*
7993: * We know that '<!DOCTYPE' has been detected.
7994: */
7995: SKIP(9);
7996:
7997: SKIP_BLANKS;
7998:
7999: /*
8000: * Parse the DOCTYPE name.
8001: */
8002: name = xmlParseName(ctxt);
8003: if (name == NULL) {
8004: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8005: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8006: }
8007: ctxt->intSubName = name;
8008:
8009: SKIP_BLANKS;
8010:
8011: /*
8012: * Check for SystemID and ExternalID
8013: */
8014: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8015:
8016: if ((URI != NULL) || (ExternalID != NULL)) {
8017: ctxt->hasExternalSubset = 1;
8018: }
8019: ctxt->extSubURI = URI;
8020: ctxt->extSubSystem = ExternalID;
8021:
8022: SKIP_BLANKS;
8023:
8024: /*
8025: * Create and update the internal subset.
8026: */
8027: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8028: (!ctxt->disableSAX))
8029: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8030:
8031: /*
8032: * Is there any internal subset declarations ?
8033: * they are handled separately in xmlParseInternalSubset()
8034: */
8035: if (RAW == '[')
8036: return;
8037:
8038: /*
8039: * We should be at the end of the DOCTYPE declaration.
8040: */
8041: if (RAW != '>') {
8042: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8043: }
8044: NEXT;
8045: }
8046:
8047: /**
8048: * xmlParseInternalSubset:
8049: * @ctxt: an XML parser context
8050: *
8051: * parse the internal subset declaration
8052: *
8053: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8054: */
8055:
8056: static void
8057: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8058: /*
8059: * Is there any DTD definition ?
8060: */
8061: if (RAW == '[') {
8062: ctxt->instate = XML_PARSER_DTD;
8063: NEXT;
8064: /*
8065: * Parse the succession of Markup declarations and
8066: * PEReferences.
8067: * Subsequence (markupdecl | PEReference | S)*
8068: */
8069: while (RAW != ']') {
8070: const xmlChar *check = CUR_PTR;
8071: unsigned int cons = ctxt->input->consumed;
8072:
8073: SKIP_BLANKS;
8074: xmlParseMarkupDecl(ctxt);
8075: xmlParsePEReference(ctxt);
8076:
8077: /*
8078: * Pop-up of finished entities.
8079: */
8080: while ((RAW == 0) && (ctxt->inputNr > 1))
8081: xmlPopInput(ctxt);
8082:
8083: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8084: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8085: "xmlParseInternalSubset: error detected in Markup declaration\n");
8086: break;
8087: }
8088: }
8089: if (RAW == ']') {
8090: NEXT;
8091: SKIP_BLANKS;
8092: }
8093: }
8094:
8095: /*
8096: * We should be at the end of the DOCTYPE declaration.
8097: */
8098: if (RAW != '>') {
8099: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8100: }
8101: NEXT;
8102: }
8103:
8104: #ifdef LIBXML_SAX1_ENABLED
8105: /**
8106: * xmlParseAttribute:
8107: * @ctxt: an XML parser context
8108: * @value: a xmlChar ** used to store the value of the attribute
8109: *
8110: * parse an attribute
8111: *
8112: * [41] Attribute ::= Name Eq AttValue
8113: *
8114: * [ WFC: No External Entity References ]
8115: * Attribute values cannot contain direct or indirect entity references
8116: * to external entities.
8117: *
8118: * [ WFC: No < in Attribute Values ]
8119: * The replacement text of any entity referred to directly or indirectly in
8120: * an attribute value (other than "<") must not contain a <.
8121: *
8122: * [ VC: Attribute Value Type ]
8123: * The attribute must have been declared; the value must be of the type
8124: * declared for it.
8125: *
8126: * [25] Eq ::= S? '=' S?
8127: *
8128: * With namespace:
8129: *
8130: * [NS 11] Attribute ::= QName Eq AttValue
8131: *
8132: * Also the case QName == xmlns:??? is handled independently as a namespace
8133: * definition.
8134: *
8135: * Returns the attribute name, and the value in *value.
8136: */
8137:
8138: const xmlChar *
8139: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8140: const xmlChar *name;
8141: xmlChar *val;
8142:
8143: *value = NULL;
8144: GROW;
8145: name = xmlParseName(ctxt);
8146: if (name == NULL) {
8147: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8148: "error parsing attribute name\n");
8149: return(NULL);
8150: }
8151:
8152: /*
8153: * read the value
8154: */
8155: SKIP_BLANKS;
8156: if (RAW == '=') {
8157: NEXT;
8158: SKIP_BLANKS;
8159: val = xmlParseAttValue(ctxt);
8160: ctxt->instate = XML_PARSER_CONTENT;
8161: } else {
8162: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8163: "Specification mandate value for attribute %s\n", name);
8164: return(NULL);
8165: }
8166:
8167: /*
8168: * Check that xml:lang conforms to the specification
8169: * No more registered as an error, just generate a warning now
8170: * since this was deprecated in XML second edition
8171: */
8172: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8173: if (!xmlCheckLanguageID(val)) {
8174: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8175: "Malformed value for xml:lang : %s\n",
8176: val, NULL);
8177: }
8178: }
8179:
8180: /*
8181: * Check that xml:space conforms to the specification
8182: */
8183: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8184: if (xmlStrEqual(val, BAD_CAST "default"))
8185: *(ctxt->space) = 0;
8186: else if (xmlStrEqual(val, BAD_CAST "preserve"))
8187: *(ctxt->space) = 1;
8188: else {
8189: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8190: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8191: val, NULL);
8192: }
8193: }
8194:
8195: *value = val;
8196: return(name);
8197: }
8198:
8199: /**
8200: * xmlParseStartTag:
8201: * @ctxt: an XML parser context
8202: *
8203: * parse a start of tag either for rule element or
8204: * EmptyElement. In both case we don't parse the tag closing chars.
8205: *
8206: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8207: *
8208: * [ WFC: Unique Att Spec ]
8209: * No attribute name may appear more than once in the same start-tag or
8210: * empty-element tag.
8211: *
8212: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8213: *
8214: * [ WFC: Unique Att Spec ]
8215: * No attribute name may appear more than once in the same start-tag or
8216: * empty-element tag.
8217: *
8218: * With namespace:
8219: *
8220: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8221: *
8222: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8223: *
8224: * Returns the element name parsed
8225: */
8226:
8227: const xmlChar *
8228: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8229: const xmlChar *name;
8230: const xmlChar *attname;
8231: xmlChar *attvalue;
8232: const xmlChar **atts = ctxt->atts;
8233: int nbatts = 0;
8234: int maxatts = ctxt->maxatts;
8235: int i;
8236:
8237: if (RAW != '<') return(NULL);
8238: NEXT1;
8239:
8240: name = xmlParseName(ctxt);
8241: if (name == NULL) {
8242: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8243: "xmlParseStartTag: invalid element name\n");
8244: return(NULL);
8245: }
8246:
8247: /*
8248: * Now parse the attributes, it ends up with the ending
8249: *
8250: * (S Attribute)* S?
8251: */
8252: SKIP_BLANKS;
8253: GROW;
8254:
8255: while ((RAW != '>') &&
8256: ((RAW != '/') || (NXT(1) != '>')) &&
8257: (IS_BYTE_CHAR(RAW))) {
8258: const xmlChar *q = CUR_PTR;
8259: unsigned int cons = ctxt->input->consumed;
8260:
8261: attname = xmlParseAttribute(ctxt, &attvalue);
8262: if ((attname != NULL) && (attvalue != NULL)) {
8263: /*
8264: * [ WFC: Unique Att Spec ]
8265: * No attribute name may appear more than once in the same
8266: * start-tag or empty-element tag.
8267: */
8268: for (i = 0; i < nbatts;i += 2) {
8269: if (xmlStrEqual(atts[i], attname)) {
8270: xmlErrAttributeDup(ctxt, NULL, attname);
8271: xmlFree(attvalue);
8272: goto failed;
8273: }
8274: }
8275: /*
8276: * Add the pair to atts
8277: */
8278: if (atts == NULL) {
8279: maxatts = 22; /* allow for 10 attrs by default */
8280: atts = (const xmlChar **)
8281: xmlMalloc(maxatts * sizeof(xmlChar *));
8282: if (atts == NULL) {
8283: xmlErrMemory(ctxt, NULL);
8284: if (attvalue != NULL)
8285: xmlFree(attvalue);
8286: goto failed;
8287: }
8288: ctxt->atts = atts;
8289: ctxt->maxatts = maxatts;
8290: } else if (nbatts + 4 > maxatts) {
8291: const xmlChar **n;
8292:
8293: maxatts *= 2;
8294: n = (const xmlChar **) xmlRealloc((void *) atts,
8295: maxatts * sizeof(const xmlChar *));
8296: if (n == NULL) {
8297: xmlErrMemory(ctxt, NULL);
8298: if (attvalue != NULL)
8299: xmlFree(attvalue);
8300: goto failed;
8301: }
8302: atts = n;
8303: ctxt->atts = atts;
8304: ctxt->maxatts = maxatts;
8305: }
8306: atts[nbatts++] = attname;
8307: atts[nbatts++] = attvalue;
8308: atts[nbatts] = NULL;
8309: atts[nbatts + 1] = NULL;
8310: } else {
8311: if (attvalue != NULL)
8312: xmlFree(attvalue);
8313: }
8314:
8315: failed:
8316:
8317: GROW
8318: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8319: break;
8320: if (!IS_BLANK_CH(RAW)) {
8321: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8322: "attributes construct error\n");
8323: }
8324: SKIP_BLANKS;
8325: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8326: (attname == NULL) && (attvalue == NULL)) {
8327: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8328: "xmlParseStartTag: problem parsing attributes\n");
8329: break;
8330: }
8331: SHRINK;
8332: GROW;
8333: }
8334:
8335: /*
8336: * SAX: Start of Element !
8337: */
8338: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8339: (!ctxt->disableSAX)) {
8340: if (nbatts > 0)
8341: ctxt->sax->startElement(ctxt->userData, name, atts);
8342: else
8343: ctxt->sax->startElement(ctxt->userData, name, NULL);
8344: }
8345:
8346: if (atts != NULL) {
8347: /* Free only the content strings */
8348: for (i = 1;i < nbatts;i+=2)
8349: if (atts[i] != NULL)
8350: xmlFree((xmlChar *) atts[i]);
8351: }
8352: return(name);
8353: }
8354:
8355: /**
8356: * xmlParseEndTag1:
8357: * @ctxt: an XML parser context
8358: * @line: line of the start tag
8359: * @nsNr: number of namespaces on the start tag
8360: *
8361: * parse an end of tag
8362: *
8363: * [42] ETag ::= '</' Name S? '>'
8364: *
8365: * With namespace
8366: *
8367: * [NS 9] ETag ::= '</' QName S? '>'
8368: */
8369:
8370: static void
8371: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8372: const xmlChar *name;
8373:
8374: GROW;
8375: if ((RAW != '<') || (NXT(1) != '/')) {
8376: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8377: "xmlParseEndTag: '</' not found\n");
8378: return;
8379: }
8380: SKIP(2);
8381:
8382: name = xmlParseNameAndCompare(ctxt,ctxt->name);
8383:
8384: /*
8385: * We should definitely be at the ending "S? '>'" part
8386: */
8387: GROW;
8388: SKIP_BLANKS;
8389: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8390: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8391: } else
8392: NEXT1;
8393:
8394: /*
8395: * [ WFC: Element Type Match ]
8396: * The Name in an element's end-tag must match the element type in the
8397: * start-tag.
8398: *
8399: */
8400: if (name != (xmlChar*)1) {
8401: if (name == NULL) name = BAD_CAST "unparseable";
8402: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8403: "Opening and ending tag mismatch: %s line %d and %s\n",
8404: ctxt->name, line, name);
8405: }
8406:
8407: /*
8408: * SAX: End of Tag
8409: */
8410: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8411: (!ctxt->disableSAX))
8412: ctxt->sax->endElement(ctxt->userData, ctxt->name);
8413:
8414: namePop(ctxt);
8415: spacePop(ctxt);
8416: return;
8417: }
8418:
8419: /**
8420: * xmlParseEndTag:
8421: * @ctxt: an XML parser context
8422: *
8423: * parse an end of tag
8424: *
8425: * [42] ETag ::= '</' Name S? '>'
8426: *
8427: * With namespace
8428: *
8429: * [NS 9] ETag ::= '</' QName S? '>'
8430: */
8431:
8432: void
8433: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8434: xmlParseEndTag1(ctxt, 0);
8435: }
8436: #endif /* LIBXML_SAX1_ENABLED */
8437:
8438: /************************************************************************
8439: * *
8440: * SAX 2 specific operations *
8441: * *
8442: ************************************************************************/
8443:
8444: /*
8445: * xmlGetNamespace:
8446: * @ctxt: an XML parser context
8447: * @prefix: the prefix to lookup
8448: *
8449: * Lookup the namespace name for the @prefix (which ca be NULL)
8450: * The prefix must come from the @ctxt->dict dictionnary
8451: *
8452: * Returns the namespace name or NULL if not bound
8453: */
8454: static const xmlChar *
8455: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8456: int i;
8457:
8458: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8459: for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8460: if (ctxt->nsTab[i] == prefix) {
8461: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8462: return(NULL);
8463: return(ctxt->nsTab[i + 1]);
8464: }
8465: return(NULL);
8466: }
8467:
8468: /**
8469: * xmlParseQName:
8470: * @ctxt: an XML parser context
8471: * @prefix: pointer to store the prefix part
8472: *
8473: * parse an XML Namespace QName
8474: *
8475: * [6] QName ::= (Prefix ':')? LocalPart
8476: * [7] Prefix ::= NCName
8477: * [8] LocalPart ::= NCName
8478: *
8479: * Returns the Name parsed or NULL
8480: */
8481:
8482: static const xmlChar *
8483: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8484: const xmlChar *l, *p;
8485:
8486: GROW;
8487:
8488: l = xmlParseNCName(ctxt);
8489: if (l == NULL) {
8490: if (CUR == ':') {
8491: l = xmlParseName(ctxt);
8492: if (l != NULL) {
8493: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8494: "Failed to parse QName '%s'\n", l, NULL, NULL);
8495: *prefix = NULL;
8496: return(l);
8497: }
8498: }
8499: return(NULL);
8500: }
8501: if (CUR == ':') {
8502: NEXT;
8503: p = l;
8504: l = xmlParseNCName(ctxt);
8505: if (l == NULL) {
8506: xmlChar *tmp;
8507:
8508: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8509: "Failed to parse QName '%s:'\n", p, NULL, NULL);
8510: l = xmlParseNmtoken(ctxt);
8511: if (l == NULL)
8512: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8513: else {
8514: tmp = xmlBuildQName(l, p, NULL, 0);
8515: xmlFree((char *)l);
8516: }
8517: p = xmlDictLookup(ctxt->dict, tmp, -1);
8518: if (tmp != NULL) xmlFree(tmp);
8519: *prefix = NULL;
8520: return(p);
8521: }
8522: if (CUR == ':') {
8523: xmlChar *tmp;
8524:
8525: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8526: "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8527: NEXT;
8528: tmp = (xmlChar *) xmlParseName(ctxt);
8529: if (tmp != NULL) {
8530: tmp = xmlBuildQName(tmp, l, NULL, 0);
8531: l = xmlDictLookup(ctxt->dict, tmp, -1);
8532: if (tmp != NULL) xmlFree(tmp);
8533: *prefix = p;
8534: return(l);
8535: }
8536: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8537: l = xmlDictLookup(ctxt->dict, tmp, -1);
8538: if (tmp != NULL) xmlFree(tmp);
8539: *prefix = p;
8540: return(l);
8541: }
8542: *prefix = p;
8543: } else
8544: *prefix = NULL;
8545: return(l);
8546: }
8547:
8548: /**
8549: * xmlParseQNameAndCompare:
8550: * @ctxt: an XML parser context
8551: * @name: the localname
8552: * @prefix: the prefix, if any.
8553: *
8554: * parse an XML name and compares for match
8555: * (specialized for endtag parsing)
8556: *
8557: * Returns NULL for an illegal name, (xmlChar*) 1 for success
8558: * and the name for mismatch
8559: */
8560:
8561: static const xmlChar *
8562: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8563: xmlChar const *prefix) {
8564: const xmlChar *cmp;
8565: const xmlChar *in;
8566: const xmlChar *ret;
8567: const xmlChar *prefix2;
8568:
8569: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8570:
8571: GROW;
8572: in = ctxt->input->cur;
8573:
8574: cmp = prefix;
8575: while (*in != 0 && *in == *cmp) {
8576: ++in;
8577: ++cmp;
8578: }
8579: if ((*cmp == 0) && (*in == ':')) {
8580: in++;
8581: cmp = name;
8582: while (*in != 0 && *in == *cmp) {
8583: ++in;
8584: ++cmp;
8585: }
8586: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8587: /* success */
8588: ctxt->input->cur = in;
8589: return((const xmlChar*) 1);
8590: }
8591: }
8592: /*
8593: * all strings coms from the dictionary, equality can be done directly
8594: */
8595: ret = xmlParseQName (ctxt, &prefix2);
8596: if ((ret == name) && (prefix == prefix2))
8597: return((const xmlChar*) 1);
8598: return ret;
8599: }
8600:
8601: /**
8602: * xmlParseAttValueInternal:
8603: * @ctxt: an XML parser context
8604: * @len: attribute len result
8605: * @alloc: whether the attribute was reallocated as a new string
8606: * @normalize: if 1 then further non-CDATA normalization must be done
8607: *
8608: * parse a value for an attribute.
8609: * NOTE: if no normalization is needed, the routine will return pointers
8610: * directly from the data buffer.
8611: *
8612: * 3.3.3 Attribute-Value Normalization:
8613: * Before the value of an attribute is passed to the application or
8614: * checked for validity, the XML processor must normalize it as follows:
8615: * - a character reference is processed by appending the referenced
8616: * character to the attribute value
8617: * - an entity reference is processed by recursively processing the
8618: * replacement text of the entity
8619: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8620: * appending #x20 to the normalized value, except that only a single
8621: * #x20 is appended for a "#xD#xA" sequence that is part of an external
8622: * parsed entity or the literal entity value of an internal parsed entity
8623: * - other characters are processed by appending them to the normalized value
8624: * If the declared value is not CDATA, then the XML processor must further
8625: * process the normalized attribute value by discarding any leading and
8626: * trailing space (#x20) characters, and by replacing sequences of space
8627: * (#x20) characters by a single space (#x20) character.
8628: * All attributes for which no declaration has been read should be treated
8629: * by a non-validating parser as if declared CDATA.
8630: *
8631: * Returns the AttValue parsed or NULL. The value has to be freed by the
8632: * caller if it was copied, this can be detected by val[*len] == 0.
8633: */
8634:
8635: static xmlChar *
8636: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8637: int normalize)
8638: {
8639: xmlChar limit = 0;
8640: const xmlChar *in = NULL, *start, *end, *last;
8641: xmlChar *ret = NULL;
8642:
8643: GROW;
8644: in = (xmlChar *) CUR_PTR;
8645: if (*in != '"' && *in != '\'') {
8646: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8647: return (NULL);
8648: }
8649: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8650:
8651: /*
8652: * try to handle in this routine the most common case where no
8653: * allocation of a new string is required and where content is
8654: * pure ASCII.
8655: */
8656: limit = *in++;
8657: end = ctxt->input->end;
8658: start = in;
8659: if (in >= end) {
8660: const xmlChar *oldbase = ctxt->input->base;
8661: GROW;
8662: if (oldbase != ctxt->input->base) {
8663: long delta = ctxt->input->base - oldbase;
8664: start = start + delta;
8665: in = in + delta;
8666: }
8667: end = ctxt->input->end;
8668: }
8669: if (normalize) {
8670: /*
8671: * Skip any leading spaces
8672: */
8673: while ((in < end) && (*in != limit) &&
8674: ((*in == 0x20) || (*in == 0x9) ||
8675: (*in == 0xA) || (*in == 0xD))) {
8676: in++;
8677: start = in;
8678: if (in >= end) {
8679: const xmlChar *oldbase = ctxt->input->base;
8680: GROW;
8681: if (oldbase != ctxt->input->base) {
8682: long delta = ctxt->input->base - oldbase;
8683: start = start + delta;
8684: in = in + delta;
8685: }
8686: end = ctxt->input->end;
8687: }
8688: }
8689: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8690: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8691: if ((*in++ == 0x20) && (*in == 0x20)) break;
8692: if (in >= end) {
8693: const xmlChar *oldbase = ctxt->input->base;
8694: GROW;
8695: if (oldbase != ctxt->input->base) {
8696: long delta = ctxt->input->base - oldbase;
8697: start = start + delta;
8698: in = in + delta;
8699: }
8700: end = ctxt->input->end;
8701: }
8702: }
8703: last = in;
8704: /*
8705: * skip the trailing blanks
8706: */
8707: while ((last[-1] == 0x20) && (last > start)) last--;
8708: while ((in < end) && (*in != limit) &&
8709: ((*in == 0x20) || (*in == 0x9) ||
8710: (*in == 0xA) || (*in == 0xD))) {
8711: in++;
8712: if (in >= end) {
8713: const xmlChar *oldbase = ctxt->input->base;
8714: GROW;
8715: if (oldbase != ctxt->input->base) {
8716: long delta = ctxt->input->base - oldbase;
8717: start = start + delta;
8718: in = in + delta;
8719: last = last + delta;
8720: }
8721: end = ctxt->input->end;
8722: }
8723: }
8724: if (*in != limit) goto need_complex;
8725: } else {
8726: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8727: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8728: in++;
8729: if (in >= end) {
8730: const xmlChar *oldbase = ctxt->input->base;
8731: GROW;
8732: if (oldbase != ctxt->input->base) {
8733: long delta = ctxt->input->base - oldbase;
8734: start = start + delta;
8735: in = in + delta;
8736: }
8737: end = ctxt->input->end;
8738: }
8739: }
8740: last = in;
8741: if (*in != limit) goto need_complex;
8742: }
8743: in++;
8744: if (len != NULL) {
8745: *len = last - start;
8746: ret = (xmlChar *) start;
8747: } else {
8748: if (alloc) *alloc = 1;
8749: ret = xmlStrndup(start, last - start);
8750: }
8751: CUR_PTR = in;
8752: if (alloc) *alloc = 0;
8753: return ret;
8754: need_complex:
8755: if (alloc) *alloc = 1;
8756: return xmlParseAttValueComplex(ctxt, len, normalize);
8757: }
8758:
8759: /**
8760: * xmlParseAttribute2:
8761: * @ctxt: an XML parser context
8762: * @pref: the element prefix
8763: * @elem: the element name
8764: * @prefix: a xmlChar ** used to store the value of the attribute prefix
8765: * @value: a xmlChar ** used to store the value of the attribute
8766: * @len: an int * to save the length of the attribute
8767: * @alloc: an int * to indicate if the attribute was allocated
8768: *
8769: * parse an attribute in the new SAX2 framework.
8770: *
8771: * Returns the attribute name, and the value in *value, .
8772: */
8773:
8774: static const xmlChar *
8775: xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8776: const xmlChar * pref, const xmlChar * elem,
8777: const xmlChar ** prefix, xmlChar ** value,
8778: int *len, int *alloc)
8779: {
8780: const xmlChar *name;
8781: xmlChar *val, *internal_val = NULL;
8782: int normalize = 0;
8783:
8784: *value = NULL;
8785: GROW;
8786: name = xmlParseQName(ctxt, prefix);
8787: if (name == NULL) {
8788: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789: "error parsing attribute name\n");
8790: return (NULL);
8791: }
8792:
8793: /*
8794: * get the type if needed
8795: */
8796: if (ctxt->attsSpecial != NULL) {
8797: int type;
8798:
8799: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8800: pref, elem, *prefix, name);
8801: if (type != 0)
8802: normalize = 1;
8803: }
8804:
8805: /*
8806: * read the value
8807: */
8808: SKIP_BLANKS;
8809: if (RAW == '=') {
8810: NEXT;
8811: SKIP_BLANKS;
8812: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8813: if (normalize) {
8814: /*
8815: * Sometimes a second normalisation pass for spaces is needed
8816: * but that only happens if charrefs or entities refernces
8817: * have been used in the attribute value, i.e. the attribute
8818: * value have been extracted in an allocated string already.
8819: */
8820: if (*alloc) {
8821: const xmlChar *val2;
8822:
8823: val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8824: if ((val2 != NULL) && (val2 != val)) {
8825: xmlFree(val);
8826: val = (xmlChar *) val2;
8827: }
8828: }
8829: }
8830: ctxt->instate = XML_PARSER_CONTENT;
8831: } else {
8832: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8833: "Specification mandate value for attribute %s\n",
8834: name);
8835: return (NULL);
8836: }
8837:
8838: if (*prefix == ctxt->str_xml) {
8839: /*
8840: * Check that xml:lang conforms to the specification
8841: * No more registered as an error, just generate a warning now
8842: * since this was deprecated in XML second edition
8843: */
8844: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8845: internal_val = xmlStrndup(val, *len);
8846: if (!xmlCheckLanguageID(internal_val)) {
8847: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8848: "Malformed value for xml:lang : %s\n",
8849: internal_val, NULL);
8850: }
8851: }
8852:
8853: /*
8854: * Check that xml:space conforms to the specification
8855: */
8856: if (xmlStrEqual(name, BAD_CAST "space")) {
8857: internal_val = xmlStrndup(val, *len);
8858: if (xmlStrEqual(internal_val, BAD_CAST "default"))
8859: *(ctxt->space) = 0;
8860: else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8861: *(ctxt->space) = 1;
8862: else {
8863: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8864: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8865: internal_val, NULL);
8866: }
8867: }
8868: if (internal_val) {
8869: xmlFree(internal_val);
8870: }
8871: }
8872:
8873: *value = val;
8874: return (name);
8875: }
8876: /**
8877: * xmlParseStartTag2:
8878: * @ctxt: an XML parser context
8879: *
8880: * parse a start of tag either for rule element or
8881: * EmptyElement. In both case we don't parse the tag closing chars.
8882: * This routine is called when running SAX2 parsing
8883: *
8884: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8885: *
8886: * [ WFC: Unique Att Spec ]
8887: * No attribute name may appear more than once in the same start-tag or
8888: * empty-element tag.
8889: *
8890: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8891: *
8892: * [ WFC: Unique Att Spec ]
8893: * No attribute name may appear more than once in the same start-tag or
8894: * empty-element tag.
8895: *
8896: * With namespace:
8897: *
8898: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8899: *
8900: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8901: *
8902: * Returns the element name parsed
8903: */
8904:
8905: static const xmlChar *
8906: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8907: const xmlChar **URI, int *tlen) {
8908: const xmlChar *localname;
8909: const xmlChar *prefix;
8910: const xmlChar *attname;
8911: const xmlChar *aprefix;
8912: const xmlChar *nsname;
8913: xmlChar *attvalue;
8914: const xmlChar **atts = ctxt->atts;
8915: int maxatts = ctxt->maxatts;
8916: int nratts, nbatts, nbdef;
8917: int i, j, nbNs, attval, oldline, oldcol;
8918: const xmlChar *base;
8919: unsigned long cur;
8920: int nsNr = ctxt->nsNr;
8921:
8922: if (RAW != '<') return(NULL);
8923: NEXT1;
8924:
8925: /*
8926: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8927: * point since the attribute values may be stored as pointers to
8928: * the buffer and calling SHRINK would destroy them !
8929: * The Shrinking is only possible once the full set of attribute
8930: * callbacks have been done.
8931: */
8932: reparse:
8933: SHRINK;
8934: base = ctxt->input->base;
8935: cur = ctxt->input->cur - ctxt->input->base;
8936: oldline = ctxt->input->line;
8937: oldcol = ctxt->input->col;
8938: nbatts = 0;
8939: nratts = 0;
8940: nbdef = 0;
8941: nbNs = 0;
8942: attval = 0;
8943: /* Forget any namespaces added during an earlier parse of this element. */
8944: ctxt->nsNr = nsNr;
8945:
8946: localname = xmlParseQName(ctxt, &prefix);
8947: if (localname == NULL) {
8948: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8949: "StartTag: invalid element name\n");
8950: return(NULL);
8951: }
8952: *tlen = ctxt->input->cur - ctxt->input->base - cur;
8953:
8954: /*
8955: * Now parse the attributes, it ends up with the ending
8956: *
8957: * (S Attribute)* S?
8958: */
8959: SKIP_BLANKS;
8960: GROW;
8961: if (ctxt->input->base != base) goto base_changed;
8962:
8963: while ((RAW != '>') &&
8964: ((RAW != '/') || (NXT(1) != '>')) &&
8965: (IS_BYTE_CHAR(RAW))) {
8966: const xmlChar *q = CUR_PTR;
8967: unsigned int cons = ctxt->input->consumed;
8968: int len = -1, alloc = 0;
8969:
8970: attname = xmlParseAttribute2(ctxt, prefix, localname,
8971: &aprefix, &attvalue, &len, &alloc);
8972: if (ctxt->input->base != base) {
8973: if ((attvalue != NULL) && (alloc != 0))
8974: xmlFree(attvalue);
8975: attvalue = NULL;
8976: goto base_changed;
8977: }
8978: if ((attname != NULL) && (attvalue != NULL)) {
8979: if (len < 0) len = xmlStrlen(attvalue);
8980: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8981: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8982: xmlURIPtr uri;
8983:
8984: if (*URL != 0) {
8985: uri = xmlParseURI((const char *) URL);
8986: if (uri == NULL) {
8987: xmlNsErr(ctxt, XML_WAR_NS_URI,
8988: "xmlns: '%s' is not a valid URI\n",
8989: URL, NULL, NULL);
8990: } else {
8991: if (uri->scheme == NULL) {
8992: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8993: "xmlns: URI %s is not absolute\n",
8994: URL, NULL, NULL);
8995: }
8996: xmlFreeURI(uri);
8997: }
8998: if (URL == ctxt->str_xml_ns) {
8999: if (attname != ctxt->str_xml) {
9000: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9001: "xml namespace URI cannot be the default namespace\n",
9002: NULL, NULL, NULL);
9003: }
9004: goto skip_default_ns;
9005: }
9006: if ((len == 29) &&
9007: (xmlStrEqual(URL,
9008: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9009: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9010: "reuse of the xmlns namespace name is forbidden\n",
9011: NULL, NULL, NULL);
9012: goto skip_default_ns;
9013: }
9014: }
9015: /*
9016: * check that it's not a defined namespace
9017: */
9018: for (j = 1;j <= nbNs;j++)
9019: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9020: break;
9021: if (j <= nbNs)
9022: xmlErrAttributeDup(ctxt, NULL, attname);
9023: else
9024: if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9025: skip_default_ns:
9026: if (alloc != 0) xmlFree(attvalue);
9027: SKIP_BLANKS;
9028: continue;
9029: }
9030: if (aprefix == ctxt->str_xmlns) {
9031: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9032: xmlURIPtr uri;
9033:
9034: if (attname == ctxt->str_xml) {
9035: if (URL != ctxt->str_xml_ns) {
9036: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9037: "xml namespace prefix mapped to wrong URI\n",
9038: NULL, NULL, NULL);
9039: }
9040: /*
9041: * Do not keep a namespace definition node
9042: */
9043: goto skip_ns;
9044: }
9045: if (URL == ctxt->str_xml_ns) {
9046: if (attname != ctxt->str_xml) {
9047: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9048: "xml namespace URI mapped to wrong prefix\n",
9049: NULL, NULL, NULL);
9050: }
9051: goto skip_ns;
9052: }
9053: if (attname == ctxt->str_xmlns) {
9054: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9055: "redefinition of the xmlns prefix is forbidden\n",
9056: NULL, NULL, NULL);
9057: goto skip_ns;
9058: }
9059: if ((len == 29) &&
9060: (xmlStrEqual(URL,
9061: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9062: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9063: "reuse of the xmlns namespace name is forbidden\n",
9064: NULL, NULL, NULL);
9065: goto skip_ns;
9066: }
9067: if ((URL == NULL) || (URL[0] == 0)) {
9068: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9069: "xmlns:%s: Empty XML namespace is not allowed\n",
9070: attname, NULL, NULL);
9071: goto skip_ns;
9072: } else {
9073: uri = xmlParseURI((const char *) URL);
9074: if (uri == NULL) {
9075: xmlNsErr(ctxt, XML_WAR_NS_URI,
9076: "xmlns:%s: '%s' is not a valid URI\n",
9077: attname, URL, NULL);
9078: } else {
9079: if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9080: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9081: "xmlns:%s: URI %s is not absolute\n",
9082: attname, URL, NULL);
9083: }
9084: xmlFreeURI(uri);
9085: }
9086: }
9087:
9088: /*
9089: * check that it's not a defined namespace
9090: */
9091: for (j = 1;j <= nbNs;j++)
9092: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9093: break;
9094: if (j <= nbNs)
9095: xmlErrAttributeDup(ctxt, aprefix, attname);
9096: else
9097: if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9098: skip_ns:
9099: if (alloc != 0) xmlFree(attvalue);
9100: SKIP_BLANKS;
9101: if (ctxt->input->base != base) goto base_changed;
9102: continue;
9103: }
9104:
9105: /*
9106: * Add the pair to atts
9107: */
9108: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9109: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9110: if (attvalue[len] == 0)
9111: xmlFree(attvalue);
9112: goto failed;
9113: }
9114: maxatts = ctxt->maxatts;
9115: atts = ctxt->atts;
9116: }
9117: ctxt->attallocs[nratts++] = alloc;
9118: atts[nbatts++] = attname;
9119: atts[nbatts++] = aprefix;
9120: atts[nbatts++] = NULL; /* the URI will be fetched later */
9121: atts[nbatts++] = attvalue;
9122: attvalue += len;
9123: atts[nbatts++] = attvalue;
9124: /*
9125: * tag if some deallocation is needed
9126: */
9127: if (alloc != 0) attval = 1;
9128: } else {
9129: if ((attvalue != NULL) && (attvalue[len] == 0))
9130: xmlFree(attvalue);
9131: }
9132:
9133: failed:
9134:
9135: GROW
9136: if (ctxt->input->base != base) goto base_changed;
9137: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9138: break;
9139: if (!IS_BLANK_CH(RAW)) {
9140: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9141: "attributes construct error\n");
9142: break;
9143: }
9144: SKIP_BLANKS;
9145: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9146: (attname == NULL) && (attvalue == NULL)) {
9147: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9148: "xmlParseStartTag: problem parsing attributes\n");
9149: break;
9150: }
9151: GROW;
9152: if (ctxt->input->base != base) goto base_changed;
9153: }
9154:
9155: /*
9156: * The attributes defaulting
9157: */
9158: if (ctxt->attsDefault != NULL) {
9159: xmlDefAttrsPtr defaults;
9160:
9161: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9162: if (defaults != NULL) {
9163: for (i = 0;i < defaults->nbAttrs;i++) {
9164: attname = defaults->values[5 * i];
9165: aprefix = defaults->values[5 * i + 1];
9166:
9167: /*
9168: * special work for namespaces defaulted defs
9169: */
9170: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9171: /*
9172: * check that it's not a defined namespace
9173: */
9174: for (j = 1;j <= nbNs;j++)
9175: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9176: break;
9177: if (j <= nbNs) continue;
9178:
9179: nsname = xmlGetNamespace(ctxt, NULL);
9180: if (nsname != defaults->values[5 * i + 2]) {
9181: if (nsPush(ctxt, NULL,
9182: defaults->values[5 * i + 2]) > 0)
9183: nbNs++;
9184: }
9185: } else if (aprefix == ctxt->str_xmlns) {
9186: /*
9187: * check that it's not a defined namespace
9188: */
9189: for (j = 1;j <= nbNs;j++)
9190: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9191: break;
9192: if (j <= nbNs) continue;
9193:
9194: nsname = xmlGetNamespace(ctxt, attname);
9195: if (nsname != defaults->values[2]) {
9196: if (nsPush(ctxt, attname,
9197: defaults->values[5 * i + 2]) > 0)
9198: nbNs++;
9199: }
9200: } else {
9201: /*
9202: * check that it's not a defined attribute
9203: */
9204: for (j = 0;j < nbatts;j+=5) {
9205: if ((attname == atts[j]) && (aprefix == atts[j+1]))
9206: break;
9207: }
9208: if (j < nbatts) continue;
9209:
9210: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9211: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9212: return(NULL);
9213: }
9214: maxatts = ctxt->maxatts;
9215: atts = ctxt->atts;
9216: }
9217: atts[nbatts++] = attname;
9218: atts[nbatts++] = aprefix;
9219: if (aprefix == NULL)
9220: atts[nbatts++] = NULL;
9221: else
9222: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9223: atts[nbatts++] = defaults->values[5 * i + 2];
9224: atts[nbatts++] = defaults->values[5 * i + 3];
9225: if ((ctxt->standalone == 1) &&
9226: (defaults->values[5 * i + 4] != NULL)) {
9227: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9228: "standalone: attribute %s on %s defaulted from external subset\n",
9229: attname, localname);
9230: }
9231: nbdef++;
9232: }
9233: }
9234: }
9235: }
9236:
9237: /*
9238: * The attributes checkings
9239: */
9240: for (i = 0; i < nbatts;i += 5) {
9241: /*
9242: * The default namespace does not apply to attribute names.
9243: */
9244: if (atts[i + 1] != NULL) {
9245: nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9246: if (nsname == NULL) {
9247: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9248: "Namespace prefix %s for %s on %s is not defined\n",
9249: atts[i + 1], atts[i], localname);
9250: }
9251: atts[i + 2] = nsname;
9252: } else
9253: nsname = NULL;
9254: /*
9255: * [ WFC: Unique Att Spec ]
9256: * No attribute name may appear more than once in the same
9257: * start-tag or empty-element tag.
9258: * As extended by the Namespace in XML REC.
9259: */
9260: for (j = 0; j < i;j += 5) {
9261: if (atts[i] == atts[j]) {
9262: if (atts[i+1] == atts[j+1]) {
9263: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9264: break;
9265: }
9266: if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9267: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9268: "Namespaced Attribute %s in '%s' redefined\n",
9269: atts[i], nsname, NULL);
9270: break;
9271: }
9272: }
9273: }
9274: }
9275:
9276: nsname = xmlGetNamespace(ctxt, prefix);
9277: if ((prefix != NULL) && (nsname == NULL)) {
9278: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9279: "Namespace prefix %s on %s is not defined\n",
9280: prefix, localname, NULL);
9281: }
9282: *pref = prefix;
9283: *URI = nsname;
9284:
9285: /*
9286: * SAX: Start of Element !
9287: */
9288: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9289: (!ctxt->disableSAX)) {
9290: if (nbNs > 0)
9291: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9292: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9293: nbatts / 5, nbdef, atts);
9294: else
9295: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9296: nsname, 0, NULL, nbatts / 5, nbdef, atts);
9297: }
9298:
9299: /*
9300: * Free up attribute allocated strings if needed
9301: */
9302: if (attval != 0) {
9303: for (i = 3,j = 0; j < nratts;i += 5,j++)
9304: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9305: xmlFree((xmlChar *) atts[i]);
9306: }
9307:
9308: return(localname);
9309:
9310: base_changed:
9311: /*
9312: * the attribute strings are valid iif the base didn't changed
9313: */
9314: if (attval != 0) {
9315: for (i = 3,j = 0; j < nratts;i += 5,j++)
9316: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9317: xmlFree((xmlChar *) atts[i]);
9318: }
9319: ctxt->input->cur = ctxt->input->base + cur;
9320: ctxt->input->line = oldline;
9321: ctxt->input->col = oldcol;
9322: if (ctxt->wellFormed == 1) {
9323: goto reparse;
9324: }
9325: return(NULL);
9326: }
9327:
9328: /**
9329: * xmlParseEndTag2:
9330: * @ctxt: an XML parser context
9331: * @line: line of the start tag
9332: * @nsNr: number of namespaces on the start tag
9333: *
9334: * parse an end of tag
9335: *
9336: * [42] ETag ::= '</' Name S? '>'
9337: *
9338: * With namespace
9339: *
9340: * [NS 9] ETag ::= '</' QName S? '>'
9341: */
9342:
9343: static void
9344: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9345: const xmlChar *URI, int line, int nsNr, int tlen) {
9346: const xmlChar *name;
9347:
9348: GROW;
9349: if ((RAW != '<') || (NXT(1) != '/')) {
9350: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9351: return;
9352: }
9353: SKIP(2);
9354:
9355: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9356: if (ctxt->input->cur[tlen] == '>') {
9357: ctxt->input->cur += tlen + 1;
9358: goto done;
9359: }
9360: ctxt->input->cur += tlen;
9361: name = (xmlChar*)1;
9362: } else {
9363: if (prefix == NULL)
9364: name = xmlParseNameAndCompare(ctxt, ctxt->name);
9365: else
9366: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9367: }
9368:
9369: /*
9370: * We should definitely be at the ending "S? '>'" part
9371: */
9372: GROW;
9373: SKIP_BLANKS;
9374: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9375: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9376: } else
9377: NEXT1;
9378:
9379: /*
9380: * [ WFC: Element Type Match ]
9381: * The Name in an element's end-tag must match the element type in the
9382: * start-tag.
9383: *
9384: */
9385: if (name != (xmlChar*)1) {
9386: if (name == NULL) name = BAD_CAST "unparseable";
9387: if ((line == 0) && (ctxt->node != NULL))
9388: line = ctxt->node->line;
9389: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9390: "Opening and ending tag mismatch: %s line %d and %s\n",
9391: ctxt->name, line, name);
9392: }
9393:
9394: /*
9395: * SAX: End of Tag
9396: */
9397: done:
9398: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9399: (!ctxt->disableSAX))
9400: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9401:
9402: spacePop(ctxt);
9403: if (nsNr != 0)
9404: nsPop(ctxt, nsNr);
9405: return;
9406: }
9407:
9408: /**
9409: * xmlParseCDSect:
9410: * @ctxt: an XML parser context
9411: *
9412: * Parse escaped pure raw content.
9413: *
9414: * [18] CDSect ::= CDStart CData CDEnd
9415: *
9416: * [19] CDStart ::= '<![CDATA['
9417: *
9418: * [20] Data ::= (Char* - (Char* ']]>' Char*))
9419: *
9420: * [21] CDEnd ::= ']]>'
9421: */
9422: void
9423: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9424: xmlChar *buf = NULL;
9425: int len = 0;
9426: int size = XML_PARSER_BUFFER_SIZE;
9427: int r, rl;
9428: int s, sl;
9429: int cur, l;
9430: int count = 0;
9431:
9432: /* Check 2.6.0 was NXT(0) not RAW */
9433: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9434: SKIP(9);
9435: } else
9436: return;
9437:
9438: ctxt->instate = XML_PARSER_CDATA_SECTION;
9439: r = CUR_CHAR(rl);
9440: if (!IS_CHAR(r)) {
9441: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9442: ctxt->instate = XML_PARSER_CONTENT;
9443: return;
9444: }
9445: NEXTL(rl);
9446: s = CUR_CHAR(sl);
9447: if (!IS_CHAR(s)) {
9448: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9449: ctxt->instate = XML_PARSER_CONTENT;
9450: return;
9451: }
9452: NEXTL(sl);
9453: cur = CUR_CHAR(l);
9454: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9455: if (buf == NULL) {
9456: xmlErrMemory(ctxt, NULL);
9457: return;
9458: }
9459: while (IS_CHAR(cur) &&
9460: ((r != ']') || (s != ']') || (cur != '>'))) {
9461: if (len + 5 >= size) {
9462: xmlChar *tmp;
9463:
9464: size *= 2;
9465: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9466: if (tmp == NULL) {
9467: xmlFree(buf);
9468: xmlErrMemory(ctxt, NULL);
9469: return;
9470: }
9471: buf = tmp;
9472: }
9473: COPY_BUF(rl,buf,len,r);
9474: r = s;
9475: rl = sl;
9476: s = cur;
9477: sl = l;
9478: count++;
9479: if (count > 50) {
9480: GROW;
9481: count = 0;
9482: }
9483: NEXTL(l);
9484: cur = CUR_CHAR(l);
9485: }
9486: buf[len] = 0;
9487: ctxt->instate = XML_PARSER_CONTENT;
9488: if (cur != '>') {
9489: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9490: "CData section not finished\n%.50s\n", buf);
9491: xmlFree(buf);
9492: return;
9493: }
9494: NEXTL(l);
9495:
9496: /*
9497: * OK the buffer is to be consumed as cdata.
9498: */
9499: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9500: if (ctxt->sax->cdataBlock != NULL)
9501: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9502: else if (ctxt->sax->characters != NULL)
9503: ctxt->sax->characters(ctxt->userData, buf, len);
9504: }
9505: xmlFree(buf);
9506: }
9507:
9508: /**
9509: * xmlParseContent:
9510: * @ctxt: an XML parser context
9511: *
9512: * Parse a content:
9513: *
9514: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9515: */
9516:
9517: void
9518: xmlParseContent(xmlParserCtxtPtr ctxt) {
9519: GROW;
9520: while ((RAW != 0) &&
9521: ((RAW != '<') || (NXT(1) != '/')) &&
9522: (ctxt->instate != XML_PARSER_EOF)) {
9523: const xmlChar *test = CUR_PTR;
9524: unsigned int cons = ctxt->input->consumed;
9525: const xmlChar *cur = ctxt->input->cur;
9526:
9527: /*
9528: * First case : a Processing Instruction.
9529: */
9530: if ((*cur == '<') && (cur[1] == '?')) {
9531: xmlParsePI(ctxt);
9532: }
9533:
9534: /*
9535: * Second case : a CDSection
9536: */
9537: /* 2.6.0 test was *cur not RAW */
9538: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9539: xmlParseCDSect(ctxt);
9540: }
9541:
9542: /*
9543: * Third case : a comment
9544: */
9545: else if ((*cur == '<') && (NXT(1) == '!') &&
9546: (NXT(2) == '-') && (NXT(3) == '-')) {
9547: xmlParseComment(ctxt);
9548: ctxt->instate = XML_PARSER_CONTENT;
9549: }
9550:
9551: /*
9552: * Fourth case : a sub-element.
9553: */
9554: else if (*cur == '<') {
9555: xmlParseElement(ctxt);
9556: }
9557:
9558: /*
9559: * Fifth case : a reference. If if has not been resolved,
9560: * parsing returns it's Name, create the node
9561: */
9562:
9563: else if (*cur == '&') {
9564: xmlParseReference(ctxt);
9565: }
9566:
9567: /*
9568: * Last case, text. Note that References are handled directly.
9569: */
9570: else {
9571: xmlParseCharData(ctxt, 0);
9572: }
9573:
9574: GROW;
9575: /*
9576: * Pop-up of finished entities.
9577: */
9578: while ((RAW == 0) && (ctxt->inputNr > 1))
9579: xmlPopInput(ctxt);
9580: SHRINK;
9581:
9582: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9583: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9584: "detected an error in element content\n");
9585: ctxt->instate = XML_PARSER_EOF;
9586: break;
9587: }
9588: }
9589: }
9590:
9591: /**
9592: * xmlParseElement:
9593: * @ctxt: an XML parser context
9594: *
9595: * parse an XML element, this is highly recursive
9596: *
9597: * [39] element ::= EmptyElemTag | STag content ETag
9598: *
9599: * [ WFC: Element Type Match ]
9600: * The Name in an element's end-tag must match the element type in the
9601: * start-tag.
9602: *
9603: */
9604:
9605: void
9606: xmlParseElement(xmlParserCtxtPtr ctxt) {
9607: const xmlChar *name;
9608: const xmlChar *prefix = NULL;
9609: const xmlChar *URI = NULL;
9610: xmlParserNodeInfo node_info;
1.1.1.2 misho 9611: int line, tlen = 0;
1.1 misho 9612: xmlNodePtr ret;
9613: int nsNr = ctxt->nsNr;
9614:
9615: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9616: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9617: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9618: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9619: xmlParserMaxDepth);
9620: ctxt->instate = XML_PARSER_EOF;
9621: return;
9622: }
9623:
9624: /* Capture start position */
9625: if (ctxt->record_info) {
9626: node_info.begin_pos = ctxt->input->consumed +
9627: (CUR_PTR - ctxt->input->base);
9628: node_info.begin_line = ctxt->input->line;
9629: }
9630:
9631: if (ctxt->spaceNr == 0)
9632: spacePush(ctxt, -1);
9633: else if (*ctxt->space == -2)
9634: spacePush(ctxt, -1);
9635: else
9636: spacePush(ctxt, *ctxt->space);
9637:
9638: line = ctxt->input->line;
9639: #ifdef LIBXML_SAX1_ENABLED
9640: if (ctxt->sax2)
9641: #endif /* LIBXML_SAX1_ENABLED */
9642: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9643: #ifdef LIBXML_SAX1_ENABLED
9644: else
9645: name = xmlParseStartTag(ctxt);
9646: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 9647: if (ctxt->instate == XML_PARSER_EOF)
9648: return;
1.1 misho 9649: if (name == NULL) {
9650: spacePop(ctxt);
9651: return;
9652: }
9653: namePush(ctxt, name);
9654: ret = ctxt->node;
9655:
9656: #ifdef LIBXML_VALID_ENABLED
9657: /*
9658: * [ VC: Root Element Type ]
9659: * The Name in the document type declaration must match the element
9660: * type of the root element.
9661: */
9662: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9663: ctxt->node && (ctxt->node == ctxt->myDoc->children))
9664: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9665: #endif /* LIBXML_VALID_ENABLED */
9666:
9667: /*
9668: * Check for an Empty Element.
9669: */
9670: if ((RAW == '/') && (NXT(1) == '>')) {
9671: SKIP(2);
9672: if (ctxt->sax2) {
9673: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9674: (!ctxt->disableSAX))
9675: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9676: #ifdef LIBXML_SAX1_ENABLED
9677: } else {
9678: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9679: (!ctxt->disableSAX))
9680: ctxt->sax->endElement(ctxt->userData, name);
9681: #endif /* LIBXML_SAX1_ENABLED */
9682: }
9683: namePop(ctxt);
9684: spacePop(ctxt);
9685: if (nsNr != ctxt->nsNr)
9686: nsPop(ctxt, ctxt->nsNr - nsNr);
9687: if ( ret != NULL && ctxt->record_info ) {
9688: node_info.end_pos = ctxt->input->consumed +
9689: (CUR_PTR - ctxt->input->base);
9690: node_info.end_line = ctxt->input->line;
9691: node_info.node = ret;
9692: xmlParserAddNodeInfo(ctxt, &node_info);
9693: }
9694: return;
9695: }
9696: if (RAW == '>') {
9697: NEXT1;
9698: } else {
9699: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9700: "Couldn't find end of Start Tag %s line %d\n",
9701: name, line, NULL);
9702:
9703: /*
9704: * end of parsing of this node.
9705: */
9706: nodePop(ctxt);
9707: namePop(ctxt);
9708: spacePop(ctxt);
9709: if (nsNr != ctxt->nsNr)
9710: nsPop(ctxt, ctxt->nsNr - nsNr);
9711:
9712: /*
9713: * Capture end position and add node
9714: */
9715: if ( ret != NULL && ctxt->record_info ) {
9716: node_info.end_pos = ctxt->input->consumed +
9717: (CUR_PTR - ctxt->input->base);
9718: node_info.end_line = ctxt->input->line;
9719: node_info.node = ret;
9720: xmlParserAddNodeInfo(ctxt, &node_info);
9721: }
9722: return;
9723: }
9724:
9725: /*
9726: * Parse the content of the element:
9727: */
9728: xmlParseContent(ctxt);
9729: if (!IS_BYTE_CHAR(RAW)) {
9730: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9731: "Premature end of data in tag %s line %d\n",
9732: name, line, NULL);
9733:
9734: /*
9735: * end of parsing of this node.
9736: */
9737: nodePop(ctxt);
9738: namePop(ctxt);
9739: spacePop(ctxt);
9740: if (nsNr != ctxt->nsNr)
9741: nsPop(ctxt, ctxt->nsNr - nsNr);
9742: return;
9743: }
9744:
9745: /*
9746: * parse the end of tag: '</' should be here.
9747: */
9748: if (ctxt->sax2) {
9749: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9750: namePop(ctxt);
9751: }
9752: #ifdef LIBXML_SAX1_ENABLED
9753: else
9754: xmlParseEndTag1(ctxt, line);
9755: #endif /* LIBXML_SAX1_ENABLED */
9756:
9757: /*
9758: * Capture end position and add node
9759: */
9760: if ( ret != NULL && ctxt->record_info ) {
9761: node_info.end_pos = ctxt->input->consumed +
9762: (CUR_PTR - ctxt->input->base);
9763: node_info.end_line = ctxt->input->line;
9764: node_info.node = ret;
9765: xmlParserAddNodeInfo(ctxt, &node_info);
9766: }
9767: }
9768:
9769: /**
9770: * xmlParseVersionNum:
9771: * @ctxt: an XML parser context
9772: *
9773: * parse the XML version value.
9774: *
9775: * [26] VersionNum ::= '1.' [0-9]+
9776: *
9777: * In practice allow [0-9].[0-9]+ at that level
9778: *
9779: * Returns the string giving the XML version number, or NULL
9780: */
9781: xmlChar *
9782: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9783: xmlChar *buf = NULL;
9784: int len = 0;
9785: int size = 10;
9786: xmlChar cur;
9787:
9788: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9789: if (buf == NULL) {
9790: xmlErrMemory(ctxt, NULL);
9791: return(NULL);
9792: }
9793: cur = CUR;
9794: if (!((cur >= '0') && (cur <= '9'))) {
9795: xmlFree(buf);
9796: return(NULL);
9797: }
9798: buf[len++] = cur;
9799: NEXT;
9800: cur=CUR;
9801: if (cur != '.') {
9802: xmlFree(buf);
9803: return(NULL);
9804: }
9805: buf[len++] = cur;
9806: NEXT;
9807: cur=CUR;
9808: while ((cur >= '0') && (cur <= '9')) {
9809: if (len + 1 >= size) {
9810: xmlChar *tmp;
9811:
9812: size *= 2;
9813: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9814: if (tmp == NULL) {
9815: xmlFree(buf);
9816: xmlErrMemory(ctxt, NULL);
9817: return(NULL);
9818: }
9819: buf = tmp;
9820: }
9821: buf[len++] = cur;
9822: NEXT;
9823: cur=CUR;
9824: }
9825: buf[len] = 0;
9826: return(buf);
9827: }
9828:
9829: /**
9830: * xmlParseVersionInfo:
9831: * @ctxt: an XML parser context
9832: *
9833: * parse the XML version.
9834: *
9835: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9836: *
9837: * [25] Eq ::= S? '=' S?
9838: *
9839: * Returns the version string, e.g. "1.0"
9840: */
9841:
9842: xmlChar *
9843: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9844: xmlChar *version = NULL;
9845:
9846: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9847: SKIP(7);
9848: SKIP_BLANKS;
9849: if (RAW != '=') {
9850: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9851: return(NULL);
9852: }
9853: NEXT;
9854: SKIP_BLANKS;
9855: if (RAW == '"') {
9856: NEXT;
9857: version = xmlParseVersionNum(ctxt);
9858: if (RAW != '"') {
9859: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9860: } else
9861: NEXT;
9862: } else if (RAW == '\''){
9863: NEXT;
9864: version = xmlParseVersionNum(ctxt);
9865: if (RAW != '\'') {
9866: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9867: } else
9868: NEXT;
9869: } else {
9870: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9871: }
9872: }
9873: return(version);
9874: }
9875:
9876: /**
9877: * xmlParseEncName:
9878: * @ctxt: an XML parser context
9879: *
9880: * parse the XML encoding name
9881: *
9882: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9883: *
9884: * Returns the encoding name value or NULL
9885: */
9886: xmlChar *
9887: xmlParseEncName(xmlParserCtxtPtr ctxt) {
9888: xmlChar *buf = NULL;
9889: int len = 0;
9890: int size = 10;
9891: xmlChar cur;
9892:
9893: cur = CUR;
9894: if (((cur >= 'a') && (cur <= 'z')) ||
9895: ((cur >= 'A') && (cur <= 'Z'))) {
9896: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9897: if (buf == NULL) {
9898: xmlErrMemory(ctxt, NULL);
9899: return(NULL);
9900: }
9901:
9902: buf[len++] = cur;
9903: NEXT;
9904: cur = CUR;
9905: while (((cur >= 'a') && (cur <= 'z')) ||
9906: ((cur >= 'A') && (cur <= 'Z')) ||
9907: ((cur >= '0') && (cur <= '9')) ||
9908: (cur == '.') || (cur == '_') ||
9909: (cur == '-')) {
9910: if (len + 1 >= size) {
9911: xmlChar *tmp;
9912:
9913: size *= 2;
9914: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9915: if (tmp == NULL) {
9916: xmlErrMemory(ctxt, NULL);
9917: xmlFree(buf);
9918: return(NULL);
9919: }
9920: buf = tmp;
9921: }
9922: buf[len++] = cur;
9923: NEXT;
9924: cur = CUR;
9925: if (cur == 0) {
9926: SHRINK;
9927: GROW;
9928: cur = CUR;
9929: }
9930: }
9931: buf[len] = 0;
9932: } else {
9933: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9934: }
9935: return(buf);
9936: }
9937:
9938: /**
9939: * xmlParseEncodingDecl:
9940: * @ctxt: an XML parser context
9941: *
9942: * parse the XML encoding declaration
9943: *
9944: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9945: *
9946: * this setups the conversion filters.
9947: *
9948: * Returns the encoding value or NULL
9949: */
9950:
9951: const xmlChar *
9952: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9953: xmlChar *encoding = NULL;
9954:
9955: SKIP_BLANKS;
9956: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9957: SKIP(8);
9958: SKIP_BLANKS;
9959: if (RAW != '=') {
9960: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9961: return(NULL);
9962: }
9963: NEXT;
9964: SKIP_BLANKS;
9965: if (RAW == '"') {
9966: NEXT;
9967: encoding = xmlParseEncName(ctxt);
9968: if (RAW != '"') {
9969: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9970: } else
9971: NEXT;
9972: } else if (RAW == '\''){
9973: NEXT;
9974: encoding = xmlParseEncName(ctxt);
9975: if (RAW != '\'') {
9976: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9977: } else
9978: NEXT;
9979: } else {
9980: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9981: }
1.1.1.2 misho 9982:
9983: /*
9984: * Non standard parsing, allowing the user to ignore encoding
9985: */
9986: if (ctxt->options & XML_PARSE_IGNORE_ENC)
9987: return(encoding);
9988:
1.1 misho 9989: /*
9990: * UTF-16 encoding stwich has already taken place at this stage,
9991: * more over the little-endian/big-endian selection is already done
9992: */
9993: if ((encoding != NULL) &&
9994: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9995: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9996: /*
9997: * If no encoding was passed to the parser, that we are
9998: * using UTF-16 and no decoder is present i.e. the
9999: * document is apparently UTF-8 compatible, then raise an
10000: * encoding mismatch fatal error
10001: */
10002: if ((ctxt->encoding == NULL) &&
10003: (ctxt->input->buf != NULL) &&
10004: (ctxt->input->buf->encoder == NULL)) {
10005: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10006: "Document labelled UTF-16 but has UTF-8 content\n");
10007: }
10008: if (ctxt->encoding != NULL)
10009: xmlFree((xmlChar *) ctxt->encoding);
10010: ctxt->encoding = encoding;
10011: }
10012: /*
10013: * UTF-8 encoding is handled natively
10014: */
10015: else if ((encoding != NULL) &&
10016: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10017: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10018: if (ctxt->encoding != NULL)
10019: xmlFree((xmlChar *) ctxt->encoding);
10020: ctxt->encoding = encoding;
10021: }
10022: else if (encoding != NULL) {
10023: xmlCharEncodingHandlerPtr handler;
10024:
10025: if (ctxt->input->encoding != NULL)
10026: xmlFree((xmlChar *) ctxt->input->encoding);
10027: ctxt->input->encoding = encoding;
10028:
10029: handler = xmlFindCharEncodingHandler((const char *) encoding);
10030: if (handler != NULL) {
10031: xmlSwitchToEncoding(ctxt, handler);
10032: } else {
10033: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10034: "Unsupported encoding %s\n", encoding);
10035: return(NULL);
10036: }
10037: }
10038: }
10039: return(encoding);
10040: }
10041:
10042: /**
10043: * xmlParseSDDecl:
10044: * @ctxt: an XML parser context
10045: *
10046: * parse the XML standalone declaration
10047: *
10048: * [32] SDDecl ::= S 'standalone' Eq
10049: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10050: *
10051: * [ VC: Standalone Document Declaration ]
10052: * TODO The standalone document declaration must have the value "no"
10053: * if any external markup declarations contain declarations of:
10054: * - attributes with default values, if elements to which these
10055: * attributes apply appear in the document without specifications
10056: * of values for these attributes, or
10057: * - entities (other than amp, lt, gt, apos, quot), if references
10058: * to those entities appear in the document, or
10059: * - attributes with values subject to normalization, where the
10060: * attribute appears in the document with a value which will change
10061: * as a result of normalization, or
10062: * - element types with element content, if white space occurs directly
10063: * within any instance of those types.
10064: *
10065: * Returns:
10066: * 1 if standalone="yes"
10067: * 0 if standalone="no"
10068: * -2 if standalone attribute is missing or invalid
10069: * (A standalone value of -2 means that the XML declaration was found,
10070: * but no value was specified for the standalone attribute).
10071: */
10072:
10073: int
10074: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10075: int standalone = -2;
10076:
10077: SKIP_BLANKS;
10078: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10079: SKIP(10);
10080: SKIP_BLANKS;
10081: if (RAW != '=') {
10082: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10083: return(standalone);
10084: }
10085: NEXT;
10086: SKIP_BLANKS;
10087: if (RAW == '\''){
10088: NEXT;
10089: if ((RAW == 'n') && (NXT(1) == 'o')) {
10090: standalone = 0;
10091: SKIP(2);
10092: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10093: (NXT(2) == 's')) {
10094: standalone = 1;
10095: SKIP(3);
10096: } else {
10097: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10098: }
10099: if (RAW != '\'') {
10100: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10101: } else
10102: NEXT;
10103: } else if (RAW == '"'){
10104: NEXT;
10105: if ((RAW == 'n') && (NXT(1) == 'o')) {
10106: standalone = 0;
10107: SKIP(2);
10108: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10109: (NXT(2) == 's')) {
10110: standalone = 1;
10111: SKIP(3);
10112: } else {
10113: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10114: }
10115: if (RAW != '"') {
10116: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117: } else
10118: NEXT;
10119: } else {
10120: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121: }
10122: }
10123: return(standalone);
10124: }
10125:
10126: /**
10127: * xmlParseXMLDecl:
10128: * @ctxt: an XML parser context
10129: *
10130: * parse an XML declaration header
10131: *
10132: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10133: */
10134:
10135: void
10136: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10137: xmlChar *version;
10138:
10139: /*
10140: * This value for standalone indicates that the document has an
10141: * XML declaration but it does not have a standalone attribute.
10142: * It will be overwritten later if a standalone attribute is found.
10143: */
10144: ctxt->input->standalone = -2;
10145:
10146: /*
10147: * We know that '<?xml' is here.
10148: */
10149: SKIP(5);
10150:
10151: if (!IS_BLANK_CH(RAW)) {
10152: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10153: "Blank needed after '<?xml'\n");
10154: }
10155: SKIP_BLANKS;
10156:
10157: /*
10158: * We must have the VersionInfo here.
10159: */
10160: version = xmlParseVersionInfo(ctxt);
10161: if (version == NULL) {
10162: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10163: } else {
10164: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10165: /*
10166: * Changed here for XML-1.0 5th edition
10167: */
10168: if (ctxt->options & XML_PARSE_OLD10) {
10169: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10170: "Unsupported version '%s'\n",
10171: version);
10172: } else {
10173: if ((version[0] == '1') && ((version[1] == '.'))) {
10174: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10175: "Unsupported version '%s'\n",
10176: version, NULL);
10177: } else {
10178: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10179: "Unsupported version '%s'\n",
10180: version);
10181: }
10182: }
10183: }
10184: if (ctxt->version != NULL)
10185: xmlFree((void *) ctxt->version);
10186: ctxt->version = version;
10187: }
10188:
10189: /*
10190: * We may have the encoding declaration
10191: */
10192: if (!IS_BLANK_CH(RAW)) {
10193: if ((RAW == '?') && (NXT(1) == '>')) {
10194: SKIP(2);
10195: return;
10196: }
10197: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10198: }
10199: xmlParseEncodingDecl(ctxt);
10200: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10201: /*
10202: * The XML REC instructs us to stop parsing right here
10203: */
10204: return;
10205: }
10206:
10207: /*
10208: * We may have the standalone status.
10209: */
10210: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10211: if ((RAW == '?') && (NXT(1) == '>')) {
10212: SKIP(2);
10213: return;
10214: }
10215: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10216: }
10217:
10218: /*
10219: * We can grow the input buffer freely at that point
10220: */
10221: GROW;
10222:
10223: SKIP_BLANKS;
10224: ctxt->input->standalone = xmlParseSDDecl(ctxt);
10225:
10226: SKIP_BLANKS;
10227: if ((RAW == '?') && (NXT(1) == '>')) {
10228: SKIP(2);
10229: } else if (RAW == '>') {
10230: /* Deprecated old WD ... */
10231: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10232: NEXT;
10233: } else {
10234: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10235: MOVETO_ENDTAG(CUR_PTR);
10236: NEXT;
10237: }
10238: }
10239:
10240: /**
10241: * xmlParseMisc:
10242: * @ctxt: an XML parser context
10243: *
10244: * parse an XML Misc* optional field.
10245: *
10246: * [27] Misc ::= Comment | PI | S
10247: */
10248:
10249: void
10250: xmlParseMisc(xmlParserCtxtPtr ctxt) {
10251: while (((RAW == '<') && (NXT(1) == '?')) ||
10252: (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10253: IS_BLANK_CH(CUR)) {
10254: if ((RAW == '<') && (NXT(1) == '?')) {
10255: xmlParsePI(ctxt);
10256: } else if (IS_BLANK_CH(CUR)) {
10257: NEXT;
10258: } else
10259: xmlParseComment(ctxt);
10260: }
10261: }
10262:
10263: /**
10264: * xmlParseDocument:
10265: * @ctxt: an XML parser context
10266: *
10267: * parse an XML document (and build a tree if using the standard SAX
10268: * interface).
10269: *
10270: * [1] document ::= prolog element Misc*
10271: *
10272: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10273: *
10274: * Returns 0, -1 in case of error. the parser context is augmented
10275: * as a result of the parsing.
10276: */
10277:
10278: int
10279: xmlParseDocument(xmlParserCtxtPtr ctxt) {
10280: xmlChar start[4];
10281: xmlCharEncoding enc;
10282:
10283: xmlInitParser();
10284:
10285: if ((ctxt == NULL) || (ctxt->input == NULL))
10286: return(-1);
10287:
10288: GROW;
10289:
10290: /*
10291: * SAX: detecting the level.
10292: */
10293: xmlDetectSAX2(ctxt);
10294:
10295: /*
10296: * SAX: beginning of the document processing.
10297: */
10298: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10299: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10300:
10301: if ((ctxt->encoding == NULL) &&
10302: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10303: /*
10304: * Get the 4 first bytes and decode the charset
10305: * if enc != XML_CHAR_ENCODING_NONE
10306: * plug some encoding conversion routines.
10307: */
10308: start[0] = RAW;
10309: start[1] = NXT(1);
10310: start[2] = NXT(2);
10311: start[3] = NXT(3);
10312: enc = xmlDetectCharEncoding(&start[0], 4);
10313: if (enc != XML_CHAR_ENCODING_NONE) {
10314: xmlSwitchEncoding(ctxt, enc);
10315: }
10316: }
10317:
10318:
10319: if (CUR == 0) {
10320: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10321: }
10322:
10323: /*
10324: * Check for the XMLDecl in the Prolog.
10325: * do not GROW here to avoid the detected encoder to decode more
10326: * than just the first line, unless the amount of data is really
10327: * too small to hold "<?xml version="1.0" encoding="foo"
10328: */
10329: if ((ctxt->input->end - ctxt->input->cur) < 35) {
10330: GROW;
10331: }
10332: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10333:
10334: /*
10335: * Note that we will switch encoding on the fly.
10336: */
10337: xmlParseXMLDecl(ctxt);
10338: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10339: /*
10340: * The XML REC instructs us to stop parsing right here
10341: */
10342: return(-1);
10343: }
10344: ctxt->standalone = ctxt->input->standalone;
10345: SKIP_BLANKS;
10346: } else {
10347: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10348: }
10349: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10350: ctxt->sax->startDocument(ctxt->userData);
10351:
10352: /*
10353: * The Misc part of the Prolog
10354: */
10355: GROW;
10356: xmlParseMisc(ctxt);
10357:
10358: /*
10359: * Then possibly doc type declaration(s) and more Misc
10360: * (doctypedecl Misc*)?
10361: */
10362: GROW;
10363: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10364:
10365: ctxt->inSubset = 1;
10366: xmlParseDocTypeDecl(ctxt);
10367: if (RAW == '[') {
10368: ctxt->instate = XML_PARSER_DTD;
10369: xmlParseInternalSubset(ctxt);
10370: }
10371:
10372: /*
10373: * Create and update the external subset.
10374: */
10375: ctxt->inSubset = 2;
10376: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10377: (!ctxt->disableSAX))
10378: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10379: ctxt->extSubSystem, ctxt->extSubURI);
10380: ctxt->inSubset = 0;
10381:
10382: xmlCleanSpecialAttr(ctxt);
10383:
10384: ctxt->instate = XML_PARSER_PROLOG;
10385: xmlParseMisc(ctxt);
10386: }
10387:
10388: /*
10389: * Time to start parsing the tree itself
10390: */
10391: GROW;
10392: if (RAW != '<') {
10393: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10394: "Start tag expected, '<' not found\n");
10395: } else {
10396: ctxt->instate = XML_PARSER_CONTENT;
10397: xmlParseElement(ctxt);
10398: ctxt->instate = XML_PARSER_EPILOG;
10399:
10400:
10401: /*
10402: * The Misc part at the end
10403: */
10404: xmlParseMisc(ctxt);
10405:
10406: if (RAW != 0) {
10407: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10408: }
10409: ctxt->instate = XML_PARSER_EOF;
10410: }
10411:
10412: /*
10413: * SAX: end of the document processing.
10414: */
10415: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10416: ctxt->sax->endDocument(ctxt->userData);
10417:
10418: /*
10419: * Remove locally kept entity definitions if the tree was not built
10420: */
10421: if ((ctxt->myDoc != NULL) &&
10422: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10423: xmlFreeDoc(ctxt->myDoc);
10424: ctxt->myDoc = NULL;
10425: }
10426:
10427: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10428: ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10429: if (ctxt->valid)
10430: ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10431: if (ctxt->nsWellFormed)
10432: ctxt->myDoc->properties |= XML_DOC_NSVALID;
10433: if (ctxt->options & XML_PARSE_OLD10)
10434: ctxt->myDoc->properties |= XML_DOC_OLD10;
10435: }
10436: if (! ctxt->wellFormed) {
10437: ctxt->valid = 0;
10438: return(-1);
10439: }
10440: return(0);
10441: }
10442:
10443: /**
10444: * xmlParseExtParsedEnt:
10445: * @ctxt: an XML parser context
10446: *
10447: * parse a general parsed entity
10448: * An external general parsed entity is well-formed if it matches the
10449: * production labeled extParsedEnt.
10450: *
10451: * [78] extParsedEnt ::= TextDecl? content
10452: *
10453: * Returns 0, -1 in case of error. the parser context is augmented
10454: * as a result of the parsing.
10455: */
10456:
10457: int
10458: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10459: xmlChar start[4];
10460: xmlCharEncoding enc;
10461:
10462: if ((ctxt == NULL) || (ctxt->input == NULL))
10463: return(-1);
10464:
10465: xmlDefaultSAXHandlerInit();
10466:
10467: xmlDetectSAX2(ctxt);
10468:
10469: GROW;
10470:
10471: /*
10472: * SAX: beginning of the document processing.
10473: */
10474: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10475: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10476:
10477: /*
10478: * Get the 4 first bytes and decode the charset
10479: * if enc != XML_CHAR_ENCODING_NONE
10480: * plug some encoding conversion routines.
10481: */
10482: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10483: start[0] = RAW;
10484: start[1] = NXT(1);
10485: start[2] = NXT(2);
10486: start[3] = NXT(3);
10487: enc = xmlDetectCharEncoding(start, 4);
10488: if (enc != XML_CHAR_ENCODING_NONE) {
10489: xmlSwitchEncoding(ctxt, enc);
10490: }
10491: }
10492:
10493:
10494: if (CUR == 0) {
10495: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10496: }
10497:
10498: /*
10499: * Check for the XMLDecl in the Prolog.
10500: */
10501: GROW;
10502: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10503:
10504: /*
10505: * Note that we will switch encoding on the fly.
10506: */
10507: xmlParseXMLDecl(ctxt);
10508: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10509: /*
10510: * The XML REC instructs us to stop parsing right here
10511: */
10512: return(-1);
10513: }
10514: SKIP_BLANKS;
10515: } else {
10516: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10517: }
10518: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10519: ctxt->sax->startDocument(ctxt->userData);
10520:
10521: /*
10522: * Doing validity checking on chunk doesn't make sense
10523: */
10524: ctxt->instate = XML_PARSER_CONTENT;
10525: ctxt->validate = 0;
10526: ctxt->loadsubset = 0;
10527: ctxt->depth = 0;
10528:
10529: xmlParseContent(ctxt);
10530:
10531: if ((RAW == '<') && (NXT(1) == '/')) {
10532: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10533: } else if (RAW != 0) {
10534: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10535: }
10536:
10537: /*
10538: * SAX: end of the document processing.
10539: */
10540: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10541: ctxt->sax->endDocument(ctxt->userData);
10542:
10543: if (! ctxt->wellFormed) return(-1);
10544: return(0);
10545: }
10546:
10547: #ifdef LIBXML_PUSH_ENABLED
10548: /************************************************************************
10549: * *
10550: * Progressive parsing interfaces *
10551: * *
10552: ************************************************************************/
10553:
10554: /**
10555: * xmlParseLookupSequence:
10556: * @ctxt: an XML parser context
10557: * @first: the first char to lookup
10558: * @next: the next char to lookup or zero
10559: * @third: the next char to lookup or zero
10560: *
10561: * Try to find if a sequence (first, next, third) or just (first next) or
10562: * (first) is available in the input stream.
10563: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10564: * to avoid rescanning sequences of bytes, it DOES change the state of the
10565: * parser, do not use liberally.
10566: *
10567: * Returns the index to the current parsing point if the full sequence
10568: * is available, -1 otherwise.
10569: */
10570: static int
10571: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10572: xmlChar next, xmlChar third) {
10573: int base, len;
10574: xmlParserInputPtr in;
10575: const xmlChar *buf;
10576:
10577: in = ctxt->input;
10578: if (in == NULL) return(-1);
10579: base = in->cur - in->base;
10580: if (base < 0) return(-1);
10581: if (ctxt->checkIndex > base)
10582: base = ctxt->checkIndex;
10583: if (in->buf == NULL) {
10584: buf = in->base;
10585: len = in->length;
10586: } else {
10587: buf = in->buf->buffer->content;
10588: len = in->buf->buffer->use;
10589: }
10590: /* take into account the sequence length */
10591: if (third) len -= 2;
10592: else if (next) len --;
10593: for (;base < len;base++) {
10594: if (buf[base] == first) {
10595: if (third != 0) {
10596: if ((buf[base + 1] != next) ||
10597: (buf[base + 2] != third)) continue;
10598: } else if (next != 0) {
10599: if (buf[base + 1] != next) continue;
10600: }
10601: ctxt->checkIndex = 0;
10602: #ifdef DEBUG_PUSH
10603: if (next == 0)
10604: xmlGenericError(xmlGenericErrorContext,
10605: "PP: lookup '%c' found at %d\n",
10606: first, base);
10607: else if (third == 0)
10608: xmlGenericError(xmlGenericErrorContext,
10609: "PP: lookup '%c%c' found at %d\n",
10610: first, next, base);
10611: else
10612: xmlGenericError(xmlGenericErrorContext,
10613: "PP: lookup '%c%c%c' found at %d\n",
10614: first, next, third, base);
10615: #endif
10616: return(base - (in->cur - in->base));
10617: }
10618: }
10619: ctxt->checkIndex = base;
10620: #ifdef DEBUG_PUSH
10621: if (next == 0)
10622: xmlGenericError(xmlGenericErrorContext,
10623: "PP: lookup '%c' failed\n", first);
10624: else if (third == 0)
10625: xmlGenericError(xmlGenericErrorContext,
10626: "PP: lookup '%c%c' failed\n", first, next);
10627: else
10628: xmlGenericError(xmlGenericErrorContext,
10629: "PP: lookup '%c%c%c' failed\n", first, next, third);
10630: #endif
10631: return(-1);
10632: }
10633:
10634: /**
10635: * xmlParseGetLasts:
10636: * @ctxt: an XML parser context
10637: * @lastlt: pointer to store the last '<' from the input
10638: * @lastgt: pointer to store the last '>' from the input
10639: *
10640: * Lookup the last < and > in the current chunk
10641: */
10642: static void
10643: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10644: const xmlChar **lastgt) {
10645: const xmlChar *tmp;
10646:
10647: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10648: xmlGenericError(xmlGenericErrorContext,
10649: "Internal error: xmlParseGetLasts\n");
10650: return;
10651: }
10652: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10653: tmp = ctxt->input->end;
10654: tmp--;
10655: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10656: if (tmp < ctxt->input->base) {
10657: *lastlt = NULL;
10658: *lastgt = NULL;
10659: } else {
10660: *lastlt = tmp;
10661: tmp++;
10662: while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10663: if (*tmp == '\'') {
10664: tmp++;
10665: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10666: if (tmp < ctxt->input->end) tmp++;
10667: } else if (*tmp == '"') {
10668: tmp++;
10669: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10670: if (tmp < ctxt->input->end) tmp++;
10671: } else
10672: tmp++;
10673: }
10674: if (tmp < ctxt->input->end)
10675: *lastgt = tmp;
10676: else {
10677: tmp = *lastlt;
10678: tmp--;
10679: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10680: if (tmp >= ctxt->input->base)
10681: *lastgt = tmp;
10682: else
10683: *lastgt = NULL;
10684: }
10685: }
10686: } else {
10687: *lastlt = NULL;
10688: *lastgt = NULL;
10689: }
10690: }
10691: /**
10692: * xmlCheckCdataPush:
10693: * @cur: pointer to the bock of characters
10694: * @len: length of the block in bytes
10695: *
10696: * Check that the block of characters is okay as SCdata content [20]
10697: *
10698: * Returns the number of bytes to pass if okay, a negative index where an
10699: * UTF-8 error occured otherwise
10700: */
10701: static int
10702: xmlCheckCdataPush(const xmlChar *utf, int len) {
10703: int ix;
10704: unsigned char c;
10705: int codepoint;
10706:
10707: if ((utf == NULL) || (len <= 0))
10708: return(0);
10709:
10710: for (ix = 0; ix < len;) { /* string is 0-terminated */
10711: c = utf[ix];
10712: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10713: if (c >= 0x20)
10714: ix++;
10715: else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10716: ix++;
10717: else
10718: return(-ix);
10719: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10720: if (ix + 2 > len) return(ix);
10721: if ((utf[ix+1] & 0xc0 ) != 0x80)
10722: return(-ix);
10723: codepoint = (utf[ix] & 0x1f) << 6;
10724: codepoint |= utf[ix+1] & 0x3f;
10725: if (!xmlIsCharQ(codepoint))
10726: return(-ix);
10727: ix += 2;
10728: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10729: if (ix + 3 > len) return(ix);
10730: if (((utf[ix+1] & 0xc0) != 0x80) ||
10731: ((utf[ix+2] & 0xc0) != 0x80))
10732: return(-ix);
10733: codepoint = (utf[ix] & 0xf) << 12;
10734: codepoint |= (utf[ix+1] & 0x3f) << 6;
10735: codepoint |= utf[ix+2] & 0x3f;
10736: if (!xmlIsCharQ(codepoint))
10737: return(-ix);
10738: ix += 3;
10739: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10740: if (ix + 4 > len) return(ix);
10741: if (((utf[ix+1] & 0xc0) != 0x80) ||
10742: ((utf[ix+2] & 0xc0) != 0x80) ||
10743: ((utf[ix+3] & 0xc0) != 0x80))
10744: return(-ix);
10745: codepoint = (utf[ix] & 0x7) << 18;
10746: codepoint |= (utf[ix+1] & 0x3f) << 12;
10747: codepoint |= (utf[ix+2] & 0x3f) << 6;
10748: codepoint |= utf[ix+3] & 0x3f;
10749: if (!xmlIsCharQ(codepoint))
10750: return(-ix);
10751: ix += 4;
10752: } else /* unknown encoding */
10753: return(-ix);
10754: }
10755: return(ix);
10756: }
10757:
10758: /**
10759: * xmlParseTryOrFinish:
10760: * @ctxt: an XML parser context
10761: * @terminate: last chunk indicator
10762: *
10763: * Try to progress on parsing
10764: *
10765: * Returns zero if no parsing was possible
10766: */
10767: static int
10768: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10769: int ret = 0;
10770: int avail, tlen;
10771: xmlChar cur, next;
10772: const xmlChar *lastlt, *lastgt;
10773:
10774: if (ctxt->input == NULL)
10775: return(0);
10776:
10777: #ifdef DEBUG_PUSH
10778: switch (ctxt->instate) {
10779: case XML_PARSER_EOF:
10780: xmlGenericError(xmlGenericErrorContext,
10781: "PP: try EOF\n"); break;
10782: case XML_PARSER_START:
10783: xmlGenericError(xmlGenericErrorContext,
10784: "PP: try START\n"); break;
10785: case XML_PARSER_MISC:
10786: xmlGenericError(xmlGenericErrorContext,
10787: "PP: try MISC\n");break;
10788: case XML_PARSER_COMMENT:
10789: xmlGenericError(xmlGenericErrorContext,
10790: "PP: try COMMENT\n");break;
10791: case XML_PARSER_PROLOG:
10792: xmlGenericError(xmlGenericErrorContext,
10793: "PP: try PROLOG\n");break;
10794: case XML_PARSER_START_TAG:
10795: xmlGenericError(xmlGenericErrorContext,
10796: "PP: try START_TAG\n");break;
10797: case XML_PARSER_CONTENT:
10798: xmlGenericError(xmlGenericErrorContext,
10799: "PP: try CONTENT\n");break;
10800: case XML_PARSER_CDATA_SECTION:
10801: xmlGenericError(xmlGenericErrorContext,
10802: "PP: try CDATA_SECTION\n");break;
10803: case XML_PARSER_END_TAG:
10804: xmlGenericError(xmlGenericErrorContext,
10805: "PP: try END_TAG\n");break;
10806: case XML_PARSER_ENTITY_DECL:
10807: xmlGenericError(xmlGenericErrorContext,
10808: "PP: try ENTITY_DECL\n");break;
10809: case XML_PARSER_ENTITY_VALUE:
10810: xmlGenericError(xmlGenericErrorContext,
10811: "PP: try ENTITY_VALUE\n");break;
10812: case XML_PARSER_ATTRIBUTE_VALUE:
10813: xmlGenericError(xmlGenericErrorContext,
10814: "PP: try ATTRIBUTE_VALUE\n");break;
10815: case XML_PARSER_DTD:
10816: xmlGenericError(xmlGenericErrorContext,
10817: "PP: try DTD\n");break;
10818: case XML_PARSER_EPILOG:
10819: xmlGenericError(xmlGenericErrorContext,
10820: "PP: try EPILOG\n");break;
10821: case XML_PARSER_PI:
10822: xmlGenericError(xmlGenericErrorContext,
10823: "PP: try PI\n");break;
10824: case XML_PARSER_IGNORE:
10825: xmlGenericError(xmlGenericErrorContext,
10826: "PP: try IGNORE\n");break;
10827: }
10828: #endif
10829:
10830: if ((ctxt->input != NULL) &&
10831: (ctxt->input->cur - ctxt->input->base > 4096)) {
10832: xmlSHRINK(ctxt);
10833: ctxt->checkIndex = 0;
10834: }
10835: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10836:
10837: while (1) {
10838: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10839: return(0);
10840:
10841:
10842: /*
10843: * Pop-up of finished entities.
10844: */
10845: while ((RAW == 0) && (ctxt->inputNr > 1))
10846: xmlPopInput(ctxt);
10847:
10848: if (ctxt->input == NULL) break;
10849: if (ctxt->input->buf == NULL)
10850: avail = ctxt->input->length -
10851: (ctxt->input->cur - ctxt->input->base);
10852: else {
10853: /*
10854: * If we are operating on converted input, try to flush
10855: * remainng chars to avoid them stalling in the non-converted
10856: * buffer.
10857: */
10858: if ((ctxt->input->buf->raw != NULL) &&
10859: (ctxt->input->buf->raw->use > 0)) {
10860: int base = ctxt->input->base -
10861: ctxt->input->buf->buffer->content;
10862: int current = ctxt->input->cur - ctxt->input->base;
10863:
10864: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10865: ctxt->input->base = ctxt->input->buf->buffer->content + base;
10866: ctxt->input->cur = ctxt->input->base + current;
10867: ctxt->input->end =
10868: &ctxt->input->buf->buffer->content[
10869: ctxt->input->buf->buffer->use];
10870: }
10871: avail = ctxt->input->buf->buffer->use -
10872: (ctxt->input->cur - ctxt->input->base);
10873: }
10874: if (avail < 1)
10875: goto done;
10876: switch (ctxt->instate) {
10877: case XML_PARSER_EOF:
10878: /*
10879: * Document parsing is done !
10880: */
10881: goto done;
10882: case XML_PARSER_START:
10883: if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10884: xmlChar start[4];
10885: xmlCharEncoding enc;
10886:
10887: /*
10888: * Very first chars read from the document flow.
10889: */
10890: if (avail < 4)
10891: goto done;
10892:
10893: /*
10894: * Get the 4 first bytes and decode the charset
10895: * if enc != XML_CHAR_ENCODING_NONE
10896: * plug some encoding conversion routines,
10897: * else xmlSwitchEncoding will set to (default)
10898: * UTF8.
10899: */
10900: start[0] = RAW;
10901: start[1] = NXT(1);
10902: start[2] = NXT(2);
10903: start[3] = NXT(3);
10904: enc = xmlDetectCharEncoding(start, 4);
10905: xmlSwitchEncoding(ctxt, enc);
10906: break;
10907: }
10908:
10909: if (avail < 2)
10910: goto done;
10911: cur = ctxt->input->cur[0];
10912: next = ctxt->input->cur[1];
10913: if (cur == 0) {
10914: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10915: ctxt->sax->setDocumentLocator(ctxt->userData,
10916: &xmlDefaultSAXLocator);
10917: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10918: ctxt->instate = XML_PARSER_EOF;
10919: #ifdef DEBUG_PUSH
10920: xmlGenericError(xmlGenericErrorContext,
10921: "PP: entering EOF\n");
10922: #endif
10923: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10924: ctxt->sax->endDocument(ctxt->userData);
10925: goto done;
10926: }
10927: if ((cur == '<') && (next == '?')) {
10928: /* PI or XML decl */
10929: if (avail < 5) return(ret);
10930: if ((!terminate) &&
10931: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10932: return(ret);
10933: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10934: ctxt->sax->setDocumentLocator(ctxt->userData,
10935: &xmlDefaultSAXLocator);
10936: if ((ctxt->input->cur[2] == 'x') &&
10937: (ctxt->input->cur[3] == 'm') &&
10938: (ctxt->input->cur[4] == 'l') &&
10939: (IS_BLANK_CH(ctxt->input->cur[5]))) {
10940: ret += 5;
10941: #ifdef DEBUG_PUSH
10942: xmlGenericError(xmlGenericErrorContext,
10943: "PP: Parsing XML Decl\n");
10944: #endif
10945: xmlParseXMLDecl(ctxt);
10946: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10947: /*
10948: * The XML REC instructs us to stop parsing right
10949: * here
10950: */
10951: ctxt->instate = XML_PARSER_EOF;
10952: return(0);
10953: }
10954: ctxt->standalone = ctxt->input->standalone;
10955: if ((ctxt->encoding == NULL) &&
10956: (ctxt->input->encoding != NULL))
10957: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10958: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10959: (!ctxt->disableSAX))
10960: ctxt->sax->startDocument(ctxt->userData);
10961: ctxt->instate = XML_PARSER_MISC;
10962: #ifdef DEBUG_PUSH
10963: xmlGenericError(xmlGenericErrorContext,
10964: "PP: entering MISC\n");
10965: #endif
10966: } else {
10967: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10968: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10969: (!ctxt->disableSAX))
10970: ctxt->sax->startDocument(ctxt->userData);
10971: ctxt->instate = XML_PARSER_MISC;
10972: #ifdef DEBUG_PUSH
10973: xmlGenericError(xmlGenericErrorContext,
10974: "PP: entering MISC\n");
10975: #endif
10976: }
10977: } else {
10978: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10979: ctxt->sax->setDocumentLocator(ctxt->userData,
10980: &xmlDefaultSAXLocator);
10981: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10982: if (ctxt->version == NULL) {
10983: xmlErrMemory(ctxt, NULL);
10984: break;
10985: }
10986: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10987: (!ctxt->disableSAX))
10988: ctxt->sax->startDocument(ctxt->userData);
10989: ctxt->instate = XML_PARSER_MISC;
10990: #ifdef DEBUG_PUSH
10991: xmlGenericError(xmlGenericErrorContext,
10992: "PP: entering MISC\n");
10993: #endif
10994: }
10995: break;
10996: case XML_PARSER_START_TAG: {
10997: const xmlChar *name;
10998: const xmlChar *prefix = NULL;
10999: const xmlChar *URI = NULL;
11000: int nsNr = ctxt->nsNr;
11001:
11002: if ((avail < 2) && (ctxt->inputNr == 1))
11003: goto done;
11004: cur = ctxt->input->cur[0];
11005: if (cur != '<') {
11006: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11007: ctxt->instate = XML_PARSER_EOF;
11008: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11009: ctxt->sax->endDocument(ctxt->userData);
11010: goto done;
11011: }
11012: if (!terminate) {
11013: if (ctxt->progressive) {
11014: /* > can be found unescaped in attribute values */
11015: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11016: goto done;
11017: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11018: goto done;
11019: }
11020: }
11021: if (ctxt->spaceNr == 0)
11022: spacePush(ctxt, -1);
11023: else if (*ctxt->space == -2)
11024: spacePush(ctxt, -1);
11025: else
11026: spacePush(ctxt, *ctxt->space);
11027: #ifdef LIBXML_SAX1_ENABLED
11028: if (ctxt->sax2)
11029: #endif /* LIBXML_SAX1_ENABLED */
11030: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11031: #ifdef LIBXML_SAX1_ENABLED
11032: else
11033: name = xmlParseStartTag(ctxt);
11034: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 11035: if (ctxt->instate == XML_PARSER_EOF)
11036: goto done;
1.1 misho 11037: if (name == NULL) {
11038: spacePop(ctxt);
11039: ctxt->instate = XML_PARSER_EOF;
11040: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11041: ctxt->sax->endDocument(ctxt->userData);
11042: goto done;
11043: }
11044: #ifdef LIBXML_VALID_ENABLED
11045: /*
11046: * [ VC: Root Element Type ]
11047: * The Name in the document type declaration must match
11048: * the element type of the root element.
11049: */
11050: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11051: ctxt->node && (ctxt->node == ctxt->myDoc->children))
11052: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11053: #endif /* LIBXML_VALID_ENABLED */
11054:
11055: /*
11056: * Check for an Empty Element.
11057: */
11058: if ((RAW == '/') && (NXT(1) == '>')) {
11059: SKIP(2);
11060:
11061: if (ctxt->sax2) {
11062: if ((ctxt->sax != NULL) &&
11063: (ctxt->sax->endElementNs != NULL) &&
11064: (!ctxt->disableSAX))
11065: ctxt->sax->endElementNs(ctxt->userData, name,
11066: prefix, URI);
11067: if (ctxt->nsNr - nsNr > 0)
11068: nsPop(ctxt, ctxt->nsNr - nsNr);
11069: #ifdef LIBXML_SAX1_ENABLED
11070: } else {
11071: if ((ctxt->sax != NULL) &&
11072: (ctxt->sax->endElement != NULL) &&
11073: (!ctxt->disableSAX))
11074: ctxt->sax->endElement(ctxt->userData, name);
11075: #endif /* LIBXML_SAX1_ENABLED */
11076: }
11077: spacePop(ctxt);
11078: if (ctxt->nameNr == 0) {
11079: ctxt->instate = XML_PARSER_EPILOG;
11080: } else {
11081: ctxt->instate = XML_PARSER_CONTENT;
11082: }
11083: break;
11084: }
11085: if (RAW == '>') {
11086: NEXT;
11087: } else {
11088: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11089: "Couldn't find end of Start Tag %s\n",
11090: name);
11091: nodePop(ctxt);
11092: spacePop(ctxt);
11093: }
11094: if (ctxt->sax2)
11095: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11096: #ifdef LIBXML_SAX1_ENABLED
11097: else
11098: namePush(ctxt, name);
11099: #endif /* LIBXML_SAX1_ENABLED */
11100:
11101: ctxt->instate = XML_PARSER_CONTENT;
11102: break;
11103: }
11104: case XML_PARSER_CONTENT: {
11105: const xmlChar *test;
11106: unsigned int cons;
11107: if ((avail < 2) && (ctxt->inputNr == 1))
11108: goto done;
11109: cur = ctxt->input->cur[0];
11110: next = ctxt->input->cur[1];
11111:
11112: test = CUR_PTR;
11113: cons = ctxt->input->consumed;
11114: if ((cur == '<') && (next == '/')) {
11115: ctxt->instate = XML_PARSER_END_TAG;
11116: break;
11117: } else if ((cur == '<') && (next == '?')) {
11118: if ((!terminate) &&
11119: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11120: goto done;
11121: xmlParsePI(ctxt);
11122: } else if ((cur == '<') && (next != '!')) {
11123: ctxt->instate = XML_PARSER_START_TAG;
11124: break;
11125: } else if ((cur == '<') && (next == '!') &&
11126: (ctxt->input->cur[2] == '-') &&
11127: (ctxt->input->cur[3] == '-')) {
11128: int term;
11129:
11130: if (avail < 4)
11131: goto done;
11132: ctxt->input->cur += 4;
11133: term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11134: ctxt->input->cur -= 4;
11135: if ((!terminate) && (term < 0))
11136: goto done;
11137: xmlParseComment(ctxt);
11138: ctxt->instate = XML_PARSER_CONTENT;
11139: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11140: (ctxt->input->cur[2] == '[') &&
11141: (ctxt->input->cur[3] == 'C') &&
11142: (ctxt->input->cur[4] == 'D') &&
11143: (ctxt->input->cur[5] == 'A') &&
11144: (ctxt->input->cur[6] == 'T') &&
11145: (ctxt->input->cur[7] == 'A') &&
11146: (ctxt->input->cur[8] == '[')) {
11147: SKIP(9);
11148: ctxt->instate = XML_PARSER_CDATA_SECTION;
11149: break;
11150: } else if ((cur == '<') && (next == '!') &&
11151: (avail < 9)) {
11152: goto done;
11153: } else if (cur == '&') {
11154: if ((!terminate) &&
11155: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11156: goto done;
11157: xmlParseReference(ctxt);
11158: } else {
11159: /* TODO Avoid the extra copy, handle directly !!! */
11160: /*
11161: * Goal of the following test is:
11162: * - minimize calls to the SAX 'character' callback
11163: * when they are mergeable
11164: * - handle an problem for isBlank when we only parse
11165: * a sequence of blank chars and the next one is
11166: * not available to check against '<' presence.
11167: * - tries to homogenize the differences in SAX
11168: * callbacks between the push and pull versions
11169: * of the parser.
11170: */
11171: if ((ctxt->inputNr == 1) &&
11172: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11173: if (!terminate) {
11174: if (ctxt->progressive) {
11175: if ((lastlt == NULL) ||
11176: (ctxt->input->cur > lastlt))
11177: goto done;
11178: } else if (xmlParseLookupSequence(ctxt,
11179: '<', 0, 0) < 0) {
11180: goto done;
11181: }
11182: }
11183: }
11184: ctxt->checkIndex = 0;
11185: xmlParseCharData(ctxt, 0);
11186: }
11187: /*
11188: * Pop-up of finished entities.
11189: */
11190: while ((RAW == 0) && (ctxt->inputNr > 1))
11191: xmlPopInput(ctxt);
11192: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11193: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11194: "detected an error in element content\n");
11195: ctxt->instate = XML_PARSER_EOF;
11196: break;
11197: }
11198: break;
11199: }
11200: case XML_PARSER_END_TAG:
11201: if (avail < 2)
11202: goto done;
11203: if (!terminate) {
11204: if (ctxt->progressive) {
11205: /* > can be found unescaped in attribute values */
11206: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11207: goto done;
11208: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11209: goto done;
11210: }
11211: }
11212: if (ctxt->sax2) {
11213: xmlParseEndTag2(ctxt,
11214: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11215: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11216: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11217: nameNsPop(ctxt);
11218: }
11219: #ifdef LIBXML_SAX1_ENABLED
11220: else
11221: xmlParseEndTag1(ctxt, 0);
11222: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 11223: if (ctxt->instate == XML_PARSER_EOF) {
11224: /* Nothing */
11225: } else if (ctxt->nameNr == 0) {
1.1 misho 11226: ctxt->instate = XML_PARSER_EPILOG;
11227: } else {
11228: ctxt->instate = XML_PARSER_CONTENT;
11229: }
11230: break;
11231: case XML_PARSER_CDATA_SECTION: {
11232: /*
11233: * The Push mode need to have the SAX callback for
11234: * cdataBlock merge back contiguous callbacks.
11235: */
11236: int base;
11237:
11238: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11239: if (base < 0) {
11240: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11241: int tmp;
11242:
11243: tmp = xmlCheckCdataPush(ctxt->input->cur,
11244: XML_PARSER_BIG_BUFFER_SIZE);
11245: if (tmp < 0) {
11246: tmp = -tmp;
11247: ctxt->input->cur += tmp;
11248: goto encoding_error;
11249: }
11250: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11251: if (ctxt->sax->cdataBlock != NULL)
11252: ctxt->sax->cdataBlock(ctxt->userData,
11253: ctxt->input->cur, tmp);
11254: else if (ctxt->sax->characters != NULL)
11255: ctxt->sax->characters(ctxt->userData,
11256: ctxt->input->cur, tmp);
11257: }
11258: SKIPL(tmp);
11259: ctxt->checkIndex = 0;
11260: }
11261: goto done;
11262: } else {
11263: int tmp;
11264:
11265: tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11266: if ((tmp < 0) || (tmp != base)) {
11267: tmp = -tmp;
11268: ctxt->input->cur += tmp;
11269: goto encoding_error;
11270: }
11271: if ((ctxt->sax != NULL) && (base == 0) &&
11272: (ctxt->sax->cdataBlock != NULL) &&
11273: (!ctxt->disableSAX)) {
11274: /*
11275: * Special case to provide identical behaviour
11276: * between pull and push parsers on enpty CDATA
11277: * sections
11278: */
11279: if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11280: (!strncmp((const char *)&ctxt->input->cur[-9],
11281: "<![CDATA[", 9)))
11282: ctxt->sax->cdataBlock(ctxt->userData,
11283: BAD_CAST "", 0);
11284: } else if ((ctxt->sax != NULL) && (base > 0) &&
11285: (!ctxt->disableSAX)) {
11286: if (ctxt->sax->cdataBlock != NULL)
11287: ctxt->sax->cdataBlock(ctxt->userData,
11288: ctxt->input->cur, base);
11289: else if (ctxt->sax->characters != NULL)
11290: ctxt->sax->characters(ctxt->userData,
11291: ctxt->input->cur, base);
11292: }
11293: SKIPL(base + 3);
11294: ctxt->checkIndex = 0;
11295: ctxt->instate = XML_PARSER_CONTENT;
11296: #ifdef DEBUG_PUSH
11297: xmlGenericError(xmlGenericErrorContext,
11298: "PP: entering CONTENT\n");
11299: #endif
11300: }
11301: break;
11302: }
11303: case XML_PARSER_MISC:
11304: SKIP_BLANKS;
11305: if (ctxt->input->buf == NULL)
11306: avail = ctxt->input->length -
11307: (ctxt->input->cur - ctxt->input->base);
11308: else
11309: avail = ctxt->input->buf->buffer->use -
11310: (ctxt->input->cur - ctxt->input->base);
11311: if (avail < 2)
11312: goto done;
11313: cur = ctxt->input->cur[0];
11314: next = ctxt->input->cur[1];
11315: if ((cur == '<') && (next == '?')) {
11316: if ((!terminate) &&
11317: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11318: goto done;
11319: #ifdef DEBUG_PUSH
11320: xmlGenericError(xmlGenericErrorContext,
11321: "PP: Parsing PI\n");
11322: #endif
11323: xmlParsePI(ctxt);
11324: ctxt->checkIndex = 0;
11325: } else if ((cur == '<') && (next == '!') &&
11326: (ctxt->input->cur[2] == '-') &&
11327: (ctxt->input->cur[3] == '-')) {
11328: if ((!terminate) &&
11329: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11330: goto done;
11331: #ifdef DEBUG_PUSH
11332: xmlGenericError(xmlGenericErrorContext,
11333: "PP: Parsing Comment\n");
11334: #endif
11335: xmlParseComment(ctxt);
11336: ctxt->instate = XML_PARSER_MISC;
11337: ctxt->checkIndex = 0;
11338: } else if ((cur == '<') && (next == '!') &&
11339: (ctxt->input->cur[2] == 'D') &&
11340: (ctxt->input->cur[3] == 'O') &&
11341: (ctxt->input->cur[4] == 'C') &&
11342: (ctxt->input->cur[5] == 'T') &&
11343: (ctxt->input->cur[6] == 'Y') &&
11344: (ctxt->input->cur[7] == 'P') &&
11345: (ctxt->input->cur[8] == 'E')) {
11346: if ((!terminate) &&
11347: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11348: goto done;
11349: #ifdef DEBUG_PUSH
11350: xmlGenericError(xmlGenericErrorContext,
11351: "PP: Parsing internal subset\n");
11352: #endif
11353: ctxt->inSubset = 1;
11354: xmlParseDocTypeDecl(ctxt);
11355: if (RAW == '[') {
11356: ctxt->instate = XML_PARSER_DTD;
11357: #ifdef DEBUG_PUSH
11358: xmlGenericError(xmlGenericErrorContext,
11359: "PP: entering DTD\n");
11360: #endif
11361: } else {
11362: /*
11363: * Create and update the external subset.
11364: */
11365: ctxt->inSubset = 2;
11366: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11367: (ctxt->sax->externalSubset != NULL))
11368: ctxt->sax->externalSubset(ctxt->userData,
11369: ctxt->intSubName, ctxt->extSubSystem,
11370: ctxt->extSubURI);
11371: ctxt->inSubset = 0;
11372: xmlCleanSpecialAttr(ctxt);
11373: ctxt->instate = XML_PARSER_PROLOG;
11374: #ifdef DEBUG_PUSH
11375: xmlGenericError(xmlGenericErrorContext,
11376: "PP: entering PROLOG\n");
11377: #endif
11378: }
11379: } else if ((cur == '<') && (next == '!') &&
11380: (avail < 9)) {
11381: goto done;
11382: } else {
11383: ctxt->instate = XML_PARSER_START_TAG;
11384: ctxt->progressive = 1;
11385: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11386: #ifdef DEBUG_PUSH
11387: xmlGenericError(xmlGenericErrorContext,
11388: "PP: entering START_TAG\n");
11389: #endif
11390: }
11391: break;
11392: case XML_PARSER_PROLOG:
11393: SKIP_BLANKS;
11394: if (ctxt->input->buf == NULL)
11395: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11396: else
11397: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11398: if (avail < 2)
11399: goto done;
11400: cur = ctxt->input->cur[0];
11401: next = ctxt->input->cur[1];
11402: if ((cur == '<') && (next == '?')) {
11403: if ((!terminate) &&
11404: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11405: goto done;
11406: #ifdef DEBUG_PUSH
11407: xmlGenericError(xmlGenericErrorContext,
11408: "PP: Parsing PI\n");
11409: #endif
11410: xmlParsePI(ctxt);
11411: } else if ((cur == '<') && (next == '!') &&
11412: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11413: if ((!terminate) &&
11414: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11415: goto done;
11416: #ifdef DEBUG_PUSH
11417: xmlGenericError(xmlGenericErrorContext,
11418: "PP: Parsing Comment\n");
11419: #endif
11420: xmlParseComment(ctxt);
11421: ctxt->instate = XML_PARSER_PROLOG;
11422: } else if ((cur == '<') && (next == '!') &&
11423: (avail < 4)) {
11424: goto done;
11425: } else {
11426: ctxt->instate = XML_PARSER_START_TAG;
11427: if (ctxt->progressive == 0)
11428: ctxt->progressive = 1;
11429: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11430: #ifdef DEBUG_PUSH
11431: xmlGenericError(xmlGenericErrorContext,
11432: "PP: entering START_TAG\n");
11433: #endif
11434: }
11435: break;
11436: case XML_PARSER_EPILOG:
11437: SKIP_BLANKS;
11438: if (ctxt->input->buf == NULL)
11439: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11440: else
11441: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11442: if (avail < 2)
11443: goto done;
11444: cur = ctxt->input->cur[0];
11445: next = ctxt->input->cur[1];
11446: if ((cur == '<') && (next == '?')) {
11447: if ((!terminate) &&
11448: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11449: goto done;
11450: #ifdef DEBUG_PUSH
11451: xmlGenericError(xmlGenericErrorContext,
11452: "PP: Parsing PI\n");
11453: #endif
11454: xmlParsePI(ctxt);
11455: ctxt->instate = XML_PARSER_EPILOG;
11456: } else if ((cur == '<') && (next == '!') &&
11457: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11458: if ((!terminate) &&
11459: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11460: goto done;
11461: #ifdef DEBUG_PUSH
11462: xmlGenericError(xmlGenericErrorContext,
11463: "PP: Parsing Comment\n");
11464: #endif
11465: xmlParseComment(ctxt);
11466: ctxt->instate = XML_PARSER_EPILOG;
11467: } else if ((cur == '<') && (next == '!') &&
11468: (avail < 4)) {
11469: goto done;
11470: } else {
11471: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11472: ctxt->instate = XML_PARSER_EOF;
11473: #ifdef DEBUG_PUSH
11474: xmlGenericError(xmlGenericErrorContext,
11475: "PP: entering EOF\n");
11476: #endif
11477: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11478: ctxt->sax->endDocument(ctxt->userData);
11479: goto done;
11480: }
11481: break;
11482: case XML_PARSER_DTD: {
11483: /*
11484: * Sorry but progressive parsing of the internal subset
11485: * is not expected to be supported. We first check that
11486: * the full content of the internal subset is available and
11487: * the parsing is launched only at that point.
11488: * Internal subset ends up with "']' S? '>'" in an unescaped
11489: * section and not in a ']]>' sequence which are conditional
11490: * sections (whoever argued to keep that crap in XML deserve
11491: * a place in hell !).
11492: */
11493: int base, i;
11494: xmlChar *buf;
11495: xmlChar quote = 0;
11496:
11497: base = ctxt->input->cur - ctxt->input->base;
11498: if (base < 0) return(0);
11499: if (ctxt->checkIndex > base)
11500: base = ctxt->checkIndex;
11501: buf = ctxt->input->buf->buffer->content;
11502: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11503: base++) {
11504: if (quote != 0) {
11505: if (buf[base] == quote)
11506: quote = 0;
11507: continue;
11508: }
11509: if ((quote == 0) && (buf[base] == '<')) {
11510: int found = 0;
11511: /* special handling of comments */
11512: if (((unsigned int) base + 4 <
11513: ctxt->input->buf->buffer->use) &&
11514: (buf[base + 1] == '!') &&
11515: (buf[base + 2] == '-') &&
11516: (buf[base + 3] == '-')) {
11517: for (;(unsigned int) base + 3 <
11518: ctxt->input->buf->buffer->use; base++) {
11519: if ((buf[base] == '-') &&
11520: (buf[base + 1] == '-') &&
11521: (buf[base + 2] == '>')) {
11522: found = 1;
11523: base += 2;
11524: break;
11525: }
11526: }
11527: if (!found) {
11528: #if 0
11529: fprintf(stderr, "unfinished comment\n");
11530: #endif
11531: break; /* for */
11532: }
11533: continue;
11534: }
11535: }
11536: if (buf[base] == '"') {
11537: quote = '"';
11538: continue;
11539: }
11540: if (buf[base] == '\'') {
11541: quote = '\'';
11542: continue;
11543: }
11544: if (buf[base] == ']') {
11545: #if 0
11546: fprintf(stderr, "%c%c%c%c: ", buf[base],
11547: buf[base + 1], buf[base + 2], buf[base + 3]);
11548: #endif
11549: if ((unsigned int) base +1 >=
11550: ctxt->input->buf->buffer->use)
11551: break;
11552: if (buf[base + 1] == ']') {
11553: /* conditional crap, skip both ']' ! */
11554: base++;
11555: continue;
11556: }
11557: for (i = 1;
11558: (unsigned int) base + i < ctxt->input->buf->buffer->use;
11559: i++) {
11560: if (buf[base + i] == '>') {
11561: #if 0
11562: fprintf(stderr, "found\n");
11563: #endif
11564: goto found_end_int_subset;
11565: }
11566: if (!IS_BLANK_CH(buf[base + i])) {
11567: #if 0
11568: fprintf(stderr, "not found\n");
11569: #endif
11570: goto not_end_of_int_subset;
11571: }
11572: }
11573: #if 0
11574: fprintf(stderr, "end of stream\n");
11575: #endif
11576: break;
11577:
11578: }
11579: not_end_of_int_subset:
11580: continue; /* for */
11581: }
11582: /*
11583: * We didn't found the end of the Internal subset
11584: */
11585: #ifdef DEBUG_PUSH
11586: if (next == 0)
11587: xmlGenericError(xmlGenericErrorContext,
11588: "PP: lookup of int subset end filed\n");
11589: #endif
11590: goto done;
11591:
11592: found_end_int_subset:
11593: xmlParseInternalSubset(ctxt);
11594: ctxt->inSubset = 2;
11595: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11596: (ctxt->sax->externalSubset != NULL))
11597: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11598: ctxt->extSubSystem, ctxt->extSubURI);
11599: ctxt->inSubset = 0;
11600: xmlCleanSpecialAttr(ctxt);
11601: ctxt->instate = XML_PARSER_PROLOG;
11602: ctxt->checkIndex = 0;
11603: #ifdef DEBUG_PUSH
11604: xmlGenericError(xmlGenericErrorContext,
11605: "PP: entering PROLOG\n");
11606: #endif
11607: break;
11608: }
11609: case XML_PARSER_COMMENT:
11610: xmlGenericError(xmlGenericErrorContext,
11611: "PP: internal error, state == COMMENT\n");
11612: ctxt->instate = XML_PARSER_CONTENT;
11613: #ifdef DEBUG_PUSH
11614: xmlGenericError(xmlGenericErrorContext,
11615: "PP: entering CONTENT\n");
11616: #endif
11617: break;
11618: case XML_PARSER_IGNORE:
11619: xmlGenericError(xmlGenericErrorContext,
11620: "PP: internal error, state == IGNORE");
11621: ctxt->instate = XML_PARSER_DTD;
11622: #ifdef DEBUG_PUSH
11623: xmlGenericError(xmlGenericErrorContext,
11624: "PP: entering DTD\n");
11625: #endif
11626: break;
11627: case XML_PARSER_PI:
11628: xmlGenericError(xmlGenericErrorContext,
11629: "PP: internal error, state == PI\n");
11630: ctxt->instate = XML_PARSER_CONTENT;
11631: #ifdef DEBUG_PUSH
11632: xmlGenericError(xmlGenericErrorContext,
11633: "PP: entering CONTENT\n");
11634: #endif
11635: break;
11636: case XML_PARSER_ENTITY_DECL:
11637: xmlGenericError(xmlGenericErrorContext,
11638: "PP: internal error, state == ENTITY_DECL\n");
11639: ctxt->instate = XML_PARSER_DTD;
11640: #ifdef DEBUG_PUSH
11641: xmlGenericError(xmlGenericErrorContext,
11642: "PP: entering DTD\n");
11643: #endif
11644: break;
11645: case XML_PARSER_ENTITY_VALUE:
11646: xmlGenericError(xmlGenericErrorContext,
11647: "PP: internal error, state == ENTITY_VALUE\n");
11648: ctxt->instate = XML_PARSER_CONTENT;
11649: #ifdef DEBUG_PUSH
11650: xmlGenericError(xmlGenericErrorContext,
11651: "PP: entering DTD\n");
11652: #endif
11653: break;
11654: case XML_PARSER_ATTRIBUTE_VALUE:
11655: xmlGenericError(xmlGenericErrorContext,
11656: "PP: internal error, state == ATTRIBUTE_VALUE\n");
11657: ctxt->instate = XML_PARSER_START_TAG;
11658: #ifdef DEBUG_PUSH
11659: xmlGenericError(xmlGenericErrorContext,
11660: "PP: entering START_TAG\n");
11661: #endif
11662: break;
11663: case XML_PARSER_SYSTEM_LITERAL:
11664: xmlGenericError(xmlGenericErrorContext,
11665: "PP: internal error, state == SYSTEM_LITERAL\n");
11666: ctxt->instate = XML_PARSER_START_TAG;
11667: #ifdef DEBUG_PUSH
11668: xmlGenericError(xmlGenericErrorContext,
11669: "PP: entering START_TAG\n");
11670: #endif
11671: break;
11672: case XML_PARSER_PUBLIC_LITERAL:
11673: xmlGenericError(xmlGenericErrorContext,
11674: "PP: internal error, state == PUBLIC_LITERAL\n");
11675: ctxt->instate = XML_PARSER_START_TAG;
11676: #ifdef DEBUG_PUSH
11677: xmlGenericError(xmlGenericErrorContext,
11678: "PP: entering START_TAG\n");
11679: #endif
11680: break;
11681: }
11682: }
11683: done:
11684: #ifdef DEBUG_PUSH
11685: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11686: #endif
11687: return(ret);
11688: encoding_error:
11689: {
11690: char buffer[150];
11691:
11692: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11693: ctxt->input->cur[0], ctxt->input->cur[1],
11694: ctxt->input->cur[2], ctxt->input->cur[3]);
11695: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11696: "Input is not proper UTF-8, indicate encoding !\n%s",
11697: BAD_CAST buffer, NULL);
11698: }
11699: return(0);
11700: }
11701:
11702: /**
11703: * xmlParseChunk:
11704: * @ctxt: an XML parser context
11705: * @chunk: an char array
11706: * @size: the size in byte of the chunk
11707: * @terminate: last chunk indicator
11708: *
11709: * Parse a Chunk of memory
11710: *
11711: * Returns zero if no error, the xmlParserErrors otherwise.
11712: */
11713: int
11714: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11715: int terminate) {
11716: int end_in_lf = 0;
11717: int remain = 0;
11718:
11719: if (ctxt == NULL)
11720: return(XML_ERR_INTERNAL_ERROR);
11721: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11722: return(ctxt->errNo);
11723: if (ctxt->instate == XML_PARSER_START)
11724: xmlDetectSAX2(ctxt);
11725: if ((size > 0) && (chunk != NULL) && (!terminate) &&
11726: (chunk[size - 1] == '\r')) {
11727: end_in_lf = 1;
11728: size--;
11729: }
11730:
11731: xmldecl_done:
11732:
11733: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11734: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11735: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11736: int cur = ctxt->input->cur - ctxt->input->base;
11737: int res;
11738:
11739: /*
11740: * Specific handling if we autodetected an encoding, we should not
11741: * push more than the first line ... which depend on the encoding
11742: * And only push the rest once the final encoding was detected
11743: */
11744: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11745: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11746: unsigned int len = 45;
11747:
11748: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11749: BAD_CAST "UTF-16")) ||
11750: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11751: BAD_CAST "UTF16")))
11752: len = 90;
11753: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11754: BAD_CAST "UCS-4")) ||
11755: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11756: BAD_CAST "UCS4")))
11757: len = 180;
11758:
11759: if (ctxt->input->buf->rawconsumed < len)
11760: len -= ctxt->input->buf->rawconsumed;
11761:
11762: /*
11763: * Change size for reading the initial declaration only
11764: * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11765: * will blindly copy extra bytes from memory.
11766: */
11767: if ((unsigned int) size > len) {
11768: remain = size - len;
11769: size = len;
11770: } else {
11771: remain = 0;
11772: }
11773: }
11774: res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11775: if (res < 0) {
11776: ctxt->errNo = XML_PARSER_EOF;
11777: ctxt->disableSAX = 1;
11778: return (XML_PARSER_EOF);
11779: }
11780: ctxt->input->base = ctxt->input->buf->buffer->content + base;
11781: ctxt->input->cur = ctxt->input->base + cur;
11782: ctxt->input->end =
11783: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11784: #ifdef DEBUG_PUSH
11785: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11786: #endif
11787:
11788: } else if (ctxt->instate != XML_PARSER_EOF) {
11789: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11790: xmlParserInputBufferPtr in = ctxt->input->buf;
11791: if ((in->encoder != NULL) && (in->buffer != NULL) &&
11792: (in->raw != NULL)) {
11793: int nbchars;
11794:
11795: nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11796: if (nbchars < 0) {
11797: /* TODO 2.6.0 */
11798: xmlGenericError(xmlGenericErrorContext,
11799: "xmlParseChunk: encoder error\n");
11800: return(XML_ERR_INVALID_ENCODING);
11801: }
11802: }
11803: }
11804: }
11805: if (remain != 0)
11806: xmlParseTryOrFinish(ctxt, 0);
11807: else
11808: xmlParseTryOrFinish(ctxt, terminate);
11809: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11810: return(ctxt->errNo);
11811:
11812: if (remain != 0) {
11813: chunk += size;
11814: size = remain;
11815: remain = 0;
11816: goto xmldecl_done;
11817: }
11818: if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11819: (ctxt->input->buf != NULL)) {
11820: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11821: }
11822: if (terminate) {
11823: /*
11824: * Check for termination
11825: */
11826: int avail = 0;
11827:
11828: if (ctxt->input != NULL) {
11829: if (ctxt->input->buf == NULL)
11830: avail = ctxt->input->length -
11831: (ctxt->input->cur - ctxt->input->base);
11832: else
11833: avail = ctxt->input->buf->buffer->use -
11834: (ctxt->input->cur - ctxt->input->base);
11835: }
11836:
11837: if ((ctxt->instate != XML_PARSER_EOF) &&
11838: (ctxt->instate != XML_PARSER_EPILOG)) {
11839: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11840: }
11841: if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11842: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11843: }
11844: if (ctxt->instate != XML_PARSER_EOF) {
11845: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11846: ctxt->sax->endDocument(ctxt->userData);
11847: }
11848: ctxt->instate = XML_PARSER_EOF;
11849: }
11850: return((xmlParserErrors) ctxt->errNo);
11851: }
11852:
11853: /************************************************************************
11854: * *
11855: * I/O front end functions to the parser *
11856: * *
11857: ************************************************************************/
11858:
11859: /**
11860: * xmlCreatePushParserCtxt:
11861: * @sax: a SAX handler
11862: * @user_data: The user data returned on SAX callbacks
11863: * @chunk: a pointer to an array of chars
11864: * @size: number of chars in the array
11865: * @filename: an optional file name or URI
11866: *
11867: * Create a parser context for using the XML parser in push mode.
11868: * If @buffer and @size are non-NULL, the data is used to detect
11869: * the encoding. The remaining characters will be parsed so they
11870: * don't need to be fed in again through xmlParseChunk.
11871: * To allow content encoding detection, @size should be >= 4
11872: * The value of @filename is used for fetching external entities
11873: * and error/warning reports.
11874: *
11875: * Returns the new parser context or NULL
11876: */
11877:
11878: xmlParserCtxtPtr
11879: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11880: const char *chunk, int size, const char *filename) {
11881: xmlParserCtxtPtr ctxt;
11882: xmlParserInputPtr inputStream;
11883: xmlParserInputBufferPtr buf;
11884: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11885:
11886: /*
11887: * plug some encoding conversion routines
11888: */
11889: if ((chunk != NULL) && (size >= 4))
11890: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11891:
11892: buf = xmlAllocParserInputBuffer(enc);
11893: if (buf == NULL) return(NULL);
11894:
11895: ctxt = xmlNewParserCtxt();
11896: if (ctxt == NULL) {
11897: xmlErrMemory(NULL, "creating parser: out of memory\n");
11898: xmlFreeParserInputBuffer(buf);
11899: return(NULL);
11900: }
11901: ctxt->dictNames = 1;
11902: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11903: if (ctxt->pushTab == NULL) {
11904: xmlErrMemory(ctxt, NULL);
11905: xmlFreeParserInputBuffer(buf);
11906: xmlFreeParserCtxt(ctxt);
11907: return(NULL);
11908: }
11909: if (sax != NULL) {
11910: #ifdef LIBXML_SAX1_ENABLED
11911: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11912: #endif /* LIBXML_SAX1_ENABLED */
11913: xmlFree(ctxt->sax);
11914: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11915: if (ctxt->sax == NULL) {
11916: xmlErrMemory(ctxt, NULL);
11917: xmlFreeParserInputBuffer(buf);
11918: xmlFreeParserCtxt(ctxt);
11919: return(NULL);
11920: }
11921: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11922: if (sax->initialized == XML_SAX2_MAGIC)
11923: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11924: else
11925: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11926: if (user_data != NULL)
11927: ctxt->userData = user_data;
11928: }
11929: if (filename == NULL) {
11930: ctxt->directory = NULL;
11931: } else {
11932: ctxt->directory = xmlParserGetDirectory(filename);
11933: }
11934:
11935: inputStream = xmlNewInputStream(ctxt);
11936: if (inputStream == NULL) {
11937: xmlFreeParserCtxt(ctxt);
11938: xmlFreeParserInputBuffer(buf);
11939: return(NULL);
11940: }
11941:
11942: if (filename == NULL)
11943: inputStream->filename = NULL;
11944: else {
11945: inputStream->filename = (char *)
11946: xmlCanonicPath((const xmlChar *) filename);
11947: if (inputStream->filename == NULL) {
11948: xmlFreeParserCtxt(ctxt);
11949: xmlFreeParserInputBuffer(buf);
11950: return(NULL);
11951: }
11952: }
11953: inputStream->buf = buf;
11954: inputStream->base = inputStream->buf->buffer->content;
11955: inputStream->cur = inputStream->buf->buffer->content;
11956: inputStream->end =
11957: &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11958:
11959: inputPush(ctxt, inputStream);
11960:
11961: /*
11962: * If the caller didn't provide an initial 'chunk' for determining
11963: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11964: * that it can be automatically determined later
11965: */
11966: if ((size == 0) || (chunk == NULL)) {
11967: ctxt->charset = XML_CHAR_ENCODING_NONE;
11968: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11969: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11970: int cur = ctxt->input->cur - ctxt->input->base;
11971:
11972: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11973:
11974: ctxt->input->base = ctxt->input->buf->buffer->content + base;
11975: ctxt->input->cur = ctxt->input->base + cur;
11976: ctxt->input->end =
11977: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11978: #ifdef DEBUG_PUSH
11979: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11980: #endif
11981: }
11982:
11983: if (enc != XML_CHAR_ENCODING_NONE) {
11984: xmlSwitchEncoding(ctxt, enc);
11985: }
11986:
11987: return(ctxt);
11988: }
11989: #endif /* LIBXML_PUSH_ENABLED */
11990:
11991: /**
11992: * xmlStopParser:
11993: * @ctxt: an XML parser context
11994: *
11995: * Blocks further parser processing
11996: */
11997: void
11998: xmlStopParser(xmlParserCtxtPtr ctxt) {
11999: if (ctxt == NULL)
12000: return;
12001: ctxt->instate = XML_PARSER_EOF;
12002: ctxt->disableSAX = 1;
12003: if (ctxt->input != NULL) {
12004: ctxt->input->cur = BAD_CAST"";
12005: ctxt->input->base = ctxt->input->cur;
12006: }
12007: }
12008:
12009: /**
12010: * xmlCreateIOParserCtxt:
12011: * @sax: a SAX handler
12012: * @user_data: The user data returned on SAX callbacks
12013: * @ioread: an I/O read function
12014: * @ioclose: an I/O close function
12015: * @ioctx: an I/O handler
12016: * @enc: the charset encoding if known
12017: *
12018: * Create a parser context for using the XML parser with an existing
12019: * I/O stream
12020: *
12021: * Returns the new parser context or NULL
12022: */
12023: xmlParserCtxtPtr
12024: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12025: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12026: void *ioctx, xmlCharEncoding enc) {
12027: xmlParserCtxtPtr ctxt;
12028: xmlParserInputPtr inputStream;
12029: xmlParserInputBufferPtr buf;
1.1.1.2 misho 12030:
1.1 misho 12031: if (ioread == NULL) return(NULL);
12032:
12033: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
1.1.1.2 misho 12034: if (buf == NULL) {
12035: if (ioclose != NULL)
12036: ioclose(ioctx);
12037: return (NULL);
12038: }
1.1 misho 12039:
12040: ctxt = xmlNewParserCtxt();
12041: if (ctxt == NULL) {
12042: xmlFreeParserInputBuffer(buf);
12043: return(NULL);
12044: }
12045: if (sax != NULL) {
12046: #ifdef LIBXML_SAX1_ENABLED
12047: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12048: #endif /* LIBXML_SAX1_ENABLED */
12049: xmlFree(ctxt->sax);
12050: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12051: if (ctxt->sax == NULL) {
12052: xmlErrMemory(ctxt, NULL);
12053: xmlFreeParserCtxt(ctxt);
12054: return(NULL);
12055: }
12056: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12057: if (sax->initialized == XML_SAX2_MAGIC)
12058: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12059: else
12060: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12061: if (user_data != NULL)
12062: ctxt->userData = user_data;
1.1.1.2 misho 12063: }
1.1 misho 12064:
12065: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12066: if (inputStream == NULL) {
12067: xmlFreeParserCtxt(ctxt);
12068: return(NULL);
12069: }
12070: inputPush(ctxt, inputStream);
12071:
12072: return(ctxt);
12073: }
12074:
12075: #ifdef LIBXML_VALID_ENABLED
12076: /************************************************************************
12077: * *
12078: * Front ends when parsing a DTD *
12079: * *
12080: ************************************************************************/
12081:
12082: /**
12083: * xmlIOParseDTD:
12084: * @sax: the SAX handler block or NULL
12085: * @input: an Input Buffer
12086: * @enc: the charset encoding if known
12087: *
12088: * Load and parse a DTD
12089: *
12090: * Returns the resulting xmlDtdPtr or NULL in case of error.
12091: * @input will be freed by the function in any case.
12092: */
12093:
12094: xmlDtdPtr
12095: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12096: xmlCharEncoding enc) {
12097: xmlDtdPtr ret = NULL;
12098: xmlParserCtxtPtr ctxt;
12099: xmlParserInputPtr pinput = NULL;
12100: xmlChar start[4];
12101:
12102: if (input == NULL)
12103: return(NULL);
12104:
12105: ctxt = xmlNewParserCtxt();
12106: if (ctxt == NULL) {
12107: xmlFreeParserInputBuffer(input);
12108: return(NULL);
12109: }
12110:
12111: /*
12112: * Set-up the SAX context
12113: */
12114: if (sax != NULL) {
12115: if (ctxt->sax != NULL)
12116: xmlFree(ctxt->sax);
12117: ctxt->sax = sax;
12118: ctxt->userData = ctxt;
12119: }
12120: xmlDetectSAX2(ctxt);
12121:
12122: /*
12123: * generate a parser input from the I/O handler
12124: */
12125:
12126: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12127: if (pinput == NULL) {
12128: if (sax != NULL) ctxt->sax = NULL;
12129: xmlFreeParserInputBuffer(input);
12130: xmlFreeParserCtxt(ctxt);
12131: return(NULL);
12132: }
12133:
12134: /*
12135: * plug some encoding conversion routines here.
12136: */
12137: if (xmlPushInput(ctxt, pinput) < 0) {
12138: if (sax != NULL) ctxt->sax = NULL;
12139: xmlFreeParserCtxt(ctxt);
12140: return(NULL);
12141: }
12142: if (enc != XML_CHAR_ENCODING_NONE) {
12143: xmlSwitchEncoding(ctxt, enc);
12144: }
12145:
12146: pinput->filename = NULL;
12147: pinput->line = 1;
12148: pinput->col = 1;
12149: pinput->base = ctxt->input->cur;
12150: pinput->cur = ctxt->input->cur;
12151: pinput->free = NULL;
12152:
12153: /*
12154: * let's parse that entity knowing it's an external subset.
12155: */
12156: ctxt->inSubset = 2;
12157: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12158: if (ctxt->myDoc == NULL) {
12159: xmlErrMemory(ctxt, "New Doc failed");
12160: return(NULL);
12161: }
12162: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12163: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12164: BAD_CAST "none", BAD_CAST "none");
12165:
12166: if ((enc == XML_CHAR_ENCODING_NONE) &&
12167: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12168: /*
12169: * Get the 4 first bytes and decode the charset
12170: * if enc != XML_CHAR_ENCODING_NONE
12171: * plug some encoding conversion routines.
12172: */
12173: start[0] = RAW;
12174: start[1] = NXT(1);
12175: start[2] = NXT(2);
12176: start[3] = NXT(3);
12177: enc = xmlDetectCharEncoding(start, 4);
12178: if (enc != XML_CHAR_ENCODING_NONE) {
12179: xmlSwitchEncoding(ctxt, enc);
12180: }
12181: }
12182:
12183: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12184:
12185: if (ctxt->myDoc != NULL) {
12186: if (ctxt->wellFormed) {
12187: ret = ctxt->myDoc->extSubset;
12188: ctxt->myDoc->extSubset = NULL;
12189: if (ret != NULL) {
12190: xmlNodePtr tmp;
12191:
12192: ret->doc = NULL;
12193: tmp = ret->children;
12194: while (tmp != NULL) {
12195: tmp->doc = NULL;
12196: tmp = tmp->next;
12197: }
12198: }
12199: } else {
12200: ret = NULL;
12201: }
12202: xmlFreeDoc(ctxt->myDoc);
12203: ctxt->myDoc = NULL;
12204: }
12205: if (sax != NULL) ctxt->sax = NULL;
12206: xmlFreeParserCtxt(ctxt);
12207:
12208: return(ret);
12209: }
12210:
12211: /**
12212: * xmlSAXParseDTD:
12213: * @sax: the SAX handler block
12214: * @ExternalID: a NAME* containing the External ID of the DTD
12215: * @SystemID: a NAME* containing the URL to the DTD
12216: *
12217: * Load and parse an external subset.
12218: *
12219: * Returns the resulting xmlDtdPtr or NULL in case of error.
12220: */
12221:
12222: xmlDtdPtr
12223: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12224: const xmlChar *SystemID) {
12225: xmlDtdPtr ret = NULL;
12226: xmlParserCtxtPtr ctxt;
12227: xmlParserInputPtr input = NULL;
12228: xmlCharEncoding enc;
12229: xmlChar* systemIdCanonic;
12230:
12231: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12232:
12233: ctxt = xmlNewParserCtxt();
12234: if (ctxt == NULL) {
12235: return(NULL);
12236: }
12237:
12238: /*
12239: * Set-up the SAX context
12240: */
12241: if (sax != NULL) {
12242: if (ctxt->sax != NULL)
12243: xmlFree(ctxt->sax);
12244: ctxt->sax = sax;
12245: ctxt->userData = ctxt;
12246: }
12247:
12248: /*
12249: * Canonicalise the system ID
12250: */
12251: systemIdCanonic = xmlCanonicPath(SystemID);
12252: if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12253: xmlFreeParserCtxt(ctxt);
12254: return(NULL);
12255: }
12256:
12257: /*
12258: * Ask the Entity resolver to load the damn thing
12259: */
12260:
12261: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12262: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12263: systemIdCanonic);
12264: if (input == NULL) {
12265: if (sax != NULL) ctxt->sax = NULL;
12266: xmlFreeParserCtxt(ctxt);
12267: if (systemIdCanonic != NULL)
12268: xmlFree(systemIdCanonic);
12269: return(NULL);
12270: }
12271:
12272: /*
12273: * plug some encoding conversion routines here.
12274: */
12275: if (xmlPushInput(ctxt, input) < 0) {
12276: if (sax != NULL) ctxt->sax = NULL;
12277: xmlFreeParserCtxt(ctxt);
12278: if (systemIdCanonic != NULL)
12279: xmlFree(systemIdCanonic);
12280: return(NULL);
12281: }
12282: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12283: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12284: xmlSwitchEncoding(ctxt, enc);
12285: }
12286:
12287: if (input->filename == NULL)
12288: input->filename = (char *) systemIdCanonic;
12289: else
12290: xmlFree(systemIdCanonic);
12291: input->line = 1;
12292: input->col = 1;
12293: input->base = ctxt->input->cur;
12294: input->cur = ctxt->input->cur;
12295: input->free = NULL;
12296:
12297: /*
12298: * let's parse that entity knowing it's an external subset.
12299: */
12300: ctxt->inSubset = 2;
12301: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12302: if (ctxt->myDoc == NULL) {
12303: xmlErrMemory(ctxt, "New Doc failed");
12304: if (sax != NULL) ctxt->sax = NULL;
12305: xmlFreeParserCtxt(ctxt);
12306: return(NULL);
12307: }
12308: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12309: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12310: ExternalID, SystemID);
12311: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12312:
12313: if (ctxt->myDoc != NULL) {
12314: if (ctxt->wellFormed) {
12315: ret = ctxt->myDoc->extSubset;
12316: ctxt->myDoc->extSubset = NULL;
12317: if (ret != NULL) {
12318: xmlNodePtr tmp;
12319:
12320: ret->doc = NULL;
12321: tmp = ret->children;
12322: while (tmp != NULL) {
12323: tmp->doc = NULL;
12324: tmp = tmp->next;
12325: }
12326: }
12327: } else {
12328: ret = NULL;
12329: }
12330: xmlFreeDoc(ctxt->myDoc);
12331: ctxt->myDoc = NULL;
12332: }
12333: if (sax != NULL) ctxt->sax = NULL;
12334: xmlFreeParserCtxt(ctxt);
12335:
12336: return(ret);
12337: }
12338:
12339:
12340: /**
12341: * xmlParseDTD:
12342: * @ExternalID: a NAME* containing the External ID of the DTD
12343: * @SystemID: a NAME* containing the URL to the DTD
12344: *
12345: * Load and parse an external subset.
12346: *
12347: * Returns the resulting xmlDtdPtr or NULL in case of error.
12348: */
12349:
12350: xmlDtdPtr
12351: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12352: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12353: }
12354: #endif /* LIBXML_VALID_ENABLED */
12355:
12356: /************************************************************************
12357: * *
12358: * Front ends when parsing an Entity *
12359: * *
12360: ************************************************************************/
12361:
12362: /**
12363: * xmlParseCtxtExternalEntity:
12364: * @ctx: the existing parsing context
12365: * @URL: the URL for the entity to load
12366: * @ID: the System ID for the entity to load
12367: * @lst: the return value for the set of parsed nodes
12368: *
12369: * Parse an external general entity within an existing parsing context
12370: * An external general parsed entity is well-formed if it matches the
12371: * production labeled extParsedEnt.
12372: *
12373: * [78] extParsedEnt ::= TextDecl? content
12374: *
12375: * Returns 0 if the entity is well formed, -1 in case of args problem and
12376: * the parser error code otherwise
12377: */
12378:
12379: int
12380: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12381: const xmlChar *ID, xmlNodePtr *lst) {
12382: xmlParserCtxtPtr ctxt;
12383: xmlDocPtr newDoc;
12384: xmlNodePtr newRoot;
12385: xmlSAXHandlerPtr oldsax = NULL;
12386: int ret = 0;
12387: xmlChar start[4];
12388: xmlCharEncoding enc;
12389:
12390: if (ctx == NULL) return(-1);
12391:
12392: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12393: (ctx->depth > 1024)) {
12394: return(XML_ERR_ENTITY_LOOP);
12395: }
12396:
12397: if (lst != NULL)
12398: *lst = NULL;
12399: if ((URL == NULL) && (ID == NULL))
12400: return(-1);
12401: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12402: return(-1);
12403:
12404: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12405: if (ctxt == NULL) {
12406: return(-1);
12407: }
12408:
12409: oldsax = ctxt->sax;
12410: ctxt->sax = ctx->sax;
12411: xmlDetectSAX2(ctxt);
12412: newDoc = xmlNewDoc(BAD_CAST "1.0");
12413: if (newDoc == NULL) {
12414: xmlFreeParserCtxt(ctxt);
12415: return(-1);
12416: }
12417: newDoc->properties = XML_DOC_INTERNAL;
12418: if (ctx->myDoc->dict) {
12419: newDoc->dict = ctx->myDoc->dict;
12420: xmlDictReference(newDoc->dict);
12421: }
12422: if (ctx->myDoc != NULL) {
12423: newDoc->intSubset = ctx->myDoc->intSubset;
12424: newDoc->extSubset = ctx->myDoc->extSubset;
12425: }
12426: if (ctx->myDoc->URL != NULL) {
12427: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12428: }
12429: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12430: if (newRoot == NULL) {
12431: ctxt->sax = oldsax;
12432: xmlFreeParserCtxt(ctxt);
12433: newDoc->intSubset = NULL;
12434: newDoc->extSubset = NULL;
12435: xmlFreeDoc(newDoc);
12436: return(-1);
12437: }
12438: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12439: nodePush(ctxt, newDoc->children);
12440: if (ctx->myDoc == NULL) {
12441: ctxt->myDoc = newDoc;
12442: } else {
12443: ctxt->myDoc = ctx->myDoc;
12444: newDoc->children->doc = ctx->myDoc;
12445: }
12446:
12447: /*
12448: * Get the 4 first bytes and decode the charset
12449: * if enc != XML_CHAR_ENCODING_NONE
12450: * plug some encoding conversion routines.
12451: */
12452: GROW
12453: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12454: start[0] = RAW;
12455: start[1] = NXT(1);
12456: start[2] = NXT(2);
12457: start[3] = NXT(3);
12458: enc = xmlDetectCharEncoding(start, 4);
12459: if (enc != XML_CHAR_ENCODING_NONE) {
12460: xmlSwitchEncoding(ctxt, enc);
12461: }
12462: }
12463:
12464: /*
12465: * Parse a possible text declaration first
12466: */
12467: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12468: xmlParseTextDecl(ctxt);
12469: /*
12470: * An XML-1.0 document can't reference an entity not XML-1.0
12471: */
12472: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12473: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12474: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12475: "Version mismatch between document and entity\n");
12476: }
12477: }
12478:
12479: /*
1.1.1.2 misho 12480: * If the user provided its own SAX callbacks then reuse the
12481: * useData callback field, otherwise the expected setup in a
12482: * DOM builder is to have userData == ctxt
12483: */
12484: if (ctx->userData == ctx)
12485: ctxt->userData = ctxt;
12486: else
12487: ctxt->userData = ctx->userData;
12488:
12489: /*
1.1 misho 12490: * Doing validity checking on chunk doesn't make sense
12491: */
12492: ctxt->instate = XML_PARSER_CONTENT;
12493: ctxt->validate = ctx->validate;
12494: ctxt->valid = ctx->valid;
12495: ctxt->loadsubset = ctx->loadsubset;
12496: ctxt->depth = ctx->depth + 1;
12497: ctxt->replaceEntities = ctx->replaceEntities;
12498: if (ctxt->validate) {
12499: ctxt->vctxt.error = ctx->vctxt.error;
12500: ctxt->vctxt.warning = ctx->vctxt.warning;
12501: } else {
12502: ctxt->vctxt.error = NULL;
12503: ctxt->vctxt.warning = NULL;
12504: }
12505: ctxt->vctxt.nodeTab = NULL;
12506: ctxt->vctxt.nodeNr = 0;
12507: ctxt->vctxt.nodeMax = 0;
12508: ctxt->vctxt.node = NULL;
12509: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12510: ctxt->dict = ctx->dict;
12511: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12512: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12513: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12514: ctxt->dictNames = ctx->dictNames;
12515: ctxt->attsDefault = ctx->attsDefault;
12516: ctxt->attsSpecial = ctx->attsSpecial;
12517: ctxt->linenumbers = ctx->linenumbers;
12518:
12519: xmlParseContent(ctxt);
12520:
12521: ctx->validate = ctxt->validate;
12522: ctx->valid = ctxt->valid;
12523: if ((RAW == '<') && (NXT(1) == '/')) {
12524: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12525: } else if (RAW != 0) {
12526: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12527: }
12528: if (ctxt->node != newDoc->children) {
12529: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12530: }
12531:
12532: if (!ctxt->wellFormed) {
12533: if (ctxt->errNo == 0)
12534: ret = 1;
12535: else
12536: ret = ctxt->errNo;
12537: } else {
12538: if (lst != NULL) {
12539: xmlNodePtr cur;
12540:
12541: /*
12542: * Return the newly created nodeset after unlinking it from
12543: * they pseudo parent.
12544: */
12545: cur = newDoc->children->children;
12546: *lst = cur;
12547: while (cur != NULL) {
12548: cur->parent = NULL;
12549: cur = cur->next;
12550: }
12551: newDoc->children->children = NULL;
12552: }
12553: ret = 0;
12554: }
12555: ctxt->sax = oldsax;
12556: ctxt->dict = NULL;
12557: ctxt->attsDefault = NULL;
12558: ctxt->attsSpecial = NULL;
12559: xmlFreeParserCtxt(ctxt);
12560: newDoc->intSubset = NULL;
12561: newDoc->extSubset = NULL;
12562: xmlFreeDoc(newDoc);
12563:
12564: return(ret);
12565: }
12566:
12567: /**
12568: * xmlParseExternalEntityPrivate:
12569: * @doc: the document the chunk pertains to
12570: * @oldctxt: the previous parser context if available
12571: * @sax: the SAX handler bloc (possibly NULL)
12572: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12573: * @depth: Used for loop detection, use 0
12574: * @URL: the URL for the entity to load
12575: * @ID: the System ID for the entity to load
12576: * @list: the return value for the set of parsed nodes
12577: *
12578: * Private version of xmlParseExternalEntity()
12579: *
12580: * Returns 0 if the entity is well formed, -1 in case of args problem and
12581: * the parser error code otherwise
12582: */
12583:
12584: static xmlParserErrors
12585: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12586: xmlSAXHandlerPtr sax,
12587: void *user_data, int depth, const xmlChar *URL,
12588: const xmlChar *ID, xmlNodePtr *list) {
12589: xmlParserCtxtPtr ctxt;
12590: xmlDocPtr newDoc;
12591: xmlNodePtr newRoot;
12592: xmlSAXHandlerPtr oldsax = NULL;
12593: xmlParserErrors ret = XML_ERR_OK;
12594: xmlChar start[4];
12595: xmlCharEncoding enc;
12596:
12597: if (((depth > 40) &&
12598: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12599: (depth > 1024)) {
12600: return(XML_ERR_ENTITY_LOOP);
12601: }
12602:
12603: if (list != NULL)
12604: *list = NULL;
12605: if ((URL == NULL) && (ID == NULL))
12606: return(XML_ERR_INTERNAL_ERROR);
12607: if (doc == NULL)
12608: return(XML_ERR_INTERNAL_ERROR);
12609:
12610:
12611: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12612: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12613: ctxt->userData = ctxt;
12614: if (oldctxt != NULL) {
12615: ctxt->_private = oldctxt->_private;
12616: ctxt->loadsubset = oldctxt->loadsubset;
12617: ctxt->validate = oldctxt->validate;
12618: ctxt->external = oldctxt->external;
12619: ctxt->record_info = oldctxt->record_info;
12620: ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12621: ctxt->node_seq.length = oldctxt->node_seq.length;
12622: ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12623: } else {
12624: /*
12625: * Doing validity checking on chunk without context
12626: * doesn't make sense
12627: */
12628: ctxt->_private = NULL;
12629: ctxt->validate = 0;
12630: ctxt->external = 2;
12631: ctxt->loadsubset = 0;
12632: }
12633: if (sax != NULL) {
12634: oldsax = ctxt->sax;
12635: ctxt->sax = sax;
12636: if (user_data != NULL)
12637: ctxt->userData = user_data;
12638: }
12639: xmlDetectSAX2(ctxt);
12640: newDoc = xmlNewDoc(BAD_CAST "1.0");
12641: if (newDoc == NULL) {
12642: ctxt->node_seq.maximum = 0;
12643: ctxt->node_seq.length = 0;
12644: ctxt->node_seq.buffer = NULL;
12645: xmlFreeParserCtxt(ctxt);
12646: return(XML_ERR_INTERNAL_ERROR);
12647: }
12648: newDoc->properties = XML_DOC_INTERNAL;
12649: newDoc->intSubset = doc->intSubset;
12650: newDoc->extSubset = doc->extSubset;
12651: newDoc->dict = doc->dict;
12652: xmlDictReference(newDoc->dict);
12653:
12654: if (doc->URL != NULL) {
12655: newDoc->URL = xmlStrdup(doc->URL);
12656: }
12657: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12658: if (newRoot == NULL) {
12659: if (sax != NULL)
12660: ctxt->sax = oldsax;
12661: ctxt->node_seq.maximum = 0;
12662: ctxt->node_seq.length = 0;
12663: ctxt->node_seq.buffer = NULL;
12664: xmlFreeParserCtxt(ctxt);
12665: newDoc->intSubset = NULL;
12666: newDoc->extSubset = NULL;
12667: xmlFreeDoc(newDoc);
12668: return(XML_ERR_INTERNAL_ERROR);
12669: }
12670: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12671: nodePush(ctxt, newDoc->children);
12672: ctxt->myDoc = doc;
12673: newRoot->doc = doc;
12674:
12675: /*
12676: * Get the 4 first bytes and decode the charset
12677: * if enc != XML_CHAR_ENCODING_NONE
12678: * plug some encoding conversion routines.
12679: */
12680: GROW;
12681: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12682: start[0] = RAW;
12683: start[1] = NXT(1);
12684: start[2] = NXT(2);
12685: start[3] = NXT(3);
12686: enc = xmlDetectCharEncoding(start, 4);
12687: if (enc != XML_CHAR_ENCODING_NONE) {
12688: xmlSwitchEncoding(ctxt, enc);
12689: }
12690: }
12691:
12692: /*
12693: * Parse a possible text declaration first
12694: */
12695: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12696: xmlParseTextDecl(ctxt);
12697: }
12698:
12699: ctxt->instate = XML_PARSER_CONTENT;
12700: ctxt->depth = depth;
12701:
12702: xmlParseContent(ctxt);
12703:
12704: if ((RAW == '<') && (NXT(1) == '/')) {
12705: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12706: } else if (RAW != 0) {
12707: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12708: }
12709: if (ctxt->node != newDoc->children) {
12710: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12711: }
12712:
12713: if (!ctxt->wellFormed) {
12714: if (ctxt->errNo == 0)
12715: ret = XML_ERR_INTERNAL_ERROR;
12716: else
12717: ret = (xmlParserErrors)ctxt->errNo;
12718: } else {
12719: if (list != NULL) {
12720: xmlNodePtr cur;
12721:
12722: /*
12723: * Return the newly created nodeset after unlinking it from
12724: * they pseudo parent.
12725: */
12726: cur = newDoc->children->children;
12727: *list = cur;
12728: while (cur != NULL) {
12729: cur->parent = NULL;
12730: cur = cur->next;
12731: }
12732: newDoc->children->children = NULL;
12733: }
12734: ret = XML_ERR_OK;
12735: }
12736:
12737: /*
12738: * Record in the parent context the number of entities replacement
12739: * done when parsing that reference.
12740: */
12741: if (oldctxt != NULL)
12742: oldctxt->nbentities += ctxt->nbentities;
12743:
12744: /*
12745: * Also record the size of the entity parsed
12746: */
12747: if (ctxt->input != NULL) {
12748: oldctxt->sizeentities += ctxt->input->consumed;
12749: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12750: }
12751: /*
12752: * And record the last error if any
12753: */
12754: if (ctxt->lastError.code != XML_ERR_OK)
12755: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12756:
12757: if (sax != NULL)
12758: ctxt->sax = oldsax;
12759: oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12760: oldctxt->node_seq.length = ctxt->node_seq.length;
12761: oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12762: ctxt->node_seq.maximum = 0;
12763: ctxt->node_seq.length = 0;
12764: ctxt->node_seq.buffer = NULL;
12765: xmlFreeParserCtxt(ctxt);
12766: newDoc->intSubset = NULL;
12767: newDoc->extSubset = NULL;
12768: xmlFreeDoc(newDoc);
12769:
12770: return(ret);
12771: }
12772:
12773: #ifdef LIBXML_SAX1_ENABLED
12774: /**
12775: * xmlParseExternalEntity:
12776: * @doc: the document the chunk pertains to
12777: * @sax: the SAX handler bloc (possibly NULL)
12778: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12779: * @depth: Used for loop detection, use 0
12780: * @URL: the URL for the entity to load
12781: * @ID: the System ID for the entity to load
12782: * @lst: the return value for the set of parsed nodes
12783: *
12784: * Parse an external general entity
12785: * An external general parsed entity is well-formed if it matches the
12786: * production labeled extParsedEnt.
12787: *
12788: * [78] extParsedEnt ::= TextDecl? content
12789: *
12790: * Returns 0 if the entity is well formed, -1 in case of args problem and
12791: * the parser error code otherwise
12792: */
12793:
12794: int
12795: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12796: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12797: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12798: ID, lst));
12799: }
12800:
12801: /**
12802: * xmlParseBalancedChunkMemory:
12803: * @doc: the document the chunk pertains to
12804: * @sax: the SAX handler bloc (possibly NULL)
12805: * @user_data: The user data returned on SAX callbacks (possibly NULL)
12806: * @depth: Used for loop detection, use 0
12807: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12808: * @lst: the return value for the set of parsed nodes
12809: *
12810: * Parse a well-balanced chunk of an XML document
12811: * called by the parser
12812: * The allowed sequence for the Well Balanced Chunk is the one defined by
12813: * the content production in the XML grammar:
12814: *
12815: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12816: *
12817: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12818: * the parser error code otherwise
12819: */
12820:
12821: int
12822: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12823: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12824: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12825: depth, string, lst, 0 );
12826: }
12827: #endif /* LIBXML_SAX1_ENABLED */
12828:
12829: /**
12830: * xmlParseBalancedChunkMemoryInternal:
12831: * @oldctxt: the existing parsing context
12832: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12833: * @user_data: the user data field for the parser context
12834: * @lst: the return value for the set of parsed nodes
12835: *
12836: *
12837: * Parse a well-balanced chunk of an XML document
12838: * called by the parser
12839: * The allowed sequence for the Well Balanced Chunk is the one defined by
12840: * the content production in the XML grammar:
12841: *
12842: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12843: *
12844: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12845: * error code otherwise
12846: *
12847: * In case recover is set to 1, the nodelist will not be empty even if
12848: * the parsed chunk is not well balanced.
12849: */
12850: static xmlParserErrors
12851: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12852: const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12853: xmlParserCtxtPtr ctxt;
12854: xmlDocPtr newDoc = NULL;
12855: xmlNodePtr newRoot;
12856: xmlSAXHandlerPtr oldsax = NULL;
12857: xmlNodePtr content = NULL;
12858: xmlNodePtr last = NULL;
12859: int size;
12860: xmlParserErrors ret = XML_ERR_OK;
12861: #ifdef SAX2
12862: int i;
12863: #endif
12864:
12865: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12866: (oldctxt->depth > 1024)) {
12867: return(XML_ERR_ENTITY_LOOP);
12868: }
12869:
12870:
12871: if (lst != NULL)
12872: *lst = NULL;
12873: if (string == NULL)
12874: return(XML_ERR_INTERNAL_ERROR);
12875:
12876: size = xmlStrlen(string);
12877:
12878: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12879: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12880: if (user_data != NULL)
12881: ctxt->userData = user_data;
12882: else
12883: ctxt->userData = ctxt;
12884: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12885: ctxt->dict = oldctxt->dict;
12886: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12887: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12888: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12889:
12890: #ifdef SAX2
12891: /* propagate namespaces down the entity */
12892: for (i = 0;i < oldctxt->nsNr;i += 2) {
12893: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12894: }
12895: #endif
12896:
12897: oldsax = ctxt->sax;
12898: ctxt->sax = oldctxt->sax;
12899: xmlDetectSAX2(ctxt);
12900: ctxt->replaceEntities = oldctxt->replaceEntities;
12901: ctxt->options = oldctxt->options;
12902:
12903: ctxt->_private = oldctxt->_private;
12904: if (oldctxt->myDoc == NULL) {
12905: newDoc = xmlNewDoc(BAD_CAST "1.0");
12906: if (newDoc == NULL) {
12907: ctxt->sax = oldsax;
12908: ctxt->dict = NULL;
12909: xmlFreeParserCtxt(ctxt);
12910: return(XML_ERR_INTERNAL_ERROR);
12911: }
12912: newDoc->properties = XML_DOC_INTERNAL;
12913: newDoc->dict = ctxt->dict;
12914: xmlDictReference(newDoc->dict);
12915: ctxt->myDoc = newDoc;
12916: } else {
12917: ctxt->myDoc = oldctxt->myDoc;
12918: content = ctxt->myDoc->children;
12919: last = ctxt->myDoc->last;
12920: }
12921: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12922: if (newRoot == NULL) {
12923: ctxt->sax = oldsax;
12924: ctxt->dict = NULL;
12925: xmlFreeParserCtxt(ctxt);
12926: if (newDoc != NULL) {
12927: xmlFreeDoc(newDoc);
12928: }
12929: return(XML_ERR_INTERNAL_ERROR);
12930: }
12931: ctxt->myDoc->children = NULL;
12932: ctxt->myDoc->last = NULL;
12933: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12934: nodePush(ctxt, ctxt->myDoc->children);
12935: ctxt->instate = XML_PARSER_CONTENT;
12936: ctxt->depth = oldctxt->depth + 1;
12937:
12938: ctxt->validate = 0;
12939: ctxt->loadsubset = oldctxt->loadsubset;
12940: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12941: /*
12942: * ID/IDREF registration will be done in xmlValidateElement below
12943: */
12944: ctxt->loadsubset |= XML_SKIP_IDS;
12945: }
12946: ctxt->dictNames = oldctxt->dictNames;
12947: ctxt->attsDefault = oldctxt->attsDefault;
12948: ctxt->attsSpecial = oldctxt->attsSpecial;
12949:
12950: xmlParseContent(ctxt);
12951: if ((RAW == '<') && (NXT(1) == '/')) {
12952: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12953: } else if (RAW != 0) {
12954: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12955: }
12956: if (ctxt->node != ctxt->myDoc->children) {
12957: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12958: }
12959:
12960: if (!ctxt->wellFormed) {
12961: if (ctxt->errNo == 0)
12962: ret = XML_ERR_INTERNAL_ERROR;
12963: else
12964: ret = (xmlParserErrors)ctxt->errNo;
12965: } else {
12966: ret = XML_ERR_OK;
12967: }
12968:
12969: if ((lst != NULL) && (ret == XML_ERR_OK)) {
12970: xmlNodePtr cur;
12971:
12972: /*
12973: * Return the newly created nodeset after unlinking it from
12974: * they pseudo parent.
12975: */
12976: cur = ctxt->myDoc->children->children;
12977: *lst = cur;
12978: while (cur != NULL) {
12979: #ifdef LIBXML_VALID_ENABLED
12980: if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12981: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12982: (cur->type == XML_ELEMENT_NODE)) {
12983: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12984: oldctxt->myDoc, cur);
12985: }
12986: #endif /* LIBXML_VALID_ENABLED */
12987: cur->parent = NULL;
12988: cur = cur->next;
12989: }
12990: ctxt->myDoc->children->children = NULL;
12991: }
12992: if (ctxt->myDoc != NULL) {
12993: xmlFreeNode(ctxt->myDoc->children);
12994: ctxt->myDoc->children = content;
12995: ctxt->myDoc->last = last;
12996: }
12997:
12998: /*
12999: * Record in the parent context the number of entities replacement
13000: * done when parsing that reference.
13001: */
13002: if (oldctxt != NULL)
13003: oldctxt->nbentities += ctxt->nbentities;
13004:
13005: /*
13006: * Also record the last error if any
13007: */
13008: if (ctxt->lastError.code != XML_ERR_OK)
13009: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13010:
13011: ctxt->sax = oldsax;
13012: ctxt->dict = NULL;
13013: ctxt->attsDefault = NULL;
13014: ctxt->attsSpecial = NULL;
13015: xmlFreeParserCtxt(ctxt);
13016: if (newDoc != NULL) {
13017: xmlFreeDoc(newDoc);
13018: }
13019:
13020: return(ret);
13021: }
13022:
13023: /**
13024: * xmlParseInNodeContext:
13025: * @node: the context node
13026: * @data: the input string
13027: * @datalen: the input string length in bytes
13028: * @options: a combination of xmlParserOption
13029: * @lst: the return value for the set of parsed nodes
13030: *
13031: * Parse a well-balanced chunk of an XML document
13032: * within the context (DTD, namespaces, etc ...) of the given node.
13033: *
13034: * The allowed sequence for the data is a Well Balanced Chunk defined by
13035: * the content production in the XML grammar:
13036: *
13037: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13038: *
13039: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13040: * error code otherwise
13041: */
13042: xmlParserErrors
13043: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13044: int options, xmlNodePtr *lst) {
13045: #ifdef SAX2
13046: xmlParserCtxtPtr ctxt;
13047: xmlDocPtr doc = NULL;
13048: xmlNodePtr fake, cur;
13049: int nsnr = 0;
13050:
13051: xmlParserErrors ret = XML_ERR_OK;
13052:
13053: /*
13054: * check all input parameters, grab the document
13055: */
13056: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13057: return(XML_ERR_INTERNAL_ERROR);
13058: switch (node->type) {
13059: case XML_ELEMENT_NODE:
13060: case XML_ATTRIBUTE_NODE:
13061: case XML_TEXT_NODE:
13062: case XML_CDATA_SECTION_NODE:
13063: case XML_ENTITY_REF_NODE:
13064: case XML_PI_NODE:
13065: case XML_COMMENT_NODE:
13066: case XML_DOCUMENT_NODE:
13067: case XML_HTML_DOCUMENT_NODE:
13068: break;
13069: default:
13070: return(XML_ERR_INTERNAL_ERROR);
13071:
13072: }
13073: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13074: (node->type != XML_DOCUMENT_NODE) &&
13075: (node->type != XML_HTML_DOCUMENT_NODE))
13076: node = node->parent;
13077: if (node == NULL)
13078: return(XML_ERR_INTERNAL_ERROR);
13079: if (node->type == XML_ELEMENT_NODE)
13080: doc = node->doc;
13081: else
13082: doc = (xmlDocPtr) node;
13083: if (doc == NULL)
13084: return(XML_ERR_INTERNAL_ERROR);
13085:
13086: /*
13087: * allocate a context and set-up everything not related to the
13088: * node position in the tree
13089: */
13090: if (doc->type == XML_DOCUMENT_NODE)
13091: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13092: #ifdef LIBXML_HTML_ENABLED
13093: else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13094: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13095: /*
13096: * When parsing in context, it makes no sense to add implied
13097: * elements like html/body/etc...
13098: */
13099: options |= HTML_PARSE_NOIMPLIED;
13100: }
13101: #endif
13102: else
13103: return(XML_ERR_INTERNAL_ERROR);
13104:
13105: if (ctxt == NULL)
13106: return(XML_ERR_NO_MEMORY);
13107:
13108: /*
13109: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13110: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13111: * we must wait until the last moment to free the original one.
13112: */
13113: if (doc->dict != NULL) {
13114: if (ctxt->dict != NULL)
13115: xmlDictFree(ctxt->dict);
13116: ctxt->dict = doc->dict;
13117: } else
13118: options |= XML_PARSE_NODICT;
13119:
13120: if (doc->encoding != NULL) {
13121: xmlCharEncodingHandlerPtr hdlr;
13122:
13123: if (ctxt->encoding != NULL)
13124: xmlFree((xmlChar *) ctxt->encoding);
13125: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13126:
13127: hdlr = xmlFindCharEncodingHandler(doc->encoding);
13128: if (hdlr != NULL) {
13129: xmlSwitchToEncoding(ctxt, hdlr);
13130: } else {
13131: return(XML_ERR_UNSUPPORTED_ENCODING);
13132: }
13133: }
13134:
13135: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13136: xmlDetectSAX2(ctxt);
13137: ctxt->myDoc = doc;
13138:
13139: fake = xmlNewComment(NULL);
13140: if (fake == NULL) {
13141: xmlFreeParserCtxt(ctxt);
13142: return(XML_ERR_NO_MEMORY);
13143: }
13144: xmlAddChild(node, fake);
13145:
13146: if (node->type == XML_ELEMENT_NODE) {
13147: nodePush(ctxt, node);
13148: /*
13149: * initialize the SAX2 namespaces stack
13150: */
13151: cur = node;
13152: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13153: xmlNsPtr ns = cur->nsDef;
13154: const xmlChar *iprefix, *ihref;
13155:
13156: while (ns != NULL) {
13157: if (ctxt->dict) {
13158: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13159: ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13160: } else {
13161: iprefix = ns->prefix;
13162: ihref = ns->href;
13163: }
13164:
13165: if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13166: nsPush(ctxt, iprefix, ihref);
13167: nsnr++;
13168: }
13169: ns = ns->next;
13170: }
13171: cur = cur->parent;
13172: }
13173: ctxt->instate = XML_PARSER_CONTENT;
13174: }
13175:
13176: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13177: /*
13178: * ID/IDREF registration will be done in xmlValidateElement below
13179: */
13180: ctxt->loadsubset |= XML_SKIP_IDS;
13181: }
13182:
13183: #ifdef LIBXML_HTML_ENABLED
13184: if (doc->type == XML_HTML_DOCUMENT_NODE)
13185: __htmlParseContent(ctxt);
13186: else
13187: #endif
13188: xmlParseContent(ctxt);
13189:
13190: nsPop(ctxt, nsnr);
13191: if ((RAW == '<') && (NXT(1) == '/')) {
13192: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193: } else if (RAW != 0) {
13194: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13195: }
13196: if ((ctxt->node != NULL) && (ctxt->node != node)) {
13197: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13198: ctxt->wellFormed = 0;
13199: }
13200:
13201: if (!ctxt->wellFormed) {
13202: if (ctxt->errNo == 0)
13203: ret = XML_ERR_INTERNAL_ERROR;
13204: else
13205: ret = (xmlParserErrors)ctxt->errNo;
13206: } else {
13207: ret = XML_ERR_OK;
13208: }
13209:
13210: /*
13211: * Return the newly created nodeset after unlinking it from
13212: * the pseudo sibling.
13213: */
13214:
13215: cur = fake->next;
13216: fake->next = NULL;
13217: node->last = fake;
13218:
13219: if (cur != NULL) {
13220: cur->prev = NULL;
13221: }
13222:
13223: *lst = cur;
13224:
13225: while (cur != NULL) {
13226: cur->parent = NULL;
13227: cur = cur->next;
13228: }
13229:
13230: xmlUnlinkNode(fake);
13231: xmlFreeNode(fake);
13232:
13233:
13234: if (ret != XML_ERR_OK) {
13235: xmlFreeNodeList(*lst);
13236: *lst = NULL;
13237: }
13238:
13239: if (doc->dict != NULL)
13240: ctxt->dict = NULL;
13241: xmlFreeParserCtxt(ctxt);
13242:
13243: return(ret);
13244: #else /* !SAX2 */
13245: return(XML_ERR_INTERNAL_ERROR);
13246: #endif
13247: }
13248:
13249: #ifdef LIBXML_SAX1_ENABLED
13250: /**
13251: * xmlParseBalancedChunkMemoryRecover:
13252: * @doc: the document the chunk pertains to
13253: * @sax: the SAX handler bloc (possibly NULL)
13254: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13255: * @depth: Used for loop detection, use 0
13256: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13257: * @lst: the return value for the set of parsed nodes
13258: * @recover: return nodes even if the data is broken (use 0)
13259: *
13260: *
13261: * Parse a well-balanced chunk of an XML document
13262: * called by the parser
13263: * The allowed sequence for the Well Balanced Chunk is the one defined by
13264: * the content production in the XML grammar:
13265: *
13266: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13267: *
13268: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13269: * the parser error code otherwise
13270: *
13271: * In case recover is set to 1, the nodelist will not be empty even if
13272: * the parsed chunk is not well balanced, assuming the parsing succeeded to
13273: * some extent.
13274: */
13275: int
13276: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13277: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13278: int recover) {
13279: xmlParserCtxtPtr ctxt;
13280: xmlDocPtr newDoc;
13281: xmlSAXHandlerPtr oldsax = NULL;
13282: xmlNodePtr content, newRoot;
13283: int size;
13284: int ret = 0;
13285:
13286: if (depth > 40) {
13287: return(XML_ERR_ENTITY_LOOP);
13288: }
13289:
13290:
13291: if (lst != NULL)
13292: *lst = NULL;
13293: if (string == NULL)
13294: return(-1);
13295:
13296: size = xmlStrlen(string);
13297:
13298: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13299: if (ctxt == NULL) return(-1);
13300: ctxt->userData = ctxt;
13301: if (sax != NULL) {
13302: oldsax = ctxt->sax;
13303: ctxt->sax = sax;
13304: if (user_data != NULL)
13305: ctxt->userData = user_data;
13306: }
13307: newDoc = xmlNewDoc(BAD_CAST "1.0");
13308: if (newDoc == NULL) {
13309: xmlFreeParserCtxt(ctxt);
13310: return(-1);
13311: }
13312: newDoc->properties = XML_DOC_INTERNAL;
13313: if ((doc != NULL) && (doc->dict != NULL)) {
13314: xmlDictFree(ctxt->dict);
13315: ctxt->dict = doc->dict;
13316: xmlDictReference(ctxt->dict);
13317: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13318: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13319: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13320: ctxt->dictNames = 1;
13321: } else {
13322: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13323: }
13324: if (doc != NULL) {
13325: newDoc->intSubset = doc->intSubset;
13326: newDoc->extSubset = doc->extSubset;
13327: }
13328: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13329: if (newRoot == NULL) {
13330: if (sax != NULL)
13331: ctxt->sax = oldsax;
13332: xmlFreeParserCtxt(ctxt);
13333: newDoc->intSubset = NULL;
13334: newDoc->extSubset = NULL;
13335: xmlFreeDoc(newDoc);
13336: return(-1);
13337: }
13338: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13339: nodePush(ctxt, newRoot);
13340: if (doc == NULL) {
13341: ctxt->myDoc = newDoc;
13342: } else {
13343: ctxt->myDoc = newDoc;
13344: newDoc->children->doc = doc;
13345: /* Ensure that doc has XML spec namespace */
13346: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13347: newDoc->oldNs = doc->oldNs;
13348: }
13349: ctxt->instate = XML_PARSER_CONTENT;
13350: ctxt->depth = depth;
13351:
13352: /*
13353: * Doing validity checking on chunk doesn't make sense
13354: */
13355: ctxt->validate = 0;
13356: ctxt->loadsubset = 0;
13357: xmlDetectSAX2(ctxt);
13358:
13359: if ( doc != NULL ){
13360: content = doc->children;
13361: doc->children = NULL;
13362: xmlParseContent(ctxt);
13363: doc->children = content;
13364: }
13365: else {
13366: xmlParseContent(ctxt);
13367: }
13368: if ((RAW == '<') && (NXT(1) == '/')) {
13369: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13370: } else if (RAW != 0) {
13371: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13372: }
13373: if (ctxt->node != newDoc->children) {
13374: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13375: }
13376:
13377: if (!ctxt->wellFormed) {
13378: if (ctxt->errNo == 0)
13379: ret = 1;
13380: else
13381: ret = ctxt->errNo;
13382: } else {
13383: ret = 0;
13384: }
13385:
13386: if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13387: xmlNodePtr cur;
13388:
13389: /*
13390: * Return the newly created nodeset after unlinking it from
13391: * they pseudo parent.
13392: */
13393: cur = newDoc->children->children;
13394: *lst = cur;
13395: while (cur != NULL) {
13396: xmlSetTreeDoc(cur, doc);
13397: cur->parent = NULL;
13398: cur = cur->next;
13399: }
13400: newDoc->children->children = NULL;
13401: }
13402:
13403: if (sax != NULL)
13404: ctxt->sax = oldsax;
13405: xmlFreeParserCtxt(ctxt);
13406: newDoc->intSubset = NULL;
13407: newDoc->extSubset = NULL;
13408: newDoc->oldNs = NULL;
13409: xmlFreeDoc(newDoc);
13410:
13411: return(ret);
13412: }
13413:
13414: /**
13415: * xmlSAXParseEntity:
13416: * @sax: the SAX handler block
13417: * @filename: the filename
13418: *
13419: * parse an XML external entity out of context and build a tree.
13420: * It use the given SAX function block to handle the parsing callback.
13421: * If sax is NULL, fallback to the default DOM tree building routines.
13422: *
13423: * [78] extParsedEnt ::= TextDecl? content
13424: *
13425: * This correspond to a "Well Balanced" chunk
13426: *
13427: * Returns the resulting document tree
13428: */
13429:
13430: xmlDocPtr
13431: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13432: xmlDocPtr ret;
13433: xmlParserCtxtPtr ctxt;
13434:
13435: ctxt = xmlCreateFileParserCtxt(filename);
13436: if (ctxt == NULL) {
13437: return(NULL);
13438: }
13439: if (sax != NULL) {
13440: if (ctxt->sax != NULL)
13441: xmlFree(ctxt->sax);
13442: ctxt->sax = sax;
13443: ctxt->userData = NULL;
13444: }
13445:
13446: xmlParseExtParsedEnt(ctxt);
13447:
13448: if (ctxt->wellFormed)
13449: ret = ctxt->myDoc;
13450: else {
13451: ret = NULL;
13452: xmlFreeDoc(ctxt->myDoc);
13453: ctxt->myDoc = NULL;
13454: }
13455: if (sax != NULL)
13456: ctxt->sax = NULL;
13457: xmlFreeParserCtxt(ctxt);
13458:
13459: return(ret);
13460: }
13461:
13462: /**
13463: * xmlParseEntity:
13464: * @filename: the filename
13465: *
13466: * parse an XML external entity out of context and build a tree.
13467: *
13468: * [78] extParsedEnt ::= TextDecl? content
13469: *
13470: * This correspond to a "Well Balanced" chunk
13471: *
13472: * Returns the resulting document tree
13473: */
13474:
13475: xmlDocPtr
13476: xmlParseEntity(const char *filename) {
13477: return(xmlSAXParseEntity(NULL, filename));
13478: }
13479: #endif /* LIBXML_SAX1_ENABLED */
13480:
13481: /**
13482: * xmlCreateEntityParserCtxtInternal:
13483: * @URL: the entity URL
13484: * @ID: the entity PUBLIC ID
13485: * @base: a possible base for the target URI
13486: * @pctx: parser context used to set options on new context
13487: *
13488: * Create a parser context for an external entity
13489: * Automatic support for ZLIB/Compress compressed document is provided
13490: * by default if found at compile-time.
13491: *
13492: * Returns the new parser context or NULL
13493: */
13494: static xmlParserCtxtPtr
13495: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13496: const xmlChar *base, xmlParserCtxtPtr pctx) {
13497: xmlParserCtxtPtr ctxt;
13498: xmlParserInputPtr inputStream;
13499: char *directory = NULL;
13500: xmlChar *uri;
13501:
13502: ctxt = xmlNewParserCtxt();
13503: if (ctxt == NULL) {
13504: return(NULL);
13505: }
13506:
13507: if (pctx != NULL) {
13508: ctxt->options = pctx->options;
13509: ctxt->_private = pctx->_private;
13510: }
13511:
13512: uri = xmlBuildURI(URL, base);
13513:
13514: if (uri == NULL) {
13515: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13516: if (inputStream == NULL) {
13517: xmlFreeParserCtxt(ctxt);
13518: return(NULL);
13519: }
13520:
13521: inputPush(ctxt, inputStream);
13522:
13523: if ((ctxt->directory == NULL) && (directory == NULL))
13524: directory = xmlParserGetDirectory((char *)URL);
13525: if ((ctxt->directory == NULL) && (directory != NULL))
13526: ctxt->directory = directory;
13527: } else {
13528: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13529: if (inputStream == NULL) {
13530: xmlFree(uri);
13531: xmlFreeParserCtxt(ctxt);
13532: return(NULL);
13533: }
13534:
13535: inputPush(ctxt, inputStream);
13536:
13537: if ((ctxt->directory == NULL) && (directory == NULL))
13538: directory = xmlParserGetDirectory((char *)uri);
13539: if ((ctxt->directory == NULL) && (directory != NULL))
13540: ctxt->directory = directory;
13541: xmlFree(uri);
13542: }
13543: return(ctxt);
13544: }
13545:
13546: /**
13547: * xmlCreateEntityParserCtxt:
13548: * @URL: the entity URL
13549: * @ID: the entity PUBLIC ID
13550: * @base: a possible base for the target URI
13551: *
13552: * Create a parser context for an external entity
13553: * Automatic support for ZLIB/Compress compressed document is provided
13554: * by default if found at compile-time.
13555: *
13556: * Returns the new parser context or NULL
13557: */
13558: xmlParserCtxtPtr
13559: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13560: const xmlChar *base) {
13561: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13562:
13563: }
13564:
13565: /************************************************************************
13566: * *
13567: * Front ends when parsing from a file *
13568: * *
13569: ************************************************************************/
13570:
13571: /**
13572: * xmlCreateURLParserCtxt:
13573: * @filename: the filename or URL
13574: * @options: a combination of xmlParserOption
13575: *
13576: * Create a parser context for a file or URL content.
13577: * Automatic support for ZLIB/Compress compressed document is provided
13578: * by default if found at compile-time and for file accesses
13579: *
13580: * Returns the new parser context or NULL
13581: */
13582: xmlParserCtxtPtr
13583: xmlCreateURLParserCtxt(const char *filename, int options)
13584: {
13585: xmlParserCtxtPtr ctxt;
13586: xmlParserInputPtr inputStream;
13587: char *directory = NULL;
13588:
13589: ctxt = xmlNewParserCtxt();
13590: if (ctxt == NULL) {
13591: xmlErrMemory(NULL, "cannot allocate parser context");
13592: return(NULL);
13593: }
13594:
13595: if (options)
13596: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13597: ctxt->linenumbers = 1;
13598:
13599: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13600: if (inputStream == NULL) {
13601: xmlFreeParserCtxt(ctxt);
13602: return(NULL);
13603: }
13604:
13605: inputPush(ctxt, inputStream);
13606: if ((ctxt->directory == NULL) && (directory == NULL))
13607: directory = xmlParserGetDirectory(filename);
13608: if ((ctxt->directory == NULL) && (directory != NULL))
13609: ctxt->directory = directory;
13610:
13611: return(ctxt);
13612: }
13613:
13614: /**
13615: * xmlCreateFileParserCtxt:
13616: * @filename: the filename
13617: *
13618: * Create a parser context for a file content.
13619: * Automatic support for ZLIB/Compress compressed document is provided
13620: * by default if found at compile-time.
13621: *
13622: * Returns the new parser context or NULL
13623: */
13624: xmlParserCtxtPtr
13625: xmlCreateFileParserCtxt(const char *filename)
13626: {
13627: return(xmlCreateURLParserCtxt(filename, 0));
13628: }
13629:
13630: #ifdef LIBXML_SAX1_ENABLED
13631: /**
13632: * xmlSAXParseFileWithData:
13633: * @sax: the SAX handler block
13634: * @filename: the filename
13635: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13636: * documents
13637: * @data: the userdata
13638: *
13639: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13640: * compressed document is provided by default if found at compile-time.
13641: * It use the given SAX function block to handle the parsing callback.
13642: * If sax is NULL, fallback to the default DOM tree building routines.
13643: *
13644: * User data (void *) is stored within the parser context in the
13645: * context's _private member, so it is available nearly everywhere in libxml
13646: *
13647: * Returns the resulting document tree
13648: */
13649:
13650: xmlDocPtr
13651: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13652: int recovery, void *data) {
13653: xmlDocPtr ret;
13654: xmlParserCtxtPtr ctxt;
13655:
13656: xmlInitParser();
13657:
13658: ctxt = xmlCreateFileParserCtxt(filename);
13659: if (ctxt == NULL) {
13660: return(NULL);
13661: }
13662: if (sax != NULL) {
13663: if (ctxt->sax != NULL)
13664: xmlFree(ctxt->sax);
13665: ctxt->sax = sax;
13666: }
13667: xmlDetectSAX2(ctxt);
13668: if (data!=NULL) {
13669: ctxt->_private = data;
13670: }
13671:
13672: if (ctxt->directory == NULL)
13673: ctxt->directory = xmlParserGetDirectory(filename);
13674:
13675: ctxt->recovery = recovery;
13676:
13677: xmlParseDocument(ctxt);
13678:
13679: if ((ctxt->wellFormed) || recovery) {
13680: ret = ctxt->myDoc;
13681: if (ret != NULL) {
13682: if (ctxt->input->buf->compressed > 0)
13683: ret->compression = 9;
13684: else
13685: ret->compression = ctxt->input->buf->compressed;
13686: }
13687: }
13688: else {
13689: ret = NULL;
13690: xmlFreeDoc(ctxt->myDoc);
13691: ctxt->myDoc = NULL;
13692: }
13693: if (sax != NULL)
13694: ctxt->sax = NULL;
13695: xmlFreeParserCtxt(ctxt);
13696:
13697: return(ret);
13698: }
13699:
13700: /**
13701: * xmlSAXParseFile:
13702: * @sax: the SAX handler block
13703: * @filename: the filename
13704: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13705: * documents
13706: *
13707: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13708: * compressed document is provided by default if found at compile-time.
13709: * It use the given SAX function block to handle the parsing callback.
13710: * If sax is NULL, fallback to the default DOM tree building routines.
13711: *
13712: * Returns the resulting document tree
13713: */
13714:
13715: xmlDocPtr
13716: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13717: int recovery) {
13718: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13719: }
13720:
13721: /**
13722: * xmlRecoverDoc:
13723: * @cur: a pointer to an array of xmlChar
13724: *
13725: * parse an XML in-memory document and build a tree.
13726: * In the case the document is not Well Formed, a attempt to build a
13727: * tree is tried anyway
13728: *
13729: * Returns the resulting document tree or NULL in case of failure
13730: */
13731:
13732: xmlDocPtr
13733: xmlRecoverDoc(const xmlChar *cur) {
13734: return(xmlSAXParseDoc(NULL, cur, 1));
13735: }
13736:
13737: /**
13738: * xmlParseFile:
13739: * @filename: the filename
13740: *
13741: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13742: * compressed document is provided by default if found at compile-time.
13743: *
13744: * Returns the resulting document tree if the file was wellformed,
13745: * NULL otherwise.
13746: */
13747:
13748: xmlDocPtr
13749: xmlParseFile(const char *filename) {
13750: return(xmlSAXParseFile(NULL, filename, 0));
13751: }
13752:
13753: /**
13754: * xmlRecoverFile:
13755: * @filename: the filename
13756: *
13757: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13758: * compressed document is provided by default if found at compile-time.
13759: * In the case the document is not Well Formed, it attempts to build
13760: * a tree anyway
13761: *
13762: * Returns the resulting document tree or NULL in case of failure
13763: */
13764:
13765: xmlDocPtr
13766: xmlRecoverFile(const char *filename) {
13767: return(xmlSAXParseFile(NULL, filename, 1));
13768: }
13769:
13770:
13771: /**
13772: * xmlSetupParserForBuffer:
13773: * @ctxt: an XML parser context
13774: * @buffer: a xmlChar * buffer
13775: * @filename: a file name
13776: *
13777: * Setup the parser context to parse a new buffer; Clears any prior
13778: * contents from the parser context. The buffer parameter must not be
13779: * NULL, but the filename parameter can be
13780: */
13781: void
13782: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13783: const char* filename)
13784: {
13785: xmlParserInputPtr input;
13786:
13787: if ((ctxt == NULL) || (buffer == NULL))
13788: return;
13789:
13790: input = xmlNewInputStream(ctxt);
13791: if (input == NULL) {
13792: xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13793: xmlClearParserCtxt(ctxt);
13794: return;
13795: }
13796:
13797: xmlClearParserCtxt(ctxt);
13798: if (filename != NULL)
13799: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13800: input->base = buffer;
13801: input->cur = buffer;
13802: input->end = &buffer[xmlStrlen(buffer)];
13803: inputPush(ctxt, input);
13804: }
13805:
13806: /**
13807: * xmlSAXUserParseFile:
13808: * @sax: a SAX handler
13809: * @user_data: The user data returned on SAX callbacks
13810: * @filename: a file name
13811: *
13812: * parse an XML file and call the given SAX handler routines.
13813: * Automatic support for ZLIB/Compress compressed document is provided
13814: *
13815: * Returns 0 in case of success or a error number otherwise
13816: */
13817: int
13818: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13819: const char *filename) {
13820: int ret = 0;
13821: xmlParserCtxtPtr ctxt;
13822:
13823: ctxt = xmlCreateFileParserCtxt(filename);
13824: if (ctxt == NULL) return -1;
13825: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13826: xmlFree(ctxt->sax);
13827: ctxt->sax = sax;
13828: xmlDetectSAX2(ctxt);
13829:
13830: if (user_data != NULL)
13831: ctxt->userData = user_data;
13832:
13833: xmlParseDocument(ctxt);
13834:
13835: if (ctxt->wellFormed)
13836: ret = 0;
13837: else {
13838: if (ctxt->errNo != 0)
13839: ret = ctxt->errNo;
13840: else
13841: ret = -1;
13842: }
13843: if (sax != NULL)
13844: ctxt->sax = NULL;
13845: if (ctxt->myDoc != NULL) {
13846: xmlFreeDoc(ctxt->myDoc);
13847: ctxt->myDoc = NULL;
13848: }
13849: xmlFreeParserCtxt(ctxt);
13850:
13851: return ret;
13852: }
13853: #endif /* LIBXML_SAX1_ENABLED */
13854:
13855: /************************************************************************
13856: * *
13857: * Front ends when parsing from memory *
13858: * *
13859: ************************************************************************/
13860:
13861: /**
13862: * xmlCreateMemoryParserCtxt:
13863: * @buffer: a pointer to a char array
13864: * @size: the size of the array
13865: *
13866: * Create a parser context for an XML in-memory document.
13867: *
13868: * Returns the new parser context or NULL
13869: */
13870: xmlParserCtxtPtr
13871: xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13872: xmlParserCtxtPtr ctxt;
13873: xmlParserInputPtr input;
13874: xmlParserInputBufferPtr buf;
13875:
13876: if (buffer == NULL)
13877: return(NULL);
13878: if (size <= 0)
13879: return(NULL);
13880:
13881: ctxt = xmlNewParserCtxt();
13882: if (ctxt == NULL)
13883: return(NULL);
13884:
13885: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13886: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13887: if (buf == NULL) {
13888: xmlFreeParserCtxt(ctxt);
13889: return(NULL);
13890: }
13891:
13892: input = xmlNewInputStream(ctxt);
13893: if (input == NULL) {
13894: xmlFreeParserInputBuffer(buf);
13895: xmlFreeParserCtxt(ctxt);
13896: return(NULL);
13897: }
13898:
13899: input->filename = NULL;
13900: input->buf = buf;
13901: input->base = input->buf->buffer->content;
13902: input->cur = input->buf->buffer->content;
13903: input->end = &input->buf->buffer->content[input->buf->buffer->use];
13904:
13905: inputPush(ctxt, input);
13906: return(ctxt);
13907: }
13908:
13909: #ifdef LIBXML_SAX1_ENABLED
13910: /**
13911: * xmlSAXParseMemoryWithData:
13912: * @sax: the SAX handler block
13913: * @buffer: an pointer to a char array
13914: * @size: the size of the array
13915: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13916: * documents
13917: * @data: the userdata
13918: *
13919: * parse an XML in-memory block and use the given SAX function block
13920: * to handle the parsing callback. If sax is NULL, fallback to the default
13921: * DOM tree building routines.
13922: *
13923: * User data (void *) is stored within the parser context in the
13924: * context's _private member, so it is available nearly everywhere in libxml
13925: *
13926: * Returns the resulting document tree
13927: */
13928:
13929: xmlDocPtr
13930: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13931: int size, int recovery, void *data) {
13932: xmlDocPtr ret;
13933: xmlParserCtxtPtr ctxt;
13934:
13935: xmlInitParser();
13936:
13937: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13938: if (ctxt == NULL) return(NULL);
13939: if (sax != NULL) {
13940: if (ctxt->sax != NULL)
13941: xmlFree(ctxt->sax);
13942: ctxt->sax = sax;
13943: }
13944: xmlDetectSAX2(ctxt);
13945: if (data!=NULL) {
13946: ctxt->_private=data;
13947: }
13948:
13949: ctxt->recovery = recovery;
13950:
13951: xmlParseDocument(ctxt);
13952:
13953: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13954: else {
13955: ret = NULL;
13956: xmlFreeDoc(ctxt->myDoc);
13957: ctxt->myDoc = NULL;
13958: }
13959: if (sax != NULL)
13960: ctxt->sax = NULL;
13961: xmlFreeParserCtxt(ctxt);
13962:
13963: return(ret);
13964: }
13965:
13966: /**
13967: * xmlSAXParseMemory:
13968: * @sax: the SAX handler block
13969: * @buffer: an pointer to a char array
13970: * @size: the size of the array
13971: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13972: * documents
13973: *
13974: * parse an XML in-memory block and use the given SAX function block
13975: * to handle the parsing callback. If sax is NULL, fallback to the default
13976: * DOM tree building routines.
13977: *
13978: * Returns the resulting document tree
13979: */
13980: xmlDocPtr
13981: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13982: int size, int recovery) {
13983: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13984: }
13985:
13986: /**
13987: * xmlParseMemory:
13988: * @buffer: an pointer to a char array
13989: * @size: the size of the array
13990: *
13991: * parse an XML in-memory block and build a tree.
13992: *
13993: * Returns the resulting document tree
13994: */
13995:
13996: xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13997: return(xmlSAXParseMemory(NULL, buffer, size, 0));
13998: }
13999:
14000: /**
14001: * xmlRecoverMemory:
14002: * @buffer: an pointer to a char array
14003: * @size: the size of the array
14004: *
14005: * parse an XML in-memory block and build a tree.
14006: * In the case the document is not Well Formed, an attempt to
14007: * build a tree is tried anyway
14008: *
14009: * Returns the resulting document tree or NULL in case of error
14010: */
14011:
14012: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14013: return(xmlSAXParseMemory(NULL, buffer, size, 1));
14014: }
14015:
14016: /**
14017: * xmlSAXUserParseMemory:
14018: * @sax: a SAX handler
14019: * @user_data: The user data returned on SAX callbacks
14020: * @buffer: an in-memory XML document input
14021: * @size: the length of the XML document in bytes
14022: *
14023: * A better SAX parsing routine.
14024: * parse an XML in-memory buffer and call the given SAX handler routines.
14025: *
14026: * Returns 0 in case of success or a error number otherwise
14027: */
14028: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14029: const char *buffer, int size) {
14030: int ret = 0;
14031: xmlParserCtxtPtr ctxt;
14032:
14033: xmlInitParser();
14034:
14035: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14036: if (ctxt == NULL) return -1;
14037: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14038: xmlFree(ctxt->sax);
14039: ctxt->sax = sax;
14040: xmlDetectSAX2(ctxt);
14041:
14042: if (user_data != NULL)
14043: ctxt->userData = user_data;
14044:
14045: xmlParseDocument(ctxt);
14046:
14047: if (ctxt->wellFormed)
14048: ret = 0;
14049: else {
14050: if (ctxt->errNo != 0)
14051: ret = ctxt->errNo;
14052: else
14053: ret = -1;
14054: }
14055: if (sax != NULL)
14056: ctxt->sax = NULL;
14057: if (ctxt->myDoc != NULL) {
14058: xmlFreeDoc(ctxt->myDoc);
14059: ctxt->myDoc = NULL;
14060: }
14061: xmlFreeParserCtxt(ctxt);
14062:
14063: return ret;
14064: }
14065: #endif /* LIBXML_SAX1_ENABLED */
14066:
14067: /**
14068: * xmlCreateDocParserCtxt:
14069: * @cur: a pointer to an array of xmlChar
14070: *
14071: * Creates a parser context for an XML in-memory document.
14072: *
14073: * Returns the new parser context or NULL
14074: */
14075: xmlParserCtxtPtr
14076: xmlCreateDocParserCtxt(const xmlChar *cur) {
14077: int len;
14078:
14079: if (cur == NULL)
14080: return(NULL);
14081: len = xmlStrlen(cur);
14082: return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14083: }
14084:
14085: #ifdef LIBXML_SAX1_ENABLED
14086: /**
14087: * xmlSAXParseDoc:
14088: * @sax: the SAX handler block
14089: * @cur: a pointer to an array of xmlChar
14090: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14091: * documents
14092: *
14093: * parse an XML in-memory document and build a tree.
14094: * It use the given SAX function block to handle the parsing callback.
14095: * If sax is NULL, fallback to the default DOM tree building routines.
14096: *
14097: * Returns the resulting document tree
14098: */
14099:
14100: xmlDocPtr
14101: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14102: xmlDocPtr ret;
14103: xmlParserCtxtPtr ctxt;
14104: xmlSAXHandlerPtr oldsax = NULL;
14105:
14106: if (cur == NULL) return(NULL);
14107:
14108:
14109: ctxt = xmlCreateDocParserCtxt(cur);
14110: if (ctxt == NULL) return(NULL);
14111: if (sax != NULL) {
14112: oldsax = ctxt->sax;
14113: ctxt->sax = sax;
14114: ctxt->userData = NULL;
14115: }
14116: xmlDetectSAX2(ctxt);
14117:
14118: xmlParseDocument(ctxt);
14119: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14120: else {
14121: ret = NULL;
14122: xmlFreeDoc(ctxt->myDoc);
14123: ctxt->myDoc = NULL;
14124: }
14125: if (sax != NULL)
14126: ctxt->sax = oldsax;
14127: xmlFreeParserCtxt(ctxt);
14128:
14129: return(ret);
14130: }
14131:
14132: /**
14133: * xmlParseDoc:
14134: * @cur: a pointer to an array of xmlChar
14135: *
14136: * parse an XML in-memory document and build a tree.
14137: *
14138: * Returns the resulting document tree
14139: */
14140:
14141: xmlDocPtr
14142: xmlParseDoc(const xmlChar *cur) {
14143: return(xmlSAXParseDoc(NULL, cur, 0));
14144: }
14145: #endif /* LIBXML_SAX1_ENABLED */
14146:
14147: #ifdef LIBXML_LEGACY_ENABLED
14148: /************************************************************************
14149: * *
14150: * Specific function to keep track of entities references *
14151: * and used by the XSLT debugger *
14152: * *
14153: ************************************************************************/
14154:
14155: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14156:
14157: /**
14158: * xmlAddEntityReference:
14159: * @ent : A valid entity
14160: * @firstNode : A valid first node for children of entity
14161: * @lastNode : A valid last node of children entity
14162: *
14163: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14164: */
14165: static void
14166: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14167: xmlNodePtr lastNode)
14168: {
14169: if (xmlEntityRefFunc != NULL) {
14170: (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14171: }
14172: }
14173:
14174:
14175: /**
14176: * xmlSetEntityReferenceFunc:
14177: * @func: A valid function
14178: *
14179: * Set the function to call call back when a xml reference has been made
14180: */
14181: void
14182: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14183: {
14184: xmlEntityRefFunc = func;
14185: }
14186: #endif /* LIBXML_LEGACY_ENABLED */
14187:
14188: /************************************************************************
14189: * *
14190: * Miscellaneous *
14191: * *
14192: ************************************************************************/
14193:
14194: #ifdef LIBXML_XPATH_ENABLED
14195: #include <libxml/xpath.h>
14196: #endif
14197:
14198: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14199: static int xmlParserInitialized = 0;
14200:
14201: /**
14202: * xmlInitParser:
14203: *
14204: * Initialization function for the XML parser.
14205: * This is not reentrant. Call once before processing in case of
14206: * use in multithreaded programs.
14207: */
14208:
14209: void
14210: xmlInitParser(void) {
14211: if (xmlParserInitialized != 0)
14212: return;
14213:
14214: #ifdef LIBXML_THREAD_ENABLED
14215: __xmlGlobalInitMutexLock();
14216: if (xmlParserInitialized == 0) {
14217: #endif
14218: xmlInitThreads();
14219: xmlInitGlobals();
14220: if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14221: (xmlGenericError == NULL))
14222: initGenericErrorDefaultFunc(NULL);
14223: xmlInitMemory();
1.1.1.2 misho 14224: xmlInitializeDict();
1.1 misho 14225: xmlInitCharEncodingHandlers();
14226: xmlDefaultSAXHandlerInit();
14227: xmlRegisterDefaultInputCallbacks();
14228: #ifdef LIBXML_OUTPUT_ENABLED
14229: xmlRegisterDefaultOutputCallbacks();
14230: #endif /* LIBXML_OUTPUT_ENABLED */
14231: #ifdef LIBXML_HTML_ENABLED
14232: htmlInitAutoClose();
14233: htmlDefaultSAXHandlerInit();
14234: #endif
14235: #ifdef LIBXML_XPATH_ENABLED
14236: xmlXPathInit();
14237: #endif
14238: xmlParserInitialized = 1;
14239: #ifdef LIBXML_THREAD_ENABLED
14240: }
14241: __xmlGlobalInitMutexUnlock();
14242: #endif
14243: }
14244:
14245: /**
14246: * xmlCleanupParser:
14247: *
14248: * This function name is somewhat misleading. It does not clean up
14249: * parser state, it cleans up memory allocated by the library itself.
14250: * It is a cleanup function for the XML library. It tries to reclaim all
14251: * related global memory allocated for the library processing.
14252: * It doesn't deallocate any document related memory. One should
14253: * call xmlCleanupParser() only when the process has finished using
14254: * the library and all XML/HTML documents built with it.
14255: * See also xmlInitParser() which has the opposite function of preparing
14256: * the library for operations.
14257: *
14258: * WARNING: if your application is multithreaded or has plugin support
14259: * calling this may crash the application if another thread or
14260: * a plugin is still using libxml2. It's sometimes very hard to
14261: * guess if libxml2 is in use in the application, some libraries
14262: * or plugins may use it without notice. In case of doubt abstain
14263: * from calling this function or do it just before calling exit()
14264: * to avoid leak reports from valgrind !
14265: */
14266:
14267: void
14268: xmlCleanupParser(void) {
14269: if (!xmlParserInitialized)
14270: return;
14271:
14272: xmlCleanupCharEncodingHandlers();
14273: #ifdef LIBXML_CATALOG_ENABLED
14274: xmlCatalogCleanup();
14275: #endif
14276: xmlDictCleanup();
14277: xmlCleanupInputCallbacks();
14278: #ifdef LIBXML_OUTPUT_ENABLED
14279: xmlCleanupOutputCallbacks();
14280: #endif
14281: #ifdef LIBXML_SCHEMAS_ENABLED
14282: xmlSchemaCleanupTypes();
14283: xmlRelaxNGCleanupTypes();
14284: #endif
14285: xmlCleanupGlobals();
14286: xmlResetLastError();
14287: xmlCleanupThreads(); /* must be last if called not from the main thread */
14288: xmlCleanupMemory();
14289: xmlParserInitialized = 0;
14290: }
14291:
14292: /************************************************************************
14293: * *
14294: * New set (2.6.0) of simpler and more flexible APIs *
14295: * *
14296: ************************************************************************/
14297:
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL dict means the string is always freed.
 * The expansion already ends with a semicolon.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
14309:
14310: /**
14311: * xmlCtxtReset:
14312: * @ctxt: an XML parser context
14313: *
14314: * Reset a parser context
14315: */
14316: void
14317: xmlCtxtReset(xmlParserCtxtPtr ctxt)
14318: {
14319: xmlParserInputPtr input;
14320: xmlDictPtr dict;
14321:
14322: if (ctxt == NULL)
14323: return;
14324:
14325: dict = ctxt->dict;
14326:
14327: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14328: xmlFreeInputStream(input);
14329: }
14330: ctxt->inputNr = 0;
14331: ctxt->input = NULL;
14332:
14333: ctxt->spaceNr = 0;
14334: if (ctxt->spaceTab != NULL) {
14335: ctxt->spaceTab[0] = -1;
14336: ctxt->space = &ctxt->spaceTab[0];
14337: } else {
14338: ctxt->space = NULL;
14339: }
14340:
14341:
14342: ctxt->nodeNr = 0;
14343: ctxt->node = NULL;
14344:
14345: ctxt->nameNr = 0;
14346: ctxt->name = NULL;
14347:
14348: DICT_FREE(ctxt->version);
14349: ctxt->version = NULL;
14350: DICT_FREE(ctxt->encoding);
14351: ctxt->encoding = NULL;
14352: DICT_FREE(ctxt->directory);
14353: ctxt->directory = NULL;
14354: DICT_FREE(ctxt->extSubURI);
14355: ctxt->extSubURI = NULL;
14356: DICT_FREE(ctxt->extSubSystem);
14357: ctxt->extSubSystem = NULL;
14358: if (ctxt->myDoc != NULL)
14359: xmlFreeDoc(ctxt->myDoc);
14360: ctxt->myDoc = NULL;
14361:
14362: ctxt->standalone = -1;
14363: ctxt->hasExternalSubset = 0;
14364: ctxt->hasPErefs = 0;
14365: ctxt->html = 0;
14366: ctxt->external = 0;
14367: ctxt->instate = XML_PARSER_START;
14368: ctxt->token = 0;
14369:
14370: ctxt->wellFormed = 1;
14371: ctxt->nsWellFormed = 1;
14372: ctxt->disableSAX = 0;
14373: ctxt->valid = 1;
14374: #if 0
14375: ctxt->vctxt.userData = ctxt;
14376: ctxt->vctxt.error = xmlParserValidityError;
14377: ctxt->vctxt.warning = xmlParserValidityWarning;
14378: #endif
14379: ctxt->record_info = 0;
14380: ctxt->nbChars = 0;
14381: ctxt->checkIndex = 0;
14382: ctxt->inSubset = 0;
14383: ctxt->errNo = XML_ERR_OK;
14384: ctxt->depth = 0;
14385: ctxt->charset = XML_CHAR_ENCODING_UTF8;
14386: ctxt->catalogs = NULL;
14387: ctxt->nbentities = 0;
14388: ctxt->sizeentities = 0;
1.1.1.2.2.1! misho 14389: ctxt->sizeentcopy = 0;
1.1 misho 14390: xmlInitNodeInfoSeq(&ctxt->node_seq);
14391:
14392: if (ctxt->attsDefault != NULL) {
14393: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14394: ctxt->attsDefault = NULL;
14395: }
14396: if (ctxt->attsSpecial != NULL) {
14397: xmlHashFree(ctxt->attsSpecial, NULL);
14398: ctxt->attsSpecial = NULL;
14399: }
14400:
14401: #ifdef LIBXML_CATALOG_ENABLED
14402: if (ctxt->catalogs != NULL)
14403: xmlCatalogFreeLocal(ctxt->catalogs);
14404: #endif
14405: if (ctxt->lastError.code != XML_ERR_OK)
14406: xmlResetError(&ctxt->lastError);
14407: }
14408:
14409: /**
14410: * xmlCtxtResetPush:
14411: * @ctxt: an XML parser context
14412: * @chunk: a pointer to an array of chars
14413: * @size: number of chars in the array
14414: * @filename: an optional file name or URI
14415: * @encoding: the document encoding, or NULL
14416: *
14417: * Reset a push parser context
14418: *
14419: * Returns 0 in case of success and 1 in case of error
14420: */
14421: int
14422: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14423: int size, const char *filename, const char *encoding)
14424: {
14425: xmlParserInputPtr inputStream;
14426: xmlParserInputBufferPtr buf;
14427: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14428:
14429: if (ctxt == NULL)
14430: return(1);
14431:
14432: if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14433: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14434:
14435: buf = xmlAllocParserInputBuffer(enc);
14436: if (buf == NULL)
14437: return(1);
14438:
14439: if (ctxt == NULL) {
14440: xmlFreeParserInputBuffer(buf);
14441: return(1);
14442: }
14443:
14444: xmlCtxtReset(ctxt);
14445:
14446: if (ctxt->pushTab == NULL) {
14447: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14448: sizeof(xmlChar *));
14449: if (ctxt->pushTab == NULL) {
14450: xmlErrMemory(ctxt, NULL);
14451: xmlFreeParserInputBuffer(buf);
14452: return(1);
14453: }
14454: }
14455:
14456: if (filename == NULL) {
14457: ctxt->directory = NULL;
14458: } else {
14459: ctxt->directory = xmlParserGetDirectory(filename);
14460: }
14461:
14462: inputStream = xmlNewInputStream(ctxt);
14463: if (inputStream == NULL) {
14464: xmlFreeParserInputBuffer(buf);
14465: return(1);
14466: }
14467:
14468: if (filename == NULL)
14469: inputStream->filename = NULL;
14470: else
14471: inputStream->filename = (char *)
14472: xmlCanonicPath((const xmlChar *) filename);
14473: inputStream->buf = buf;
14474: inputStream->base = inputStream->buf->buffer->content;
14475: inputStream->cur = inputStream->buf->buffer->content;
14476: inputStream->end =
14477: &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14478:
14479: inputPush(ctxt, inputStream);
14480:
14481: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14482: (ctxt->input->buf != NULL)) {
14483: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14484: int cur = ctxt->input->cur - ctxt->input->base;
14485:
14486: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14487:
14488: ctxt->input->base = ctxt->input->buf->buffer->content + base;
14489: ctxt->input->cur = ctxt->input->base + cur;
14490: ctxt->input->end =
14491: &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14492: use];
14493: #ifdef DEBUG_PUSH
14494: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14495: #endif
14496: }
14497:
14498: if (encoding != NULL) {
14499: xmlCharEncodingHandlerPtr hdlr;
14500:
14501: if (ctxt->encoding != NULL)
14502: xmlFree((xmlChar *) ctxt->encoding);
14503: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14504:
14505: hdlr = xmlFindCharEncodingHandler(encoding);
14506: if (hdlr != NULL) {
14507: xmlSwitchToEncoding(ctxt, hdlr);
14508: } else {
14509: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14510: "Unsupported encoding %s\n", BAD_CAST encoding);
14511: }
14512: } else if (enc != XML_CHAR_ENCODING_NONE) {
14513: xmlSwitchEncoding(ctxt, enc);
14514: }
14515:
14516: return(0);
14517: }
14518:
14519:
14520: /**
14521: * xmlCtxtUseOptionsInternal:
14522: * @ctxt: an XML parser context
14523: * @options: a combination of xmlParserOption
14524: * @encoding: the user provided encoding to use
14525: *
14526: * Applies the options to the parser context
14527: *
14528: * Returns 0 in case of success, the set of unknown or unimplemented options
14529: * in case of error.
14530: */
14531: static int
14532: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14533: {
14534: if (ctxt == NULL)
14535: return(-1);
14536: if (encoding != NULL) {
14537: if (ctxt->encoding != NULL)
14538: xmlFree((xmlChar *) ctxt->encoding);
14539: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14540: }
14541: if (options & XML_PARSE_RECOVER) {
14542: ctxt->recovery = 1;
14543: options -= XML_PARSE_RECOVER;
14544: ctxt->options |= XML_PARSE_RECOVER;
14545: } else
14546: ctxt->recovery = 0;
14547: if (options & XML_PARSE_DTDLOAD) {
14548: ctxt->loadsubset = XML_DETECT_IDS;
14549: options -= XML_PARSE_DTDLOAD;
14550: ctxt->options |= XML_PARSE_DTDLOAD;
14551: } else
14552: ctxt->loadsubset = 0;
14553: if (options & XML_PARSE_DTDATTR) {
14554: ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14555: options -= XML_PARSE_DTDATTR;
14556: ctxt->options |= XML_PARSE_DTDATTR;
14557: }
14558: if (options & XML_PARSE_NOENT) {
14559: ctxt->replaceEntities = 1;
14560: /* ctxt->loadsubset |= XML_DETECT_IDS; */
14561: options -= XML_PARSE_NOENT;
14562: ctxt->options |= XML_PARSE_NOENT;
14563: } else
14564: ctxt->replaceEntities = 0;
14565: if (options & XML_PARSE_PEDANTIC) {
14566: ctxt->pedantic = 1;
14567: options -= XML_PARSE_PEDANTIC;
14568: ctxt->options |= XML_PARSE_PEDANTIC;
14569: } else
14570: ctxt->pedantic = 0;
14571: if (options & XML_PARSE_NOBLANKS) {
14572: ctxt->keepBlanks = 0;
14573: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14574: options -= XML_PARSE_NOBLANKS;
14575: ctxt->options |= XML_PARSE_NOBLANKS;
14576: } else
14577: ctxt->keepBlanks = 1;
14578: if (options & XML_PARSE_DTDVALID) {
14579: ctxt->validate = 1;
14580: if (options & XML_PARSE_NOWARNING)
14581: ctxt->vctxt.warning = NULL;
14582: if (options & XML_PARSE_NOERROR)
14583: ctxt->vctxt.error = NULL;
14584: options -= XML_PARSE_DTDVALID;
14585: ctxt->options |= XML_PARSE_DTDVALID;
14586: } else
14587: ctxt->validate = 0;
14588: if (options & XML_PARSE_NOWARNING) {
14589: ctxt->sax->warning = NULL;
14590: options -= XML_PARSE_NOWARNING;
14591: }
14592: if (options & XML_PARSE_NOERROR) {
14593: ctxt->sax->error = NULL;
14594: ctxt->sax->fatalError = NULL;
14595: options -= XML_PARSE_NOERROR;
14596: }
14597: #ifdef LIBXML_SAX1_ENABLED
14598: if (options & XML_PARSE_SAX1) {
14599: ctxt->sax->startElement = xmlSAX2StartElement;
14600: ctxt->sax->endElement = xmlSAX2EndElement;
14601: ctxt->sax->startElementNs = NULL;
14602: ctxt->sax->endElementNs = NULL;
14603: ctxt->sax->initialized = 1;
14604: options -= XML_PARSE_SAX1;
14605: ctxt->options |= XML_PARSE_SAX1;
14606: }
14607: #endif /* LIBXML_SAX1_ENABLED */
14608: if (options & XML_PARSE_NODICT) {
14609: ctxt->dictNames = 0;
14610: options -= XML_PARSE_NODICT;
14611: ctxt->options |= XML_PARSE_NODICT;
14612: } else {
14613: ctxt->dictNames = 1;
14614: }
14615: if (options & XML_PARSE_NOCDATA) {
14616: ctxt->sax->cdataBlock = NULL;
14617: options -= XML_PARSE_NOCDATA;
14618: ctxt->options |= XML_PARSE_NOCDATA;
14619: }
14620: if (options & XML_PARSE_NSCLEAN) {
14621: ctxt->options |= XML_PARSE_NSCLEAN;
14622: options -= XML_PARSE_NSCLEAN;
14623: }
14624: if (options & XML_PARSE_NONET) {
14625: ctxt->options |= XML_PARSE_NONET;
14626: options -= XML_PARSE_NONET;
14627: }
14628: if (options & XML_PARSE_COMPACT) {
14629: ctxt->options |= XML_PARSE_COMPACT;
14630: options -= XML_PARSE_COMPACT;
14631: }
14632: if (options & XML_PARSE_OLD10) {
14633: ctxt->options |= XML_PARSE_OLD10;
14634: options -= XML_PARSE_OLD10;
14635: }
14636: if (options & XML_PARSE_NOBASEFIX) {
14637: ctxt->options |= XML_PARSE_NOBASEFIX;
14638: options -= XML_PARSE_NOBASEFIX;
14639: }
14640: if (options & XML_PARSE_HUGE) {
14641: ctxt->options |= XML_PARSE_HUGE;
14642: options -= XML_PARSE_HUGE;
14643: }
14644: if (options & XML_PARSE_OLDSAX) {
14645: ctxt->options |= XML_PARSE_OLDSAX;
14646: options -= XML_PARSE_OLDSAX;
14647: }
1.1.1.2 misho 14648: if (options & XML_PARSE_IGNORE_ENC) {
14649: ctxt->options |= XML_PARSE_IGNORE_ENC;
14650: options -= XML_PARSE_IGNORE_ENC;
14651: }
1.1 misho 14652: ctxt->linenumbers = 1;
14653: return (options);
14654: }
14655:
14656: /**
14657: * xmlCtxtUseOptions:
14658: * @ctxt: an XML parser context
14659: * @options: a combination of xmlParserOption
14660: *
14661: * Applies the options to the parser context
14662: *
14663: * Returns 0 in case of success, the set of unknown or unimplemented options
14664: * in case of error.
14665: */
14666: int
14667: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14668: {
14669: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14670: }
14671:
14672: /**
14673: * xmlDoRead:
14674: * @ctxt: an XML parser context
14675: * @URL: the base URL to use for the document
14676: * @encoding: the document encoding, or NULL
14677: * @options: a combination of xmlParserOption
14678: * @reuse: keep the context for reuse
14679: *
14680: * Common front-end for the xmlRead functions
14681: *
14682: * Returns the resulting document tree or NULL
14683: */
14684: static xmlDocPtr
14685: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14686: int options, int reuse)
14687: {
14688: xmlDocPtr ret;
14689:
14690: xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14691: if (encoding != NULL) {
14692: xmlCharEncodingHandlerPtr hdlr;
14693:
14694: hdlr = xmlFindCharEncodingHandler(encoding);
14695: if (hdlr != NULL)
14696: xmlSwitchToEncoding(ctxt, hdlr);
14697: }
14698: if ((URL != NULL) && (ctxt->input != NULL) &&
14699: (ctxt->input->filename == NULL))
14700: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14701: xmlParseDocument(ctxt);
14702: if ((ctxt->wellFormed) || ctxt->recovery)
14703: ret = ctxt->myDoc;
14704: else {
14705: ret = NULL;
14706: if (ctxt->myDoc != NULL) {
14707: xmlFreeDoc(ctxt->myDoc);
14708: }
14709: }
14710: ctxt->myDoc = NULL;
14711: if (!reuse) {
14712: xmlFreeParserCtxt(ctxt);
14713: }
14714:
14715: return (ret);
14716: }
14717:
14718: /**
14719: * xmlReadDoc:
14720: * @cur: a pointer to a zero terminated string
14721: * @URL: the base URL to use for the document
14722: * @encoding: the document encoding, or NULL
14723: * @options: a combination of xmlParserOption
14724: *
14725: * parse an XML in-memory document and build a tree.
14726: *
14727: * Returns the resulting document tree
14728: */
14729: xmlDocPtr
14730: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14731: {
14732: xmlParserCtxtPtr ctxt;
14733:
14734: if (cur == NULL)
14735: return (NULL);
14736:
14737: ctxt = xmlCreateDocParserCtxt(cur);
14738: if (ctxt == NULL)
14739: return (NULL);
14740: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14741: }
14742:
14743: /**
14744: * xmlReadFile:
14745: * @filename: a file or URL
14746: * @encoding: the document encoding, or NULL
14747: * @options: a combination of xmlParserOption
14748: *
14749: * parse an XML file from the filesystem or the network.
14750: *
14751: * Returns the resulting document tree
14752: */
14753: xmlDocPtr
14754: xmlReadFile(const char *filename, const char *encoding, int options)
14755: {
14756: xmlParserCtxtPtr ctxt;
14757:
14758: ctxt = xmlCreateURLParserCtxt(filename, options);
14759: if (ctxt == NULL)
14760: return (NULL);
14761: return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14762: }
14763:
14764: /**
14765: * xmlReadMemory:
14766: * @buffer: a pointer to a char array
14767: * @size: the size of the array
14768: * @URL: the base URL to use for the document
14769: * @encoding: the document encoding, or NULL
14770: * @options: a combination of xmlParserOption
14771: *
14772: * parse an XML in-memory document and build a tree.
14773: *
14774: * Returns the resulting document tree
14775: */
14776: xmlDocPtr
14777: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14778: {
14779: xmlParserCtxtPtr ctxt;
14780:
14781: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14782: if (ctxt == NULL)
14783: return (NULL);
14784: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14785: }
14786:
14787: /**
14788: * xmlReadFd:
14789: * @fd: an open file descriptor
14790: * @URL: the base URL to use for the document
14791: * @encoding: the document encoding, or NULL
14792: * @options: a combination of xmlParserOption
14793: *
14794: * parse an XML from a file descriptor and build a tree.
14795: * NOTE that the file descriptor will not be closed when the
14796: * reader is closed or reset.
14797: *
14798: * Returns the resulting document tree
14799: */
14800: xmlDocPtr
14801: xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14802: {
14803: xmlParserCtxtPtr ctxt;
14804: xmlParserInputBufferPtr input;
14805: xmlParserInputPtr stream;
14806:
14807: if (fd < 0)
14808: return (NULL);
14809:
14810: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14811: if (input == NULL)
14812: return (NULL);
14813: input->closecallback = NULL;
14814: ctxt = xmlNewParserCtxt();
14815: if (ctxt == NULL) {
14816: xmlFreeParserInputBuffer(input);
14817: return (NULL);
14818: }
14819: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14820: if (stream == NULL) {
14821: xmlFreeParserInputBuffer(input);
14822: xmlFreeParserCtxt(ctxt);
14823: return (NULL);
14824: }
14825: inputPush(ctxt, stream);
14826: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14827: }
14828:
14829: /**
14830: * xmlReadIO:
14831: * @ioread: an I/O read function
14832: * @ioclose: an I/O close function
14833: * @ioctx: an I/O handler
14834: * @URL: the base URL to use for the document
14835: * @encoding: the document encoding, or NULL
14836: * @options: a combination of xmlParserOption
14837: *
14838: * parse an XML document from I/O functions and source and build a tree.
1.1.1.2 misho 14839: *
1.1 misho 14840: * Returns the resulting document tree
14841: */
14842: xmlDocPtr
14843: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14844: void *ioctx, const char *URL, const char *encoding, int options)
14845: {
14846: xmlParserCtxtPtr ctxt;
14847: xmlParserInputBufferPtr input;
14848: xmlParserInputPtr stream;
14849:
14850: if (ioread == NULL)
14851: return (NULL);
14852:
14853: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14854: XML_CHAR_ENCODING_NONE);
1.1.1.2 misho 14855: if (input == NULL) {
14856: if (ioclose != NULL)
14857: ioclose(ioctx);
1.1 misho 14858: return (NULL);
1.1.1.2 misho 14859: }
1.1 misho 14860: ctxt = xmlNewParserCtxt();
14861: if (ctxt == NULL) {
14862: xmlFreeParserInputBuffer(input);
14863: return (NULL);
14864: }
14865: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14866: if (stream == NULL) {
14867: xmlFreeParserInputBuffer(input);
14868: xmlFreeParserCtxt(ctxt);
14869: return (NULL);
14870: }
14871: inputPush(ctxt, stream);
14872: return (xmlDoRead(ctxt, URL, encoding, options, 0));
14873: }
14874:
14875: /**
14876: * xmlCtxtReadDoc:
14877: * @ctxt: an XML parser context
14878: * @cur: a pointer to a zero terminated string
14879: * @URL: the base URL to use for the document
14880: * @encoding: the document encoding, or NULL
14881: * @options: a combination of xmlParserOption
14882: *
14883: * parse an XML in-memory document and build a tree.
14884: * This reuses the existing @ctxt parser context
1.1.1.2 misho 14885: *
1.1 misho 14886: * Returns the resulting document tree
14887: */
14888: xmlDocPtr
14889: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14890: const char *URL, const char *encoding, int options)
14891: {
14892: xmlParserInputPtr stream;
14893:
14894: if (cur == NULL)
14895: return (NULL);
14896: if (ctxt == NULL)
14897: return (NULL);
14898:
14899: xmlCtxtReset(ctxt);
14900:
14901: stream = xmlNewStringInputStream(ctxt, cur);
14902: if (stream == NULL) {
14903: return (NULL);
14904: }
14905: inputPush(ctxt, stream);
14906: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14907: }
14908:
14909: /**
14910: * xmlCtxtReadFile:
14911: * @ctxt: an XML parser context
14912: * @filename: a file or URL
14913: * @encoding: the document encoding, or NULL
14914: * @options: a combination of xmlParserOption
14915: *
14916: * parse an XML file from the filesystem or the network.
14917: * This reuses the existing @ctxt parser context
14918: *
14919: * Returns the resulting document tree
14920: */
14921: xmlDocPtr
14922: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14923: const char *encoding, int options)
14924: {
14925: xmlParserInputPtr stream;
14926:
14927: if (filename == NULL)
14928: return (NULL);
14929: if (ctxt == NULL)
14930: return (NULL);
14931:
14932: xmlCtxtReset(ctxt);
14933:
14934: stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14935: if (stream == NULL) {
14936: return (NULL);
14937: }
14938: inputPush(ctxt, stream);
14939: return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14940: }
14941:
14942: /**
14943: * xmlCtxtReadMemory:
14944: * @ctxt: an XML parser context
14945: * @buffer: a pointer to a char array
14946: * @size: the size of the array
14947: * @URL: the base URL to use for the document
14948: * @encoding: the document encoding, or NULL
14949: * @options: a combination of xmlParserOption
14950: *
14951: * parse an XML in-memory document and build a tree.
14952: * This reuses the existing @ctxt parser context
14953: *
14954: * Returns the resulting document tree
14955: */
14956: xmlDocPtr
14957: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14958: const char *URL, const char *encoding, int options)
14959: {
14960: xmlParserInputBufferPtr input;
14961: xmlParserInputPtr stream;
14962:
14963: if (ctxt == NULL)
14964: return (NULL);
14965: if (buffer == NULL)
14966: return (NULL);
14967:
14968: xmlCtxtReset(ctxt);
14969:
14970: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14971: if (input == NULL) {
14972: return(NULL);
14973: }
14974:
14975: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14976: if (stream == NULL) {
14977: xmlFreeParserInputBuffer(input);
14978: return(NULL);
14979: }
14980:
14981: inputPush(ctxt, stream);
14982: return (xmlDoRead(ctxt, URL, encoding, options, 1));
14983: }
14984:
14985: /**
14986: * xmlCtxtReadFd:
14987: * @ctxt: an XML parser context
14988: * @fd: an open file descriptor
14989: * @URL: the base URL to use for the document
14990: * @encoding: the document encoding, or NULL
14991: * @options: a combination of xmlParserOption
14992: *
14993: * parse an XML from a file descriptor and build a tree.
14994: * This reuses the existing @ctxt parser context
14995: * NOTE that the file descriptor will not be closed when the
14996: * reader is closed or reset.
14997: *
14998: * Returns the resulting document tree
14999: */
15000: xmlDocPtr
15001: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15002: const char *URL, const char *encoding, int options)
15003: {
15004: xmlParserInputBufferPtr input;
15005: xmlParserInputPtr stream;
15006:
15007: if (fd < 0)
15008: return (NULL);
15009: if (ctxt == NULL)
15010: return (NULL);
15011:
15012: xmlCtxtReset(ctxt);
15013:
15014:
15015: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15016: if (input == NULL)
15017: return (NULL);
15018: input->closecallback = NULL;
15019: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15020: if (stream == NULL) {
15021: xmlFreeParserInputBuffer(input);
15022: return (NULL);
15023: }
15024: inputPush(ctxt, stream);
15025: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15026: }
15027:
15028: /**
15029: * xmlCtxtReadIO:
15030: * @ctxt: an XML parser context
15031: * @ioread: an I/O read function
15032: * @ioclose: an I/O close function
15033: * @ioctx: an I/O handler
15034: * @URL: the base URL to use for the document
15035: * @encoding: the document encoding, or NULL
15036: * @options: a combination of xmlParserOption
15037: *
15038: * parse an XML document from I/O functions and source and build a tree.
15039: * This reuses the existing @ctxt parser context
1.1.1.2 misho 15040: *
1.1 misho 15041: * Returns the resulting document tree
15042: */
15043: xmlDocPtr
15044: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15045: xmlInputCloseCallback ioclose, void *ioctx,
15046: const char *URL,
15047: const char *encoding, int options)
15048: {
15049: xmlParserInputBufferPtr input;
15050: xmlParserInputPtr stream;
15051:
15052: if (ioread == NULL)
15053: return (NULL);
15054: if (ctxt == NULL)
15055: return (NULL);
15056:
15057: xmlCtxtReset(ctxt);
15058:
15059: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15060: XML_CHAR_ENCODING_NONE);
1.1.1.2 misho 15061: if (input == NULL) {
15062: if (ioclose != NULL)
15063: ioclose(ioctx);
1.1 misho 15064: return (NULL);
1.1.1.2 misho 15065: }
1.1 misho 15066: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15067: if (stream == NULL) {
15068: xmlFreeParserInputBuffer(input);
15069: return (NULL);
15070: }
15071: inputPush(ctxt, stream);
15072: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15073: }
15074:
15075: #define bottom_parser
15076: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>