Annotation of embedaddon/libxml2/parser.c, revision 1.1.1.3.2.1
1.1 misho 1: /*
2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
4: *
5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
1.1.1.3 misho 20: * different ranges of character are actually implemented either in
1.1 misho 21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
28: * See Copyright for the status of this software.
29: *
30: * daniel@veillard.com
31: */
32:
33: #define IN_LIBXML
34: #include "libxml.h"
35:
36: #if defined(WIN32) && !defined (__CYGWIN__)
37: #define XML_DIR_SEP '\\'
38: #else
39: #define XML_DIR_SEP '/'
40: #endif
41:
42: #include <stdlib.h>
1.1.1.3 misho 43: #include <limits.h>
1.1 misho 44: #include <string.h>
45: #include <stdarg.h>
46: #include <libxml/xmlmemory.h>
47: #include <libxml/threads.h>
48: #include <libxml/globals.h>
49: #include <libxml/tree.h>
50: #include <libxml/parser.h>
51: #include <libxml/parserInternals.h>
52: #include <libxml/valid.h>
53: #include <libxml/entities.h>
54: #include <libxml/xmlerror.h>
55: #include <libxml/encoding.h>
56: #include <libxml/xmlIO.h>
57: #include <libxml/uri.h>
58: #ifdef LIBXML_CATALOG_ENABLED
59: #include <libxml/catalog.h>
60: #endif
61: #ifdef LIBXML_SCHEMAS_ENABLED
62: #include <libxml/xmlschemastypes.h>
63: #include <libxml/relaxng.h>
64: #endif
65: #ifdef HAVE_CTYPE_H
66: #include <ctype.h>
67: #endif
68: #ifdef HAVE_STDLIB_H
69: #include <stdlib.h>
70: #endif
71: #ifdef HAVE_SYS_STAT_H
72: #include <sys/stat.h>
73: #endif
74: #ifdef HAVE_FCNTL_H
75: #include <fcntl.h>
76: #endif
77: #ifdef HAVE_UNISTD_H
78: #include <unistd.h>
79: #endif
80: #ifdef HAVE_ZLIB_H
81: #include <zlib.h>
82: #endif
1.1.1.2 misho 83: #ifdef HAVE_LZMA_H
84: #include <lzma.h>
85: #endif
1.1 misho 86:
1.1.1.3 misho 87: #include "buf.h"
88: #include "enc.h"
89:
1.1 misho 90: static void
91: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92:
93: static xmlParserCtxtPtr
94: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95: const xmlChar *base, xmlParserCtxtPtr pctx);
96:
97: /************************************************************************
98: * *
99: * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100: * *
101: ************************************************************************/
102:
103: #define XML_PARSER_BIG_ENTITY 1000
104: #define XML_PARSER_LOT_ENTITY 5000
105:
106: /*
107: * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108: * replacement over the size in bytes of the input indicates that you have
109: * an exponential behaviour. A value of 10 corresponds to at least 3 entity
110: * replacements per byte of input.
111: */
112: #define XML_PARSER_NON_LINEAR 10
113:
114: /*
115: * xmlParserEntityCheck
116: *
117: * Function to check non-linear entity expansion behaviour
118: * This is here to detect and stop exponential linear entity expansion
119: * This is not a limitation of the parser but a safety
120: * boundary feature. It can be disabled with the XML_PARSE_HUGE
121: * parser option.
122: */
123: static int
1.1.1.3 misho 124: xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125: xmlEntityPtr ent, size_t replacement)
1.1 misho 126: {
1.1.1.3 misho 127: size_t consumed = 0;
1.1 misho 128:
129: if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130: return (0);
131: if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132: return (1);
1.1.1.3 misho 133: if (replacement != 0) {
134: if (replacement < XML_MAX_TEXT_LENGTH)
135: return(0);
136:
137: /*
138: * If the volume of entity copy reaches 10 times the
139: * amount of parsed data and over the large text threshold
140: * then that's very likely to be an abuse.
141: */
142: if (ctxt->input != NULL) {
143: consumed = ctxt->input->consumed +
144: (ctxt->input->cur - ctxt->input->base);
145: }
146: consumed += ctxt->sizeentities;
147:
148: if (replacement < XML_PARSER_NON_LINEAR * consumed)
149: return(0);
150: } else if (size != 0) {
1.1 misho 151: /*
152: * Do the check based on the replacement size of the entity
153: */
154: if (size < XML_PARSER_BIG_ENTITY)
155: return(0);
156:
157: /*
158: * A limit on the amount of text data reasonably used
159: */
160: if (ctxt->input != NULL) {
161: consumed = ctxt->input->consumed +
162: (ctxt->input->cur - ctxt->input->base);
163: }
164: consumed += ctxt->sizeentities;
165:
166: if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167: (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168: return (0);
169: } else if (ent != NULL) {
170: /*
171: * use the number of parsed entities in the replacement
172: */
1.1.1.3 misho 173: size = ent->checked / 2;
1.1 misho 174:
175: /*
176: * The amount of data parsed counting entities size only once
177: */
178: if (ctxt->input != NULL) {
179: consumed = ctxt->input->consumed +
180: (ctxt->input->cur - ctxt->input->base);
181: }
182: consumed += ctxt->sizeentities;
183:
184: /*
185: * Check the density of entities for the amount of data
186: * knowing an entity reference will take at least 3 bytes
187: */
188: if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189: return (0);
190: } else {
191: /*
192: * strange we got no data for checking just return
193: */
194: return (0);
195: }
196: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197: return (1);
198: }
199:
200: /**
201: * xmlParserMaxDepth:
202: *
203: * arbitrary depth limit for the XML documents that we allow to
204: * process. This is not a limitation of the parser but a safety
205: * boundary feature. It can be disabled with the XML_PARSE_HUGE
206: * parser option.
207: */
208: unsigned int xmlParserMaxDepth = 256;
209:
210:
211:
212: #define SAX2 1
213: #define XML_PARSER_BIG_BUFFER_SIZE 300
214: #define XML_PARSER_BUFFER_SIZE 100
215: #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216:
1.1.1.3 misho 217: /**
218: * XML_PARSER_CHUNK_SIZE
219: *
220: * When calling GROW that's the minimal amount of data
221: * the parser expects to have received. It is not a hard
222: * limit but an optimization when reading strings like Names.
223: * It is not strictly needed as long as the input's available characters
224: * are followed by a 0, which should be provided by the I/O level.
225: */
226: #define XML_PARSER_CHUNK_SIZE 100
227:
1.1 misho 228: /*
229: * List of XML prefixed PI allowed by W3C specs
230: */
231:
232: static const char *xmlW3CPIs[] = {
233: "xml-stylesheet",
1.1.1.2 misho 234: "xml-model",
1.1 misho 235: NULL
236: };
237:
238:
239: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
240: static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241: const xmlChar **str);
242:
243: static xmlParserErrors
244: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245: xmlSAXHandlerPtr sax,
246: void *user_data, int depth, const xmlChar *URL,
247: const xmlChar *ID, xmlNodePtr *list);
248:
249: static int
250: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251: const char *encoding);
252: #ifdef LIBXML_LEGACY_ENABLED
253: static void
254: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255: xmlNodePtr lastNode);
256: #endif /* LIBXML_LEGACY_ENABLED */
257:
258: static xmlParserErrors
259: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260: const xmlChar *string, void *user_data, xmlNodePtr *lst);
261:
262: static int
263: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264:
265: /************************************************************************
266: * *
1.1.1.3 misho 267: * Some factorized error routines *
1.1 misho 268: * *
269: ************************************************************************/
270:
271: /**
272: * xmlErrAttributeDup:
273: * @ctxt: an XML parser context
274: * @prefix: the attribute prefix
275: * @localname: the attribute localname
276: *
277: * Handle a redefinition of attribute error
278: */
279: static void
280: xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281: const xmlChar * localname)
282: {
283: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284: (ctxt->instate == XML_PARSER_EOF))
285: return;
286: if (ctxt != NULL)
287: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
288:
289: if (prefix == NULL)
290: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
291: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
292: (const char *) localname, NULL, NULL, 0, 0,
293: "Attribute %s redefined\n", localname);
294: else
295: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
296: XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
297: (const char *) prefix, (const char *) localname,
298: NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299: localname);
300: if (ctxt != NULL) {
301: ctxt->wellFormed = 0;
302: if (ctxt->recovery == 0)
303: ctxt->disableSAX = 1;
304: }
305: }
306:
307: /**
308: * xmlFatalErr:
309: * @ctxt: an XML parser context
310: * @error: the error number
311: * @extra: extra information string
312: *
313: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314: */
315: static void
316: xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
317: {
318: const char *errmsg;
1.1.1.3 misho 319: char errstr[129] = "";
1.1 misho 320:
321: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322: (ctxt->instate == XML_PARSER_EOF))
323: return;
324: switch (error) {
325: case XML_ERR_INVALID_HEX_CHARREF:
1.1.1.3 misho 326: errmsg = "CharRef: invalid hexadecimal value";
1.1 misho 327: break;
328: case XML_ERR_INVALID_DEC_CHARREF:
1.1.1.3 misho 329: errmsg = "CharRef: invalid decimal value";
1.1 misho 330: break;
331: case XML_ERR_INVALID_CHARREF:
1.1.1.3 misho 332: errmsg = "CharRef: invalid value";
1.1 misho 333: break;
334: case XML_ERR_INTERNAL_ERROR:
335: errmsg = "internal error";
336: break;
337: case XML_ERR_PEREF_AT_EOF:
1.1.1.3 misho 338: errmsg = "PEReference at end of document";
1.1 misho 339: break;
340: case XML_ERR_PEREF_IN_PROLOG:
1.1.1.3 misho 341: errmsg = "PEReference in prolog";
1.1 misho 342: break;
343: case XML_ERR_PEREF_IN_EPILOG:
1.1.1.3 misho 344: errmsg = "PEReference in epilog";
1.1 misho 345: break;
346: case XML_ERR_PEREF_NO_NAME:
1.1.1.3 misho 347: errmsg = "PEReference: no name";
1.1 misho 348: break;
349: case XML_ERR_PEREF_SEMICOL_MISSING:
1.1.1.3 misho 350: errmsg = "PEReference: expecting ';'";
1.1 misho 351: break;
352: case XML_ERR_ENTITY_LOOP:
1.1.1.3 misho 353: errmsg = "Detected an entity reference loop";
1.1 misho 354: break;
355: case XML_ERR_ENTITY_NOT_STARTED:
1.1.1.3 misho 356: errmsg = "EntityValue: \" or ' expected";
1.1 misho 357: break;
358: case XML_ERR_ENTITY_PE_INTERNAL:
1.1.1.3 misho 359: errmsg = "PEReferences forbidden in internal subset";
1.1 misho 360: break;
361: case XML_ERR_ENTITY_NOT_FINISHED:
1.1.1.3 misho 362: errmsg = "EntityValue: \" or ' expected";
1.1 misho 363: break;
364: case XML_ERR_ATTRIBUTE_NOT_STARTED:
1.1.1.3 misho 365: errmsg = "AttValue: \" or ' expected";
1.1 misho 366: break;
367: case XML_ERR_LT_IN_ATTRIBUTE:
1.1.1.3 misho 368: errmsg = "Unescaped '<' not allowed in attributes values";
1.1 misho 369: break;
370: case XML_ERR_LITERAL_NOT_STARTED:
1.1.1.3 misho 371: errmsg = "SystemLiteral \" or ' expected";
1.1 misho 372: break;
373: case XML_ERR_LITERAL_NOT_FINISHED:
1.1.1.3 misho 374: errmsg = "Unfinished System or Public ID \" or ' expected";
1.1 misho 375: break;
376: case XML_ERR_MISPLACED_CDATA_END:
1.1.1.3 misho 377: errmsg = "Sequence ']]>' not allowed in content";
1.1 misho 378: break;
379: case XML_ERR_URI_REQUIRED:
1.1.1.3 misho 380: errmsg = "SYSTEM or PUBLIC, the URI is missing";
1.1 misho 381: break;
382: case XML_ERR_PUBID_REQUIRED:
1.1.1.3 misho 383: errmsg = "PUBLIC, the Public Identifier is missing";
1.1 misho 384: break;
385: case XML_ERR_HYPHEN_IN_COMMENT:
1.1.1.3 misho 386: errmsg = "Comment must not contain '--' (double-hyphen)";
1.1 misho 387: break;
388: case XML_ERR_PI_NOT_STARTED:
1.1.1.3 misho 389: errmsg = "xmlParsePI : no target name";
1.1 misho 390: break;
391: case XML_ERR_RESERVED_XML_NAME:
1.1.1.3 misho 392: errmsg = "Invalid PI name";
1.1 misho 393: break;
394: case XML_ERR_NOTATION_NOT_STARTED:
1.1.1.3 misho 395: errmsg = "NOTATION: Name expected here";
1.1 misho 396: break;
397: case XML_ERR_NOTATION_NOT_FINISHED:
1.1.1.3 misho 398: errmsg = "'>' required to close NOTATION declaration";
1.1 misho 399: break;
400: case XML_ERR_VALUE_REQUIRED:
1.1.1.3 misho 401: errmsg = "Entity value required";
1.1 misho 402: break;
403: case XML_ERR_URI_FRAGMENT:
404: errmsg = "Fragment not allowed";
405: break;
406: case XML_ERR_ATTLIST_NOT_STARTED:
1.1.1.3 misho 407: errmsg = "'(' required to start ATTLIST enumeration";
1.1 misho 408: break;
409: case XML_ERR_NMTOKEN_REQUIRED:
1.1.1.3 misho 410: errmsg = "NmToken expected in ATTLIST enumeration";
1.1 misho 411: break;
412: case XML_ERR_ATTLIST_NOT_FINISHED:
1.1.1.3 misho 413: errmsg = "')' required to finish ATTLIST enumeration";
1.1 misho 414: break;
415: case XML_ERR_MIXED_NOT_STARTED:
1.1.1.3 misho 416: errmsg = "MixedContentDecl : '|' or ')*' expected";
1.1 misho 417: break;
418: case XML_ERR_PCDATA_REQUIRED:
1.1.1.3 misho 419: errmsg = "MixedContentDecl : '#PCDATA' expected";
1.1 misho 420: break;
421: case XML_ERR_ELEMCONTENT_NOT_STARTED:
1.1.1.3 misho 422: errmsg = "ContentDecl : Name or '(' expected";
1.1 misho 423: break;
424: case XML_ERR_ELEMCONTENT_NOT_FINISHED:
1.1.1.3 misho 425: errmsg = "ContentDecl : ',' '|' or ')' expected";
1.1 misho 426: break;
427: case XML_ERR_PEREF_IN_INT_SUBSET:
428: errmsg =
1.1.1.3 misho 429: "PEReference: forbidden within markup decl in internal subset";
1.1 misho 430: break;
431: case XML_ERR_GT_REQUIRED:
1.1.1.3 misho 432: errmsg = "expected '>'";
1.1 misho 433: break;
434: case XML_ERR_CONDSEC_INVALID:
1.1.1.3 misho 435: errmsg = "XML conditional section '[' expected";
1.1 misho 436: break;
437: case XML_ERR_EXT_SUBSET_NOT_FINISHED:
1.1.1.3 misho 438: errmsg = "Content error in the external subset";
1.1 misho 439: break;
440: case XML_ERR_CONDSEC_INVALID_KEYWORD:
441: errmsg =
1.1.1.3 misho 442: "conditional section INCLUDE or IGNORE keyword expected";
1.1 misho 443: break;
444: case XML_ERR_CONDSEC_NOT_FINISHED:
1.1.1.3 misho 445: errmsg = "XML conditional section not closed";
1.1 misho 446: break;
447: case XML_ERR_XMLDECL_NOT_STARTED:
1.1.1.3 misho 448: errmsg = "Text declaration '<?xml' required";
1.1 misho 449: break;
450: case XML_ERR_XMLDECL_NOT_FINISHED:
1.1.1.3 misho 451: errmsg = "parsing XML declaration: '?>' expected";
1.1 misho 452: break;
453: case XML_ERR_EXT_ENTITY_STANDALONE:
1.1.1.3 misho 454: errmsg = "external parsed entities cannot be standalone";
1.1 misho 455: break;
456: case XML_ERR_ENTITYREF_SEMICOL_MISSING:
1.1.1.3 misho 457: errmsg = "EntityRef: expecting ';'";
1.1 misho 458: break;
459: case XML_ERR_DOCTYPE_NOT_FINISHED:
1.1.1.3 misho 460: errmsg = "DOCTYPE improperly terminated";
1.1 misho 461: break;
462: case XML_ERR_LTSLASH_REQUIRED:
1.1.1.3 misho 463: errmsg = "EndTag: '</' not found";
1.1 misho 464: break;
465: case XML_ERR_EQUAL_REQUIRED:
1.1.1.3 misho 466: errmsg = "expected '='";
1.1 misho 467: break;
468: case XML_ERR_STRING_NOT_CLOSED:
1.1.1.3 misho 469: errmsg = "String not closed expecting \" or '";
1.1 misho 470: break;
471: case XML_ERR_STRING_NOT_STARTED:
1.1.1.3 misho 472: errmsg = "String not started expecting ' or \"";
1.1 misho 473: break;
474: case XML_ERR_ENCODING_NAME:
1.1.1.3 misho 475: errmsg = "Invalid XML encoding name";
1.1 misho 476: break;
477: case XML_ERR_STANDALONE_VALUE:
1.1.1.3 misho 478: errmsg = "standalone accepts only 'yes' or 'no'";
1.1 misho 479: break;
480: case XML_ERR_DOCUMENT_EMPTY:
1.1.1.3 misho 481: errmsg = "Document is empty";
1.1 misho 482: break;
483: case XML_ERR_DOCUMENT_END:
1.1.1.3 misho 484: errmsg = "Extra content at the end of the document";
1.1 misho 485: break;
486: case XML_ERR_NOT_WELL_BALANCED:
1.1.1.3 misho 487: errmsg = "chunk is not well balanced";
1.1 misho 488: break;
489: case XML_ERR_EXTRA_CONTENT:
1.1.1.3 misho 490: errmsg = "extra content at the end of well balanced chunk";
1.1 misho 491: break;
492: case XML_ERR_VERSION_MISSING:
1.1.1.3 misho 493: errmsg = "Malformed declaration expecting version";
494: break;
495: case XML_ERR_NAME_TOO_LONG:
496: errmsg = "Name too long use XML_PARSE_HUGE option";
1.1 misho 497: break;
498: #if 0
499: case:
1.1.1.3 misho 500: errmsg = "";
1.1 misho 501: break;
502: #endif
503: default:
1.1.1.3 misho 504: errmsg = "Unregistered error message";
1.1 misho 505: }
1.1.1.3 misho 506: if (info == NULL)
507: snprintf(errstr, 128, "%s\n", errmsg);
508: else
509: snprintf(errstr, 128, "%s: %%s\n", errmsg);
1.1 misho 510: if (ctxt != NULL)
511: ctxt->errNo = error;
512: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
1.1.1.3 misho 513: XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
1.1 misho 514: info);
515: if (ctxt != NULL) {
516: ctxt->wellFormed = 0;
517: if (ctxt->recovery == 0)
518: ctxt->disableSAX = 1;
519: }
520: }
521:
522: /**
523: * xmlFatalErrMsg:
524: * @ctxt: an XML parser context
525: * @error: the error number
526: * @msg: the error message
527: *
528: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529: */
530: static void
531: xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532: const char *msg)
533: {
534: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535: (ctxt->instate == XML_PARSER_EOF))
536: return;
537: if (ctxt != NULL)
538: ctxt->errNo = error;
539: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
540: XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
541: if (ctxt != NULL) {
542: ctxt->wellFormed = 0;
543: if (ctxt->recovery == 0)
544: ctxt->disableSAX = 1;
545: }
546: }
547:
548: /**
549: * xmlWarningMsg:
550: * @ctxt: an XML parser context
551: * @error: the error number
552: * @msg: the error message
553: * @str1: extra data
554: * @str2: extra data
555: *
556: * Handle a warning.
557: */
558: static void
559: xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560: const char *msg, const xmlChar *str1, const xmlChar *str2)
561: {
562: xmlStructuredErrorFunc schannel = NULL;
563:
564: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565: (ctxt->instate == XML_PARSER_EOF))
566: return;
567: if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568: (ctxt->sax->initialized == XML_SAX2_MAGIC))
569: schannel = ctxt->sax->serror;
570: if (ctxt != NULL) {
571: __xmlRaiseError(schannel,
572: (ctxt->sax) ? ctxt->sax->warning : NULL,
573: ctxt->userData,
574: ctxt, NULL, XML_FROM_PARSER, error,
575: XML_ERR_WARNING, NULL, 0,
576: (const char *) str1, (const char *) str2, NULL, 0, 0,
577: msg, (const char *) str1, (const char *) str2);
578: } else {
579: __xmlRaiseError(schannel, NULL, NULL,
580: ctxt, NULL, XML_FROM_PARSER, error,
581: XML_ERR_WARNING, NULL, 0,
582: (const char *) str1, (const char *) str2, NULL, 0, 0,
583: msg, (const char *) str1, (const char *) str2);
584: }
585: }
586:
587: /**
588: * xmlValidityError:
589: * @ctxt: an XML parser context
590: * @error: the error number
591: * @msg: the error message
592: * @str1: extra data
593: *
594: * Handle a validity error.
595: */
596: static void
597: xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598: const char *msg, const xmlChar *str1, const xmlChar *str2)
599: {
600: xmlStructuredErrorFunc schannel = NULL;
601:
602: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603: (ctxt->instate == XML_PARSER_EOF))
604: return;
605: if (ctxt != NULL) {
606: ctxt->errNo = error;
607: if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608: schannel = ctxt->sax->serror;
609: }
610: if (ctxt != NULL) {
611: __xmlRaiseError(schannel,
612: ctxt->vctxt.error, ctxt->vctxt.userData,
613: ctxt, NULL, XML_FROM_DTD, error,
614: XML_ERR_ERROR, NULL, 0, (const char *) str1,
615: (const char *) str2, NULL, 0, 0,
616: msg, (const char *) str1, (const char *) str2);
617: ctxt->valid = 0;
618: } else {
619: __xmlRaiseError(schannel, NULL, NULL,
620: ctxt, NULL, XML_FROM_DTD, error,
621: XML_ERR_ERROR, NULL, 0, (const char *) str1,
622: (const char *) str2, NULL, 0, 0,
623: msg, (const char *) str1, (const char *) str2);
624: }
625: }
626:
627: /**
628: * xmlFatalErrMsgInt:
629: * @ctxt: an XML parser context
630: * @error: the error number
631: * @msg: the error message
632: * @val: an integer value
633: *
634: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635: */
636: static void
637: xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
638: const char *msg, int val)
639: {
640: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641: (ctxt->instate == XML_PARSER_EOF))
642: return;
643: if (ctxt != NULL)
644: ctxt->errNo = error;
645: __xmlRaiseError(NULL, NULL, NULL,
646: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647: NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
648: if (ctxt != NULL) {
649: ctxt->wellFormed = 0;
650: if (ctxt->recovery == 0)
651: ctxt->disableSAX = 1;
652: }
653: }
654:
655: /**
656: * xmlFatalErrMsgStrIntStr:
657: * @ctxt: an XML parser context
658: * @error: the error number
659: * @msg: the error message
660: * @str1: an string info
661: * @val: an integer value
662: * @str2: an string info
663: *
664: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665: */
666: static void
667: xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
1.1.1.3 misho 668: const char *msg, const xmlChar *str1, int val,
1.1 misho 669: const xmlChar *str2)
670: {
671: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672: (ctxt->instate == XML_PARSER_EOF))
673: return;
674: if (ctxt != NULL)
675: ctxt->errNo = error;
676: __xmlRaiseError(NULL, NULL, NULL,
677: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678: NULL, 0, (const char *) str1, (const char *) str2,
679: NULL, val, 0, msg, str1, val, str2);
680: if (ctxt != NULL) {
681: ctxt->wellFormed = 0;
682: if (ctxt->recovery == 0)
683: ctxt->disableSAX = 1;
684: }
685: }
686:
687: /**
688: * xmlFatalErrMsgStr:
689: * @ctxt: an XML parser context
690: * @error: the error number
691: * @msg: the error message
692: * @val: a string value
693: *
694: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695: */
696: static void
697: xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
698: const char *msg, const xmlChar * val)
699: {
700: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701: (ctxt->instate == XML_PARSER_EOF))
702: return;
703: if (ctxt != NULL)
704: ctxt->errNo = error;
705: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
706: XML_FROM_PARSER, error, XML_ERR_FATAL,
707: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708: val);
709: if (ctxt != NULL) {
710: ctxt->wellFormed = 0;
711: if (ctxt->recovery == 0)
712: ctxt->disableSAX = 1;
713: }
714: }
715:
716: /**
717: * xmlErrMsgStr:
718: * @ctxt: an XML parser context
719: * @error: the error number
720: * @msg: the error message
721: * @val: a string value
722: *
723: * Handle a non fatal parser error
724: */
725: static void
726: xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727: const char *msg, const xmlChar * val)
728: {
729: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730: (ctxt->instate == XML_PARSER_EOF))
731: return;
732: if (ctxt != NULL)
733: ctxt->errNo = error;
734: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
735: XML_FROM_PARSER, error, XML_ERR_ERROR,
736: NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737: val);
738: }
739:
740: /**
741: * xmlNsErr:
742: * @ctxt: an XML parser context
743: * @error: the error number
744: * @msg: the message
745: * @info1: extra information string
746: * @info2: extra information string
747: *
748: * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749: */
750: static void
751: xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752: const char *msg,
753: const xmlChar * info1, const xmlChar * info2,
754: const xmlChar * info3)
755: {
756: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757: (ctxt->instate == XML_PARSER_EOF))
758: return;
759: if (ctxt != NULL)
760: ctxt->errNo = error;
761: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
762: XML_ERR_ERROR, NULL, 0, (const char *) info1,
763: (const char *) info2, (const char *) info3, 0, 0, msg,
764: info1, info2, info3);
765: if (ctxt != NULL)
766: ctxt->nsWellFormed = 0;
767: }
768:
769: /**
770: * xmlNsWarn
771: * @ctxt: an XML parser context
772: * @error: the error number
773: * @msg: the message
774: * @info1: extra information string
775: * @info2: extra information string
776: *
1.1.1.2 misho 777: * Handle a namespace warning error
1.1 misho 778: */
779: static void
780: xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781: const char *msg,
782: const xmlChar * info1, const xmlChar * info2,
783: const xmlChar * info3)
784: {
785: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786: (ctxt->instate == XML_PARSER_EOF))
787: return;
788: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789: XML_ERR_WARNING, NULL, 0, (const char *) info1,
790: (const char *) info2, (const char *) info3, 0, 0, msg,
791: info1, info2, info3);
792: }
793:
794: /************************************************************************
795: * *
1.1.1.3 misho 796: * Library wide options *
1.1 misho 797: * *
798: ************************************************************************/
799:
800: /**
801: * xmlHasFeature:
802: * @feature: the feature to be examined
803: *
804: * Examines if the library has been compiled with a given feature.
805: *
806: * Returns a non-zero value if the feature exist, otherwise zero.
807: * Returns zero (0) if the feature does not exist or an unknown
808: * unknown feature is requested, non-zero otherwise.
809: */
810: int
811: xmlHasFeature(xmlFeature feature)
812: {
813: switch (feature) {
814: case XML_WITH_THREAD:
815: #ifdef LIBXML_THREAD_ENABLED
816: return(1);
817: #else
818: return(0);
819: #endif
820: case XML_WITH_TREE:
821: #ifdef LIBXML_TREE_ENABLED
822: return(1);
823: #else
824: return(0);
825: #endif
826: case XML_WITH_OUTPUT:
827: #ifdef LIBXML_OUTPUT_ENABLED
828: return(1);
829: #else
830: return(0);
831: #endif
832: case XML_WITH_PUSH:
833: #ifdef LIBXML_PUSH_ENABLED
834: return(1);
835: #else
836: return(0);
837: #endif
838: case XML_WITH_READER:
839: #ifdef LIBXML_READER_ENABLED
840: return(1);
841: #else
842: return(0);
843: #endif
844: case XML_WITH_PATTERN:
845: #ifdef LIBXML_PATTERN_ENABLED
846: return(1);
847: #else
848: return(0);
849: #endif
850: case XML_WITH_WRITER:
851: #ifdef LIBXML_WRITER_ENABLED
852: return(1);
853: #else
854: return(0);
855: #endif
856: case XML_WITH_SAX1:
857: #ifdef LIBXML_SAX1_ENABLED
858: return(1);
859: #else
860: return(0);
861: #endif
862: case XML_WITH_FTP:
863: #ifdef LIBXML_FTP_ENABLED
864: return(1);
865: #else
866: return(0);
867: #endif
868: case XML_WITH_HTTP:
869: #ifdef LIBXML_HTTP_ENABLED
870: return(1);
871: #else
872: return(0);
873: #endif
874: case XML_WITH_VALID:
875: #ifdef LIBXML_VALID_ENABLED
876: return(1);
877: #else
878: return(0);
879: #endif
880: case XML_WITH_HTML:
881: #ifdef LIBXML_HTML_ENABLED
882: return(1);
883: #else
884: return(0);
885: #endif
886: case XML_WITH_LEGACY:
887: #ifdef LIBXML_LEGACY_ENABLED
888: return(1);
889: #else
890: return(0);
891: #endif
892: case XML_WITH_C14N:
893: #ifdef LIBXML_C14N_ENABLED
894: return(1);
895: #else
896: return(0);
897: #endif
898: case XML_WITH_CATALOG:
899: #ifdef LIBXML_CATALOG_ENABLED
900: return(1);
901: #else
902: return(0);
903: #endif
904: case XML_WITH_XPATH:
905: #ifdef LIBXML_XPATH_ENABLED
906: return(1);
907: #else
908: return(0);
909: #endif
910: case XML_WITH_XPTR:
911: #ifdef LIBXML_XPTR_ENABLED
912: return(1);
913: #else
914: return(0);
915: #endif
916: case XML_WITH_XINCLUDE:
917: #ifdef LIBXML_XINCLUDE_ENABLED
918: return(1);
919: #else
920: return(0);
921: #endif
922: case XML_WITH_ICONV:
923: #ifdef LIBXML_ICONV_ENABLED
924: return(1);
925: #else
926: return(0);
927: #endif
928: case XML_WITH_ISO8859X:
929: #ifdef LIBXML_ISO8859X_ENABLED
930: return(1);
931: #else
932: return(0);
933: #endif
934: case XML_WITH_UNICODE:
935: #ifdef LIBXML_UNICODE_ENABLED
936: return(1);
937: #else
938: return(0);
939: #endif
940: case XML_WITH_REGEXP:
941: #ifdef LIBXML_REGEXP_ENABLED
942: return(1);
943: #else
944: return(0);
945: #endif
946: case XML_WITH_AUTOMATA:
947: #ifdef LIBXML_AUTOMATA_ENABLED
948: return(1);
949: #else
950: return(0);
951: #endif
952: case XML_WITH_EXPR:
953: #ifdef LIBXML_EXPR_ENABLED
954: return(1);
955: #else
956: return(0);
957: #endif
958: case XML_WITH_SCHEMAS:
959: #ifdef LIBXML_SCHEMAS_ENABLED
960: return(1);
961: #else
962: return(0);
963: #endif
964: case XML_WITH_SCHEMATRON:
965: #ifdef LIBXML_SCHEMATRON_ENABLED
966: return(1);
967: #else
968: return(0);
969: #endif
970: case XML_WITH_MODULES:
971: #ifdef LIBXML_MODULES_ENABLED
972: return(1);
973: #else
974: return(0);
975: #endif
976: case XML_WITH_DEBUG:
977: #ifdef LIBXML_DEBUG_ENABLED
978: return(1);
979: #else
980: return(0);
981: #endif
982: case XML_WITH_DEBUG_MEM:
983: #ifdef DEBUG_MEMORY_LOCATION
984: return(1);
985: #else
986: return(0);
987: #endif
988: case XML_WITH_DEBUG_RUN:
989: #ifdef LIBXML_DEBUG_RUNTIME
990: return(1);
991: #else
992: return(0);
993: #endif
994: case XML_WITH_ZLIB:
995: #ifdef LIBXML_ZLIB_ENABLED
996: return(1);
997: #else
998: return(0);
999: #endif
1.1.1.2 misho 1000: case XML_WITH_LZMA:
1001: #ifdef LIBXML_LZMA_ENABLED
1002: return(1);
1003: #else
1004: return(0);
1005: #endif
1.1 misho 1006: case XML_WITH_ICU:
1007: #ifdef LIBXML_ICU_ENABLED
1008: return(1);
1009: #else
1010: return(0);
1011: #endif
1012: default:
1013: break;
1014: }
1015: return(0);
1016: }
1017:
1018: /************************************************************************
1019: * *
1.1.1.3 misho 1020: * SAX2 defaulted attributes handling *
1.1 misho 1021: * *
1022: ************************************************************************/
1023:
1024: /**
1025: * xmlDetectSAX2:
1026: * @ctxt: an XML parser context
1027: *
1028: * Do the SAX2 detection and specific intialization
1029: */
1030: static void
1031: xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032: if (ctxt == NULL) return;
1033: #ifdef LIBXML_SAX1_ENABLED
1034: if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035: ((ctxt->sax->startElementNs != NULL) ||
1036: (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1037: #else
1038: ctxt->sax2 = 1;
1039: #endif /* LIBXML_SAX1_ENABLED */
1040:
1041: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1.1.1.3 misho 1044: if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045: (ctxt->str_xml_ns == NULL)) {
1.1 misho 1046: xmlErrMemory(ctxt, NULL);
1047: }
1048: }
1049:
1050: typedef struct _xmlDefAttrs xmlDefAttrs;
1051: typedef xmlDefAttrs *xmlDefAttrsPtr;
1052: struct _xmlDefAttrs {
1053: int nbAttrs; /* number of defaulted attributes on that element */
1054: int maxAttrs; /* the size of the array */
1055: const xmlChar *values[5]; /* array of localname/prefix/values/external */
1056: };
1057:
1058: /**
1059: * xmlAttrNormalizeSpace:
1060: * @src: the source string
1061: * @dst: the target string
1062: *
1063: * Normalize the space in non CDATA attribute values:
1064: * If the attribute type is not CDATA, then the XML processor MUST further
1065: * process the normalized attribute value by discarding any leading and
1066: * trailing space (#x20) characters, and by replacing sequences of space
1067: * (#x20) characters by a single space (#x20) character.
1068: * Note that the size of dst need to be at least src, and if one doesn't need
1069: * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070: * passing src as dst is just fine.
1071: *
1072: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073: * is needed.
1074: */
1075: static xmlChar *
1076: xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077: {
1078: if ((src == NULL) || (dst == NULL))
1079: return(NULL);
1080:
1081: while (*src == 0x20) src++;
1082: while (*src != 0) {
1083: if (*src == 0x20) {
1084: while (*src == 0x20) src++;
1085: if (*src != 0)
1086: *dst++ = 0x20;
1087: } else {
1088: *dst++ = *src++;
1089: }
1090: }
1091: *dst = 0;
1092: if (dst == src)
1093: return(NULL);
1094: return(dst);
1095: }
1096:
1097: /**
1098: * xmlAttrNormalizeSpace2:
1099: * @src: the source string
1100: *
1101: * Normalize the space in non CDATA attribute values, a slightly more complex
1102: * front end to avoid allocation problems when running on attribute values
1103: * coming from the input.
1104: *
1105: * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106: * is needed.
1107: */
1108: static const xmlChar *
1109: xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1110: {
1111: int i;
1112: int remove_head = 0;
1113: int need_realloc = 0;
1114: const xmlChar *cur;
1115:
1116: if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117: return(NULL);
1118: i = *len;
1119: if (i <= 0)
1120: return(NULL);
1121:
1122: cur = src;
1123: while (*cur == 0x20) {
1124: cur++;
1125: remove_head++;
1126: }
1127: while (*cur != 0) {
1128: if (*cur == 0x20) {
1129: cur++;
1130: if ((*cur == 0x20) || (*cur == 0)) {
1131: need_realloc = 1;
1132: break;
1133: }
1134: } else
1135: cur++;
1136: }
1137: if (need_realloc) {
1138: xmlChar *ret;
1139:
1140: ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141: if (ret == NULL) {
1142: xmlErrMemory(ctxt, NULL);
1143: return(NULL);
1144: }
1145: xmlAttrNormalizeSpace(ret, ret);
1146: *len = (int) strlen((const char *)ret);
1147: return(ret);
1148: } else if (remove_head) {
1149: *len -= remove_head;
1150: memmove(src, src + remove_head, 1 + *len);
1151: return(src);
1152: }
1153: return(NULL);
1154: }
1155:
1156: /**
1157: * xmlAddDefAttrs:
1158: * @ctxt: an XML parser context
1159: * @fullname: the element fullname
1160: * @fullattr: the attribute fullname
1161: * @value: the attribute value
1162: *
1163: * Add a defaulted attribute for an element
1164: */
1165: static void
1166: xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167: const xmlChar *fullname,
1168: const xmlChar *fullattr,
1169: const xmlChar *value) {
1170: xmlDefAttrsPtr defaults;
1171: int len;
1172: const xmlChar *name;
1173: const xmlChar *prefix;
1174:
1175: /*
1176: * Allows to detect attribute redefinitions
1177: */
1178: if (ctxt->attsSpecial != NULL) {
1179: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180: return;
1181: }
1182:
1183: if (ctxt->attsDefault == NULL) {
1184: ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1185: if (ctxt->attsDefault == NULL)
1186: goto mem_error;
1187: }
1188:
1189: /*
1190: * split the element name into prefix:localname , the string found
1191: * are within the DTD and then not associated to namespace names.
1192: */
1193: name = xmlSplitQName3(fullname, &len);
1194: if (name == NULL) {
1195: name = xmlDictLookup(ctxt->dict, fullname, -1);
1196: prefix = NULL;
1197: } else {
1198: name = xmlDictLookup(ctxt->dict, name, -1);
1199: prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200: }
1201:
1202: /*
1203: * make sure there is some storage
1204: */
1205: defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206: if (defaults == NULL) {
1207: defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1208: (4 * 5) * sizeof(const xmlChar *));
1209: if (defaults == NULL)
1210: goto mem_error;
1211: defaults->nbAttrs = 0;
1212: defaults->maxAttrs = 4;
1213: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214: defaults, NULL) < 0) {
1215: xmlFree(defaults);
1216: goto mem_error;
1217: }
1218: } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1219: xmlDefAttrsPtr temp;
1220:
1221: temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1222: (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1223: if (temp == NULL)
1224: goto mem_error;
1225: defaults = temp;
1226: defaults->maxAttrs *= 2;
1227: if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228: defaults, NULL) < 0) {
1229: xmlFree(defaults);
1230: goto mem_error;
1231: }
1232: }
1233:
1234: /*
1235: * Split the element name into prefix:localname , the string found
1236: * are within the DTD and hen not associated to namespace names.
1237: */
1238: name = xmlSplitQName3(fullattr, &len);
1239: if (name == NULL) {
1240: name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241: prefix = NULL;
1242: } else {
1243: name = xmlDictLookup(ctxt->dict, name, -1);
1244: prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245: }
1246:
1247: defaults->values[5 * defaults->nbAttrs] = name;
1248: defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1249: /* intern the string and precompute the end */
1250: len = xmlStrlen(value);
1251: value = xmlDictLookup(ctxt->dict, value, len);
1252: defaults->values[5 * defaults->nbAttrs + 2] = value;
1253: defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254: if (ctxt->external)
1255: defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256: else
1257: defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1258: defaults->nbAttrs++;
1259:
1260: return;
1261:
1262: mem_error:
1263: xmlErrMemory(ctxt, NULL);
1264: return;
1265: }
1266:
1267: /**
1268: * xmlAddSpecialAttr:
1269: * @ctxt: an XML parser context
1270: * @fullname: the element fullname
1271: * @fullattr: the attribute fullname
1272: * @type: the attribute type
1273: *
1274: * Register this attribute type
1275: */
1276: static void
1277: xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278: const xmlChar *fullname,
1279: const xmlChar *fullattr,
1280: int type)
1281: {
1282: if (ctxt->attsSpecial == NULL) {
1283: ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1284: if (ctxt->attsSpecial == NULL)
1285: goto mem_error;
1286: }
1287:
1288: if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289: return;
1290:
1291: xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292: (void *) (long) type);
1293: return;
1294:
1295: mem_error:
1296: xmlErrMemory(ctxt, NULL);
1297: return;
1298: }
1299:
1300: /**
1301: * xmlCleanSpecialAttrCallback:
1302: *
1303: * Removes CDATA attributes from the special attribute table
1304: */
1305: static void
1306: xmlCleanSpecialAttrCallback(void *payload, void *data,
1307: const xmlChar *fullname, const xmlChar *fullattr,
1308: const xmlChar *unused ATTRIBUTE_UNUSED) {
1309: xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310:
1311: if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1312: xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313: }
1314: }
1315:
1316: /**
1317: * xmlCleanSpecialAttr:
1318: * @ctxt: an XML parser context
1319: *
1320: * Trim the list of attributes defined to remove all those of type
1321: * CDATA as they are not special. This call should be done when finishing
1322: * to parse the DTD and before starting to parse the document root.
1323: */
1324: static void
1325: xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326: {
1327: if (ctxt->attsSpecial == NULL)
1328: return;
1329:
1330: xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331:
1332: if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333: xmlHashFree(ctxt->attsSpecial, NULL);
1334: ctxt->attsSpecial = NULL;
1335: }
1336: return;
1337: }
1338:
1339: /**
1340: * xmlCheckLanguageID:
1341: * @lang: pointer to the string value
1342: *
1343: * Checks that the value conforms to the LanguageID production:
1344: *
1345: * NOTE: this is somewhat deprecated, those productions were removed from
1346: * the XML Second edition.
1347: *
1348: * [33] LanguageID ::= Langcode ('-' Subcode)*
1349: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353: * [38] Subcode ::= ([a-z] | [A-Z])+
1354: *
1355: * The current REC reference the sucessors of RFC 1766, currently 5646
1356: *
1357: * http://www.rfc-editor.org/rfc/rfc5646.txt
1358: * langtag = language
1359: * ["-" script]
1360: * ["-" region]
1361: * *("-" variant)
1362: * *("-" extension)
1363: * ["-" privateuse]
1364: * language = 2*3ALPHA ; shortest ISO 639 code
1365: * ["-" extlang] ; sometimes followed by
1366: * ; extended language subtags
1367: * / 4ALPHA ; or reserved for future use
1368: * / 5*8ALPHA ; or registered language subtag
1369: *
1370: * extlang = 3ALPHA ; selected ISO 639 codes
1371: * *2("-" 3ALPHA) ; permanently reserved
1372: *
1373: * script = 4ALPHA ; ISO 15924 code
1374: *
1375: * region = 2ALPHA ; ISO 3166-1 code
1376: * / 3DIGIT ; UN M.49 code
1377: *
1378: * variant = 5*8alphanum ; registered variants
1379: * / (DIGIT 3alphanum)
1380: *
1381: * extension = singleton 1*("-" (2*8alphanum))
1382: *
1383: * ; Single alphanumerics
1384: * ; "x" reserved for private use
1385: * singleton = DIGIT ; 0 - 9
1386: * / %x41-57 ; A - W
1387: * / %x59-5A ; Y - Z
1388: * / %x61-77 ; a - w
1389: * / %x79-7A ; y - z
1390: *
1391: * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392: * The parser below doesn't try to cope with extension or privateuse
1393: * that could be added but that's not interoperable anyway
1394: *
1395: * Returns 1 if correct 0 otherwise
1396: **/
1397: int
1398: xmlCheckLanguageID(const xmlChar * lang)
1399: {
1400: const xmlChar *cur = lang, *nxt;
1401:
1402: if (cur == NULL)
1403: return (0);
1404: if (((cur[0] == 'i') && (cur[1] == '-')) ||
1405: ((cur[0] == 'I') && (cur[1] == '-')) ||
1406: ((cur[0] == 'x') && (cur[1] == '-')) ||
1407: ((cur[0] == 'X') && (cur[1] == '-'))) {
1408: /*
1409: * Still allow IANA code and user code which were coming
1410: * from the previous version of the XML-1.0 specification
1411: * it's deprecated but we should not fail
1412: */
1413: cur += 2;
1414: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1415: ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416: cur++;
1417: return(cur[0] == 0);
1418: }
1419: nxt = cur;
1420: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422: nxt++;
1423: if (nxt - cur >= 4) {
1424: /*
1425: * Reserved
1426: */
1427: if ((nxt - cur > 8) || (nxt[0] != 0))
1428: return(0);
1429: return(1);
1430: }
1431: if (nxt - cur < 2)
1432: return(0);
1433: /* we got an ISO 639 code */
1434: if (nxt[0] == 0)
1435: return(1);
1436: if (nxt[0] != '-')
1437: return(0);
1438:
1439: nxt++;
1440: cur = nxt;
1441: /* now we can have extlang or script or region or variant */
1442: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443: goto region_m49;
1444:
1445: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447: nxt++;
1448: if (nxt - cur == 4)
1449: goto script;
1450: if (nxt - cur == 2)
1451: goto region;
1452: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453: goto variant;
1454: if (nxt - cur != 3)
1455: return(0);
1456: /* we parsed an extlang */
1457: if (nxt[0] == 0)
1458: return(1);
1459: if (nxt[0] != '-')
1460: return(0);
1461:
1462: nxt++;
1463: cur = nxt;
1464: /* now we can have script or region or variant */
1465: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466: goto region_m49;
1467:
1468: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470: nxt++;
1471: if (nxt - cur == 2)
1472: goto region;
1473: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474: goto variant;
1475: if (nxt - cur != 4)
1476: return(0);
1477: /* we parsed a script */
1478: script:
1479: if (nxt[0] == 0)
1480: return(1);
1481: if (nxt[0] != '-')
1482: return(0);
1483:
1484: nxt++;
1485: cur = nxt;
1486: /* now we can have region or variant */
1487: if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488: goto region_m49;
1489:
1490: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492: nxt++;
1493:
1494: if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495: goto variant;
1496: if (nxt - cur != 2)
1497: return(0);
1498: /* we parsed a region */
1499: region:
1500: if (nxt[0] == 0)
1501: return(1);
1502: if (nxt[0] != '-')
1503: return(0);
1504:
1505: nxt++;
1506: cur = nxt;
1507: /* now we can just have a variant */
1508: while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509: ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510: nxt++;
1511:
1512: if ((nxt - cur < 5) || (nxt - cur > 8))
1513: return(0);
1514:
1515: /* we parsed a variant */
1516: variant:
1517: if (nxt[0] == 0)
1518: return(1);
1519: if (nxt[0] != '-')
1520: return(0);
1521: /* extensions and private use subtags not checked */
1522: return (1);
1523:
1524: region_m49:
1525: if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526: ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527: nxt += 3;
1528: goto region;
1529: }
1530: return(0);
1531: }
1532:
1533: /************************************************************************
1534: * *
1535: * Parser stacks related functions and macros *
1536: * *
1537: ************************************************************************/
1538:
1539: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540: const xmlChar ** str);
1541:
1542: #ifdef SAX2
1543: /**
1544: * nsPush:
1545: * @ctxt: an XML parser context
1546: * @prefix: the namespace prefix or NULL
1547: * @URL: the namespace name
1548: *
1549: * Pushes a new parser namespace on top of the ns stack
1550: *
1551: * Returns -1 in case of error, -2 if the namespace should be discarded
1552: * and the index in the stack otherwise.
1553: */
1554: static int
1555: nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556: {
1557: if (ctxt->options & XML_PARSE_NSCLEAN) {
1558: int i;
1.1.1.3 misho 1559: for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1.1 misho 1560: if (ctxt->nsTab[i] == prefix) {
1561: /* in scope */
1562: if (ctxt->nsTab[i + 1] == URL)
1563: return(-2);
1564: /* out of scope keep it */
1565: break;
1566: }
1567: }
1568: }
1569: if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570: ctxt->nsMax = 10;
1571: ctxt->nsNr = 0;
1572: ctxt->nsTab = (const xmlChar **)
1573: xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574: if (ctxt->nsTab == NULL) {
1575: xmlErrMemory(ctxt, NULL);
1576: ctxt->nsMax = 0;
1577: return (-1);
1578: }
1579: } else if (ctxt->nsNr >= ctxt->nsMax) {
1580: const xmlChar ** tmp;
1581: ctxt->nsMax *= 2;
1582: tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583: ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584: if (tmp == NULL) {
1585: xmlErrMemory(ctxt, NULL);
1586: ctxt->nsMax /= 2;
1587: return (-1);
1588: }
1589: ctxt->nsTab = tmp;
1590: }
1591: ctxt->nsTab[ctxt->nsNr++] = prefix;
1592: ctxt->nsTab[ctxt->nsNr++] = URL;
1593: return (ctxt->nsNr);
1594: }
1595: /**
1596: * nsPop:
1597: * @ctxt: an XML parser context
1598: * @nr: the number to pop
1599: *
1600: * Pops the top @nr parser prefix/namespace from the ns stack
1601: *
1602: * Returns the number of namespaces removed
1603: */
1604: static int
1605: nsPop(xmlParserCtxtPtr ctxt, int nr)
1606: {
1607: int i;
1608:
1609: if (ctxt->nsTab == NULL) return(0);
1610: if (ctxt->nsNr < nr) {
1611: xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612: nr = ctxt->nsNr;
1613: }
1614: if (ctxt->nsNr <= 0)
1615: return (0);
1616:
1617: for (i = 0;i < nr;i++) {
1618: ctxt->nsNr--;
1619: ctxt->nsTab[ctxt->nsNr] = NULL;
1620: }
1621: return(nr);
1622: }
1623: #endif
1624:
1625: static int
1626: xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627: const xmlChar **atts;
1628: int *attallocs;
1629: int maxatts;
1630:
1631: if (ctxt->atts == NULL) {
1632: maxatts = 55; /* allow for 10 attrs by default */
1633: atts = (const xmlChar **)
1634: xmlMalloc(maxatts * sizeof(xmlChar *));
1635: if (atts == NULL) goto mem_error;
1636: ctxt->atts = atts;
1637: attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638: if (attallocs == NULL) goto mem_error;
1639: ctxt->attallocs = attallocs;
1640: ctxt->maxatts = maxatts;
1641: } else if (nr + 5 > ctxt->maxatts) {
1642: maxatts = (nr + 5) * 2;
1643: atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644: maxatts * sizeof(const xmlChar *));
1645: if (atts == NULL) goto mem_error;
1646: ctxt->atts = atts;
1647: attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648: (maxatts / 5) * sizeof(int));
1649: if (attallocs == NULL) goto mem_error;
1650: ctxt->attallocs = attallocs;
1651: ctxt->maxatts = maxatts;
1652: }
1653: return(ctxt->maxatts);
1654: mem_error:
1655: xmlErrMemory(ctxt, NULL);
1656: return(-1);
1657: }
1658:
1659: /**
1660: * inputPush:
1661: * @ctxt: an XML parser context
1662: * @value: the parser input
1663: *
1664: * Pushes a new parser input on top of the input stack
1665: *
1666: * Returns -1 in case of error, the index in the stack otherwise
1667: */
1668: int
1669: inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670: {
1671: if ((ctxt == NULL) || (value == NULL))
1672: return(-1);
1673: if (ctxt->inputNr >= ctxt->inputMax) {
1674: ctxt->inputMax *= 2;
1675: ctxt->inputTab =
1676: (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677: ctxt->inputMax *
1678: sizeof(ctxt->inputTab[0]));
1679: if (ctxt->inputTab == NULL) {
1680: xmlErrMemory(ctxt, NULL);
1681: xmlFreeInputStream(value);
1682: ctxt->inputMax /= 2;
1683: value = NULL;
1684: return (-1);
1685: }
1686: }
1687: ctxt->inputTab[ctxt->inputNr] = value;
1688: ctxt->input = value;
1689: return (ctxt->inputNr++);
1690: }
1691: /**
1692: * inputPop:
1693: * @ctxt: an XML parser context
1694: *
1695: * Pops the top parser input from the input stack
1696: *
1697: * Returns the input just removed
1698: */
1699: xmlParserInputPtr
1700: inputPop(xmlParserCtxtPtr ctxt)
1701: {
1702: xmlParserInputPtr ret;
1703:
1704: if (ctxt == NULL)
1705: return(NULL);
1706: if (ctxt->inputNr <= 0)
1707: return (NULL);
1708: ctxt->inputNr--;
1709: if (ctxt->inputNr > 0)
1710: ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711: else
1712: ctxt->input = NULL;
1713: ret = ctxt->inputTab[ctxt->inputNr];
1714: ctxt->inputTab[ctxt->inputNr] = NULL;
1715: return (ret);
1716: }
1717: /**
1718: * nodePush:
1719: * @ctxt: an XML parser context
1720: * @value: the element node
1721: *
1722: * Pushes a new element node on top of the node stack
1723: *
1724: * Returns -1 in case of error, the index in the stack otherwise
1725: */
1726: int
1727: nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728: {
1729: if (ctxt == NULL) return(0);
1730: if (ctxt->nodeNr >= ctxt->nodeMax) {
1731: xmlNodePtr *tmp;
1732:
1733: tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734: ctxt->nodeMax * 2 *
1735: sizeof(ctxt->nodeTab[0]));
1736: if (tmp == NULL) {
1737: xmlErrMemory(ctxt, NULL);
1738: return (-1);
1739: }
1740: ctxt->nodeTab = tmp;
1741: ctxt->nodeMax *= 2;
1742: }
1743: if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1745: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1746: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1747: xmlParserMaxDepth);
1748: ctxt->instate = XML_PARSER_EOF;
1749: return(-1);
1750: }
1751: ctxt->nodeTab[ctxt->nodeNr] = value;
1752: ctxt->node = value;
1753: return (ctxt->nodeNr++);
1754: }
1755:
1756: /**
1757: * nodePop:
1758: * @ctxt: an XML parser context
1759: *
1760: * Pops the top element node from the node stack
1761: *
1762: * Returns the node just removed
1763: */
1764: xmlNodePtr
1765: nodePop(xmlParserCtxtPtr ctxt)
1766: {
1767: xmlNodePtr ret;
1768:
1769: if (ctxt == NULL) return(NULL);
1770: if (ctxt->nodeNr <= 0)
1771: return (NULL);
1772: ctxt->nodeNr--;
1773: if (ctxt->nodeNr > 0)
1774: ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775: else
1776: ctxt->node = NULL;
1777: ret = ctxt->nodeTab[ctxt->nodeNr];
1778: ctxt->nodeTab[ctxt->nodeNr] = NULL;
1779: return (ret);
1780: }
1781:
1782: #ifdef LIBXML_PUSH_ENABLED
1783: /**
1784: * nameNsPush:
1785: * @ctxt: an XML parser context
1786: * @value: the element name
1787: * @prefix: the element prefix
1788: * @URI: the element namespace name
1789: *
1790: * Pushes a new element name/prefix/URL on top of the name stack
1791: *
1792: * Returns -1 in case of error, the index in the stack otherwise
1793: */
1794: static int
1795: nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796: const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797: {
1798: if (ctxt->nameNr >= ctxt->nameMax) {
1799: const xmlChar * *tmp;
1800: void **tmp2;
1801: ctxt->nameMax *= 2;
1802: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803: ctxt->nameMax *
1804: sizeof(ctxt->nameTab[0]));
1805: if (tmp == NULL) {
1806: ctxt->nameMax /= 2;
1807: goto mem_error;
1808: }
1809: ctxt->nameTab = tmp;
1810: tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811: ctxt->nameMax * 3 *
1812: sizeof(ctxt->pushTab[0]));
1813: if (tmp2 == NULL) {
1814: ctxt->nameMax /= 2;
1815: goto mem_error;
1816: }
1817: ctxt->pushTab = tmp2;
1818: }
1819: ctxt->nameTab[ctxt->nameNr] = value;
1820: ctxt->name = value;
1821: ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822: ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1823: ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1824: return (ctxt->nameNr++);
1825: mem_error:
1826: xmlErrMemory(ctxt, NULL);
1827: return (-1);
1828: }
1829: /**
1830: * nameNsPop:
1831: * @ctxt: an XML parser context
1832: *
1833: * Pops the top element/prefix/URI name from the name stack
1834: *
1835: * Returns the name just removed
1836: */
1837: static const xmlChar *
1838: nameNsPop(xmlParserCtxtPtr ctxt)
1839: {
1840: const xmlChar *ret;
1841:
1842: if (ctxt->nameNr <= 0)
1843: return (NULL);
1844: ctxt->nameNr--;
1845: if (ctxt->nameNr > 0)
1846: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847: else
1848: ctxt->name = NULL;
1849: ret = ctxt->nameTab[ctxt->nameNr];
1850: ctxt->nameTab[ctxt->nameNr] = NULL;
1851: return (ret);
1852: }
1853: #endif /* LIBXML_PUSH_ENABLED */
1854:
1855: /**
1856: * namePush:
1857: * @ctxt: an XML parser context
1858: * @value: the element name
1859: *
1860: * Pushes a new element name on top of the name stack
1861: *
1862: * Returns -1 in case of error, the index in the stack otherwise
1863: */
1864: int
1865: namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1866: {
1867: if (ctxt == NULL) return (-1);
1868:
1869: if (ctxt->nameNr >= ctxt->nameMax) {
1870: const xmlChar * *tmp;
1871: tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1.1.1.2 misho 1872: ctxt->nameMax * 2 *
1.1 misho 1873: sizeof(ctxt->nameTab[0]));
1874: if (tmp == NULL) {
1875: goto mem_error;
1876: }
1877: ctxt->nameTab = tmp;
1.1.1.2 misho 1878: ctxt->nameMax *= 2;
1.1 misho 1879: }
1880: ctxt->nameTab[ctxt->nameNr] = value;
1881: ctxt->name = value;
1882: return (ctxt->nameNr++);
1883: mem_error:
1884: xmlErrMemory(ctxt, NULL);
1885: return (-1);
1886: }
1887: /**
1888: * namePop:
1889: * @ctxt: an XML parser context
1890: *
1891: * Pops the top element name from the name stack
1892: *
1893: * Returns the name just removed
1894: */
1895: const xmlChar *
1896: namePop(xmlParserCtxtPtr ctxt)
1897: {
1898: const xmlChar *ret;
1899:
1900: if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901: return (NULL);
1902: ctxt->nameNr--;
1903: if (ctxt->nameNr > 0)
1904: ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905: else
1906: ctxt->name = NULL;
1907: ret = ctxt->nameTab[ctxt->nameNr];
1908: ctxt->nameTab[ctxt->nameNr] = NULL;
1909: return (ret);
1910: }
1911:
1912: static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1913: if (ctxt->spaceNr >= ctxt->spaceMax) {
1914: int *tmp;
1915:
1916: ctxt->spaceMax *= 2;
1917: tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919: if (tmp == NULL) {
1920: xmlErrMemory(ctxt, NULL);
1921: ctxt->spaceMax /=2;
1922: return(-1);
1923: }
1924: ctxt->spaceTab = tmp;
1925: }
1926: ctxt->spaceTab[ctxt->spaceNr] = val;
1927: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928: return(ctxt->spaceNr++);
1929: }
1930:
1931: static int spacePop(xmlParserCtxtPtr ctxt) {
1932: int ret;
1933: if (ctxt->spaceNr <= 0) return(0);
1934: ctxt->spaceNr--;
1935: if (ctxt->spaceNr > 0)
1936: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937: else
1938: ctxt->space = &ctxt->spaceTab[0];
1939: ret = ctxt->spaceTab[ctxt->spaceNr];
1940: ctxt->spaceTab[ctxt->spaceNr] = -1;
1941: return(ret);
1942: }
1943:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 *                use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1978:
/*
 * Direct access to the current position of the input; all of these
 * expect a variable named ctxt in scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): non zero when the first n bytes found at s are
 * c1 ... cn, the bytes being compared as unsigned char. Comparison stops
 * at the first mismatch. s may be evaluated several times and therefore
 * must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the input by val xmlChar on the current line, then
 * handle a parameter entity reference starting there and pop the input
 * if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but one xmlChar at a time, keeping the line
 * and column counters right when newlines are crossed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2024:
2025: #define SHRINK if ((ctxt->progressive == 0) && \
2026: (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027: (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2028: xmlSHRINK (ctxt);
2029:
2030: static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031: xmlParserInputShrink(ctxt->input);
2032: if ((*ctxt->input->cur == 0) &&
2033: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034: xmlPopInput(ctxt);
2035: }
2036:
2037: #define GROW if ((ctxt->progressive == 0) && \
2038: (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2039: xmlGROW (ctxt);
2040:
2041: static void xmlGROW (xmlParserCtxtPtr ctxt) {
1.1.1.3 misho 2042: if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2044: ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2045: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2047: ctxt->instate = XML_PARSER_EOF;
2048: }
1.1 misho 2049: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2050: if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2051: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2052: xmlPopInput(ctxt);
2053: }
2054:
/* Skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar on the current line and try to grow
 * the input if a 0 is reached.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long, keeping
 * the line and column counters right, then handle a parameter entity
 * reference starting at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character, its length in xmlChars is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, which is l xmlChars long in
 * the input, to buffer b at index i and advance i accordingly.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2081:
2082: /**
2083: * xmlSkipBlankChars:
2084: * @ctxt: the XML parser context
2085: *
2086: * skip all blanks character found at that point in the input streams.
2087: * It pops up finished entities in the process if allowable at that point.
2088: *
2089: * Returns the number of space chars skipped
2090: */
2091:
2092: int
2093: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2094: int res = 0;
2095:
2096: /*
2097: * It's Okay to use CUR/NEXT here since all the blanks are on
2098: * the ASCII range.
2099: */
2100: if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2101: const xmlChar *cur;
2102: /*
2103: * if we are in the document content, go really fast
2104: */
2105: cur = ctxt->input->cur;
2106: while (IS_BLANK_CH(*cur)) {
2107: if (*cur == '\n') {
2108: ctxt->input->line++; ctxt->input->col = 1;
2109: }
2110: cur++;
2111: res++;
2112: if (*cur == 0) {
2113: ctxt->input->cur = cur;
2114: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115: cur = ctxt->input->cur;
2116: }
2117: }
2118: ctxt->input->cur = cur;
2119: } else {
2120: int cur;
2121: do {
2122: cur = CUR;
2123: while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2124: NEXT;
2125: cur = CUR;
2126: res++;
2127: }
2128: while ((cur == 0) && (ctxt->inputNr > 1) &&
2129: (ctxt->instate != XML_PARSER_COMMENT)) {
2130: xmlPopInput(ctxt);
2131: cur = CUR;
2132: }
2133: /*
2134: * Need to handle support of entities branching here
2135: */
2136: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2138: }
2139: return(res);
2140: }
2141:
2142: /************************************************************************
2143: * *
2144: * Commodity functions to handle entities *
2145: * *
2146: ************************************************************************/
2147:
2148: /**
2149: * xmlPopInput:
2150: * @ctxt: an XML parser context
2151: *
2152: * xmlPopInput: the current input pointed by ctxt->input came to an end
2153: * pop it and return the next char.
2154: *
2155: * Returns the current xmlChar in the parser context
2156: */
2157: xmlChar
2158: xmlPopInput(xmlParserCtxtPtr ctxt) {
2159: if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2160: if (xmlParserDebugEntities)
2161: xmlGenericError(xmlGenericErrorContext,
2162: "Popping input %d\n", ctxt->inputNr);
2163: xmlFreeInputStream(inputPop(ctxt));
2164: if ((*ctxt->input->cur == 0) &&
2165: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166: return(xmlPopInput(ctxt));
2167: return(CUR);
2168: }
2169:
2170: /**
2171: * xmlPushInput:
2172: * @ctxt: an XML parser context
2173: * @input: an XML parser input fragment (entity, XML fragment ...).
2174: *
2175: * xmlPushInput: switch to a new input stream which is stacked on top
2176: * of the previous one(s).
2177: * Returns -1 in case of error or the index in the input stack
2178: */
2179: int
2180: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2181: int ret;
2182: if (input == NULL) return(-1);
2183:
2184: if (xmlParserDebugEntities) {
2185: if ((ctxt->input != NULL) && (ctxt->input->filename))
2186: xmlGenericError(xmlGenericErrorContext,
2187: "%s(%d): ", ctxt->input->filename,
2188: ctxt->input->line);
2189: xmlGenericError(xmlGenericErrorContext,
2190: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2191: }
2192: ret = inputPush(ctxt, input);
1.1.1.3 misho 2193: if (ctxt->instate == XML_PARSER_EOF)
2194: return(-1);
1.1 misho 2195: GROW;
2196: return(ret);
2197: }
2198:
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * Parse a character reference found at the current position of the
 * input of @ctxt and consume it.
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    /* non-zero once the accumulated value went above 0x10FFFF */
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
        /* hexadecimal form: "&#x" hex-digits ";" */
        SKIP(3);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /*
             * count is incremented here and again at the bottom of the
             * loop; once it is above 20 it is reset and GROW is applied.
             */
            if (count++ > 20) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
            }
            /* hex letters are additionally refused while count >= 20 */
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 16 + (CUR - '0');
            else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
                val = val * 16 + (CUR - 'a') + 10;
            else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
                val = val * 16 + (CUR - 'A') + 10;
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
                val = 0;
                break;
            }
            /* remember the overflow, the final check rejects it */
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else if ((RAW == '&') && (NXT(1) == '#')) {
        /* decimal form: "&#" digits ";" */
        SKIP(2);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /* same double-stepped counter as in the hexadecimal loop */
            if (count++ > 20) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(0);
            }
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 10 + (CUR - '0');
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
                val = 0;
                break;
            }
            /* remember the overflow, the final check rejects it */
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else {
        /* not positioned on "&#": val stays 0 and fails the check below */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
                          val);
    }
    return(0);
}
2304:
2305: /**
2306: * xmlParseStringCharRef:
2307: * @ctxt: an XML parser context
2308: * @str: a pointer to an index in the string
2309: *
2310: * parse Reference declarations, variant parsing from a string rather
2311: * than an an input flow.
2312: *
2313: * [66] CharRef ::= '&#' [0-9]+ ';' |
2314: * '&#x' [0-9a-fA-F]+ ';'
2315: *
2316: * [ WFC: Legal Character ]
2317: * Characters referred to using character references must match the
1.1.1.3 misho 2318: * production for Char.
1.1 misho 2319: *
2320: * Returns the value parsed (as an int), 0 in case of error, str will be
2321: * updated to the current value of the index
2322: */
2323: static int
2324: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2325: const xmlChar *ptr;
2326: xmlChar cur;
2327: unsigned int val = 0;
2328: unsigned int outofrange = 0;
2329:
2330: if ((str == NULL) || (*str == NULL)) return(0);
2331: ptr = *str;
2332: cur = *ptr;
2333: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2334: ptr += 3;
2335: cur = *ptr;
2336: while (cur != ';') { /* Non input consuming loop */
1.1.1.3 misho 2337: if ((cur >= '0') && (cur <= '9'))
1.1 misho 2338: val = val * 16 + (cur - '0');
2339: else if ((cur >= 'a') && (cur <= 'f'))
2340: val = val * 16 + (cur - 'a') + 10;
2341: else if ((cur >= 'A') && (cur <= 'F'))
2342: val = val * 16 + (cur - 'A') + 10;
2343: else {
2344: xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2345: val = 0;
2346: break;
2347: }
2348: if (val > 0x10FFFF)
2349: outofrange = val;
2350:
2351: ptr++;
2352: cur = *ptr;
2353: }
2354: if (cur == ';')
2355: ptr++;
2356: } else if ((cur == '&') && (ptr[1] == '#')){
2357: ptr += 2;
2358: cur = *ptr;
2359: while (cur != ';') { /* Non input consuming loops */
1.1.1.3 misho 2360: if ((cur >= '0') && (cur <= '9'))
1.1 misho 2361: val = val * 10 + (cur - '0');
2362: else {
2363: xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2364: val = 0;
2365: break;
2366: }
2367: if (val > 0x10FFFF)
2368: outofrange = val;
2369:
2370: ptr++;
2371: cur = *ptr;
2372: }
2373: if (cur == ';')
2374: ptr++;
2375: } else {
2376: xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2377: return(0);
2378: }
2379: *str = ptr;
2380:
2381: /*
2382: * [ WFC: Legal Character ]
2383: * Characters referred to using character references must match the
1.1.1.3 misho 2384: * production for Char.
1.1 misho 2385: */
2386: if ((IS_CHAR(val) && (outofrange == 0))) {
2387: return(val);
2388: } else {
2389: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390: "xmlParseStringCharRef: invalid xmlChar value %d\n",
2391: val);
2392: }
2393: return(0);
2394: }
2395:
2396: /**
2397: * xmlNewBlanksWrapperInputStream:
2398: * @ctxt: an XML parser context
2399: * @entity: an Entity pointer
2400: *
2401: * Create a new input stream for wrapping
2402: * blanks around a PEReference
2403: *
2404: * Returns the new input stream or NULL
2405: */
1.1.1.3 misho 2406:
1.1 misho 2407: static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1.1.1.3 misho 2408:
1.1 misho 2409: static xmlParserInputPtr
2410: xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411: xmlParserInputPtr input;
2412: xmlChar *buffer;
2413: size_t length;
2414: if (entity == NULL) {
2415: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416: "xmlNewBlanksWrapperInputStream entity\n");
2417: return(NULL);
2418: }
2419: if (xmlParserDebugEntities)
2420: xmlGenericError(xmlGenericErrorContext,
2421: "new blanks wrapper for entity: %s\n", entity->name);
2422: input = xmlNewInputStream(ctxt);
2423: if (input == NULL) {
2424: return(NULL);
2425: }
2426: length = xmlStrlen(entity->name) + 5;
2427: buffer = xmlMallocAtomic(length);
2428: if (buffer == NULL) {
2429: xmlErrMemory(ctxt, NULL);
2430: xmlFree(input);
1.1.1.3 misho 2431: return(NULL);
1.1 misho 2432: }
2433: buffer [0] = ' ';
2434: buffer [1] = '%';
2435: buffer [length-3] = ';';
2436: buffer [length-2] = ' ';
2437: buffer [length-1] = 0;
2438: memcpy(buffer + 2, entity->name, length - 5);
2439: input->free = deallocblankswrapper;
2440: input->base = buffer;
2441: input->cur = buffer;
2442: input->length = length;
2443: input->end = &buffer[length];
2444: return(input);
2445: }
2446:
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * The function does nothing unless the current byte is '%'. The
 * reference is expanded in two steps: a first call pushes a blanks
 * wrapper input (" %name; "), and the reference met again while reading
 * that wrapper pushes the entity content itself.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Every state listed below returns, some after raising an error,
     * except XML_PARSER_DTD which may break out to the expansion code.
     */
    switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
        case XML_PARSER_COMMENT:
            return;
        case XML_PARSER_START_TAG:
            return;
        case XML_PARSER_END_TAG:
            return;
        case XML_PARSER_EOF:
            xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
            return;
        case XML_PARSER_PROLOG:
        case XML_PARSER_START:
        case XML_PARSER_MISC:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
            return;
        case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
        case XML_PARSER_SYSTEM_LITERAL:
        case XML_PARSER_PUBLIC_LITERAL:
            /* we just ignore it there */
            return;
        case XML_PARSER_EPILOG:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
            return;
        case XML_PARSER_ENTITY_VALUE:
            /*
             * NOTE: in the case of entity values, we don't do the
             *       substitution here since we need the literal
             *       entity value to be able to save the internal
             *       subset of the document.
             *       This will be handled by xmlStringDecodeEntities
             */
            return;
        case XML_PARSER_DTD:
            /*
             * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
             * In the internal DTD subset, parameter-entity references
             * can occur only where markup declarations can occur, not
             * within markup declarations.
             * In that case this is handled in xmlParseMarkupDecl
             */
            if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                return;
            /* a '%' followed by a blank or a 0 byte is left alone */
            if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
                return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* step over the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "PEReference: %s\n", name);
    if (name == NULL) {
        xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
        if (RAW == ';') {
            NEXT;
            /* look the entity up through the SAX handler, if any */
            if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
                entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            if (entity == NULL) {

                /*
                 * [ WFC: Entity Declared ]
                 * In a document without any DTD, a document with only an
                 * internal DTD subset which contains no parameter entity
                 * references, or a document with "standalone='yes'", ...
                 * ... The declaration of a parameter entity must precede
                 * any reference to it...
                 */
                if ((ctxt->standalone == 1) ||
                    ((ctxt->hasExternalSubset == 0) &&
                     (ctxt->hasPErefs == 0))) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                         "PEReference: %%%s; not found\n", name);
                } else {
                    /*
                     * [ VC: Entity Declared ]
                     * In a document with an external subset or external
                     * parameter entities with "standalone='no'", ...
                     * ... The declaration of a parameter entity must precede
                     * any reference to it...
                     */
                    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
                        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                         "PEReference: %%%s; not found\n",
                                         name, NULL);
                    } else
                        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                      "PEReference: %%%s; not found\n",
                                      name, NULL);
                    /* in both cases the document is flagged as not valid */
                    ctxt->valid = 0;
                }
            } else if (ctxt->input->free != deallocblankswrapper) {
                    /*
                     * The current input is not a blanks wrapper: push one,
                     * it contains the same reference surrounded by blanks.
                     */
                    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
                    if (xmlPushInput(ctxt, input) < 0)
                        return;
            } else {
                /* reference read from a blanks wrapper: push the entity */
                if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
                    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
                    xmlChar start[4];
                    xmlCharEncoding enc;

                    /*
                     * Note: external parsed entities will not be loaded, it is
                     * not required for a non-validating parser, unless the
                     * option of validating, or substituting entities were
                     * given. Doing so is far more secure as the parser will
                     * only process data coming from the document entity by
                     * default.
                     */
                    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
                        ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
                        (ctxt->validate == 0))
                        return;

                    /*
                     * handle the extra spaces added before and after
                     * c.f. http://www.w3.org/TR/REC-xml#as-PE
                     * this is done independently.
                     */
                    input = xmlNewEntityInputStream(ctxt, entity);
                    if (xmlPushInput(ctxt, input) < 0)
                        return;

                    /*
                     * Get the 4 first bytes and decode the charset
                     * if enc != XML_CHAR_ENCODING_NONE
                     * plug some encoding conversion routines.
                     * Note that, since we may have some non-UTF8
                     * encoding (like UTF16, bug 135229), the 'length'
                     * is not known, but we can calculate based upon
                     * the amount of data in the buffer.
                     */
                    /* GROW already ends with ';', none is written here */
                    GROW
                    if (ctxt->instate == XML_PARSER_EOF)
                        return;
                    if ((ctxt->input->end - ctxt->input->cur)>=4) {
                        start[0] = RAW;
                        start[1] = NXT(1);
                        start[2] = NXT(2);
                        start[3] = NXT(3);
                        enc = xmlDetectCharEncoding(start, 4);
                        if (enc != XML_CHAR_ENCODING_NONE) {
                            xmlSwitchEncoding(ctxt, enc);
                        }
                    }

                    /* an external entity may start with a text declaration */
                    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                        (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
                        (IS_BLANK_CH(NXT(5)))) {
                        xmlParseTextDecl(ctxt);
                    }
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
                             "PEReference: %s is not a parameter entity\n",
                                      name);
                }
            }
        } else {
            xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
        }
    }
}
2659:
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n bytes; the buffer is
 * reallocated through a temporary pointer, so the original pointer is
 * still held by the caller when the reallocation fails.
 * buffer##_size is expected to be a size_t
 * mem_error: label expected in the calling function, jumped to when the
 * new size wraps around or when the reallocation fails
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2674:
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * Character references are always replaced; entity references and
 * parameter entity references are replaced only when the matching flag
 * is set in @what. Processing stops at the first 0 character, at one of
 * the end markers, or once @len bytes have been consumed.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, when
 *      the nesting depth limit is exceeded, on an entity error or on
 *      memory allocation failure.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * Recursion guard: depth above 40 is refused unless XML_PARSE_HUGE is
     * set, and depth above 1024 is refused in every case.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: str is advanced by the callee */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val != 0) {
                COPY_BUF(0,buffer,nbchars,val);
            }
            /* keep XML_PARSER_BUFFER_SIZE bytes of slack after each write */
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
                (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: copy the first byte of its content */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* decode the entity content recursively, one level deeper */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;

                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            } else if (ent != NULL) {
                /* entity without content: emit the reference "&name;" */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                /* try to load the content if the entity has none yet */
                if (ent->content == NULL) {
                    xmlLoadEntityContent(ctxt, ent);
                }
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            }
        } else {
            /* plain character: copy it and step over its l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* mem_error reports the failure, then shares the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2853:
2854: /**
2855: * xmlStringDecodeEntities:
2856: * @ctxt: the parser context
2857: * @str: the input string
2858: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2859: * @end: an end marker xmlChar, 0 if none
2860: * @end2: an end marker xmlChar, 0 if none
2861: * @end3: an end marker xmlChar, 0 if none
1.1.1.3 misho 2862: *
1.1 misho 2863: * Takes a entity string content and process to do the adequate substitutions.
2864: *
2865: * [67] Reference ::= EntityRef | CharRef
2866: *
2867: * [69] PEReference ::= '%' Name ';'
2868: *
2869: * Returns A newly allocated string with the substitution done. The caller
2870: * must deallocate it !
2871: */
2872: xmlChar *
2873: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2874: xmlChar end, xmlChar end2, xmlChar end3) {
2875: if ((ctxt == NULL) || (str == NULL)) return(NULL);
2876: return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2877: end, end2, end3));
2878: }
2879:
2880: /************************************************************************
2881: * *
2882: * Commodity functions, cleanup needed ? *
2883: * *
2884: ************************************************************************/
2885:
2886: /**
2887: * areBlanks:
2888: * @ctxt: an XML parser context
2889: * @str: a xmlChar *
2890: * @len: the size of @str
2891: * @blank_chars: we know the chars are blanks
2892: *
2893: * Is this a sequence of blank chars that one can ignore ?
2894: *
2895: * Returns 1 if ignorable 0 otherwise.
2896: */
2897:
2898: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2899: int blank_chars) {
2900: int i, ret;
2901: xmlNodePtr lastChild;
2902:
2903: /*
2904: * Don't spend time trying to differentiate them, the same callback is
2905: * used !
2906: */
2907: if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2908: return(0);
2909:
2910: /*
2911: * Check for xml:space value.
2912: */
2913: if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2914: (*(ctxt->space) == -2))
2915: return(0);
2916:
2917: /*
2918: * Check that the string is made of blanks
2919: */
2920: if (blank_chars == 0) {
2921: for (i = 0;i < len;i++)
2922: if (!(IS_BLANK_CH(str[i]))) return(0);
2923: }
2924:
2925: /*
2926: * Look if the element is mixed content in the DTD if available
2927: */
2928: if (ctxt->node == NULL) return(0);
2929: if (ctxt->myDoc != NULL) {
2930: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2931: if (ret == 0) return(1);
2932: if (ret == 1) return(0);
2933: }
2934:
2935: /*
2936: * Otherwise, heuristic :-\
2937: */
2938: if ((RAW != '<') && (RAW != 0xD)) return(0);
2939: if ((ctxt->node->children == NULL) &&
2940: (RAW == '<') && (NXT(1) == '/')) return(0);
2941:
2942: lastChild = xmlGetLastChild(ctxt->node);
2943: if (lastChild == NULL) {
2944: if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2945: (ctxt->node->content != NULL)) return(0);
2946: } else if (xmlNodeIsText(lastChild))
2947: return(0);
2948: else if ((ctxt->node->children != NULL) &&
2949: (xmlNodeIsText(ctxt->node->children)))
2950: return(0);
2951: return(1);
2952: }
2953:
2954: /************************************************************************
2955: * *
2956: * Extra stuff for namespace support *
2957: * Relates to http://www.w3.org/TR/WD-xml-names *
2958: * *
2959: ************************************************************************/
2960:
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name string to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Names starting with ':' or ending with their first ':' are returned
 * as a whole, with no prefix; the same applies to names starting with
 * "xml:" unless XML_XML_NAMESPACE is defined.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. NULL is returned if @prefix or @name is
 *   NULL or on memory allocation failure. The strings returned are newly
 *   allocated.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    /* on-stack buffer used as long as a part fits in XML_MAX_NAMELEN */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    /* heap buffer taking over from buf for longer parts */
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /* first part: everything up to the first ':' or the end of the name */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            /* double the buffer while fewer than 10 bytes are left */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* a ':' ending the name: hand back the whole name, no prefix */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        /* the heap buffer becomes the result, reset for the second part */
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* what was read so far is the prefix, now read the local part */
        c = *cur;
        *prefix = ret;
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* an error is raised but the split is carried on */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                /* double the buffer while fewer than 10 bytes are left */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3131:
3132: /************************************************************************
3133: * *
3134: * The parser itself *
3135: * Relates to http://www.w3.org/TR/REC-xml *
3136: * *
3137: ************************************************************************/
3138:
3139: /************************************************************************
3140: * *
3141: * Routines to parse Name, NCName and NmToken *
3142: * *
3143: ************************************************************************/
/*
 * Call counters, only compiled when DEBUG is defined. Each one is
 * incremented on entry of the parsing routine it is named after
 * (xmlParseName, xmlParseNmtoken, xmlParseNCName, xmlParseNCNameComplex,
 * xmlParseNameComplex and xmlParseStringName respectively).
 */
3144: #ifdef DEBUG
3145: static unsigned long nbParseName = 0;
3146: static unsigned long nbParseNmToken = 0;
3147: static unsigned long nbParseNCName = 0;
3148: static unsigned long nbParseNCNameComplex = 0;
3149: static unsigned long nbParseNameComplex = 0;
3150: static unsigned long nbParseStringName = 0;
3151: #endif
3152:
3153: /*
3154: * The two following functions are related to the change of accepted
3155: * characters for Name and NmToken in the Revision 5 of XML-1.0
3156: * They correspond to the modified production [4] and the new production [4a]
3157: * changes in that revision. Also note that the macros used for the
3158: * productions Letter, Digit, CombiningChar and Extender are not needed
3159: * anymore.
3160: * We still keep compatibility to pre-revision5 parsing semantic if the
3161: * new XML_PARSE_OLD10 option is given to the parser.
3162: */
3163: static int
3164: xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3165: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166: /*
3167: * Use the new checks of production [4] [4a] amd [5] of the
3168: * Update 5 of XML-1.0
3169: */
3170: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3171: (((c >= 'a') && (c <= 'z')) ||
3172: ((c >= 'A') && (c <= 'Z')) ||
3173: (c == '_') || (c == ':') ||
3174: ((c >= 0xC0) && (c <= 0xD6)) ||
3175: ((c >= 0xD8) && (c <= 0xF6)) ||
3176: ((c >= 0xF8) && (c <= 0x2FF)) ||
3177: ((c >= 0x370) && (c <= 0x37D)) ||
3178: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179: ((c >= 0x200C) && (c <= 0x200D)) ||
3180: ((c >= 0x2070) && (c <= 0x218F)) ||
3181: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185: ((c >= 0x10000) && (c <= 0xEFFFF))))
3186: return(1);
3187: } else {
3188: if (IS_LETTER(c) || (c == '_') || (c == ':'))
3189: return(1);
3190: }
3191: return(0);
3192: }
3193:
3194: static int
3195: xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3196: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3197: /*
3198: * Use the new checks of production [4] [4a] amd [5] of the
3199: * Update 5 of XML-1.0
3200: */
3201: if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3202: (((c >= 'a') && (c <= 'z')) ||
3203: ((c >= 'A') && (c <= 'Z')) ||
3204: ((c >= '0') && (c <= '9')) || /* !start */
3205: (c == '_') || (c == ':') ||
3206: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3207: ((c >= 0xC0) && (c <= 0xD6)) ||
3208: ((c >= 0xD8) && (c <= 0xF6)) ||
3209: ((c >= 0xF8) && (c <= 0x2FF)) ||
3210: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3211: ((c >= 0x370) && (c <= 0x37D)) ||
3212: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3213: ((c >= 0x200C) && (c <= 0x200D)) ||
3214: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3215: ((c >= 0x2070) && (c <= 0x218F)) ||
3216: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3217: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3218: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3219: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3220: ((c >= 0x10000) && (c <= 0xEFFFF))))
3221: return(1);
3222: } else {
3223: if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3224: (c == '.') || (c == '-') ||
1.1.1.3 misho 3225: (c == '_') || (c == ':') ||
1.1 misho 3226: (IS_COMBINING(c)) ||
3227: (IS_EXTENDER(c)))
3228: return(1);
3229: }
3230: return(0);
3231: }
3232:
3233: static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3234: int *len, int *alloc, int normalize);
3235:
3236: static const xmlChar *
3237: xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3238: int len = 0, l;
3239: int c;
3240: int count = 0;
3241:
3242: #ifdef DEBUG
3243: nbParseNameComplex++;
3244: #endif
3245:
3246: /*
3247: * Handler for more complex cases
3248: */
3249: GROW;
1.1.1.3 misho 3250: if (ctxt->instate == XML_PARSER_EOF)
3251: return(NULL);
1.1 misho 3252: c = CUR_CHAR(l);
3253: if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3254: /*
3255: * Use the new checks of production [4] [4a] amd [5] of the
3256: * Update 5 of XML-1.0
3257: */
3258: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3259: (!(((c >= 'a') && (c <= 'z')) ||
3260: ((c >= 'A') && (c <= 'Z')) ||
3261: (c == '_') || (c == ':') ||
3262: ((c >= 0xC0) && (c <= 0xD6)) ||
3263: ((c >= 0xD8) && (c <= 0xF6)) ||
3264: ((c >= 0xF8) && (c <= 0x2FF)) ||
3265: ((c >= 0x370) && (c <= 0x37D)) ||
3266: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3267: ((c >= 0x200C) && (c <= 0x200D)) ||
3268: ((c >= 0x2070) && (c <= 0x218F)) ||
3269: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273: ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3274: return(NULL);
3275: }
3276: len += l;
3277: NEXTL(l);
3278: c = CUR_CHAR(l);
3279: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3280: (((c >= 'a') && (c <= 'z')) ||
3281: ((c >= 'A') && (c <= 'Z')) ||
3282: ((c >= '0') && (c <= '9')) || /* !start */
3283: (c == '_') || (c == ':') ||
3284: (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3285: ((c >= 0xC0) && (c <= 0xD6)) ||
3286: ((c >= 0xD8) && (c <= 0xF6)) ||
3287: ((c >= 0xF8) && (c <= 0x2FF)) ||
3288: ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3289: ((c >= 0x370) && (c <= 0x37D)) ||
3290: ((c >= 0x37F) && (c <= 0x1FFF)) ||
3291: ((c >= 0x200C) && (c <= 0x200D)) ||
3292: ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3293: ((c >= 0x2070) && (c <= 0x218F)) ||
3294: ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3295: ((c >= 0x3001) && (c <= 0xD7FF)) ||
3296: ((c >= 0xF900) && (c <= 0xFDCF)) ||
3297: ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3298: ((c >= 0x10000) && (c <= 0xEFFFF))
3299: )) {
1.1.1.3 misho 3300: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3301: count = 0;
3302: GROW;
1.1.1.3 misho 3303: if (ctxt->instate == XML_PARSER_EOF)
3304: return(NULL);
1.1 misho 3305: }
3306: len += l;
3307: NEXTL(l);
3308: c = CUR_CHAR(l);
3309: }
3310: } else {
3311: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3312: (!IS_LETTER(c) && (c != '_') &&
3313: (c != ':'))) {
3314: return(NULL);
3315: }
3316: len += l;
3317: NEXTL(l);
3318: c = CUR_CHAR(l);
3319:
3320: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3321: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3322: (c == '.') || (c == '-') ||
1.1.1.3 misho 3323: (c == '_') || (c == ':') ||
1.1 misho 3324: (IS_COMBINING(c)) ||
3325: (IS_EXTENDER(c)))) {
1.1.1.3 misho 3326: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3327: count = 0;
3328: GROW;
1.1.1.3 misho 3329: if (ctxt->instate == XML_PARSER_EOF)
3330: return(NULL);
1.1 misho 3331: }
3332: len += l;
3333: NEXTL(l);
3334: c = CUR_CHAR(l);
1.1.1.3 misho 3335: if (c == 0) {
3336: count = 0;
3337: GROW;
3338: if (ctxt->instate == XML_PARSER_EOF)
3339: return(NULL);
3340: c = CUR_CHAR(l);
3341: }
1.1 misho 3342: }
3343: }
1.1.1.3 misho 3344: if ((len > XML_MAX_NAME_LENGTH) &&
3345: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3346: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3347: return(NULL);
3348: }
1.1 misho 3349: if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3350: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3351: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3352: }
3353:
3354: /**
3355: * xmlParseName:
3356: * @ctxt: an XML parser context
3357: *
3358: * parse an XML name.
3359: *
3360: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3361: * CombiningChar | Extender
3362: *
3363: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3364: *
3365: * [6] Names ::= Name (#x20 Name)*
3366: *
3367: * Returns the Name parsed or NULL
3368: */
3369:
3370: const xmlChar *
3371: xmlParseName(xmlParserCtxtPtr ctxt) {
3372: const xmlChar *in;
3373: const xmlChar *ret;
3374: int count = 0;
3375:
3376: GROW;
3377:
3378: #ifdef DEBUG
3379: nbParseName++;
3380: #endif
3381:
3382: /*
3383: * Accelerator for simple ASCII names
3384: */
3385: in = ctxt->input->cur;
3386: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387: ((*in >= 0x41) && (*in <= 0x5A)) ||
3388: (*in == '_') || (*in == ':')) {
3389: in++;
3390: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391: ((*in >= 0x41) && (*in <= 0x5A)) ||
3392: ((*in >= 0x30) && (*in <= 0x39)) ||
3393: (*in == '_') || (*in == '-') ||
3394: (*in == ':') || (*in == '.'))
3395: in++;
3396: if ((*in > 0) && (*in < 0x80)) {
3397: count = in - ctxt->input->cur;
1.1.1.3 misho 3398: if ((count > XML_MAX_NAME_LENGTH) &&
3399: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3400: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3401: return(NULL);
3402: }
1.1 misho 3403: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3404: ctxt->input->cur = in;
3405: ctxt->nbChars += count;
3406: ctxt->input->col += count;
3407: if (ret == NULL)
3408: xmlErrMemory(ctxt, NULL);
3409: return(ret);
3410: }
3411: }
3412: /* accelerator for special cases */
3413: return(xmlParseNameComplex(ctxt));
3414: }
3415:
3416: static const xmlChar *
3417: xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3418: int len = 0, l;
3419: int c;
3420: int count = 0;
3421:
3422: #ifdef DEBUG
3423: nbParseNCNameComplex++;
3424: #endif
3425:
3426: /*
3427: * Handler for more complex cases
3428: */
3429: GROW;
3430: c = CUR_CHAR(l);
3431: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3432: (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3433: return(NULL);
3434: }
3435:
3436: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3437: (xmlIsNameChar(ctxt, c) && (c != ':'))) {
1.1.1.3 misho 3438: if (count++ > XML_PARSER_CHUNK_SIZE) {
3439: if ((len > XML_MAX_NAME_LENGTH) &&
3440: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3441: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3442: return(NULL);
3443: }
1.1 misho 3444: count = 0;
3445: GROW;
1.1.1.3 misho 3446: if (ctxt->instate == XML_PARSER_EOF)
3447: return(NULL);
1.1 misho 3448: }
3449: len += l;
3450: NEXTL(l);
3451: c = CUR_CHAR(l);
1.1.1.3 misho 3452: if (c == 0) {
3453: count = 0;
3454: GROW;
3455: if (ctxt->instate == XML_PARSER_EOF)
3456: return(NULL);
3457: c = CUR_CHAR(l);
3458: }
3459: }
3460: if ((len > XML_MAX_NAME_LENGTH) &&
3461: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3462: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3463: return(NULL);
1.1 misho 3464: }
3465: return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3466: }
3467:
3468: /**
3469: * xmlParseNCName:
3470: * @ctxt: an XML parser context
1.1.1.3 misho 3471: * @len: length of the string parsed
1.1 misho 3472: *
3473: * parse an XML name.
3474: *
3475: * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3476: * CombiningChar | Extender
3477: *
3478: * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3479: *
3480: * Returns the Name parsed or NULL
3481: */
3482:
3483: static const xmlChar *
3484: xmlParseNCName(xmlParserCtxtPtr ctxt) {
3485: const xmlChar *in;
3486: const xmlChar *ret;
3487: int count = 0;
3488:
3489: #ifdef DEBUG
3490: nbParseNCName++;
3491: #endif
3492:
3493: /*
3494: * Accelerator for simple ASCII names
3495: */
3496: in = ctxt->input->cur;
3497: if (((*in >= 0x61) && (*in <= 0x7A)) ||
3498: ((*in >= 0x41) && (*in <= 0x5A)) ||
3499: (*in == '_')) {
3500: in++;
3501: while (((*in >= 0x61) && (*in <= 0x7A)) ||
3502: ((*in >= 0x41) && (*in <= 0x5A)) ||
3503: ((*in >= 0x30) && (*in <= 0x39)) ||
3504: (*in == '_') || (*in == '-') ||
3505: (*in == '.'))
3506: in++;
3507: if ((*in > 0) && (*in < 0x80)) {
3508: count = in - ctxt->input->cur;
1.1.1.3 misho 3509: if ((count > XML_MAX_NAME_LENGTH) &&
3510: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3511: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3512: return(NULL);
3513: }
1.1 misho 3514: ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3515: ctxt->input->cur = in;
3516: ctxt->nbChars += count;
3517: ctxt->input->col += count;
3518: if (ret == NULL) {
3519: xmlErrMemory(ctxt, NULL);
3520: }
3521: return(ret);
3522: }
3523: }
3524: return(xmlParseNCNameComplex(ctxt));
3525: }
3526:
3527: /**
3528: * xmlParseNameAndCompare:
3529: * @ctxt: an XML parser context
3530: *
3531: * parse an XML name and compares for match
3532: * (specialized for endtag parsing)
3533: *
3534: * Returns NULL for an illegal name, (xmlChar*) 1 for success
3535: * and the name for mismatch
3536: */
3537:
3538: static const xmlChar *
3539: xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3540: register const xmlChar *cmp = other;
3541: register const xmlChar *in;
3542: const xmlChar *ret;
3543:
3544: GROW;
1.1.1.3 misho 3545: if (ctxt->instate == XML_PARSER_EOF)
3546: return(NULL);
1.1 misho 3547:
3548: in = ctxt->input->cur;
3549: while (*in != 0 && *in == *cmp) {
3550: ++in;
3551: ++cmp;
3552: ctxt->input->col++;
3553: }
3554: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3555: /* success */
3556: ctxt->input->cur = in;
3557: return (const xmlChar*) 1;
3558: }
3559: /* failure (or end of input buffer), check with full function */
3560: ret = xmlParseName (ctxt);
3561: /* strings coming from the dictionnary direct compare possible */
3562: if (ret == other) {
3563: return (const xmlChar*) 1;
3564: }
3565: return ret;
3566: }
3567:
3568: /**
3569: * xmlParseStringName:
3570: * @ctxt: an XML parser context
3571: * @str: a pointer to the string pointer (IN/OUT)
3572: *
3573: * parse an XML name.
3574: *
3575: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3576: * CombiningChar | Extender
3577: *
3578: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3579: *
3580: * [6] Names ::= Name (#x20 Name)*
3581: *
1.1.1.3 misho 3582: * Returns the Name parsed or NULL. The @str pointer
1.1 misho 3583: * is updated to the current location in the string.
3584: */
3585:
3586: static xmlChar *
3587: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3588: xmlChar buf[XML_MAX_NAMELEN + 5];
3589: const xmlChar *cur = *str;
3590: int len = 0, l;
3591: int c;
3592:
3593: #ifdef DEBUG
3594: nbParseStringName++;
3595: #endif
3596:
3597: c = CUR_SCHAR(cur, l);
3598: if (!xmlIsNameStartChar(ctxt, c)) {
3599: return(NULL);
3600: }
3601:
3602: COPY_BUF(l,buf,len,c);
3603: cur += l;
3604: c = CUR_SCHAR(cur, l);
3605: while (xmlIsNameChar(ctxt, c)) {
3606: COPY_BUF(l,buf,len,c);
3607: cur += l;
3608: c = CUR_SCHAR(cur, l);
3609: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3610: /*
3611: * Okay someone managed to make a huge name, so he's ready to pay
3612: * for the processing speed.
3613: */
3614: xmlChar *buffer;
3615: int max = len * 2;
3616:
3617: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3618: if (buffer == NULL) {
3619: xmlErrMemory(ctxt, NULL);
3620: return(NULL);
3621: }
3622: memcpy(buffer, buf, len);
3623: while (xmlIsNameChar(ctxt, c)) {
3624: if (len + 10 > max) {
3625: xmlChar *tmp;
1.1.1.3 misho 3626:
3627: if ((len > XML_MAX_NAME_LENGTH) &&
3628: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3629: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3630: xmlFree(buffer);
3631: return(NULL);
3632: }
1.1 misho 3633: max *= 2;
3634: tmp = (xmlChar *) xmlRealloc(buffer,
3635: max * sizeof(xmlChar));
3636: if (tmp == NULL) {
3637: xmlErrMemory(ctxt, NULL);
3638: xmlFree(buffer);
3639: return(NULL);
3640: }
3641: buffer = tmp;
3642: }
3643: COPY_BUF(l,buffer,len,c);
3644: cur += l;
3645: c = CUR_SCHAR(cur, l);
3646: }
3647: buffer[len] = 0;
3648: *str = cur;
3649: return(buffer);
3650: }
3651: }
1.1.1.3 misho 3652: if ((len > XML_MAX_NAME_LENGTH) &&
3653: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3654: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3655: return(NULL);
3656: }
1.1 misho 3657: *str = cur;
3658: return(xmlStrndup(buf, len));
3659: }
3660:
3661: /**
3662: * xmlParseNmtoken:
3663: * @ctxt: an XML parser context
3664: *
3665: * parse an XML Nmtoken.
3666: *
3667: * [7] Nmtoken ::= (NameChar)+
3668: *
3669: * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3670: *
3671: * Returns the Nmtoken parsed or NULL
3672: */
3673:
3674: xmlChar *
3675: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3676: xmlChar buf[XML_MAX_NAMELEN + 5];
3677: int len = 0, l;
3678: int c;
3679: int count = 0;
3680:
3681: #ifdef DEBUG
3682: nbParseNmToken++;
3683: #endif
3684:
3685: GROW;
1.1.1.3 misho 3686: if (ctxt->instate == XML_PARSER_EOF)
3687: return(NULL);
1.1 misho 3688: c = CUR_CHAR(l);
3689:
3690: while (xmlIsNameChar(ctxt, c)) {
1.1.1.3 misho 3691: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3692: count = 0;
3693: GROW;
3694: }
3695: COPY_BUF(l,buf,len,c);
3696: NEXTL(l);
3697: c = CUR_CHAR(l);
1.1.1.3 misho 3698: if (c == 0) {
3699: count = 0;
3700: GROW;
3701: if (ctxt->instate == XML_PARSER_EOF)
3702: return(NULL);
3703: c = CUR_CHAR(l);
3704: }
1.1 misho 3705: if (len >= XML_MAX_NAMELEN) {
3706: /*
3707: * Okay someone managed to make a huge token, so he's ready to pay
3708: * for the processing speed.
3709: */
3710: xmlChar *buffer;
3711: int max = len * 2;
3712:
3713: buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3714: if (buffer == NULL) {
3715: xmlErrMemory(ctxt, NULL);
3716: return(NULL);
3717: }
3718: memcpy(buffer, buf, len);
3719: while (xmlIsNameChar(ctxt, c)) {
1.1.1.3 misho 3720: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 3721: count = 0;
3722: GROW;
1.1.1.3 misho 3723: if (ctxt->instate == XML_PARSER_EOF) {
3724: xmlFree(buffer);
3725: return(NULL);
3726: }
1.1 misho 3727: }
3728: if (len + 10 > max) {
3729: xmlChar *tmp;
3730:
1.1.1.3 misho 3731: if ((max > XML_MAX_NAME_LENGTH) &&
3732: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3733: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3734: xmlFree(buffer);
3735: return(NULL);
3736: }
1.1 misho 3737: max *= 2;
3738: tmp = (xmlChar *) xmlRealloc(buffer,
3739: max * sizeof(xmlChar));
3740: if (tmp == NULL) {
3741: xmlErrMemory(ctxt, NULL);
3742: xmlFree(buffer);
3743: return(NULL);
3744: }
3745: buffer = tmp;
3746: }
3747: COPY_BUF(l,buffer,len,c);
3748: NEXTL(l);
3749: c = CUR_CHAR(l);
3750: }
3751: buffer[len] = 0;
3752: return(buffer);
3753: }
3754: }
3755: if (len == 0)
3756: return(NULL);
1.1.1.3 misho 3757: if ((len > XML_MAX_NAME_LENGTH) &&
3758: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3759: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760: return(NULL);
3761: }
1.1 misho 3762: return(xmlStrndup(buf, len));
3763: }
3764:
3765: /**
3766: * xmlParseEntityValue:
3767: * @ctxt: an XML parser context
3768: * @orig: if non-NULL store a copy of the original entity value
3769: *
3770: * parse a value for ENTITY declarations
3771: *
3772: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3773: * "'" ([^%&'] | PEReference | Reference)* "'"
3774: *
3775: * Returns the EntityValue parsed with reference substituted or NULL
3776: */
3777:
/*
 * Implementation notes for xmlParseEntityValue():
 *  1. the raw literal is copied into a heap buffer (buf) up to the
 *     closing quote seen while back on the input that was current when
 *     the literal started;
 *  2. buf is scanned for '%' and '&' (when not followed by '#'): each
 *     must be followed by a name and a ';', otherwise an error is raised;
 *  3. if the closing quote was found, buf is passed to
 *     xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF and that result
 *     is returned; buf itself is handed to the caller through @orig when
 *     @orig is not NULL and freed otherwise.
 * NULL is returned when the literal does not start with a quote, on
 * allocation failure, when the parser reaches the XML_PARSER_EOF state
 * or when the closing quote is missing; @orig is not written then.
 */
3778: xmlChar *
3779: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3780: xmlChar *buf = NULL;
3781: int len = 0;
3782: int size = XML_PARSER_BUFFER_SIZE;
3783: int c, l;
3784: xmlChar stop;
3785: xmlChar *ret = NULL;
3786: const xmlChar *cur = NULL;
3787: xmlParserInputPtr input;
3788:
/* the opening quote decides which character terminates the literal */
3789: if (RAW == '"') stop = '"';
3790: else if (RAW == '\'') stop = '\'';
3791: else {
3792: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3793: return(NULL);
3794: }
3795: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3796: if (buf == NULL) {
3797: xmlErrMemory(ctxt, NULL);
3798: return(NULL);
3799: }
3800:
3801: /*
3802: * The content of the entity definition is copied in a buffer.
3803: */
3804:
3805: ctxt->instate = XML_PARSER_ENTITY_VALUE;
/* remember the starting input: only a quote found there ends the loop */
3806: input = ctxt->input;
3807: GROW;
1.1.1.3 misho 3808: if (ctxt->instate == XML_PARSER_EOF) {
3809: xmlFree(buf);
3810: return(NULL);
3811: }
/* skip the opening quote */
1.1 misho 3812: NEXT;
3813: c = CUR_CHAR(l);
3814: /*
3815: * NOTE: 4.4.5 Included in Literal
3816: * When a parameter entity reference appears in a literal entity
3817: * value, ... a single or double quote character in the replacement
3818: * text is always treated as a normal data character and will not
1.1.1.3 misho 3819: * terminate the literal.
1.1 misho 3820: * In practice it means we stop the loop only when back at parsing
3821: * the initial entity and the quote is found
3822: */
1.1.1.3 misho 3823: while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3824: (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
/* keep room for one more character plus the terminating 0 */
1.1 misho 3825: if (len + 5 >= size) {
3826: xmlChar *tmp;
3827:
3828: size *= 2;
3829: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3830: if (tmp == NULL) {
3831: xmlErrMemory(ctxt, NULL);
3832: xmlFree(buf);
3833: return(NULL);
3834: }
3835: buf = tmp;
3836: }
3837: COPY_BUF(l,buf,len,c);
3838: NEXTL(l);
3839: /*
3840: * Pop-up of finished entities.
3841: */
3842: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3843: xmlPopInput(ctxt);
3844:
3845: GROW;
3846: c = CUR_CHAR(l);
/* a 0 here may only mean the buffered data ran out: retry once */
3847: if (c == 0) {
3848: GROW;
3849: c = CUR_CHAR(l);
3850: }
3851: }
3852: buf[len] = 0;
1.1.1.3 misho 3853: if (ctxt->instate == XML_PARSER_EOF) {
3854: xmlFree(buf);
3855: return(NULL);
3856: }
1.1 misho 3857:
3858: /*
3859: * Raise problem w.r.t. '&' and '%' being used in non-entities
3860: * reference constructs. Note Charref will be handled in
3861: * xmlStringDecodeEntities()
3862: */
3863: cur = buf;
3864: while (*cur != 0) { /* non input consuming */
3865: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3866: xmlChar *name;
/* tmp keeps the introducer ('%' or '&') for the checks below */
3867: xmlChar tmp = *cur;
3868:
3869: cur++;
/* on success cur is moved past the name by xmlParseStringName() */
3870: name = xmlParseStringName(ctxt, &cur);
3871: if ((name == NULL) || (*cur != ';')) {
3872: xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3873: "EntityValue: '%c' forbidden except for entities references\n",
3874: tmp);
3875: }
/* '%' seen while inSubset is 1 and only one input is on the stack */
3876: if ((tmp == '%') && (ctxt->inSubset == 1) &&
3877: (ctxt->inputNr == 1)) {
3878: xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3879: }
3880: if (name != NULL)
3881: xmlFree(name);
3882: if (*cur == 0)
3883: break;
3884: }
3885: cur++;
3886: }
3887:
3888: /*
3889: * Then PEReference entities are substituted.
3890: */
/* c still holds the character which ended the copy loop above */
3891: if (c != stop) {
3892: xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3893: xmlFree(buf);
3894: } else {
/* skip the closing quote */
3895: NEXT;
3896: /*
3897: * NOTE: 4.4.7 Bypassed
3898: * When a general entity reference appears in the EntityValue in
3899: * an entity declaration, it is bypassed and left as is.
3900: * so XML_SUBSTITUTE_REF is not set here.
3901: */
3902: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3903: 0, 0, 0);
/* buf is either handed over to the caller through orig or released */
1.1.1.3 misho 3904: if (orig != NULL)
1.1 misho 3905: *orig = buf;
3906: else
3907: xmlFree(buf);
3908: }
1.1.1.3 misho 3909:
1.1 misho 3910: return(ret);
3911: }
3912:
3913: /**
3914: * xmlParseAttValueComplex:
3915: * @ctxt: an XML parser context
3916: * @len: the resulting attribute len
3917: * @normalize: wether to apply the inner normalization
3918: *
3919: * parse a value for an attribute, this is the fallback function
3920: * of xmlParseAttValue() when the attribute parsing requires handling
3921: * of non-ASCII characters, or normalization compaction.
3922: *
3923: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3924: */
3925: static xmlChar *
3926: xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3927: xmlChar limit = 0;
3928: xmlChar *buf = NULL;
3929: xmlChar *rep = NULL;
1.1.1.3 misho 3930: size_t len = 0;
3931: size_t buf_size = 0;
1.1 misho 3932: int c, l, in_space = 0;
3933: xmlChar *current = NULL;
3934: xmlEntityPtr ent;
3935:
3936: if (NXT(0) == '"') {
3937: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3938: limit = '"';
3939: NEXT;
3940: } else if (NXT(0) == '\'') {
3941: limit = '\'';
3942: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3943: NEXT;
3944: } else {
3945: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3946: return(NULL);
3947: }
3948:
3949: /*
3950: * allocate a translation buffer.
3951: */
3952: buf_size = XML_PARSER_BUFFER_SIZE;
1.1.1.3 misho 3953: buf = (xmlChar *) xmlMallocAtomic(buf_size);
1.1 misho 3954: if (buf == NULL) goto mem_error;
3955:
3956: /*
3957: * OK loop until we reach one of the ending char or a size limit.
3958: */
3959: c = CUR_CHAR(l);
1.1.1.3 misho 3960: while (((NXT(0) != limit) && /* checked */
3961: (IS_CHAR(c)) && (c != '<')) &&
3962: (ctxt->instate != XML_PARSER_EOF)) {
3963: /*
3964: * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3965: * special option is given
3966: */
3967: if ((len > XML_MAX_TEXT_LENGTH) &&
3968: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3969: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3970: "AttValue length too long\n");
3971: goto mem_error;
3972: }
1.1 misho 3973: if (c == 0) break;
3974: if (c == '&') {
3975: in_space = 0;
3976: if (NXT(1) == '#') {
3977: int val = xmlParseCharRef(ctxt);
3978:
3979: if (val == '&') {
3980: if (ctxt->replaceEntities) {
1.1.1.3 misho 3981: if (len + 10 > buf_size) {
1.1 misho 3982: growBuffer(buf, 10);
3983: }
3984: buf[len++] = '&';
3985: } else {
3986: /*
3987: * The reparsing will be done in xmlStringGetNodeList()
3988: * called by the attribute() function in SAX.c
3989: */
1.1.1.3 misho 3990: if (len + 10 > buf_size) {
1.1 misho 3991: growBuffer(buf, 10);
3992: }
3993: buf[len++] = '&';
3994: buf[len++] = '#';
3995: buf[len++] = '3';
3996: buf[len++] = '8';
3997: buf[len++] = ';';
3998: }
3999: } else if (val != 0) {
1.1.1.3 misho 4000: if (len + 10 > buf_size) {
1.1 misho 4001: growBuffer(buf, 10);
4002: }
4003: len += xmlCopyChar(0, &buf[len], val);
4004: }
4005: } else {
4006: ent = xmlParseEntityRef(ctxt);
4007: ctxt->nbentities++;
4008: if (ent != NULL)
4009: ctxt->nbentities += ent->owner;
4010: if ((ent != NULL) &&
4011: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.1.1.3 misho 4012: if (len + 10 > buf_size) {
1.1 misho 4013: growBuffer(buf, 10);
4014: }
4015: if ((ctxt->replaceEntities == 0) &&
4016: (ent->content[0] == '&')) {
4017: buf[len++] = '&';
4018: buf[len++] = '#';
4019: buf[len++] = '3';
4020: buf[len++] = '8';
4021: buf[len++] = ';';
4022: } else {
4023: buf[len++] = ent->content[0];
4024: }
1.1.1.3 misho 4025: } else if ((ent != NULL) &&
1.1 misho 4026: (ctxt->replaceEntities != 0)) {
4027: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4028: rep = xmlStringDecodeEntities(ctxt, ent->content,
4029: XML_SUBSTITUTE_REF,
4030: 0, 0, 0);
4031: if (rep != NULL) {
4032: current = rep;
4033: while (*current != 0) { /* non input consuming */
4034: if ((*current == 0xD) || (*current == 0xA) ||
4035: (*current == 0x9)) {
4036: buf[len++] = 0x20;
4037: current++;
4038: } else
4039: buf[len++] = *current++;
1.1.1.3 misho 4040: if (len + 10 > buf_size) {
1.1 misho 4041: growBuffer(buf, 10);
4042: }
4043: }
4044: xmlFree(rep);
4045: rep = NULL;
4046: }
4047: } else {
1.1.1.3 misho 4048: if (len + 10 > buf_size) {
1.1 misho 4049: growBuffer(buf, 10);
4050: }
4051: if (ent->content != NULL)
4052: buf[len++] = ent->content[0];
4053: }
4054: } else if (ent != NULL) {
4055: int i = xmlStrlen(ent->name);
4056: const xmlChar *cur = ent->name;
4057:
4058: /*
4059: * This may look absurd but is needed to detect
4060: * entities problems
4061: */
4062: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.1.1.3 misho 4063: (ent->content != NULL) && (ent->checked == 0)) {
4064: unsigned long oldnbent = ctxt->nbentities;
4065:
1.1 misho 4066: rep = xmlStringDecodeEntities(ctxt, ent->content,
4067: XML_SUBSTITUTE_REF, 0, 0, 0);
1.1.1.3 misho 4068:
4069: ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
1.1 misho 4070: if (rep != NULL) {
1.1.1.3 misho 4071: if (xmlStrchr(rep, '<'))
4072: ent->checked |= 1;
1.1 misho 4073: xmlFree(rep);
4074: rep = NULL;
4075: }
4076: }
4077:
4078: /*
4079: * Just output the reference
4080: */
4081: buf[len++] = '&';
1.1.1.3 misho 4082: while (len + i + 10 > buf_size) {
1.1 misho 4083: growBuffer(buf, i + 10);
4084: }
4085: for (;i > 0;i--)
4086: buf[len++] = *cur++;
4087: buf[len++] = ';';
4088: }
4089: }
4090: } else {
4091: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4092: if ((len != 0) || (!normalize)) {
4093: if ((!normalize) || (!in_space)) {
4094: COPY_BUF(l,buf,len,0x20);
1.1.1.3 misho 4095: while (len + 10 > buf_size) {
1.1 misho 4096: growBuffer(buf, 10);
4097: }
4098: }
4099: in_space = 1;
4100: }
4101: } else {
4102: in_space = 0;
4103: COPY_BUF(l,buf,len,c);
1.1.1.3 misho 4104: if (len + 10 > buf_size) {
1.1 misho 4105: growBuffer(buf, 10);
4106: }
4107: }
4108: NEXTL(l);
4109: }
4110: GROW;
4111: c = CUR_CHAR(l);
4112: }
1.1.1.3 misho 4113: if (ctxt->instate == XML_PARSER_EOF)
4114: goto error;
4115:
1.1 misho 4116: if ((in_space) && (normalize)) {
1.1.1.3 misho 4117: while ((len > 0) && (buf[len - 1] == 0x20)) len--;
1.1 misho 4118: }
4119: buf[len] = 0;
4120: if (RAW == '<') {
4121: xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4122: } else if (RAW != limit) {
4123: if ((c != 0) && (!IS_CHAR(c))) {
4124: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4125: "invalid character in attribute value\n");
4126: } else {
4127: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4128: "AttValue: ' expected\n");
4129: }
4130: } else
4131: NEXT;
1.1.1.3 misho 4132:
4133: /*
4134: * There we potentially risk an overflow, don't allow attribute value of
4135: * length more than INT_MAX it is a very reasonnable assumption !
4136: */
4137: if (len >= INT_MAX) {
4138: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139: "AttValue length too long\n");
4140: goto mem_error;
4141: }
4142:
4143: if (attlen != NULL) *attlen = (int) len;
1.1 misho 4144: return(buf);
4145:
4146: mem_error:
4147: xmlErrMemory(ctxt, NULL);
1.1.1.3 misho 4148: error:
1.1 misho 4149: if (buf != NULL)
4150: xmlFree(buf);
4151: if (rep != NULL)
4152: xmlFree(rep);
4153: return(NULL);
4154: }
4155:
4156: /**
4157: * xmlParseAttValue:
4158: * @ctxt: an XML parser context
4159: *
4160: * parse a value for an attribute
4161: * Note: the parser won't do substitution of entities here, this
4162: * will be handled later in xmlStringGetNodeList
4163: *
4164: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4165: * "'" ([^<&'] | Reference)* "'"
4166: *
4167: * 3.3.3 Attribute-Value Normalization:
4168: * Before the value of an attribute is passed to the application or
1.1.1.3 misho 4169: * checked for validity, the XML processor must normalize it as follows:
1.1 misho 4170: * - a character reference is processed by appending the referenced
4171: * character to the attribute value
4172: * - an entity reference is processed by recursively processing the
1.1.1.3 misho 4173: * replacement text of the entity
1.1 misho 4174: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4175: * appending #x20 to the normalized value, except that only a single
4176: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1.1.1.3 misho 4177: * parsed entity or the literal entity value of an internal parsed entity
4178: * - other characters are processed by appending them to the normalized value
1.1 misho 4179: * If the declared value is not CDATA, then the XML processor must further
4180: * process the normalized attribute value by discarding any leading and
4181: * trailing space (#x20) characters, and by replacing sequences of space
1.1.1.3 misho 4182: * (#x20) characters by a single space (#x20) character.
1.1 misho 4183: * All attributes for which no declaration has been read should be treated
4184: * by a non-validating parser as if declared CDATA.
4185: *
4186: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4187: */
4188:
4189:
4190: xmlChar *
4191: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4192: if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4193: return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4194: }
4195:
4196: /**
4197: * xmlParseSystemLiteral:
4198: * @ctxt: an XML parser context
1.1.1.3 misho 4199: *
1.1 misho 4200: * parse an XML Literal
4201: *
4202: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4203: *
4204: * Returns the SystemLiteral parsed or NULL
4205: */
4206:
4207: xmlChar *
4208: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4209: xmlChar *buf = NULL;
4210: int len = 0;
4211: int size = XML_PARSER_BUFFER_SIZE;
4212: int cur, l;
4213: xmlChar stop;
4214: int state = ctxt->instate;
4215: int count = 0;
4216:
4217: SHRINK;
4218: if (RAW == '"') {
4219: NEXT;
4220: stop = '"';
4221: } else if (RAW == '\'') {
4222: NEXT;
4223: stop = '\'';
4224: } else {
4225: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4226: return(NULL);
4227: }
1.1.1.3 misho 4228:
1.1 misho 4229: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4230: if (buf == NULL) {
4231: xmlErrMemory(ctxt, NULL);
4232: return(NULL);
4233: }
4234: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4235: cur = CUR_CHAR(l);
4236: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4237: if (len + 5 >= size) {
4238: xmlChar *tmp;
4239:
1.1.1.3 misho 4240: if ((size > XML_MAX_NAME_LENGTH) &&
4241: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4242: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4243: xmlFree(buf);
4244: ctxt->instate = (xmlParserInputState) state;
4245: return(NULL);
4246: }
1.1 misho 4247: size *= 2;
4248: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4249: if (tmp == NULL) {
4250: xmlFree(buf);
4251: xmlErrMemory(ctxt, NULL);
4252: ctxt->instate = (xmlParserInputState) state;
4253: return(NULL);
4254: }
4255: buf = tmp;
4256: }
4257: count++;
4258: if (count > 50) {
4259: GROW;
4260: count = 0;
1.1.1.3 misho 4261: if (ctxt->instate == XML_PARSER_EOF) {
4262: xmlFree(buf);
4263: return(NULL);
4264: }
1.1 misho 4265: }
4266: COPY_BUF(l,buf,len,cur);
4267: NEXTL(l);
4268: cur = CUR_CHAR(l);
4269: if (cur == 0) {
4270: GROW;
4271: SHRINK;
4272: cur = CUR_CHAR(l);
4273: }
4274: }
4275: buf[len] = 0;
4276: ctxt->instate = (xmlParserInputState) state;
4277: if (!IS_CHAR(cur)) {
4278: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4279: } else {
4280: NEXT;
4281: }
4282: return(buf);
4283: }
4284:
4285: /**
4286: * xmlParsePubidLiteral:
4287: * @ctxt: an XML parser context
4288: *
4289: * parse an XML public literal
4290: *
4291: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4292: *
4293: * Returns the PubidLiteral parsed or NULL.
4294: */
4295:
4296: xmlChar *
4297: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4298: xmlChar *buf = NULL;
4299: int len = 0;
4300: int size = XML_PARSER_BUFFER_SIZE;
4301: xmlChar cur;
4302: xmlChar stop;
4303: int count = 0;
4304: xmlParserInputState oldstate = ctxt->instate;
4305:
4306: SHRINK;
4307: if (RAW == '"') {
4308: NEXT;
4309: stop = '"';
4310: } else if (RAW == '\'') {
4311: NEXT;
4312: stop = '\'';
4313: } else {
4314: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4315: return(NULL);
4316: }
4317: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4318: if (buf == NULL) {
4319: xmlErrMemory(ctxt, NULL);
4320: return(NULL);
4321: }
4322: ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4323: cur = CUR;
4324: while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4325: if (len + 1 >= size) {
4326: xmlChar *tmp;
4327:
1.1.1.3 misho 4328: if ((size > XML_MAX_NAME_LENGTH) &&
4329: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4330: xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4331: xmlFree(buf);
4332: return(NULL);
4333: }
1.1 misho 4334: size *= 2;
4335: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4336: if (tmp == NULL) {
4337: xmlErrMemory(ctxt, NULL);
4338: xmlFree(buf);
4339: return(NULL);
4340: }
4341: buf = tmp;
4342: }
4343: buf[len++] = cur;
4344: count++;
4345: if (count > 50) {
4346: GROW;
4347: count = 0;
1.1.1.3 misho 4348: if (ctxt->instate == XML_PARSER_EOF) {
4349: xmlFree(buf);
4350: return(NULL);
4351: }
1.1 misho 4352: }
4353: NEXT;
4354: cur = CUR;
4355: if (cur == 0) {
4356: GROW;
4357: SHRINK;
4358: cur = CUR;
4359: }
4360: }
4361: buf[len] = 0;
4362: if (cur != stop) {
4363: xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4364: } else {
4365: NEXT;
4366: }
4367: ctxt->instate = oldstate;
4368: return(buf);
4369: }
4370:
4371: static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4372:
/*
 * Lookup table used by the inner loop of the accelerated character data
 * scanner: a non-zero entry means the byte can be skipped over as plain
 * text. Zero entries are the control characters other than 0x9 (so CR and
 * LF are handled separately), '&', '<', ']' and every non-ASCII byte.
 * Only the ASCII half is spelled out; entries 0x80..0xFF are not listed
 * and are therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 (TAB) is accepted, CR/LF handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly 0x00 */
};
4410:
4411: /**
4412: * xmlParseCharData:
4413: * @ctxt: an XML parser context
4414: * @cdata: int indicating whether we are within a CDATA section
4415: *
4416: * parse a CharData section.
4417: * if we are within a CDATA section ']]>' marks an end of section.
4418: *
4419: * The right angle bracket (>) may be represented using the string ">",
4420: * and must, for compatibility, be escaped using ">" or a character
4421: * reference when it appears in the string "]]>" in content, when that
1.1.1.3 misho 4422: * string is not marking the end of a CDATA section.
1.1 misho 4423: *
4424: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4425: */
4426:
4427: void
4428: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4429: const xmlChar *in;
4430: int nbchar = 0;
4431: int line = ctxt->input->line;
4432: int col = ctxt->input->col;
4433: int ccol;
4434:
4435: SHRINK;
4436: GROW;
4437: /*
4438: * Accelerated common case where input don't need to be
4439: * modified before passing it to the handler.
4440: */
4441: if (!cdata) {
4442: in = ctxt->input->cur;
4443: do {
4444: get_more_space:
4445: while (*in == 0x20) { in++; ctxt->input->col++; }
4446: if (*in == 0xA) {
4447: do {
4448: ctxt->input->line++; ctxt->input->col = 1;
4449: in++;
4450: } while (*in == 0xA);
4451: goto get_more_space;
4452: }
4453: if (*in == '<') {
4454: nbchar = in - ctxt->input->cur;
4455: if (nbchar > 0) {
4456: const xmlChar *tmp = ctxt->input->cur;
4457: ctxt->input->cur = in;
4458:
4459: if ((ctxt->sax != NULL) &&
4460: (ctxt->sax->ignorableWhitespace !=
4461: ctxt->sax->characters)) {
4462: if (areBlanks(ctxt, tmp, nbchar, 1)) {
4463: if (ctxt->sax->ignorableWhitespace != NULL)
4464: ctxt->sax->ignorableWhitespace(ctxt->userData,
4465: tmp, nbchar);
4466: } else {
4467: if (ctxt->sax->characters != NULL)
4468: ctxt->sax->characters(ctxt->userData,
4469: tmp, nbchar);
4470: if (*ctxt->space == -1)
4471: *ctxt->space = -2;
4472: }
4473: } else if ((ctxt->sax != NULL) &&
4474: (ctxt->sax->characters != NULL)) {
4475: ctxt->sax->characters(ctxt->userData,
4476: tmp, nbchar);
4477: }
4478: }
4479: return;
4480: }
4481:
4482: get_more:
4483: ccol = ctxt->input->col;
4484: while (test_char_data[*in]) {
4485: in++;
4486: ccol++;
4487: }
4488: ctxt->input->col = ccol;
4489: if (*in == 0xA) {
4490: do {
4491: ctxt->input->line++; ctxt->input->col = 1;
4492: in++;
4493: } while (*in == 0xA);
4494: goto get_more;
4495: }
4496: if (*in == ']') {
4497: if ((in[1] == ']') && (in[2] == '>')) {
4498: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4499: ctxt->input->cur = in;
4500: return;
4501: }
4502: in++;
4503: ctxt->input->col++;
4504: goto get_more;
4505: }
4506: nbchar = in - ctxt->input->cur;
4507: if (nbchar > 0) {
4508: if ((ctxt->sax != NULL) &&
4509: (ctxt->sax->ignorableWhitespace !=
4510: ctxt->sax->characters) &&
4511: (IS_BLANK_CH(*ctxt->input->cur))) {
4512: const xmlChar *tmp = ctxt->input->cur;
4513: ctxt->input->cur = in;
4514:
4515: if (areBlanks(ctxt, tmp, nbchar, 0)) {
4516: if (ctxt->sax->ignorableWhitespace != NULL)
4517: ctxt->sax->ignorableWhitespace(ctxt->userData,
4518: tmp, nbchar);
4519: } else {
4520: if (ctxt->sax->characters != NULL)
4521: ctxt->sax->characters(ctxt->userData,
4522: tmp, nbchar);
4523: if (*ctxt->space == -1)
4524: *ctxt->space = -2;
4525: }
4526: line = ctxt->input->line;
4527: col = ctxt->input->col;
4528: } else if (ctxt->sax != NULL) {
4529: if (ctxt->sax->characters != NULL)
4530: ctxt->sax->characters(ctxt->userData,
4531: ctxt->input->cur, nbchar);
4532: line = ctxt->input->line;
4533: col = ctxt->input->col;
4534: }
4535: /* something really bad happened in the SAX callback */
4536: if (ctxt->instate != XML_PARSER_CONTENT)
4537: return;
4538: }
4539: ctxt->input->cur = in;
4540: if (*in == 0xD) {
4541: in++;
4542: if (*in == 0xA) {
4543: ctxt->input->cur = in;
4544: in++;
4545: ctxt->input->line++; ctxt->input->col = 1;
4546: continue; /* while */
4547: }
4548: in--;
4549: }
4550: if (*in == '<') {
4551: return;
4552: }
4553: if (*in == '&') {
4554: return;
4555: }
4556: SHRINK;
4557: GROW;
1.1.1.3 misho 4558: if (ctxt->instate == XML_PARSER_EOF)
4559: return;
1.1 misho 4560: in = ctxt->input->cur;
4561: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4562: nbchar = 0;
4563: }
4564: ctxt->input->line = line;
4565: ctxt->input->col = col;
4566: xmlParseCharDataComplex(ctxt, cdata);
4567: }
4568:
4569: /**
4570: * xmlParseCharDataComplex:
4571: * @ctxt: an XML parser context
4572: * @cdata: int indicating whether we are within a CDATA section
4573: *
4574: * parse a CharData section.this is the fallback function
4575: * of xmlParseCharData() when the parsing requires handling
4576: * of non-ASCII characters.
4577: */
4578: static void
4579: xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4580: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4581: int nbchar = 0;
4582: int cur, l;
4583: int count = 0;
4584:
4585: SHRINK;
4586: GROW;
4587: cur = CUR_CHAR(l);
4588: while ((cur != '<') && /* checked */
1.1.1.3 misho 4589: (cur != '&') &&
1.1 misho 4590: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4591: if ((cur == ']') && (NXT(1) == ']') &&
4592: (NXT(2) == '>')) {
4593: if (cdata) break;
4594: else {
4595: xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4596: }
4597: }
4598: COPY_BUF(l,buf,nbchar,cur);
4599: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600: buf[nbchar] = 0;
4601:
4602: /*
4603: * OK the segment is to be consumed as chars.
4604: */
4605: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606: if (areBlanks(ctxt, buf, nbchar, 0)) {
4607: if (ctxt->sax->ignorableWhitespace != NULL)
4608: ctxt->sax->ignorableWhitespace(ctxt->userData,
4609: buf, nbchar);
4610: } else {
4611: if (ctxt->sax->characters != NULL)
4612: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4613: if ((ctxt->sax->characters !=
4614: ctxt->sax->ignorableWhitespace) &&
4615: (*ctxt->space == -1))
4616: *ctxt->space = -2;
4617: }
4618: }
4619: nbchar = 0;
4620: /* something really bad happened in the SAX callback */
4621: if (ctxt->instate != XML_PARSER_CONTENT)
4622: return;
4623: }
4624: count++;
4625: if (count > 50) {
4626: GROW;
4627: count = 0;
1.1.1.3 misho 4628: if (ctxt->instate == XML_PARSER_EOF)
4629: return;
1.1 misho 4630: }
4631: NEXTL(l);
4632: cur = CUR_CHAR(l);
4633: }
4634: if (nbchar != 0) {
4635: buf[nbchar] = 0;
4636: /*
4637: * OK the segment is to be consumed as chars.
4638: */
4639: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4640: if (areBlanks(ctxt, buf, nbchar, 0)) {
4641: if (ctxt->sax->ignorableWhitespace != NULL)
4642: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4643: } else {
4644: if (ctxt->sax->characters != NULL)
4645: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4646: if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4647: (*ctxt->space == -1))
4648: *ctxt->space = -2;
4649: }
4650: }
4651: }
4652: if ((cur != 0) && (!IS_CHAR(cur))) {
4653: /* Generate the error and skip the offending character */
4654: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4655: "PCDATA invalid Char value %d\n",
4656: cur);
4657: NEXTL(l);
4658: }
4659: }
4660:
4661: /**
4662: * xmlParseExternalID:
4663: * @ctxt: an XML parser context
4664: * @publicID: a xmlChar** receiving PubidLiteral
4665: * @strict: indicate whether we should restrict parsing to only
4666: * production [75], see NOTE below
4667: *
4668: * Parse an External ID or a Public ID
4669: *
4670: * NOTE: Productions [75] and [83] interact badly since [75] can generate
4671: * 'PUBLIC' S PubidLiteral S SystemLiteral
4672: *
4673: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4674: * | 'PUBLIC' S PubidLiteral S SystemLiteral
4675: *
4676: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4677: *
4678: * Returns the function returns SystemLiteral and in the second
4679: * case publicID receives PubidLiteral, is strict is off
4680: * it is possible to return NULL and have publicID set.
4681: */
4682:
4683: xmlChar *
4684: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4685: xmlChar *URI = NULL;
4686:
4687: SHRINK;
4688:
4689: *publicID = NULL;
4690: if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4691: SKIP(6);
4692: if (!IS_BLANK_CH(CUR)) {
4693: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4694: "Space required after 'SYSTEM'\n");
4695: }
4696: SKIP_BLANKS;
4697: URI = xmlParseSystemLiteral(ctxt);
4698: if (URI == NULL) {
4699: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4700: }
4701: } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4702: SKIP(6);
4703: if (!IS_BLANK_CH(CUR)) {
4704: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4705: "Space required after 'PUBLIC'\n");
4706: }
4707: SKIP_BLANKS;
4708: *publicID = xmlParsePubidLiteral(ctxt);
4709: if (*publicID == NULL) {
4710: xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4711: }
4712: if (strict) {
4713: /*
4714: * We don't handle [83] so "S SystemLiteral" is required.
4715: */
4716: if (!IS_BLANK_CH(CUR)) {
4717: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4718: "Space required after the Public Identifier\n");
4719: }
4720: } else {
4721: /*
1.1.1.3 misho 4722: * We handle [83] so we return immediately, if
1.1 misho 4723: * "S SystemLiteral" is not detected. From a purely parsing
4724: * point of view that's a nice mess.
4725: */
4726: const xmlChar *ptr;
4727: GROW;
4728:
4729: ptr = CUR_PTR;
4730: if (!IS_BLANK_CH(*ptr)) return(NULL);
1.1.1.3 misho 4731:
1.1 misho 4732: while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4733: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4734: }
4735: SKIP_BLANKS;
4736: URI = xmlParseSystemLiteral(ctxt);
4737: if (URI == NULL) {
4738: xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4739: }
4740: }
4741: return(URI);
4742: }
4743:
4744: /**
4745: * xmlParseCommentComplex:
4746: * @ctxt: an XML parser context
4747: * @buf: the already parsed part of the buffer
4748: * @len: number of bytes filles in the buffer
4749: * @size: allocated size of the buffer
4750: *
4751: * Skip an XML (SGML) comment <!-- .... -->
4752: * The spec says that "For compatibility, the string "--" (double-hyphen)
4753: * must not occur within comments. "
4754: * This is the slow routine in case the accelerator for ascii didn't work
4755: *
4756: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4757: */
4758: static void
1.1.1.3 misho 4759: xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4760: size_t len, size_t size) {
1.1 misho 4761: int q, ql;
4762: int r, rl;
4763: int cur, l;
1.1.1.3 misho 4764: size_t count = 0;
1.1 misho 4765: int inputid;
4766:
4767: inputid = ctxt->input->id;
4768:
4769: if (buf == NULL) {
4770: len = 0;
4771: size = XML_PARSER_BUFFER_SIZE;
4772: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4773: if (buf == NULL) {
4774: xmlErrMemory(ctxt, NULL);
4775: return;
4776: }
4777: }
4778: GROW; /* Assure there's enough input data */
4779: q = CUR_CHAR(ql);
4780: if (q == 0)
4781: goto not_terminated;
4782: if (!IS_CHAR(q)) {
4783: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784: "xmlParseComment: invalid xmlChar value %d\n",
4785: q);
4786: xmlFree (buf);
4787: return;
4788: }
4789: NEXTL(ql);
4790: r = CUR_CHAR(rl);
4791: if (r == 0)
4792: goto not_terminated;
4793: if (!IS_CHAR(r)) {
4794: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4795: "xmlParseComment: invalid xmlChar value %d\n",
4796: q);
4797: xmlFree (buf);
4798: return;
4799: }
4800: NEXTL(rl);
4801: cur = CUR_CHAR(l);
4802: if (cur == 0)
4803: goto not_terminated;
4804: while (IS_CHAR(cur) && /* checked */
4805: ((cur != '>') ||
4806: (r != '-') || (q != '-'))) {
4807: if ((r == '-') && (q == '-')) {
4808: xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4809: }
1.1.1.3 misho 4810: if ((len > XML_MAX_TEXT_LENGTH) &&
4811: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4812: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4813: "Comment too big found", NULL);
4814: xmlFree (buf);
4815: return;
4816: }
1.1 misho 4817: if (len + 5 >= size) {
4818: xmlChar *new_buf;
1.1.1.3 misho 4819: size_t new_size;
4820:
4821: new_size = size * 2;
4822: new_buf = (xmlChar *) xmlRealloc(buf, new_size);
1.1 misho 4823: if (new_buf == NULL) {
4824: xmlFree (buf);
4825: xmlErrMemory(ctxt, NULL);
4826: return;
4827: }
4828: buf = new_buf;
1.1.1.3 misho 4829: size = new_size;
1.1 misho 4830: }
4831: COPY_BUF(ql,buf,len,q);
4832: q = r;
4833: ql = rl;
4834: r = cur;
4835: rl = l;
4836:
4837: count++;
4838: if (count > 50) {
4839: GROW;
4840: count = 0;
1.1.1.3 misho 4841: if (ctxt->instate == XML_PARSER_EOF) {
4842: xmlFree(buf);
4843: return;
4844: }
1.1 misho 4845: }
4846: NEXTL(l);
4847: cur = CUR_CHAR(l);
4848: if (cur == 0) {
4849: SHRINK;
4850: GROW;
4851: cur = CUR_CHAR(l);
4852: }
4853: }
4854: buf[len] = 0;
4855: if (cur == 0) {
4856: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4857: "Comment not terminated \n<!--%.50s\n", buf);
4858: } else if (!IS_CHAR(cur)) {
4859: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4860: "xmlParseComment: invalid xmlChar value %d\n",
4861: cur);
4862: } else {
4863: if (inputid != ctxt->input->id) {
4864: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4865: "Comment doesn't start and stop in the same entity\n");
4866: }
4867: NEXT;
4868: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4869: (!ctxt->disableSAX))
4870: ctxt->sax->comment(ctxt->userData, buf);
4871: }
4872: xmlFree(buf);
4873: return;
4874: not_terminated:
4875: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4876: "Comment not terminated\n", NULL);
4877: xmlFree(buf);
4878: return;
4879: }
4880:
4881: /**
4882: * xmlParseComment:
4883: * @ctxt: an XML parser context
4884: *
4885: * Skip an XML (SGML) comment <!-- .... -->
4886: * The spec says that "For compatibility, the string "--" (double-hyphen)
4887: * must not occur within comments. "
4888: *
4889: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4890: */
4891: void
4892: xmlParseComment(xmlParserCtxtPtr ctxt) {
4893: xmlChar *buf = NULL;
1.1.1.3 misho 4894: size_t size = XML_PARSER_BUFFER_SIZE;
4895: size_t len = 0;
1.1 misho 4896: xmlParserInputState state;
4897: const xmlChar *in;
1.1.1.3 misho 4898: size_t nbchar = 0;
4899: int ccol;
1.1 misho 4900: int inputid;
4901:
4902: /*
4903: * Check that there is a comment right here.
4904: */
4905: if ((RAW != '<') || (NXT(1) != '!') ||
4906: (NXT(2) != '-') || (NXT(3) != '-')) return;
4907: state = ctxt->instate;
4908: ctxt->instate = XML_PARSER_COMMENT;
4909: inputid = ctxt->input->id;
4910: SKIP(4);
4911: SHRINK;
4912: GROW;
4913:
4914: /*
4915: * Accelerated common case where input don't need to be
4916: * modified before passing it to the handler.
4917: */
4918: in = ctxt->input->cur;
4919: do {
4920: if (*in == 0xA) {
4921: do {
4922: ctxt->input->line++; ctxt->input->col = 1;
4923: in++;
4924: } while (*in == 0xA);
4925: }
4926: get_more:
4927: ccol = ctxt->input->col;
4928: while (((*in > '-') && (*in <= 0x7F)) ||
4929: ((*in >= 0x20) && (*in < '-')) ||
4930: (*in == 0x09)) {
4931: in++;
4932: ccol++;
4933: }
4934: ctxt->input->col = ccol;
4935: if (*in == 0xA) {
4936: do {
4937: ctxt->input->line++; ctxt->input->col = 1;
4938: in++;
4939: } while (*in == 0xA);
4940: goto get_more;
4941: }
4942: nbchar = in - ctxt->input->cur;
4943: /*
4944: * save current set of data
4945: */
4946: if (nbchar > 0) {
4947: if ((ctxt->sax != NULL) &&
4948: (ctxt->sax->comment != NULL)) {
4949: if (buf == NULL) {
4950: if ((*in == '-') && (in[1] == '-'))
4951: size = nbchar + 1;
4952: else
4953: size = XML_PARSER_BUFFER_SIZE + nbchar;
4954: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4955: if (buf == NULL) {
4956: xmlErrMemory(ctxt, NULL);
4957: ctxt->instate = state;
4958: return;
4959: }
4960: len = 0;
4961: } else if (len + nbchar + 1 >= size) {
4962: xmlChar *new_buf;
4963: size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4964: new_buf = (xmlChar *) xmlRealloc(buf,
4965: size * sizeof(xmlChar));
4966: if (new_buf == NULL) {
4967: xmlFree (buf);
4968: xmlErrMemory(ctxt, NULL);
4969: ctxt->instate = state;
4970: return;
4971: }
4972: buf = new_buf;
4973: }
4974: memcpy(&buf[len], ctxt->input->cur, nbchar);
4975: len += nbchar;
4976: buf[len] = 0;
4977: }
4978: }
1.1.1.3 misho 4979: if ((len > XML_MAX_TEXT_LENGTH) &&
4980: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4981: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4982: "Comment too big found", NULL);
4983: xmlFree (buf);
4984: return;
4985: }
1.1 misho 4986: ctxt->input->cur = in;
4987: if (*in == 0xA) {
4988: in++;
4989: ctxt->input->line++; ctxt->input->col = 1;
4990: }
4991: if (*in == 0xD) {
4992: in++;
4993: if (*in == 0xA) {
4994: ctxt->input->cur = in;
4995: in++;
4996: ctxt->input->line++; ctxt->input->col = 1;
4997: continue; /* while */
4998: }
4999: in--;
5000: }
5001: SHRINK;
5002: GROW;
1.1.1.3 misho 5003: if (ctxt->instate == XML_PARSER_EOF) {
5004: xmlFree(buf);
5005: return;
5006: }
1.1 misho 5007: in = ctxt->input->cur;
5008: if (*in == '-') {
5009: if (in[1] == '-') {
5010: if (in[2] == '>') {
5011: if (ctxt->input->id != inputid) {
5012: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5013: "comment doesn't start and stop in the same entity\n");
5014: }
5015: SKIP(3);
5016: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5017: (!ctxt->disableSAX)) {
5018: if (buf != NULL)
5019: ctxt->sax->comment(ctxt->userData, buf);
5020: else
5021: ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5022: }
5023: if (buf != NULL)
5024: xmlFree(buf);
1.1.1.3 misho 5025: if (ctxt->instate != XML_PARSER_EOF)
5026: ctxt->instate = state;
1.1 misho 5027: return;
5028: }
1.1.1.2 misho 5029: if (buf != NULL) {
5030: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5031: "Double hyphen within comment: "
5032: "<!--%.50s\n",
1.1 misho 5033: buf);
1.1.1.2 misho 5034: } else
5035: xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5036: "Double hyphen within comment\n", NULL);
1.1 misho 5037: in++;
5038: ctxt->input->col++;
5039: }
5040: in++;
5041: ctxt->input->col++;
5042: goto get_more;
5043: }
5044: } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5045: xmlParseCommentComplex(ctxt, buf, len, size);
5046: ctxt->instate = state;
5047: return;
5048: }
5049:
5050:
5051: /**
5052: * xmlParsePITarget:
5053: * @ctxt: an XML parser context
1.1.1.3 misho 5054: *
1.1 misho 5055: * parse the name of a PI
5056: *
5057: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5058: *
5059: * Returns the PITarget name or NULL
5060: */
5061:
5062: const xmlChar *
5063: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5064: const xmlChar *name;
5065:
5066: name = xmlParseName(ctxt);
5067: if ((name != NULL) &&
5068: ((name[0] == 'x') || (name[0] == 'X')) &&
5069: ((name[1] == 'm') || (name[1] == 'M')) &&
5070: ((name[2] == 'l') || (name[2] == 'L'))) {
5071: int i;
5072: if ((name[0] == 'x') && (name[1] == 'm') &&
5073: (name[2] == 'l') && (name[3] == 0)) {
5074: xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5075: "XML declaration allowed only at the start of the document\n");
5076: return(name);
5077: } else if (name[3] == 0) {
5078: xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5079: return(name);
5080: }
5081: for (i = 0;;i++) {
5082: if (xmlW3CPIs[i] == NULL) break;
5083: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5084: return(name);
5085: }
5086: xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5087: "xmlParsePITarget: invalid name prefix 'xml'\n",
5088: NULL, NULL);
5089: }
5090: if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
1.1.1.3 misho 5091: xmlNsErr(ctxt, XML_NS_ERR_COLON,
1.1 misho 5092: "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5093: }
5094: return(name);
5095: }
5096:
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted value, optional blanks. Any
 * deviation produces an XML_WAR_CATALOG_PI warning, except a missing '='
 * which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *location = NULL;      /* copy of the quoted value */
    const xmlChar *p = catalog;    /* read cursor */
    const xmlChar *valueStart;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* Look for the matching closing quote. */
    valueStart = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    location = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* Only blanks are allowed after the value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (location != NULL)
        xmlFree(location);
}
#endif
5158:
5159: /**
5160: * xmlParsePI:
5161: * @ctxt: an XML parser context
1.1.1.3 misho 5162: *
1.1 misho 5163: * parse an XML Processing Instruction.
5164: *
5165: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5166: *
5167: * The processing is transfered to SAX once parsed.
5168: */
5169:
5170: void
5171: xmlParsePI(xmlParserCtxtPtr ctxt) {
5172: xmlChar *buf = NULL;
1.1.1.3 misho 5173: size_t len = 0;
5174: size_t size = XML_PARSER_BUFFER_SIZE;
1.1 misho 5175: int cur, l;
5176: const xmlChar *target;
5177: xmlParserInputState state;
5178: int count = 0;
5179:
5180: if ((RAW == '<') && (NXT(1) == '?')) {
5181: xmlParserInputPtr input = ctxt->input;
5182: state = ctxt->instate;
5183: ctxt->instate = XML_PARSER_PI;
5184: /*
5185: * this is a Processing Instruction.
5186: */
5187: SKIP(2);
5188: SHRINK;
5189:
5190: /*
5191: * Parse the target name and check for special support like
5192: * namespace.
5193: */
5194: target = xmlParsePITarget(ctxt);
5195: if (target != NULL) {
5196: if ((RAW == '?') && (NXT(1) == '>')) {
5197: if (input != ctxt->input) {
5198: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5199: "PI declaration doesn't start and stop in the same entity\n");
5200: }
5201: SKIP(2);
5202:
5203: /*
5204: * SAX: PI detected.
5205: */
5206: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5207: (ctxt->sax->processingInstruction != NULL))
5208: ctxt->sax->processingInstruction(ctxt->userData,
5209: target, NULL);
1.1.1.2 misho 5210: if (ctxt->instate != XML_PARSER_EOF)
5211: ctxt->instate = state;
1.1 misho 5212: return;
5213: }
5214: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5215: if (buf == NULL) {
5216: xmlErrMemory(ctxt, NULL);
5217: ctxt->instate = state;
5218: return;
5219: }
5220: cur = CUR;
5221: if (!IS_BLANK(cur)) {
5222: xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5223: "ParsePI: PI %s space expected\n", target);
5224: }
5225: SKIP_BLANKS;
5226: cur = CUR_CHAR(l);
5227: while (IS_CHAR(cur) && /* checked */
5228: ((cur != '?') || (NXT(1) != '>'))) {
5229: if (len + 5 >= size) {
5230: xmlChar *tmp;
1.1.1.3 misho 5231: size_t new_size = size * 2;
5232: tmp = (xmlChar *) xmlRealloc(buf, new_size);
1.1 misho 5233: if (tmp == NULL) {
5234: xmlErrMemory(ctxt, NULL);
5235: xmlFree(buf);
5236: ctxt->instate = state;
5237: return;
5238: }
5239: buf = tmp;
1.1.1.3 misho 5240: size = new_size;
1.1 misho 5241: }
5242: count++;
5243: if (count > 50) {
5244: GROW;
1.1.1.3 misho 5245: if (ctxt->instate == XML_PARSER_EOF) {
5246: xmlFree(buf);
5247: return;
5248: }
1.1 misho 5249: count = 0;
1.1.1.3 misho 5250: if ((len > XML_MAX_TEXT_LENGTH) &&
5251: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5252: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5253: "PI %s too big found", target);
5254: xmlFree(buf);
5255: ctxt->instate = state;
5256: return;
5257: }
1.1 misho 5258: }
5259: COPY_BUF(l,buf,len,cur);
5260: NEXTL(l);
5261: cur = CUR_CHAR(l);
5262: if (cur == 0) {
5263: SHRINK;
5264: GROW;
5265: cur = CUR_CHAR(l);
5266: }
5267: }
1.1.1.3 misho 5268: if ((len > XML_MAX_TEXT_LENGTH) &&
5269: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5270: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5271: "PI %s too big found", target);
5272: xmlFree(buf);
5273: ctxt->instate = state;
5274: return;
5275: }
1.1 misho 5276: buf[len] = 0;
5277: if (cur != '?') {
5278: xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5279: "ParsePI: PI %s never end ...\n", target);
5280: } else {
5281: if (input != ctxt->input) {
5282: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5283: "PI declaration doesn't start and stop in the same entity\n");
5284: }
5285: SKIP(2);
5286:
5287: #ifdef LIBXML_CATALOG_ENABLED
5288: if (((state == XML_PARSER_MISC) ||
5289: (state == XML_PARSER_START)) &&
5290: (xmlStrEqual(target, XML_CATALOG_PI))) {
5291: xmlCatalogAllow allow = xmlCatalogGetDefaults();
5292: if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5293: (allow == XML_CATA_ALLOW_ALL))
5294: xmlParseCatalogPI(ctxt, buf);
5295: }
5296: #endif
5297:
5298:
5299: /*
5300: * SAX: PI detected.
5301: */
5302: if ((ctxt->sax) && (!ctxt->disableSAX) &&
5303: (ctxt->sax->processingInstruction != NULL))
5304: ctxt->sax->processingInstruction(ctxt->userData,
5305: target, buf);
5306: }
5307: xmlFree(buf);
5308: } else {
5309: xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5310: }
1.1.1.2 misho 5311: if (ctxt->instate != XML_PARSER_EOF)
5312: ctxt->instate = state;
1.1 misho 5313: }
5314: }
5315:
5316: /**
5317: * xmlParseNotationDecl:
5318: * @ctxt: an XML parser context
5319: *
5320: * parse a notation declaration
5321: *
5322: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5323: *
5324: * Hence there is actually 3 choices:
5325: * 'PUBLIC' S PubidLiteral
5326: * 'PUBLIC' S PubidLiteral S SystemLiteral
5327: * and 'SYSTEM' S SystemLiteral
5328: *
5329: * See the NOTE on xmlParseExternalID().
5330: */
5331:
5332: void
5333: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5334: const xmlChar *name;
5335: xmlChar *Pubid;
5336: xmlChar *Systemid;
1.1.1.3 misho 5337:
1.1 misho 5338: if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5339: xmlParserInputPtr input = ctxt->input;
5340: SHRINK;
5341: SKIP(10);
5342: if (!IS_BLANK_CH(CUR)) {
5343: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5344: "Space required after '<!NOTATION'\n");
5345: return;
5346: }
5347: SKIP_BLANKS;
5348:
5349: name = xmlParseName(ctxt);
5350: if (name == NULL) {
5351: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5352: return;
5353: }
5354: if (!IS_BLANK_CH(CUR)) {
5355: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5356: "Space required after the NOTATION name'\n");
5357: return;
5358: }
5359: if (xmlStrchr(name, ':') != NULL) {
1.1.1.3 misho 5360: xmlNsErr(ctxt, XML_NS_ERR_COLON,
1.1 misho 5361: "colon are forbidden from notation names '%s'\n",
5362: name, NULL, NULL);
5363: }
5364: SKIP_BLANKS;
5365:
5366: /*
5367: * Parse the IDs.
5368: */
5369: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5370: SKIP_BLANKS;
5371:
5372: if (RAW == '>') {
5373: if (input != ctxt->input) {
5374: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5375: "Notation declaration doesn't start and stop in the same entity\n");
5376: }
5377: NEXT;
5378: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5379: (ctxt->sax->notationDecl != NULL))
5380: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5381: } else {
5382: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5383: }
5384: if (Systemid != NULL) xmlFree(Systemid);
5385: if (Pubid != NULL) xmlFree(Pubid);
5386: }
5387: }
5388:
5389: /**
5390: * xmlParseEntityDecl:
5391: * @ctxt: an XML parser context
5392: *
5393: * parse <!ENTITY declarations
5394: *
5395: * [70] EntityDecl ::= GEDecl | PEDecl
5396: *
5397: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5398: *
5399: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5400: *
5401: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5402: *
5403: * [74] PEDef ::= EntityValue | ExternalID
5404: *
5405: * [76] NDataDecl ::= S 'NDATA' S Name
5406: *
5407: * [ VC: Notation Declared ]
5408: * The Name must match the declared name of a notation.
5409: */
5410:
5411: void
5412: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5413: const xmlChar *name = NULL;
5414: xmlChar *value = NULL;
5415: xmlChar *URI = NULL, *literal = NULL;
5416: const xmlChar *ndata = NULL;
5417: int isParameter = 0;
5418: xmlChar *orig = NULL;
5419: int skipped;
1.1.1.3 misho 5420:
1.1 misho 5421: /* GROW; done in the caller */
5422: if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5423: xmlParserInputPtr input = ctxt->input;
5424: SHRINK;
5425: SKIP(8);
5426: skipped = SKIP_BLANKS;
5427: if (skipped == 0) {
5428: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5429: "Space required after '<!ENTITY'\n");
5430: }
5431:
5432: if (RAW == '%') {
5433: NEXT;
5434: skipped = SKIP_BLANKS;
5435: if (skipped == 0) {
5436: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437: "Space required after '%'\n");
5438: }
5439: isParameter = 1;
5440: }
5441:
5442: name = xmlParseName(ctxt);
5443: if (name == NULL) {
5444: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5445: "xmlParseEntityDecl: no name\n");
5446: return;
5447: }
5448: if (xmlStrchr(name, ':') != NULL) {
1.1.1.3 misho 5449: xmlNsErr(ctxt, XML_NS_ERR_COLON,
1.1 misho 5450: "colon are forbidden from entities names '%s'\n",
5451: name, NULL, NULL);
5452: }
5453: skipped = SKIP_BLANKS;
5454: if (skipped == 0) {
5455: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456: "Space required after the entity name\n");
5457: }
5458:
5459: ctxt->instate = XML_PARSER_ENTITY_DECL;
5460: /*
5461: * handle the various case of definitions...
5462: */
5463: if (isParameter) {
5464: if ((RAW == '"') || (RAW == '\'')) {
5465: value = xmlParseEntityValue(ctxt, &orig);
5466: if (value) {
5467: if ((ctxt->sax != NULL) &&
5468: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5469: ctxt->sax->entityDecl(ctxt->userData, name,
5470: XML_INTERNAL_PARAMETER_ENTITY,
5471: NULL, NULL, value);
5472: }
5473: } else {
5474: URI = xmlParseExternalID(ctxt, &literal, 1);
5475: if ((URI == NULL) && (literal == NULL)) {
5476: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5477: }
5478: if (URI) {
5479: xmlURIPtr uri;
5480:
5481: uri = xmlParseURI((const char *) URI);
5482: if (uri == NULL) {
5483: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5484: "Invalid URI: %s\n", URI);
5485: /*
5486: * This really ought to be a well formedness error
5487: * but the XML Core WG decided otherwise c.f. issue
5488: * E26 of the XML erratas.
5489: */
5490: } else {
5491: if (uri->fragment != NULL) {
5492: /*
5493: * Okay this is foolish to block those but not
5494: * invalid URIs.
5495: */
5496: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5497: } else {
5498: if ((ctxt->sax != NULL) &&
5499: (!ctxt->disableSAX) &&
5500: (ctxt->sax->entityDecl != NULL))
5501: ctxt->sax->entityDecl(ctxt->userData, name,
5502: XML_EXTERNAL_PARAMETER_ENTITY,
5503: literal, URI, NULL);
5504: }
5505: xmlFreeURI(uri);
5506: }
5507: }
5508: }
5509: } else {
5510: if ((RAW == '"') || (RAW == '\'')) {
5511: value = xmlParseEntityValue(ctxt, &orig);
5512: if ((ctxt->sax != NULL) &&
5513: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5514: ctxt->sax->entityDecl(ctxt->userData, name,
5515: XML_INTERNAL_GENERAL_ENTITY,
5516: NULL, NULL, value);
5517: /*
5518: * For expat compatibility in SAX mode.
5519: */
5520: if ((ctxt->myDoc == NULL) ||
5521: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5522: if (ctxt->myDoc == NULL) {
5523: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5524: if (ctxt->myDoc == NULL) {
5525: xmlErrMemory(ctxt, "New Doc failed");
5526: return;
5527: }
5528: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5529: }
5530: if (ctxt->myDoc->intSubset == NULL)
5531: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5532: BAD_CAST "fake", NULL, NULL);
5533:
5534: xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5535: NULL, NULL, value);
5536: }
5537: } else {
5538: URI = xmlParseExternalID(ctxt, &literal, 1);
5539: if ((URI == NULL) && (literal == NULL)) {
5540: xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541: }
5542: if (URI) {
5543: xmlURIPtr uri;
5544:
5545: uri = xmlParseURI((const char *)URI);
5546: if (uri == NULL) {
5547: xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548: "Invalid URI: %s\n", URI);
5549: /*
5550: * This really ought to be a well formedness error
5551: * but the XML Core WG decided otherwise c.f. issue
5552: * E26 of the XML erratas.
5553: */
5554: } else {
5555: if (uri->fragment != NULL) {
5556: /*
5557: * Okay this is foolish to block those but not
5558: * invalid URIs.
5559: */
5560: xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561: }
5562: xmlFreeURI(uri);
5563: }
5564: }
5565: if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5566: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5567: "Space required before 'NDATA'\n");
5568: }
5569: SKIP_BLANKS;
5570: if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5571: SKIP(5);
5572: if (!IS_BLANK_CH(CUR)) {
5573: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5574: "Space required after 'NDATA'\n");
5575: }
5576: SKIP_BLANKS;
5577: ndata = xmlParseName(ctxt);
5578: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5579: (ctxt->sax->unparsedEntityDecl != NULL))
5580: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5581: literal, URI, ndata);
5582: } else {
5583: if ((ctxt->sax != NULL) &&
5584: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5585: ctxt->sax->entityDecl(ctxt->userData, name,
5586: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5587: literal, URI, NULL);
5588: /*
5589: * For expat compatibility in SAX mode.
5590: * assuming the entity repalcement was asked for
5591: */
5592: if ((ctxt->replaceEntities != 0) &&
5593: ((ctxt->myDoc == NULL) ||
5594: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5595: if (ctxt->myDoc == NULL) {
5596: ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5597: if (ctxt->myDoc == NULL) {
5598: xmlErrMemory(ctxt, "New Doc failed");
5599: return;
5600: }
5601: ctxt->myDoc->properties = XML_DOC_INTERNAL;
5602: }
5603:
5604: if (ctxt->myDoc->intSubset == NULL)
5605: ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5606: BAD_CAST "fake", NULL, NULL);
5607: xmlSAX2EntityDecl(ctxt, name,
5608: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5609: literal, URI, NULL);
5610: }
5611: }
5612: }
5613: }
1.1.1.3 misho 5614: if (ctxt->instate == XML_PARSER_EOF)
5615: return;
1.1 misho 5616: SKIP_BLANKS;
5617: if (RAW != '>') {
5618: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5619: "xmlParseEntityDecl: entity %s not terminated\n", name);
5620: } else {
5621: if (input != ctxt->input) {
5622: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5623: "Entity declaration doesn't start and stop in the same entity\n");
5624: }
5625: NEXT;
5626: }
5627: if (orig != NULL) {
5628: /*
5629: * Ugly mechanism to save the raw entity value.
5630: */
5631: xmlEntityPtr cur = NULL;
5632:
5633: if (isParameter) {
5634: if ((ctxt->sax != NULL) &&
5635: (ctxt->sax->getParameterEntity != NULL))
5636: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5637: } else {
5638: if ((ctxt->sax != NULL) &&
5639: (ctxt->sax->getEntity != NULL))
5640: cur = ctxt->sax->getEntity(ctxt->userData, name);
5641: if ((cur == NULL) && (ctxt->userData==ctxt)) {
5642: cur = xmlSAX2GetEntity(ctxt, name);
5643: }
5644: }
5645: if (cur != NULL) {
5646: if (cur->orig != NULL)
5647: xmlFree(orig);
5648: else
5649: cur->orig = orig;
5650: } else
5651: xmlFree(orig);
5652: }
5653: if (value != NULL) xmlFree(value);
5654: if (URI != NULL) xmlFree(URI);
5655: if (literal != NULL) xmlFree(literal);
5656: }
5657: }
5658:
5659: /**
5660: * xmlParseDefaultDecl:
5661: * @ctxt: an XML parser context
5662: * @value: Receive a possible fixed default value for the attribute
5663: *
5664: * Parse an attribute default declaration
5665: *
5666: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5667: *
5668: * [ VC: Required Attribute ]
5669: * if the default declaration is the keyword #REQUIRED, then the
5670: * attribute must be specified for all elements of the type in the
5671: * attribute-list declaration.
5672: *
5673: * [ VC: Attribute Default Legal ]
5674: * The declared default value must meet the lexical constraints of
5675: * the declared attribute type c.f. xmlValidateAttributeDecl()
5676: *
5677: * [ VC: Fixed Attribute Default ]
5678: * if an attribute has a default value declared with the #FIXED
1.1.1.3 misho 5679: * keyword, instances of that attribute must match the default value.
1.1 misho 5680: *
5681: * [ WFC: No < in Attribute Values ]
5682: * handled in xmlParseAttValue()
5683: *
5684: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
1.1.1.3 misho 5685: * or XML_ATTRIBUTE_FIXED.
1.1 misho 5686: */
5687:
5688: int
5689: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5690: int val;
5691: xmlChar *ret;
5692:
5693: *value = NULL;
5694: if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5695: SKIP(9);
5696: return(XML_ATTRIBUTE_REQUIRED);
5697: }
5698: if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5699: SKIP(8);
5700: return(XML_ATTRIBUTE_IMPLIED);
5701: }
5702: val = XML_ATTRIBUTE_NONE;
5703: if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5704: SKIP(6);
5705: val = XML_ATTRIBUTE_FIXED;
5706: if (!IS_BLANK_CH(CUR)) {
5707: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5708: "Space required after '#FIXED'\n");
5709: }
5710: SKIP_BLANKS;
5711: }
5712: ret = xmlParseAttValue(ctxt);
5713: ctxt->instate = XML_PARSER_DTD;
5714: if (ret == NULL) {
5715: xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5716: "Attribute default value declaration error\n");
5717: } else
5718: *value = ret;
5719: return(val);
5720: }
5721:
5722: /**
5723: * xmlParseNotationType:
5724: * @ctxt: an XML parser context
5725: *
5726: * parse an Notation attribute type.
5727: *
5728: * Note: the leading 'NOTATION' S part has already being parsed...
5729: *
5730: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5731: *
5732: * [ VC: Notation Attributes ]
5733: * Values of this type must match one of the notation names included
1.1.1.3 misho 5734: * in the declaration; all notation names in the declaration must be declared.
1.1 misho 5735: *
5736: * Returns: the notation attribute tree built while parsing
5737: */
5738:
5739: xmlEnumerationPtr
5740: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5741: const xmlChar *name;
5742: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5743:
5744: if (RAW != '(') {
5745: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5746: return(NULL);
5747: }
5748: SHRINK;
5749: do {
5750: NEXT;
5751: SKIP_BLANKS;
5752: name = xmlParseName(ctxt);
5753: if (name == NULL) {
5754: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5755: "Name expected in NOTATION declaration\n");
5756: xmlFreeEnumeration(ret);
5757: return(NULL);
5758: }
5759: tmp = ret;
5760: while (tmp != NULL) {
5761: if (xmlStrEqual(name, tmp->name)) {
5762: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5763: "standalone: attribute notation value token %s duplicated\n",
5764: name, NULL);
5765: if (!xmlDictOwns(ctxt->dict, name))
5766: xmlFree((xmlChar *) name);
5767: break;
5768: }
5769: tmp = tmp->next;
5770: }
5771: if (tmp == NULL) {
5772: cur = xmlCreateEnumeration(name);
5773: if (cur == NULL) {
5774: xmlFreeEnumeration(ret);
5775: return(NULL);
5776: }
5777: if (last == NULL) ret = last = cur;
5778: else {
5779: last->next = cur;
5780: last = cur;
5781: }
5782: }
5783: SKIP_BLANKS;
5784: } while (RAW == '|');
5785: if (RAW != ')') {
5786: xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5787: xmlFreeEnumeration(ret);
5788: return(NULL);
5789: }
5790: NEXT;
5791: return(ret);
5792: }
5793:
5794: /**
5795: * xmlParseEnumerationType:
5796: * @ctxt: an XML parser context
5797: *
5798: * parse an Enumeration attribute type.
5799: *
5800: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5801: *
5802: * [ VC: Enumeration ]
5803: * Values of this type must match one of the Nmtoken tokens in
5804: * the declaration
5805: *
5806: * Returns: the enumeration attribute tree built while parsing
5807: */
5808:
5809: xmlEnumerationPtr
5810: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5811: xmlChar *name;
5812: xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5813:
5814: if (RAW != '(') {
5815: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5816: return(NULL);
5817: }
5818: SHRINK;
5819: do {
5820: NEXT;
5821: SKIP_BLANKS;
5822: name = xmlParseNmtoken(ctxt);
5823: if (name == NULL) {
5824: xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5825: return(ret);
5826: }
5827: tmp = ret;
5828: while (tmp != NULL) {
5829: if (xmlStrEqual(name, tmp->name)) {
5830: xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5831: "standalone: attribute enumeration value token %s duplicated\n",
5832: name, NULL);
5833: if (!xmlDictOwns(ctxt->dict, name))
5834: xmlFree(name);
5835: break;
5836: }
5837: tmp = tmp->next;
5838: }
5839: if (tmp == NULL) {
5840: cur = xmlCreateEnumeration(name);
5841: if (!xmlDictOwns(ctxt->dict, name))
5842: xmlFree(name);
5843: if (cur == NULL) {
5844: xmlFreeEnumeration(ret);
5845: return(NULL);
5846: }
5847: if (last == NULL) ret = last = cur;
5848: else {
5849: last->next = cur;
5850: last = cur;
5851: }
5852: }
5853: SKIP_BLANKS;
5854: } while (RAW == '|');
5855: if (RAW != ')') {
5856: xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5857: return(ret);
5858: }
5859: NEXT;
5860: return(ret);
5861: }
5862:
5863: /**
5864: * xmlParseEnumeratedType:
5865: * @ctxt: an XML parser context
5866: * @tree: the enumeration tree built while parsing
5867: *
5868: * parse an Enumerated attribute type.
5869: *
5870: * [57] EnumeratedType ::= NotationType | Enumeration
5871: *
5872: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5873: *
5874: *
5875: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5876: */
5877:
5878: int
5879: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5880: if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5881: SKIP(8);
5882: if (!IS_BLANK_CH(CUR)) {
5883: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5884: "Space required after 'NOTATION'\n");
5885: return(0);
5886: }
5887: SKIP_BLANKS;
5888: *tree = xmlParseNotationType(ctxt);
5889: if (*tree == NULL) return(0);
5890: return(XML_ATTRIBUTE_NOTATION);
5891: }
5892: *tree = xmlParseEnumerationType(ctxt);
5893: if (*tree == NULL) return(0);
5894: return(XML_ATTRIBUTE_ENUMERATION);
5895: }
5896:
5897: /**
5898: * xmlParseAttributeType:
5899: * @ctxt: an XML parser context
5900: * @tree: the enumeration tree built while parsing
5901: *
5902: * parse the Attribute list def for an element
5903: *
5904: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5905: *
5906: * [55] StringType ::= 'CDATA'
5907: *
5908: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5909: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5910: *
5911: * Validity constraints for attribute values syntax are checked in
5912: * xmlValidateAttributeValue()
5913: *
5914: * [ VC: ID ]
5915: * Values of type ID must match the Name production. A name must not
5916: * appear more than once in an XML document as a value of this type;
5917: * i.e., ID values must uniquely identify the elements which bear them.
5918: *
5919: * [ VC: One ID per Element Type ]
5920: * No element type may have more than one ID attribute specified.
5921: *
5922: * [ VC: ID Attribute Default ]
5923: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5924: *
5925: * [ VC: IDREF ]
5926: * Values of type IDREF must match the Name production, and values
5927: * of type IDREFS must match Names; each IDREF Name must match the value
5928: * of an ID attribute on some element in the XML document; i.e. IDREF
5929: * values must match the value of some ID attribute.
5930: *
5931: * [ VC: Entity Name ]
5932: * Values of type ENTITY must match the Name production, values
5933: * of type ENTITIES must match Names; each Entity Name must match the
1.1.1.3 misho 5934: * name of an unparsed entity declared in the DTD.
1.1 misho 5935: *
5936: * [ VC: Name Token ]
5937: * Values of type NMTOKEN must match the Nmtoken production; values
1.1.1.3 misho 5938: * of type NMTOKENS must match Nmtokens.
1.1 misho 5939: *
5940: * Returns the attribute type
5941: */
1.1.1.3 misho 5942: int
1.1 misho 5943: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5944: SHRINK;
5945: if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5946: SKIP(5);
5947: return(XML_ATTRIBUTE_CDATA);
5948: } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5949: SKIP(6);
5950: return(XML_ATTRIBUTE_IDREFS);
5951: } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5952: SKIP(5);
5953: return(XML_ATTRIBUTE_IDREF);
5954: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5955: SKIP(2);
5956: return(XML_ATTRIBUTE_ID);
5957: } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5958: SKIP(6);
5959: return(XML_ATTRIBUTE_ENTITY);
5960: } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5961: SKIP(8);
5962: return(XML_ATTRIBUTE_ENTITIES);
5963: } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5964: SKIP(8);
5965: return(XML_ATTRIBUTE_NMTOKENS);
5966: } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5967: SKIP(7);
5968: return(XML_ATTRIBUTE_NMTOKEN);
5969: }
5970: return(xmlParseEnumeratedType(ctxt, tree));
5971: }
5972:
5973: /**
5974: * xmlParseAttributeListDecl:
5975: * @ctxt: an XML parser context
5976: *
5977: * : parse the Attribute list def for an element
5978: *
5979: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5980: *
5981: * [53] AttDef ::= S Name S AttType S DefaultDecl
5982: *
5983: */
5984: void
5985: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5986: const xmlChar *elemName;
5987: const xmlChar *attrName;
5988: xmlEnumerationPtr tree;
5989:
5990: if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5991: xmlParserInputPtr input = ctxt->input;
5992:
5993: SKIP(9);
5994: if (!IS_BLANK_CH(CUR)) {
5995: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5996: "Space required after '<!ATTLIST'\n");
5997: }
5998: SKIP_BLANKS;
5999: elemName = xmlParseName(ctxt);
6000: if (elemName == NULL) {
6001: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6002: "ATTLIST: no name for Element\n");
6003: return;
6004: }
6005: SKIP_BLANKS;
6006: GROW;
1.1.1.3 misho 6007: while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6008: const xmlChar *check = CUR_PTR;
6009: int type;
6010: int def;
6011: xmlChar *defaultValue = NULL;
6012:
6013: GROW;
6014: tree = NULL;
6015: attrName = xmlParseName(ctxt);
6016: if (attrName == NULL) {
6017: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6018: "ATTLIST: no name for Attribute\n");
6019: break;
6020: }
6021: GROW;
6022: if (!IS_BLANK_CH(CUR)) {
6023: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6024: "Space required after the attribute name\n");
6025: break;
6026: }
6027: SKIP_BLANKS;
6028:
6029: type = xmlParseAttributeType(ctxt, &tree);
6030: if (type <= 0) {
6031: break;
6032: }
6033:
6034: GROW;
6035: if (!IS_BLANK_CH(CUR)) {
6036: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6037: "Space required after the attribute type\n");
6038: if (tree != NULL)
6039: xmlFreeEnumeration(tree);
6040: break;
6041: }
6042: SKIP_BLANKS;
6043:
6044: def = xmlParseDefaultDecl(ctxt, &defaultValue);
6045: if (def <= 0) {
6046: if (defaultValue != NULL)
6047: xmlFree(defaultValue);
6048: if (tree != NULL)
6049: xmlFreeEnumeration(tree);
6050: break;
6051: }
6052: if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6053: xmlAttrNormalizeSpace(defaultValue, defaultValue);
6054:
6055: GROW;
6056: if (RAW != '>') {
6057: if (!IS_BLANK_CH(CUR)) {
6058: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6059: "Space required after the attribute default value\n");
6060: if (defaultValue != NULL)
6061: xmlFree(defaultValue);
6062: if (tree != NULL)
6063: xmlFreeEnumeration(tree);
6064: break;
6065: }
6066: SKIP_BLANKS;
6067: }
6068: if (check == CUR_PTR) {
6069: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6070: "in xmlParseAttributeListDecl\n");
6071: if (defaultValue != NULL)
6072: xmlFree(defaultValue);
6073: if (tree != NULL)
6074: xmlFreeEnumeration(tree);
6075: break;
6076: }
6077: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6078: (ctxt->sax->attributeDecl != NULL))
6079: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6080: type, def, defaultValue, tree);
6081: else if (tree != NULL)
6082: xmlFreeEnumeration(tree);
6083:
6084: if ((ctxt->sax2) && (defaultValue != NULL) &&
1.1.1.3 misho 6085: (def != XML_ATTRIBUTE_IMPLIED) &&
1.1 misho 6086: (def != XML_ATTRIBUTE_REQUIRED)) {
6087: xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6088: }
6089: if (ctxt->sax2) {
6090: xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6091: }
6092: if (defaultValue != NULL)
6093: xmlFree(defaultValue);
6094: GROW;
6095: }
6096: if (RAW == '>') {
6097: if (input != ctxt->input) {
6098: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6099: "Attribute list declaration doesn't start and stop in the same entity\n",
6100: NULL, NULL);
6101: }
6102: NEXT;
6103: }
6104: }
6105: }
6106:
6107: /**
6108: * xmlParseElementMixedContentDecl:
6109: * @ctxt: an XML parser context
6110: * @inputchk: the input used for the current entity, needed for boundary checks
6111: *
6112: * parse the declaration for a Mixed Element content
6113: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.1.1.3 misho 6114: *
1.1 misho 6115: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6116: * '(' S? '#PCDATA' S? ')'
6117: *
6118: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6119: *
6120: * [ VC: No Duplicate Types ]
6121: * The same name must not appear more than once in a single
1.1.1.3 misho 6122: * mixed-content declaration.
1.1 misho 6123: *
6124: * returns: the list of the xmlElementContentPtr describing the element choices
6125: */
6126: xmlElementContentPtr
6127: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6128: xmlElementContentPtr ret = NULL, cur = NULL, n;
6129: const xmlChar *elem = NULL;
6130:
6131: GROW;
6132: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6133: SKIP(7);
6134: SKIP_BLANKS;
6135: SHRINK;
6136: if (RAW == ')') {
6137: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6138: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6139: "Element content declaration doesn't start and stop in the same entity\n",
6140: NULL, NULL);
6141: }
6142: NEXT;
6143: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6144: if (ret == NULL)
6145: return(NULL);
6146: if (RAW == '*') {
6147: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6148: NEXT;
6149: }
6150: return(ret);
6151: }
6152: if ((RAW == '(') || (RAW == '|')) {
6153: ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6154: if (ret == NULL) return(NULL);
6155: }
1.1.1.3 misho 6156: while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6157: NEXT;
6158: if (elem == NULL) {
6159: ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6160: if (ret == NULL) return(NULL);
6161: ret->c1 = cur;
6162: if (cur != NULL)
6163: cur->parent = ret;
6164: cur = ret;
6165: } else {
6166: n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6167: if (n == NULL) return(NULL);
6168: n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6169: if (n->c1 != NULL)
6170: n->c1->parent = n;
6171: cur->c2 = n;
6172: if (n != NULL)
6173: n->parent = cur;
6174: cur = n;
6175: }
6176: SKIP_BLANKS;
6177: elem = xmlParseName(ctxt);
6178: if (elem == NULL) {
6179: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6180: "xmlParseElementMixedContentDecl : Name expected\n");
6181: xmlFreeDocElementContent(ctxt->myDoc, cur);
6182: return(NULL);
6183: }
6184: SKIP_BLANKS;
6185: GROW;
6186: }
6187: if ((RAW == ')') && (NXT(1) == '*')) {
6188: if (elem != NULL) {
6189: cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6190: XML_ELEMENT_CONTENT_ELEMENT);
6191: if (cur->c2 != NULL)
6192: cur->c2->parent = cur;
6193: }
6194: if (ret != NULL)
6195: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6196: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6197: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6198: "Element content declaration doesn't start and stop in the same entity\n",
6199: NULL, NULL);
6200: }
6201: SKIP(2);
6202: } else {
6203: xmlFreeDocElementContent(ctxt->myDoc, ret);
6204: xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6205: return(NULL);
6206: }
6207:
6208: } else {
6209: xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6210: }
6211: return(ret);
6212: }
6213:
6214: /**
6215: * xmlParseElementChildrenContentDeclPriv:
6216: * @ctxt: an XML parser context
6217: * @inputchk: the input used for the current entity, needed for boundary checks
6218: * @depth: the level of recursion
6219: *
6220: * parse the declaration for the children content model of an element
6221: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.1.1.3 misho 6222: *
1.1 misho 6223: *
6224: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6225: *
6226: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6227: *
6228: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6229: *
6230: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6231: *
6232: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6233: * TODO Parameter-entity replacement text must be properly nested
6234: * with parenthesized groups. That is to say, if either of the
6235: * opening or closing parentheses in a choice, seq, or Mixed
6236: * construct is contained in the replacement text for a parameter
6237: * entity, both must be contained in the same replacement text. For
6238: * interoperability, if a parameter-entity reference appears in a
6239: * choice, seq, or Mixed construct, its replacement text should not
6240: * be empty, and neither the first nor last non-blank character of
6241: * the replacement text should be a connector (| or ,).
6242: *
1.1.1.3 misho 6243: * Returns the tree of xmlElementContentPtr describing the element
1.1 misho 6244: * hierarchy.
6245: */
6246: static xmlElementContentPtr
6247: xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6248: int depth) {
6249: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6250: const xmlChar *elem;
6251: xmlChar type = 0;
6252:
6253: if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6254: (depth > 2048)) {
6255: xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6256: "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6257: depth);
6258: return(NULL);
6259: }
6260: SKIP_BLANKS;
6261: GROW;
6262: if (RAW == '(') {
6263: int inputid = ctxt->input->id;
6264:
6265: /* Recurse on first child */
6266: NEXT;
6267: SKIP_BLANKS;
6268: cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6269: depth + 1);
6270: SKIP_BLANKS;
6271: GROW;
6272: } else {
6273: elem = xmlParseName(ctxt);
6274: if (elem == NULL) {
6275: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6276: return(NULL);
6277: }
6278: cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6279: if (cur == NULL) {
6280: xmlErrMemory(ctxt, NULL);
6281: return(NULL);
6282: }
6283: GROW;
6284: if (RAW == '?') {
6285: cur->ocur = XML_ELEMENT_CONTENT_OPT;
6286: NEXT;
6287: } else if (RAW == '*') {
6288: cur->ocur = XML_ELEMENT_CONTENT_MULT;
6289: NEXT;
6290: } else if (RAW == '+') {
6291: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6292: NEXT;
6293: } else {
6294: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6295: }
6296: GROW;
6297: }
6298: SKIP_BLANKS;
6299: SHRINK;
1.1.1.3 misho 6300: while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6301: /*
6302: * Each loop we parse one separator and one element.
6303: */
6304: if (RAW == ',') {
6305: if (type == 0) type = CUR;
6306:
6307: /*
6308: * Detect "Name | Name , Name" error
6309: */
6310: else if (type != CUR) {
6311: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6312: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6313: type);
6314: if ((last != NULL) && (last != ret))
6315: xmlFreeDocElementContent(ctxt->myDoc, last);
6316: if (ret != NULL)
6317: xmlFreeDocElementContent(ctxt->myDoc, ret);
6318: return(NULL);
6319: }
6320: NEXT;
6321:
6322: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6323: if (op == NULL) {
6324: if ((last != NULL) && (last != ret))
6325: xmlFreeDocElementContent(ctxt->myDoc, last);
6326: xmlFreeDocElementContent(ctxt->myDoc, ret);
6327: return(NULL);
6328: }
6329: if (last == NULL) {
6330: op->c1 = ret;
6331: if (ret != NULL)
6332: ret->parent = op;
6333: ret = cur = op;
6334: } else {
6335: cur->c2 = op;
6336: if (op != NULL)
6337: op->parent = cur;
6338: op->c1 = last;
6339: if (last != NULL)
6340: last->parent = op;
6341: cur =op;
6342: last = NULL;
6343: }
6344: } else if (RAW == '|') {
6345: if (type == 0) type = CUR;
6346:
6347: /*
6348: * Detect "Name , Name | Name" error
6349: */
6350: else if (type != CUR) {
6351: xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6352: "xmlParseElementChildrenContentDecl : '%c' expected\n",
6353: type);
6354: if ((last != NULL) && (last != ret))
6355: xmlFreeDocElementContent(ctxt->myDoc, last);
6356: if (ret != NULL)
6357: xmlFreeDocElementContent(ctxt->myDoc, ret);
6358: return(NULL);
6359: }
6360: NEXT;
6361:
6362: op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6363: if (op == NULL) {
6364: if ((last != NULL) && (last != ret))
6365: xmlFreeDocElementContent(ctxt->myDoc, last);
6366: if (ret != NULL)
6367: xmlFreeDocElementContent(ctxt->myDoc, ret);
6368: return(NULL);
6369: }
6370: if (last == NULL) {
6371: op->c1 = ret;
6372: if (ret != NULL)
6373: ret->parent = op;
6374: ret = cur = op;
6375: } else {
6376: cur->c2 = op;
6377: if (op != NULL)
6378: op->parent = cur;
6379: op->c1 = last;
6380: if (last != NULL)
6381: last->parent = op;
6382: cur =op;
6383: last = NULL;
6384: }
6385: } else {
6386: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6387: if ((last != NULL) && (last != ret))
6388: xmlFreeDocElementContent(ctxt->myDoc, last);
6389: if (ret != NULL)
6390: xmlFreeDocElementContent(ctxt->myDoc, ret);
6391: return(NULL);
6392: }
6393: GROW;
6394: SKIP_BLANKS;
6395: GROW;
6396: if (RAW == '(') {
6397: int inputid = ctxt->input->id;
6398: /* Recurse on second child */
6399: NEXT;
6400: SKIP_BLANKS;
6401: last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6402: depth + 1);
6403: SKIP_BLANKS;
6404: } else {
6405: elem = xmlParseName(ctxt);
6406: if (elem == NULL) {
6407: xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6408: if (ret != NULL)
6409: xmlFreeDocElementContent(ctxt->myDoc, ret);
6410: return(NULL);
6411: }
6412: last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6413: if (last == NULL) {
6414: if (ret != NULL)
6415: xmlFreeDocElementContent(ctxt->myDoc, ret);
6416: return(NULL);
6417: }
6418: if (RAW == '?') {
6419: last->ocur = XML_ELEMENT_CONTENT_OPT;
6420: NEXT;
6421: } else if (RAW == '*') {
6422: last->ocur = XML_ELEMENT_CONTENT_MULT;
6423: NEXT;
6424: } else if (RAW == '+') {
6425: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6426: NEXT;
6427: } else {
6428: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6429: }
6430: }
6431: SKIP_BLANKS;
6432: GROW;
6433: }
6434: if ((cur != NULL) && (last != NULL)) {
6435: cur->c2 = last;
6436: if (last != NULL)
6437: last->parent = cur;
6438: }
6439: if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6440: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6441: "Element content declaration doesn't start and stop in the same entity\n",
6442: NULL, NULL);
6443: }
6444: NEXT;
6445: if (RAW == '?') {
6446: if (ret != NULL) {
6447: if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6448: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6449: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6450: else
6451: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6452: }
6453: NEXT;
6454: } else if (RAW == '*') {
6455: if (ret != NULL) {
6456: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6457: cur = ret;
6458: /*
6459: * Some normalization:
6460: * (a | b* | c?)* == (a | b | c)*
6461: */
6462: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6463: if ((cur->c1 != NULL) &&
6464: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6465: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6466: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6467: if ((cur->c2 != NULL) &&
6468: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6469: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6470: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6471: cur = cur->c2;
6472: }
6473: }
6474: NEXT;
6475: } else if (RAW == '+') {
6476: if (ret != NULL) {
6477: int found = 0;
6478:
6479: if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6480: (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6482: else
6483: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6484: /*
6485: * Some normalization:
6486: * (a | b*)+ == (a | b)*
6487: * (a | b?)+ == (a | b)*
6488: */
6489: while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6490: if ((cur->c1 != NULL) &&
6491: ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6492: (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6493: cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6494: found = 1;
6495: }
6496: if ((cur->c2 != NULL) &&
6497: ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498: (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6499: cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6500: found = 1;
6501: }
6502: cur = cur->c2;
6503: }
6504: if (found)
6505: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6506: }
6507: NEXT;
6508: }
6509: return(ret);
6510: }
6511:
6512: /**
6513: * xmlParseElementChildrenContentDecl:
6514: * @ctxt: an XML parser context
6515: * @inputchk: the input used for the current entity, needed for boundary checks
6516: *
6517: * parse the declaration for a Mixed Element content
6518: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6519: *
6520: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6521: *
6522: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6523: *
6524: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6525: *
6526: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6527: *
6528: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6529: * TODO Parameter-entity replacement text must be properly nested
6530: * with parenthesized groups. That is to say, if either of the
6531: * opening or closing parentheses in a choice, seq, or Mixed
6532: * construct is contained in the replacement text for a parameter
6533: * entity, both must be contained in the same replacement text. For
6534: * interoperability, if a parameter-entity reference appears in a
6535: * choice, seq, or Mixed construct, its replacement text should not
6536: * be empty, and neither the first nor last non-blank character of
6537: * the replacement text should be a connector (| or ,).
6538: *
6539: * Returns the tree of xmlElementContentPtr describing the element
6540: * hierarchy.
6541: */
6542: xmlElementContentPtr
6543: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6544: /* stub left for API/ABI compat */
6545: return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6546: }
6547:
6548: /**
6549: * xmlParseElementContentDecl:
6550: * @ctxt: an XML parser context
6551: * @name: the name of the element being defined.
6552: * @result: the Element Content pointer will be stored here if any
6553: *
6554: * parse the declaration for an Element content either Mixed or Children,
6555: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
1.1.1.3 misho 6556: *
1.1 misho 6557: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6558: *
6559: * returns: the type of element content XML_ELEMENT_TYPE_xxx
6560: */
6561:
6562: int
6563: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6564: xmlElementContentPtr *result) {
6565:
6566: xmlElementContentPtr tree = NULL;
6567: int inputid = ctxt->input->id;
6568: int res;
6569:
6570: *result = NULL;
6571:
6572: if (RAW != '(') {
6573: xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6574: "xmlParseElementContentDecl : %s '(' expected\n", name);
6575: return(-1);
6576: }
6577: NEXT;
6578: GROW;
1.1.1.3 misho 6579: if (ctxt->instate == XML_PARSER_EOF)
6580: return(-1);
1.1 misho 6581: SKIP_BLANKS;
6582: if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6583: tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6584: res = XML_ELEMENT_TYPE_MIXED;
6585: } else {
6586: tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6587: res = XML_ELEMENT_TYPE_ELEMENT;
6588: }
6589: SKIP_BLANKS;
6590: *result = tree;
6591: return(res);
6592: }
6593:
6594: /**
6595: * xmlParseElementDecl:
6596: * @ctxt: an XML parser context
6597: *
6598: * parse an Element declaration.
6599: *
6600: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6601: *
6602: * [ VC: Unique Element Type Declaration ]
6603: * No element type may be declared more than once
6604: *
6605: * Returns the type of the element, or -1 in case of error
6606: */
6607: int
6608: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6609: const xmlChar *name;
6610: int ret = -1;
6611: xmlElementContentPtr content = NULL;
6612:
6613: /* GROW; done in the caller */
6614: if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6615: xmlParserInputPtr input = ctxt->input;
6616:
6617: SKIP(9);
6618: if (!IS_BLANK_CH(CUR)) {
6619: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6620: "Space required after 'ELEMENT'\n");
6621: }
6622: SKIP_BLANKS;
6623: name = xmlParseName(ctxt);
6624: if (name == NULL) {
6625: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6626: "xmlParseElementDecl: no name for Element\n");
6627: return(-1);
6628: }
6629: while ((RAW == 0) && (ctxt->inputNr > 1))
6630: xmlPopInput(ctxt);
6631: if (!IS_BLANK_CH(CUR)) {
6632: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6633: "Space required after the element name\n");
6634: }
6635: SKIP_BLANKS;
6636: if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6637: SKIP(5);
6638: /*
6639: * Element must always be empty.
6640: */
6641: ret = XML_ELEMENT_TYPE_EMPTY;
6642: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6643: (NXT(2) == 'Y')) {
6644: SKIP(3);
6645: /*
6646: * Element is a generic container.
6647: */
6648: ret = XML_ELEMENT_TYPE_ANY;
6649: } else if (RAW == '(') {
6650: ret = xmlParseElementContentDecl(ctxt, name, &content);
6651: } else {
6652: /*
6653: * [ WFC: PEs in Internal Subset ] error handling.
6654: */
6655: if ((RAW == '%') && (ctxt->external == 0) &&
6656: (ctxt->inputNr == 1)) {
6657: xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6658: "PEReference: forbidden within markup decl in internal subset\n");
6659: } else {
6660: xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6661: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6662: }
6663: return(-1);
6664: }
6665:
6666: SKIP_BLANKS;
6667: /*
6668: * Pop-up of finished entities.
6669: */
6670: while ((RAW == 0) && (ctxt->inputNr > 1))
6671: xmlPopInput(ctxt);
6672: SKIP_BLANKS;
6673:
6674: if (RAW != '>') {
6675: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6676: if (content != NULL) {
6677: xmlFreeDocElementContent(ctxt->myDoc, content);
6678: }
6679: } else {
6680: if (input != ctxt->input) {
6681: xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6682: "Element declaration doesn't start and stop in the same entity\n");
6683: }
1.1.1.3 misho 6684:
1.1 misho 6685: NEXT;
6686: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6687: (ctxt->sax->elementDecl != NULL)) {
6688: if (content != NULL)
6689: content->parent = NULL;
6690: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6691: content);
6692: if ((content != NULL) && (content->parent == NULL)) {
6693: /*
6694: * this is a trick: if xmlAddElementDecl is called,
6695: * instead of copying the full tree it is plugged directly
1.1.1.3 misho 6696: * if called from the parser. Avoid duplicating the
1.1 misho 6697: * interfaces or change the API/ABI
6698: */
6699: xmlFreeDocElementContent(ctxt->myDoc, content);
6700: }
6701: } else if (content != NULL) {
6702: xmlFreeDocElementContent(ctxt->myDoc, content);
6703: }
6704: }
6705: }
6706: return(ret);
6707: }
6708:
6709: /**
6710: * xmlParseConditionalSections
6711: * @ctxt: an XML parser context
6712: *
1.1.1.3 misho 6713: * [61] conditionalSect ::= includeSect | ignoreSect
6714: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
1.1 misho 6715: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6716: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6717: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6718: */
6719:
6720: static void
6721: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6722: int id = ctxt->input->id;
6723:
6724: SKIP(3);
6725: SKIP_BLANKS;
6726: if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6727: SKIP(7);
6728: SKIP_BLANKS;
6729: if (RAW != '[') {
6730: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6731: } else {
6732: if (ctxt->input->id != id) {
6733: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6734: "All markup of the conditional section is not in the same entity\n",
6735: NULL, NULL);
6736: }
6737: NEXT;
6738: }
6739: if (xmlParserDebugEntities) {
6740: if ((ctxt->input != NULL) && (ctxt->input->filename))
6741: xmlGenericError(xmlGenericErrorContext,
6742: "%s(%d): ", ctxt->input->filename,
6743: ctxt->input->line);
6744: xmlGenericError(xmlGenericErrorContext,
6745: "Entering INCLUDE Conditional Section\n");
6746: }
6747:
1.1.1.3 misho 6748: while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6749: (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6750: const xmlChar *check = CUR_PTR;
6751: unsigned int cons = ctxt->input->consumed;
6752:
6753: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6754: xmlParseConditionalSections(ctxt);
6755: } else if (IS_BLANK_CH(CUR)) {
6756: NEXT;
6757: } else if (RAW == '%') {
6758: xmlParsePEReference(ctxt);
6759: } else
6760: xmlParseMarkupDecl(ctxt);
6761:
6762: /*
6763: * Pop-up of finished entities.
6764: */
6765: while ((RAW == 0) && (ctxt->inputNr > 1))
6766: xmlPopInput(ctxt);
6767:
6768: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6769: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6770: break;
6771: }
6772: }
6773: if (xmlParserDebugEntities) {
6774: if ((ctxt->input != NULL) && (ctxt->input->filename))
6775: xmlGenericError(xmlGenericErrorContext,
6776: "%s(%d): ", ctxt->input->filename,
6777: ctxt->input->line);
6778: xmlGenericError(xmlGenericErrorContext,
6779: "Leaving INCLUDE Conditional Section\n");
6780: }
6781:
6782: } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6783: int state;
6784: xmlParserInputState instate;
6785: int depth = 0;
6786:
6787: SKIP(6);
6788: SKIP_BLANKS;
6789: if (RAW != '[') {
6790: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6791: } else {
6792: if (ctxt->input->id != id) {
6793: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6794: "All markup of the conditional section is not in the same entity\n",
6795: NULL, NULL);
6796: }
6797: NEXT;
6798: }
6799: if (xmlParserDebugEntities) {
6800: if ((ctxt->input != NULL) && (ctxt->input->filename))
6801: xmlGenericError(xmlGenericErrorContext,
6802: "%s(%d): ", ctxt->input->filename,
6803: ctxt->input->line);
6804: xmlGenericError(xmlGenericErrorContext,
6805: "Entering IGNORE Conditional Section\n");
6806: }
6807:
6808: /*
6809: * Parse up to the end of the conditional section
6810: * But disable SAX event generating DTD building in the meantime
6811: */
6812: state = ctxt->disableSAX;
6813: instate = ctxt->instate;
6814: if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6815: ctxt->instate = XML_PARSER_IGNORE;
6816:
1.1.1.3 misho 6817: while (((depth >= 0) && (RAW != 0)) &&
6818: (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 6819: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6820: depth++;
6821: SKIP(3);
6822: continue;
6823: }
6824: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6825: if (--depth >= 0) SKIP(3);
6826: continue;
6827: }
6828: NEXT;
6829: continue;
6830: }
6831:
6832: ctxt->disableSAX = state;
6833: ctxt->instate = instate;
6834:
6835: if (xmlParserDebugEntities) {
6836: if ((ctxt->input != NULL) && (ctxt->input->filename))
6837: xmlGenericError(xmlGenericErrorContext,
6838: "%s(%d): ", ctxt->input->filename,
6839: ctxt->input->line);
6840: xmlGenericError(xmlGenericErrorContext,
6841: "Leaving IGNORE Conditional Section\n");
6842: }
6843:
6844: } else {
6845: xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6846: }
6847:
6848: if (RAW == 0)
6849: SHRINK;
6850:
6851: if (RAW == 0) {
6852: xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6853: } else {
6854: if (ctxt->input->id != id) {
6855: xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6856: "All markup of the conditional section is not in the same entity\n",
6857: NULL, NULL);
6858: }
6859: SKIP(3);
6860: }
6861: }
6862:
6863: /**
6864: * xmlParseMarkupDecl:
6865: * @ctxt: an XML parser context
1.1.1.3 misho 6866: *
1.1 misho 6867: * parse Markup declarations
6868: *
6869: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6870: * NotationDecl | PI | Comment
6871: *
6872: * [ VC: Proper Declaration/PE Nesting ]
6873: * Parameter-entity replacement text must be properly nested with
6874: * markup declarations. That is to say, if either the first character
6875: * or the last character of a markup declaration (markupdecl above) is
6876: * contained in the replacement text for a parameter-entity reference,
6877: * both must be contained in the same replacement text.
6878: *
6879: * [ WFC: PEs in Internal Subset ]
6880: * In the internal DTD subset, parameter-entity references can occur
6881: * only where markup declarations can occur, not within markup declarations.
6882: * (This does not apply to references that occur in external parameter
1.1.1.3 misho 6883: * entities or to the external subset.)
1.1 misho 6884: */
6885: void
6886: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6887: GROW;
6888: if (CUR == '<') {
6889: if (NXT(1) == '!') {
6890: switch (NXT(2)) {
6891: case 'E':
6892: if (NXT(3) == 'L')
6893: xmlParseElementDecl(ctxt);
6894: else if (NXT(3) == 'N')
6895: xmlParseEntityDecl(ctxt);
6896: break;
6897: case 'A':
6898: xmlParseAttributeListDecl(ctxt);
6899: break;
6900: case 'N':
6901: xmlParseNotationDecl(ctxt);
6902: break;
6903: case '-':
6904: xmlParseComment(ctxt);
6905: break;
6906: default:
6907: /* there is an error but it will be detected later */
6908: break;
6909: }
6910: } else if (NXT(1) == '?') {
6911: xmlParsePI(ctxt);
6912: }
6913: }
6914: /*
6915: * This is only for internal subset. On external entities,
6916: * the replacement is done before parsing stage
6917: */
6918: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6919: xmlParsePEReference(ctxt);
6920:
6921: /*
6922: * Conditional sections are allowed from entities included
6923: * by PE References in the internal subset.
6924: */
6925: if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6926: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6927: xmlParseConditionalSections(ctxt);
6928: }
6929: }
6930:
6931: ctxt->instate = XML_PARSER_DTD;
6932: }
6933:
6934: /**
6935: * xmlParseTextDecl:
6936: * @ctxt: an XML parser context
6937: *
6938: * parse an XML declaration header for external entities
6939: *
6940: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6941: */
6942:
6943: void
6944: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6945: xmlChar *version;
6946: const xmlChar *encoding;
6947:
6948: /*
6949: * We know that '<?xml' is here.
6950: */
6951: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6952: SKIP(5);
6953: } else {
6954: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6955: return;
6956: }
6957:
6958: if (!IS_BLANK_CH(CUR)) {
6959: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6960: "Space needed after '<?xml'\n");
6961: }
6962: SKIP_BLANKS;
6963:
6964: /*
6965: * We may have the VersionInfo here.
6966: */
6967: version = xmlParseVersionInfo(ctxt);
6968: if (version == NULL)
6969: version = xmlCharStrdup(XML_DEFAULT_VERSION);
6970: else {
6971: if (!IS_BLANK_CH(CUR)) {
6972: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6973: "Space needed here\n");
6974: }
6975: }
6976: ctxt->input->version = version;
6977:
6978: /*
6979: * We must have the encoding declaration
6980: */
6981: encoding = xmlParseEncodingDecl(ctxt);
6982: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6983: /*
6984: * The XML REC instructs us to stop parsing right here
6985: */
6986: return;
6987: }
6988: if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6989: xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6990: "Missing encoding in text declaration\n");
6991: }
6992:
6993: SKIP_BLANKS;
6994: if ((RAW == '?') && (NXT(1) == '>')) {
6995: SKIP(2);
6996: } else if (RAW == '>') {
6997: /* Deprecated old WD ... */
6998: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6999: NEXT;
7000: } else {
7001: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7002: MOVETO_ENDTAG(CUR_PTR);
7003: NEXT;
7004: }
7005: }
7006:
7007: /**
7008: * xmlParseExternalSubset:
7009: * @ctxt: an XML parser context
7010: * @ExternalID: the external identifier
7011: * @SystemID: the system identifier (or URL)
1.1.1.3 misho 7012: *
1.1 misho 7013: * parse Markup declarations from an external subset
7014: *
7015: * [30] extSubset ::= textDecl? extSubsetDecl
7016: *
7017: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7018: */
7019: void
7020: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7021: const xmlChar *SystemID) {
7022: xmlDetectSAX2(ctxt);
7023: GROW;
7024:
7025: if ((ctxt->encoding == NULL) &&
7026: (ctxt->input->end - ctxt->input->cur >= 4)) {
7027: xmlChar start[4];
7028: xmlCharEncoding enc;
7029:
7030: start[0] = RAW;
7031: start[1] = NXT(1);
7032: start[2] = NXT(2);
7033: start[3] = NXT(3);
7034: enc = xmlDetectCharEncoding(start, 4);
7035: if (enc != XML_CHAR_ENCODING_NONE)
7036: xmlSwitchEncoding(ctxt, enc);
7037: }
7038:
7039: if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7040: xmlParseTextDecl(ctxt);
7041: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7042: /*
7043: * The XML REC instructs us to stop parsing right here
7044: */
7045: ctxt->instate = XML_PARSER_EOF;
7046: return;
7047: }
7048: }
7049: if (ctxt->myDoc == NULL) {
7050: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7051: if (ctxt->myDoc == NULL) {
7052: xmlErrMemory(ctxt, "New Doc failed");
7053: return;
7054: }
7055: ctxt->myDoc->properties = XML_DOC_INTERNAL;
7056: }
7057: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7058: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7059:
7060: ctxt->instate = XML_PARSER_DTD;
7061: ctxt->external = 1;
7062: while (((RAW == '<') && (NXT(1) == '?')) ||
7063: ((RAW == '<') && (NXT(1) == '!')) ||
7064: (RAW == '%') || IS_BLANK_CH(CUR)) {
7065: const xmlChar *check = CUR_PTR;
7066: unsigned int cons = ctxt->input->consumed;
7067:
7068: GROW;
7069: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7070: xmlParseConditionalSections(ctxt);
7071: } else if (IS_BLANK_CH(CUR)) {
7072: NEXT;
7073: } else if (RAW == '%') {
7074: xmlParsePEReference(ctxt);
7075: } else
7076: xmlParseMarkupDecl(ctxt);
7077:
7078: /*
7079: * Pop-up of finished entities.
7080: */
7081: while ((RAW == 0) && (ctxt->inputNr > 1))
7082: xmlPopInput(ctxt);
7083:
7084: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7085: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7086: break;
7087: }
7088: }
1.1.1.3 misho 7089:
1.1 misho 7090: if (RAW != 0) {
7091: xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7092: }
7093:
7094: }
7095:
7096: /**
7097: * xmlParseReference:
7098: * @ctxt: an XML parser context
7099: *
7100: * parse and handle entity references in content, depending on the SAX
7101: * interface, this may end-up in a call to character() if this is a
7102: * CharRef, a predefined entity, if there is no reference() callback.
7103: * or if the parser was asked to switch to that mode.
7104: *
7105: * [67] Reference ::= EntityRef | CharRef
7106: */
7107: void
7108: xmlParseReference(xmlParserCtxtPtr ctxt) {
7109: xmlEntityPtr ent;
7110: xmlChar *val;
7111: int was_checked;
7112: xmlNodePtr list = NULL;
7113: xmlParserErrors ret = XML_ERR_OK;
7114:
7115:
7116: if (RAW != '&')
7117: return;
7118:
7119: /*
7120: * Simple case of a CharRef
7121: */
7122: if (NXT(1) == '#') {
7123: int i = 0;
7124: xmlChar out[10];
7125: int hex = NXT(2);
7126: int value = xmlParseCharRef(ctxt);
7127:
7128: if (value == 0)
7129: return;
7130: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7131: /*
7132: * So we are using non-UTF-8 buffers
7133: * Check that the char fit on 8bits, if not
7134: * generate a CharRef.
7135: */
7136: if (value <= 0xFF) {
7137: out[0] = value;
7138: out[1] = 0;
7139: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7140: (!ctxt->disableSAX))
7141: ctxt->sax->characters(ctxt->userData, out, 1);
7142: } else {
7143: if ((hex == 'x') || (hex == 'X'))
7144: snprintf((char *)out, sizeof(out), "#x%X", value);
7145: else
7146: snprintf((char *)out, sizeof(out), "#%d", value);
7147: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7148: (!ctxt->disableSAX))
7149: ctxt->sax->reference(ctxt->userData, out);
7150: }
7151: } else {
7152: /*
7153: * Just encode the value in UTF-8
7154: */
7155: COPY_BUF(0 ,out, i, value);
7156: out[i] = 0;
7157: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7158: (!ctxt->disableSAX))
7159: ctxt->sax->characters(ctxt->userData, out, i);
7160: }
7161: return;
7162: }
7163:
7164: /*
7165: * We are seeing an entity reference
7166: */
7167: ent = xmlParseEntityRef(ctxt);
7168: if (ent == NULL) return;
7169: if (!ctxt->wellFormed)
7170: return;
7171: was_checked = ent->checked;
7172:
7173: /* special case of predefined entities */
7174: if ((ent->name == NULL) ||
7175: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7176: val = ent->content;
7177: if (val == NULL) return;
7178: /*
7179: * inline the entity.
7180: */
7181: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7182: (!ctxt->disableSAX))
7183: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7184: return;
7185: }
7186:
7187: /*
7188: * The first reference to the entity trigger a parsing phase
7189: * where the ent->children is filled with the result from
7190: * the parsing.
1.1.1.3 misho 7191: * Note: external parsed entities will not be loaded, it is not
7192: * required for a non-validating parser, unless the parsing option
7193: * of validating, or substituting entities were given. Doing so is
7194: * far more secure as the parser will only process data coming from
7195: * the document entity by default.
7196: */
7197: if ((ent->checked == 0) &&
7198: ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7199: (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
1.1 misho 7200: unsigned long oldnbent = ctxt->nbentities;
7201:
7202: /*
7203: * This is a bit hackish but this seems the best
7204: * way to make sure both SAX and DOM entity support
7205: * behaves okay.
7206: */
7207: void *user_data;
7208: if (ctxt->userData == ctxt)
7209: user_data = NULL;
7210: else
7211: user_data = ctxt->userData;
7212:
7213: /*
7214: * Check that this entity is well formed
7215: * 4.3.2: An internal general parsed entity is well-formed
7216: * if its replacement text matches the production labeled
7217: * content.
7218: */
7219: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7220: ctxt->depth++;
7221: ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7222: user_data, &list);
7223: ctxt->depth--;
7224:
7225: } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7226: ctxt->depth++;
7227: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7228: user_data, ctxt->depth, ent->URI,
7229: ent->ExternalID, &list);
7230: ctxt->depth--;
7231: } else {
7232: ret = XML_ERR_ENTITY_PE_INTERNAL;
7233: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7234: "invalid entity type found\n", NULL);
7235: }
7236:
7237: /*
7238: * Store the number of entities needing parsing for this entity
7239: * content and do checkings
7240: */
1.1.1.3 misho 7241: ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7242: if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7243: ent->checked |= 1;
1.1 misho 7244: if (ret == XML_ERR_ENTITY_LOOP) {
7245: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7246: xmlFreeNodeList(list);
7247: return;
7248: }
1.1.1.3 misho 7249: if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
1.1 misho 7250: xmlFreeNodeList(list);
7251: return;
7252: }
7253:
7254: if ((ret == XML_ERR_OK) && (list != NULL)) {
7255: if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7256: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7257: (ent->children == NULL)) {
7258: ent->children = list;
7259: if (ctxt->replaceEntities) {
7260: /*
7261: * Prune it directly in the generated document
7262: * except for single text nodes.
7263: */
7264: if (((list->type == XML_TEXT_NODE) &&
7265: (list->next == NULL)) ||
7266: (ctxt->parseMode == XML_PARSE_READER)) {
7267: list->parent = (xmlNodePtr) ent;
7268: list = NULL;
7269: ent->owner = 1;
7270: } else {
7271: ent->owner = 0;
7272: while (list != NULL) {
7273: list->parent = (xmlNodePtr) ctxt->node;
7274: list->doc = ctxt->myDoc;
7275: if (list->next == NULL)
7276: ent->last = list;
7277: list = list->next;
7278: }
7279: list = ent->children;
7280: #ifdef LIBXML_LEGACY_ENABLED
7281: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7282: xmlAddEntityReference(ent, list, NULL);
7283: #endif /* LIBXML_LEGACY_ENABLED */
7284: }
7285: } else {
7286: ent->owner = 1;
7287: while (list != NULL) {
7288: list->parent = (xmlNodePtr) ent;
1.1.1.2 misho 7289: xmlSetTreeDoc(list, ent->doc);
1.1 misho 7290: if (list->next == NULL)
7291: ent->last = list;
7292: list = list->next;
7293: }
7294: }
7295: } else {
7296: xmlFreeNodeList(list);
7297: list = NULL;
7298: }
7299: } else if ((ret != XML_ERR_OK) &&
7300: (ret != XML_WAR_UNDECLARED_ENTITY)) {
7301: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7302: "Entity '%s' failed to parse\n", ent->name);
7303: } else if (list != NULL) {
7304: xmlFreeNodeList(list);
7305: list = NULL;
7306: }
7307: if (ent->checked == 0)
1.1.1.3 misho 7308: ent->checked = 2;
1.1 misho 7309: } else if (ent->checked != 1) {
1.1.1.3 misho 7310: ctxt->nbentities += ent->checked / 2;
1.1 misho 7311: }
7312:
7313: /*
7314: * Now that the entity content has been gathered
7315: * provide it to the application, this can take different forms based
7316: * on the parsing modes.
7317: */
7318: if (ent->children == NULL) {
7319: /*
7320: * Probably running in SAX mode and the callbacks don't
7321: * build the entity content. So unless we already went
7322: * though parsing for first checking go though the entity
7323: * content to generate callbacks associated to the entity
7324: */
7325: if (was_checked != 0) {
7326: void *user_data;
7327: /*
7328: * This is a bit hackish but this seems the best
7329: * way to make sure both SAX and DOM entity support
7330: * behaves okay.
7331: */
7332: if (ctxt->userData == ctxt)
7333: user_data = NULL;
7334: else
7335: user_data = ctxt->userData;
7336:
7337: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7338: ctxt->depth++;
7339: ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7340: ent->content, user_data, NULL);
7341: ctxt->depth--;
7342: } else if (ent->etype ==
7343: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7344: ctxt->depth++;
7345: ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7346: ctxt->sax, user_data, ctxt->depth,
7347: ent->URI, ent->ExternalID, NULL);
7348: ctxt->depth--;
7349: } else {
7350: ret = XML_ERR_ENTITY_PE_INTERNAL;
7351: xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7352: "invalid entity type found\n", NULL);
7353: }
7354: if (ret == XML_ERR_ENTITY_LOOP) {
7355: xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7356: return;
7357: }
7358: }
7359: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7360: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7361: /*
7362: * Entity reference callback comes second, it's somewhat
7363: * superfluous but a compatibility to historical behaviour
7364: */
7365: ctxt->sax->reference(ctxt->userData, ent->name);
7366: }
7367: return;
7368: }
7369:
7370: /*
7371: * If we didn't get any children for the entity being built
7372: */
7373: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7374: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7375: /*
7376: * Create a node.
7377: */
7378: ctxt->sax->reference(ctxt->userData, ent->name);
7379: return;
7380: }
7381:
7382: if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7383: /*
7384: * There is a problem on the handling of _private for entities
7385: * (bug 155816): Should we copy the content of the field from
7386: * the entity (possibly overwriting some value set by the user
7387: * when a copy is created), should we leave it alone, or should
7388: * we try to take care of different situations? The problem
7389: * is exacerbated by the usage of this field by the xmlReader.
7390: * To fix this bug, we look at _private on the created node
7391: * and, if it's NULL, we copy in whatever was in the entity.
7392: * If it's not NULL we leave it alone. This is somewhat of a
7393: * hack - maybe we should have further tests to determine
7394: * what to do.
7395: */
7396: if ((ctxt->node != NULL) && (ent->children != NULL)) {
7397: /*
7398: * Seems we are generating the DOM content, do
7399: * a simple tree copy for all references except the first
7400: * In the first occurrence list contains the replacement.
7401: */
7402: if (((list == NULL) && (ent->owner == 0)) ||
7403: (ctxt->parseMode == XML_PARSE_READER)) {
7404: xmlNodePtr nw = NULL, cur, firstChild = NULL;
7405:
7406: /*
1.1.1.3 misho 7407: * We are copying here, make sure there is no abuse
7408: */
7409: ctxt->sizeentcopy += ent->length;
7410: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7411: return;
7412:
7413: /*
1.1 misho 7414: * when operating on a reader, the entities definitions
7415: * are always owning the entities subtree.
7416: if (ctxt->parseMode == XML_PARSE_READER)
7417: ent->owner = 1;
7418: */
7419:
7420: cur = ent->children;
7421: while (cur != NULL) {
7422: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7423: if (nw != NULL) {
7424: if (nw->_private == NULL)
7425: nw->_private = cur->_private;
7426: if (firstChild == NULL){
7427: firstChild = nw;
7428: }
7429: nw = xmlAddChild(ctxt->node, nw);
7430: }
7431: if (cur == ent->last) {
7432: /*
7433: * needed to detect some strange empty
7434: * node cases in the reader tests
7435: */
7436: if ((ctxt->parseMode == XML_PARSE_READER) &&
7437: (nw != NULL) &&
7438: (nw->type == XML_ELEMENT_NODE) &&
7439: (nw->children == NULL))
7440: nw->extra = 1;
7441:
7442: break;
7443: }
7444: cur = cur->next;
7445: }
7446: #ifdef LIBXML_LEGACY_ENABLED
7447: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7448: xmlAddEntityReference(ent, firstChild, nw);
7449: #endif /* LIBXML_LEGACY_ENABLED */
1.1.1.3 misho 7450: } else if ((list == NULL) || (ctxt->inputNr > 0)) {
1.1 misho 7451: xmlNodePtr nw = NULL, cur, next, last,
7452: firstChild = NULL;
1.1.1.3 misho 7453:
7454: /*
7455: * We are copying here, make sure there is no abuse
7456: */
7457: ctxt->sizeentcopy += ent->length;
7458: if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7459: return;
7460:
1.1 misho 7461: /*
7462: * Copy the entity child list and make it the new
7463: * entity child list. The goal is to make sure any
7464: * ID or REF referenced will be the one from the
7465: * document content and not the entity copy.
7466: */
7467: cur = ent->children;
7468: ent->children = NULL;
7469: last = ent->last;
7470: ent->last = NULL;
7471: while (cur != NULL) {
7472: next = cur->next;
7473: cur->next = NULL;
7474: cur->parent = NULL;
7475: nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7476: if (nw != NULL) {
7477: if (nw->_private == NULL)
7478: nw->_private = cur->_private;
7479: if (firstChild == NULL){
7480: firstChild = cur;
7481: }
7482: xmlAddChild((xmlNodePtr) ent, nw);
7483: xmlAddChild(ctxt->node, cur);
7484: }
7485: if (cur == last)
7486: break;
7487: cur = next;
7488: }
7489: if (ent->owner == 0)
7490: ent->owner = 1;
7491: #ifdef LIBXML_LEGACY_ENABLED
7492: if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7493: xmlAddEntityReference(ent, firstChild, nw);
7494: #endif /* LIBXML_LEGACY_ENABLED */
7495: } else {
7496: const xmlChar *nbktext;
7497:
7498: /*
7499: * the name change is to avoid coalescing of the
7500: * node with a possible previous text one which
7501: * would make ent->children a dangling pointer
7502: */
7503: nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7504: -1);
7505: if (ent->children->type == XML_TEXT_NODE)
7506: ent->children->name = nbktext;
7507: if ((ent->last != ent->children) &&
7508: (ent->last->type == XML_TEXT_NODE))
7509: ent->last->name = nbktext;
7510: xmlAddChildList(ctxt->node, ent->children);
7511: }
7512:
7513: /*
7514: * This is to avoid a nasty side effect, see
7515: * characters() in SAX.c
7516: */
7517: ctxt->nodemem = 0;
7518: ctxt->nodelen = 0;
7519: return;
7520: }
7521: }
7522: }
7523:
7524: /**
7525: * xmlParseEntityRef:
7526: * @ctxt: an XML parser context
7527: *
7528: * parse ENTITY references declarations
7529: *
7530: * [68] EntityRef ::= '&' Name ';'
7531: *
7532: * [ WFC: Entity Declared ]
7533: * In a document without any DTD, a document with only an internal DTD
7534: * subset which contains no parameter entity references, or a document
7535: * with "standalone='yes'", the Name given in the entity reference
7536: * must match that in an entity declaration, except that well-formed
7537: * documents need not declare any of the following entities: amp, lt,
7538: * gt, apos, quot. The declaration of a parameter entity must precede
7539: * any reference to it. Similarly, the declaration of a general entity
7540: * must precede any reference to it which appears in a default value in an
7541: * attribute-list declaration. Note that if entities are declared in the
7542: * external subset or in external parameter entities, a non-validating
7543: * processor is not obligated to read and process their declarations;
7544: * for such documents, the rule that an entity must be declared is a
7545: * well-formedness constraint only if standalone='yes'.
7546: *
7547: * [ WFC: Parsed Entity ]
7548: * An entity reference must not contain the name of an unparsed entity
7549: *
7550: * Returns the xmlEntityPtr if found, or NULL otherwise.
7551: */
7552: xmlEntityPtr
7553: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7554: const xmlChar *name;
7555: xmlEntityPtr ent = NULL;
7556:
7557: GROW;
1.1.1.3 misho 7558: if (ctxt->instate == XML_PARSER_EOF)
7559: return(NULL);
1.1 misho 7560:
7561: if (RAW != '&')
7562: return(NULL);
7563: NEXT;
7564: name = xmlParseName(ctxt);
7565: if (name == NULL) {
7566: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7567: "xmlParseEntityRef: no name\n");
7568: return(NULL);
7569: }
7570: if (RAW != ';') {
7571: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7572: return(NULL);
7573: }
7574: NEXT;
7575:
7576: /*
1.1.1.3 misho 7577: * Predefined entities override any extra definition
1.1 misho 7578: */
7579: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7580: ent = xmlGetPredefinedEntity(name);
7581: if (ent != NULL)
7582: return(ent);
7583: }
7584:
7585: /*
1.1.1.3 misho 7586: * Increase the number of entity references parsed
1.1 misho 7587: */
7588: ctxt->nbentities++;
7589:
7590: /*
7591: * Ask first SAX for entity resolution, otherwise try the
7592: * entities which may have stored in the parser context.
7593: */
7594: if (ctxt->sax != NULL) {
7595: if (ctxt->sax->getEntity != NULL)
7596: ent = ctxt->sax->getEntity(ctxt->userData, name);
1.1.1.3 misho 7597: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
1.1 misho 7598: (ctxt->options & XML_PARSE_OLDSAX))
7599: ent = xmlGetPredefinedEntity(name);
7600: if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7601: (ctxt->userData==ctxt)) {
7602: ent = xmlSAX2GetEntity(ctxt, name);
7603: }
7604: }
1.1.1.3 misho 7605: if (ctxt->instate == XML_PARSER_EOF)
7606: return(NULL);
1.1 misho 7607: /*
7608: * [ WFC: Entity Declared ]
7609: * In a document without any DTD, a document with only an
7610: * internal DTD subset which contains no parameter entity
7611: * references, or a document with "standalone='yes'", the
7612: * Name given in the entity reference must match that in an
7613: * entity declaration, except that well-formed documents
7614: * need not declare any of the following entities: amp, lt,
7615: * gt, apos, quot.
7616: * The declaration of a parameter entity must precede any
7617: * reference to it.
7618: * Similarly, the declaration of a general entity must
7619: * precede any reference to it which appears in a default
7620: * value in an attribute-list declaration. Note that if
7621: * entities are declared in the external subset or in
7622: * external parameter entities, a non-validating processor
7623: * is not obligated to read and process their declarations;
7624: * for such documents, the rule that an entity must be
7625: * declared is a well-formedness constraint only if
7626: * standalone='yes'.
7627: */
7628: if (ent == NULL) {
7629: if ((ctxt->standalone == 1) ||
7630: ((ctxt->hasExternalSubset == 0) &&
7631: (ctxt->hasPErefs == 0))) {
7632: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7633: "Entity '%s' not defined\n", name);
7634: } else {
7635: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7636: "Entity '%s' not defined\n", name);
7637: if ((ctxt->inSubset == 0) &&
7638: (ctxt->sax != NULL) &&
7639: (ctxt->sax->reference != NULL)) {
7640: ctxt->sax->reference(ctxt->userData, name);
7641: }
7642: }
7643: ctxt->valid = 0;
7644: }
7645:
7646: /*
7647: * [ WFC: Parsed Entity ]
7648: * An entity reference must not contain the name of an
7649: * unparsed entity
7650: */
7651: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7652: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7653: "Entity reference to unparsed entity %s\n", name);
7654: }
7655:
7656: /*
7657: * [ WFC: No External Entity References ]
7658: * Attribute values cannot contain direct or indirect
7659: * entity references to external entities.
7660: */
7661: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7662: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7663: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7664: "Attribute references external entity '%s'\n", name);
7665: }
7666: /*
7667: * [ WFC: No < in Attribute Values ]
7668: * The replacement text of any entity referred to directly or
7669: * indirectly in an attribute value (other than "<") must
1.1.1.3 misho 7670: * not contain a <.
1.1 misho 7671: */
7672: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.1.1.3 misho 7673: (ent != NULL) &&
7674: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7675: if ((ent->checked & 1) || ((ent->checked == 0) &&
7676: (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7677: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7678: "'<' in entity '%s' is not allowed in attributes values\n", name);
7679: }
1.1 misho 7680: }
7681:
7682: /*
7683: * Internal check, no parameter entities here ...
7684: */
7685: else {
7686: switch (ent->etype) {
7687: case XML_INTERNAL_PARAMETER_ENTITY:
7688: case XML_EXTERNAL_PARAMETER_ENTITY:
7689: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7690: "Attempt to reference the parameter entity '%s'\n",
7691: name);
7692: break;
7693: default:
7694: break;
7695: }
7696: }
7697:
7698: /*
7699: * [ WFC: No Recursion ]
7700: * A parsed entity must not contain a recursive reference
1.1.1.3 misho 7701: * to itself, either directly or indirectly.
1.1 misho 7702: * Done somewhere else
7703: */
7704: return(ent);
7705: }
7706:
7707: /**
7708: * xmlParseStringEntityRef:
7709: * @ctxt: an XML parser context
7710: * @str: a pointer to an index in the string
7711: *
7712: * parse ENTITY references declarations, but this version parses it from
7713: * a string value.
7714: *
7715: * [68] EntityRef ::= '&' Name ';'
7716: *
7717: * [ WFC: Entity Declared ]
7718: * In a document without any DTD, a document with only an internal DTD
7719: * subset which contains no parameter entity references, or a document
7720: * with "standalone='yes'", the Name given in the entity reference
7721: * must match that in an entity declaration, except that well-formed
7722: * documents need not declare any of the following entities: amp, lt,
7723: * gt, apos, quot. The declaration of a parameter entity must precede
7724: * any reference to it. Similarly, the declaration of a general entity
7725: * must precede any reference to it which appears in a default value in an
7726: * attribute-list declaration. Note that if entities are declared in the
7727: * external subset or in external parameter entities, a non-validating
7728: * processor is not obligated to read and process their declarations;
7729: * for such documents, the rule that an entity must be declared is a
7730: * well-formedness constraint only if standalone='yes'.
7731: *
7732: * [ WFC: Parsed Entity ]
7733: * An entity reference must not contain the name of an unparsed entity
7734: *
7735: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7736: * is updated to the current location in the string.
7737: */
7738: static xmlEntityPtr
7739: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7740: xmlChar *name;
7741: const xmlChar *ptr;
7742: xmlChar cur;
7743: xmlEntityPtr ent = NULL;
7744:
7745: if ((str == NULL) || (*str == NULL))
7746: return(NULL);
7747: ptr = *str;
7748: cur = *ptr;
7749: if (cur != '&')
7750: return(NULL);
7751:
7752: ptr++;
7753: name = xmlParseStringName(ctxt, &ptr);
7754: if (name == NULL) {
7755: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7756: "xmlParseStringEntityRef: no name\n");
7757: *str = ptr;
7758: return(NULL);
7759: }
7760: if (*ptr != ';') {
7761: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7762: xmlFree(name);
7763: *str = ptr;
7764: return(NULL);
7765: }
7766: ptr++;
7767:
7768:
7769: /*
7770: * Predefined entites override any extra definition
7771: */
7772: if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7773: ent = xmlGetPredefinedEntity(name);
7774: if (ent != NULL) {
7775: xmlFree(name);
7776: *str = ptr;
7777: return(ent);
7778: }
7779: }
7780:
7781: /*
7782: * Increate the number of entity references parsed
7783: */
7784: ctxt->nbentities++;
7785:
7786: /*
7787: * Ask first SAX for entity resolution, otherwise try the
7788: * entities which may have stored in the parser context.
7789: */
7790: if (ctxt->sax != NULL) {
7791: if (ctxt->sax->getEntity != NULL)
7792: ent = ctxt->sax->getEntity(ctxt->userData, name);
7793: if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7794: ent = xmlGetPredefinedEntity(name);
7795: if ((ent == NULL) && (ctxt->userData==ctxt)) {
7796: ent = xmlSAX2GetEntity(ctxt, name);
7797: }
7798: }
1.1.1.3 misho 7799: if (ctxt->instate == XML_PARSER_EOF) {
7800: xmlFree(name);
7801: return(NULL);
7802: }
1.1 misho 7803:
7804: /*
7805: * [ WFC: Entity Declared ]
7806: * In a document without any DTD, a document with only an
7807: * internal DTD subset which contains no parameter entity
7808: * references, or a document with "standalone='yes'", the
7809: * Name given in the entity reference must match that in an
7810: * entity declaration, except that well-formed documents
7811: * need not declare any of the following entities: amp, lt,
7812: * gt, apos, quot.
7813: * The declaration of a parameter entity must precede any
7814: * reference to it.
7815: * Similarly, the declaration of a general entity must
7816: * precede any reference to it which appears in a default
7817: * value in an attribute-list declaration. Note that if
7818: * entities are declared in the external subset or in
7819: * external parameter entities, a non-validating processor
7820: * is not obligated to read and process their declarations;
7821: * for such documents, the rule that an entity must be
7822: * declared is a well-formedness constraint only if
1.1.1.3 misho 7823: * standalone='yes'.
1.1 misho 7824: */
7825: if (ent == NULL) {
7826: if ((ctxt->standalone == 1) ||
7827: ((ctxt->hasExternalSubset == 0) &&
7828: (ctxt->hasPErefs == 0))) {
7829: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7830: "Entity '%s' not defined\n", name);
7831: } else {
7832: xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7833: "Entity '%s' not defined\n",
7834: name);
7835: }
7836: /* TODO ? check regressions ctxt->valid = 0; */
7837: }
7838:
7839: /*
7840: * [ WFC: Parsed Entity ]
7841: * An entity reference must not contain the name of an
7842: * unparsed entity
7843: */
7844: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7845: xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7846: "Entity reference to unparsed entity %s\n", name);
7847: }
7848:
7849: /*
7850: * [ WFC: No External Entity References ]
7851: * Attribute values cannot contain direct or indirect
7852: * entity references to external entities.
7853: */
7854: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7855: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7856: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7857: "Attribute references external entity '%s'\n", name);
7858: }
7859: /*
7860: * [ WFC: No < in Attribute Values ]
7861: * The replacement text of any entity referred to directly or
7862: * indirectly in an attribute value (other than "<") must
7863: * not contain a <.
7864: */
7865: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7866: (ent != NULL) && (ent->content != NULL) &&
7867: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7868: (xmlStrchr(ent->content, '<'))) {
7869: xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7870: "'<' in entity '%s' is not allowed in attributes values\n",
7871: name);
7872: }
7873:
7874: /*
7875: * Internal check, no parameter entities here ...
7876: */
7877: else {
7878: switch (ent->etype) {
7879: case XML_INTERNAL_PARAMETER_ENTITY:
7880: case XML_EXTERNAL_PARAMETER_ENTITY:
7881: xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7882: "Attempt to reference the parameter entity '%s'\n",
7883: name);
7884: break;
7885: default:
7886: break;
7887: }
7888: }
7889:
7890: /*
7891: * [ WFC: No Recursion ]
7892: * A parsed entity must not contain a recursive reference
7893: * to itself, either directly or indirectly.
7894: * Done somewhere else
7895: */
7896:
7897: xmlFree(name);
7898: *str = ptr;
7899: return(ent);
7900: }
7901:
7902: /**
7903: * xmlParsePEReference:
7904: * @ctxt: an XML parser context
7905: *
7906: * parse PEReference declarations
7907: * The entity content is handled directly by pushing it's content as
7908: * a new input stream.
7909: *
7910: * [69] PEReference ::= '%' Name ';'
7911: *
7912: * [ WFC: No Recursion ]
7913: * A parsed entity must not contain a recursive
1.1.1.3 misho 7914: * reference to itself, either directly or indirectly.
1.1 misho 7915: *
7916: * [ WFC: Entity Declared ]
7917: * In a document without any DTD, a document with only an internal DTD
7918: * subset which contains no parameter entity references, or a document
7919: * with "standalone='yes'", ... ... The declaration of a parameter
7920: * entity must precede any reference to it...
7921: *
7922: * [ VC: Entity Declared ]
7923: * In a document with an external subset or external parameter entities
7924: * with "standalone='no'", ... ... The declaration of a parameter entity
7925: * must precede any reference to it...
7926: *
7927: * [ WFC: In DTD ]
7928: * Parameter-entity references may only appear in the DTD.
7929: * NOTE: misleading but this is handled.
7930: */
7931: void
7932: xmlParsePEReference(xmlParserCtxtPtr ctxt)
7933: {
7934: const xmlChar *name;
7935: xmlEntityPtr entity = NULL;
7936: xmlParserInputPtr input;
7937:
7938: if (RAW != '%')
7939: return;
7940: NEXT;
7941: name = xmlParseName(ctxt);
7942: if (name == NULL) {
7943: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7944: "xmlParsePEReference: no name\n");
7945: return;
7946: }
7947: if (RAW != ';') {
7948: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7949: return;
7950: }
7951:
7952: NEXT;
7953:
7954: /*
7955: * Increate the number of entity references parsed
7956: */
7957: ctxt->nbentities++;
7958:
7959: /*
7960: * Request the entity from SAX
7961: */
7962: if ((ctxt->sax != NULL) &&
7963: (ctxt->sax->getParameterEntity != NULL))
1.1.1.3 misho 7964: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7965: if (ctxt->instate == XML_PARSER_EOF)
7966: return;
1.1 misho 7967: if (entity == NULL) {
7968: /*
7969: * [ WFC: Entity Declared ]
7970: * In a document without any DTD, a document with only an
7971: * internal DTD subset which contains no parameter entity
7972: * references, or a document with "standalone='yes'", ...
7973: * ... The declaration of a parameter entity must precede
7974: * any reference to it...
7975: */
7976: if ((ctxt->standalone == 1) ||
7977: ((ctxt->hasExternalSubset == 0) &&
7978: (ctxt->hasPErefs == 0))) {
7979: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7980: "PEReference: %%%s; not found\n",
7981: name);
7982: } else {
7983: /*
7984: * [ VC: Entity Declared ]
7985: * In a document with an external subset or external
7986: * parameter entities with "standalone='no'", ...
7987: * ... The declaration of a parameter entity must
7988: * precede any reference to it...
7989: */
7990: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7991: "PEReference: %%%s; not found\n",
7992: name, NULL);
7993: ctxt->valid = 0;
7994: }
7995: } else {
7996: /*
7997: * Internal checking in case the entity quest barfed
7998: */
7999: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8000: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8001: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8002: "Internal: %%%s; is not a parameter entity\n",
8003: name, NULL);
8004: } else if (ctxt->input->free != deallocblankswrapper) {
8005: input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8006: if (xmlPushInput(ctxt, input) < 0)
8007: return;
8008: } else {
8009: /*
8010: * TODO !!!
8011: * handle the extra spaces added before and after
8012: * c.f. http://www.w3.org/TR/REC-xml#as-PE
8013: */
8014: input = xmlNewEntityInputStream(ctxt, entity);
8015: if (xmlPushInput(ctxt, input) < 0)
8016: return;
8017: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8018: (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8019: (IS_BLANK_CH(NXT(5)))) {
8020: xmlParseTextDecl(ctxt);
8021: if (ctxt->errNo ==
8022: XML_ERR_UNSUPPORTED_ENCODING) {
8023: /*
8024: * The XML REC instructs us to stop parsing
8025: * right here
8026: */
8027: ctxt->instate = XML_PARSER_EOF;
8028: return;
8029: }
8030: }
8031: }
8032: }
8033: ctxt->hasPErefs = 1;
8034: }
8035:
8036: /**
8037: * xmlLoadEntityContent:
8038: * @ctxt: an XML parser context
8039: * @entity: an unloaded system entity
8040: *
8041: * Load the original content of the given system entity from the
8042: * ExternalID/SystemID given. This is to be used for Included in Literal
8043: * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8044: *
8045: * Returns 0 in case of success and -1 in case of failure
8046: */
8047: static int
8048: xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8049: xmlParserInputPtr input;
8050: xmlBufferPtr buf;
8051: int l, c;
8052: int count = 0;
8053:
8054: if ((ctxt == NULL) || (entity == NULL) ||
8055: ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8056: (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8057: (entity->content != NULL)) {
8058: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8059: "xmlLoadEntityContent parameter error");
8060: return(-1);
8061: }
8062:
8063: if (xmlParserDebugEntities)
8064: xmlGenericError(xmlGenericErrorContext,
8065: "Reading %s entity content input\n", entity->name);
8066:
8067: buf = xmlBufferCreate();
8068: if (buf == NULL) {
8069: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8070: "xmlLoadEntityContent parameter error");
8071: return(-1);
8072: }
8073:
8074: input = xmlNewEntityInputStream(ctxt, entity);
8075: if (input == NULL) {
8076: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8077: "xmlLoadEntityContent input error");
8078: xmlBufferFree(buf);
8079: return(-1);
8080: }
8081:
8082: /*
8083: * Push the entity as the current input, read char by char
8084: * saving to the buffer until the end of the entity or an error
8085: */
8086: if (xmlPushInput(ctxt, input) < 0) {
8087: xmlBufferFree(buf);
8088: return(-1);
8089: }
8090:
8091: GROW;
8092: c = CUR_CHAR(l);
8093: while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8094: (IS_CHAR(c))) {
8095: xmlBufferAdd(buf, ctxt->input->cur, l);
1.1.1.3 misho 8096: if (count++ > XML_PARSER_CHUNK_SIZE) {
1.1 misho 8097: count = 0;
8098: GROW;
1.1.1.3 misho 8099: if (ctxt->instate == XML_PARSER_EOF) {
8100: xmlBufferFree(buf);
8101: return(-1);
8102: }
1.1 misho 8103: }
8104: NEXTL(l);
8105: c = CUR_CHAR(l);
1.1.1.3 misho 8106: if (c == 0) {
8107: count = 0;
8108: GROW;
8109: if (ctxt->instate == XML_PARSER_EOF) {
8110: xmlBufferFree(buf);
8111: return(-1);
8112: }
8113: c = CUR_CHAR(l);
8114: }
1.1 misho 8115: }
8116:
8117: if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8118: xmlPopInput(ctxt);
8119: } else if (!IS_CHAR(c)) {
8120: xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8121: "xmlLoadEntityContent: invalid char value %d\n",
8122: c);
8123: xmlBufferFree(buf);
8124: return(-1);
8125: }
8126: entity->content = buf->content;
8127: buf->content = NULL;
8128: xmlBufferFree(buf);
8129:
8130: return(0);
8131: }
8132:
8133: /**
8134: * xmlParseStringPEReference:
8135: * @ctxt: an XML parser context
8136: * @str: a pointer to an index in the string
8137: *
8138: * parse PEReference declarations
8139: *
8140: * [69] PEReference ::= '%' Name ';'
8141: *
8142: * [ WFC: No Recursion ]
8143: * A parsed entity must not contain a recursive
8144: * reference to itself, either directly or indirectly.
8145: *
8146: * [ WFC: Entity Declared ]
8147: * In a document without any DTD, a document with only an internal DTD
8148: * subset which contains no parameter entity references, or a document
8149: * with "standalone='yes'", ... ... The declaration of a parameter
8150: * entity must precede any reference to it...
8151: *
8152: * [ VC: Entity Declared ]
8153: * In a document with an external subset or external parameter entities
8154: * with "standalone='no'", ... ... The declaration of a parameter entity
8155: * must precede any reference to it...
8156: *
8157: * [ WFC: In DTD ]
8158: * Parameter-entity references may only appear in the DTD.
8159: * NOTE: misleading but this is handled.
8160: *
8161: * Returns the string of the entity content.
8162: * str is updated to the current value of the index
8163: */
8164: static xmlEntityPtr
8165: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8166: const xmlChar *ptr;
8167: xmlChar cur;
8168: xmlChar *name;
8169: xmlEntityPtr entity = NULL;
8170:
8171: if ((str == NULL) || (*str == NULL)) return(NULL);
8172: ptr = *str;
8173: cur = *ptr;
8174: if (cur != '%')
8175: return(NULL);
8176: ptr++;
8177: name = xmlParseStringName(ctxt, &ptr);
8178: if (name == NULL) {
8179: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8180: "xmlParseStringPEReference: no name\n");
8181: *str = ptr;
8182: return(NULL);
8183: }
8184: cur = *ptr;
8185: if (cur != ';') {
8186: xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8187: xmlFree(name);
8188: *str = ptr;
8189: return(NULL);
8190: }
8191: ptr++;
8192:
8193: /*
8194: * Increate the number of entity references parsed
8195: */
8196: ctxt->nbentities++;
8197:
8198: /*
8199: * Request the entity from SAX
8200: */
8201: if ((ctxt->sax != NULL) &&
8202: (ctxt->sax->getParameterEntity != NULL))
1.1.1.3 misho 8203: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8204: if (ctxt->instate == XML_PARSER_EOF) {
8205: xmlFree(name);
8206: return(NULL);
8207: }
1.1 misho 8208: if (entity == NULL) {
8209: /*
8210: * [ WFC: Entity Declared ]
8211: * In a document without any DTD, a document with only an
8212: * internal DTD subset which contains no parameter entity
8213: * references, or a document with "standalone='yes'", ...
8214: * ... The declaration of a parameter entity must precede
8215: * any reference to it...
8216: */
8217: if ((ctxt->standalone == 1) ||
8218: ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8219: xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8220: "PEReference: %%%s; not found\n", name);
8221: } else {
8222: /*
8223: * [ VC: Entity Declared ]
8224: * In a document with an external subset or external
8225: * parameter entities with "standalone='no'", ...
8226: * ... The declaration of a parameter entity must
8227: * precede any reference to it...
8228: */
8229: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8230: "PEReference: %%%s; not found\n",
8231: name, NULL);
8232: ctxt->valid = 0;
8233: }
8234: } else {
8235: /*
8236: * Internal checking in case the entity quest barfed
8237: */
8238: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8239: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8240: xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8241: "%%%s; is not a parameter entity\n",
8242: name, NULL);
8243: }
8244: }
8245: ctxt->hasPErefs = 1;
8246: xmlFree(name);
8247: *str = ptr;
8248: return(entity);
8249: }
8250:
8251: /**
8252: * xmlParseDocTypeDecl:
8253: * @ctxt: an XML parser context
8254: *
8255: * parse a DOCTYPE declaration
8256: *
1.1.1.3 misho 8257: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1.1 misho 8258: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8259: *
8260: * [ VC: Root Element Type ]
8261: * The Name in the document type declaration must match the element
1.1.1.3 misho 8262: * type of the root element.
1.1 misho 8263: */
8264:
8265: void
8266: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8267: const xmlChar *name = NULL;
8268: xmlChar *ExternalID = NULL;
8269: xmlChar *URI = NULL;
8270:
8271: /*
8272: * We know that '<!DOCTYPE' has been detected.
8273: */
8274: SKIP(9);
8275:
8276: SKIP_BLANKS;
8277:
8278: /*
8279: * Parse the DOCTYPE name.
8280: */
8281: name = xmlParseName(ctxt);
8282: if (name == NULL) {
8283: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8284: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8285: }
8286: ctxt->intSubName = name;
8287:
8288: SKIP_BLANKS;
8289:
8290: /*
8291: * Check for SystemID and ExternalID
8292: */
8293: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8294:
8295: if ((URI != NULL) || (ExternalID != NULL)) {
8296: ctxt->hasExternalSubset = 1;
8297: }
8298: ctxt->extSubURI = URI;
8299: ctxt->extSubSystem = ExternalID;
8300:
8301: SKIP_BLANKS;
8302:
8303: /*
8304: * Create and update the internal subset.
8305: */
8306: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8307: (!ctxt->disableSAX))
8308: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.1.1.3 misho 8309: if (ctxt->instate == XML_PARSER_EOF)
8310: return;
1.1 misho 8311:
8312: /*
8313: * Is there any internal subset declarations ?
8314: * they are handled separately in xmlParseInternalSubset()
8315: */
8316: if (RAW == '[')
8317: return;
8318:
8319: /*
8320: * We should be at the end of the DOCTYPE declaration.
8321: */
8322: if (RAW != '>') {
8323: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8324: }
8325: NEXT;
8326: }
8327:
8328: /**
8329: * xmlParseInternalSubset:
8330: * @ctxt: an XML parser context
8331: *
8332: * parse the internal subset declaration
8333: *
8334: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8335: */
8336:
8337: static void
8338: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8339: /*
8340: * Is there any DTD definition ?
8341: */
8342: if (RAW == '[') {
8343: ctxt->instate = XML_PARSER_DTD;
8344: NEXT;
8345: /*
1.1.1.3 misho 8346: * Parse the succession of Markup declarations and
1.1 misho 8347: * PEReferences.
8348: * Subsequence (markupdecl | PEReference | S)*
8349: */
1.1.1.3 misho 8350: while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 8351: const xmlChar *check = CUR_PTR;
8352: unsigned int cons = ctxt->input->consumed;
8353:
8354: SKIP_BLANKS;
8355: xmlParseMarkupDecl(ctxt);
8356: xmlParsePEReference(ctxt);
8357:
8358: /*
8359: * Pop-up of finished entities.
8360: */
8361: while ((RAW == 0) && (ctxt->inputNr > 1))
8362: xmlPopInput(ctxt);
8363:
8364: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8365: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8366: "xmlParseInternalSubset: error detected in Markup declaration\n");
8367: break;
8368: }
8369: }
1.1.1.3 misho 8370: if (RAW == ']') {
1.1 misho 8371: NEXT;
8372: SKIP_BLANKS;
8373: }
8374: }
8375:
8376: /*
8377: * We should be at the end of the DOCTYPE declaration.
8378: */
8379: if (RAW != '>') {
8380: xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8381: }
8382: NEXT;
8383: }
8384:
8385: #ifdef LIBXML_SAX1_ENABLED
8386: /**
8387: * xmlParseAttribute:
8388: * @ctxt: an XML parser context
8389: * @value: a xmlChar ** used to store the value of the attribute
8390: *
8391: * parse an attribute
8392: *
8393: * [41] Attribute ::= Name Eq AttValue
8394: *
8395: * [ WFC: No External Entity References ]
8396: * Attribute values cannot contain direct or indirect entity references
8397: * to external entities.
8398: *
8399: * [ WFC: No < in Attribute Values ]
8400: * The replacement text of any entity referred to directly or indirectly in
1.1.1.3 misho 8401: * an attribute value (other than "<") must not contain a <.
8402: *
1.1 misho 8403: * [ VC: Attribute Value Type ]
8404: * The attribute must have been declared; the value must be of the type
8405: * declared for it.
8406: *
8407: * [25] Eq ::= S? '=' S?
8408: *
8409: * With namespace:
8410: *
8411: * [NS 11] Attribute ::= QName Eq AttValue
8412: *
8413: * Also the case QName == xmlns:??? is handled independently as a namespace
8414: * definition.
8415: *
8416: * Returns the attribute name, and the value in *value.
8417: */
8418:
8419: const xmlChar *
8420: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8421: const xmlChar *name;
8422: xmlChar *val;
8423:
8424: *value = NULL;
8425: GROW;
8426: name = xmlParseName(ctxt);
8427: if (name == NULL) {
8428: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8429: "error parsing attribute name\n");
8430: return(NULL);
8431: }
8432:
8433: /*
8434: * read the value
8435: */
8436: SKIP_BLANKS;
8437: if (RAW == '=') {
8438: NEXT;
8439: SKIP_BLANKS;
8440: val = xmlParseAttValue(ctxt);
8441: ctxt->instate = XML_PARSER_CONTENT;
8442: } else {
8443: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8444: "Specification mandate value for attribute %s\n", name);
8445: return(NULL);
8446: }
8447:
8448: /*
8449: * Check that xml:lang conforms to the specification
8450: * No more registered as an error, just generate a warning now
8451: * since this was deprecated in XML second edition
8452: */
8453: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8454: if (!xmlCheckLanguageID(val)) {
8455: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8456: "Malformed value for xml:lang : %s\n",
8457: val, NULL);
8458: }
8459: }
8460:
8461: /*
8462: * Check that xml:space conforms to the specification
8463: */
8464: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8465: if (xmlStrEqual(val, BAD_CAST "default"))
8466: *(ctxt->space) = 0;
8467: else if (xmlStrEqual(val, BAD_CAST "preserve"))
8468: *(ctxt->space) = 1;
8469: else {
8470: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8471: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8472: val, NULL);
8473: }
8474: }
8475:
8476: *value = val;
8477: return(name);
8478: }
8479:
8480: /**
8481: * xmlParseStartTag:
8482: * @ctxt: an XML parser context
1.1.1.3 misho 8483: *
1.1 misho 8484: * parse a start of tag either for rule element or
8485: * EmptyElement. In both case we don't parse the tag closing chars.
8486: *
8487: * [40] STag ::= '<' Name (S Attribute)* S? '>'
8488: *
8489: * [ WFC: Unique Att Spec ]
8490: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 misho 8491: * empty-element tag.
1.1 misho 8492: *
8493: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8494: *
8495: * [ WFC: Unique Att Spec ]
8496: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 misho 8497: * empty-element tag.
1.1 misho 8498: *
8499: * With namespace:
8500: *
8501: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8502: *
8503: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8504: *
8505: * Returns the element name parsed
8506: */
8507:
8508: const xmlChar *
8509: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8510: const xmlChar *name;
8511: const xmlChar *attname;
8512: xmlChar *attvalue;
8513: const xmlChar **atts = ctxt->atts;
8514: int nbatts = 0;
8515: int maxatts = ctxt->maxatts;
8516: int i;
8517:
8518: if (RAW != '<') return(NULL);
8519: NEXT1;
8520:
8521: name = xmlParseName(ctxt);
8522: if (name == NULL) {
8523: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8524: "xmlParseStartTag: invalid element name\n");
8525: return(NULL);
8526: }
8527:
8528: /*
8529: * Now parse the attributes, it ends up with the ending
8530: *
8531: * (S Attribute)* S?
8532: */
8533: SKIP_BLANKS;
8534: GROW;
8535:
1.1.1.3 misho 8536: while (((RAW != '>') &&
1.1 misho 8537: ((RAW != '/') || (NXT(1) != '>')) &&
1.1.1.3 misho 8538: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 8539: const xmlChar *q = CUR_PTR;
8540: unsigned int cons = ctxt->input->consumed;
8541:
8542: attname = xmlParseAttribute(ctxt, &attvalue);
8543: if ((attname != NULL) && (attvalue != NULL)) {
8544: /*
8545: * [ WFC: Unique Att Spec ]
8546: * No attribute name may appear more than once in the same
1.1.1.3 misho 8547: * start-tag or empty-element tag.
1.1 misho 8548: */
8549: for (i = 0; i < nbatts;i += 2) {
8550: if (xmlStrEqual(atts[i], attname)) {
8551: xmlErrAttributeDup(ctxt, NULL, attname);
8552: xmlFree(attvalue);
8553: goto failed;
8554: }
8555: }
8556: /*
8557: * Add the pair to atts
8558: */
8559: if (atts == NULL) {
8560: maxatts = 22; /* allow for 10 attrs by default */
8561: atts = (const xmlChar **)
8562: xmlMalloc(maxatts * sizeof(xmlChar *));
8563: if (atts == NULL) {
8564: xmlErrMemory(ctxt, NULL);
8565: if (attvalue != NULL)
8566: xmlFree(attvalue);
8567: goto failed;
8568: }
8569: ctxt->atts = atts;
8570: ctxt->maxatts = maxatts;
8571: } else if (nbatts + 4 > maxatts) {
8572: const xmlChar **n;
8573:
8574: maxatts *= 2;
8575: n = (const xmlChar **) xmlRealloc((void *) atts,
8576: maxatts * sizeof(const xmlChar *));
8577: if (n == NULL) {
8578: xmlErrMemory(ctxt, NULL);
8579: if (attvalue != NULL)
8580: xmlFree(attvalue);
8581: goto failed;
8582: }
8583: atts = n;
8584: ctxt->atts = atts;
8585: ctxt->maxatts = maxatts;
8586: }
8587: atts[nbatts++] = attname;
8588: atts[nbatts++] = attvalue;
8589: atts[nbatts] = NULL;
8590: atts[nbatts + 1] = NULL;
8591: } else {
8592: if (attvalue != NULL)
8593: xmlFree(attvalue);
8594: }
8595:
1.1.1.3 misho 8596: failed:
1.1 misho 8597:
8598: GROW
8599: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8600: break;
8601: if (!IS_BLANK_CH(RAW)) {
8602: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8603: "attributes construct error\n");
8604: }
8605: SKIP_BLANKS;
8606: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8607: (attname == NULL) && (attvalue == NULL)) {
8608: xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8609: "xmlParseStartTag: problem parsing attributes\n");
8610: break;
8611: }
8612: SHRINK;
8613: GROW;
8614: }
8615:
8616: /*
8617: * SAX: Start of Element !
8618: */
8619: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8620: (!ctxt->disableSAX)) {
8621: if (nbatts > 0)
8622: ctxt->sax->startElement(ctxt->userData, name, atts);
8623: else
8624: ctxt->sax->startElement(ctxt->userData, name, NULL);
8625: }
8626:
8627: if (atts != NULL) {
8628: /* Free only the content strings */
8629: for (i = 1;i < nbatts;i+=2)
8630: if (atts[i] != NULL)
8631: xmlFree((xmlChar *) atts[i]);
8632: }
8633: return(name);
8634: }
8635:
8636: /**
8637: * xmlParseEndTag1:
8638: * @ctxt: an XML parser context
8639: * @line: line of the start tag
8640: * @nsNr: number of namespaces on the start tag
8641: *
8642: * parse an end of tag
8643: *
8644: * [42] ETag ::= '</' Name S? '>'
8645: *
8646: * With namespace
8647: *
8648: * [NS 9] ETag ::= '</' QName S? '>'
8649: */
8650:
8651: static void
8652: xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8653: const xmlChar *name;
8654:
8655: GROW;
8656: if ((RAW != '<') || (NXT(1) != '/')) {
8657: xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8658: "xmlParseEndTag: '</' not found\n");
8659: return;
8660: }
8661: SKIP(2);
8662:
8663: name = xmlParseNameAndCompare(ctxt,ctxt->name);
8664:
8665: /*
8666: * We should definitely be at the ending "S? '>'" part
8667: */
8668: GROW;
8669: SKIP_BLANKS;
8670: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8671: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8672: } else
8673: NEXT1;
8674:
8675: /*
8676: * [ WFC: Element Type Match ]
8677: * The Name in an element's end-tag must match the element type in the
1.1.1.3 misho 8678: * start-tag.
1.1 misho 8679: *
8680: */
8681: if (name != (xmlChar*)1) {
8682: if (name == NULL) name = BAD_CAST "unparseable";
8683: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8684: "Opening and ending tag mismatch: %s line %d and %s\n",
8685: ctxt->name, line, name);
8686: }
8687:
8688: /*
8689: * SAX: End of Tag
8690: */
8691: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8692: (!ctxt->disableSAX))
8693: ctxt->sax->endElement(ctxt->userData, ctxt->name);
8694:
8695: namePop(ctxt);
8696: spacePop(ctxt);
8697: return;
8698: }
8699:
8700: /**
8701: * xmlParseEndTag:
8702: * @ctxt: an XML parser context
8703: *
8704: * parse an end of tag
8705: *
8706: * [42] ETag ::= '</' Name S? '>'
8707: *
8708: * With namespace
8709: *
8710: * [NS 9] ETag ::= '</' QName S? '>'
8711: */
8712:
8713: void
8714: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8715: xmlParseEndTag1(ctxt, 0);
8716: }
8717: #endif /* LIBXML_SAX1_ENABLED */
8718:
8719: /************************************************************************
8720: * *
8721: * SAX 2 specific operations *
8722: * *
8723: ************************************************************************/
8724:
8725: /*
8726: * xmlGetNamespace:
8727: * @ctxt: an XML parser context
8728: * @prefix: the prefix to lookup
8729: *
8730: * Lookup the namespace name for the @prefix (which ca be NULL)
8731: * The prefix must come from the @ctxt->dict dictionnary
8732: *
8733: * Returns the namespace name or NULL if not bound
8734: */
8735: static const xmlChar *
8736: xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8737: int i;
8738:
8739: if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8740: for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8741: if (ctxt->nsTab[i] == prefix) {
8742: if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8743: return(NULL);
8744: return(ctxt->nsTab[i + 1]);
8745: }
8746: return(NULL);
8747: }
8748:
8749: /**
8750: * xmlParseQName:
8751: * @ctxt: an XML parser context
8752: * @prefix: pointer to store the prefix part
8753: *
8754: * parse an XML Namespace QName
8755: *
8756: * [6] QName ::= (Prefix ':')? LocalPart
8757: * [7] Prefix ::= NCName
8758: * [8] LocalPart ::= NCName
8759: *
8760: * Returns the Name parsed or NULL
8761: */
8762:
8763: static const xmlChar *
8764: xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8765: const xmlChar *l, *p;
8766:
8767: GROW;
8768:
8769: l = xmlParseNCName(ctxt);
8770: if (l == NULL) {
8771: if (CUR == ':') {
8772: l = xmlParseName(ctxt);
8773: if (l != NULL) {
1.1.1.3 misho 8774: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
1.1 misho 8775: "Failed to parse QName '%s'\n", l, NULL, NULL);
8776: *prefix = NULL;
8777: return(l);
8778: }
8779: }
8780: return(NULL);
8781: }
8782: if (CUR == ':') {
8783: NEXT;
8784: p = l;
8785: l = xmlParseNCName(ctxt);
8786: if (l == NULL) {
8787: xmlChar *tmp;
8788:
8789: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8790: "Failed to parse QName '%s:'\n", p, NULL, NULL);
8791: l = xmlParseNmtoken(ctxt);
8792: if (l == NULL)
8793: tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8794: else {
8795: tmp = xmlBuildQName(l, p, NULL, 0);
8796: xmlFree((char *)l);
8797: }
8798: p = xmlDictLookup(ctxt->dict, tmp, -1);
8799: if (tmp != NULL) xmlFree(tmp);
8800: *prefix = NULL;
8801: return(p);
8802: }
8803: if (CUR == ':') {
8804: xmlChar *tmp;
8805:
8806: xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8807: "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8808: NEXT;
8809: tmp = (xmlChar *) xmlParseName(ctxt);
8810: if (tmp != NULL) {
8811: tmp = xmlBuildQName(tmp, l, NULL, 0);
8812: l = xmlDictLookup(ctxt->dict, tmp, -1);
8813: if (tmp != NULL) xmlFree(tmp);
8814: *prefix = p;
8815: return(l);
8816: }
8817: tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8818: l = xmlDictLookup(ctxt->dict, tmp, -1);
8819: if (tmp != NULL) xmlFree(tmp);
8820: *prefix = p;
8821: return(l);
8822: }
8823: *prefix = p;
8824: } else
8825: *prefix = NULL;
8826: return(l);
8827: }
8828:
8829: /**
8830: * xmlParseQNameAndCompare:
8831: * @ctxt: an XML parser context
8832: * @name: the localname
8833: * @prefix: the prefix, if any.
8834: *
8835: * parse an XML name and compares for match
8836: * (specialized for endtag parsing)
8837: *
8838: * Returns NULL for an illegal name, (xmlChar*) 1 for success
8839: * and the name for mismatch
8840: */
8841:
8842: static const xmlChar *
8843: xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8844: xmlChar const *prefix) {
8845: const xmlChar *cmp;
8846: const xmlChar *in;
8847: const xmlChar *ret;
8848: const xmlChar *prefix2;
8849:
8850: if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8851:
8852: GROW;
8853: in = ctxt->input->cur;
8854:
8855: cmp = prefix;
8856: while (*in != 0 && *in == *cmp) {
1.1.1.3 misho 8857: ++in;
1.1 misho 8858: ++cmp;
8859: }
8860: if ((*cmp == 0) && (*in == ':')) {
8861: in++;
8862: cmp = name;
8863: while (*in != 0 && *in == *cmp) {
8864: ++in;
8865: ++cmp;
8866: }
8867: if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8868: /* success */
8869: ctxt->input->cur = in;
8870: return((const xmlChar*) 1);
8871: }
8872: }
8873: /*
8874: * all strings coms from the dictionary, equality can be done directly
8875: */
8876: ret = xmlParseQName (ctxt, &prefix2);
8877: if ((ret == name) && (prefix == prefix2))
8878: return((const xmlChar*) 1);
8879: return ret;
8880: }
8881:
8882: /**
8883: * xmlParseAttValueInternal:
8884: * @ctxt: an XML parser context
8885: * @len: attribute len result
8886: * @alloc: whether the attribute was reallocated as a new string
8887: * @normalize: if 1 then further non-CDATA normalization must be done
8888: *
8889: * parse a value for an attribute.
8890: * NOTE: if no normalization is needed, the routine will return pointers
8891: * directly from the data buffer.
8892: *
8893: * 3.3.3 Attribute-Value Normalization:
8894: * Before the value of an attribute is passed to the application or
1.1.1.3 misho 8895: * checked for validity, the XML processor must normalize it as follows:
1.1 misho 8896: * - a character reference is processed by appending the referenced
8897: * character to the attribute value
8898: * - an entity reference is processed by recursively processing the
1.1.1.3 misho 8899: * replacement text of the entity
1.1 misho 8900: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8901: * appending #x20 to the normalized value, except that only a single
8902: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1.1.1.3 misho 8903: * parsed entity or the literal entity value of an internal parsed entity
8904: * - other characters are processed by appending them to the normalized value
1.1 misho 8905: * If the declared value is not CDATA, then the XML processor must further
8906: * process the normalized attribute value by discarding any leading and
8907: * trailing space (#x20) characters, and by replacing sequences of space
1.1.1.3 misho 8908: * (#x20) characters by a single space (#x20) character.
1.1 misho 8909: * All attributes for which no declaration has been read should be treated
8910: * by a non-validating parser as if declared CDATA.
8911: *
8912: * Returns the AttValue parsed or NULL. The value has to be freed by the
8913: * caller if it was copied, this can be detected by val[*len] == 0.
8914: */
8915:
8916: static xmlChar *
8917: xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8918: int normalize)
8919: {
8920: xmlChar limit = 0;
8921: const xmlChar *in = NULL, *start, *end, *last;
8922: xmlChar *ret = NULL;
8923:
8924: GROW;
8925: in = (xmlChar *) CUR_PTR;
8926: if (*in != '"' && *in != '\'') {
8927: xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8928: return (NULL);
8929: }
8930: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8931:
8932: /*
8933: * try to handle in this routine the most common case where no
8934: * allocation of a new string is required and where content is
8935: * pure ASCII.
8936: */
8937: limit = *in++;
8938: end = ctxt->input->end;
8939: start = in;
8940: if (in >= end) {
8941: const xmlChar *oldbase = ctxt->input->base;
8942: GROW;
8943: if (oldbase != ctxt->input->base) {
8944: long delta = ctxt->input->base - oldbase;
8945: start = start + delta;
8946: in = in + delta;
8947: }
8948: end = ctxt->input->end;
8949: }
8950: if (normalize) {
8951: /*
8952: * Skip any leading spaces
8953: */
1.1.1.3 misho 8954: while ((in < end) && (*in != limit) &&
1.1 misho 8955: ((*in == 0x20) || (*in == 0x9) ||
8956: (*in == 0xA) || (*in == 0xD))) {
8957: in++;
8958: start = in;
8959: if (in >= end) {
8960: const xmlChar *oldbase = ctxt->input->base;
8961: GROW;
1.1.1.3 misho 8962: if (ctxt->instate == XML_PARSER_EOF)
8963: return(NULL);
1.1 misho 8964: if (oldbase != ctxt->input->base) {
8965: long delta = ctxt->input->base - oldbase;
8966: start = start + delta;
8967: in = in + delta;
8968: }
8969: end = ctxt->input->end;
1.1.1.3 misho 8970: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8971: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8972: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8973: "AttValue length too long\n");
8974: return(NULL);
8975: }
1.1 misho 8976: }
8977: }
8978: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8979: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8980: if ((*in++ == 0x20) && (*in == 0x20)) break;
8981: if (in >= end) {
8982: const xmlChar *oldbase = ctxt->input->base;
8983: GROW;
1.1.1.3 misho 8984: if (ctxt->instate == XML_PARSER_EOF)
8985: return(NULL);
1.1 misho 8986: if (oldbase != ctxt->input->base) {
8987: long delta = ctxt->input->base - oldbase;
8988: start = start + delta;
8989: in = in + delta;
8990: }
8991: end = ctxt->input->end;
1.1.1.3 misho 8992: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8993: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8994: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8995: "AttValue length too long\n");
8996: return(NULL);
8997: }
1.1 misho 8998: }
8999: }
9000: last = in;
9001: /*
9002: * skip the trailing blanks
9003: */
9004: while ((last[-1] == 0x20) && (last > start)) last--;
1.1.1.3 misho 9005: while ((in < end) && (*in != limit) &&
1.1 misho 9006: ((*in == 0x20) || (*in == 0x9) ||
9007: (*in == 0xA) || (*in == 0xD))) {
9008: in++;
9009: if (in >= end) {
9010: const xmlChar *oldbase = ctxt->input->base;
9011: GROW;
1.1.1.3 misho 9012: if (ctxt->instate == XML_PARSER_EOF)
9013: return(NULL);
1.1 misho 9014: if (oldbase != ctxt->input->base) {
9015: long delta = ctxt->input->base - oldbase;
9016: start = start + delta;
9017: in = in + delta;
9018: last = last + delta;
9019: }
9020: end = ctxt->input->end;
1.1.1.3 misho 9021: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9022: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9023: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9024: "AttValue length too long\n");
9025: return(NULL);
9026: }
1.1 misho 9027: }
9028: }
1.1.1.3 misho 9029: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9030: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9031: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9032: "AttValue length too long\n");
9033: return(NULL);
9034: }
1.1 misho 9035: if (*in != limit) goto need_complex;
9036: } else {
9037: while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9038: (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9039: in++;
9040: if (in >= end) {
9041: const xmlChar *oldbase = ctxt->input->base;
9042: GROW;
1.1.1.3 misho 9043: if (ctxt->instate == XML_PARSER_EOF)
9044: return(NULL);
1.1 misho 9045: if (oldbase != ctxt->input->base) {
9046: long delta = ctxt->input->base - oldbase;
9047: start = start + delta;
9048: in = in + delta;
9049: }
9050: end = ctxt->input->end;
1.1.1.3 misho 9051: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9052: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9053: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9054: "AttValue length too long\n");
9055: return(NULL);
9056: }
1.1 misho 9057: }
9058: }
9059: last = in;
1.1.1.3 misho 9060: if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9061: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9062: xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9063: "AttValue length too long\n");
9064: return(NULL);
9065: }
1.1 misho 9066: if (*in != limit) goto need_complex;
9067: }
9068: in++;
9069: if (len != NULL) {
9070: *len = last - start;
9071: ret = (xmlChar *) start;
9072: } else {
9073: if (alloc) *alloc = 1;
9074: ret = xmlStrndup(start, last - start);
9075: }
9076: CUR_PTR = in;
9077: if (alloc) *alloc = 0;
9078: return ret;
9079: need_complex:
9080: if (alloc) *alloc = 1;
9081: return xmlParseAttValueComplex(ctxt, len, normalize);
9082: }
9083:
9084: /**
9085: * xmlParseAttribute2:
9086: * @ctxt: an XML parser context
9087: * @pref: the element prefix
9088: * @elem: the element name
9089: * @prefix: a xmlChar ** used to store the value of the attribute prefix
9090: * @value: a xmlChar ** used to store the value of the attribute
9091: * @len: an int * to save the length of the attribute
9092: * @alloc: an int * to indicate if the attribute was allocated
9093: *
9094: * parse an attribute in the new SAX2 framework.
9095: *
9096: * Returns the attribute name, and the value in *value, .
9097: */
9098:
9099: static const xmlChar *
9100: xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9101: const xmlChar * pref, const xmlChar * elem,
9102: const xmlChar ** prefix, xmlChar ** value,
9103: int *len, int *alloc)
9104: {
9105: const xmlChar *name;
9106: xmlChar *val, *internal_val = NULL;
9107: int normalize = 0;
9108:
9109: *value = NULL;
9110: GROW;
9111: name = xmlParseQName(ctxt, prefix);
9112: if (name == NULL) {
9113: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9114: "error parsing attribute name\n");
9115: return (NULL);
9116: }
9117:
9118: /*
9119: * get the type if needed
9120: */
9121: if (ctxt->attsSpecial != NULL) {
9122: int type;
9123:
9124: type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9125: pref, elem, *prefix, name);
9126: if (type != 0)
9127: normalize = 1;
9128: }
9129:
9130: /*
9131: * read the value
9132: */
9133: SKIP_BLANKS;
9134: if (RAW == '=') {
9135: NEXT;
9136: SKIP_BLANKS;
9137: val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9138: if (normalize) {
9139: /*
9140: * Sometimes a second normalisation pass for spaces is needed
9141: * but that only happens if charrefs or entities refernces
9142: * have been used in the attribute value, i.e. the attribute
9143: * value have been extracted in an allocated string already.
9144: */
9145: if (*alloc) {
9146: const xmlChar *val2;
9147:
9148: val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9149: if ((val2 != NULL) && (val2 != val)) {
9150: xmlFree(val);
9151: val = (xmlChar *) val2;
9152: }
9153: }
9154: }
9155: ctxt->instate = XML_PARSER_CONTENT;
9156: } else {
9157: xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9158: "Specification mandate value for attribute %s\n",
9159: name);
9160: return (NULL);
9161: }
9162:
9163: if (*prefix == ctxt->str_xml) {
9164: /*
9165: * Check that xml:lang conforms to the specification
9166: * No more registered as an error, just generate a warning now
9167: * since this was deprecated in XML second edition
9168: */
9169: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9170: internal_val = xmlStrndup(val, *len);
9171: if (!xmlCheckLanguageID(internal_val)) {
9172: xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9173: "Malformed value for xml:lang : %s\n",
9174: internal_val, NULL);
9175: }
9176: }
9177:
9178: /*
9179: * Check that xml:space conforms to the specification
9180: */
9181: if (xmlStrEqual(name, BAD_CAST "space")) {
9182: internal_val = xmlStrndup(val, *len);
9183: if (xmlStrEqual(internal_val, BAD_CAST "default"))
9184: *(ctxt->space) = 0;
9185: else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9186: *(ctxt->space) = 1;
9187: else {
9188: xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9189: "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9190: internal_val, NULL);
9191: }
9192: }
9193: if (internal_val) {
9194: xmlFree(internal_val);
9195: }
9196: }
9197:
9198: *value = val;
9199: return (name);
9200: }
9201: /**
9202: * xmlParseStartTag2:
9203: * @ctxt: an XML parser context
1.1.1.3 misho 9204: *
1.1 misho 9205: * parse a start of tag either for rule element or
9206: * EmptyElement. In both case we don't parse the tag closing chars.
9207: * This routine is called when running SAX2 parsing
9208: *
9209: * [40] STag ::= '<' Name (S Attribute)* S? '>'
9210: *
9211: * [ WFC: Unique Att Spec ]
9212: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 misho 9213: * empty-element tag.
1.1 misho 9214: *
9215: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9216: *
9217: * [ WFC: Unique Att Spec ]
9218: * No attribute name may appear more than once in the same start-tag or
1.1.1.3 misho 9219: * empty-element tag.
1.1 misho 9220: *
9221: * With namespace:
9222: *
9223: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9224: *
9225: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9226: *
9227: * Returns the element name parsed
9228: */
9229:
9230: static const xmlChar *
9231: xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9232: const xmlChar **URI, int *tlen) {
9233: const xmlChar *localname;
9234: const xmlChar *prefix;
9235: const xmlChar *attname;
9236: const xmlChar *aprefix;
9237: const xmlChar *nsname;
9238: xmlChar *attvalue;
9239: const xmlChar **atts = ctxt->atts;
9240: int maxatts = ctxt->maxatts;
9241: int nratts, nbatts, nbdef;
9242: int i, j, nbNs, attval, oldline, oldcol;
9243: const xmlChar *base;
9244: unsigned long cur;
9245: int nsNr = ctxt->nsNr;
9246:
9247: if (RAW != '<') return(NULL);
9248: NEXT1;
9249:
9250: /*
9251: * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9252: * point since the attribute values may be stored as pointers to
9253: * the buffer and calling SHRINK would destroy them !
9254: * The Shrinking is only possible once the full set of attribute
9255: * callbacks have been done.
9256: */
9257: reparse:
9258: SHRINK;
9259: base = ctxt->input->base;
9260: cur = ctxt->input->cur - ctxt->input->base;
9261: oldline = ctxt->input->line;
9262: oldcol = ctxt->input->col;
9263: nbatts = 0;
9264: nratts = 0;
9265: nbdef = 0;
9266: nbNs = 0;
9267: attval = 0;
9268: /* Forget any namespaces added during an earlier parse of this element. */
9269: ctxt->nsNr = nsNr;
9270:
9271: localname = xmlParseQName(ctxt, &prefix);
9272: if (localname == NULL) {
9273: xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9274: "StartTag: invalid element name\n");
9275: return(NULL);
9276: }
9277: *tlen = ctxt->input->cur - ctxt->input->base - cur;
9278:
9279: /*
9280: * Now parse the attributes, it ends up with the ending
9281: *
9282: * (S Attribute)* S?
9283: */
9284: SKIP_BLANKS;
9285: GROW;
9286: if (ctxt->input->base != base) goto base_changed;
9287:
1.1.1.3 misho 9288: while (((RAW != '>') &&
1.1 misho 9289: ((RAW != '/') || (NXT(1) != '>')) &&
1.1.1.3 misho 9290: (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
1.1 misho 9291: const xmlChar *q = CUR_PTR;
9292: unsigned int cons = ctxt->input->consumed;
9293: int len = -1, alloc = 0;
9294:
9295: attname = xmlParseAttribute2(ctxt, prefix, localname,
9296: &aprefix, &attvalue, &len, &alloc);
9297: if (ctxt->input->base != base) {
9298: if ((attvalue != NULL) && (alloc != 0))
9299: xmlFree(attvalue);
9300: attvalue = NULL;
9301: goto base_changed;
9302: }
9303: if ((attname != NULL) && (attvalue != NULL)) {
9304: if (len < 0) len = xmlStrlen(attvalue);
9305: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9306: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9307: xmlURIPtr uri;
9308:
9309: if (*URL != 0) {
9310: uri = xmlParseURI((const char *) URL);
9311: if (uri == NULL) {
9312: xmlNsErr(ctxt, XML_WAR_NS_URI,
9313: "xmlns: '%s' is not a valid URI\n",
9314: URL, NULL, NULL);
9315: } else {
9316: if (uri->scheme == NULL) {
9317: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9318: "xmlns: URI %s is not absolute\n",
9319: URL, NULL, NULL);
9320: }
9321: xmlFreeURI(uri);
9322: }
9323: if (URL == ctxt->str_xml_ns) {
9324: if (attname != ctxt->str_xml) {
9325: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9326: "xml namespace URI cannot be the default namespace\n",
9327: NULL, NULL, NULL);
9328: }
9329: goto skip_default_ns;
9330: }
9331: if ((len == 29) &&
9332: (xmlStrEqual(URL,
9333: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9334: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9335: "reuse of the xmlns namespace name is forbidden\n",
9336: NULL, NULL, NULL);
9337: goto skip_default_ns;
9338: }
9339: }
9340: /*
9341: * check that it's not a defined namespace
9342: */
9343: for (j = 1;j <= nbNs;j++)
9344: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9345: break;
9346: if (j <= nbNs)
9347: xmlErrAttributeDup(ctxt, NULL, attname);
9348: else
9349: if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9350: skip_default_ns:
9351: if (alloc != 0) xmlFree(attvalue);
9352: SKIP_BLANKS;
9353: continue;
9354: }
9355: if (aprefix == ctxt->str_xmlns) {
9356: const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9357: xmlURIPtr uri;
9358:
9359: if (attname == ctxt->str_xml) {
9360: if (URL != ctxt->str_xml_ns) {
9361: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9362: "xml namespace prefix mapped to wrong URI\n",
9363: NULL, NULL, NULL);
9364: }
9365: /*
9366: * Do not keep a namespace definition node
9367: */
9368: goto skip_ns;
9369: }
9370: if (URL == ctxt->str_xml_ns) {
9371: if (attname != ctxt->str_xml) {
9372: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9373: "xml namespace URI mapped to wrong prefix\n",
9374: NULL, NULL, NULL);
9375: }
9376: goto skip_ns;
9377: }
9378: if (attname == ctxt->str_xmlns) {
9379: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9380: "redefinition of the xmlns prefix is forbidden\n",
9381: NULL, NULL, NULL);
9382: goto skip_ns;
9383: }
9384: if ((len == 29) &&
9385: (xmlStrEqual(URL,
9386: BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9387: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9388: "reuse of the xmlns namespace name is forbidden\n",
9389: NULL, NULL, NULL);
9390: goto skip_ns;
9391: }
9392: if ((URL == NULL) || (URL[0] == 0)) {
9393: xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9394: "xmlns:%s: Empty XML namespace is not allowed\n",
9395: attname, NULL, NULL);
9396: goto skip_ns;
9397: } else {
9398: uri = xmlParseURI((const char *) URL);
9399: if (uri == NULL) {
9400: xmlNsErr(ctxt, XML_WAR_NS_URI,
9401: "xmlns:%s: '%s' is not a valid URI\n",
9402: attname, URL, NULL);
9403: } else {
9404: if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9405: xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9406: "xmlns:%s: URI %s is not absolute\n",
9407: attname, URL, NULL);
9408: }
9409: xmlFreeURI(uri);
9410: }
9411: }
9412:
9413: /*
9414: * check that it's not a defined namespace
9415: */
9416: for (j = 1;j <= nbNs;j++)
9417: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9418: break;
9419: if (j <= nbNs)
9420: xmlErrAttributeDup(ctxt, aprefix, attname);
9421: else
9422: if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9423: skip_ns:
9424: if (alloc != 0) xmlFree(attvalue);
9425: SKIP_BLANKS;
9426: if (ctxt->input->base != base) goto base_changed;
9427: continue;
9428: }
9429:
9430: /*
9431: * Add the pair to atts
9432: */
9433: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9434: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9435: if (attvalue[len] == 0)
9436: xmlFree(attvalue);
9437: goto failed;
9438: }
9439: maxatts = ctxt->maxatts;
9440: atts = ctxt->atts;
9441: }
9442: ctxt->attallocs[nratts++] = alloc;
9443: atts[nbatts++] = attname;
9444: atts[nbatts++] = aprefix;
9445: atts[nbatts++] = NULL; /* the URI will be fetched later */
9446: atts[nbatts++] = attvalue;
9447: attvalue += len;
9448: atts[nbatts++] = attvalue;
9449: /*
9450: * tag if some deallocation is needed
9451: */
9452: if (alloc != 0) attval = 1;
9453: } else {
9454: if ((attvalue != NULL) && (attvalue[len] == 0))
9455: xmlFree(attvalue);
9456: }
9457:
9458: failed:
9459:
9460: GROW
1.1.1.3 misho 9461: if (ctxt->instate == XML_PARSER_EOF)
9462: break;
1.1 misho 9463: if (ctxt->input->base != base) goto base_changed;
9464: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9465: break;
9466: if (!IS_BLANK_CH(RAW)) {
9467: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9468: "attributes construct error\n");
9469: break;
9470: }
9471: SKIP_BLANKS;
9472: if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9473: (attname == NULL) && (attvalue == NULL)) {
9474: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9475: "xmlParseStartTag: problem parsing attributes\n");
9476: break;
9477: }
9478: GROW;
9479: if (ctxt->input->base != base) goto base_changed;
9480: }
9481:
9482: /*
9483: * The attributes defaulting
9484: */
9485: if (ctxt->attsDefault != NULL) {
9486: xmlDefAttrsPtr defaults;
9487:
9488: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9489: if (defaults != NULL) {
9490: for (i = 0;i < defaults->nbAttrs;i++) {
9491: attname = defaults->values[5 * i];
9492: aprefix = defaults->values[5 * i + 1];
9493:
9494: /*
9495: * special work for namespaces defaulted defs
9496: */
9497: if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9498: /*
9499: * check that it's not a defined namespace
9500: */
9501: for (j = 1;j <= nbNs;j++)
9502: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9503: break;
9504: if (j <= nbNs) continue;
9505:
9506: nsname = xmlGetNamespace(ctxt, NULL);
9507: if (nsname != defaults->values[5 * i + 2]) {
9508: if (nsPush(ctxt, NULL,
9509: defaults->values[5 * i + 2]) > 0)
9510: nbNs++;
9511: }
9512: } else if (aprefix == ctxt->str_xmlns) {
9513: /*
9514: * check that it's not a defined namespace
9515: */
9516: for (j = 1;j <= nbNs;j++)
9517: if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9518: break;
9519: if (j <= nbNs) continue;
9520:
9521: nsname = xmlGetNamespace(ctxt, attname);
9522: if (nsname != defaults->values[2]) {
9523: if (nsPush(ctxt, attname,
9524: defaults->values[5 * i + 2]) > 0)
9525: nbNs++;
9526: }
9527: } else {
9528: /*
9529: * check that it's not a defined attribute
9530: */
9531: for (j = 0;j < nbatts;j+=5) {
9532: if ((attname == atts[j]) && (aprefix == atts[j+1]))
9533: break;
9534: }
9535: if (j < nbatts) continue;
9536:
9537: if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9538: if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9539: return(NULL);
9540: }
9541: maxatts = ctxt->maxatts;
9542: atts = ctxt->atts;
9543: }
9544: atts[nbatts++] = attname;
9545: atts[nbatts++] = aprefix;
9546: if (aprefix == NULL)
9547: atts[nbatts++] = NULL;
9548: else
9549: atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9550: atts[nbatts++] = defaults->values[5 * i + 2];
9551: atts[nbatts++] = defaults->values[5 * i + 3];
9552: if ((ctxt->standalone == 1) &&
9553: (defaults->values[5 * i + 4] != NULL)) {
1.1.1.3 misho 9554: xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
1.1 misho 9555: "standalone: attribute %s on %s defaulted from external subset\n",
9556: attname, localname);
9557: }
9558: nbdef++;
9559: }
9560: }
9561: }
9562: }
9563:
9564: /*
9565: * The attributes checkings
9566: */
9567: for (i = 0; i < nbatts;i += 5) {
9568: /*
9569: * The default namespace does not apply to attribute names.
9570: */
9571: if (atts[i + 1] != NULL) {
9572: nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9573: if (nsname == NULL) {
9574: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9575: "Namespace prefix %s for %s on %s is not defined\n",
9576: atts[i + 1], atts[i], localname);
9577: }
9578: atts[i + 2] = nsname;
9579: } else
9580: nsname = NULL;
9581: /*
9582: * [ WFC: Unique Att Spec ]
9583: * No attribute name may appear more than once in the same
1.1.1.3 misho 9584: * start-tag or empty-element tag.
1.1 misho 9585: * As extended by the Namespace in XML REC.
9586: */
9587: for (j = 0; j < i;j += 5) {
9588: if (atts[i] == atts[j]) {
9589: if (atts[i+1] == atts[j+1]) {
9590: xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9591: break;
9592: }
9593: if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9594: xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9595: "Namespaced Attribute %s in '%s' redefined\n",
9596: atts[i], nsname, NULL);
9597: break;
9598: }
9599: }
9600: }
9601: }
9602:
9603: nsname = xmlGetNamespace(ctxt, prefix);
9604: if ((prefix != NULL) && (nsname == NULL)) {
9605: xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9606: "Namespace prefix %s on %s is not defined\n",
9607: prefix, localname, NULL);
9608: }
9609: *pref = prefix;
9610: *URI = nsname;
9611:
9612: /*
9613: * SAX: Start of Element !
9614: */
9615: if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9616: (!ctxt->disableSAX)) {
9617: if (nbNs > 0)
9618: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9619: nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9620: nbatts / 5, nbdef, atts);
9621: else
9622: ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9623: nsname, 0, NULL, nbatts / 5, nbdef, atts);
9624: }
9625:
9626: /*
9627: * Free up attribute allocated strings if needed
9628: */
9629: if (attval != 0) {
9630: for (i = 3,j = 0; j < nratts;i += 5,j++)
9631: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9632: xmlFree((xmlChar *) atts[i]);
9633: }
9634:
9635: return(localname);
9636:
9637: base_changed:
9638: /*
9639: * the attribute strings are valid iif the base didn't changed
9640: */
9641: if (attval != 0) {
9642: for (i = 3,j = 0; j < nratts;i += 5,j++)
9643: if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9644: xmlFree((xmlChar *) atts[i]);
9645: }
9646: ctxt->input->cur = ctxt->input->base + cur;
9647: ctxt->input->line = oldline;
9648: ctxt->input->col = oldcol;
9649: if (ctxt->wellFormed == 1) {
9650: goto reparse;
9651: }
9652: return(NULL);
9653: }
9654:
9655: /**
9656: * xmlParseEndTag2:
9657: * @ctxt: an XML parser context
9658: * @line: line of the start tag
9659: * @nsNr: number of namespaces on the start tag
9660: *
9661: * parse an end of tag
9662: *
9663: * [42] ETag ::= '</' Name S? '>'
9664: *
9665: * With namespace
9666: *
9667: * [NS 9] ETag ::= '</' QName S? '>'
9668: */
9669:
9670: static void
9671: xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9672: const xmlChar *URI, int line, int nsNr, int tlen) {
9673: const xmlChar *name;
9674:
9675: GROW;
9676: if ((RAW != '<') || (NXT(1) != '/')) {
9677: xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9678: return;
9679: }
9680: SKIP(2);
9681:
9682: if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9683: if (ctxt->input->cur[tlen] == '>') {
9684: ctxt->input->cur += tlen + 1;
9685: goto done;
9686: }
9687: ctxt->input->cur += tlen;
9688: name = (xmlChar*)1;
9689: } else {
9690: if (prefix == NULL)
9691: name = xmlParseNameAndCompare(ctxt, ctxt->name);
9692: else
9693: name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9694: }
9695:
9696: /*
9697: * We should definitely be at the ending "S? '>'" part
9698: */
9699: GROW;
1.1.1.3 misho 9700: if (ctxt->instate == XML_PARSER_EOF)
9701: return;
1.1 misho 9702: SKIP_BLANKS;
9703: if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9704: xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9705: } else
9706: NEXT1;
9707:
9708: /*
9709: * [ WFC: Element Type Match ]
9710: * The Name in an element's end-tag must match the element type in the
1.1.1.3 misho 9711: * start-tag.
1.1 misho 9712: *
9713: */
9714: if (name != (xmlChar*)1) {
9715: if (name == NULL) name = BAD_CAST "unparseable";
9716: if ((line == 0) && (ctxt->node != NULL))
9717: line = ctxt->node->line;
9718: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9719: "Opening and ending tag mismatch: %s line %d and %s\n",
9720: ctxt->name, line, name);
9721: }
9722:
9723: /*
9724: * SAX: End of Tag
9725: */
9726: done:
9727: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9728: (!ctxt->disableSAX))
9729: ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9730:
9731: spacePop(ctxt);
9732: if (nsNr != 0)
9733: nsPop(ctxt, nsNr);
9734: return;
9735: }
9736:
9737: /**
9738: * xmlParseCDSect:
9739: * @ctxt: an XML parser context
1.1.1.3 misho 9740: *
1.1 misho 9741: * Parse escaped pure raw content.
9742: *
9743: * [18] CDSect ::= CDStart CData CDEnd
9744: *
9745: * [19] CDStart ::= '<![CDATA['
9746: *
9747: * [20] Data ::= (Char* - (Char* ']]>' Char*))
9748: *
9749: * [21] CDEnd ::= ']]>'
9750: */
9751: void
9752: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9753: xmlChar *buf = NULL;
9754: int len = 0;
9755: int size = XML_PARSER_BUFFER_SIZE;
9756: int r, rl;
9757: int s, sl;
9758: int cur, l;
9759: int count = 0;
9760:
9761: /* Check 2.6.0 was NXT(0) not RAW */
9762: if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9763: SKIP(9);
9764: } else
9765: return;
9766:
9767: ctxt->instate = XML_PARSER_CDATA_SECTION;
9768: r = CUR_CHAR(rl);
9769: if (!IS_CHAR(r)) {
9770: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9771: ctxt->instate = XML_PARSER_CONTENT;
9772: return;
9773: }
9774: NEXTL(rl);
9775: s = CUR_CHAR(sl);
9776: if (!IS_CHAR(s)) {
9777: xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9778: ctxt->instate = XML_PARSER_CONTENT;
9779: return;
9780: }
9781: NEXTL(sl);
9782: cur = CUR_CHAR(l);
9783: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9784: if (buf == NULL) {
9785: xmlErrMemory(ctxt, NULL);
9786: return;
9787: }
9788: while (IS_CHAR(cur) &&
9789: ((r != ']') || (s != ']') || (cur != '>'))) {
9790: if (len + 5 >= size) {
9791: xmlChar *tmp;
9792:
1.1.1.3 misho 9793: if ((size > XML_MAX_TEXT_LENGTH) &&
9794: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9795: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9796: "CData section too big found", NULL);
9797: xmlFree (buf);
9798: return;
9799: }
9800: tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
1.1 misho 9801: if (tmp == NULL) {
9802: xmlFree(buf);
9803: xmlErrMemory(ctxt, NULL);
9804: return;
9805: }
9806: buf = tmp;
1.1.1.3 misho 9807: size *= 2;
1.1 misho 9808: }
9809: COPY_BUF(rl,buf,len,r);
9810: r = s;
9811: rl = sl;
9812: s = cur;
9813: sl = l;
9814: count++;
9815: if (count > 50) {
9816: GROW;
1.1.1.3 misho 9817: if (ctxt->instate == XML_PARSER_EOF) {
9818: xmlFree(buf);
9819: return;
9820: }
1.1 misho 9821: count = 0;
9822: }
9823: NEXTL(l);
9824: cur = CUR_CHAR(l);
9825: }
9826: buf[len] = 0;
9827: ctxt->instate = XML_PARSER_CONTENT;
9828: if (cur != '>') {
9829: xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9830: "CData section not finished\n%.50s\n", buf);
9831: xmlFree(buf);
9832: return;
9833: }
9834: NEXTL(l);
9835:
9836: /*
9837: * OK the buffer is to be consumed as cdata.
9838: */
9839: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9840: if (ctxt->sax->cdataBlock != NULL)
9841: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9842: else if (ctxt->sax->characters != NULL)
9843: ctxt->sax->characters(ctxt->userData, buf, len);
9844: }
9845: xmlFree(buf);
9846: }
9847:
9848: /**
9849: * xmlParseContent:
9850: * @ctxt: an XML parser context
9851: *
9852: * Parse a content:
9853: *
9854: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9855: */
9856:
9857: void
9858: xmlParseContent(xmlParserCtxtPtr ctxt) {
9859: GROW;
9860: while ((RAW != 0) &&
9861: ((RAW != '<') || (NXT(1) != '/')) &&
9862: (ctxt->instate != XML_PARSER_EOF)) {
9863: const xmlChar *test = CUR_PTR;
9864: unsigned int cons = ctxt->input->consumed;
9865: const xmlChar *cur = ctxt->input->cur;
9866:
9867: /*
9868: * First case : a Processing Instruction.
9869: */
9870: if ((*cur == '<') && (cur[1] == '?')) {
9871: xmlParsePI(ctxt);
9872: }
9873:
9874: /*
9875: * Second case : a CDSection
9876: */
9877: /* 2.6.0 test was *cur not RAW */
9878: else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9879: xmlParseCDSect(ctxt);
9880: }
9881:
9882: /*
9883: * Third case : a comment
9884: */
9885: else if ((*cur == '<') && (NXT(1) == '!') &&
9886: (NXT(2) == '-') && (NXT(3) == '-')) {
9887: xmlParseComment(ctxt);
9888: ctxt->instate = XML_PARSER_CONTENT;
9889: }
9890:
9891: /*
9892: * Fourth case : a sub-element.
9893: */
9894: else if (*cur == '<') {
9895: xmlParseElement(ctxt);
9896: }
9897:
9898: /*
9899: * Fifth case : a reference. If if has not been resolved,
1.1.1.3 misho 9900: * parsing returns it's Name, create the node
1.1 misho 9901: */
9902:
9903: else if (*cur == '&') {
9904: xmlParseReference(ctxt);
9905: }
9906:
9907: /*
9908: * Last case, text. Note that References are handled directly.
9909: */
9910: else {
9911: xmlParseCharData(ctxt, 0);
9912: }
9913:
9914: GROW;
9915: /*
9916: * Pop-up of finished entities.
9917: */
9918: while ((RAW == 0) && (ctxt->inputNr > 1))
9919: xmlPopInput(ctxt);
9920: SHRINK;
9921:
9922: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9923: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9924: "detected an error in element content\n");
9925: ctxt->instate = XML_PARSER_EOF;
9926: break;
9927: }
9928: }
9929: }
9930:
9931: /**
9932: * xmlParseElement:
9933: * @ctxt: an XML parser context
9934: *
9935: * parse an XML element, this is highly recursive
9936: *
9937: * [39] element ::= EmptyElemTag | STag content ETag
9938: *
9939: * [ WFC: Element Type Match ]
9940: * The Name in an element's end-tag must match the element type in the
1.1.1.3 misho 9941: * start-tag.
1.1 misho 9942: *
9943: */
9944:
9945: void
9946: xmlParseElement(xmlParserCtxtPtr ctxt) {
9947: const xmlChar *name;
9948: const xmlChar *prefix = NULL;
9949: const xmlChar *URI = NULL;
9950: xmlParserNodeInfo node_info;
1.1.1.2 misho 9951: int line, tlen = 0;
1.1 misho 9952: xmlNodePtr ret;
9953: int nsNr = ctxt->nsNr;
9954:
9955: if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9956: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9957: xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9958: "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9959: xmlParserMaxDepth);
9960: ctxt->instate = XML_PARSER_EOF;
9961: return;
9962: }
9963:
9964: /* Capture start position */
9965: if (ctxt->record_info) {
9966: node_info.begin_pos = ctxt->input->consumed +
9967: (CUR_PTR - ctxt->input->base);
9968: node_info.begin_line = ctxt->input->line;
9969: }
9970:
9971: if (ctxt->spaceNr == 0)
9972: spacePush(ctxt, -1);
9973: else if (*ctxt->space == -2)
9974: spacePush(ctxt, -1);
9975: else
9976: spacePush(ctxt, *ctxt->space);
9977:
9978: line = ctxt->input->line;
9979: #ifdef LIBXML_SAX1_ENABLED
9980: if (ctxt->sax2)
9981: #endif /* LIBXML_SAX1_ENABLED */
9982: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9983: #ifdef LIBXML_SAX1_ENABLED
9984: else
9985: name = xmlParseStartTag(ctxt);
9986: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 9987: if (ctxt->instate == XML_PARSER_EOF)
9988: return;
1.1 misho 9989: if (name == NULL) {
9990: spacePop(ctxt);
9991: return;
9992: }
9993: namePush(ctxt, name);
9994: ret = ctxt->node;
9995:
9996: #ifdef LIBXML_VALID_ENABLED
9997: /*
9998: * [ VC: Root Element Type ]
9999: * The Name in the document type declaration must match the element
1.1.1.3 misho 10000: * type of the root element.
1.1 misho 10001: */
10002: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10003: ctxt->node && (ctxt->node == ctxt->myDoc->children))
10004: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10005: #endif /* LIBXML_VALID_ENABLED */
10006:
10007: /*
10008: * Check for an Empty Element.
10009: */
10010: if ((RAW == '/') && (NXT(1) == '>')) {
10011: SKIP(2);
10012: if (ctxt->sax2) {
10013: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10014: (!ctxt->disableSAX))
10015: ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10016: #ifdef LIBXML_SAX1_ENABLED
10017: } else {
10018: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10019: (!ctxt->disableSAX))
10020: ctxt->sax->endElement(ctxt->userData, name);
10021: #endif /* LIBXML_SAX1_ENABLED */
10022: }
10023: namePop(ctxt);
10024: spacePop(ctxt);
10025: if (nsNr != ctxt->nsNr)
10026: nsPop(ctxt, ctxt->nsNr - nsNr);
10027: if ( ret != NULL && ctxt->record_info ) {
10028: node_info.end_pos = ctxt->input->consumed +
10029: (CUR_PTR - ctxt->input->base);
10030: node_info.end_line = ctxt->input->line;
10031: node_info.node = ret;
10032: xmlParserAddNodeInfo(ctxt, &node_info);
10033: }
10034: return;
10035: }
10036: if (RAW == '>') {
10037: NEXT1;
10038: } else {
10039: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10040: "Couldn't find end of Start Tag %s line %d\n",
10041: name, line, NULL);
10042:
10043: /*
10044: * end of parsing of this node.
10045: */
10046: nodePop(ctxt);
10047: namePop(ctxt);
10048: spacePop(ctxt);
10049: if (nsNr != ctxt->nsNr)
10050: nsPop(ctxt, ctxt->nsNr - nsNr);
10051:
10052: /*
10053: * Capture end position and add node
10054: */
10055: if ( ret != NULL && ctxt->record_info ) {
10056: node_info.end_pos = ctxt->input->consumed +
10057: (CUR_PTR - ctxt->input->base);
10058: node_info.end_line = ctxt->input->line;
10059: node_info.node = ret;
10060: xmlParserAddNodeInfo(ctxt, &node_info);
10061: }
10062: return;
10063: }
10064:
10065: /*
10066: * Parse the content of the element:
10067: */
10068: xmlParseContent(ctxt);
1.1.1.3 misho 10069: if (ctxt->instate == XML_PARSER_EOF)
10070: return;
1.1 misho 10071: if (!IS_BYTE_CHAR(RAW)) {
10072: xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10073: "Premature end of data in tag %s line %d\n",
10074: name, line, NULL);
10075:
10076: /*
10077: * end of parsing of this node.
10078: */
10079: nodePop(ctxt);
10080: namePop(ctxt);
10081: spacePop(ctxt);
10082: if (nsNr != ctxt->nsNr)
10083: nsPop(ctxt, ctxt->nsNr - nsNr);
10084: return;
10085: }
10086:
10087: /*
10088: * parse the end of tag: '</' should be here.
10089: */
10090: if (ctxt->sax2) {
10091: xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10092: namePop(ctxt);
10093: }
10094: #ifdef LIBXML_SAX1_ENABLED
10095: else
10096: xmlParseEndTag1(ctxt, line);
10097: #endif /* LIBXML_SAX1_ENABLED */
10098:
10099: /*
10100: * Capture end position and add node
10101: */
10102: if ( ret != NULL && ctxt->record_info ) {
10103: node_info.end_pos = ctxt->input->consumed +
10104: (CUR_PTR - ctxt->input->base);
10105: node_info.end_line = ctxt->input->line;
10106: node_info.node = ret;
10107: xmlParserAddNodeInfo(ctxt, &node_info);
10108: }
10109: }
10110:
10111: /**
10112: * xmlParseVersionNum:
10113: * @ctxt: an XML parser context
10114: *
10115: * parse the XML version value.
10116: *
10117: * [26] VersionNum ::= '1.' [0-9]+
10118: *
10119: * In practice allow [0-9].[0-9]+ at that level
10120: *
10121: * Returns the string giving the XML version number, or NULL
10122: */
10123: xmlChar *
10124: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10125: xmlChar *buf = NULL;
10126: int len = 0;
10127: int size = 10;
10128: xmlChar cur;
10129:
10130: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10131: if (buf == NULL) {
10132: xmlErrMemory(ctxt, NULL);
10133: return(NULL);
10134: }
10135: cur = CUR;
10136: if (!((cur >= '0') && (cur <= '9'))) {
10137: xmlFree(buf);
10138: return(NULL);
10139: }
10140: buf[len++] = cur;
10141: NEXT;
10142: cur=CUR;
10143: if (cur != '.') {
10144: xmlFree(buf);
10145: return(NULL);
10146: }
10147: buf[len++] = cur;
10148: NEXT;
10149: cur=CUR;
10150: while ((cur >= '0') && (cur <= '9')) {
10151: if (len + 1 >= size) {
10152: xmlChar *tmp;
10153:
10154: size *= 2;
10155: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10156: if (tmp == NULL) {
10157: xmlFree(buf);
10158: xmlErrMemory(ctxt, NULL);
10159: return(NULL);
10160: }
10161: buf = tmp;
10162: }
10163: buf[len++] = cur;
10164: NEXT;
10165: cur=CUR;
10166: }
10167: buf[len] = 0;
10168: return(buf);
10169: }
10170:
10171: /**
10172: * xmlParseVersionInfo:
10173: * @ctxt: an XML parser context
10174: *
10175: * parse the XML version.
10176: *
10177: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10178: *
10179: * [25] Eq ::= S? '=' S?
10180: *
10181: * Returns the version string, e.g. "1.0"
10182: */
10183:
10184: xmlChar *
10185: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10186: xmlChar *version = NULL;
10187:
10188: if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10189: SKIP(7);
10190: SKIP_BLANKS;
10191: if (RAW != '=') {
10192: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10193: return(NULL);
10194: }
10195: NEXT;
10196: SKIP_BLANKS;
10197: if (RAW == '"') {
10198: NEXT;
10199: version = xmlParseVersionNum(ctxt);
10200: if (RAW != '"') {
10201: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10202: } else
10203: NEXT;
10204: } else if (RAW == '\''){
10205: NEXT;
10206: version = xmlParseVersionNum(ctxt);
10207: if (RAW != '\'') {
10208: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10209: } else
10210: NEXT;
10211: } else {
10212: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10213: }
10214: }
10215: return(version);
10216: }
10217:
10218: /**
10219: * xmlParseEncName:
10220: * @ctxt: an XML parser context
10221: *
10222: * parse the XML encoding name
10223: *
10224: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10225: *
10226: * Returns the encoding name value or NULL
10227: */
10228: xmlChar *
10229: xmlParseEncName(xmlParserCtxtPtr ctxt) {
10230: xmlChar *buf = NULL;
10231: int len = 0;
10232: int size = 10;
10233: xmlChar cur;
10234:
10235: cur = CUR;
10236: if (((cur >= 'a') && (cur <= 'z')) ||
10237: ((cur >= 'A') && (cur <= 'Z'))) {
10238: buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10239: if (buf == NULL) {
10240: xmlErrMemory(ctxt, NULL);
10241: return(NULL);
10242: }
10243:
10244: buf[len++] = cur;
10245: NEXT;
10246: cur = CUR;
10247: while (((cur >= 'a') && (cur <= 'z')) ||
10248: ((cur >= 'A') && (cur <= 'Z')) ||
10249: ((cur >= '0') && (cur <= '9')) ||
10250: (cur == '.') || (cur == '_') ||
10251: (cur == '-')) {
10252: if (len + 1 >= size) {
10253: xmlChar *tmp;
10254:
10255: size *= 2;
10256: tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10257: if (tmp == NULL) {
10258: xmlErrMemory(ctxt, NULL);
10259: xmlFree(buf);
10260: return(NULL);
10261: }
10262: buf = tmp;
10263: }
10264: buf[len++] = cur;
10265: NEXT;
10266: cur = CUR;
10267: if (cur == 0) {
10268: SHRINK;
10269: GROW;
10270: cur = CUR;
10271: }
10272: }
10273: buf[len] = 0;
10274: } else {
10275: xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10276: }
10277: return(buf);
10278: }
10279:
10280: /**
10281: * xmlParseEncodingDecl:
10282: * @ctxt: an XML parser context
1.1.1.3 misho 10283: *
1.1 misho 10284: * parse the XML encoding declaration
10285: *
10286: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10287: *
10288: * this setups the conversion filters.
10289: *
10290: * Returns the encoding value or NULL
10291: */
10292:
10293: const xmlChar *
10294: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10295: xmlChar *encoding = NULL;
10296:
10297: SKIP_BLANKS;
10298: if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10299: SKIP(8);
10300: SKIP_BLANKS;
10301: if (RAW != '=') {
10302: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10303: return(NULL);
10304: }
10305: NEXT;
10306: SKIP_BLANKS;
10307: if (RAW == '"') {
10308: NEXT;
10309: encoding = xmlParseEncName(ctxt);
10310: if (RAW != '"') {
10311: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10312: } else
10313: NEXT;
10314: } else if (RAW == '\''){
10315: NEXT;
10316: encoding = xmlParseEncName(ctxt);
10317: if (RAW != '\'') {
10318: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10319: } else
10320: NEXT;
10321: } else {
10322: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10323: }
1.1.1.2 misho 10324:
10325: /*
10326: * Non standard parsing, allowing the user to ignore encoding
10327: */
10328: if (ctxt->options & XML_PARSE_IGNORE_ENC)
10329: return(encoding);
10330:
1.1 misho 10331: /*
10332: * UTF-16 encoding stwich has already taken place at this stage,
10333: * more over the little-endian/big-endian selection is already done
10334: */
10335: if ((encoding != NULL) &&
10336: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10337: (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10338: /*
10339: * If no encoding was passed to the parser, that we are
1.1.1.3 misho 10340: * using UTF-16 and no decoder is present i.e. the
1.1 misho 10341: * document is apparently UTF-8 compatible, then raise an
10342: * encoding mismatch fatal error
10343: */
10344: if ((ctxt->encoding == NULL) &&
10345: (ctxt->input->buf != NULL) &&
10346: (ctxt->input->buf->encoder == NULL)) {
10347: xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10348: "Document labelled UTF-16 but has UTF-8 content\n");
10349: }
10350: if (ctxt->encoding != NULL)
10351: xmlFree((xmlChar *) ctxt->encoding);
10352: ctxt->encoding = encoding;
10353: }
10354: /*
10355: * UTF-8 encoding is handled natively
10356: */
10357: else if ((encoding != NULL) &&
10358: ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10359: (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10360: if (ctxt->encoding != NULL)
10361: xmlFree((xmlChar *) ctxt->encoding);
10362: ctxt->encoding = encoding;
10363: }
10364: else if (encoding != NULL) {
10365: xmlCharEncodingHandlerPtr handler;
10366:
10367: if (ctxt->input->encoding != NULL)
10368: xmlFree((xmlChar *) ctxt->input->encoding);
10369: ctxt->input->encoding = encoding;
10370:
10371: handler = xmlFindCharEncodingHandler((const char *) encoding);
10372: if (handler != NULL) {
10373: xmlSwitchToEncoding(ctxt, handler);
10374: } else {
10375: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10376: "Unsupported encoding %s\n", encoding);
10377: return(NULL);
10378: }
10379: }
10380: }
10381: return(encoding);
10382: }
10383:
10384: /**
10385: * xmlParseSDDecl:
10386: * @ctxt: an XML parser context
10387: *
10388: * parse the XML standalone declaration
10389: *
10390: * [32] SDDecl ::= S 'standalone' Eq
1.1.1.3 misho 10391: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.1 misho 10392: *
10393: * [ VC: Standalone Document Declaration ]
10394: * TODO The standalone document declaration must have the value "no"
10395: * if any external markup declarations contain declarations of:
10396: * - attributes with default values, if elements to which these
10397: * attributes apply appear in the document without specifications
10398: * of values for these attributes, or
10399: * - entities (other than amp, lt, gt, apos, quot), if references
10400: * to those entities appear in the document, or
10401: * - attributes with values subject to normalization, where the
10402: * attribute appears in the document with a value which will change
10403: * as a result of normalization, or
10404: * - element types with element content, if white space occurs directly
10405: * within any instance of those types.
10406: *
10407: * Returns:
10408: * 1 if standalone="yes"
10409: * 0 if standalone="no"
10410: * -2 if standalone attribute is missing or invalid
10411: * (A standalone value of -2 means that the XML declaration was found,
10412: * but no value was specified for the standalone attribute).
10413: */
10414:
10415: int
10416: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10417: int standalone = -2;
10418:
10419: SKIP_BLANKS;
10420: if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10421: SKIP(10);
10422: SKIP_BLANKS;
10423: if (RAW != '=') {
10424: xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10425: return(standalone);
10426: }
10427: NEXT;
10428: SKIP_BLANKS;
10429: if (RAW == '\''){
10430: NEXT;
10431: if ((RAW == 'n') && (NXT(1) == 'o')) {
10432: standalone = 0;
10433: SKIP(2);
10434: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10435: (NXT(2) == 's')) {
10436: standalone = 1;
10437: SKIP(3);
10438: } else {
10439: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10440: }
10441: if (RAW != '\'') {
10442: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10443: } else
10444: NEXT;
10445: } else if (RAW == '"'){
10446: NEXT;
10447: if ((RAW == 'n') && (NXT(1) == 'o')) {
10448: standalone = 0;
10449: SKIP(2);
10450: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10451: (NXT(2) == 's')) {
10452: standalone = 1;
10453: SKIP(3);
10454: } else {
10455: xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10456: }
10457: if (RAW != '"') {
10458: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10459: } else
10460: NEXT;
10461: } else {
10462: xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10463: }
10464: }
10465: return(standalone);
10466: }
10467:
10468: /**
10469: * xmlParseXMLDecl:
10470: * @ctxt: an XML parser context
1.1.1.3 misho 10471: *
1.1 misho 10472: * parse an XML declaration header
10473: *
10474: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10475: */
10476:
10477: void
10478: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10479: xmlChar *version;
10480:
10481: /*
10482: * This value for standalone indicates that the document has an
10483: * XML declaration but it does not have a standalone attribute.
10484: * It will be overwritten later if a standalone attribute is found.
10485: */
10486: ctxt->input->standalone = -2;
10487:
10488: /*
10489: * We know that '<?xml' is here.
10490: */
10491: SKIP(5);
10492:
10493: if (!IS_BLANK_CH(RAW)) {
10494: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10495: "Blank needed after '<?xml'\n");
10496: }
10497: SKIP_BLANKS;
10498:
10499: /*
10500: * We must have the VersionInfo here.
10501: */
10502: version = xmlParseVersionInfo(ctxt);
10503: if (version == NULL) {
10504: xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10505: } else {
10506: if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10507: /*
10508: * Changed here for XML-1.0 5th edition
10509: */
10510: if (ctxt->options & XML_PARSE_OLD10) {
10511: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10512: "Unsupported version '%s'\n",
10513: version);
10514: } else {
10515: if ((version[0] == '1') && ((version[1] == '.'))) {
10516: xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10517: "Unsupported version '%s'\n",
10518: version, NULL);
10519: } else {
10520: xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10521: "Unsupported version '%s'\n",
10522: version);
10523: }
10524: }
10525: }
10526: if (ctxt->version != NULL)
10527: xmlFree((void *) ctxt->version);
10528: ctxt->version = version;
10529: }
10530:
10531: /*
10532: * We may have the encoding declaration
10533: */
10534: if (!IS_BLANK_CH(RAW)) {
10535: if ((RAW == '?') && (NXT(1) == '>')) {
10536: SKIP(2);
10537: return;
10538: }
10539: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10540: }
10541: xmlParseEncodingDecl(ctxt);
10542: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10543: /*
10544: * The XML REC instructs us to stop parsing right here
10545: */
10546: return;
10547: }
10548:
10549: /*
10550: * We may have the standalone status.
10551: */
10552: if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10553: if ((RAW == '?') && (NXT(1) == '>')) {
10554: SKIP(2);
10555: return;
10556: }
10557: xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10558: }
10559:
10560: /*
10561: * We can grow the input buffer freely at that point
10562: */
10563: GROW;
10564:
10565: SKIP_BLANKS;
10566: ctxt->input->standalone = xmlParseSDDecl(ctxt);
10567:
10568: SKIP_BLANKS;
10569: if ((RAW == '?') && (NXT(1) == '>')) {
10570: SKIP(2);
10571: } else if (RAW == '>') {
10572: /* Deprecated old WD ... */
10573: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10574: NEXT;
10575: } else {
10576: xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10577: MOVETO_ENDTAG(CUR_PTR);
10578: NEXT;
10579: }
10580: }
10581:
10582: /**
10583: * xmlParseMisc:
10584: * @ctxt: an XML parser context
1.1.1.3 misho 10585: *
1.1 misho 10586: * parse an XML Misc* optional field.
10587: *
10588: * [27] Misc ::= Comment | PI | S
10589: */
10590:
10591: void
10592: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.1.1.3 misho 10593: while ((ctxt->instate != XML_PARSER_EOF) &&
10594: (((RAW == '<') && (NXT(1) == '?')) ||
10595: (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10596: IS_BLANK_CH(CUR))) {
1.1 misho 10597: if ((RAW == '<') && (NXT(1) == '?')) {
10598: xmlParsePI(ctxt);
10599: } else if (IS_BLANK_CH(CUR)) {
10600: NEXT;
10601: } else
10602: xmlParseComment(ctxt);
10603: }
10604: }
10605:
10606: /**
10607: * xmlParseDocument:
10608: * @ctxt: an XML parser context
1.1.1.3 misho 10609: *
1.1 misho 10610: * parse an XML document (and build a tree if using the standard SAX
10611: * interface).
10612: *
10613: * [1] document ::= prolog element Misc*
10614: *
10615: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10616: *
10617: * Returns 0, -1 in case of error. the parser context is augmented
10618: * as a result of the parsing.
10619: */
10620:
10621: int
10622: xmlParseDocument(xmlParserCtxtPtr ctxt) {
10623: xmlChar start[4];
10624: xmlCharEncoding enc;
10625:
10626: xmlInitParser();
10627:
10628: if ((ctxt == NULL) || (ctxt->input == NULL))
10629: return(-1);
10630:
10631: GROW;
10632:
10633: /*
10634: * SAX: detecting the level.
10635: */
10636: xmlDetectSAX2(ctxt);
10637:
10638: /*
10639: * SAX: beginning of the document processing.
10640: */
10641: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10642: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.1.1.3 misho 10643: if (ctxt->instate == XML_PARSER_EOF)
10644: return(-1);
1.1 misho 10645:
10646: if ((ctxt->encoding == NULL) &&
10647: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
1.1.1.3 misho 10648: /*
1.1 misho 10649: * Get the 4 first bytes and decode the charset
10650: * if enc != XML_CHAR_ENCODING_NONE
10651: * plug some encoding conversion routines.
10652: */
10653: start[0] = RAW;
10654: start[1] = NXT(1);
10655: start[2] = NXT(2);
10656: start[3] = NXT(3);
10657: enc = xmlDetectCharEncoding(&start[0], 4);
10658: if (enc != XML_CHAR_ENCODING_NONE) {
10659: xmlSwitchEncoding(ctxt, enc);
10660: }
10661: }
10662:
10663:
10664: if (CUR == 0) {
10665: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10666: }
10667:
10668: /*
10669: * Check for the XMLDecl in the Prolog.
10670: * do not GROW here to avoid the detected encoder to decode more
10671: * than just the first line, unless the amount of data is really
10672: * too small to hold "<?xml version="1.0" encoding="foo"
10673: */
10674: if ((ctxt->input->end - ctxt->input->cur) < 35) {
10675: GROW;
10676: }
10677: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10678:
10679: /*
10680: * Note that we will switch encoding on the fly.
10681: */
10682: xmlParseXMLDecl(ctxt);
10683: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10684: /*
10685: * The XML REC instructs us to stop parsing right here
10686: */
10687: return(-1);
10688: }
10689: ctxt->standalone = ctxt->input->standalone;
10690: SKIP_BLANKS;
10691: } else {
10692: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10693: }
10694: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10695: ctxt->sax->startDocument(ctxt->userData);
1.1.1.3 misho 10696: if (ctxt->instate == XML_PARSER_EOF)
10697: return(-1);
1.1 misho 10698:
10699: /*
10700: * The Misc part of the Prolog
10701: */
10702: GROW;
10703: xmlParseMisc(ctxt);
10704:
10705: /*
10706: * Then possibly doc type declaration(s) and more Misc
10707: * (doctypedecl Misc*)?
10708: */
10709: GROW;
10710: if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10711:
10712: ctxt->inSubset = 1;
10713: xmlParseDocTypeDecl(ctxt);
10714: if (RAW == '[') {
10715: ctxt->instate = XML_PARSER_DTD;
10716: xmlParseInternalSubset(ctxt);
1.1.1.3 misho 10717: if (ctxt->instate == XML_PARSER_EOF)
10718: return(-1);
1.1 misho 10719: }
10720:
10721: /*
10722: * Create and update the external subset.
10723: */
10724: ctxt->inSubset = 2;
10725: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10726: (!ctxt->disableSAX))
10727: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10728: ctxt->extSubSystem, ctxt->extSubURI);
1.1.1.3 misho 10729: if (ctxt->instate == XML_PARSER_EOF)
10730: return(-1);
1.1 misho 10731: ctxt->inSubset = 0;
10732:
10733: xmlCleanSpecialAttr(ctxt);
10734:
10735: ctxt->instate = XML_PARSER_PROLOG;
10736: xmlParseMisc(ctxt);
10737: }
10738:
10739: /*
10740: * Time to start parsing the tree itself
10741: */
10742: GROW;
10743: if (RAW != '<') {
10744: xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10745: "Start tag expected, '<' not found\n");
10746: } else {
10747: ctxt->instate = XML_PARSER_CONTENT;
10748: xmlParseElement(ctxt);
10749: ctxt->instate = XML_PARSER_EPILOG;
10750:
10751:
10752: /*
10753: * The Misc part at the end
10754: */
10755: xmlParseMisc(ctxt);
10756:
10757: if (RAW != 0) {
10758: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10759: }
10760: ctxt->instate = XML_PARSER_EOF;
10761: }
10762:
10763: /*
10764: * SAX: end of the document processing.
10765: */
10766: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10767: ctxt->sax->endDocument(ctxt->userData);
10768:
10769: /*
10770: * Remove locally kept entity definitions if the tree was not built
10771: */
10772: if ((ctxt->myDoc != NULL) &&
10773: (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10774: xmlFreeDoc(ctxt->myDoc);
10775: ctxt->myDoc = NULL;
10776: }
10777:
10778: if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10779: ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10780: if (ctxt->valid)
10781: ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10782: if (ctxt->nsWellFormed)
10783: ctxt->myDoc->properties |= XML_DOC_NSVALID;
10784: if (ctxt->options & XML_PARSE_OLD10)
10785: ctxt->myDoc->properties |= XML_DOC_OLD10;
10786: }
10787: if (! ctxt->wellFormed) {
10788: ctxt->valid = 0;
10789: return(-1);
10790: }
10791: return(0);
10792: }
10793:
10794: /**
10795: * xmlParseExtParsedEnt:
10796: * @ctxt: an XML parser context
1.1.1.3 misho 10797: *
1.1 misho 10798: * parse a general parsed entity
10799: * An external general parsed entity is well-formed if it matches the
10800: * production labeled extParsedEnt.
10801: *
10802: * [78] extParsedEnt ::= TextDecl? content
10803: *
10804: * Returns 0, -1 in case of error. the parser context is augmented
10805: * as a result of the parsing.
10806: */
10807:
10808: int
10809: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10810: xmlChar start[4];
10811: xmlCharEncoding enc;
10812:
10813: if ((ctxt == NULL) || (ctxt->input == NULL))
10814: return(-1);
10815:
10816: xmlDefaultSAXHandlerInit();
10817:
10818: xmlDetectSAX2(ctxt);
10819:
10820: GROW;
10821:
10822: /*
10823: * SAX: beginning of the document processing.
10824: */
10825: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10826: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10827:
1.1.1.3 misho 10828: /*
1.1 misho 10829: * Get the 4 first bytes and decode the charset
10830: * if enc != XML_CHAR_ENCODING_NONE
10831: * plug some encoding conversion routines.
10832: */
10833: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10834: start[0] = RAW;
10835: start[1] = NXT(1);
10836: start[2] = NXT(2);
10837: start[3] = NXT(3);
10838: enc = xmlDetectCharEncoding(start, 4);
10839: if (enc != XML_CHAR_ENCODING_NONE) {
10840: xmlSwitchEncoding(ctxt, enc);
10841: }
10842: }
10843:
10844:
10845: if (CUR == 0) {
10846: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10847: }
10848:
10849: /*
10850: * Check for the XMLDecl in the Prolog.
10851: */
10852: GROW;
10853: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10854:
10855: /*
10856: * Note that we will switch encoding on the fly.
10857: */
10858: xmlParseXMLDecl(ctxt);
10859: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10860: /*
10861: * The XML REC instructs us to stop parsing right here
10862: */
10863: return(-1);
10864: }
10865: SKIP_BLANKS;
10866: } else {
10867: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10868: }
10869: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10870: ctxt->sax->startDocument(ctxt->userData);
1.1.1.3 misho 10871: if (ctxt->instate == XML_PARSER_EOF)
10872: return(-1);
1.1 misho 10873:
10874: /*
10875: * Doing validity checking on chunk doesn't make sense
10876: */
10877: ctxt->instate = XML_PARSER_CONTENT;
10878: ctxt->validate = 0;
10879: ctxt->loadsubset = 0;
10880: ctxt->depth = 0;
10881:
10882: xmlParseContent(ctxt);
1.1.1.3 misho 10883: if (ctxt->instate == XML_PARSER_EOF)
10884: return(-1);
10885:
1.1 misho 10886: if ((RAW == '<') && (NXT(1) == '/')) {
10887: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10888: } else if (RAW != 0) {
10889: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10890: }
10891:
10892: /*
10893: * SAX: end of the document processing.
10894: */
10895: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10896: ctxt->sax->endDocument(ctxt->userData);
10897:
10898: if (! ctxt->wellFormed) return(-1);
10899: return(0);
10900: }
10901:
10902: #ifdef LIBXML_PUSH_ENABLED
10903: /************************************************************************
10904: * *
1.1.1.3 misho 10905: * Progressive parsing interfaces *
1.1 misho 10906: * *
10907: ************************************************************************/
10908:
10909: /**
10910: * xmlParseLookupSequence:
10911: * @ctxt: an XML parser context
10912: * @first: the first char to lookup
10913: * @next: the next char to lookup or zero
10914: * @third: the next char to lookup or zero
10915: *
10916: * Try to find if a sequence (first, next, third) or just (first next) or
10917: * (first) is available in the input stream.
10918: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10919: * to avoid rescanning sequences of bytes, it DOES change the state of the
10920: * parser, do not use liberally.
10921: *
10922: * Returns the index to the current parsing point if the full sequence
10923: * is available, -1 otherwise.
10924: */
10925: static int
10926: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10927: xmlChar next, xmlChar third) {
10928: int base, len;
10929: xmlParserInputPtr in;
10930: const xmlChar *buf;
10931:
10932: in = ctxt->input;
10933: if (in == NULL) return(-1);
10934: base = in->cur - in->base;
10935: if (base < 0) return(-1);
10936: if (ctxt->checkIndex > base)
10937: base = ctxt->checkIndex;
10938: if (in->buf == NULL) {
10939: buf = in->base;
10940: len = in->length;
10941: } else {
1.1.1.3 misho 10942: buf = xmlBufContent(in->buf->buffer);
10943: len = xmlBufUse(in->buf->buffer);
1.1 misho 10944: }
10945: /* take into account the sequence length */
10946: if (third) len -= 2;
10947: else if (next) len --;
10948: for (;base < len;base++) {
10949: if (buf[base] == first) {
10950: if (third != 0) {
10951: if ((buf[base + 1] != next) ||
10952: (buf[base + 2] != third)) continue;
10953: } else if (next != 0) {
10954: if (buf[base + 1] != next) continue;
10955: }
10956: ctxt->checkIndex = 0;
10957: #ifdef DEBUG_PUSH
10958: if (next == 0)
10959: xmlGenericError(xmlGenericErrorContext,
10960: "PP: lookup '%c' found at %d\n",
10961: first, base);
10962: else if (third == 0)
10963: xmlGenericError(xmlGenericErrorContext,
10964: "PP: lookup '%c%c' found at %d\n",
10965: first, next, base);
1.1.1.3 misho 10966: else
1.1 misho 10967: xmlGenericError(xmlGenericErrorContext,
10968: "PP: lookup '%c%c%c' found at %d\n",
10969: first, next, third, base);
10970: #endif
10971: return(base - (in->cur - in->base));
10972: }
10973: }
10974: ctxt->checkIndex = base;
10975: #ifdef DEBUG_PUSH
10976: if (next == 0)
10977: xmlGenericError(xmlGenericErrorContext,
10978: "PP: lookup '%c' failed\n", first);
10979: else if (third == 0)
10980: xmlGenericError(xmlGenericErrorContext,
10981: "PP: lookup '%c%c' failed\n", first, next);
1.1.1.3 misho 10982: else
1.1 misho 10983: xmlGenericError(xmlGenericErrorContext,
10984: "PP: lookup '%c%c%c' failed\n", first, next, third);
10985: #endif
10986: return(-1);
10987: }
10988:
10989: /**
10990: * xmlParseGetLasts:
10991: * @ctxt: an XML parser context
10992: * @lastlt: pointer to store the last '<' from the input
10993: * @lastgt: pointer to store the last '>' from the input
10994: *
10995: * Lookup the last < and > in the current chunk
10996: */
10997: static void
10998: xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10999: const xmlChar **lastgt) {
11000: const xmlChar *tmp;
11001:
11002: if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11003: xmlGenericError(xmlGenericErrorContext,
11004: "Internal error: xmlParseGetLasts\n");
11005: return;
11006: }
11007: if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11008: tmp = ctxt->input->end;
11009: tmp--;
11010: while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11011: if (tmp < ctxt->input->base) {
11012: *lastlt = NULL;
11013: *lastgt = NULL;
11014: } else {
11015: *lastlt = tmp;
11016: tmp++;
11017: while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11018: if (*tmp == '\'') {
11019: tmp++;
11020: while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11021: if (tmp < ctxt->input->end) tmp++;
11022: } else if (*tmp == '"') {
11023: tmp++;
11024: while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11025: if (tmp < ctxt->input->end) tmp++;
11026: } else
11027: tmp++;
11028: }
11029: if (tmp < ctxt->input->end)
11030: *lastgt = tmp;
11031: else {
11032: tmp = *lastlt;
11033: tmp--;
11034: while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11035: if (tmp >= ctxt->input->base)
11036: *lastgt = tmp;
11037: else
11038: *lastgt = NULL;
11039: }
11040: }
11041: } else {
11042: *lastlt = NULL;
11043: *lastgt = NULL;
11044: }
11045: }
11046: /**
11047: * xmlCheckCdataPush:
11048: * @cur: pointer to the bock of characters
11049: * @len: length of the block in bytes
11050: *
11051: * Check that the block of characters is okay as SCdata content [20]
11052: *
11053: * Returns the number of bytes to pass if okay, a negative index where an
11054: * UTF-8 error occured otherwise
11055: */
11056: static int
11057: xmlCheckCdataPush(const xmlChar *utf, int len) {
11058: int ix;
11059: unsigned char c;
11060: int codepoint;
11061:
11062: if ((utf == NULL) || (len <= 0))
11063: return(0);
1.1.1.3 misho 11064:
1.1 misho 11065: for (ix = 0; ix < len;) { /* string is 0-terminated */
11066: c = utf[ix];
11067: if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11068: if (c >= 0x20)
11069: ix++;
11070: else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11071: ix++;
11072: else
11073: return(-ix);
11074: } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11075: if (ix + 2 > len) return(ix);
11076: if ((utf[ix+1] & 0xc0 ) != 0x80)
11077: return(-ix);
11078: codepoint = (utf[ix] & 0x1f) << 6;
11079: codepoint |= utf[ix+1] & 0x3f;
11080: if (!xmlIsCharQ(codepoint))
11081: return(-ix);
11082: ix += 2;
11083: } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11084: if (ix + 3 > len) return(ix);
11085: if (((utf[ix+1] & 0xc0) != 0x80) ||
11086: ((utf[ix+2] & 0xc0) != 0x80))
11087: return(-ix);
11088: codepoint = (utf[ix] & 0xf) << 12;
11089: codepoint |= (utf[ix+1] & 0x3f) << 6;
11090: codepoint |= utf[ix+2] & 0x3f;
11091: if (!xmlIsCharQ(codepoint))
11092: return(-ix);
11093: ix += 3;
11094: } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11095: if (ix + 4 > len) return(ix);
11096: if (((utf[ix+1] & 0xc0) != 0x80) ||
11097: ((utf[ix+2] & 0xc0) != 0x80) ||
11098: ((utf[ix+3] & 0xc0) != 0x80))
11099: return(-ix);
11100: codepoint = (utf[ix] & 0x7) << 18;
11101: codepoint |= (utf[ix+1] & 0x3f) << 12;
11102: codepoint |= (utf[ix+2] & 0x3f) << 6;
11103: codepoint |= utf[ix+3] & 0x3f;
11104: if (!xmlIsCharQ(codepoint))
11105: return(-ix);
11106: ix += 4;
11107: } else /* unknown encoding */
11108: return(-ix);
11109: }
11110: return(ix);
11111: }
11112:
11113: /**
11114: * xmlParseTryOrFinish:
11115: * @ctxt: an XML parser context
11116: * @terminate: last chunk indicator
11117: *
11118: * Try to progress on parsing
11119: *
11120: * Returns zero if no parsing was possible
11121: */
11122: static int
11123: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11124: int ret = 0;
11125: int avail, tlen;
11126: xmlChar cur, next;
11127: const xmlChar *lastlt, *lastgt;
11128:
11129: if (ctxt->input == NULL)
11130: return(0);
11131:
11132: #ifdef DEBUG_PUSH
11133: switch (ctxt->instate) {
11134: case XML_PARSER_EOF:
11135: xmlGenericError(xmlGenericErrorContext,
11136: "PP: try EOF\n"); break;
11137: case XML_PARSER_START:
11138: xmlGenericError(xmlGenericErrorContext,
11139: "PP: try START\n"); break;
11140: case XML_PARSER_MISC:
11141: xmlGenericError(xmlGenericErrorContext,
11142: "PP: try MISC\n");break;
11143: case XML_PARSER_COMMENT:
11144: xmlGenericError(xmlGenericErrorContext,
11145: "PP: try COMMENT\n");break;
11146: case XML_PARSER_PROLOG:
11147: xmlGenericError(xmlGenericErrorContext,
11148: "PP: try PROLOG\n");break;
11149: case XML_PARSER_START_TAG:
11150: xmlGenericError(xmlGenericErrorContext,
11151: "PP: try START_TAG\n");break;
11152: case XML_PARSER_CONTENT:
11153: xmlGenericError(xmlGenericErrorContext,
11154: "PP: try CONTENT\n");break;
11155: case XML_PARSER_CDATA_SECTION:
11156: xmlGenericError(xmlGenericErrorContext,
11157: "PP: try CDATA_SECTION\n");break;
11158: case XML_PARSER_END_TAG:
11159: xmlGenericError(xmlGenericErrorContext,
11160: "PP: try END_TAG\n");break;
11161: case XML_PARSER_ENTITY_DECL:
11162: xmlGenericError(xmlGenericErrorContext,
11163: "PP: try ENTITY_DECL\n");break;
11164: case XML_PARSER_ENTITY_VALUE:
11165: xmlGenericError(xmlGenericErrorContext,
11166: "PP: try ENTITY_VALUE\n");break;
11167: case XML_PARSER_ATTRIBUTE_VALUE:
11168: xmlGenericError(xmlGenericErrorContext,
11169: "PP: try ATTRIBUTE_VALUE\n");break;
11170: case XML_PARSER_DTD:
11171: xmlGenericError(xmlGenericErrorContext,
11172: "PP: try DTD\n");break;
11173: case XML_PARSER_EPILOG:
11174: xmlGenericError(xmlGenericErrorContext,
11175: "PP: try EPILOG\n");break;
11176: case XML_PARSER_PI:
11177: xmlGenericError(xmlGenericErrorContext,
11178: "PP: try PI\n");break;
11179: case XML_PARSER_IGNORE:
11180: xmlGenericError(xmlGenericErrorContext,
11181: "PP: try IGNORE\n");break;
11182: }
11183: #endif
11184:
11185: if ((ctxt->input != NULL) &&
11186: (ctxt->input->cur - ctxt->input->base > 4096)) {
11187: xmlSHRINK(ctxt);
11188: ctxt->checkIndex = 0;
11189: }
11190: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11191:
1.1.1.3 misho 11192: while (ctxt->instate != XML_PARSER_EOF) {
1.1 misho 11193: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11194: return(0);
11195:
1.1.1.3 misho 11196:
1.1 misho 11197: /*
11198: * Pop-up of finished entities.
11199: */
11200: while ((RAW == 0) && (ctxt->inputNr > 1))
11201: xmlPopInput(ctxt);
11202:
11203: if (ctxt->input == NULL) break;
11204: if (ctxt->input->buf == NULL)
11205: avail = ctxt->input->length -
11206: (ctxt->input->cur - ctxt->input->base);
11207: else {
11208: /*
11209: * If we are operating on converted input, try to flush
11210: * remainng chars to avoid them stalling in the non-converted
1.1.1.3 misho 11211: * buffer. But do not do this in document start where
11212: * encoding="..." may not have been read and we work on a
11213: * guessed encoding.
11214: */
11215: if ((ctxt->instate != XML_PARSER_START) &&
11216: (ctxt->input->buf->raw != NULL) &&
11217: (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11218: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11219: ctxt->input);
11220: size_t current = ctxt->input->cur - ctxt->input->base;
1.1 misho 11221:
11222: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
1.1.1.3 misho 11223: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11224: base, current);
1.1 misho 11225: }
1.1.1.3 misho 11226: avail = xmlBufUse(ctxt->input->buf->buffer) -
1.1 misho 11227: (ctxt->input->cur - ctxt->input->base);
11228: }
11229: if (avail < 1)
11230: goto done;
11231: switch (ctxt->instate) {
11232: case XML_PARSER_EOF:
11233: /*
11234: * Document parsing is done !
11235: */
11236: goto done;
11237: case XML_PARSER_START:
11238: if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11239: xmlChar start[4];
11240: xmlCharEncoding enc;
11241:
11242: /*
11243: * Very first chars read from the document flow.
11244: */
11245: if (avail < 4)
11246: goto done;
11247:
1.1.1.3 misho 11248: /*
1.1 misho 11249: * Get the 4 first bytes and decode the charset
11250: * if enc != XML_CHAR_ENCODING_NONE
11251: * plug some encoding conversion routines,
11252: * else xmlSwitchEncoding will set to (default)
11253: * UTF8.
11254: */
11255: start[0] = RAW;
11256: start[1] = NXT(1);
11257: start[2] = NXT(2);
11258: start[3] = NXT(3);
11259: enc = xmlDetectCharEncoding(start, 4);
11260: xmlSwitchEncoding(ctxt, enc);
11261: break;
11262: }
11263:
11264: if (avail < 2)
11265: goto done;
11266: cur = ctxt->input->cur[0];
11267: next = ctxt->input->cur[1];
11268: if (cur == 0) {
11269: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11270: ctxt->sax->setDocumentLocator(ctxt->userData,
11271: &xmlDefaultSAXLocator);
11272: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11273: ctxt->instate = XML_PARSER_EOF;
11274: #ifdef DEBUG_PUSH
11275: xmlGenericError(xmlGenericErrorContext,
11276: "PP: entering EOF\n");
11277: #endif
11278: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11279: ctxt->sax->endDocument(ctxt->userData);
11280: goto done;
11281: }
11282: if ((cur == '<') && (next == '?')) {
11283: /* PI or XML decl */
11284: if (avail < 5) return(ret);
11285: if ((!terminate) &&
11286: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11287: return(ret);
11288: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11289: ctxt->sax->setDocumentLocator(ctxt->userData,
11290: &xmlDefaultSAXLocator);
11291: if ((ctxt->input->cur[2] == 'x') &&
11292: (ctxt->input->cur[3] == 'm') &&
11293: (ctxt->input->cur[4] == 'l') &&
11294: (IS_BLANK_CH(ctxt->input->cur[5]))) {
11295: ret += 5;
11296: #ifdef DEBUG_PUSH
11297: xmlGenericError(xmlGenericErrorContext,
11298: "PP: Parsing XML Decl\n");
11299: #endif
11300: xmlParseXMLDecl(ctxt);
11301: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11302: /*
11303: * The XML REC instructs us to stop parsing right
11304: * here
11305: */
11306: ctxt->instate = XML_PARSER_EOF;
11307: return(0);
11308: }
11309: ctxt->standalone = ctxt->input->standalone;
11310: if ((ctxt->encoding == NULL) &&
11311: (ctxt->input->encoding != NULL))
11312: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11313: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11314: (!ctxt->disableSAX))
11315: ctxt->sax->startDocument(ctxt->userData);
11316: ctxt->instate = XML_PARSER_MISC;
11317: #ifdef DEBUG_PUSH
11318: xmlGenericError(xmlGenericErrorContext,
11319: "PP: entering MISC\n");
11320: #endif
11321: } else {
11322: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11323: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11324: (!ctxt->disableSAX))
11325: ctxt->sax->startDocument(ctxt->userData);
11326: ctxt->instate = XML_PARSER_MISC;
11327: #ifdef DEBUG_PUSH
11328: xmlGenericError(xmlGenericErrorContext,
11329: "PP: entering MISC\n");
11330: #endif
11331: }
11332: } else {
11333: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11334: ctxt->sax->setDocumentLocator(ctxt->userData,
11335: &xmlDefaultSAXLocator);
11336: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11337: if (ctxt->version == NULL) {
11338: xmlErrMemory(ctxt, NULL);
11339: break;
11340: }
11341: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11342: (!ctxt->disableSAX))
11343: ctxt->sax->startDocument(ctxt->userData);
11344: ctxt->instate = XML_PARSER_MISC;
11345: #ifdef DEBUG_PUSH
11346: xmlGenericError(xmlGenericErrorContext,
11347: "PP: entering MISC\n");
11348: #endif
11349: }
11350: break;
11351: case XML_PARSER_START_TAG: {
11352: const xmlChar *name;
11353: const xmlChar *prefix = NULL;
11354: const xmlChar *URI = NULL;
11355: int nsNr = ctxt->nsNr;
11356:
11357: if ((avail < 2) && (ctxt->inputNr == 1))
11358: goto done;
11359: cur = ctxt->input->cur[0];
11360: if (cur != '<') {
11361: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11362: ctxt->instate = XML_PARSER_EOF;
11363: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11364: ctxt->sax->endDocument(ctxt->userData);
11365: goto done;
11366: }
11367: if (!terminate) {
11368: if (ctxt->progressive) {
11369: /* > can be found unescaped in attribute values */
11370: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11371: goto done;
11372: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11373: goto done;
11374: }
11375: }
11376: if (ctxt->spaceNr == 0)
11377: spacePush(ctxt, -1);
11378: else if (*ctxt->space == -2)
11379: spacePush(ctxt, -1);
11380: else
11381: spacePush(ctxt, *ctxt->space);
11382: #ifdef LIBXML_SAX1_ENABLED
11383: if (ctxt->sax2)
11384: #endif /* LIBXML_SAX1_ENABLED */
11385: name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11386: #ifdef LIBXML_SAX1_ENABLED
11387: else
11388: name = xmlParseStartTag(ctxt);
11389: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 11390: if (ctxt->instate == XML_PARSER_EOF)
11391: goto done;
1.1 misho 11392: if (name == NULL) {
11393: spacePop(ctxt);
11394: ctxt->instate = XML_PARSER_EOF;
11395: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11396: ctxt->sax->endDocument(ctxt->userData);
11397: goto done;
11398: }
11399: #ifdef LIBXML_VALID_ENABLED
11400: /*
11401: * [ VC: Root Element Type ]
11402: * The Name in the document type declaration must match
1.1.1.3 misho 11403: * the element type of the root element.
1.1 misho 11404: */
11405: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11406: ctxt->node && (ctxt->node == ctxt->myDoc->children))
11407: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11408: #endif /* LIBXML_VALID_ENABLED */
11409:
11410: /*
11411: * Check for an Empty Element.
11412: */
11413: if ((RAW == '/') && (NXT(1) == '>')) {
11414: SKIP(2);
11415:
11416: if (ctxt->sax2) {
11417: if ((ctxt->sax != NULL) &&
11418: (ctxt->sax->endElementNs != NULL) &&
11419: (!ctxt->disableSAX))
11420: ctxt->sax->endElementNs(ctxt->userData, name,
11421: prefix, URI);
11422: if (ctxt->nsNr - nsNr > 0)
11423: nsPop(ctxt, ctxt->nsNr - nsNr);
11424: #ifdef LIBXML_SAX1_ENABLED
11425: } else {
11426: if ((ctxt->sax != NULL) &&
11427: (ctxt->sax->endElement != NULL) &&
11428: (!ctxt->disableSAX))
11429: ctxt->sax->endElement(ctxt->userData, name);
11430: #endif /* LIBXML_SAX1_ENABLED */
11431: }
1.1.1.3 misho 11432: if (ctxt->instate == XML_PARSER_EOF)
11433: goto done;
1.1 misho 11434: spacePop(ctxt);
11435: if (ctxt->nameNr == 0) {
11436: ctxt->instate = XML_PARSER_EPILOG;
11437: } else {
11438: ctxt->instate = XML_PARSER_CONTENT;
11439: }
1.1.1.3 misho 11440: ctxt->progressive = 1;
1.1 misho 11441: break;
11442: }
11443: if (RAW == '>') {
11444: NEXT;
11445: } else {
11446: xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11447: "Couldn't find end of Start Tag %s\n",
11448: name);
11449: nodePop(ctxt);
11450: spacePop(ctxt);
11451: }
11452: if (ctxt->sax2)
11453: nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11454: #ifdef LIBXML_SAX1_ENABLED
11455: else
11456: namePush(ctxt, name);
11457: #endif /* LIBXML_SAX1_ENABLED */
11458:
11459: ctxt->instate = XML_PARSER_CONTENT;
1.1.1.3 misho 11460: ctxt->progressive = 1;
1.1 misho 11461: break;
11462: }
11463: case XML_PARSER_CONTENT: {
11464: const xmlChar *test;
11465: unsigned int cons;
11466: if ((avail < 2) && (ctxt->inputNr == 1))
11467: goto done;
11468: cur = ctxt->input->cur[0];
11469: next = ctxt->input->cur[1];
11470:
11471: test = CUR_PTR;
11472: cons = ctxt->input->consumed;
11473: if ((cur == '<') && (next == '/')) {
11474: ctxt->instate = XML_PARSER_END_TAG;
11475: break;
11476: } else if ((cur == '<') && (next == '?')) {
11477: if ((!terminate) &&
1.1.1.3 misho 11478: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11479: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11480: goto done;
1.1.1.3 misho 11481: }
1.1 misho 11482: xmlParsePI(ctxt);
1.1.1.3 misho 11483: ctxt->instate = XML_PARSER_CONTENT;
11484: ctxt->progressive = 1;
1.1 misho 11485: } else if ((cur == '<') && (next != '!')) {
11486: ctxt->instate = XML_PARSER_START_TAG;
11487: break;
11488: } else if ((cur == '<') && (next == '!') &&
11489: (ctxt->input->cur[2] == '-') &&
11490: (ctxt->input->cur[3] == '-')) {
11491: int term;
11492:
11493: if (avail < 4)
11494: goto done;
11495: ctxt->input->cur += 4;
11496: term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11497: ctxt->input->cur -= 4;
1.1.1.3 misho 11498: if ((!terminate) && (term < 0)) {
11499: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11500: goto done;
1.1.1.3 misho 11501: }
1.1 misho 11502: xmlParseComment(ctxt);
11503: ctxt->instate = XML_PARSER_CONTENT;
1.1.1.3 misho 11504: ctxt->progressive = 1;
1.1 misho 11505: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11506: (ctxt->input->cur[2] == '[') &&
11507: (ctxt->input->cur[3] == 'C') &&
11508: (ctxt->input->cur[4] == 'D') &&
11509: (ctxt->input->cur[5] == 'A') &&
11510: (ctxt->input->cur[6] == 'T') &&
11511: (ctxt->input->cur[7] == 'A') &&
11512: (ctxt->input->cur[8] == '[')) {
11513: SKIP(9);
11514: ctxt->instate = XML_PARSER_CDATA_SECTION;
11515: break;
11516: } else if ((cur == '<') && (next == '!') &&
11517: (avail < 9)) {
11518: goto done;
11519: } else if (cur == '&') {
11520: if ((!terminate) &&
11521: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11522: goto done;
11523: xmlParseReference(ctxt);
11524: } else {
11525: /* TODO Avoid the extra copy, handle directly !!! */
11526: /*
11527: * Goal of the following test is:
11528: * - minimize calls to the SAX 'character' callback
11529: * when they are mergeable
11530: * - handle an problem for isBlank when we only parse
11531: * a sequence of blank chars and the next one is
11532: * not available to check against '<' presence.
11533: * - tries to homogenize the differences in SAX
11534: * callbacks between the push and pull versions
11535: * of the parser.
11536: */
11537: if ((ctxt->inputNr == 1) &&
11538: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11539: if (!terminate) {
11540: if (ctxt->progressive) {
11541: if ((lastlt == NULL) ||
11542: (ctxt->input->cur > lastlt))
11543: goto done;
11544: } else if (xmlParseLookupSequence(ctxt,
11545: '<', 0, 0) < 0) {
11546: goto done;
11547: }
11548: }
11549: }
11550: ctxt->checkIndex = 0;
11551: xmlParseCharData(ctxt, 0);
11552: }
11553: /*
11554: * Pop-up of finished entities.
11555: */
11556: while ((RAW == 0) && (ctxt->inputNr > 1))
11557: xmlPopInput(ctxt);
11558: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11559: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11560: "detected an error in element content\n");
11561: ctxt->instate = XML_PARSER_EOF;
11562: break;
11563: }
11564: break;
11565: }
11566: case XML_PARSER_END_TAG:
11567: if (avail < 2)
11568: goto done;
11569: if (!terminate) {
11570: if (ctxt->progressive) {
11571: /* > can be found unescaped in attribute values */
11572: if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11573: goto done;
11574: } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11575: goto done;
11576: }
11577: }
11578: if (ctxt->sax2) {
11579: xmlParseEndTag2(ctxt,
11580: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11581: (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11582: (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11583: nameNsPop(ctxt);
11584: }
11585: #ifdef LIBXML_SAX1_ENABLED
11586: else
11587: xmlParseEndTag1(ctxt, 0);
11588: #endif /* LIBXML_SAX1_ENABLED */
1.1.1.2 misho 11589: if (ctxt->instate == XML_PARSER_EOF) {
11590: /* Nothing */
11591: } else if (ctxt->nameNr == 0) {
1.1 misho 11592: ctxt->instate = XML_PARSER_EPILOG;
11593: } else {
11594: ctxt->instate = XML_PARSER_CONTENT;
11595: }
11596: break;
11597: case XML_PARSER_CDATA_SECTION: {
11598: /*
1.1.1.3 misho 11599: * The Push mode need to have the SAX callback for
1.1 misho 11600: * cdataBlock merge back contiguous callbacks.
11601: */
11602: int base;
11603:
11604: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11605: if (base < 0) {
11606: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11607: int tmp;
11608:
1.1.1.3 misho 11609: tmp = xmlCheckCdataPush(ctxt->input->cur,
1.1 misho 11610: XML_PARSER_BIG_BUFFER_SIZE);
11611: if (tmp < 0) {
11612: tmp = -tmp;
11613: ctxt->input->cur += tmp;
11614: goto encoding_error;
11615: }
11616: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11617: if (ctxt->sax->cdataBlock != NULL)
11618: ctxt->sax->cdataBlock(ctxt->userData,
11619: ctxt->input->cur, tmp);
11620: else if (ctxt->sax->characters != NULL)
11621: ctxt->sax->characters(ctxt->userData,
11622: ctxt->input->cur, tmp);
11623: }
1.1.1.3 misho 11624: if (ctxt->instate == XML_PARSER_EOF)
11625: goto done;
1.1 misho 11626: SKIPL(tmp);
11627: ctxt->checkIndex = 0;
11628: }
11629: goto done;
11630: } else {
11631: int tmp;
11632:
11633: tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11634: if ((tmp < 0) || (tmp != base)) {
11635: tmp = -tmp;
11636: ctxt->input->cur += tmp;
11637: goto encoding_error;
11638: }
11639: if ((ctxt->sax != NULL) && (base == 0) &&
11640: (ctxt->sax->cdataBlock != NULL) &&
11641: (!ctxt->disableSAX)) {
11642: /*
11643: * Special case to provide identical behaviour
11644: * between pull and push parsers on empty CDATA
11645: * sections
11646: */
11647: if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11648: (!strncmp((const char *)&ctxt->input->cur[-9],
11649: "<![CDATA[", 9)))
11650: ctxt->sax->cdataBlock(ctxt->userData,
11651: BAD_CAST "", 0);
11652: } else if ((ctxt->sax != NULL) && (base > 0) &&
11653: (!ctxt->disableSAX)) {
11654: if (ctxt->sax->cdataBlock != NULL)
11655: ctxt->sax->cdataBlock(ctxt->userData,
11656: ctxt->input->cur, base);
11657: else if (ctxt->sax->characters != NULL)
11658: ctxt->sax->characters(ctxt->userData,
11659: ctxt->input->cur, base);
11660: }
1.1.1.3 misho 11661: if (ctxt->instate == XML_PARSER_EOF)
11662: goto done;
1.1 misho 11663: SKIPL(base + 3);
11664: ctxt->checkIndex = 0;
11665: ctxt->instate = XML_PARSER_CONTENT;
11666: #ifdef DEBUG_PUSH
11667: xmlGenericError(xmlGenericErrorContext,
11668: "PP: entering CONTENT\n");
11669: #endif
11670: }
11671: break;
11672: }
11673: case XML_PARSER_MISC:
11674: SKIP_BLANKS;
11675: if (ctxt->input->buf == NULL)
11676: avail = ctxt->input->length -
11677: (ctxt->input->cur - ctxt->input->base);
11678: else
1.1.1.3 misho 11679: avail = xmlBufUse(ctxt->input->buf->buffer) -
1.1 misho 11680: (ctxt->input->cur - ctxt->input->base);
11681: if (avail < 2)
11682: goto done;
11683: cur = ctxt->input->cur[0];
11684: next = ctxt->input->cur[1];
11685: if ((cur == '<') && (next == '?')) {
11686: if ((!terminate) &&
1.1.1.3 misho 11687: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11688: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11689: goto done;
1.1.1.3 misho 11690: }
1.1 misho 11691: #ifdef DEBUG_PUSH
11692: xmlGenericError(xmlGenericErrorContext,
11693: "PP: Parsing PI\n");
11694: #endif
11695: xmlParsePI(ctxt);
1.1.1.3 misho 11696: if (ctxt->instate == XML_PARSER_EOF)
11697: goto done;
11698: ctxt->instate = XML_PARSER_MISC;
11699: ctxt->progressive = 1;
1.1 misho 11700: ctxt->checkIndex = 0;
11701: } else if ((cur == '<') && (next == '!') &&
11702: (ctxt->input->cur[2] == '-') &&
11703: (ctxt->input->cur[3] == '-')) {
11704: if ((!terminate) &&
1.1.1.3 misho 11705: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11706: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11707: goto done;
1.1.1.3 misho 11708: }
1.1 misho 11709: #ifdef DEBUG_PUSH
11710: xmlGenericError(xmlGenericErrorContext,
11711: "PP: Parsing Comment\n");
11712: #endif
11713: xmlParseComment(ctxt);
1.1.1.3 misho 11714: if (ctxt->instate == XML_PARSER_EOF)
11715: goto done;
1.1 misho 11716: ctxt->instate = XML_PARSER_MISC;
1.1.1.3 misho 11717: ctxt->progressive = 1;
1.1 misho 11718: ctxt->checkIndex = 0;
11719: } else if ((cur == '<') && (next == '!') &&
11720: (ctxt->input->cur[2] == 'D') &&
11721: (ctxt->input->cur[3] == 'O') &&
11722: (ctxt->input->cur[4] == 'C') &&
11723: (ctxt->input->cur[5] == 'T') &&
11724: (ctxt->input->cur[6] == 'Y') &&
11725: (ctxt->input->cur[7] == 'P') &&
11726: (ctxt->input->cur[8] == 'E')) {
11727: if ((!terminate) &&
1.1.1.3 misho 11728: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11729: ctxt->progressive = XML_PARSER_DTD;
1.1 misho 11730: goto done;
1.1.1.3 misho 11731: }
1.1 misho 11732: #ifdef DEBUG_PUSH
11733: xmlGenericError(xmlGenericErrorContext,
11734: "PP: Parsing internal subset\n");
11735: #endif
11736: ctxt->inSubset = 1;
1.1.1.3 misho 11737: ctxt->progressive = 0;
11738: ctxt->checkIndex = 0;
1.1 misho 11739: xmlParseDocTypeDecl(ctxt);
1.1.1.3 misho 11740: if (ctxt->instate == XML_PARSER_EOF)
11741: goto done;
1.1 misho 11742: if (RAW == '[') {
11743: ctxt->instate = XML_PARSER_DTD;
11744: #ifdef DEBUG_PUSH
11745: xmlGenericError(xmlGenericErrorContext,
11746: "PP: entering DTD\n");
11747: #endif
11748: } else {
11749: /*
11750: * Create and update the external subset.
11751: */
11752: ctxt->inSubset = 2;
11753: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11754: (ctxt->sax->externalSubset != NULL))
11755: ctxt->sax->externalSubset(ctxt->userData,
11756: ctxt->intSubName, ctxt->extSubSystem,
11757: ctxt->extSubURI);
11758: ctxt->inSubset = 0;
11759: xmlCleanSpecialAttr(ctxt);
11760: ctxt->instate = XML_PARSER_PROLOG;
11761: #ifdef DEBUG_PUSH
11762: xmlGenericError(xmlGenericErrorContext,
11763: "PP: entering PROLOG\n");
11764: #endif
11765: }
11766: } else if ((cur == '<') && (next == '!') &&
11767: (avail < 9)) {
11768: goto done;
11769: } else {
11770: ctxt->instate = XML_PARSER_START_TAG;
1.1.1.3 misho 11771: ctxt->progressive = XML_PARSER_START_TAG;
1.1 misho 11772: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11773: #ifdef DEBUG_PUSH
11774: xmlGenericError(xmlGenericErrorContext,
11775: "PP: entering START_TAG\n");
11776: #endif
11777: }
11778: break;
11779: case XML_PARSER_PROLOG:
11780: SKIP_BLANKS;
11781: if (ctxt->input->buf == NULL)
11782: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11783: else
1.1.1.3 misho 11784: avail = xmlBufUse(ctxt->input->buf->buffer) -
11785: (ctxt->input->cur - ctxt->input->base);
11786: if (avail < 2)
1.1 misho 11787: goto done;
11788: cur = ctxt->input->cur[0];
11789: next = ctxt->input->cur[1];
11790: if ((cur == '<') && (next == '?')) {
11791: if ((!terminate) &&
1.1.1.3 misho 11792: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11793: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11794: goto done;
1.1.1.3 misho 11795: }
1.1 misho 11796: #ifdef DEBUG_PUSH
11797: xmlGenericError(xmlGenericErrorContext,
11798: "PP: Parsing PI\n");
11799: #endif
11800: xmlParsePI(ctxt);
1.1.1.3 misho 11801: if (ctxt->instate == XML_PARSER_EOF)
11802: goto done;
11803: ctxt->instate = XML_PARSER_PROLOG;
11804: ctxt->progressive = 1;
1.1 misho 11805: } else if ((cur == '<') && (next == '!') &&
11806: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11807: if ((!terminate) &&
1.1.1.3 misho 11808: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11809: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11810: goto done;
1.1.1.3 misho 11811: }
1.1 misho 11812: #ifdef DEBUG_PUSH
11813: xmlGenericError(xmlGenericErrorContext,
11814: "PP: Parsing Comment\n");
11815: #endif
11816: xmlParseComment(ctxt);
1.1.1.3 misho 11817: if (ctxt->instate == XML_PARSER_EOF)
11818: goto done;
1.1 misho 11819: ctxt->instate = XML_PARSER_PROLOG;
1.1.1.3 misho 11820: ctxt->progressive = 1;
1.1 misho 11821: } else if ((cur == '<') && (next == '!') &&
11822: (avail < 4)) {
11823: goto done;
11824: } else {
11825: ctxt->instate = XML_PARSER_START_TAG;
11826: if (ctxt->progressive == 0)
1.1.1.3 misho 11827: ctxt->progressive = XML_PARSER_START_TAG;
1.1 misho 11828: xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11829: #ifdef DEBUG_PUSH
11830: xmlGenericError(xmlGenericErrorContext,
11831: "PP: entering START_TAG\n");
11832: #endif
11833: }
11834: break;
11835: case XML_PARSER_EPILOG:
11836: SKIP_BLANKS;
11837: if (ctxt->input->buf == NULL)
11838: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11839: else
1.1.1.3 misho 11840: avail = xmlBufUse(ctxt->input->buf->buffer) -
11841: (ctxt->input->cur - ctxt->input->base);
1.1 misho 11842: if (avail < 2)
11843: goto done;
11844: cur = ctxt->input->cur[0];
11845: next = ctxt->input->cur[1];
11846: if ((cur == '<') && (next == '?')) {
11847: if ((!terminate) &&
1.1.1.3 misho 11848: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11849: ctxt->progressive = XML_PARSER_PI;
1.1 misho 11850: goto done;
1.1.1.3 misho 11851: }
1.1 misho 11852: #ifdef DEBUG_PUSH
11853: xmlGenericError(xmlGenericErrorContext,
11854: "PP: Parsing PI\n");
11855: #endif
11856: xmlParsePI(ctxt);
1.1.1.3 misho 11857: if (ctxt->instate == XML_PARSER_EOF)
11858: goto done;
1.1 misho 11859: ctxt->instate = XML_PARSER_EPILOG;
1.1.1.3 misho 11860: ctxt->progressive = 1;
1.1 misho 11861: } else if ((cur == '<') && (next == '!') &&
11862: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11863: if ((!terminate) &&
1.1.1.3 misho 11864: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11865: ctxt->progressive = XML_PARSER_COMMENT;
1.1 misho 11866: goto done;
1.1.1.3 misho 11867: }
1.1 misho 11868: #ifdef DEBUG_PUSH
11869: xmlGenericError(xmlGenericErrorContext,
11870: "PP: Parsing Comment\n");
11871: #endif
11872: xmlParseComment(ctxt);
1.1.1.3 misho 11873: if (ctxt->instate == XML_PARSER_EOF)
11874: goto done;
1.1 misho 11875: ctxt->instate = XML_PARSER_EPILOG;
1.1.1.3 misho 11876: ctxt->progressive = 1;
1.1 misho 11877: } else if ((cur == '<') && (next == '!') &&
11878: (avail < 4)) {
11879: goto done;
11880: } else {
11881: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11882: ctxt->instate = XML_PARSER_EOF;
11883: #ifdef DEBUG_PUSH
11884: xmlGenericError(xmlGenericErrorContext,
11885: "PP: entering EOF\n");
11886: #endif
11887: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11888: ctxt->sax->endDocument(ctxt->userData);
11889: goto done;
11890: }
11891: break;
11892: case XML_PARSER_DTD: {
11893: /*
11894: * Sorry but progressive parsing of the internal subset
11895: * is not expected to be supported. We first check that
11896: * the full content of the internal subset is available and
11897: * the parsing is launched only at that point.
11898: * Internal subset ends up with "']' S? '>'" in an unescaped
11899: * section and not in a ']]>' sequence which are conditional
11900: * sections (whoever argued to keep that crap in XML deserve
11901: * a place in hell !).
11902: */
11903: int base, i;
11904: xmlChar *buf;
11905: xmlChar quote = 0;
1.1.1.3 misho 11906: size_t use;
1.1 misho 11907:
11908: base = ctxt->input->cur - ctxt->input->base;
11909: if (base < 0) return(0);
11910: if (ctxt->checkIndex > base)
11911: base = ctxt->checkIndex;
1.1.1.3 misho 11912: buf = xmlBufContent(ctxt->input->buf->buffer);
11913: use = xmlBufUse(ctxt->input->buf->buffer);
11914: for (;(unsigned int) base < use; base++) {
1.1 misho 11915: if (quote != 0) {
11916: if (buf[base] == quote)
11917: quote = 0;
1.1.1.3 misho 11918: continue;
1.1 misho 11919: }
11920: if ((quote == 0) && (buf[base] == '<')) {
11921: int found = 0;
11922: /* special handling of comments */
1.1.1.3 misho 11923: if (((unsigned int) base + 4 < use) &&
1.1 misho 11924: (buf[base + 1] == '!') &&
11925: (buf[base + 2] == '-') &&
11926: (buf[base + 3] == '-')) {
1.1.1.3 misho 11927: for (;(unsigned int) base + 3 < use; base++) {
1.1 misho 11928: if ((buf[base] == '-') &&
11929: (buf[base + 1] == '-') &&
11930: (buf[base + 2] == '>')) {
11931: found = 1;
11932: base += 2;
11933: break;
11934: }
11935: }
11936: if (!found) {
11937: #if 0
11938: fprintf(stderr, "unfinished comment\n");
11939: #endif
11940: break; /* for */
11941: }
11942: continue;
11943: }
11944: }
11945: if (buf[base] == '"') {
11946: quote = '"';
11947: continue;
11948: }
11949: if (buf[base] == '\'') {
11950: quote = '\'';
11951: continue;
11952: }
11953: if (buf[base] == ']') {
11954: #if 0
11955: fprintf(stderr, "%c%c%c%c: ", buf[base],
11956: buf[base + 1], buf[base + 2], buf[base + 3]);
11957: #endif
1.1.1.3 misho 11958: if ((unsigned int) base +1 >= use)
1.1 misho 11959: break;
11960: if (buf[base + 1] == ']') {
11961: /* conditional crap, skip both ']' ! */
11962: base++;
11963: continue;
11964: }
1.1.1.3 misho 11965: for (i = 1; (unsigned int) base + i < use; i++) {
1.1 misho 11966: if (buf[base + i] == '>') {
11967: #if 0
11968: fprintf(stderr, "found\n");
11969: #endif
11970: goto found_end_int_subset;
11971: }
11972: if (!IS_BLANK_CH(buf[base + i])) {
11973: #if 0
11974: fprintf(stderr, "not found\n");
11975: #endif
11976: goto not_end_of_int_subset;
11977: }
11978: }
11979: #if 0
11980: fprintf(stderr, "end of stream\n");
11981: #endif
11982: break;
1.1.1.3 misho 11983:
1.1 misho 11984: }
11985: not_end_of_int_subset:
11986: continue; /* for */
11987: }
11988: /*
11989: * We didn't find the end of the Internal subset
11990: */
1.1.1.3 misho 11991: if (quote == 0)
11992: ctxt->checkIndex = base;
11993: else
11994: ctxt->checkIndex = 0;
1.1 misho 11995: #ifdef DEBUG_PUSH
11996: if (next == 0)
11997: xmlGenericError(xmlGenericErrorContext,
11998: "PP: lookup of int subset end filed\n");
11999: #endif
12000: goto done;
12001:
12002: found_end_int_subset:
1.1.1.3 misho 12003: ctxt->checkIndex = 0;
1.1 misho 12004: xmlParseInternalSubset(ctxt);
1.1.1.3 misho 12005: if (ctxt->instate == XML_PARSER_EOF)
12006: goto done;
1.1 misho 12007: ctxt->inSubset = 2;
12008: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12009: (ctxt->sax->externalSubset != NULL))
12010: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12011: ctxt->extSubSystem, ctxt->extSubURI);
12012: ctxt->inSubset = 0;
12013: xmlCleanSpecialAttr(ctxt);
1.1.1.3 misho 12014: if (ctxt->instate == XML_PARSER_EOF)
12015: goto done;
1.1 misho 12016: ctxt->instate = XML_PARSER_PROLOG;
12017: ctxt->checkIndex = 0;
12018: #ifdef DEBUG_PUSH
12019: xmlGenericError(xmlGenericErrorContext,
12020: "PP: entering PROLOG\n");
12021: #endif
12022: break;
12023: }
12024: case XML_PARSER_COMMENT:
12025: xmlGenericError(xmlGenericErrorContext,
12026: "PP: internal error, state == COMMENT\n");
12027: ctxt->instate = XML_PARSER_CONTENT;
12028: #ifdef DEBUG_PUSH
12029: xmlGenericError(xmlGenericErrorContext,
12030: "PP: entering CONTENT\n");
12031: #endif
12032: break;
12033: case XML_PARSER_IGNORE:
12034: xmlGenericError(xmlGenericErrorContext,
12035: "PP: internal error, state == IGNORE");
12036: ctxt->instate = XML_PARSER_DTD;
12037: #ifdef DEBUG_PUSH
12038: xmlGenericError(xmlGenericErrorContext,
12039: "PP: entering DTD\n");
12040: #endif
12041: break;
12042: case XML_PARSER_PI:
12043: xmlGenericError(xmlGenericErrorContext,
12044: "PP: internal error, state == PI\n");
12045: ctxt->instate = XML_PARSER_CONTENT;
12046: #ifdef DEBUG_PUSH
12047: xmlGenericError(xmlGenericErrorContext,
12048: "PP: entering CONTENT\n");
12049: #endif
12050: break;
12051: case XML_PARSER_ENTITY_DECL:
12052: xmlGenericError(xmlGenericErrorContext,
12053: "PP: internal error, state == ENTITY_DECL\n");
12054: ctxt->instate = XML_PARSER_DTD;
12055: #ifdef DEBUG_PUSH
12056: xmlGenericError(xmlGenericErrorContext,
12057: "PP: entering DTD\n");
12058: #endif
12059: break;
12060: case XML_PARSER_ENTITY_VALUE:
12061: xmlGenericError(xmlGenericErrorContext,
12062: "PP: internal error, state == ENTITY_VALUE\n");
12063: ctxt->instate = XML_PARSER_CONTENT;
12064: #ifdef DEBUG_PUSH
12065: xmlGenericError(xmlGenericErrorContext,
12066: "PP: entering DTD\n");
12067: #endif
12068: break;
12069: case XML_PARSER_ATTRIBUTE_VALUE:
12070: xmlGenericError(xmlGenericErrorContext,
12071: "PP: internal error, state == ATTRIBUTE_VALUE\n");
12072: ctxt->instate = XML_PARSER_START_TAG;
12073: #ifdef DEBUG_PUSH
12074: xmlGenericError(xmlGenericErrorContext,
12075: "PP: entering START_TAG\n");
12076: #endif
12077: break;
12078: case XML_PARSER_SYSTEM_LITERAL:
12079: xmlGenericError(xmlGenericErrorContext,
12080: "PP: internal error, state == SYSTEM_LITERAL\n");
12081: ctxt->instate = XML_PARSER_START_TAG;
12082: #ifdef DEBUG_PUSH
12083: xmlGenericError(xmlGenericErrorContext,
12084: "PP: entering START_TAG\n");
12085: #endif
12086: break;
12087: case XML_PARSER_PUBLIC_LITERAL:
12088: xmlGenericError(xmlGenericErrorContext,
12089: "PP: internal error, state == PUBLIC_LITERAL\n");
12090: ctxt->instate = XML_PARSER_START_TAG;
12091: #ifdef DEBUG_PUSH
12092: xmlGenericError(xmlGenericErrorContext,
12093: "PP: entering START_TAG\n");
12094: #endif
12095: break;
12096: }
12097: }
1.1.1.3 misho 12098: done:
1.1 misho 12099: #ifdef DEBUG_PUSH
12100: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12101: #endif
12102: return(ret);
12103: encoding_error:
12104: {
12105: char buffer[150];
12106:
12107: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12108: ctxt->input->cur[0], ctxt->input->cur[1],
12109: ctxt->input->cur[2], ctxt->input->cur[3]);
12110: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12111: "Input is not proper UTF-8, indicate encoding !\n%s",
12112: BAD_CAST buffer, NULL);
12113: }
12114: return(0);
12115: }
12116:
12117: /**
1.1.1.3 misho 12118: * xmlParseCheckTransition:
12119: * @ctxt: an XML parser context
12120: * @chunk: a char array
12121: * @size: the size in byte of the chunk
12122: *
12123: * Check depending on the current parser state if the chunk given must be
12124: * processed immediately or one need more data to advance on parsing.
12125: *
12126: * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12127: */
12128: static int
12129: xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12130: if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12131: return(-1);
12132: if (ctxt->instate == XML_PARSER_START_TAG) {
12133: if (memchr(chunk, '>', size) != NULL)
12134: return(1);
12135: return(0);
12136: }
12137: if (ctxt->progressive == XML_PARSER_COMMENT) {
12138: if (memchr(chunk, '>', size) != NULL)
12139: return(1);
12140: return(0);
12141: }
12142: if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12143: if (memchr(chunk, '>', size) != NULL)
12144: return(1);
12145: return(0);
12146: }
12147: if (ctxt->progressive == XML_PARSER_PI) {
12148: if (memchr(chunk, '>', size) != NULL)
12149: return(1);
12150: return(0);
12151: }
12152: if (ctxt->instate == XML_PARSER_END_TAG) {
12153: if (memchr(chunk, '>', size) != NULL)
12154: return(1);
12155: return(0);
12156: }
12157: if ((ctxt->progressive == XML_PARSER_DTD) ||
12158: (ctxt->instate == XML_PARSER_DTD)) {
12159: if (memchr(chunk, '>', size) != NULL)
12160: return(1);
12161: return(0);
12162: }
12163: return(1);
12164: }
12165:
12166: /**
1.1 misho 12167: * xmlParseChunk:
12168: * @ctxt: an XML parser context
12169: * @chunk: an char array
12170: * @size: the size in byte of the chunk
12171: * @terminate: last chunk indicator
12172: *
12173: * Parse a Chunk of memory
12174: *
12175: * Returns zero if no error, the xmlParserErrors otherwise.
12176: */
12177: int
12178: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12179: int terminate) {
12180: int end_in_lf = 0;
12181: int remain = 0;
1.1.1.3 misho 12182: size_t old_avail = 0;
12183: size_t avail = 0;
1.1 misho 12184:
12185: if (ctxt == NULL)
12186: return(XML_ERR_INTERNAL_ERROR);
12187: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12188: return(ctxt->errNo);
1.1.1.3 misho 12189: if (ctxt->instate == XML_PARSER_EOF)
12190: return(-1);
1.1 misho 12191: if (ctxt->instate == XML_PARSER_START)
12192: xmlDetectSAX2(ctxt);
12193: if ((size > 0) && (chunk != NULL) && (!terminate) &&
12194: (chunk[size - 1] == '\r')) {
12195: end_in_lf = 1;
12196: size--;
12197: }
12198:
12199: xmldecl_done:
12200:
12201: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12202: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
1.1.1.3 misho 12203: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12204: size_t cur = ctxt->input->cur - ctxt->input->base;
1.1 misho 12205: int res;
12206:
1.1.1.3 misho 12207: old_avail = xmlBufUse(ctxt->input->buf->buffer);
1.1 misho 12208: /*
12209: * Specific handling if we autodetected an encoding, we should not
12210: * push more than the first line ... which depend on the encoding
12211: * And only push the rest once the final encoding was detected
12212: */
12213: if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12214: (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12215: unsigned int len = 45;
12216:
12217: if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12218: BAD_CAST "UTF-16")) ||
12219: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12220: BAD_CAST "UTF16")))
12221: len = 90;
12222: else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12223: BAD_CAST "UCS-4")) ||
12224: (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12225: BAD_CAST "UCS4")))
12226: len = 180;
12227:
12228: if (ctxt->input->buf->rawconsumed < len)
12229: len -= ctxt->input->buf->rawconsumed;
12230:
12231: /*
12232: * Change size for reading the initial declaration only
12233: * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12234: * will blindly copy extra bytes from memory.
12235: */
12236: if ((unsigned int) size > len) {
12237: remain = size - len;
12238: size = len;
12239: } else {
12240: remain = 0;
12241: }
12242: }
1.1.1.3 misho 12243: res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.1 misho 12244: if (res < 0) {
12245: ctxt->errNo = XML_PARSER_EOF;
12246: ctxt->disableSAX = 1;
12247: return (XML_PARSER_EOF);
12248: }
1.1.1.3 misho 12249: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
1.1 misho 12250: #ifdef DEBUG_PUSH
12251: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12252: #endif
12253:
12254: } else if (ctxt->instate != XML_PARSER_EOF) {
12255: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12256: xmlParserInputBufferPtr in = ctxt->input->buf;
12257: if ((in->encoder != NULL) && (in->buffer != NULL) &&
12258: (in->raw != NULL)) {
12259: int nbchars;
1.1.1.3 misho 12260: size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12261: size_t current = ctxt->input->cur - ctxt->input->base;
1.1 misho 12262:
1.1.1.3 misho 12263: nbchars = xmlCharEncInput(in, terminate);
1.1 misho 12264: if (nbchars < 0) {
12265: /* TODO 2.6.0 */
12266: xmlGenericError(xmlGenericErrorContext,
12267: "xmlParseChunk: encoder error\n");
12268: return(XML_ERR_INVALID_ENCODING);
12269: }
1.1.1.3 misho 12270: xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
1.1 misho 12271: }
12272: }
12273: }
1.1.1.3 misho 12274: if (remain != 0) {
1.1 misho 12275: xmlParseTryOrFinish(ctxt, 0);
1.1.1.3 misho 12276: } else {
12277: if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12278: avail = xmlBufUse(ctxt->input->buf->buffer);
12279: /*
12280: * Depending on the current state it may not be such
12281: * a good idea to try parsing if there is nothing in the chunk
12282: * which would be worth doing a parser state transition and we
12283: * need to wait for more data
12284: */
12285: if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12286: (old_avail == 0) || (avail == 0) ||
12287: (xmlParseCheckTransition(ctxt,
12288: (const char *)&ctxt->input->base[old_avail],
12289: avail - old_avail)))
12290: xmlParseTryOrFinish(ctxt, terminate);
12291: }
12292: if (ctxt->instate == XML_PARSER_EOF)
12293: return(ctxt->errNo);
12294:
12295: if ((ctxt->input != NULL) &&
12296: (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12297: ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12298: ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12299: xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12300: ctxt->instate = XML_PARSER_EOF;
12301: }
1.1 misho 12302: if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12303: return(ctxt->errNo);
12304:
12305: if (remain != 0) {
12306: chunk += size;
12307: size = remain;
12308: remain = 0;
12309: goto xmldecl_done;
12310: }
12311: if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12312: (ctxt->input->buf != NULL)) {
1.1.1.3 misho 12313: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12314: ctxt->input);
12315: size_t current = ctxt->input->cur - ctxt->input->base;
12316:
1.1 misho 12317: xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
1.1.1.3 misho 12318:
12319: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12320: base, current);
1.1 misho 12321: }
12322: if (terminate) {
12323: /*
12324: * Check for termination
12325: */
1.1.1.3 misho 12326: int cur_avail = 0;
1.1 misho 12327:
12328: if (ctxt->input != NULL) {
12329: if (ctxt->input->buf == NULL)
1.1.1.3 misho 12330: cur_avail = ctxt->input->length -
12331: (ctxt->input->cur - ctxt->input->base);
1.1 misho 12332: else
1.1.1.3 misho 12333: cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12334: (ctxt->input->cur - ctxt->input->base);
1.1 misho 12335: }
1.1.1.3 misho 12336:
1.1 misho 12337: if ((ctxt->instate != XML_PARSER_EOF) &&
12338: (ctxt->instate != XML_PARSER_EPILOG)) {
12339: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
1.1.1.3 misho 12340: }
12341: if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
1.1 misho 12342: xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12343: }
12344: if (ctxt->instate != XML_PARSER_EOF) {
12345: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12346: ctxt->sax->endDocument(ctxt->userData);
12347: }
12348: ctxt->instate = XML_PARSER_EOF;
12349: }
1.1.1.3 misho 12350: if (ctxt->wellFormed == 0)
12351: return((xmlParserErrors) ctxt->errNo);
12352: else
12353: return(0);
1.1 misho 12354: }
12355:
12356: /************************************************************************
12357: * *
1.1.1.3 misho 12358: * I/O front end functions to the parser *
1.1 misho 12359: * *
12360: ************************************************************************/
12361:
12362: /**
12363: * xmlCreatePushParserCtxt:
12364: * @sax: a SAX handler
12365: * @user_data: The user data returned on SAX callbacks
12366: * @chunk: a pointer to an array of chars
12367: * @size: number of chars in the array
12368: * @filename: an optional file name or URI
12369: *
12370: * Create a parser context for using the XML parser in push mode.
12371: * If @buffer and @size are non-NULL, the data is used to detect
12372: * the encoding. The remaining characters will be parsed so they
12373: * don't need to be fed in again through xmlParseChunk.
12374: * To allow content encoding detection, @size should be >= 4
12375: * The value of @filename is used for fetching external entities
12376: * and error/warning reports.
12377: *
12378: * Returns the new parser context or NULL
12379: */
12380:
12381: xmlParserCtxtPtr
1.1.1.3 misho 12382: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
1.1 misho 12383: const char *chunk, int size, const char *filename) {
12384: xmlParserCtxtPtr ctxt;
12385: xmlParserInputPtr inputStream;
12386: xmlParserInputBufferPtr buf;
12387: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12388:
12389: /*
12390: * plug some encoding conversion routines
12391: */
12392: if ((chunk != NULL) && (size >= 4))
12393: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12394:
12395: buf = xmlAllocParserInputBuffer(enc);
12396: if (buf == NULL) return(NULL);
12397:
12398: ctxt = xmlNewParserCtxt();
12399: if (ctxt == NULL) {
12400: xmlErrMemory(NULL, "creating parser: out of memory\n");
12401: xmlFreeParserInputBuffer(buf);
12402: return(NULL);
12403: }
12404: ctxt->dictNames = 1;
12405: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12406: if (ctxt->pushTab == NULL) {
12407: xmlErrMemory(ctxt, NULL);
12408: xmlFreeParserInputBuffer(buf);
12409: xmlFreeParserCtxt(ctxt);
12410: return(NULL);
12411: }
12412: if (sax != NULL) {
12413: #ifdef LIBXML_SAX1_ENABLED
12414: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12415: #endif /* LIBXML_SAX1_ENABLED */
12416: xmlFree(ctxt->sax);
12417: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12418: if (ctxt->sax == NULL) {
12419: xmlErrMemory(ctxt, NULL);
12420: xmlFreeParserInputBuffer(buf);
12421: xmlFreeParserCtxt(ctxt);
12422: return(NULL);
12423: }
12424: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12425: if (sax->initialized == XML_SAX2_MAGIC)
12426: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12427: else
12428: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12429: if (user_data != NULL)
12430: ctxt->userData = user_data;
1.1.1.3 misho 12431: }
1.1 misho 12432: if (filename == NULL) {
12433: ctxt->directory = NULL;
12434: } else {
12435: ctxt->directory = xmlParserGetDirectory(filename);
12436: }
12437:
12438: inputStream = xmlNewInputStream(ctxt);
12439: if (inputStream == NULL) {
12440: xmlFreeParserCtxt(ctxt);
12441: xmlFreeParserInputBuffer(buf);
12442: return(NULL);
12443: }
12444:
12445: if (filename == NULL)
12446: inputStream->filename = NULL;
12447: else {
12448: inputStream->filename = (char *)
12449: xmlCanonicPath((const xmlChar *) filename);
12450: if (inputStream->filename == NULL) {
12451: xmlFreeParserCtxt(ctxt);
12452: xmlFreeParserInputBuffer(buf);
12453: return(NULL);
12454: }
12455: }
12456: inputStream->buf = buf;
1.1.1.3 misho 12457: xmlBufResetInput(inputStream->buf->buffer, inputStream);
1.1 misho 12458: inputPush(ctxt, inputStream);
12459:
12460: /*
12461: * If the caller didn't provide an initial 'chunk' for determining
12462: * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12463: * that it can be automatically determined later
12464: */
12465: if ((size == 0) || (chunk == NULL)) {
12466: ctxt->charset = XML_CHAR_ENCODING_NONE;
12467: } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
1.1.1.3 misho 12468: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12469: size_t cur = ctxt->input->cur - ctxt->input->base;
1.1 misho 12470:
1.1.1.3 misho 12471: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.1 misho 12472:
1.1.1.3 misho 12473: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
1.1 misho 12474: #ifdef DEBUG_PUSH
12475: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12476: #endif
12477: }
12478:
12479: if (enc != XML_CHAR_ENCODING_NONE) {
12480: xmlSwitchEncoding(ctxt, enc);
12481: }
12482:
12483: return(ctxt);
12484: }
12485: #endif /* LIBXML_PUSH_ENABLED */
12486:
12487: /**
12488: * xmlStopParser:
12489: * @ctxt: an XML parser context
12490: *
12491: * Blocks further parser processing
12492: */
1.1.1.3 misho 12493: void
1.1 misho 12494: xmlStopParser(xmlParserCtxtPtr ctxt) {
12495: if (ctxt == NULL)
12496: return;
12497: ctxt->instate = XML_PARSER_EOF;
1.1.1.3 misho 12498: ctxt->errNo = XML_ERR_USER_STOP;
1.1 misho 12499: ctxt->disableSAX = 1;
12500: if (ctxt->input != NULL) {
12501: ctxt->input->cur = BAD_CAST"";
12502: ctxt->input->base = ctxt->input->cur;
12503: }
12504: }
12505:
12506: /**
12507: * xmlCreateIOParserCtxt:
12508: * @sax: a SAX handler
12509: * @user_data: The user data returned on SAX callbacks
12510: * @ioread: an I/O read function
12511: * @ioclose: an I/O close function
12512: * @ioctx: an I/O handler
12513: * @enc: the charset encoding if known
12514: *
12515: * Create a parser context for using the XML parser with an existing
12516: * I/O stream
12517: *
12518: * Returns the new parser context or NULL
12519: */
12520: xmlParserCtxtPtr
12521: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12522: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12523: void *ioctx, xmlCharEncoding enc) {
12524: xmlParserCtxtPtr ctxt;
12525: xmlParserInputPtr inputStream;
12526: xmlParserInputBufferPtr buf;
1.1.1.2 misho 12527:
1.1 misho 12528: if (ioread == NULL) return(NULL);
12529:
12530: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
1.1.1.2 misho 12531: if (buf == NULL) {
12532: if (ioclose != NULL)
12533: ioclose(ioctx);
12534: return (NULL);
12535: }
1.1 misho 12536:
12537: ctxt = xmlNewParserCtxt();
12538: if (ctxt == NULL) {
12539: xmlFreeParserInputBuffer(buf);
12540: return(NULL);
12541: }
12542: if (sax != NULL) {
12543: #ifdef LIBXML_SAX1_ENABLED
12544: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12545: #endif /* LIBXML_SAX1_ENABLED */
12546: xmlFree(ctxt->sax);
12547: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12548: if (ctxt->sax == NULL) {
12549: xmlErrMemory(ctxt, NULL);
12550: xmlFreeParserCtxt(ctxt);
12551: return(NULL);
12552: }
12553: memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12554: if (sax->initialized == XML_SAX2_MAGIC)
12555: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12556: else
12557: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12558: if (user_data != NULL)
12559: ctxt->userData = user_data;
1.1.1.2 misho 12560: }
1.1 misho 12561:
12562: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12563: if (inputStream == NULL) {
12564: xmlFreeParserCtxt(ctxt);
12565: return(NULL);
12566: }
12567: inputPush(ctxt, inputStream);
12568:
12569: return(ctxt);
12570: }
12571:
12572: #ifdef LIBXML_VALID_ENABLED
12573: /************************************************************************
12574: * *
1.1.1.3 misho 12575: * Front ends when parsing a DTD *
1.1 misho 12576: * *
12577: ************************************************************************/
12578:
12579: /**
12580: * xmlIOParseDTD:
12581: * @sax: the SAX handler block or NULL
12582: * @input: an Input Buffer
12583: * @enc: the charset encoding if known
12584: *
12585: * Load and parse a DTD
1.1.1.3 misho 12586: *
1.1 misho 12587: * Returns the resulting xmlDtdPtr or NULL in case of error.
12588: * @input will be freed by the function in any case.
12589: */
12590:
12591: xmlDtdPtr
12592: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12593: xmlCharEncoding enc) {
12594: xmlDtdPtr ret = NULL;
12595: xmlParserCtxtPtr ctxt;
12596: xmlParserInputPtr pinput = NULL;
12597: xmlChar start[4];
12598:
12599: if (input == NULL)
12600: return(NULL);
12601:
12602: ctxt = xmlNewParserCtxt();
12603: if (ctxt == NULL) {
12604: xmlFreeParserInputBuffer(input);
12605: return(NULL);
12606: }
12607:
12608: /*
12609: * Set-up the SAX context
12610: */
1.1.1.3 misho 12611: if (sax != NULL) {
1.1 misho 12612: if (ctxt->sax != NULL)
12613: xmlFree(ctxt->sax);
12614: ctxt->sax = sax;
12615: ctxt->userData = ctxt;
12616: }
12617: xmlDetectSAX2(ctxt);
12618:
12619: /*
12620: * generate a parser input from the I/O handler
12621: */
12622:
12623: pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12624: if (pinput == NULL) {
12625: if (sax != NULL) ctxt->sax = NULL;
12626: xmlFreeParserInputBuffer(input);
12627: xmlFreeParserCtxt(ctxt);
12628: return(NULL);
12629: }
12630:
12631: /*
12632: * plug some encoding conversion routines here.
12633: */
12634: if (xmlPushInput(ctxt, pinput) < 0) {
12635: if (sax != NULL) ctxt->sax = NULL;
12636: xmlFreeParserCtxt(ctxt);
12637: return(NULL);
12638: }
12639: if (enc != XML_CHAR_ENCODING_NONE) {
12640: xmlSwitchEncoding(ctxt, enc);
12641: }
12642:
12643: pinput->filename = NULL;
12644: pinput->line = 1;
12645: pinput->col = 1;
12646: pinput->base = ctxt->input->cur;
12647: pinput->cur = ctxt->input->cur;
12648: pinput->free = NULL;
12649:
12650: /*
12651: * let's parse that entity knowing it's an external subset.
12652: */
12653: ctxt->inSubset = 2;
12654: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12655: if (ctxt->myDoc == NULL) {
12656: xmlErrMemory(ctxt, "New Doc failed");
12657: return(NULL);
12658: }
12659: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12660: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12661: BAD_CAST "none", BAD_CAST "none");
12662:
12663: if ((enc == XML_CHAR_ENCODING_NONE) &&
12664: ((ctxt->input->end - ctxt->input->cur) >= 4)) {
1.1.1.3 misho 12665: /*
1.1 misho 12666: * Get the 4 first bytes and decode the charset
12667: * if enc != XML_CHAR_ENCODING_NONE
12668: * plug some encoding conversion routines.
12669: */
12670: start[0] = RAW;
12671: start[1] = NXT(1);
12672: start[2] = NXT(2);
12673: start[3] = NXT(3);
12674: enc = xmlDetectCharEncoding(start, 4);
12675: if (enc != XML_CHAR_ENCODING_NONE) {
12676: xmlSwitchEncoding(ctxt, enc);
12677: }
12678: }
12679:
12680: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12681:
12682: if (ctxt->myDoc != NULL) {
12683: if (ctxt->wellFormed) {
12684: ret = ctxt->myDoc->extSubset;
12685: ctxt->myDoc->extSubset = NULL;
12686: if (ret != NULL) {
12687: xmlNodePtr tmp;
12688:
12689: ret->doc = NULL;
12690: tmp = ret->children;
12691: while (tmp != NULL) {
12692: tmp->doc = NULL;
12693: tmp = tmp->next;
12694: }
12695: }
12696: } else {
12697: ret = NULL;
12698: }
12699: xmlFreeDoc(ctxt->myDoc);
12700: ctxt->myDoc = NULL;
12701: }
12702: if (sax != NULL) ctxt->sax = NULL;
12703: xmlFreeParserCtxt(ctxt);
1.1.1.3 misho 12704:
1.1 misho 12705: return(ret);
12706: }
12707:
12708: /**
12709: * xmlSAXParseDTD:
12710: * @sax: the SAX handler block
12711: * @ExternalID: a NAME* containing the External ID of the DTD
12712: * @SystemID: a NAME* containing the URL to the DTD
12713: *
12714: * Load and parse an external subset.
1.1.1.3 misho 12715: *
1.1 misho 12716: * Returns the resulting xmlDtdPtr or NULL in case of error.
12717: */
12718:
12719: xmlDtdPtr
12720: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12721: const xmlChar *SystemID) {
12722: xmlDtdPtr ret = NULL;
12723: xmlParserCtxtPtr ctxt;
12724: xmlParserInputPtr input = NULL;
12725: xmlCharEncoding enc;
12726: xmlChar* systemIdCanonic;
12727:
12728: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12729:
12730: ctxt = xmlNewParserCtxt();
12731: if (ctxt == NULL) {
12732: return(NULL);
12733: }
12734:
12735: /*
12736: * Set-up the SAX context
12737: */
1.1.1.3 misho 12738: if (sax != NULL) {
1.1 misho 12739: if (ctxt->sax != NULL)
12740: xmlFree(ctxt->sax);
12741: ctxt->sax = sax;
12742: ctxt->userData = ctxt;
12743: }
1.1.1.3 misho 12744:
1.1 misho 12745: /*
12746: * Canonicalise the system ID
12747: */
12748: systemIdCanonic = xmlCanonicPath(SystemID);
12749: if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12750: xmlFreeParserCtxt(ctxt);
12751: return(NULL);
12752: }
12753:
12754: /*
12755: * Ask the Entity resolver to load the damn thing
12756: */
12757:
12758: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12759: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12760: systemIdCanonic);
12761: if (input == NULL) {
12762: if (sax != NULL) ctxt->sax = NULL;
12763: xmlFreeParserCtxt(ctxt);
12764: if (systemIdCanonic != NULL)
12765: xmlFree(systemIdCanonic);
12766: return(NULL);
12767: }
12768:
12769: /*
12770: * plug some encoding conversion routines here.
12771: */
12772: if (xmlPushInput(ctxt, input) < 0) {
12773: if (sax != NULL) ctxt->sax = NULL;
12774: xmlFreeParserCtxt(ctxt);
12775: if (systemIdCanonic != NULL)
12776: xmlFree(systemIdCanonic);
12777: return(NULL);
12778: }
12779: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12780: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12781: xmlSwitchEncoding(ctxt, enc);
12782: }
12783:
12784: if (input->filename == NULL)
12785: input->filename = (char *) systemIdCanonic;
12786: else
12787: xmlFree(systemIdCanonic);
12788: input->line = 1;
12789: input->col = 1;
12790: input->base = ctxt->input->cur;
12791: input->cur = ctxt->input->cur;
12792: input->free = NULL;
12793:
12794: /*
12795: * let's parse that entity knowing it's an external subset.
12796: */
12797: ctxt->inSubset = 2;
12798: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12799: if (ctxt->myDoc == NULL) {
12800: xmlErrMemory(ctxt, "New Doc failed");
12801: if (sax != NULL) ctxt->sax = NULL;
12802: xmlFreeParserCtxt(ctxt);
12803: return(NULL);
12804: }
12805: ctxt->myDoc->properties = XML_DOC_INTERNAL;
12806: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12807: ExternalID, SystemID);
12808: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12809:
12810: if (ctxt->myDoc != NULL) {
12811: if (ctxt->wellFormed) {
12812: ret = ctxt->myDoc->extSubset;
12813: ctxt->myDoc->extSubset = NULL;
12814: if (ret != NULL) {
12815: xmlNodePtr tmp;
12816:
12817: ret->doc = NULL;
12818: tmp = ret->children;
12819: while (tmp != NULL) {
12820: tmp->doc = NULL;
12821: tmp = tmp->next;
12822: }
12823: }
12824: } else {
12825: ret = NULL;
12826: }
12827: xmlFreeDoc(ctxt->myDoc);
12828: ctxt->myDoc = NULL;
12829: }
12830: if (sax != NULL) ctxt->sax = NULL;
12831: xmlFreeParserCtxt(ctxt);
12832:
12833: return(ret);
12834: }
12835:
12836:
12837: /**
12838: * xmlParseDTD:
12839: * @ExternalID: a NAME* containing the External ID of the DTD
12840: * @SystemID: a NAME* containing the URL to the DTD
12841: *
12842: * Load and parse an external subset.
12843: *
12844: * Returns the resulting xmlDtdPtr or NULL in case of error.
12845: */
12846:
12847: xmlDtdPtr
12848: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12849: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12850: }
12851: #endif /* LIBXML_VALID_ENABLED */
12852:
12853: /************************************************************************
12854: * *
1.1.1.3 misho 12855: * Front ends when parsing an Entity *
1.1 misho 12856: * *
12857: ************************************************************************/
12858:
12859: /**
12860: * xmlParseCtxtExternalEntity:
12861: * @ctx: the existing parsing context
12862: * @URL: the URL for the entity to load
12863: * @ID: the System ID for the entity to load
12864: * @lst: the return value for the set of parsed nodes
12865: *
12866: * Parse an external general entity within an existing parsing context
12867: * An external general parsed entity is well-formed if it matches the
12868: * production labeled extParsedEnt.
12869: *
12870: * [78] extParsedEnt ::= TextDecl? content
12871: *
12872: * Returns 0 if the entity is well formed, -1 in case of args problem and
12873: * the parser error code otherwise
12874: */
12875:
12876: int
12877: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12878: const xmlChar *ID, xmlNodePtr *lst) {
12879: xmlParserCtxtPtr ctxt;
12880: xmlDocPtr newDoc;
12881: xmlNodePtr newRoot;
12882: xmlSAXHandlerPtr oldsax = NULL;
12883: int ret = 0;
12884: xmlChar start[4];
12885: xmlCharEncoding enc;
12886:
12887: if (ctx == NULL) return(-1);
12888:
12889: if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12890: (ctx->depth > 1024)) {
12891: return(XML_ERR_ENTITY_LOOP);
12892: }
12893:
12894: if (lst != NULL)
12895: *lst = NULL;
12896: if ((URL == NULL) && (ID == NULL))
12897: return(-1);
12898: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12899: return(-1);
12900:
12901: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12902: if (ctxt == NULL) {
12903: return(-1);
12904: }
12905:
12906: oldsax = ctxt->sax;
12907: ctxt->sax = ctx->sax;
12908: xmlDetectSAX2(ctxt);
12909: newDoc = xmlNewDoc(BAD_CAST "1.0");
12910: if (newDoc == NULL) {
12911: xmlFreeParserCtxt(ctxt);
12912: return(-1);
12913: }
12914: newDoc->properties = XML_DOC_INTERNAL;
12915: if (ctx->myDoc->dict) {
12916: newDoc->dict = ctx->myDoc->dict;
12917: xmlDictReference(newDoc->dict);
12918: }
12919: if (ctx->myDoc != NULL) {
12920: newDoc->intSubset = ctx->myDoc->intSubset;
12921: newDoc->extSubset = ctx->myDoc->extSubset;
12922: }
12923: if (ctx->myDoc->URL != NULL) {
12924: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12925: }
12926: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12927: if (newRoot == NULL) {
12928: ctxt->sax = oldsax;
12929: xmlFreeParserCtxt(ctxt);
12930: newDoc->intSubset = NULL;
12931: newDoc->extSubset = NULL;
12932: xmlFreeDoc(newDoc);
12933: return(-1);
12934: }
12935: xmlAddChild((xmlNodePtr) newDoc, newRoot);
12936: nodePush(ctxt, newDoc->children);
12937: if (ctx->myDoc == NULL) {
12938: ctxt->myDoc = newDoc;
12939: } else {
12940: ctxt->myDoc = ctx->myDoc;
12941: newDoc->children->doc = ctx->myDoc;
12942: }
12943:
12944: /*
12945: * Get the 4 first bytes and decode the charset
12946: * if enc != XML_CHAR_ENCODING_NONE
12947: * plug some encoding conversion routines.
12948: */
12949: GROW
12950: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12951: start[0] = RAW;
12952: start[1] = NXT(1);
12953: start[2] = NXT(2);
12954: start[3] = NXT(3);
12955: enc = xmlDetectCharEncoding(start, 4);
12956: if (enc != XML_CHAR_ENCODING_NONE) {
12957: xmlSwitchEncoding(ctxt, enc);
12958: }
12959: }
12960:
12961: /*
12962: * Parse a possible text declaration first
12963: */
12964: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12965: xmlParseTextDecl(ctxt);
12966: /*
12967: * An XML-1.0 document can't reference an entity not XML-1.0
12968: */
12969: if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12970: (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
1.1.1.3 misho 12971: xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
1.1 misho 12972: "Version mismatch between document and entity\n");
12973: }
12974: }
12975:
12976: /*
1.1.1.2 misho 12977: * If the user provided its own SAX callbacks then reuse the
12978: * useData callback field, otherwise the expected setup in a
12979: * DOM builder is to have userData == ctxt
12980: */
12981: if (ctx->userData == ctx)
12982: ctxt->userData = ctxt;
12983: else
12984: ctxt->userData = ctx->userData;
12985:
12986: /*
1.1 misho 12987: * Doing validity checking on chunk doesn't make sense
12988: */
12989: ctxt->instate = XML_PARSER_CONTENT;
12990: ctxt->validate = ctx->validate;
12991: ctxt->valid = ctx->valid;
12992: ctxt->loadsubset = ctx->loadsubset;
12993: ctxt->depth = ctx->depth + 1;
12994: ctxt->replaceEntities = ctx->replaceEntities;
12995: if (ctxt->validate) {
12996: ctxt->vctxt.error = ctx->vctxt.error;
12997: ctxt->vctxt.warning = ctx->vctxt.warning;
12998: } else {
12999: ctxt->vctxt.error = NULL;
13000: ctxt->vctxt.warning = NULL;
13001: }
13002: ctxt->vctxt.nodeTab = NULL;
13003: ctxt->vctxt.nodeNr = 0;
13004: ctxt->vctxt.nodeMax = 0;
13005: ctxt->vctxt.node = NULL;
13006: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13007: ctxt->dict = ctx->dict;
13008: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13009: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13010: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13011: ctxt->dictNames = ctx->dictNames;
13012: ctxt->attsDefault = ctx->attsDefault;
13013: ctxt->attsSpecial = ctx->attsSpecial;
13014: ctxt->linenumbers = ctx->linenumbers;
13015:
13016: xmlParseContent(ctxt);
13017:
13018: ctx->validate = ctxt->validate;
13019: ctx->valid = ctxt->valid;
13020: if ((RAW == '<') && (NXT(1) == '/')) {
13021: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13022: } else if (RAW != 0) {
13023: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13024: }
13025: if (ctxt->node != newDoc->children) {
13026: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13027: }
13028:
13029: if (!ctxt->wellFormed) {
13030: if (ctxt->errNo == 0)
13031: ret = 1;
13032: else
13033: ret = ctxt->errNo;
13034: } else {
13035: if (lst != NULL) {
13036: xmlNodePtr cur;
13037:
13038: /*
13039: * Return the newly created nodeset after unlinking it from
13040: * they pseudo parent.
13041: */
13042: cur = newDoc->children->children;
13043: *lst = cur;
13044: while (cur != NULL) {
13045: cur->parent = NULL;
13046: cur = cur->next;
13047: }
13048: newDoc->children->children = NULL;
13049: }
13050: ret = 0;
13051: }
13052: ctxt->sax = oldsax;
13053: ctxt->dict = NULL;
13054: ctxt->attsDefault = NULL;
13055: ctxt->attsSpecial = NULL;
13056: xmlFreeParserCtxt(ctxt);
13057: newDoc->intSubset = NULL;
13058: newDoc->extSubset = NULL;
13059: xmlFreeDoc(newDoc);
13060:
13061: return(ret);
13062: }
13063:
13064: /**
13065: * xmlParseExternalEntityPrivate:
13066: * @doc: the document the chunk pertains to
13067: * @oldctxt: the previous parser context if available
13068: * @sax: the SAX handler bloc (possibly NULL)
13069: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13070: * @depth: Used for loop detection, use 0
13071: * @URL: the URL for the entity to load
13072: * @ID: the System ID for the entity to load
13073: * @list: the return value for the set of parsed nodes
13074: *
13075: * Private version of xmlParseExternalEntity()
13076: *
13077: * Returns 0 if the entity is well formed, -1 in case of args problem and
13078: * the parser error code otherwise
13079: */
13080:
13081: static xmlParserErrors
13082: xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13083: xmlSAXHandlerPtr sax,
13084: void *user_data, int depth, const xmlChar *URL,
13085: const xmlChar *ID, xmlNodePtr *list) {
13086: xmlParserCtxtPtr ctxt;
13087: xmlDocPtr newDoc;
13088: xmlNodePtr newRoot;
13089: xmlSAXHandlerPtr oldsax = NULL;
13090: xmlParserErrors ret = XML_ERR_OK;
13091: xmlChar start[4];
13092: xmlCharEncoding enc;
13093:
13094: if (((depth > 40) &&
13095: ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13096: (depth > 1024)) {
13097: return(XML_ERR_ENTITY_LOOP);
13098: }
13099:
13100: if (list != NULL)
13101: *list = NULL;
13102: if ((URL == NULL) && (ID == NULL))
13103: return(XML_ERR_INTERNAL_ERROR);
13104: if (doc == NULL)
13105: return(XML_ERR_INTERNAL_ERROR);
13106:
13107:
13108: ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13109: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13110: ctxt->userData = ctxt;
13111: if (oldctxt != NULL) {
13112: ctxt->_private = oldctxt->_private;
13113: ctxt->loadsubset = oldctxt->loadsubset;
13114: ctxt->validate = oldctxt->validate;
13115: ctxt->external = oldctxt->external;
13116: ctxt->record_info = oldctxt->record_info;
13117: ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13118: ctxt->node_seq.length = oldctxt->node_seq.length;
13119: ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13120: } else {
13121: /*
13122: * Doing validity checking on chunk without context
13123: * doesn't make sense
13124: */
13125: ctxt->_private = NULL;
13126: ctxt->validate = 0;
13127: ctxt->external = 2;
13128: ctxt->loadsubset = 0;
13129: }
13130: if (sax != NULL) {
13131: oldsax = ctxt->sax;
13132: ctxt->sax = sax;
13133: if (user_data != NULL)
13134: ctxt->userData = user_data;
13135: }
13136: xmlDetectSAX2(ctxt);
13137: newDoc = xmlNewDoc(BAD_CAST "1.0");
13138: if (newDoc == NULL) {
13139: ctxt->node_seq.maximum = 0;
13140: ctxt->node_seq.length = 0;
13141: ctxt->node_seq.buffer = NULL;
13142: xmlFreeParserCtxt(ctxt);
13143: return(XML_ERR_INTERNAL_ERROR);
13144: }
13145: newDoc->properties = XML_DOC_INTERNAL;
13146: newDoc->intSubset = doc->intSubset;
13147: newDoc->extSubset = doc->extSubset;
13148: newDoc->dict = doc->dict;
13149: xmlDictReference(newDoc->dict);
13150:
13151: if (doc->URL != NULL) {
13152: newDoc->URL = xmlStrdup(doc->URL);
13153: }
13154: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13155: if (newRoot == NULL) {
13156: if (sax != NULL)
13157: ctxt->sax = oldsax;
13158: ctxt->node_seq.maximum = 0;
13159: ctxt->node_seq.length = 0;
13160: ctxt->node_seq.buffer = NULL;
13161: xmlFreeParserCtxt(ctxt);
13162: newDoc->intSubset = NULL;
13163: newDoc->extSubset = NULL;
13164: xmlFreeDoc(newDoc);
13165: return(XML_ERR_INTERNAL_ERROR);
13166: }
13167: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13168: nodePush(ctxt, newDoc->children);
13169: ctxt->myDoc = doc;
13170: newRoot->doc = doc;
13171:
13172: /*
13173: * Get the 4 first bytes and decode the charset
13174: * if enc != XML_CHAR_ENCODING_NONE
13175: * plug some encoding conversion routines.
13176: */
13177: GROW;
13178: if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13179: start[0] = RAW;
13180: start[1] = NXT(1);
13181: start[2] = NXT(2);
13182: start[3] = NXT(3);
13183: enc = xmlDetectCharEncoding(start, 4);
13184: if (enc != XML_CHAR_ENCODING_NONE) {
13185: xmlSwitchEncoding(ctxt, enc);
13186: }
13187: }
13188:
13189: /*
13190: * Parse a possible text declaration first
13191: */
13192: if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13193: xmlParseTextDecl(ctxt);
13194: }
13195:
13196: ctxt->instate = XML_PARSER_CONTENT;
13197: ctxt->depth = depth;
13198:
13199: xmlParseContent(ctxt);
13200:
13201: if ((RAW == '<') && (NXT(1) == '/')) {
13202: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13203: } else if (RAW != 0) {
13204: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13205: }
13206: if (ctxt->node != newDoc->children) {
13207: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13208: }
13209:
13210: if (!ctxt->wellFormed) {
13211: if (ctxt->errNo == 0)
13212: ret = XML_ERR_INTERNAL_ERROR;
13213: else
13214: ret = (xmlParserErrors)ctxt->errNo;
13215: } else {
13216: if (list != NULL) {
13217: xmlNodePtr cur;
13218:
13219: /*
13220: * Return the newly created nodeset after unlinking it from
13221: * they pseudo parent.
13222: */
13223: cur = newDoc->children->children;
13224: *list = cur;
13225: while (cur != NULL) {
13226: cur->parent = NULL;
13227: cur = cur->next;
13228: }
13229: newDoc->children->children = NULL;
13230: }
13231: ret = XML_ERR_OK;
13232: }
13233:
13234: /*
13235: * Record in the parent context the number of entities replacement
13236: * done when parsing that reference.
13237: */
13238: if (oldctxt != NULL)
13239: oldctxt->nbentities += ctxt->nbentities;
13240:
13241: /*
13242: * Also record the size of the entity parsed
13243: */
13244: if (ctxt->input != NULL) {
13245: oldctxt->sizeentities += ctxt->input->consumed;
13246: oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13247: }
13248: /*
13249: * And record the last error if any
13250: */
13251: if (ctxt->lastError.code != XML_ERR_OK)
13252: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13253:
1.1.1.3 misho 13254: if (sax != NULL)
1.1 misho 13255: ctxt->sax = oldsax;
13256: oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13257: oldctxt->node_seq.length = ctxt->node_seq.length;
13258: oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13259: ctxt->node_seq.maximum = 0;
13260: ctxt->node_seq.length = 0;
13261: ctxt->node_seq.buffer = NULL;
13262: xmlFreeParserCtxt(ctxt);
13263: newDoc->intSubset = NULL;
13264: newDoc->extSubset = NULL;
13265: xmlFreeDoc(newDoc);
13266:
13267: return(ret);
13268: }
13269:
13270: #ifdef LIBXML_SAX1_ENABLED
13271: /**
13272: * xmlParseExternalEntity:
13273: * @doc: the document the chunk pertains to
13274: * @sax: the SAX handler bloc (possibly NULL)
13275: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13276: * @depth: Used for loop detection, use 0
13277: * @URL: the URL for the entity to load
13278: * @ID: the System ID for the entity to load
13279: * @lst: the return value for the set of parsed nodes
13280: *
13281: * Parse an external general entity
13282: * An external general parsed entity is well-formed if it matches the
13283: * production labeled extParsedEnt.
13284: *
13285: * [78] extParsedEnt ::= TextDecl? content
13286: *
13287: * Returns 0 if the entity is well formed, -1 in case of args problem and
13288: * the parser error code otherwise
13289: */
13290:
13291: int
13292: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13293: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13294: return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13295: ID, lst));
13296: }
13297:
13298: /**
13299: * xmlParseBalancedChunkMemory:
13300: * @doc: the document the chunk pertains to
13301: * @sax: the SAX handler bloc (possibly NULL)
13302: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13303: * @depth: Used for loop detection, use 0
13304: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13305: * @lst: the return value for the set of parsed nodes
13306: *
13307: * Parse a well-balanced chunk of an XML document
13308: * called by the parser
13309: * The allowed sequence for the Well Balanced Chunk is the one defined by
13310: * the content production in the XML grammar:
13311: *
13312: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13313: *
13314: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13315: * the parser error code otherwise
13316: */
13317:
13318: int
13319: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13320: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13321: return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13322: depth, string, lst, 0 );
13323: }
13324: #endif /* LIBXML_SAX1_ENABLED */
13325:
13326: /**
13327: * xmlParseBalancedChunkMemoryInternal:
13328: * @oldctxt: the existing parsing context
13329: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13330: * @user_data: the user data field for the parser context
13331: * @lst: the return value for the set of parsed nodes
13332: *
13333: *
13334: * Parse a well-balanced chunk of an XML document
13335: * called by the parser
13336: * The allowed sequence for the Well Balanced Chunk is the one defined by
13337: * the content production in the XML grammar:
13338: *
13339: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13340: *
13341: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13342: * error code otherwise
13343: *
13344: * In case recover is set to 1, the nodelist will not be empty even if
13345: * the parsed chunk is not well balanced.
13346: */
13347: static xmlParserErrors
13348: xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13349: const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13350: xmlParserCtxtPtr ctxt;
13351: xmlDocPtr newDoc = NULL;
13352: xmlNodePtr newRoot;
13353: xmlSAXHandlerPtr oldsax = NULL;
13354: xmlNodePtr content = NULL;
13355: xmlNodePtr last = NULL;
13356: int size;
13357: xmlParserErrors ret = XML_ERR_OK;
13358: #ifdef SAX2
13359: int i;
13360: #endif
13361:
13362: if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13363: (oldctxt->depth > 1024)) {
13364: return(XML_ERR_ENTITY_LOOP);
13365: }
13366:
13367:
13368: if (lst != NULL)
13369: *lst = NULL;
13370: if (string == NULL)
13371: return(XML_ERR_INTERNAL_ERROR);
13372:
13373: size = xmlStrlen(string);
13374:
13375: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13376: if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13377: if (user_data != NULL)
13378: ctxt->userData = user_data;
13379: else
13380: ctxt->userData = ctxt;
13381: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13382: ctxt->dict = oldctxt->dict;
13383: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13384: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13385: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13386:
13387: #ifdef SAX2
13388: /* propagate namespaces down the entity */
13389: for (i = 0;i < oldctxt->nsNr;i += 2) {
13390: nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13391: }
13392: #endif
13393:
13394: oldsax = ctxt->sax;
13395: ctxt->sax = oldctxt->sax;
13396: xmlDetectSAX2(ctxt);
13397: ctxt->replaceEntities = oldctxt->replaceEntities;
13398: ctxt->options = oldctxt->options;
13399:
13400: ctxt->_private = oldctxt->_private;
13401: if (oldctxt->myDoc == NULL) {
13402: newDoc = xmlNewDoc(BAD_CAST "1.0");
13403: if (newDoc == NULL) {
13404: ctxt->sax = oldsax;
13405: ctxt->dict = NULL;
13406: xmlFreeParserCtxt(ctxt);
13407: return(XML_ERR_INTERNAL_ERROR);
13408: }
13409: newDoc->properties = XML_DOC_INTERNAL;
13410: newDoc->dict = ctxt->dict;
13411: xmlDictReference(newDoc->dict);
13412: ctxt->myDoc = newDoc;
13413: } else {
13414: ctxt->myDoc = oldctxt->myDoc;
13415: content = ctxt->myDoc->children;
13416: last = ctxt->myDoc->last;
13417: }
13418: newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13419: if (newRoot == NULL) {
13420: ctxt->sax = oldsax;
13421: ctxt->dict = NULL;
13422: xmlFreeParserCtxt(ctxt);
13423: if (newDoc != NULL) {
13424: xmlFreeDoc(newDoc);
13425: }
13426: return(XML_ERR_INTERNAL_ERROR);
13427: }
13428: ctxt->myDoc->children = NULL;
13429: ctxt->myDoc->last = NULL;
13430: xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13431: nodePush(ctxt, ctxt->myDoc->children);
13432: ctxt->instate = XML_PARSER_CONTENT;
13433: ctxt->depth = oldctxt->depth + 1;
13434:
13435: ctxt->validate = 0;
13436: ctxt->loadsubset = oldctxt->loadsubset;
13437: if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13438: /*
13439: * ID/IDREF registration will be done in xmlValidateElement below
13440: */
13441: ctxt->loadsubset |= XML_SKIP_IDS;
13442: }
13443: ctxt->dictNames = oldctxt->dictNames;
13444: ctxt->attsDefault = oldctxt->attsDefault;
13445: ctxt->attsSpecial = oldctxt->attsSpecial;
13446:
13447: xmlParseContent(ctxt);
13448: if ((RAW == '<') && (NXT(1) == '/')) {
13449: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13450: } else if (RAW != 0) {
13451: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13452: }
13453: if (ctxt->node != ctxt->myDoc->children) {
13454: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13455: }
13456:
13457: if (!ctxt->wellFormed) {
13458: if (ctxt->errNo == 0)
13459: ret = XML_ERR_INTERNAL_ERROR;
13460: else
13461: ret = (xmlParserErrors)ctxt->errNo;
13462: } else {
13463: ret = XML_ERR_OK;
13464: }
13465:
13466: if ((lst != NULL) && (ret == XML_ERR_OK)) {
13467: xmlNodePtr cur;
13468:
13469: /*
13470: * Return the newly created nodeset after unlinking it from
13471: * they pseudo parent.
13472: */
13473: cur = ctxt->myDoc->children->children;
13474: *lst = cur;
13475: while (cur != NULL) {
13476: #ifdef LIBXML_VALID_ENABLED
13477: if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13478: (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13479: (cur->type == XML_ELEMENT_NODE)) {
13480: oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13481: oldctxt->myDoc, cur);
13482: }
13483: #endif /* LIBXML_VALID_ENABLED */
13484: cur->parent = NULL;
13485: cur = cur->next;
13486: }
13487: ctxt->myDoc->children->children = NULL;
13488: }
13489: if (ctxt->myDoc != NULL) {
13490: xmlFreeNode(ctxt->myDoc->children);
13491: ctxt->myDoc->children = content;
13492: ctxt->myDoc->last = last;
13493: }
13494:
13495: /*
13496: * Record in the parent context the number of entities replacement
13497: * done when parsing that reference.
13498: */
13499: if (oldctxt != NULL)
13500: oldctxt->nbentities += ctxt->nbentities;
13501:
13502: /*
13503: * Also record the last error if any
13504: */
13505: if (ctxt->lastError.code != XML_ERR_OK)
13506: xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13507:
13508: ctxt->sax = oldsax;
13509: ctxt->dict = NULL;
13510: ctxt->attsDefault = NULL;
13511: ctxt->attsSpecial = NULL;
13512: xmlFreeParserCtxt(ctxt);
13513: if (newDoc != NULL) {
13514: xmlFreeDoc(newDoc);
13515: }
13516:
13517: return(ret);
13518: }
13519:
13520: /**
13521: * xmlParseInNodeContext:
13522: * @node: the context node
13523: * @data: the input string
13524: * @datalen: the input string length in bytes
13525: * @options: a combination of xmlParserOption
13526: * @lst: the return value for the set of parsed nodes
13527: *
13528: * Parse a well-balanced chunk of an XML document
13529: * within the context (DTD, namespaces, etc ...) of the given node.
13530: *
13531: * The allowed sequence for the data is a Well Balanced Chunk defined by
13532: * the content production in the XML grammar:
13533: *
13534: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13535: *
13536: * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13537: * error code otherwise
13538: */
13539: xmlParserErrors
13540: xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13541: int options, xmlNodePtr *lst) {
13542: #ifdef SAX2
13543: xmlParserCtxtPtr ctxt;
13544: xmlDocPtr doc = NULL;
13545: xmlNodePtr fake, cur;
13546: int nsnr = 0;
13547:
13548: xmlParserErrors ret = XML_ERR_OK;
13549:
13550: /*
13551: * check all input parameters, grab the document
13552: */
13553: if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13554: return(XML_ERR_INTERNAL_ERROR);
13555: switch (node->type) {
13556: case XML_ELEMENT_NODE:
13557: case XML_ATTRIBUTE_NODE:
13558: case XML_TEXT_NODE:
13559: case XML_CDATA_SECTION_NODE:
13560: case XML_ENTITY_REF_NODE:
13561: case XML_PI_NODE:
13562: case XML_COMMENT_NODE:
13563: case XML_DOCUMENT_NODE:
13564: case XML_HTML_DOCUMENT_NODE:
13565: break;
13566: default:
13567: return(XML_ERR_INTERNAL_ERROR);
13568:
13569: }
13570: while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13571: (node->type != XML_DOCUMENT_NODE) &&
13572: (node->type != XML_HTML_DOCUMENT_NODE))
13573: node = node->parent;
13574: if (node == NULL)
13575: return(XML_ERR_INTERNAL_ERROR);
13576: if (node->type == XML_ELEMENT_NODE)
13577: doc = node->doc;
13578: else
13579: doc = (xmlDocPtr) node;
13580: if (doc == NULL)
13581: return(XML_ERR_INTERNAL_ERROR);
13582:
13583: /*
13584: * allocate a context and set-up everything not related to the
13585: * node position in the tree
13586: */
13587: if (doc->type == XML_DOCUMENT_NODE)
13588: ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13589: #ifdef LIBXML_HTML_ENABLED
13590: else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13591: ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13592: /*
13593: * When parsing in context, it makes no sense to add implied
13594: * elements like html/body/etc...
13595: */
13596: options |= HTML_PARSE_NOIMPLIED;
13597: }
13598: #endif
13599: else
13600: return(XML_ERR_INTERNAL_ERROR);
13601:
13602: if (ctxt == NULL)
13603: return(XML_ERR_NO_MEMORY);
13604:
13605: /*
13606: * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13607: * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13608: * we must wait until the last moment to free the original one.
13609: */
13610: if (doc->dict != NULL) {
13611: if (ctxt->dict != NULL)
13612: xmlDictFree(ctxt->dict);
13613: ctxt->dict = doc->dict;
13614: } else
13615: options |= XML_PARSE_NODICT;
13616:
13617: if (doc->encoding != NULL) {
13618: xmlCharEncodingHandlerPtr hdlr;
13619:
13620: if (ctxt->encoding != NULL)
13621: xmlFree((xmlChar *) ctxt->encoding);
13622: ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13623:
13624: hdlr = xmlFindCharEncodingHandler(doc->encoding);
13625: if (hdlr != NULL) {
13626: xmlSwitchToEncoding(ctxt, hdlr);
13627: } else {
13628: return(XML_ERR_UNSUPPORTED_ENCODING);
13629: }
13630: }
13631:
13632: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13633: xmlDetectSAX2(ctxt);
13634: ctxt->myDoc = doc;
13635:
13636: fake = xmlNewComment(NULL);
13637: if (fake == NULL) {
13638: xmlFreeParserCtxt(ctxt);
13639: return(XML_ERR_NO_MEMORY);
13640: }
13641: xmlAddChild(node, fake);
13642:
13643: if (node->type == XML_ELEMENT_NODE) {
13644: nodePush(ctxt, node);
13645: /*
13646: * initialize the SAX2 namespaces stack
13647: */
13648: cur = node;
13649: while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13650: xmlNsPtr ns = cur->nsDef;
13651: const xmlChar *iprefix, *ihref;
13652:
13653: while (ns != NULL) {
13654: if (ctxt->dict) {
13655: iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13656: ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13657: } else {
13658: iprefix = ns->prefix;
13659: ihref = ns->href;
13660: }
13661:
13662: if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13663: nsPush(ctxt, iprefix, ihref);
13664: nsnr++;
13665: }
13666: ns = ns->next;
13667: }
13668: cur = cur->parent;
13669: }
13670: ctxt->instate = XML_PARSER_CONTENT;
13671: }
13672:
13673: if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13674: /*
13675: * ID/IDREF registration will be done in xmlValidateElement below
13676: */
13677: ctxt->loadsubset |= XML_SKIP_IDS;
13678: }
13679:
13680: #ifdef LIBXML_HTML_ENABLED
13681: if (doc->type == XML_HTML_DOCUMENT_NODE)
13682: __htmlParseContent(ctxt);
13683: else
13684: #endif
13685: xmlParseContent(ctxt);
13686:
13687: nsPop(ctxt, nsnr);
13688: if ((RAW == '<') && (NXT(1) == '/')) {
13689: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13690: } else if (RAW != 0) {
13691: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13692: }
13693: if ((ctxt->node != NULL) && (ctxt->node != node)) {
13694: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13695: ctxt->wellFormed = 0;
13696: }
13697:
13698: if (!ctxt->wellFormed) {
13699: if (ctxt->errNo == 0)
13700: ret = XML_ERR_INTERNAL_ERROR;
13701: else
13702: ret = (xmlParserErrors)ctxt->errNo;
13703: } else {
13704: ret = XML_ERR_OK;
13705: }
13706:
13707: /*
13708: * Return the newly created nodeset after unlinking it from
13709: * the pseudo sibling.
13710: */
13711:
13712: cur = fake->next;
13713: fake->next = NULL;
13714: node->last = fake;
13715:
13716: if (cur != NULL) {
13717: cur->prev = NULL;
13718: }
13719:
13720: *lst = cur;
13721:
13722: while (cur != NULL) {
13723: cur->parent = NULL;
13724: cur = cur->next;
13725: }
13726:
13727: xmlUnlinkNode(fake);
13728: xmlFreeNode(fake);
13729:
13730:
13731: if (ret != XML_ERR_OK) {
13732: xmlFreeNodeList(*lst);
13733: *lst = NULL;
13734: }
13735:
13736: if (doc->dict != NULL)
13737: ctxt->dict = NULL;
13738: xmlFreeParserCtxt(ctxt);
13739:
13740: return(ret);
13741: #else /* !SAX2 */
13742: return(XML_ERR_INTERNAL_ERROR);
13743: #endif
13744: }
13745:
13746: #ifdef LIBXML_SAX1_ENABLED
13747: /**
13748: * xmlParseBalancedChunkMemoryRecover:
13749: * @doc: the document the chunk pertains to
13750: * @sax: the SAX handler bloc (possibly NULL)
13751: * @user_data: The user data returned on SAX callbacks (possibly NULL)
13752: * @depth: Used for loop detection, use 0
13753: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13754: * @lst: the return value for the set of parsed nodes
13755: * @recover: return nodes even if the data is broken (use 0)
13756: *
13757: *
13758: * Parse a well-balanced chunk of an XML document
13759: * called by the parser
13760: * The allowed sequence for the Well Balanced Chunk is the one defined by
13761: * the content production in the XML grammar:
13762: *
13763: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13764: *
13765: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13766: * the parser error code otherwise
13767: *
13768: * In case recover is set to 1, the nodelist will not be empty even if
13769: * the parsed chunk is not well balanced, assuming the parsing succeeded to
13770: * some extent.
13771: */
13772: int
13773: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13774: void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13775: int recover) {
13776: xmlParserCtxtPtr ctxt;
13777: xmlDocPtr newDoc;
13778: xmlSAXHandlerPtr oldsax = NULL;
13779: xmlNodePtr content, newRoot;
13780: int size;
13781: int ret = 0;
13782:
13783: if (depth > 40) {
13784: return(XML_ERR_ENTITY_LOOP);
13785: }
13786:
13787:
13788: if (lst != NULL)
13789: *lst = NULL;
13790: if (string == NULL)
13791: return(-1);
13792:
13793: size = xmlStrlen(string);
13794:
13795: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13796: if (ctxt == NULL) return(-1);
13797: ctxt->userData = ctxt;
13798: if (sax != NULL) {
13799: oldsax = ctxt->sax;
13800: ctxt->sax = sax;
13801: if (user_data != NULL)
13802: ctxt->userData = user_data;
13803: }
13804: newDoc = xmlNewDoc(BAD_CAST "1.0");
13805: if (newDoc == NULL) {
13806: xmlFreeParserCtxt(ctxt);
13807: return(-1);
13808: }
13809: newDoc->properties = XML_DOC_INTERNAL;
13810: if ((doc != NULL) && (doc->dict != NULL)) {
13811: xmlDictFree(ctxt->dict);
13812: ctxt->dict = doc->dict;
13813: xmlDictReference(ctxt->dict);
13814: ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13815: ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13816: ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13817: ctxt->dictNames = 1;
13818: } else {
13819: xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13820: }
13821: if (doc != NULL) {
13822: newDoc->intSubset = doc->intSubset;
13823: newDoc->extSubset = doc->extSubset;
13824: }
13825: newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13826: if (newRoot == NULL) {
13827: if (sax != NULL)
13828: ctxt->sax = oldsax;
13829: xmlFreeParserCtxt(ctxt);
13830: newDoc->intSubset = NULL;
13831: newDoc->extSubset = NULL;
13832: xmlFreeDoc(newDoc);
13833: return(-1);
13834: }
13835: xmlAddChild((xmlNodePtr) newDoc, newRoot);
13836: nodePush(ctxt, newRoot);
13837: if (doc == NULL) {
13838: ctxt->myDoc = newDoc;
13839: } else {
13840: ctxt->myDoc = newDoc;
13841: newDoc->children->doc = doc;
13842: /* Ensure that doc has XML spec namespace */
13843: xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13844: newDoc->oldNs = doc->oldNs;
13845: }
13846: ctxt->instate = XML_PARSER_CONTENT;
13847: ctxt->depth = depth;
13848:
13849: /*
13850: * Doing validity checking on chunk doesn't make sense
13851: */
13852: ctxt->validate = 0;
13853: ctxt->loadsubset = 0;
13854: xmlDetectSAX2(ctxt);
13855:
13856: if ( doc != NULL ){
13857: content = doc->children;
13858: doc->children = NULL;
13859: xmlParseContent(ctxt);
13860: doc->children = content;
13861: }
13862: else {
13863: xmlParseContent(ctxt);
13864: }
13865: if ((RAW == '<') && (NXT(1) == '/')) {
13866: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13867: } else if (RAW != 0) {
13868: xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13869: }
13870: if (ctxt->node != newDoc->children) {
13871: xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13872: }
13873:
13874: if (!ctxt->wellFormed) {
13875: if (ctxt->errNo == 0)
13876: ret = 1;
13877: else
13878: ret = ctxt->errNo;
13879: } else {
13880: ret = 0;
13881: }
13882:
13883: if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13884: xmlNodePtr cur;
13885:
13886: /*
13887: * Return the newly created nodeset after unlinking it from
13888: * they pseudo parent.
13889: */
13890: cur = newDoc->children->children;
13891: *lst = cur;
13892: while (cur != NULL) {
13893: xmlSetTreeDoc(cur, doc);
13894: cur->parent = NULL;
13895: cur = cur->next;
13896: }
13897: newDoc->children->children = NULL;
13898: }
13899:
13900: if (sax != NULL)
13901: ctxt->sax = oldsax;
13902: xmlFreeParserCtxt(ctxt);
13903: newDoc->intSubset = NULL;
13904: newDoc->extSubset = NULL;
13905: newDoc->oldNs = NULL;
13906: xmlFreeDoc(newDoc);
13907:
13908: return(ret);
13909: }
13910:
13911: /**
13912: * xmlSAXParseEntity:
13913: * @sax: the SAX handler block
13914: * @filename: the filename
13915: *
13916: * parse an XML external entity out of context and build a tree.
13917: * It use the given SAX function block to handle the parsing callback.
13918: * If sax is NULL, fallback to the default DOM tree building routines.
13919: *
13920: * [78] extParsedEnt ::= TextDecl? content
13921: *
13922: * This correspond to a "Well Balanced" chunk
13923: *
13924: * Returns the resulting document tree
13925: */
13926:
13927: xmlDocPtr
13928: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13929: xmlDocPtr ret;
13930: xmlParserCtxtPtr ctxt;
13931:
13932: ctxt = xmlCreateFileParserCtxt(filename);
13933: if (ctxt == NULL) {
13934: return(NULL);
13935: }
13936: if (sax != NULL) {
13937: if (ctxt->sax != NULL)
13938: xmlFree(ctxt->sax);
13939: ctxt->sax = sax;
13940: ctxt->userData = NULL;
13941: }
13942:
13943: xmlParseExtParsedEnt(ctxt);
13944:
13945: if (ctxt->wellFormed)
13946: ret = ctxt->myDoc;
13947: else {
13948: ret = NULL;
13949: xmlFreeDoc(ctxt->myDoc);
13950: ctxt->myDoc = NULL;
13951: }
13952: if (sax != NULL)
13953: ctxt->sax = NULL;
13954: xmlFreeParserCtxt(ctxt);
13955:
13956: return(ret);
13957: }
13958:
13959: /**
13960: * xmlParseEntity:
13961: * @filename: the filename
13962: *
13963: * parse an XML external entity out of context and build a tree.
13964: *
13965: * [78] extParsedEnt ::= TextDecl? content
13966: *
13967: * This correspond to a "Well Balanced" chunk
13968: *
13969: * Returns the resulting document tree
13970: */
13971:
13972: xmlDocPtr
13973: xmlParseEntity(const char *filename) {
13974: return(xmlSAXParseEntity(NULL, filename));
13975: }
13976: #endif /* LIBXML_SAX1_ENABLED */
13977:
13978: /**
13979: * xmlCreateEntityParserCtxtInternal:
13980: * @URL: the entity URL
13981: * @ID: the entity PUBLIC ID
13982: * @base: a possible base for the target URI
13983: * @pctx: parser context used to set options on new context
13984: *
13985: * Create a parser context for an external entity
13986: * Automatic support for ZLIB/Compress compressed document is provided
13987: * by default if found at compile-time.
13988: *
13989: * Returns the new parser context or NULL
13990: */
13991: static xmlParserCtxtPtr
13992: xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13993: const xmlChar *base, xmlParserCtxtPtr pctx) {
13994: xmlParserCtxtPtr ctxt;
13995: xmlParserInputPtr inputStream;
13996: char *directory = NULL;
13997: xmlChar *uri;
13998:
13999: ctxt = xmlNewParserCtxt();
14000: if (ctxt == NULL) {
14001: return(NULL);
14002: }
14003:
14004: if (pctx != NULL) {
14005: ctxt->options = pctx->options;
14006: ctxt->_private = pctx->_private;
14007: }
14008:
14009: uri = xmlBuildURI(URL, base);
14010:
14011: if (uri == NULL) {
14012: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14013: if (inputStream == NULL) {
14014: xmlFreeParserCtxt(ctxt);
14015: return(NULL);
14016: }
14017:
14018: inputPush(ctxt, inputStream);
14019:
14020: if ((ctxt->directory == NULL) && (directory == NULL))
14021: directory = xmlParserGetDirectory((char *)URL);
14022: if ((ctxt->directory == NULL) && (directory != NULL))
14023: ctxt->directory = directory;
14024: } else {
14025: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14026: if (inputStream == NULL) {
14027: xmlFree(uri);
14028: xmlFreeParserCtxt(ctxt);
14029: return(NULL);
14030: }
14031:
14032: inputPush(ctxt, inputStream);
14033:
14034: if ((ctxt->directory == NULL) && (directory == NULL))
14035: directory = xmlParserGetDirectory((char *)uri);
14036: if ((ctxt->directory == NULL) && (directory != NULL))
14037: ctxt->directory = directory;
14038: xmlFree(uri);
14039: }
14040: return(ctxt);
14041: }
14042:
14043: /**
14044: * xmlCreateEntityParserCtxt:
14045: * @URL: the entity URL
14046: * @ID: the entity PUBLIC ID
14047: * @base: a possible base for the target URI
14048: *
14049: * Create a parser context for an external entity
14050: * Automatic support for ZLIB/Compress compressed document is provided
14051: * by default if found at compile-time.
14052: *
14053: * Returns the new parser context or NULL
14054: */
14055: xmlParserCtxtPtr
14056: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14057: const xmlChar *base) {
14058: return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14059:
14060: }
14061:
14062: /************************************************************************
14063: * *
14064: * Front ends when parsing from a file *
14065: * *
14066: ************************************************************************/
14067:
14068: /**
14069: * xmlCreateURLParserCtxt:
14070: * @filename: the filename or URL
14071: * @options: a combination of xmlParserOption
14072: *
1.1.1.3 misho 14073: * Create a parser context for a file or URL content.
1.1 misho 14074: * Automatic support for ZLIB/Compress compressed document is provided
14075: * by default if found at compile-time and for file accesses
14076: *
14077: * Returns the new parser context or NULL
14078: */
14079: xmlParserCtxtPtr
14080: xmlCreateURLParserCtxt(const char *filename, int options)
14081: {
14082: xmlParserCtxtPtr ctxt;
14083: xmlParserInputPtr inputStream;
14084: char *directory = NULL;
14085:
14086: ctxt = xmlNewParserCtxt();
14087: if (ctxt == NULL) {
14088: xmlErrMemory(NULL, "cannot allocate parser context");
14089: return(NULL);
14090: }
14091:
14092: if (options)
14093: xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14094: ctxt->linenumbers = 1;
14095:
14096: inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14097: if (inputStream == NULL) {
14098: xmlFreeParserCtxt(ctxt);
14099: return(NULL);
14100: }
14101:
14102: inputPush(ctxt, inputStream);
14103: if ((ctxt->directory == NULL) && (directory == NULL))
14104: directory = xmlParserGetDirectory(filename);
14105: if ((ctxt->directory == NULL) && (directory != NULL))
14106: ctxt->directory = directory;
14107:
14108: return(ctxt);
14109: }
14110:
14111: /**
14112: * xmlCreateFileParserCtxt:
14113: * @filename: the filename
14114: *
1.1.1.3 misho 14115: * Create a parser context for a file content.
1.1 misho 14116: * Automatic support for ZLIB/Compress compressed document is provided
14117: * by default if found at compile-time.
14118: *
14119: * Returns the new parser context or NULL
14120: */
14121: xmlParserCtxtPtr
14122: xmlCreateFileParserCtxt(const char *filename)
14123: {
14124: return(xmlCreateURLParserCtxt(filename, 0));
14125: }
14126:
14127: #ifdef LIBXML_SAX1_ENABLED
14128: /**
14129: * xmlSAXParseFileWithData:
14130: * @sax: the SAX handler block
14131: * @filename: the filename
14132: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14133: * documents
14134: * @data: the userdata
14135: *
14136: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14137: * compressed document is provided by default if found at compile-time.
14138: * It use the given SAX function block to handle the parsing callback.
14139: * If sax is NULL, fallback to the default DOM tree building routines.
14140: *
14141: * User data (void *) is stored within the parser context in the
14142: * context's _private member, so it is available nearly everywhere in libxml
14143: *
14144: * Returns the resulting document tree
14145: */
14146:
14147: xmlDocPtr
14148: xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14149: int recovery, void *data) {
14150: xmlDocPtr ret;
14151: xmlParserCtxtPtr ctxt;
14152:
14153: xmlInitParser();
14154:
14155: ctxt = xmlCreateFileParserCtxt(filename);
14156: if (ctxt == NULL) {
14157: return(NULL);
14158: }
14159: if (sax != NULL) {
14160: if (ctxt->sax != NULL)
14161: xmlFree(ctxt->sax);
14162: ctxt->sax = sax;
14163: }
14164: xmlDetectSAX2(ctxt);
14165: if (data!=NULL) {
14166: ctxt->_private = data;
14167: }
14168:
14169: if (ctxt->directory == NULL)
14170: ctxt->directory = xmlParserGetDirectory(filename);
14171:
14172: ctxt->recovery = recovery;
14173:
14174: xmlParseDocument(ctxt);
14175:
14176: if ((ctxt->wellFormed) || recovery) {
14177: ret = ctxt->myDoc;
14178: if (ret != NULL) {
14179: if (ctxt->input->buf->compressed > 0)
14180: ret->compression = 9;
14181: else
14182: ret->compression = ctxt->input->buf->compressed;
14183: }
14184: }
14185: else {
14186: ret = NULL;
14187: xmlFreeDoc(ctxt->myDoc);
14188: ctxt->myDoc = NULL;
14189: }
14190: if (sax != NULL)
14191: ctxt->sax = NULL;
14192: xmlFreeParserCtxt(ctxt);
1.1.1.3 misho 14193:
1.1 misho 14194: return(ret);
14195: }
14196:
14197: /**
14198: * xmlSAXParseFile:
14199: * @sax: the SAX handler block
14200: * @filename: the filename
14201: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14202: * documents
14203: *
14204: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14205: * compressed document is provided by default if found at compile-time.
14206: * It use the given SAX function block to handle the parsing callback.
14207: * If sax is NULL, fallback to the default DOM tree building routines.
14208: *
14209: * Returns the resulting document tree
14210: */
14211:
14212: xmlDocPtr
14213: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14214: int recovery) {
14215: return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14216: }
14217:
14218: /**
14219: * xmlRecoverDoc:
14220: * @cur: a pointer to an array of xmlChar
14221: *
14222: * parse an XML in-memory document and build a tree.
14223: * In the case the document is not Well Formed, a attempt to build a
14224: * tree is tried anyway
14225: *
14226: * Returns the resulting document tree or NULL in case of failure
14227: */
14228:
14229: xmlDocPtr
14230: xmlRecoverDoc(const xmlChar *cur) {
14231: return(xmlSAXParseDoc(NULL, cur, 1));
14232: }
14233:
14234: /**
14235: * xmlParseFile:
14236: * @filename: the filename
14237: *
14238: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14239: * compressed document is provided by default if found at compile-time.
14240: *
14241: * Returns the resulting document tree if the file was wellformed,
14242: * NULL otherwise.
14243: */
14244:
14245: xmlDocPtr
14246: xmlParseFile(const char *filename) {
14247: return(xmlSAXParseFile(NULL, filename, 0));
14248: }
14249:
14250: /**
14251: * xmlRecoverFile:
14252: * @filename: the filename
14253: *
14254: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14255: * compressed document is provided by default if found at compile-time.
14256: * In the case the document is not Well Formed, it attempts to build
14257: * a tree anyway
14258: *
14259: * Returns the resulting document tree or NULL in case of failure
14260: */
14261:
14262: xmlDocPtr
14263: xmlRecoverFile(const char *filename) {
14264: return(xmlSAXParseFile(NULL, filename, 1));
14265: }
14266:
14267:
14268: /**
14269: * xmlSetupParserForBuffer:
14270: * @ctxt: an XML parser context
14271: * @buffer: a xmlChar * buffer
14272: * @filename: a file name
14273: *
14274: * Setup the parser context to parse a new buffer; Clears any prior
14275: * contents from the parser context. The buffer parameter must not be
14276: * NULL, but the filename parameter can be
14277: */
14278: void
14279: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14280: const char* filename)
14281: {
14282: xmlParserInputPtr input;
14283:
14284: if ((ctxt == NULL) || (buffer == NULL))
14285: return;
14286:
14287: input = xmlNewInputStream(ctxt);
14288: if (input == NULL) {
14289: xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14290: xmlClearParserCtxt(ctxt);
14291: return;
14292: }
1.1.1.3 misho 14293:
1.1 misho 14294: xmlClearParserCtxt(ctxt);
14295: if (filename != NULL)
14296: input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14297: input->base = buffer;
14298: input->cur = buffer;
14299: input->end = &buffer[xmlStrlen(buffer)];
14300: inputPush(ctxt, input);
14301: }
14302:
14303: /**
14304: * xmlSAXUserParseFile:
14305: * @sax: a SAX handler
14306: * @user_data: The user data returned on SAX callbacks
14307: * @filename: a file name
14308: *
14309: * parse an XML file and call the given SAX handler routines.
14310: * Automatic support for ZLIB/Compress compressed document is provided
1.1.1.3 misho 14311: *
1.1 misho 14312: * Returns 0 in case of success or a error number otherwise
14313: */
14314: int
14315: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14316: const char *filename) {
14317: int ret = 0;
14318: xmlParserCtxtPtr ctxt;
1.1.1.3 misho 14319:
1.1 misho 14320: ctxt = xmlCreateFileParserCtxt(filename);
14321: if (ctxt == NULL) return -1;
14322: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14323: xmlFree(ctxt->sax);
14324: ctxt->sax = sax;
14325: xmlDetectSAX2(ctxt);
14326:
14327: if (user_data != NULL)
14328: ctxt->userData = user_data;
1.1.1.3 misho 14329:
1.1 misho 14330: xmlParseDocument(ctxt);
1.1.1.3 misho 14331:
1.1 misho 14332: if (ctxt->wellFormed)
14333: ret = 0;
14334: else {
14335: if (ctxt->errNo != 0)
14336: ret = ctxt->errNo;
14337: else
14338: ret = -1;
14339: }
14340: if (sax != NULL)
14341: ctxt->sax = NULL;
14342: if (ctxt->myDoc != NULL) {
14343: xmlFreeDoc(ctxt->myDoc);
14344: ctxt->myDoc = NULL;
14345: }
14346: xmlFreeParserCtxt(ctxt);
1.1.1.3 misho 14347:
1.1 misho 14348: return ret;
14349: }
14350: #endif /* LIBXML_SAX1_ENABLED */
14351:
14352: /************************************************************************
14353: * *
1.1.1.3 misho 14354: * Front ends when parsing from memory *
1.1 misho 14355: * *
14356: ************************************************************************/
14357:
14358: /**
14359: * xmlCreateMemoryParserCtxt:
14360: * @buffer: a pointer to a char array
14361: * @size: the size of the array
14362: *
14363: * Create a parser context for an XML in-memory document.
14364: *
14365: * Returns the new parser context or NULL
14366: */
14367: xmlParserCtxtPtr
14368: xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14369: xmlParserCtxtPtr ctxt;
14370: xmlParserInputPtr input;
14371: xmlParserInputBufferPtr buf;
14372:
14373: if (buffer == NULL)
14374: return(NULL);
14375: if (size <= 0)
14376: return(NULL);
14377:
14378: ctxt = xmlNewParserCtxt();
14379: if (ctxt == NULL)
14380: return(NULL);
14381:
14382: /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14383: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14384: if (buf == NULL) {
14385: xmlFreeParserCtxt(ctxt);
14386: return(NULL);
14387: }
14388:
14389: input = xmlNewInputStream(ctxt);
14390: if (input == NULL) {
14391: xmlFreeParserInputBuffer(buf);
14392: xmlFreeParserCtxt(ctxt);
14393: return(NULL);
14394: }
14395:
14396: input->filename = NULL;
14397: input->buf = buf;
1.1.1.3 misho 14398: xmlBufResetInput(input->buf->buffer, input);
1.1 misho 14399:
14400: inputPush(ctxt, input);
14401: return(ctxt);
14402: }
14403:
14404: #ifdef LIBXML_SAX1_ENABLED
14405: /**
14406: * xmlSAXParseMemoryWithData:
14407: * @sax: the SAX handler block
14408: * @buffer: an pointer to a char array
14409: * @size: the size of the array
14410: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14411: * documents
14412: * @data: the userdata
14413: *
14414: * parse an XML in-memory block and use the given SAX function block
14415: * to handle the parsing callback. If sax is NULL, fallback to the default
14416: * DOM tree building routines.
14417: *
14418: * User data (void *) is stored within the parser context in the
14419: * context's _private member, so it is available nearly everywhere in libxml
14420: *
14421: * Returns the resulting document tree
14422: */
14423:
14424: xmlDocPtr
14425: xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14426: int size, int recovery, void *data) {
14427: xmlDocPtr ret;
14428: xmlParserCtxtPtr ctxt;
14429:
14430: xmlInitParser();
14431:
14432: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14433: if (ctxt == NULL) return(NULL);
14434: if (sax != NULL) {
14435: if (ctxt->sax != NULL)
14436: xmlFree(ctxt->sax);
14437: ctxt->sax = sax;
14438: }
14439: xmlDetectSAX2(ctxt);
14440: if (data!=NULL) {
14441: ctxt->_private=data;
14442: }
14443:
14444: ctxt->recovery = recovery;
14445:
14446: xmlParseDocument(ctxt);
14447:
14448: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14449: else {
14450: ret = NULL;
14451: xmlFreeDoc(ctxt->myDoc);
14452: ctxt->myDoc = NULL;
14453: }
1.1.1.3 misho 14454: if (sax != NULL)
1.1 misho 14455: ctxt->sax = NULL;
14456: xmlFreeParserCtxt(ctxt);
14457:
14458: return(ret);
14459: }
14460:
14461: /**
14462: * xmlSAXParseMemory:
14463: * @sax: the SAX handler block
14464: * @buffer: an pointer to a char array
14465: * @size: the size of the array
14466: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14467: * documents
14468: *
14469: * parse an XML in-memory block and use the given SAX function block
14470: * to handle the parsing callback. If sax is NULL, fallback to the default
14471: * DOM tree building routines.
1.1.1.3 misho 14472: *
1.1 misho 14473: * Returns the resulting document tree
14474: */
14475: xmlDocPtr
14476: xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14477: int size, int recovery) {
14478: return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14479: }
14480:
14481: /**
14482: * xmlParseMemory:
14483: * @buffer: an pointer to a char array
14484: * @size: the size of the array
14485: *
14486: * parse an XML in-memory block and build a tree.
1.1.1.3 misho 14487: *
1.1 misho 14488: * Returns the resulting document tree
14489: */
14490:
14491: xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14492: return(xmlSAXParseMemory(NULL, buffer, size, 0));
14493: }
14494:
14495: /**
14496: * xmlRecoverMemory:
14497: * @buffer: an pointer to a char array
14498: * @size: the size of the array
14499: *
14500: * parse an XML in-memory block and build a tree.
14501: * In the case the document is not Well Formed, an attempt to
14502: * build a tree is tried anyway
14503: *
14504: * Returns the resulting document tree or NULL in case of error
14505: */
14506:
14507: xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14508: return(xmlSAXParseMemory(NULL, buffer, size, 1));
14509: }
14510:
14511: /**
14512: * xmlSAXUserParseMemory:
14513: * @sax: a SAX handler
14514: * @user_data: The user data returned on SAX callbacks
14515: * @buffer: an in-memory XML document input
14516: * @size: the length of the XML document in bytes
14517: *
14518: * A better SAX parsing routine.
14519: * parse an XML in-memory buffer and call the given SAX handler routines.
14520: *
14521: * Returns 0 in case of success or a error number otherwise
14522: */
14523: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14524: const char *buffer, int size) {
14525: int ret = 0;
14526: xmlParserCtxtPtr ctxt;
14527:
14528: xmlInitParser();
14529:
14530: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14531: if (ctxt == NULL) return -1;
14532: if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14533: xmlFree(ctxt->sax);
14534: ctxt->sax = sax;
14535: xmlDetectSAX2(ctxt);
14536:
14537: if (user_data != NULL)
14538: ctxt->userData = user_data;
14539:
14540: xmlParseDocument(ctxt);
1.1.1.3 misho 14541:
1.1 misho 14542: if (ctxt->wellFormed)
14543: ret = 0;
14544: else {
14545: if (ctxt->errNo != 0)
14546: ret = ctxt->errNo;
14547: else
14548: ret = -1;
14549: }
14550: if (sax != NULL)
14551: ctxt->sax = NULL;
14552: if (ctxt->myDoc != NULL) {
14553: xmlFreeDoc(ctxt->myDoc);
14554: ctxt->myDoc = NULL;
14555: }
14556: xmlFreeParserCtxt(ctxt);
1.1.1.3 misho 14557:
1.1 misho 14558: return ret;
14559: }
14560: #endif /* LIBXML_SAX1_ENABLED */
14561:
14562: /**
14563: * xmlCreateDocParserCtxt:
14564: * @cur: a pointer to an array of xmlChar
14565: *
14566: * Creates a parser context for an XML in-memory document.
14567: *
14568: * Returns the new parser context or NULL
14569: */
14570: xmlParserCtxtPtr
14571: xmlCreateDocParserCtxt(const xmlChar *cur) {
14572: int len;
14573:
14574: if (cur == NULL)
14575: return(NULL);
14576: len = xmlStrlen(cur);
14577: return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14578: }
14579:
14580: #ifdef LIBXML_SAX1_ENABLED
14581: /**
14582: * xmlSAXParseDoc:
14583: * @sax: the SAX handler block
14584: * @cur: a pointer to an array of xmlChar
14585: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14586: * documents
14587: *
14588: * parse an XML in-memory document and build a tree.
14589: * It use the given SAX function block to handle the parsing callback.
14590: * If sax is NULL, fallback to the default DOM tree building routines.
1.1.1.3 misho 14591: *
1.1 misho 14592: * Returns the resulting document tree
14593: */
14594:
14595: xmlDocPtr
14596: xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14597: xmlDocPtr ret;
14598: xmlParserCtxtPtr ctxt;
14599: xmlSAXHandlerPtr oldsax = NULL;
14600:
14601: if (cur == NULL) return(NULL);
14602:
14603:
14604: ctxt = xmlCreateDocParserCtxt(cur);
14605: if (ctxt == NULL) return(NULL);
1.1.1.3 misho 14606: if (sax != NULL) {
1.1 misho 14607: oldsax = ctxt->sax;
14608: ctxt->sax = sax;
14609: ctxt->userData = NULL;
14610: }
14611: xmlDetectSAX2(ctxt);
14612:
14613: xmlParseDocument(ctxt);
14614: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14615: else {
14616: ret = NULL;
14617: xmlFreeDoc(ctxt->myDoc);
14618: ctxt->myDoc = NULL;
14619: }
14620: if (sax != NULL)
14621: ctxt->sax = oldsax;
14622: xmlFreeParserCtxt(ctxt);
1.1.1.3 misho 14623:
1.1 misho 14624: return(ret);
14625: }
14626:
14627: /**
14628: * xmlParseDoc:
14629: * @cur: a pointer to an array of xmlChar
14630: *
14631: * parse an XML in-memory document and build a tree.
1.1.1.3 misho 14632: *
1.1 misho 14633: * Returns the resulting document tree
14634: */
14635:
14636: xmlDocPtr
14637: xmlParseDoc(const xmlChar *cur) {
14638: return(xmlSAXParseDoc(NULL, cur, 0));
14639: }
14640: #endif /* LIBXML_SAX1_ENABLED */
14641:
14642: #ifdef LIBXML_LEGACY_ENABLED
14643: /************************************************************************
14644: * *
1.1.1.3 misho 14645: * Specific function to keep track of entities references *
14646: * and used by the XSLT debugger *
1.1 misho 14647: * *
14648: ************************************************************************/
14649:
14650: static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14651:
14652: /**
14653: * xmlAddEntityReference:
14654: * @ent : A valid entity
14655: * @firstNode : A valid first node for children of entity
1.1.1.3 misho 14656: * @lastNode : A valid last node of children entity
1.1 misho 14657: *
14658: * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14659: */
14660: static void
14661: xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14662: xmlNodePtr lastNode)
14663: {
14664: if (xmlEntityRefFunc != NULL) {
14665: (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14666: }
14667: }
14668:
14669:
14670: /**
14671: * xmlSetEntityReferenceFunc:
14672: * @func: A valid function
14673: *
14674: * Set the function to call call back when a xml reference has been made
14675: */
14676: void
14677: xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14678: {
14679: xmlEntityRefFunc = func;
14680: }
14681: #endif /* LIBXML_LEGACY_ENABLED */
14682:
14683: /************************************************************************
14684: * *
1.1.1.3 misho 14685: * Miscellaneous *
1.1 misho 14686: * *
14687: ************************************************************************/
14688:
14689: #ifdef LIBXML_XPATH_ENABLED
14690: #include <libxml/xpath.h>
14691: #endif
14692:
14693: extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14694: static int xmlParserInitialized = 0;
14695:
14696: /**
14697: * xmlInitParser:
14698: *
14699: * Initialization function for the XML parser.
14700: * This is not reentrant. Call once before processing in case of
14701: * use in multithreaded programs.
14702: */
14703:
14704: void
14705: xmlInitParser(void) {
14706: if (xmlParserInitialized != 0)
14707: return;
14708:
14709: #ifdef LIBXML_THREAD_ENABLED
14710: __xmlGlobalInitMutexLock();
14711: if (xmlParserInitialized == 0) {
14712: #endif
14713: xmlInitThreads();
14714: xmlInitGlobals();
14715: if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14716: (xmlGenericError == NULL))
14717: initGenericErrorDefaultFunc(NULL);
14718: xmlInitMemory();
1.1.1.2 misho 14719: xmlInitializeDict();
1.1 misho 14720: xmlInitCharEncodingHandlers();
14721: xmlDefaultSAXHandlerInit();
14722: xmlRegisterDefaultInputCallbacks();
14723: #ifdef LIBXML_OUTPUT_ENABLED
14724: xmlRegisterDefaultOutputCallbacks();
14725: #endif /* LIBXML_OUTPUT_ENABLED */
14726: #ifdef LIBXML_HTML_ENABLED
14727: htmlInitAutoClose();
14728: htmlDefaultSAXHandlerInit();
14729: #endif
14730: #ifdef LIBXML_XPATH_ENABLED
14731: xmlXPathInit();
14732: #endif
14733: xmlParserInitialized = 1;
14734: #ifdef LIBXML_THREAD_ENABLED
14735: }
14736: __xmlGlobalInitMutexUnlock();
14737: #endif
14738: }
14739:
14740: /**
14741: * xmlCleanupParser:
14742: *
14743: * This function name is somewhat misleading. It does not clean up
14744: * parser state, it cleans up memory allocated by the library itself.
14745: * It is a cleanup function for the XML library. It tries to reclaim all
14746: * related global memory allocated for the library processing.
14747: * It doesn't deallocate any document related memory. One should
14748: * call xmlCleanupParser() only when the process has finished using
14749: * the library and all XML/HTML documents built with it.
14750: * See also xmlInitParser() which has the opposite function of preparing
14751: * the library for operations.
14752: *
14753: * WARNING: if your application is multithreaded or has plugin support
14754: * calling this may crash the application if another thread or
14755: * a plugin is still using libxml2. It's sometimes very hard to
14756: * guess if libxml2 is in use in the application, some libraries
14757: * or plugins may use it without notice. In case of doubt abstain
14758: * from calling this function or do it just before calling exit()
14759: * to avoid leak reports from valgrind !
14760: */
14761:
14762: void
14763: xmlCleanupParser(void) {
14764: if (!xmlParserInitialized)
14765: return;
14766:
14767: xmlCleanupCharEncodingHandlers();
14768: #ifdef LIBXML_CATALOG_ENABLED
14769: xmlCatalogCleanup();
14770: #endif
14771: xmlDictCleanup();
14772: xmlCleanupInputCallbacks();
14773: #ifdef LIBXML_OUTPUT_ENABLED
14774: xmlCleanupOutputCallbacks();
14775: #endif
14776: #ifdef LIBXML_SCHEMAS_ENABLED
14777: xmlSchemaCleanupTypes();
14778: xmlRelaxNGCleanupTypes();
14779: #endif
14780: xmlCleanupGlobals();
14781: xmlResetLastError();
14782: xmlCleanupThreads(); /* must be last if called not from the main thread */
14783: xmlCleanupMemory();
14784: xmlParserInitialized = 0;
14785: }
14786:
14787: /************************************************************************
14788: * *
14789: * New set (2.6.0) of simpler and more flexible APIs *
14790: * *
14791: ************************************************************************/
14792:
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The expansion is a single statement (do/while(0) wrapper), so the macro
 * is safe as the body of an if/else; the caller supplies the trailing
 * semicolon. Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14804:
14805: /**
14806: * xmlCtxtReset:
14807: * @ctxt: an XML parser context
14808: *
14809: * Reset a parser context
14810: */
14811: void
14812: xmlCtxtReset(xmlParserCtxtPtr ctxt)
14813: {
14814: xmlParserInputPtr input;
14815: xmlDictPtr dict;
1.1.1.3 misho 14816:
1.1 misho 14817: if (ctxt == NULL)
14818: return;
14819:
14820: dict = ctxt->dict;
14821:
14822: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14823: xmlFreeInputStream(input);
14824: }
14825: ctxt->inputNr = 0;
14826: ctxt->input = NULL;
14827:
14828: ctxt->spaceNr = 0;
14829: if (ctxt->spaceTab != NULL) {
14830: ctxt->spaceTab[0] = -1;
14831: ctxt->space = &ctxt->spaceTab[0];
14832: } else {
14833: ctxt->space = NULL;
14834: }
14835:
14836:
14837: ctxt->nodeNr = 0;
14838: ctxt->node = NULL;
14839:
14840: ctxt->nameNr = 0;
14841: ctxt->name = NULL;
14842:
14843: DICT_FREE(ctxt->version);
14844: ctxt->version = NULL;
14845: DICT_FREE(ctxt->encoding);
14846: ctxt->encoding = NULL;
14847: DICT_FREE(ctxt->directory);
14848: ctxt->directory = NULL;
14849: DICT_FREE(ctxt->extSubURI);
14850: ctxt->extSubURI = NULL;
14851: DICT_FREE(ctxt->extSubSystem);
14852: ctxt->extSubSystem = NULL;
14853: if (ctxt->myDoc != NULL)
14854: xmlFreeDoc(ctxt->myDoc);
14855: ctxt->myDoc = NULL;
14856:
14857: ctxt->standalone = -1;
14858: ctxt->hasExternalSubset = 0;
14859: ctxt->hasPErefs = 0;
14860: ctxt->html = 0;
14861: ctxt->external = 0;
14862: ctxt->instate = XML_PARSER_START;
14863: ctxt->token = 0;
14864:
14865: ctxt->wellFormed = 1;
14866: ctxt->nsWellFormed = 1;
14867: ctxt->disableSAX = 0;
14868: ctxt->valid = 1;
14869: #if 0
14870: ctxt->vctxt.userData = ctxt;
14871: ctxt->vctxt.error = xmlParserValidityError;
14872: ctxt->vctxt.warning = xmlParserValidityWarning;
14873: #endif
14874: ctxt->record_info = 0;
14875: ctxt->nbChars = 0;
14876: ctxt->checkIndex = 0;
14877: ctxt->inSubset = 0;
14878: ctxt->errNo = XML_ERR_OK;
14879: ctxt->depth = 0;
14880: ctxt->charset = XML_CHAR_ENCODING_UTF8;
14881: ctxt->catalogs = NULL;
14882: ctxt->nbentities = 0;
14883: ctxt->sizeentities = 0;
1.1.1.3 misho 14884: ctxt->sizeentcopy = 0;
1.1 misho 14885: xmlInitNodeInfoSeq(&ctxt->node_seq);
14886:
14887: if (ctxt->attsDefault != NULL) {
14888: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14889: ctxt->attsDefault = NULL;
14890: }
14891: if (ctxt->attsSpecial != NULL) {
14892: xmlHashFree(ctxt->attsSpecial, NULL);
14893: ctxt->attsSpecial = NULL;
14894: }
14895:
14896: #ifdef LIBXML_CATALOG_ENABLED
14897: if (ctxt->catalogs != NULL)
14898: xmlCatalogFreeLocal(ctxt->catalogs);
14899: #endif
14900: if (ctxt->lastError.code != XML_ERR_OK)
14901: xmlResetError(&ctxt->lastError);
14902: }
14903:
14904: /**
14905: * xmlCtxtResetPush:
14906: * @ctxt: an XML parser context
14907: * @chunk: a pointer to an array of chars
14908: * @size: number of chars in the array
14909: * @filename: an optional file name or URI
14910: * @encoding: the document encoding, or NULL
14911: *
14912: * Reset a push parser context
14913: *
14914: * Returns 0 in case of success and 1 in case of error
14915: */
14916: int
14917: xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14918: int size, const char *filename, const char *encoding)
14919: {
14920: xmlParserInputPtr inputStream;
14921: xmlParserInputBufferPtr buf;
14922: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14923:
14924: if (ctxt == NULL)
14925: return(1);
14926:
14927: if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14928: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14929:
14930: buf = xmlAllocParserInputBuffer(enc);
14931: if (buf == NULL)
14932: return(1);
14933:
14934: if (ctxt == NULL) {
14935: xmlFreeParserInputBuffer(buf);
14936: return(1);
14937: }
14938:
14939: xmlCtxtReset(ctxt);
14940:
14941: if (ctxt->pushTab == NULL) {
14942: ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14943: sizeof(xmlChar *));
14944: if (ctxt->pushTab == NULL) {
14945: xmlErrMemory(ctxt, NULL);
14946: xmlFreeParserInputBuffer(buf);
14947: return(1);
14948: }
14949: }
14950:
14951: if (filename == NULL) {
14952: ctxt->directory = NULL;
14953: } else {
14954: ctxt->directory = xmlParserGetDirectory(filename);
14955: }
14956:
14957: inputStream = xmlNewInputStream(ctxt);
14958: if (inputStream == NULL) {
14959: xmlFreeParserInputBuffer(buf);
14960: return(1);
14961: }
14962:
14963: if (filename == NULL)
14964: inputStream->filename = NULL;
14965: else
14966: inputStream->filename = (char *)
14967: xmlCanonicPath((const xmlChar *) filename);
14968: inputStream->buf = buf;
1.1.1.3 misho 14969: xmlBufResetInput(buf->buffer, inputStream);
1.1 misho 14970:
14971: inputPush(ctxt, inputStream);
14972:
14973: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14974: (ctxt->input->buf != NULL)) {
1.1.1.3 misho 14975: size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14976: size_t cur = ctxt->input->cur - ctxt->input->base;
1.1 misho 14977:
14978: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14979:
1.1.1.3 misho 14980: xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
1.1 misho 14981: #ifdef DEBUG_PUSH
14982: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14983: #endif
14984: }
14985:
14986: if (encoding != NULL) {
14987: xmlCharEncodingHandlerPtr hdlr;
14988:
14989: if (ctxt->encoding != NULL)
14990: xmlFree((xmlChar *) ctxt->encoding);
14991: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14992:
14993: hdlr = xmlFindCharEncodingHandler(encoding);
14994: if (hdlr != NULL) {
14995: xmlSwitchToEncoding(ctxt, hdlr);
14996: } else {
14997: xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14998: "Unsupported encoding %s\n", BAD_CAST encoding);
14999: }
15000: } else if (enc != XML_CHAR_ENCODING_NONE) {
15001: xmlSwitchEncoding(ctxt, enc);
15002: }
15003:
15004: return(0);
15005: }
15006:
15007:
15008: /**
15009: * xmlCtxtUseOptionsInternal:
15010: * @ctxt: an XML parser context
15011: * @options: a combination of xmlParserOption
15012: * @encoding: the user provided encoding to use
15013: *
15014: * Applies the options to the parser context
15015: *
15016: * Returns 0 in case of success, the set of unknown or unimplemented options
15017: * in case of error.
15018: */
15019: static int
15020: xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15021: {
15022: if (ctxt == NULL)
15023: return(-1);
15024: if (encoding != NULL) {
15025: if (ctxt->encoding != NULL)
15026: xmlFree((xmlChar *) ctxt->encoding);
15027: ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15028: }
15029: if (options & XML_PARSE_RECOVER) {
15030: ctxt->recovery = 1;
15031: options -= XML_PARSE_RECOVER;
15032: ctxt->options |= XML_PARSE_RECOVER;
15033: } else
15034: ctxt->recovery = 0;
15035: if (options & XML_PARSE_DTDLOAD) {
15036: ctxt->loadsubset = XML_DETECT_IDS;
15037: options -= XML_PARSE_DTDLOAD;
15038: ctxt->options |= XML_PARSE_DTDLOAD;
15039: } else
15040: ctxt->loadsubset = 0;
15041: if (options & XML_PARSE_DTDATTR) {
15042: ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15043: options -= XML_PARSE_DTDATTR;
15044: ctxt->options |= XML_PARSE_DTDATTR;
15045: }
15046: if (options & XML_PARSE_NOENT) {
15047: ctxt->replaceEntities = 1;
15048: /* ctxt->loadsubset |= XML_DETECT_IDS; */
15049: options -= XML_PARSE_NOENT;
15050: ctxt->options |= XML_PARSE_NOENT;
15051: } else
15052: ctxt->replaceEntities = 0;
15053: if (options & XML_PARSE_PEDANTIC) {
15054: ctxt->pedantic = 1;
15055: options -= XML_PARSE_PEDANTIC;
15056: ctxt->options |= XML_PARSE_PEDANTIC;
15057: } else
15058: ctxt->pedantic = 0;
15059: if (options & XML_PARSE_NOBLANKS) {
15060: ctxt->keepBlanks = 0;
15061: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15062: options -= XML_PARSE_NOBLANKS;
15063: ctxt->options |= XML_PARSE_NOBLANKS;
15064: } else
15065: ctxt->keepBlanks = 1;
15066: if (options & XML_PARSE_DTDVALID) {
15067: ctxt->validate = 1;
15068: if (options & XML_PARSE_NOWARNING)
15069: ctxt->vctxt.warning = NULL;
15070: if (options & XML_PARSE_NOERROR)
15071: ctxt->vctxt.error = NULL;
15072: options -= XML_PARSE_DTDVALID;
15073: ctxt->options |= XML_PARSE_DTDVALID;
15074: } else
15075: ctxt->validate = 0;
15076: if (options & XML_PARSE_NOWARNING) {
15077: ctxt->sax->warning = NULL;
15078: options -= XML_PARSE_NOWARNING;
15079: }
15080: if (options & XML_PARSE_NOERROR) {
15081: ctxt->sax->error = NULL;
15082: ctxt->sax->fatalError = NULL;
15083: options -= XML_PARSE_NOERROR;
15084: }
15085: #ifdef LIBXML_SAX1_ENABLED
15086: if (options & XML_PARSE_SAX1) {
15087: ctxt->sax->startElement = xmlSAX2StartElement;
15088: ctxt->sax->endElement = xmlSAX2EndElement;
15089: ctxt->sax->startElementNs = NULL;
15090: ctxt->sax->endElementNs = NULL;
15091: ctxt->sax->initialized = 1;
15092: options -= XML_PARSE_SAX1;
15093: ctxt->options |= XML_PARSE_SAX1;
15094: }
15095: #endif /* LIBXML_SAX1_ENABLED */
15096: if (options & XML_PARSE_NODICT) {
15097: ctxt->dictNames = 0;
15098: options -= XML_PARSE_NODICT;
15099: ctxt->options |= XML_PARSE_NODICT;
15100: } else {
15101: ctxt->dictNames = 1;
15102: }
15103: if (options & XML_PARSE_NOCDATA) {
15104: ctxt->sax->cdataBlock = NULL;
15105: options -= XML_PARSE_NOCDATA;
15106: ctxt->options |= XML_PARSE_NOCDATA;
15107: }
15108: if (options & XML_PARSE_NSCLEAN) {
15109: ctxt->options |= XML_PARSE_NSCLEAN;
15110: options -= XML_PARSE_NSCLEAN;
15111: }
15112: if (options & XML_PARSE_NONET) {
15113: ctxt->options |= XML_PARSE_NONET;
15114: options -= XML_PARSE_NONET;
15115: }
15116: if (options & XML_PARSE_COMPACT) {
15117: ctxt->options |= XML_PARSE_COMPACT;
15118: options -= XML_PARSE_COMPACT;
15119: }
15120: if (options & XML_PARSE_OLD10) {
15121: ctxt->options |= XML_PARSE_OLD10;
15122: options -= XML_PARSE_OLD10;
15123: }
15124: if (options & XML_PARSE_NOBASEFIX) {
15125: ctxt->options |= XML_PARSE_NOBASEFIX;
15126: options -= XML_PARSE_NOBASEFIX;
15127: }
15128: if (options & XML_PARSE_HUGE) {
15129: ctxt->options |= XML_PARSE_HUGE;
15130: options -= XML_PARSE_HUGE;
1.1.1.3 misho 15131: if (ctxt->dict != NULL)
15132: xmlDictSetLimit(ctxt->dict, 0);
1.1 misho 15133: }
15134: if (options & XML_PARSE_OLDSAX) {
15135: ctxt->options |= XML_PARSE_OLDSAX;
15136: options -= XML_PARSE_OLDSAX;
15137: }
1.1.1.2 misho 15138: if (options & XML_PARSE_IGNORE_ENC) {
15139: ctxt->options |= XML_PARSE_IGNORE_ENC;
15140: options -= XML_PARSE_IGNORE_ENC;
15141: }
1.1.1.3 misho 15142: if (options & XML_PARSE_BIG_LINES) {
15143: ctxt->options |= XML_PARSE_BIG_LINES;
15144: options -= XML_PARSE_BIG_LINES;
15145: }
1.1 misho 15146: ctxt->linenumbers = 1;
15147: return (options);
15148: }
15149:
15150: /**
15151: * xmlCtxtUseOptions:
15152: * @ctxt: an XML parser context
15153: * @options: a combination of xmlParserOption
15154: *
15155: * Applies the options to the parser context
15156: *
15157: * Returns 0 in case of success, the set of unknown or unimplemented options
15158: * in case of error.
15159: */
15160: int
15161: xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15162: {
15163: return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15164: }
15165:
15166: /**
15167: * xmlDoRead:
15168: * @ctxt: an XML parser context
15169: * @URL: the base URL to use for the document
15170: * @encoding: the document encoding, or NULL
15171: * @options: a combination of xmlParserOption
15172: * @reuse: keep the context for reuse
15173: *
15174: * Common front-end for the xmlRead functions
15175: *
15176: * Returns the resulting document tree or NULL
15177: */
15178: static xmlDocPtr
15179: xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15180: int options, int reuse)
15181: {
15182: xmlDocPtr ret;
15183:
15184: xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15185: if (encoding != NULL) {
15186: xmlCharEncodingHandlerPtr hdlr;
15187:
15188: hdlr = xmlFindCharEncodingHandler(encoding);
15189: if (hdlr != NULL)
15190: xmlSwitchToEncoding(ctxt, hdlr);
15191: }
15192: if ((URL != NULL) && (ctxt->input != NULL) &&
15193: (ctxt->input->filename == NULL))
15194: ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15195: xmlParseDocument(ctxt);
15196: if ((ctxt->wellFormed) || ctxt->recovery)
15197: ret = ctxt->myDoc;
15198: else {
15199: ret = NULL;
15200: if (ctxt->myDoc != NULL) {
15201: xmlFreeDoc(ctxt->myDoc);
15202: }
15203: }
15204: ctxt->myDoc = NULL;
15205: if (!reuse) {
15206: xmlFreeParserCtxt(ctxt);
15207: }
15208:
15209: return (ret);
15210: }
15211:
15212: /**
15213: * xmlReadDoc:
15214: * @cur: a pointer to a zero terminated string
15215: * @URL: the base URL to use for the document
15216: * @encoding: the document encoding, or NULL
15217: * @options: a combination of xmlParserOption
15218: *
15219: * parse an XML in-memory document and build a tree.
1.1.1.3 misho 15220: *
1.1 misho 15221: * Returns the resulting document tree
15222: */
15223: xmlDocPtr
15224: xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15225: {
15226: xmlParserCtxtPtr ctxt;
15227:
15228: if (cur == NULL)
15229: return (NULL);
15230:
15231: ctxt = xmlCreateDocParserCtxt(cur);
15232: if (ctxt == NULL)
15233: return (NULL);
15234: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15235: }
15236:
15237: /**
15238: * xmlReadFile:
15239: * @filename: a file or URL
15240: * @encoding: the document encoding, or NULL
15241: * @options: a combination of xmlParserOption
15242: *
15243: * parse an XML file from the filesystem or the network.
1.1.1.3 misho 15244: *
1.1 misho 15245: * Returns the resulting document tree
15246: */
15247: xmlDocPtr
15248: xmlReadFile(const char *filename, const char *encoding, int options)
15249: {
15250: xmlParserCtxtPtr ctxt;
15251:
15252: ctxt = xmlCreateURLParserCtxt(filename, options);
15253: if (ctxt == NULL)
15254: return (NULL);
15255: return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15256: }
15257:
15258: /**
15259: * xmlReadMemory:
15260: * @buffer: a pointer to a char array
15261: * @size: the size of the array
15262: * @URL: the base URL to use for the document
15263: * @encoding: the document encoding, or NULL
15264: * @options: a combination of xmlParserOption
15265: *
15266: * parse an XML in-memory document and build a tree.
1.1.1.3 misho 15267: *
1.1 misho 15268: * Returns the resulting document tree
15269: */
15270: xmlDocPtr
15271: xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15272: {
15273: xmlParserCtxtPtr ctxt;
15274:
15275: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15276: if (ctxt == NULL)
15277: return (NULL);
15278: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15279: }
15280:
15281: /**
15282: * xmlReadFd:
15283: * @fd: an open file descriptor
15284: * @URL: the base URL to use for the document
15285: * @encoding: the document encoding, or NULL
15286: * @options: a combination of xmlParserOption
15287: *
15288: * parse an XML from a file descriptor and build a tree.
15289: * NOTE that the file descriptor will not be closed when the
15290: * reader is closed or reset.
1.1.1.3 misho 15291: *
1.1 misho 15292: * Returns the resulting document tree
15293: */
15294: xmlDocPtr
15295: xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15296: {
15297: xmlParserCtxtPtr ctxt;
15298: xmlParserInputBufferPtr input;
15299: xmlParserInputPtr stream;
15300:
15301: if (fd < 0)
15302: return (NULL);
15303:
15304: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15305: if (input == NULL)
15306: return (NULL);
15307: input->closecallback = NULL;
15308: ctxt = xmlNewParserCtxt();
15309: if (ctxt == NULL) {
15310: xmlFreeParserInputBuffer(input);
15311: return (NULL);
15312: }
15313: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15314: if (stream == NULL) {
15315: xmlFreeParserInputBuffer(input);
15316: xmlFreeParserCtxt(ctxt);
15317: return (NULL);
15318: }
15319: inputPush(ctxt, stream);
15320: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15321: }
15322:
15323: /**
15324: * xmlReadIO:
15325: * @ioread: an I/O read function
15326: * @ioclose: an I/O close function
15327: * @ioctx: an I/O handler
15328: * @URL: the base URL to use for the document
15329: * @encoding: the document encoding, or NULL
15330: * @options: a combination of xmlParserOption
15331: *
15332: * parse an XML document from I/O functions and source and build a tree.
1.1.1.2 misho 15333: *
1.1 misho 15334: * Returns the resulting document tree
15335: */
15336: xmlDocPtr
15337: xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15338: void *ioctx, const char *URL, const char *encoding, int options)
15339: {
15340: xmlParserCtxtPtr ctxt;
15341: xmlParserInputBufferPtr input;
15342: xmlParserInputPtr stream;
15343:
15344: if (ioread == NULL)
15345: return (NULL);
15346:
15347: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15348: XML_CHAR_ENCODING_NONE);
1.1.1.2 misho 15349: if (input == NULL) {
15350: if (ioclose != NULL)
15351: ioclose(ioctx);
1.1 misho 15352: return (NULL);
1.1.1.2 misho 15353: }
1.1 misho 15354: ctxt = xmlNewParserCtxt();
15355: if (ctxt == NULL) {
15356: xmlFreeParserInputBuffer(input);
15357: return (NULL);
15358: }
15359: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15360: if (stream == NULL) {
15361: xmlFreeParserInputBuffer(input);
15362: xmlFreeParserCtxt(ctxt);
15363: return (NULL);
15364: }
15365: inputPush(ctxt, stream);
15366: return (xmlDoRead(ctxt, URL, encoding, options, 0));
15367: }
15368:
15369: /**
15370: * xmlCtxtReadDoc:
15371: * @ctxt: an XML parser context
15372: * @cur: a pointer to a zero terminated string
15373: * @URL: the base URL to use for the document
15374: * @encoding: the document encoding, or NULL
15375: * @options: a combination of xmlParserOption
15376: *
15377: * parse an XML in-memory document and build a tree.
15378: * This reuses the existing @ctxt parser context
1.1.1.2 misho 15379: *
1.1 misho 15380: * Returns the resulting document tree
15381: */
15382: xmlDocPtr
15383: xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15384: const char *URL, const char *encoding, int options)
15385: {
15386: xmlParserInputPtr stream;
15387:
15388: if (cur == NULL)
15389: return (NULL);
15390: if (ctxt == NULL)
15391: return (NULL);
15392:
15393: xmlCtxtReset(ctxt);
15394:
15395: stream = xmlNewStringInputStream(ctxt, cur);
15396: if (stream == NULL) {
15397: return (NULL);
15398: }
15399: inputPush(ctxt, stream);
15400: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15401: }
15402:
15403: /**
15404: * xmlCtxtReadFile:
15405: * @ctxt: an XML parser context
15406: * @filename: a file or URL
15407: * @encoding: the document encoding, or NULL
15408: * @options: a combination of xmlParserOption
15409: *
15410: * parse an XML file from the filesystem or the network.
15411: * This reuses the existing @ctxt parser context
1.1.1.3 misho 15412: *
1.1 misho 15413: * Returns the resulting document tree
15414: */
15415: xmlDocPtr
15416: xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15417: const char *encoding, int options)
15418: {
15419: xmlParserInputPtr stream;
15420:
15421: if (filename == NULL)
15422: return (NULL);
15423: if (ctxt == NULL)
15424: return (NULL);
15425:
15426: xmlCtxtReset(ctxt);
15427:
15428: stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15429: if (stream == NULL) {
15430: return (NULL);
15431: }
15432: inputPush(ctxt, stream);
15433: return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15434: }
15435:
15436: /**
15437: * xmlCtxtReadMemory:
15438: * @ctxt: an XML parser context
15439: * @buffer: a pointer to a char array
15440: * @size: the size of the array
15441: * @URL: the base URL to use for the document
15442: * @encoding: the document encoding, or NULL
15443: * @options: a combination of xmlParserOption
15444: *
15445: * parse an XML in-memory document and build a tree.
15446: * This reuses the existing @ctxt parser context
1.1.1.3 misho 15447: *
1.1 misho 15448: * Returns the resulting document tree
15449: */
15450: xmlDocPtr
15451: xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15452: const char *URL, const char *encoding, int options)
15453: {
15454: xmlParserInputBufferPtr input;
15455: xmlParserInputPtr stream;
15456:
15457: if (ctxt == NULL)
15458: return (NULL);
15459: if (buffer == NULL)
15460: return (NULL);
15461:
15462: xmlCtxtReset(ctxt);
15463:
15464: input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15465: if (input == NULL) {
15466: return(NULL);
15467: }
15468:
15469: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15470: if (stream == NULL) {
15471: xmlFreeParserInputBuffer(input);
15472: return(NULL);
15473: }
15474:
15475: inputPush(ctxt, stream);
15476: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15477: }
15478:
15479: /**
15480: * xmlCtxtReadFd:
15481: * @ctxt: an XML parser context
15482: * @fd: an open file descriptor
15483: * @URL: the base URL to use for the document
15484: * @encoding: the document encoding, or NULL
15485: * @options: a combination of xmlParserOption
15486: *
15487: * parse an XML from a file descriptor and build a tree.
15488: * This reuses the existing @ctxt parser context
15489: * NOTE that the file descriptor will not be closed when the
15490: * reader is closed or reset.
1.1.1.3 misho 15491: *
1.1 misho 15492: * Returns the resulting document tree
15493: */
15494: xmlDocPtr
15495: xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15496: const char *URL, const char *encoding, int options)
15497: {
15498: xmlParserInputBufferPtr input;
15499: xmlParserInputPtr stream;
15500:
15501: if (fd < 0)
15502: return (NULL);
15503: if (ctxt == NULL)
15504: return (NULL);
15505:
15506: xmlCtxtReset(ctxt);
15507:
15508:
15509: input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15510: if (input == NULL)
15511: return (NULL);
15512: input->closecallback = NULL;
15513: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15514: if (stream == NULL) {
15515: xmlFreeParserInputBuffer(input);
15516: return (NULL);
15517: }
15518: inputPush(ctxt, stream);
15519: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15520: }
15521:
15522: /**
15523: * xmlCtxtReadIO:
15524: * @ctxt: an XML parser context
15525: * @ioread: an I/O read function
15526: * @ioclose: an I/O close function
15527: * @ioctx: an I/O handler
15528: * @URL: the base URL to use for the document
15529: * @encoding: the document encoding, or NULL
15530: * @options: a combination of xmlParserOption
15531: *
15532: * parse an XML document from I/O functions and source and build a tree.
15533: * This reuses the existing @ctxt parser context
1.1.1.2 misho 15534: *
1.1 misho 15535: * Returns the resulting document tree
15536: */
15537: xmlDocPtr
15538: xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15539: xmlInputCloseCallback ioclose, void *ioctx,
15540: const char *URL,
15541: const char *encoding, int options)
15542: {
15543: xmlParserInputBufferPtr input;
15544: xmlParserInputPtr stream;
15545:
15546: if (ioread == NULL)
15547: return (NULL);
15548: if (ctxt == NULL)
15549: return (NULL);
15550:
15551: xmlCtxtReset(ctxt);
15552:
15553: input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15554: XML_CHAR_ENCODING_NONE);
1.1.1.2 misho 15555: if (input == NULL) {
15556: if (ioclose != NULL)
15557: ioclose(ioctx);
1.1 misho 15558: return (NULL);
1.1.1.2 misho 15559: }
1.1 misho 15560: stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15561: if (stream == NULL) {
15562: xmlFreeParserInputBuffer(input);
15563: return (NULL);
15564: }
15565: inputPush(ctxt, stream);
15566: return (xmlDoRead(ctxt, URL, encoding, options, 1));
15567: }
15568:
15569: #define bottom_parser
15570: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>