Annotation of embedaddon/libxml2/include/libxml/parser.h, revision 1.1.1.1
1.1 misho 1: /*
2: * Summary: the core parser module
3: * Description: Interfaces, constants and types related to the XML parser
4: *
5: * Copy: See Copyright for the status of this software.
6: *
7: * Author: Daniel Veillard
8: */
9:
10: #ifndef __XML_PARSER_H__
11: #define __XML_PARSER_H__
12:
13: #include <stdarg.h>
14:
15: #include <libxml/xmlversion.h>
16: #include <libxml/tree.h>
17: #include <libxml/dict.h>
18: #include <libxml/hash.h>
19: #include <libxml/valid.h>
20: #include <libxml/entities.h>
21: #include <libxml/xmlerror.h>
22: #include <libxml/xmlstring.h>
23:
24: #ifdef __cplusplus
25: extern "C" {
26: #endif
27:
28: /**
29: * XML_DEFAULT_VERSION:
30: *
31: * The default version of XML used: 1.0
32: */
33: #define XML_DEFAULT_VERSION "1.0"
34:
35: /**
36: * xmlParserInput:
37: *
38: * An xmlParserInput is an input flow for the XML processor.
39: * Each entity parsed is associated with an xmlParserInput (except the
40: * few predefined ones). This is the case both for internal entities
41: * - in which case the flow is already completely in memory - or
42: * external entities - in which case we use the buf structure for
43: * progressive reading and I18N conversions to the internal UTF-8 format.
44: */
45:
46: /**
47: * xmlParserInputDeallocate:
48: * @str: the string to deallocate
49: *
50: * Callback for freeing some parser input allocations.
51: */
52: typedef void (* xmlParserInputDeallocate)(xmlChar *str);
53:
54: struct _xmlParserInput {
55: /* State of one parser input stream. First, the underlying input buffer: */
56: xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
57:
58: const char *filename; /* the file analyzed, if any */
59: const char *directory; /* the directory/base of the file */
60: const xmlChar *base; /* base (start) of the array to parse */
61: const xmlChar *cur; /* current char being parsed */
62: const xmlChar *end; /* end of the array to parse */
63: int length; /* length, if known */
64: int line; /* current line */
65: int col; /* current column */
66: /*
67: * NOTE: consumed is only tested for equality in the parser code,
68: * so even if it overflows this should not cause trouble
69: * when parsing very large instances.
70: */
71: unsigned long consumed; /* how many xmlChars were already consumed */
72: xmlParserInputDeallocate free; /* function used to deallocate the base */
73: const xmlChar *encoding; /* the encoding string for the entity */
74: const xmlChar *version; /* the version string for the entity */
75: int standalone; /* was that entity marked standalone? */
76: int id; /* a unique identifier for the entity */
77: };
78:
79: /**
80: * xmlParserNodeInfo:
81: *
82: * The parser can be asked to collect node information, i.e. at what
83: * place in the file each node was detected.
84: * NOTE: This is off by default and not very well tested.
85: */
86: typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
87: typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
88:
89: struct _xmlParserNodeInfo {
90: const struct _xmlNode* node; /* the node this record describes */
91: /* Position and line number where the text that created the node begins and ends */
92: unsigned long begin_pos;
93: unsigned long begin_line;
94: unsigned long end_pos;
95: unsigned long end_line;
96: };
97:
98: typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; /* a sequence of xmlParserNodeInfo records */
99: typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
100: struct _xmlParserNodeInfoSeq {
101: unsigned long maximum; /* NOTE(review): presumably the allocated capacity of buffer -- confirm */
102: unsigned long length; /* NOTE(review): presumably the number of records in use -- confirm */
103: xmlParserNodeInfo* buffer; /* storage for the xmlParserNodeInfo records */
104: };
105:
106: /**
107: * xmlParserInputState:
108: *
109: * The parser now also works as a state-based parser.
110: * The recursive one uses the state info for entity processing.
111: */
112: typedef enum {
113: XML_PARSER_EOF = -1, /* nothing is to be parsed */
114: XML_PARSER_START = 0, /* nothing has been parsed */
115: XML_PARSER_MISC, /* Misc* before the internal subset */
116: XML_PARSER_PI, /* within a processing instruction */
117: XML_PARSER_DTD, /* within some DTD content */
118: XML_PARSER_PROLOG, /* Misc* after the internal subset */
119: XML_PARSER_COMMENT, /* within a comment */
120: XML_PARSER_START_TAG, /* within a start tag */
121: XML_PARSER_CONTENT, /* within the content */
122: XML_PARSER_CDATA_SECTION, /* within a CDATA section */
123: XML_PARSER_END_TAG, /* within a closing tag */
124: XML_PARSER_ENTITY_DECL, /* within an entity declaration */
125: XML_PARSER_ENTITY_VALUE, /* within an entity value in a declaration */
126: XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
127: XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
128: XML_PARSER_EPILOG, /* the Misc* after the last end tag */
129: XML_PARSER_IGNORE, /* within an IGNORED section */
130: XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */
131: } xmlParserInputState;
132:
133: /**
134: * XML_DETECT_IDS:
135: *
136: * Bit in the loadsubset context field telling the parser to do ID/REFs lookups.
137: * Use it to initialize xmlLoadExtDtdDefaultValue.
138: */
139: #define XML_DETECT_IDS 2
140:
141: /**
142: * XML_COMPLETE_ATTRS:
143: *
144: * Bit in the loadsubset context field telling the parser to complete the
145: * elements' attribute lists with the ones defaulted from the DTDs.
146: * Use it to initialize xmlLoadExtDtdDefaultValue.
147: */
148: #define XML_COMPLETE_ATTRS 4
149:
150: /**
151: * XML_SKIP_IDS:
152: *
153: * Bit in the loadsubset context field telling the parser not to do ID/REFs registration.
154: * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
155: */
156: #define XML_SKIP_IDS 8
157:
158: /**
159: * xmlParserMode:
160: *
161: * A parser can operate in various modes
162: */
163: typedef enum { /* NOTE(review): per-value meanings are only implied by the constant names -- confirm in the parser sources */
164: XML_PARSE_UNKNOWN = 0,
165: XML_PARSE_DOM = 1,
166: XML_PARSE_SAX = 2,
167: XML_PARSE_PUSH_DOM = 3,
168: XML_PARSE_PUSH_SAX = 4,
169: XML_PARSE_READER = 5
170: } xmlParserMode; /* stored in the parseMode field of the parser context */
171:
172: /**
173: * xmlParserCtxt:
174: *
175: * The parser context.
176: * NOTE: This doesn't completely define the parser state; the (current ?)
177: * design of the parser uses recursive function calls since this allows
178: * an easy mapping from the production rules of the specification
179: * to the actual code. The drawback is that the actual function calls
180: * also reflect the parser state. However, most of the parsing routines
181: * take the parser context pointer as their only argument, so migrating
182: * to a state-based parser for progressive parsing shouldn't be too hard.
183: */
184: struct _xmlParserCtxt {
185: struct _xmlSAXHandler *sax; /* the SAX handler */
186: void *userData; /* for SAX interface only, used by DOM build */
187: xmlDocPtr myDoc; /* the document being built */
188: int wellFormed; /* is the document well formed? */
189: int replaceEntities; /* shall we replace entities? */
190: const xmlChar *version; /* the XML version string */
191: const xmlChar *encoding; /* the declared encoding, if any */
192: int standalone; /* standalone document */
193: int html; /* an HTML(1)/Docbook(2) document
194: * 3 is HTML after <head>
195: * 10 is HTML after <body>
196: */
197:
198: /* Input stream stack */
199: xmlParserInputPtr input; /* current input stream */
200: int inputNr; /* number of current input streams */
201: int inputMax; /* max number of input streams */
202: xmlParserInputPtr *inputTab; /* stack of inputs */
203:
204: /* Node analysis stack, only used for DOM building */
205: xmlNodePtr node; /* current parsed node */
206: int nodeNr; /* depth of the parsing stack */
207: int nodeMax; /* max depth of the parsing stack */
208: xmlNodePtr *nodeTab; /* array of nodes */
209:
210: int record_info; /* whether node info should be kept */
211: xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
212:
213: int errNo; /* error code */
214:
215: int hasExternalSubset; /* the document references an external subset */
216: int hasPErefs; /* the internal subset has PE refs */
217: int external; /* are we parsing an external entity? */
218:
219: int valid; /* is the document valid? */
220: int validate; /* shall we try to validate? */
221: xmlValidCtxt vctxt; /* the validity context */
222:
223: xmlParserInputState instate; /* current type of input */
224: int token; /* next char look-ahead */
225:
226: char *directory; /* the data directory */
227:
228: /* Node name stack */
229: const xmlChar *name; /* name of the current parsed node */
230: int nameNr; /* depth of the parsing stack */
231: int nameMax; /* max depth of the parsing stack */
232: const xmlChar * *nameTab; /* array of node names */
233:
234: long nbChars; /* number of xmlChar processed */
235: long checkIndex; /* used by progressive parsing lookup */
236: int keepBlanks; /* ugly but ... */
237: int disableSAX; /* SAX callbacks are disabled */
238: int inSubset; /* parsing is in the internal (1) / external (2) subset */
239: const xmlChar * intSubName; /* name of subset */
240: xmlChar * extSubURI; /* URI of external subset */
241: xmlChar * extSubSystem; /* SYSTEM ID of external subset */
242:
243: /* xml:space values */
244: int * space; /* should the parser preserve spaces? */
245: int spaceNr; /* depth of the parsing stack */
246: int spaceMax; /* max depth of the parsing stack */
247: int * spaceTab; /* array of space infos */
248:
249: int depth; /* to prevent entity substitution loops */
250: xmlParserInputPtr entity; /* used to check entity boundaries */
251: int charset; /* encoding of the in-memory content,
252: actually an xmlCharEncoding */
253: int nodelen; /* these two fields are there to */
254: int nodemem; /* speed up large node parsing */
255: int pedantic; /* signal pedantic warnings */
256: void *_private; /* for user data, libxml won't touch it */
257:
258: int loadsubset; /* should the external subset be loaded? */
259: int linenumbers; /* set line number in element content */
260: void *catalogs; /* document's own catalog */
261: int recovery; /* run in recovery mode */
262: int progressive; /* is this a progressive parsing? */
263: xmlDictPtr dict; /* dictionary for the parser */
264: const xmlChar * *atts; /* array for the attributes callbacks */
265: int maxatts; /* the size of the array */
266: int docdict; /* use strings from dict to build tree */
267:
268: /*
269: * Pre-interned strings
270: */
271: const xmlChar *str_xml;
272: const xmlChar *str_xmlns;
273: const xmlChar *str_xml_ns;
274:
275: /*
276: * Everything below is used only by the new SAX mode
277: */
278: int sax2; /* operating in the new SAX mode */
279: int nsNr; /* the number of inherited namespaces */
280: int nsMax; /* the size of the arrays */
281: const xmlChar * *nsTab; /* the array of prefix/namespace name */
282: int *attallocs; /* which attributes were allocated */
283: void * *pushTab; /* array of data for push */
284: xmlHashTablePtr attsDefault; /* defaulted attributes, if any */
285: xmlHashTablePtr attsSpecial; /* non-CDATA attributes, if any */
286: int nsWellFormed; /* is the document XML Namespace okay? */
287: int options; /* extra options */
288:
289: /*
290: * These fields are needed only for streaming parsing so far
291: */
292: int dictNames; /* use dictionary names for the tree */
293: int freeElemsNr; /* number of freed element nodes */
294: xmlNodePtr freeElems; /* list of freed element nodes */
295: int freeAttrsNr; /* number of freed attribute nodes */
296: xmlAttrPtr freeAttrs; /* list of freed attribute nodes */
297:
298: /*
299: * The complete error information for the last error.
300: */
301: xmlError lastError;
302: xmlParserMode parseMode; /* the parser mode */
303: unsigned long nbentities; /* number of entity references */
304: unsigned long sizeentities; /* size of parsed entities */
305:
306: /* For use by the HTML non-recursive parser */
307: xmlParserNodeInfo *nodeInfo; /* current NodeInfo */
308: int nodeInfoNr; /* depth of the parsing stack */
309: int nodeInfoMax; /* max depth of the parsing stack */
310: xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */
311: };
312:
313: /**
314: * xmlSAXLocator:
315: *
316: * A SAX Locator.
317: */
318: struct _xmlSAXLocator { /* table of four callbacks, each taking an opaque ctx pointer */
319: const xmlChar *(*getPublicId)(void *ctx); /* returns the public ID as an xmlChar string */
320: const xmlChar *(*getSystemId)(void *ctx); /* returns the system ID as an xmlChar string */
321: int (*getLineNumber)(void *ctx); /* returns the line number */
322: int (*getColumnNumber)(void *ctx); /* returns the column number */
323: };
324:
325: /**
326: * xmlSAXHandler:
327: *
328: * A SAX handler is a bunch of callbacks called by the parser when processing
329: * of the input generates data or structure information.
330: */
331:
332: /**
333: * resolveEntitySAXFunc:
334: * @ctx: the user data (XML parser context)
335: * @publicId: The public ID of the entity
336: * @systemId: The system ID of the entity
337: *
338: * Callback:
339: * The entity loader. To control the loading of external entities,
340: * the application can either:
341: * - override this resolveEntity() callback in the SAX block
342: * - or better use the xmlSetExternalEntityLoader() function to
343: * set up its own entity resolution routine
344: *
345: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
346: */
347: typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
348: const xmlChar *publicId,
349: const xmlChar *systemId);
350: /**
351: * internalSubsetSAXFunc:
352: * @ctx: the user data (XML parser context)
353: * @name: the root element name
354: * @ExternalID: the external ID
355: * @SystemID: the SYSTEM ID (e.g. filename or URL)
356: *
357: * Callback on internal subset declaration.
358: */
359: typedef void (*internalSubsetSAXFunc) (void *ctx,
360: const xmlChar *name,
361: const xmlChar *ExternalID,
362: const xmlChar *SystemID);
363: /**
364: * externalSubsetSAXFunc:
365: * @ctx: the user data (XML parser context)
366: * @name: the root element name
367: * @ExternalID: the external ID
368: * @SystemID: the SYSTEM ID (e.g. filename or URL)
369: *
370: * Callback on external subset declaration.
371: */
372: typedef void (*externalSubsetSAXFunc) (void *ctx,
373: const xmlChar *name,
374: const xmlChar *ExternalID,
375: const xmlChar *SystemID);
376: /**
377: * getEntitySAXFunc:
378: * @ctx: the user data (XML parser context)
379: * @name: The entity name
380: *
381: * Get an entity by name.
382: *
383: * Returns the xmlEntityPtr if found.
384: */
385: typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
386: const xmlChar *name);
387: /**
388: * getParameterEntitySAXFunc:
389: * @ctx: the user data (XML parser context)
390: * @name: The entity name
391: *
392: * Get a parameter entity by name.
393: *
394: * Returns the xmlEntityPtr if found.
395: */
396: typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
397: const xmlChar *name);
398: /**
399: * entityDeclSAXFunc:
400: * @ctx: the user data (XML parser context)
401: * @name: the entity name
402: * @type: the entity type
403: * @publicId: The public ID of the entity
404: * @systemId: The system ID of the entity
405: * @content: the entity value (without processing).
406: *
407: * An entity definition has been parsed.
408: */
409: typedef void (*entityDeclSAXFunc) (void *ctx,
410: const xmlChar *name,
411: int type,
412: const xmlChar *publicId,
413: const xmlChar *systemId,
414: xmlChar *content);
415: /**
416: * notationDeclSAXFunc:
417: * @ctx: the user data (XML parser context)
418: * @name: The name of the notation
419: * @publicId: The public ID of the entity
420: * @systemId: The system ID of the entity
421: *
422: * What to do when a notation declaration has been parsed.
423: */
424: typedef void (*notationDeclSAXFunc)(void *ctx,
425: const xmlChar *name,
426: const xmlChar *publicId,
427: const xmlChar *systemId);
428: /**
429: * attributeDeclSAXFunc:
430: * @ctx: the user data (XML parser context)
431: * @elem: the name of the element
432: * @fullname: the attribute name
433: * @type: the attribute type
434: * @def: the type of default value
435: * @defaultValue: the attribute default value
436: * @tree: the tree of enumerated value set
437: *
438: * An attribute definition has been parsed.
439: */
440: typedef void (*attributeDeclSAXFunc)(void *ctx,
441: const xmlChar *elem,
442: const xmlChar *fullname,
443: int type,
444: int def,
445: const xmlChar *defaultValue,
446: xmlEnumerationPtr tree);
447: /**
448: * elementDeclSAXFunc:
449: * @ctx: the user data (XML parser context)
450: * @name: the element name
451: * @type: the element type
452: * @content: the element value tree
453: *
454: * An element definition has been parsed.
455: */
456: typedef void (*elementDeclSAXFunc)(void *ctx,
457: const xmlChar *name,
458: int type,
459: xmlElementContentPtr content);
460: /**
461: * unparsedEntityDeclSAXFunc:
462: * @ctx: the user data (XML parser context)
463: * @name: The name of the entity
464: * @publicId: The public ID of the entity
465: * @systemId: The system ID of the entity
466: * @notationName: the name of the notation
467: *
468: * What to do when an unparsed entity declaration is parsed.
469: */
470: typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
471: const xmlChar *name,
472: const xmlChar *publicId,
473: const xmlChar *systemId,
474: const xmlChar *notationName);
475: /**
476: * setDocumentLocatorSAXFunc:
477: * @ctx: the user data (XML parser context)
478: * @loc: A SAX Locator
479: *
480: * Receive the document locator at startup, actually xmlDefaultSAXLocator.
481: * Everything is available on the context, so this is useless in our case.
482: */
483: typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
484: xmlSAXLocatorPtr loc);
485: /**
486: * startDocumentSAXFunc:
487: * @ctx: the user data (XML parser context)
488: *
489: * Called when the document starts being processed.
490: */
491: typedef void (*startDocumentSAXFunc) (void *ctx);
492: /**
493: * endDocumentSAXFunc:
494: * @ctx: the user data (XML parser context)
495: *
496: * Called when the document end has been detected.
497: */
498: typedef void (*endDocumentSAXFunc) (void *ctx);
499: /**
500: * startElementSAXFunc:
501: * @ctx: the user data (XML parser context)
502: * @name: The element name, including namespace prefix
503: * @atts: An array of name/value attributes pairs, NULL terminated
504: *
505: * Called when an opening tag has been processed.
506: */
507: typedef void (*startElementSAXFunc) (void *ctx,
508: const xmlChar *name,
509: const xmlChar **atts);
510: /**
511: * endElementSAXFunc:
512: * @ctx: the user data (XML parser context)
513: * @name: The element name
514: *
515: * Called when the end of an element has been detected.
516: */
517: typedef void (*endElementSAXFunc) (void *ctx,
518: const xmlChar *name);
519: /**
520: * attributeSAXFunc:
521: * @ctx: the user data (XML parser context)
522: * @name: The attribute name, including namespace prefix
523: * @value: The attribute value
524: *
525: * Handle an attribute that has been read by the parser.
526: * The default handling is to convert the attribute into a
527: * DOM subtree and paste it in a new xmlAttr element added to
528: * the element.
529: */
530: typedef void (*attributeSAXFunc) (void *ctx,
531: const xmlChar *name,
532: const xmlChar *value);
533: /**
534: * referenceSAXFunc:
535: * @ctx: the user data (XML parser context)
536: * @name: The entity name
537: *
538: * Called when an entity reference is detected.
539: */
540: typedef void (*referenceSAXFunc) (void *ctx,
541: const xmlChar *name);
542: /**
543: * charactersSAXFunc:
544: * @ctx: the user data (XML parser context)
545: * @ch: a xmlChar string
546: * @len: the number of xmlChar
547: *
548: * Receiving some chars from the parser.
549: */
550: typedef void (*charactersSAXFunc) (void *ctx,
551: const xmlChar *ch,
552: int len);
553: /**
554: * ignorableWhitespaceSAXFunc:
555: * @ctx: the user data (XML parser context)
556: * @ch: a xmlChar string
557: * @len: the number of xmlChar
558: *
559: * Receiving some ignorable whitespace from the parser.
560: * UNUSED: by default the DOM building will use characters.
561: */
562: typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
563: const xmlChar *ch,
564: int len);
565: /**
566: * processingInstructionSAXFunc:
567: * @ctx: the user data (XML parser context)
568: * @target: the target name
569: * @data: the PI data
570: *
571: * A processing instruction has been parsed.
572: */
573: typedef void (*processingInstructionSAXFunc) (void *ctx,
574: const xmlChar *target,
575: const xmlChar *data);
576: /**
577: * commentSAXFunc:
578: * @ctx: the user data (XML parser context)
579: * @value: the comment content
580: *
581: * A comment has been parsed.
582: */
583: typedef void (*commentSAXFunc) (void *ctx,
584: const xmlChar *value);
585: /**
586: * cdataBlockSAXFunc:
587: * @ctx: the user data (XML parser context)
588: * @value: The pcdata content
589: * @len: the block length
590: *
591: * Called when a pcdata block has been parsed.
592: */
593: typedef void (*cdataBlockSAXFunc) (
594: void *ctx,
595: const xmlChar *value,
596: int len);
597: /**
598: * warningSAXFunc:
599: * @ctx: an XML parser context
600: * @msg: the message to display/transmit
601: * @...: extra parameters for the message display
602: *
603: * Callback to display and format a warning message.
604: */
605: typedef void (XMLCDECL *warningSAXFunc) (void *ctx,
606: const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
607: /**
608: * errorSAXFunc:
609: * @ctx: an XML parser context
610: * @msg: the message to display/transmit
611: * @...: extra parameters for the message display
612: *
613: * Callback to display and format an error message.
614: */
615: typedef void (XMLCDECL *errorSAXFunc) (void *ctx,
616: const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
617: /**
618: * fatalErrorSAXFunc:
619: * @ctx: an XML parser context
620: * @msg: the message to display/transmit
621: * @...: extra parameters for the message display
622: *
623: * Callback to display and format a fatal error message.
624: * Note: so far fatalError() SAX callbacks are not used; error()
625: * gets all the callbacks for errors.
626: */
627: typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx,
628: const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
629: /**
630: * isStandaloneSAXFunc:
631: * @ctx: the user data (XML parser context)
632: *
633: * Is this document tagged standalone?
634: *
635: * Returns 1 if true
636: */
637: typedef int (*isStandaloneSAXFunc) (void *ctx);
638: /**
639: * hasInternalSubsetSAXFunc:
640: * @ctx: the user data (XML parser context)
641: *
642: * Does this document have an internal subset?
643: *
644: * Returns 1 if true
645: */
646: typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
647:
648: /**
649: * hasExternalSubsetSAXFunc:
650: * @ctx: the user data (XML parser context)
651: *
652: * Does this document have an external subset?
653: *
654: * Returns 1 if true
655: */
656: typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
657:
658: /************************************************************************
659: * *
660: * The SAX version 2 API extensions *
661: * *
662: ************************************************************************/
663: /**
664: * XML_SAX2_MAGIC:
665: *
666: * Special constant found in SAX2 blocks initialized fields
667: */
668: #define XML_SAX2_MAGIC 0xDEEDBEAF
669:
670: /**
671: * startElementNsSAX2Func:
672: * @ctx: the user data (XML parser context)
673: * @localname: the local name of the element
674: * @prefix: the element namespace prefix if available
675: * @URI: the element namespace name if available
676: * @nb_namespaces: number of namespace definitions on that node
677: * @namespaces: pointer to the array of prefix/URI pairs namespace definitions
678: * @nb_attributes: the number of attributes on that node
679: * @nb_defaulted: the number of defaulted attributes. The defaulted
680: * ones are at the end of the array
681: * @attributes: pointer to the array of (localname/prefix/URI/value/end)
682: * attribute values.
683: *
684: * SAX2 callback when an element start has been detected by the parser.
685: * It provides the namespace information for the element, as well as
686: * the new namespace declarations on the element.
687: */
688:
689: typedef void (*startElementNsSAX2Func) (void *ctx,
690: const xmlChar *localname,
691: const xmlChar *prefix,
692: const xmlChar *URI,
693: int nb_namespaces,
694: const xmlChar **namespaces,
695: int nb_attributes,
696: int nb_defaulted,
697: const xmlChar **attributes);
698:
699: /**
700: * endElementNsSAX2Func:
701: * @ctx: the user data (XML parser context)
702: * @localname: the local name of the element
703: * @prefix: the element namespace prefix if available
704: * @URI: the element namespace name if available
705: *
706: * SAX2 callback when an element end has been detected by the parser.
707: * It provides the namespace information for the element.
708: */
709:
710: typedef void (*endElementNsSAX2Func) (void *ctx,
711: const xmlChar *localname,
712: const xmlChar *prefix,
713: const xmlChar *URI);
714:
715:
716: struct _xmlSAXHandler { /* the table of SAX callbacks invoked by the parser */
717: internalSubsetSAXFunc internalSubset;
718: isStandaloneSAXFunc isStandalone;
719: hasInternalSubsetSAXFunc hasInternalSubset;
720: hasExternalSubsetSAXFunc hasExternalSubset;
721: resolveEntitySAXFunc resolveEntity;
722: getEntitySAXFunc getEntity;
723: entityDeclSAXFunc entityDecl;
724: notationDeclSAXFunc notationDecl;
725: attributeDeclSAXFunc attributeDecl;
726: elementDeclSAXFunc elementDecl;
727: unparsedEntityDeclSAXFunc unparsedEntityDecl;
728: setDocumentLocatorSAXFunc setDocumentLocator;
729: startDocumentSAXFunc startDocument;
730: endDocumentSAXFunc endDocument;
731: startElementSAXFunc startElement;
732: endElementSAXFunc endElement;
733: referenceSAXFunc reference;
734: charactersSAXFunc characters;
735: ignorableWhitespaceSAXFunc ignorableWhitespace;
736: processingInstructionSAXFunc processingInstruction;
737: commentSAXFunc comment;
738: warningSAXFunc warning;
739: errorSAXFunc error;
740: fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
741: getParameterEntitySAXFunc getParameterEntity;
742: cdataBlockSAXFunc cdataBlock;
743: externalSubsetSAXFunc externalSubset;
744: unsigned int initialized; /* holds XML_SAX2_MAGIC in SAX2 blocks */
745: /* The following fields are extensions available only in version 2 */
746: void *_private;
747: startElementNsSAX2Func startElementNs;
748: endElementNsSAX2Func endElementNs;
749: xmlStructuredErrorFunc serror;
750: };
751:
752: /*
753: * SAX Version 1
754: */
755: typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
756: typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
757: struct _xmlSAXHandlerV1 { /* same fields, in the same order, as _xmlSAXHandler up to initialized; no version 2 extensions */
758: internalSubsetSAXFunc internalSubset;
759: isStandaloneSAXFunc isStandalone;
760: hasInternalSubsetSAXFunc hasInternalSubset;
761: hasExternalSubsetSAXFunc hasExternalSubset;
762: resolveEntitySAXFunc resolveEntity;
763: getEntitySAXFunc getEntity;
764: entityDeclSAXFunc entityDecl;
765: notationDeclSAXFunc notationDecl;
766: attributeDeclSAXFunc attributeDecl;
767: elementDeclSAXFunc elementDecl;
768: unparsedEntityDeclSAXFunc unparsedEntityDecl;
769: setDocumentLocatorSAXFunc setDocumentLocator;
770: startDocumentSAXFunc startDocument;
771: endDocumentSAXFunc endDocument;
772: startElementSAXFunc startElement;
773: endElementSAXFunc endElement;
774: referenceSAXFunc reference;
775: charactersSAXFunc characters;
776: ignorableWhitespaceSAXFunc ignorableWhitespace;
777: processingInstructionSAXFunc processingInstruction;
778: commentSAXFunc comment;
779: warningSAXFunc warning;
780: errorSAXFunc error;
781: fatalErrorSAXFunc fatalError; /* unused: error() gets all the errors */
782: getParameterEntitySAXFunc getParameterEntity;
783: cdataBlockSAXFunc cdataBlock;
784: externalSubsetSAXFunc externalSubset;
785: unsigned int initialized;
786: };
787:
788:
789: /**
790: * xmlExternalEntityLoader:
791: * @URL: The System ID of the resource requested
792: * @ID: The Public ID of the resource requested
793: * @context: the XML parser context
794: *
795: * External entity loaders types.
796: *
797: * Returns the entity input parser.
798: */
799: typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
800: const char *ID,
801: xmlParserCtxtPtr context);
802:
803: #ifdef __cplusplus
804: }
805: #endif
806:
807: #include <libxml/encoding.h>
808: #include <libxml/xmlIO.h>
809: #include <libxml/globals.h>
810:
811: #ifdef __cplusplus
812: extern "C" {
813: #endif
814:
815:
816: /*
817: * Init/Cleanup
818: */
819: XMLPUBFUN void XMLCALL
820: xmlInitParser (void);
821: XMLPUBFUN void XMLCALL
822: xmlCleanupParser (void);
823:
824: /*
825: * Input functions
826: */
827: XMLPUBFUN int XMLCALL
828: xmlParserInputRead (xmlParserInputPtr in,
829: int len);
830: XMLPUBFUN int XMLCALL
831: xmlParserInputGrow (xmlParserInputPtr in,
832: int len);
833:
834: /*
835: * Basic parsing Interfaces
836: */
837: #ifdef LIBXML_SAX1_ENABLED
838: XMLPUBFUN xmlDocPtr XMLCALL
839: xmlParseDoc (const xmlChar *cur);
840: XMLPUBFUN xmlDocPtr XMLCALL
841: xmlParseFile (const char *filename);
842: XMLPUBFUN xmlDocPtr XMLCALL
843: xmlParseMemory (const char *buffer,
844: int size);
845: #endif /* LIBXML_SAX1_ENABLED */
846: XMLPUBFUN int XMLCALL
847: xmlSubstituteEntitiesDefault(int val);
848: XMLPUBFUN int XMLCALL
849: xmlKeepBlanksDefault (int val);
850: XMLPUBFUN void XMLCALL
851: xmlStopParser (xmlParserCtxtPtr ctxt);
852: XMLPUBFUN int XMLCALL
853: xmlPedanticParserDefault(int val);
854: XMLPUBFUN int XMLCALL
855: xmlLineNumbersDefault (int val);
856:
857: #ifdef LIBXML_SAX1_ENABLED
858: /*
859: * Recovery mode
860: */
861: XMLPUBFUN xmlDocPtr XMLCALL
862: xmlRecoverDoc (const xmlChar *cur);
863: XMLPUBFUN xmlDocPtr XMLCALL
864: xmlRecoverMemory (const char *buffer,
865: int size);
866: XMLPUBFUN xmlDocPtr XMLCALL
867: xmlRecoverFile (const char *filename);
868: #endif /* LIBXML_SAX1_ENABLED */
869:
870: /*
871: * Less common routines and SAX interfaces
872: */
873: XMLPUBFUN int XMLCALL
874: xmlParseDocument (xmlParserCtxtPtr ctxt);
875: XMLPUBFUN int XMLCALL
876: xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
877: #ifdef LIBXML_SAX1_ENABLED
878: XMLPUBFUN int XMLCALL
879: xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
880: void *user_data,
881: const char *filename);
882: XMLPUBFUN int XMLCALL
883: xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
884: void *user_data,
885: const char *buffer,
886: int size);
887: XMLPUBFUN xmlDocPtr XMLCALL
888: xmlSAXParseDoc (xmlSAXHandlerPtr sax,
889: const xmlChar *cur,
890: int recovery);
891: XMLPUBFUN xmlDocPtr XMLCALL
892: xmlSAXParseMemory (xmlSAXHandlerPtr sax,
893: const char *buffer,
894: int size,
895: int recovery);
896: XMLPUBFUN xmlDocPtr XMLCALL
897: xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
898: const char *buffer,
899: int size,
900: int recovery,
901: void *data);
902: XMLPUBFUN xmlDocPtr XMLCALL
903: xmlSAXParseFile (xmlSAXHandlerPtr sax,
904: const char *filename,
905: int recovery);
906: XMLPUBFUN xmlDocPtr XMLCALL
907: xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
908: const char *filename,
909: int recovery,
910: void *data);
911: XMLPUBFUN xmlDocPtr XMLCALL
912: xmlSAXParseEntity (xmlSAXHandlerPtr sax,
913: const char *filename);
914: XMLPUBFUN xmlDocPtr XMLCALL
915: xmlParseEntity (const char *filename);
916: #endif /* LIBXML_SAX1_ENABLED */
917:
918: #ifdef LIBXML_VALID_ENABLED
919: XMLPUBFUN xmlDtdPtr XMLCALL
920: xmlSAXParseDTD (xmlSAXHandlerPtr sax,
921: const xmlChar *ExternalID,
922: const xmlChar *SystemID);
923: XMLPUBFUN xmlDtdPtr XMLCALL
924: xmlParseDTD (const xmlChar *ExternalID,
925: const xmlChar *SystemID);
926: XMLPUBFUN xmlDtdPtr XMLCALL
927: xmlIOParseDTD (xmlSAXHandlerPtr sax,
928: xmlParserInputBufferPtr input,
929: xmlCharEncoding enc);
930: #endif /* LIBXML_VALID_ENABLED */
931: #ifdef LIBXML_SAX1_ENABLED
932: XMLPUBFUN int XMLCALL
933: xmlParseBalancedChunkMemory(xmlDocPtr doc,
934: xmlSAXHandlerPtr sax,
935: void *user_data,
936: int depth,
937: const xmlChar *string,
938: xmlNodePtr *lst);
939: #endif /* LIBXML_SAX1_ENABLED */
940: XMLPUBFUN xmlParserErrors XMLCALL
941: xmlParseInNodeContext (xmlNodePtr node,
942: const char *data,
943: int datalen,
944: int options,
945: xmlNodePtr *lst);
/*
 * Declared only when LIBXML_SAX1_ENABLED is defined.
 * xmlParseBalancedChunkMemoryRecover() takes (doc, sax, user_data, depth,
 * string, lst) followed by a trailing `int recover`.
 * xmlParseExternalEntity() takes the same leading (doc, sax, user_data,
 * depth) arguments but identifies its input by URL and ID instead of an
 * in-memory string.
 */
946: #ifdef LIBXML_SAX1_ENABLED
947: XMLPUBFUN int XMLCALL
948: xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
949: xmlSAXHandlerPtr sax,
950: void *user_data,
951: int depth,
952: const xmlChar *string,
953: xmlNodePtr *lst,
954: int recover);
955: XMLPUBFUN int XMLCALL
956: xmlParseExternalEntity (xmlDocPtr doc,
957: xmlSAXHandlerPtr sax,
958: void *user_data,
959: int depth,
960: const xmlChar *URL,
961: const xmlChar *ID,
962: xmlNodePtr *lst);
963: #endif /* LIBXML_SAX1_ENABLED */
/*
 * Takes an existing xmlParserCtxtPtr plus (URL, ID, lst) and returns an int.
 * Unlike xmlParseExternalEntity() it is declared unconditionally, i.e. it
 * is not inside an LIBXML_SAX1_ENABLED conditional.
 */
964: XMLPUBFUN int XMLCALL
965: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
966: const xmlChar *URL,
967: const xmlChar *ID,
968: xmlNodePtr *lst);
969:
970: /*
971: * Parser contexts handling.
 *
 * xmlNewParserCtxt() and xmlCreateDocParserCtxt() return an
 * xmlParserCtxtPtr; the Init/Clear/Free functions take an existing one.
 * Only xmlInitParserCtxt() reports a result (int); Clear and Free are void.
 * xmlSetupParserForBuffer() is declared only when LIBXML_SAX1_ENABLED
 * is defined.
972: */
973: XMLPUBFUN xmlParserCtxtPtr XMLCALL
974: xmlNewParserCtxt (void);
975: XMLPUBFUN int XMLCALL
976: xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
977: XMLPUBFUN void XMLCALL
978: xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
979: XMLPUBFUN void XMLCALL
980: xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
981: #ifdef LIBXML_SAX1_ENABLED
982: XMLPUBFUN void XMLCALL
983: xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
984: const xmlChar* buffer,
985: const char *filename);
986: #endif /* LIBXML_SAX1_ENABLED */
987: XMLPUBFUN xmlParserCtxtPtr XMLCALL
988: xmlCreateDocParserCtxt (const xmlChar *cur);
989:
990: #ifdef LIBXML_LEGACY_ENABLED
991: /*
992: * Reading/setting optional parsing features.
 *
 * These prototypes exist only when LIBXML_LEGACY_ENABLED is defined.
 * xmlGetFeature()/xmlSetFeature() address a feature by name (a C string)
 * on a given parser context and pass its value through an untyped
 * void pointer; all three functions return an int.
993: */
994: XMLPUBFUN int XMLCALL
995: xmlGetFeaturesList (int *len,
996: const char **result);
997: XMLPUBFUN int XMLCALL
998: xmlGetFeature (xmlParserCtxtPtr ctxt,
999: const char *name,
1000: void *result);
1001: XMLPUBFUN int XMLCALL
1002: xmlSetFeature (xmlParserCtxtPtr ctxt,
1003: const char *name,
1004: void *value);
1005: #endif /* LIBXML_LEGACY_ENABLED */
1006:
1007: #ifdef LIBXML_PUSH_ENABLED
1008: /*
1009: * Interfaces for the Push mode.
 *
 * These prototypes exist only when LIBXML_PUSH_ENABLED is defined.
 * Input is handed over as (chunk, size) byte buffers:
 * xmlCreatePushParserCtxt() returns the xmlParserCtxtPtr that
 * xmlParseChunk() subsequently takes.
 * NOTE(review): a non-zero `terminate` presumably marks the final chunk --
 * confirm in parser.c.
1010: */
1011: XMLPUBFUN xmlParserCtxtPtr XMLCALL
1012: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
1013: void *user_data,
1014: const char *chunk,
1015: int size,
1016: const char *filename);
1017: XMLPUBFUN int XMLCALL
1018: xmlParseChunk (xmlParserCtxtPtr ctxt,
1019: const char *chunk,
1020: int size,
1021: int terminate);
1022: #endif /* LIBXML_PUSH_ENABLED */
1023:
1024: /*
1025: * Special I/O mode.
 *
 * xmlCreateIOParserCtxt() returns an xmlParserCtxtPtr and takes
 * caller-supplied read/close callbacks together with the `ioctx` pointer.
 * xmlNewIOInputStream() returns an xmlParserInputPtr for an
 * xmlParserInputBufferPtr. Both take an explicit xmlCharEncoding.
 * NOTE(review): `ioctx` is presumably handed back to the callbacks --
 * confirm in the I/O layer.
1026: */
1027:
1028: XMLPUBFUN xmlParserCtxtPtr XMLCALL
1029: xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
1030: void *user_data,
1031: xmlInputReadCallback ioread,
1032: xmlInputCloseCallback ioclose,
1033: void *ioctx,
1034: xmlCharEncoding enc);
1035:
1036: XMLPUBFUN xmlParserInputPtr XMLCALL
1037: xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
1038: xmlParserInputBufferPtr input,
1039: xmlCharEncoding enc);
1040:
1041: /*
1042: * Node infos.
 *
 * Several parameters below are written `const xmlNodePtr`,
 * `const xmlParserCtxtPtr`, etc. Assuming the ...Ptr names are pointer
 * typedefs (they are declared outside this section), the const qualifies
 * the pointer itself, not the object it points to, so these signatures do
 * not promise that the pointee is left unmodified.
 * xmlParserFindNodeInfo() is the only one whose result points to const data.
1043: */
1044: XMLPUBFUN const xmlParserNodeInfo* XMLCALL
1045: xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
1046: const xmlNodePtr node);
1047: XMLPUBFUN void XMLCALL
1048: xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1049: XMLPUBFUN void XMLCALL
1050: xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1051: XMLPUBFUN unsigned long XMLCALL
1052: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1053: const xmlNodePtr node);
1054: XMLPUBFUN void XMLCALL
1055: xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
1056: const xmlParserNodeInfoPtr info);
1057:
1058: /*
1059: * External entities handling actually implemented in xmlIO.
 *
 * The Set/Get pair exchanges an xmlExternalEntityLoader value.
 * xmlLoadExternalEntity() takes (URL, ID, ctxt) and returns an
 * xmlParserInputPtr.
1060: */
1061:
1062: XMLPUBFUN void XMLCALL
1063: xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
1064: XMLPUBFUN xmlExternalEntityLoader XMLCALL
1065: xmlGetExternalEntityLoader(void);
1066: XMLPUBFUN xmlParserInputPtr XMLCALL
1067: xmlLoadExternalEntity (const char *URL,
1068: const char *ID,
1069: xmlParserCtxtPtr ctxt);
1070:
1071: /*
1072: * Index lookup, actually implemented in the encoding module.
 *
 * xmlByteConsumed() takes a parser context and returns a long.
 * NOTE(review): the meaning of negative results is not visible here --
 * confirm in the encoding module.
1073: */
1074: XMLPUBFUN long XMLCALL
1075: xmlByteConsumed (xmlParserCtxtPtr ctxt);
1076:
1077: /*
1078: * New set of simpler/more flexible APIs
1079: */
1080: /**
1081: * xmlParserOption:
1082: *
1083: * This is the set of XML parser options that can be passed down
1084: * to the xmlReadDoc() and similar calls.
 *
 * Every enumerator is a distinct single bit (1<<0 through 1<<20), so
 * several options can be combined with bitwise OR into one int value.
1085: */
1086: typedef enum {
1087: XML_PARSE_RECOVER = 1<<0, /* recover on errors */
1088: XML_PARSE_NOENT = 1<<1, /* substitute entities */
1089: XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */
1090: XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */
1091: XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */
1092: XML_PARSE_NOERROR = 1<<5, /* suppress error reports */
1093: XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */
1094: XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
1095: XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
1096: XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
1097: XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitution */
1098: XML_PARSE_NONET = 1<<11,/* Forbid network access */
1099: XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */
1100: XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespace declarations */
1101: XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */
1102: XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */
1103: XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of
1104: the tree allowed afterwards (will possibly
1105: crash if you try to modify the tree) */
1106: XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */
1107: XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */
1108: XML_PARSE_HUGE = 1<<19, /* relax any hardcoded limit from the parser */
1109: XML_PARSE_OLDSAX = 1<<20 /* parse using SAX2 interface from before 2.7.0 */
1110: } xmlParserOption;
1111:
/*
 * Operations on an existing parser context.
 * xmlCtxtResetPush() is declared unconditionally here: unlike the other
 * push-mode prototypes it is not inside the LIBXML_PUSH_ENABLED conditional.
 * NOTE(review): the `options` argument of xmlCtxtUseOptions() is presumably
 * a mask of xmlParserOption bits, and the int results status codes --
 * confirm in parser.c.
 */
1112: XMLPUBFUN void XMLCALL
1113: xmlCtxtReset (xmlParserCtxtPtr ctxt);
1114: XMLPUBFUN int XMLCALL
1115: xmlCtxtResetPush (xmlParserCtxtPtr ctxt,
1116: const char *chunk,
1117: int size,
1118: const char *filename,
1119: const char *encoding);
1120: XMLPUBFUN int XMLCALL
1121: xmlCtxtUseOptions (xmlParserCtxtPtr ctxt,
1122: int options);
/*
 * xmlRead... family: one function per kind of input -- an xmlChar string,
 * a URL, a (buffer, size) memory block, a file descriptor, or a pair of
 * read/close callbacks with their ioctx.
 * Every variant ends with the same (encoding, options) arguments and
 * returns an xmlDocPtr.
 */
1123: XMLPUBFUN xmlDocPtr XMLCALL
1124: xmlReadDoc (const xmlChar *cur,
1125: const char *URL,
1126: const char *encoding,
1127: int options);
1128: XMLPUBFUN xmlDocPtr XMLCALL
1129: xmlReadFile (const char *URL,
1130: const char *encoding,
1131: int options);
1132: XMLPUBFUN xmlDocPtr XMLCALL
1133: xmlReadMemory (const char *buffer,
1134: int size,
1135: const char *URL,
1136: const char *encoding,
1137: int options);
1138: XMLPUBFUN xmlDocPtr XMLCALL
1139: xmlReadFd (int fd,
1140: const char *URL,
1141: const char *encoding,
1142: int options);
1143: XMLPUBFUN xmlDocPtr XMLCALL
1144: xmlReadIO (xmlInputReadCallback ioread,
1145: xmlInputCloseCallback ioclose,
1146: void *ioctx,
1147: const char *URL,
1148: const char *encoding,
1149: int options);
/*
 * xmlCtxtRead... family: the same five kinds of input as the xmlRead...
 * functions, each with an xmlParserCtxtPtr prepended as first argument.
 * All return an xmlDocPtr and end with (encoding, options).
 * Naming difference worth knowing: xmlCtxtReadFile() calls its path
 * parameter `filename`, whereas xmlReadFile() calls it `URL`.
 */
1150: XMLPUBFUN xmlDocPtr XMLCALL
1151: xmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
1152: const xmlChar *cur,
1153: const char *URL,
1154: const char *encoding,
1155: int options);
1156: XMLPUBFUN xmlDocPtr XMLCALL
1157: xmlCtxtReadFile (xmlParserCtxtPtr ctxt,
1158: const char *filename,
1159: const char *encoding,
1160: int options);
1161: XMLPUBFUN xmlDocPtr XMLCALL
1162: xmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
1163: const char *buffer,
1164: int size,
1165: const char *URL,
1166: const char *encoding,
1167: int options);
1168: XMLPUBFUN xmlDocPtr XMLCALL
1169: xmlCtxtReadFd (xmlParserCtxtPtr ctxt,
1170: int fd,
1171: const char *URL,
1172: const char *encoding,
1173: int options);
1174: XMLPUBFUN xmlDocPtr XMLCALL
1175: xmlCtxtReadIO (xmlParserCtxtPtr ctxt,
1176: xmlInputReadCallback ioread,
1177: xmlInputCloseCallback ioclose,
1178: void *ioctx,
1179: const char *URL,
1180: const char *encoding,
1181: int options);
1182:
1183: /*
1184: * Library wide options
1185: */
1186: /**
1187: * xmlFeature:
1188: *
1189: * Used to examine the existence of features that can be enabled
1190: * or disabled at compile-time.
1191: * They used to be called XML_FEATURE_xxx but this clashed with Expat
 *
 * Unlike xmlParserOption these are plain sequential identifiers (1..32),
 * not bit flags, so they cannot be OR'ed together.
1192: */
1193: typedef enum {
1194: XML_WITH_THREAD = 1,
1195: XML_WITH_TREE = 2,
1196: XML_WITH_OUTPUT = 3,
1197: XML_WITH_PUSH = 4,
1198: XML_WITH_READER = 5,
1199: XML_WITH_PATTERN = 6,
1200: XML_WITH_WRITER = 7,
1201: XML_WITH_SAX1 = 8,
1202: XML_WITH_FTP = 9,
1203: XML_WITH_HTTP = 10,
1204: XML_WITH_VALID = 11,
1205: XML_WITH_HTML = 12,
1206: XML_WITH_LEGACY = 13,
1207: XML_WITH_C14N = 14,
1208: XML_WITH_CATALOG = 15,
1209: XML_WITH_XPATH = 16,
1210: XML_WITH_XPTR = 17,
1211: XML_WITH_XINCLUDE = 18,
1212: XML_WITH_ICONV = 19,
1213: XML_WITH_ISO8859X = 20,
1214: XML_WITH_UNICODE = 21,
1215: XML_WITH_REGEXP = 22,
1216: XML_WITH_AUTOMATA = 23,
1217: XML_WITH_EXPR = 24,
1218: XML_WITH_SCHEMAS = 25,
1219: XML_WITH_SCHEMATRON = 26,
1220: XML_WITH_MODULES = 27,
1221: XML_WITH_DEBUG = 28,
1222: XML_WITH_DEBUG_MEM = 29,
1223: XML_WITH_DEBUG_RUN = 30,
1224: XML_WITH_ZLIB = 31,
1225: XML_WITH_ICU = 32,
1226: XML_WITH_NONE = 99999 /* just to be sure of allocation size */
1227: } xmlFeature;
1228:
/*
 * Takes a single xmlFeature value and returns an int.
 * NOTE(review): presumably non-zero when the feature was compiled in and
 * zero otherwise -- confirm in parser.c.
 */
1229: XMLPUBFUN int XMLCALL
1230: xmlHasFeature (xmlFeature feature);
1231:
1232: #ifdef __cplusplus
1233: }
1234: #endif
1235: #endif /* __XML_PARSER_H__ */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>