Annotation of embedaddon/libxml2/include/libxml/HTMLparser.h, revision 1.1
1.1 ! misho 1: /*
! 2: * Summary: interface for an HTML 4.0 non-verifying parser
! 3: * Description: this module implements an HTML 4.0 non-verifying parser
! 4: * with API compatible with the XML parser ones. It should
! 5: * be able to parse "real world" HTML, even if severely
! 6: * broken from a specification point of view.
! 7: *
! 8: * Copy: See Copyright for the status of this software.
! 9: *
! 10: * Author: Daniel Veillard
! 11: */
! 12:
! 13: #ifndef __HTML_PARSER_H__
! 14: #define __HTML_PARSER_H__
! 15: #include <libxml/xmlversion.h>
! 16: #include <libxml/parser.h>
! 17:
! 18: #ifdef LIBXML_HTML_ENABLED
! 19:
! 20: #ifdef __cplusplus
! 21: extern "C" {
! 22: #endif
! 23:
! 24: /*
! 25: * Most of the back-end structures from XML and HTML are shared: each HTML type below is a plain alias of the XML parser type.
! 26: */
! 27: typedef xmlParserCtxt htmlParserCtxt;
! 28: typedef xmlParserCtxtPtr htmlParserCtxtPtr;
! 29: typedef xmlParserNodeInfo htmlParserNodeInfo;
! 30: typedef xmlSAXHandler htmlSAXHandler;
! 31: typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
! 32: typedef xmlParserInput htmlParserInput;
! 33: typedef xmlParserInputPtr htmlParserInputPtr;
! 34: typedef xmlDocPtr htmlDocPtr;
! 35: typedef xmlNodePtr htmlNodePtr;
! 36:
! 37: /*
! 38: * Internal description of an HTML element, representing HTML 4.01
! 39: * and XHTML 1.0 (which share the same structure).
! 40: */
! 41: typedef struct _htmlElemDesc htmlElemDesc;
! 42: typedef htmlElemDesc *htmlElemDescPtr;
! 43: struct _htmlElemDesc {
! 44: const char *name; /* The tag name */
! 45: char startTag; /* Whether the start tag can be implied */
! 46: char endTag; /* Whether the end tag can be implied */
! 47: char saveEndTag; /* Whether the end tag should be saved */
! 48: char empty; /* Is this an empty element? */
! 49: char depr; /* Is this a deprecated element? */
! 50: char dtd; /* 1: only in the Loose DTD, 2: only in the Frameset DTD */
! 51: char isinline; /* 0: block element, 1: inline element */
! 52: const char *desc; /* A description of the element */
! 53:
! 54: /* NRK Jan.2003
! 55: * New fields encapsulating HTML structure
! 56: *
! 57: * Bugs:
! 58: * This is a very limited representation. It fails to tell us when
! 59: * an element *requires* subelements (we only have whether they're
! 60: * allowed or not), and it doesn't tell us where CDATA and PCDATA
! 61: * are allowed. Some element relationships are not fully represented:
! 62: * these are flagged with the word MODIFIER.
! 63: */
! 64: const char** subelts; /* names of the sub-elements allowed in this element */
! 65: const char* defaultsubelt; /* sub-element suggested for auto-repair
! 66: if necessary, or NULL */
! 67: const char** attrs_opt; /* Optional attributes */
! 68: const char** attrs_depr; /* Additional deprecated attributes */
! 69: const char** attrs_req; /* Required attributes */
! 70: };
! 71:
! 72: /*
! 73: * Internal description of an HTML entity: its character value, name and description.
! 74: */
! 75: typedef struct _htmlEntityDesc htmlEntityDesc;
! 76: typedef htmlEntityDesc *htmlEntityDescPtr;
! 77: struct _htmlEntityDesc {
! 78: unsigned int value; /* the Unicode value of the character */
! 79: const char *name; /* The entity name */
! 80: const char *desc; /* A description of the entity */
! 81: };
! 82:
! 83: /*
! 84: * There are only a few public functions. The lookup functions return pointers to const descriptors.
! 85: */
! 86: XMLPUBFUN const htmlElemDesc * XMLCALL
! 87: htmlTagLookup (const xmlChar *tag);
! 88: XMLPUBFUN const htmlEntityDesc * XMLCALL
! 89: htmlEntityLookup(const xmlChar *name);
! 90: XMLPUBFUN const htmlEntityDesc * XMLCALL
! 91: htmlEntityValueLookup(unsigned int value);
! 92:
! 93: XMLPUBFUN int XMLCALL
! 94: htmlIsAutoClosed(htmlDocPtr doc,
! 95: htmlNodePtr elem);
! 96: XMLPUBFUN int XMLCALL
! 97: htmlAutoCloseTag(htmlDocPtr doc,
! 98: const xmlChar *name,
! 99: htmlNodePtr elem);
! 100: XMLPUBFUN const htmlEntityDesc * XMLCALL
! 101: htmlParseEntityRef(htmlParserCtxtPtr ctxt,
! 102: const xmlChar **str);
! 103: XMLPUBFUN int XMLCALL
! 104: htmlParseCharRef(htmlParserCtxtPtr ctxt);
! 105: XMLPUBFUN void XMLCALL
! 106: htmlParseElement(htmlParserCtxtPtr ctxt);
! 107:
! 108: XMLPUBFUN htmlParserCtxtPtr XMLCALL
! 109: htmlNewParserCtxt(void);
! 110:
! 111: XMLPUBFUN htmlParserCtxtPtr XMLCALL
! 112: htmlCreateMemoryParserCtxt(const char *buffer,
! 113: int size);
! 114:
! 115: XMLPUBFUN int XMLCALL
! 116: htmlParseDocument(htmlParserCtxtPtr ctxt);
! 117: XMLPUBFUN htmlDocPtr XMLCALL
! 118: htmlSAXParseDoc (xmlChar *cur,
! 119: const char *encoding,
! 120: htmlSAXHandlerPtr sax,
! 121: void *userData);
! 122: XMLPUBFUN htmlDocPtr XMLCALL
! 123: htmlParseDoc (xmlChar *cur,
! 124: const char *encoding);
! 125: XMLPUBFUN htmlDocPtr XMLCALL
! 126: htmlSAXParseFile(const char *filename,
! 127: const char *encoding,
! 128: htmlSAXHandlerPtr sax,
! 129: void *userData);
! 130: XMLPUBFUN htmlDocPtr XMLCALL
! 131: htmlParseFile (const char *filename,
! 132: const char *encoding);
! 133: XMLPUBFUN int XMLCALL
! 134: UTF8ToHtml (unsigned char *out,
! 135: int *outlen,
! 136: const unsigned char *in,
! 137: int *inlen);
! 138: XMLPUBFUN int XMLCALL
! 139: htmlEncodeEntities(unsigned char *out,
! 140: int *outlen,
! 141: const unsigned char *in,
! 142: int *inlen, int quoteChar);
! 143: XMLPUBFUN int XMLCALL
! 144: htmlIsScriptAttribute(const xmlChar *name);
! 145: XMLPUBFUN int XMLCALL
! 146: htmlHandleOmittedElem(int val);
! 147:
! 148: #ifdef LIBXML_PUSH_ENABLED
! 149: /**
! 150: * Interfaces for the Push mode; declared only when LIBXML_PUSH_ENABLED is defined.
! 151: */
! 152: XMLPUBFUN htmlParserCtxtPtr XMLCALL
! 153: htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
! 154: void *user_data,
! 155: const char *chunk,
! 156: int size,
! 157: const char *filename,
! 158: xmlCharEncoding enc);
! 159: XMLPUBFUN int XMLCALL
! 160: htmlParseChunk (htmlParserCtxtPtr ctxt,
! 161: const char *chunk,
! 162: int size,
! 163: int terminate);
! 164: #endif /* LIBXML_PUSH_ENABLED */
! 165:
! 166: XMLPUBFUN void XMLCALL
! 167: htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); /* NOTE(review): presumably releases a context returned by the parser-context constructors declared above - confirm */
! 168:
! 169: /*
! 170: * New set of simpler/more flexible APIs
! 171: */
! 172: /**
! 173: * htmlParserOption:
! 174: *
! 175: * This is the set of HTML parser options that can be passed down
! 176: * to the htmlReadDoc() and similar calls.
! 177: */
! 178: typedef enum {
! 179: HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
! 180: HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
! 181: HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
! 182: HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
! 183: HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
! 184: HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
! 185: HTML_PARSE_NONET = 1<<11,/* Forbid network access */
! 186: HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
! 187: HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
! 188: } htmlParserOption;
! 189:
! 190: XMLPUBFUN void XMLCALL
! 191: htmlCtxtReset (htmlParserCtxtPtr ctxt);
! 192: XMLPUBFUN int XMLCALL
! 193: htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
! 194: int options); /* NOTE(review): presumably a bitwise OR of htmlParserOption values - confirm */
! 195: XMLPUBFUN htmlDocPtr XMLCALL
! 196: htmlReadDoc (const xmlChar *cur,
! 197: const char *URL,
! 198: const char *encoding,
! 199: int options);
! 200: XMLPUBFUN htmlDocPtr XMLCALL
! 201: htmlReadFile (const char *URL,
! 202: const char *encoding,
! 203: int options);
! 204: XMLPUBFUN htmlDocPtr XMLCALL
! 205: htmlReadMemory (const char *buffer,
! 206: int size,
! 207: const char *URL,
! 208: const char *encoding,
! 209: int options);
! 210: XMLPUBFUN htmlDocPtr XMLCALL
! 211: htmlReadFd (int fd,
! 212: const char *URL,
! 213: const char *encoding,
! 214: int options);
! 215: XMLPUBFUN htmlDocPtr XMLCALL
! 216: htmlReadIO (xmlInputReadCallback ioread,
! 217: xmlInputCloseCallback ioclose,
! 218: void *ioctx,
! 219: const char *URL,
! 220: const char *encoding,
! 221: int options);
! 222: XMLPUBFUN htmlDocPtr XMLCALL
! 223: htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, /* xmlParserCtxtPtr is the same type as htmlParserCtxtPtr (typedef alias) */
! 224: const xmlChar *cur,
! 225: const char *URL,
! 226: const char *encoding,
! 227: int options);
! 228: XMLPUBFUN htmlDocPtr XMLCALL
! 229: htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
! 230: const char *filename,
! 231: const char *encoding,
! 232: int options);
! 233: XMLPUBFUN htmlDocPtr XMLCALL
! 234: htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
! 235: const char *buffer,
! 236: int size,
! 237: const char *URL,
! 238: const char *encoding,
! 239: int options);
! 240: XMLPUBFUN htmlDocPtr XMLCALL
! 241: htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
! 242: int fd,
! 243: const char *URL,
! 244: const char *encoding,
! 245: int options);
! 246: XMLPUBFUN htmlDocPtr XMLCALL
! 247: htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
! 248: xmlInputReadCallback ioread,
! 249: xmlInputCloseCallback ioclose,
! 250: void *ioctx,
! 251: const char *URL,
! 252: const char *encoding,
! 253: int options);
! 254:
! 255: /* NRK/Jan2003: further knowledge of HTML structure; status codes returned by the html*Status/htmlAttrAllowed checks below
! 256: */
! 257: typedef enum {
! 258: HTML_NA = 0 , /* something we don't check at all */
! 259: HTML_INVALID = 0x1 ,
! 260: HTML_DEPRECATED = 0x2 ,
! 261: HTML_VALID = 0x4 ,
! 262: HTML_REQUIRED = 0xc /* 0x8 | HTML_VALID: VALID bit set so ( & HTML_VALID ) is TRUE */
! 263: } htmlStatus ;
! 264:
! 265: /* These take an htmlElemDesc rather than a tag name, to emphasise the fact
! 266: that passing a name would otherwise incur a lookup overhead.
! 267: */
! 268: XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
! 269: XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ; /* (parent description, child tag name), as used by htmlElementAllowedHereDesc */
! 270: XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
! 271: XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ; /* NOTE(review): if htmlNodePtr is a pointer typedef, this const qualifies the pointer, not the node - confirm intent */
! 272: /**
! 273: * htmlDefaultSubelement:
! 274: * @elt: pointer to an HTML element description (any pointer expression)
! 275: *
! 276: * Returns the default subelement name for this element (its defaultsubelt field, which may be NULL)
! 277: */
! 278: #define htmlDefaultSubelement(elt) (elt)->defaultsubelt
! 279: /**
! 280: * htmlElementAllowedHereDesc:
! 281: * @parent: HTML parent element
! 282: * @elt: HTML element
! 283: *
! 284: * Checks whether an HTML element description may be a
! 285: * direct child of the specified element, by forwarding @parent and the name of @elt to htmlElementAllowedHere().
! 286: *
! 287: * Returns the result of htmlElementAllowedHere(): 1 if allowed; 0 otherwise.
! 288: */
! 289: #define htmlElementAllowedHereDesc(parent,elt) \
! 290: htmlElementAllowedHere((parent), (elt)->name)
! 291: /**
! 292: * htmlRequiredAttrs:
! 293: * @elt: HTML element
! 294: *
! 295: * Returns the attributes required for the specified element (its attrs_req field).
! 296: */
! 297: #define htmlRequiredAttrs(elt) (elt)->attrs_req
! 298:
! 299:
! 300: #ifdef __cplusplus
! 301: }
! 302: #endif
! 303:
! 304: #endif /* LIBXML_HTML_ENABLED */
! 305: #endif /* __HTML_PARSER_H__ */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>