File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / testHTML.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:37:57 2012 UTC (12 years, 4 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, v2_7_8, HEAD
libxml2

    1: /*
    2:  * testHTML.c : a small tester program for HTML input.
    3:  *
    4:  * See Copyright for the status of this software.
    5:  *
    6:  * daniel@veillard.com
    7:  */
    8: 
    9: #include "libxml.h"
   10: 
   11: #ifdef LIBXML_HTML_ENABLED
   12: 
   13: #include <string.h>
   14: #include <stdarg.h>
   15: 
   16: 
   17: #ifdef HAVE_SYS_TYPES_H
   18: #include <sys/types.h>
   19: #endif
   20: #ifdef HAVE_SYS_STAT_H
   21: #include <sys/stat.h>
   22: #endif
   23: #ifdef HAVE_FCNTL_H
   24: #include <fcntl.h>
   25: #endif
   26: #ifdef HAVE_UNISTD_H
   27: #include <unistd.h>
   28: #endif
   29: #ifdef HAVE_STDLIB_H
   30: #include <stdlib.h>
   31: #endif
   32: 
   33: #include <libxml/xmlmemory.h>
   34: #include <libxml/HTMLparser.h>
   35: #include <libxml/HTMLtree.h>
   36: #include <libxml/debugXML.h>
   37: #include <libxml/xmlerror.h>
   38: #include <libxml/globals.h>
   39: 
   40: #ifdef LIBXML_DEBUG_ENABLED
   41: static int debug = 0;
   42: #endif
   43: static int copy = 0;
   44: static int sax = 0;
   45: static int repeat = 0;
   46: static int noout = 0;
   47: #ifdef LIBXML_PUSH_ENABLED
   48: static int push = 0;
   49: #endif /* LIBXML_PUSH_ENABLED */
   50: static char *encoding = NULL;
   51: static int options = 0;
   52: 
   53: static xmlSAXHandler emptySAXHandlerStruct = {
   54:     NULL, /* internalSubset */
   55:     NULL, /* isStandalone */
   56:     NULL, /* hasInternalSubset */
   57:     NULL, /* hasExternalSubset */
   58:     NULL, /* resolveEntity */
   59:     NULL, /* getEntity */
   60:     NULL, /* entityDecl */
   61:     NULL, /* notationDecl */
   62:     NULL, /* attributeDecl */
   63:     NULL, /* elementDecl */
   64:     NULL, /* unparsedEntityDecl */
   65:     NULL, /* setDocumentLocator */
   66:     NULL, /* startDocument */
   67:     NULL, /* endDocument */
   68:     NULL, /* startElement */
   69:     NULL, /* endElement */
   70:     NULL, /* reference */
   71:     NULL, /* characters */
   72:     NULL, /* ignorableWhitespace */
   73:     NULL, /* processingInstruction */
   74:     NULL, /* comment */
   75:     NULL, /* xmlParserWarning */
   76:     NULL, /* xmlParserError */
   77:     NULL, /* xmlParserError */
   78:     NULL, /* getParameterEntity */
   79:     NULL, /* cdataBlock */
   80:     NULL, /* externalSubset */
   81:     1,    /* initialized */
   82:     NULL, /* private */
   83:     NULL, /* startElementNsSAX2Func */
   84:     NULL, /* endElementNsSAX2Func */
   85:     NULL  /* xmlStructuredErrorFunc */
   86: };
   87: 
   88: static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
   89: extern xmlSAXHandlerPtr debugSAXHandler;
   90: 
   91: /************************************************************************
   92:  *									*
   93:  *				Debug Handlers				*
   94:  *									*
   95:  ************************************************************************/
   96: 
   97: /**
   98:  * isStandaloneDebug:
   99:  * @ctxt:  An XML parser context
  100:  *
  101:  * Is this document tagged standalone ?
  102:  *
  103:  * Returns 1 if true
  104:  */
  105: static int
  106: isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
  107: {
  108:     fprintf(stdout, "SAX.isStandalone()\n");
  109:     return(0);
  110: }
  111: 
  112: /**
  113:  * hasInternalSubsetDebug:
  114:  * @ctxt:  An XML parser context
  115:  *
  116:  * Does this document has an internal subset
  117:  *
  118:  * Returns 1 if true
  119:  */
  120: static int
  121: hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
  122: {
  123:     fprintf(stdout, "SAX.hasInternalSubset()\n");
  124:     return(0);
  125: }
  126: 
  127: /**
  128:  * hasExternalSubsetDebug:
  129:  * @ctxt:  An XML parser context
  130:  *
  131:  * Does this document has an external subset
  132:  *
  133:  * Returns 1 if true
  134:  */
  135: static int
  136: hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
  137: {
  138:     fprintf(stdout, "SAX.hasExternalSubset()\n");
  139:     return(0);
  140: }
  141: 
  142: /**
  143:  * hasInternalSubsetDebug:
  144:  * @ctxt:  An XML parser context
  145:  *
  146:  * Does this document has an internal subset
  147:  */
  148: static void
  149: internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
  150: 	       const xmlChar *ExternalID, const xmlChar *SystemID)
  151: {
  152:     fprintf(stdout, "SAX.internalSubset(%s,", name);
  153:     if (ExternalID == NULL)
  154: 	fprintf(stdout, " ,");
  155:     else
  156: 	fprintf(stdout, " %s,", ExternalID);
  157:     if (SystemID == NULL)
  158: 	fprintf(stdout, " )\n");
  159:     else
  160: 	fprintf(stdout, " %s)\n", SystemID);
  161: }
  162: 
  163: /**
  164:  * resolveEntityDebug:
  165:  * @ctxt:  An XML parser context
  166:  * @publicId: The public ID of the entity
  167:  * @systemId: The system ID of the entity
  168:  *
  169:  * Special entity resolver, better left to the parser, it has
  170:  * more context than the application layer.
  171:  * The default behaviour is to NOT resolve the entities, in that case
  172:  * the ENTITY_REF nodes are built in the structure (and the parameter
  173:  * values).
  174:  *
  175:  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
  176:  */
  177: static xmlParserInputPtr
  178: resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
  179: {
  180:     /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
  181: 
  182:     
  183:     fprintf(stdout, "SAX.resolveEntity(");
  184:     if (publicId != NULL)
  185: 	fprintf(stdout, "%s", (char *)publicId);
  186:     else
  187: 	fprintf(stdout, " ");
  188:     if (systemId != NULL)
  189: 	fprintf(stdout, ", %s)\n", (char *)systemId);
  190:     else
  191: 	fprintf(stdout, ", )\n");
  192: /*********
  193:     if (systemId != NULL) {
  194:         return(xmlNewInputFromFile(ctxt, (char *) systemId));
  195:     }
  196:  *********/
  197:     return(NULL);
  198: }
  199: 
  200: /**
  201:  * getEntityDebug:
  202:  * @ctxt:  An XML parser context
  203:  * @name: The entity name
  204:  *
  205:  * Get an entity by name
  206:  *
  207:  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
  208:  */
  209: static xmlEntityPtr
  210: getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
  211: {
  212:     fprintf(stdout, "SAX.getEntity(%s)\n", name);
  213:     return(NULL);
  214: }
  215: 
  216: /**
  217:  * getParameterEntityDebug:
  218:  * @ctxt:  An XML parser context
  219:  * @name: The entity name
  220:  *
  221:  * Get a parameter entity by name
  222:  *
  223:  * Returns the xmlParserInputPtr
  224:  */
  225: static xmlEntityPtr
  226: getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
  227: {
  228:     fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
  229:     return(NULL);
  230: }
  231: 
  232: 
  233: /**
  234:  * entityDeclDebug:
  235:  * @ctxt:  An XML parser context
  236:  * @name:  the entity name 
  237:  * @type:  the entity type 
  238:  * @publicId: The public ID of the entity
  239:  * @systemId: The system ID of the entity
  240:  * @content: the entity value (without processing).
  241:  *
  242:  * An entity definition has been parsed
  243:  */
  244: static void
  245: entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
  246:           const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
  247: {
  248:     fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
  249:             name, type, publicId, systemId, content);
  250: }
  251: 
  252: /**
  253:  * attributeDeclDebug:
  254:  * @ctxt:  An XML parser context
  255:  * @name:  the attribute name 
  256:  * @type:  the attribute type 
  257:  *
  258:  * An attribute definition has been parsed
  259:  */
  260: static void
  261: attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
  262:               int type, int def, const xmlChar *defaultValue,
  263: 	      xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
  264: {
  265:     fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
  266:             elem, name, type, def, defaultValue);
  267: }
  268: 
  269: /**
  270:  * elementDeclDebug:
  271:  * @ctxt:  An XML parser context
  272:  * @name:  the element name 
  273:  * @type:  the element type 
  274:  * @content: the element value (without processing).
  275:  *
  276:  * An element definition has been parsed
  277:  */
  278: static void
  279: elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
  280: 	    xmlElementContentPtr content ATTRIBUTE_UNUSED)
  281: {
  282:     fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
  283:             name, type);
  284: }
  285: 
  286: /**
  287:  * notationDeclDebug:
  288:  * @ctxt:  An XML parser context
  289:  * @name: The name of the notation
  290:  * @publicId: The public ID of the entity
  291:  * @systemId: The system ID of the entity
  292:  *
  293:  * What to do when a notation declaration has been parsed.
  294:  */
  295: static void
  296: notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
  297: 	     const xmlChar *publicId, const xmlChar *systemId)
  298: {
  299:     fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
  300:             (char *) name, (char *) publicId, (char *) systemId);
  301: }
  302: 
  303: /**
  304:  * unparsedEntityDeclDebug:
  305:  * @ctxt:  An XML parser context
  306:  * @name: The name of the entity
  307:  * @publicId: The public ID of the entity
  308:  * @systemId: The system ID of the entity
  309:  * @notationName: the name of the notation
  310:  *
  311:  * What to do when an unparsed entity declaration is parsed
  312:  */
  313: static void
  314: unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
  315: 		   const xmlChar *publicId, const xmlChar *systemId,
  316: 		   const xmlChar *notationName)
  317: {
  318:     fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
  319:             (char *) name, (char *) publicId, (char *) systemId,
  320: 	    (char *) notationName);
  321: }
  322: 
  323: /**
  324:  * setDocumentLocatorDebug:
  325:  * @ctxt:  An XML parser context
  326:  * @loc: A SAX Locator
  327:  *
  328:  * Receive the document locator at startup, actually xmlDefaultSAXLocator
  329:  * Everything is available on the context, so this is useless in our case.
  330:  */
  331: static void
  332: setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
  333: {
  334:     fprintf(stdout, "SAX.setDocumentLocator()\n");
  335: }
  336: 
  337: /**
  338:  * startDocumentDebug:
  339:  * @ctxt:  An XML parser context
  340:  *
  341:  * called when the document start being processed.
  342:  */
  343: static void
  344: startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
  345: {
  346:     fprintf(stdout, "SAX.startDocument()\n");
  347: }
  348: 
  349: /**
  350:  * endDocumentDebug:
  351:  * @ctxt:  An XML parser context
  352:  *
  353:  * called when the document end has been detected.
  354:  */
  355: static void
  356: endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
  357: {
  358:     fprintf(stdout, "SAX.endDocument()\n");
  359: }
  360: 
  361: /**
  362:  * startElementDebug:
  363:  * @ctxt:  An XML parser context
  364:  * @name:  The element name
  365:  *
  366:  * called when an opening tag has been processed.
  367:  */
  368: static void
  369: startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
  370: {
  371:     int i;
  372: 
  373:     fprintf(stdout, "SAX.startElement(%s", (char *) name);
  374:     if (atts != NULL) {
  375:         for (i = 0;(atts[i] != NULL);i++) {
  376: 	    fprintf(stdout, ", %s", atts[i++]);
  377: 	    if (atts[i] != NULL) {
  378: 		unsigned char output[40];
  379: 		const unsigned char *att = atts[i];
  380: 		int outlen, attlen;
  381: 	        fprintf(stdout, "='");
  382: 		while ((attlen = strlen((char*)att)) > 0) {
  383: 		    outlen = sizeof output - 1;
  384: 		    htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
  385: 		    output[outlen] = 0;
  386: 		    fprintf(stdout, "%s", (char *) output);
  387: 		    att += attlen;
  388: 		}
  389: 		fprintf(stdout, "'");
  390: 	    }
  391: 	}
  392:     }
  393:     fprintf(stdout, ")\n");
  394: }
  395: 
  396: /**
  397:  * endElementDebug:
  398:  * @ctxt:  An XML parser context
  399:  * @name:  The element name
  400:  *
  401:  * called when the end of an element has been detected.
  402:  */
  403: static void
  404: endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
  405: {
  406:     fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
  407: }
  408: 
  409: /**
  410:  * charactersDebug:
  411:  * @ctxt:  An XML parser context
  412:  * @ch:  a xmlChar string
  413:  * @len: the number of xmlChar
  414:  *
  415:  * receiving some chars from the parser.
  416:  * Question: how much at a time ???
  417:  */
  418: static void
  419: charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
  420: {
  421:     unsigned char output[40];
  422:     int inlen = len, outlen = 30;
  423: 
  424:     htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
  425:     output[outlen] = 0;
  426: 
  427:     fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
  428: }
  429: 
  430: /**
  431:  * cdataDebug:
  432:  * @ctxt:  An XML parser context
  433:  * @ch:  a xmlChar string
  434:  * @len: the number of xmlChar
  435:  *
  436:  * receiving some cdata chars from the parser.
  437:  * Question: how much at a time ???
  438:  */
  439: static void
  440: cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
  441: {
  442:     unsigned char output[40];
  443:     int inlen = len, outlen = 30;
  444: 
  445:     htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
  446:     output[outlen] = 0;
  447: 
  448:     fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
  449: }
  450: 
  451: /**
  452:  * referenceDebug:
  453:  * @ctxt:  An XML parser context
  454:  * @name:  The entity name
  455:  *
  456:  * called when an entity reference is detected. 
  457:  */
  458: static void
  459: referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
  460: {
  461:     fprintf(stdout, "SAX.reference(%s)\n", name);
  462: }
  463: 
  464: /**
  465:  * ignorableWhitespaceDebug:
  466:  * @ctxt:  An XML parser context
  467:  * @ch:  a xmlChar string
  468:  * @start: the first char in the string
  469:  * @len: the number of xmlChar
  470:  *
  471:  * receiving some ignorable whitespaces from the parser.
  472:  * Question: how much at a time ???
  473:  */
  474: static void
  475: ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
  476: {
  477:     char output[40];
  478:     int i;
  479: 
  480:     for (i = 0;(i<len) && (i < 30);i++)
  481: 	output[i] = ch[i];
  482:     output[i] = 0;
  483: 
  484:     fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
  485: }
  486: 
  487: /**
  488:  * processingInstructionDebug:
  489:  * @ctxt:  An XML parser context
  490:  * @target:  the target name
  491:  * @data: the PI data's
  492:  * @len: the number of xmlChar
  493:  *
  494:  * A processing instruction has been parsed.
  495:  */
  496: static void
  497: processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
  498:                       const xmlChar *data)
  499: {
  500:     fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
  501:             (char *) target, (char *) data);
  502: }
  503: 
  504: /**
  505:  * commentDebug:
  506:  * @ctxt:  An XML parser context
  507:  * @value:  the comment content
  508:  *
  509:  * A comment has been parsed.
  510:  */
  511: static void
  512: commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
  513: {
  514:     fprintf(stdout, "SAX.comment(%s)\n", value);
  515: }
  516: 
  517: /**
  518:  * warningDebug:
  519:  * @ctxt:  An XML parser context
  520:  * @msg:  the message to display/transmit
  521:  * @...:  extra parameters for the message display
  522:  *
  523:  * Display and format a warning messages, gives file, line, position and
  524:  * extra parameters.
  525:  */
  526: static void XMLCDECL
  527: warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
  528: {
  529:     va_list args;
  530: 
  531:     va_start(args, msg);
  532:     fprintf(stdout, "SAX.warning: ");
  533:     vfprintf(stdout, msg, args);
  534:     va_end(args);
  535: }
  536: 
  537: /**
  538:  * errorDebug:
  539:  * @ctxt:  An XML parser context
  540:  * @msg:  the message to display/transmit
  541:  * @...:  extra parameters for the message display
  542:  *
  543:  * Display and format a error messages, gives file, line, position and
  544:  * extra parameters.
  545:  */
  546: static void XMLCDECL
  547: errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
  548: {
  549:     va_list args;
  550: 
  551:     va_start(args, msg);
  552:     fprintf(stdout, "SAX.error: ");
  553:     vfprintf(stdout, msg, args);
  554:     va_end(args);
  555: }
  556: 
  557: /**
  558:  * fatalErrorDebug:
  559:  * @ctxt:  An XML parser context
  560:  * @msg:  the message to display/transmit
  561:  * @...:  extra parameters for the message display
  562:  *
  563:  * Display and format a fatalError messages, gives file, line, position and
  564:  * extra parameters.
  565:  */
  566: static void XMLCDECL
  567: fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
  568: {
  569:     va_list args;
  570: 
  571:     va_start(args, msg);
  572:     fprintf(stdout, "SAX.fatalError: ");
  573:     vfprintf(stdout, msg, args);
  574:     va_end(args);
  575: }
  576: 
  577: static xmlSAXHandler debugSAXHandlerStruct = {
  578:     internalSubsetDebug,
  579:     isStandaloneDebug,
  580:     hasInternalSubsetDebug,
  581:     hasExternalSubsetDebug,
  582:     resolveEntityDebug,
  583:     getEntityDebug,
  584:     entityDeclDebug,
  585:     notationDeclDebug,
  586:     attributeDeclDebug,
  587:     elementDeclDebug,
  588:     unparsedEntityDeclDebug,
  589:     setDocumentLocatorDebug,
  590:     startDocumentDebug,
  591:     endDocumentDebug,
  592:     startElementDebug,
  593:     endElementDebug,
  594:     referenceDebug,
  595:     charactersDebug,
  596:     ignorableWhitespaceDebug,
  597:     processingInstructionDebug,
  598:     commentDebug,
  599:     warningDebug,
  600:     errorDebug,
  601:     fatalErrorDebug,
  602:     getParameterEntityDebug,
  603:     cdataDebug,
  604:     NULL,
  605:     1,
  606:     NULL,
  607:     NULL,
  608:     NULL,
  609:     NULL
  610: };
  611: 
  612: xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
  613: /************************************************************************
  614:  *									*
  615:  *				Debug					*
  616:  *									*
  617:  ************************************************************************/
  618: 
  619: static void
  620: parseSAXFile(char *filename) {
  621:     htmlDocPtr doc = NULL;
  622: 
  623:     /*
  624:      * Empty callbacks for checking
  625:      */
  626: #ifdef LIBXML_PUSH_ENABLED
  627:     if (push) {
  628: 	FILE *f;
  629: 
  630: #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
  631: 	f = fopen(filename, "rb");
  632: #else
  633: 	f = fopen(filename, "r");
  634: #endif
  635: 	if (f != NULL) {
  636: 	    int res, size = 3;
  637: 	    char chars[4096];
  638: 	    htmlParserCtxtPtr ctxt;
  639: 
  640: 	    /* if (repeat) */
  641: 		size = 4096;
  642: 	    res = fread(chars, 1, 4, f);
  643: 	    if (res > 0) {
  644: 		ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
  645: 			    chars, res, filename, XML_CHAR_ENCODING_NONE);
  646: 		while ((res = fread(chars, 1, size, f)) > 0) {
  647: 		    htmlParseChunk(ctxt, chars, res, 0);
  648: 		}
  649: 		htmlParseChunk(ctxt, chars, 0, 1);
  650: 		doc = ctxt->myDoc;
  651: 		htmlFreeParserCtxt(ctxt);
  652: 	    }
  653: 	    if (doc != NULL) {
  654: 		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
  655: 		xmlFreeDoc(doc);
  656: 	    }
  657: 	    fclose(f);
  658: 	}
  659: 	if (!noout) {
  660: #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
  661: 		f = fopen(filename, "rb");
  662: #else
  663: 		f = fopen(filename, "r");
  664: #endif
  665: 	    if (f != NULL) {
  666: 		int res, size = 3;
  667: 		char chars[4096];
  668: 		htmlParserCtxtPtr ctxt;
  669: 
  670: 		/* if (repeat) */
  671: 		    size = 4096;
  672: 		res = fread(chars, 1, 4, f);
  673: 		if (res > 0) {
  674: 		    ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
  675: 				chars, res, filename, XML_CHAR_ENCODING_NONE);
  676: 		    while ((res = fread(chars, 1, size, f)) > 0) {
  677: 			htmlParseChunk(ctxt, chars, res, 0);
  678: 		    }
  679: 		    htmlParseChunk(ctxt, chars, 0, 1);
  680: 		    doc = ctxt->myDoc;
  681: 		    htmlFreeParserCtxt(ctxt);
  682: 		}
  683: 		if (doc != NULL) {
  684: 		    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
  685: 		    xmlFreeDoc(doc);
  686: 		}
  687: 		fclose(f);
  688: 	    }
  689: 	}
  690:     } else {	
  691: #endif /* LIBXML_PUSH_ENABLED */
  692: 	doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
  693: 	if (doc != NULL) {
  694: 	    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
  695: 	    xmlFreeDoc(doc);
  696: 	}
  697: 
  698: 	if (!noout) {
  699: 	    /*
  700: 	     * Debug callback
  701: 	     */
  702: 	    doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
  703: 	    if (doc != NULL) {
  704: 		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
  705: 		xmlFreeDoc(doc);
  706: 	    }
  707: 	}
  708: #ifdef LIBXML_PUSH_ENABLED
  709:     }
  710: #endif /* LIBXML_PUSH_ENABLED */
  711: }
  712: 
  713: static void
  714: parseAndPrintFile(char *filename) {
  715:     htmlDocPtr doc = NULL;
  716: 
  717:     /*
  718:      * build an HTML tree from a string;
  719:      */
  720: #ifdef LIBXML_PUSH_ENABLED
  721:     if (push) {
  722: 	FILE *f;
  723: 
  724: #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
  725: 	f = fopen(filename, "rb");
  726: #else
  727: 	f = fopen(filename, "r");
  728: #endif
  729: 	if (f != NULL) {
  730: 	    int res, size = 3;
  731: 	    char chars[4096];
  732: 	    htmlParserCtxtPtr ctxt;
  733: 
  734: 	    /* if (repeat) */
  735: 		size = 4096;
  736: 	    res = fread(chars, 1, 4, f);
  737: 	    if (res > 0) {
  738: 		ctxt = htmlCreatePushParserCtxt(NULL, NULL,
  739: 			    chars, res, filename, XML_CHAR_ENCODING_NONE);
  740: 		while ((res = fread(chars, 1, size, f)) > 0) {
  741: 		    htmlParseChunk(ctxt, chars, res, 0);
  742: 		}
  743: 		htmlParseChunk(ctxt, chars, 0, 1);
  744: 		doc = ctxt->myDoc;
  745: 		htmlFreeParserCtxt(ctxt);
  746: 	    }
  747: 	    fclose(f);
  748: 	}
  749:     } else {	
  750: 	doc = htmlReadFile(filename, NULL, options);
  751:     }
  752: #else
  753: 	doc = htmlReadFile(filename,NULL,options);
  754: #endif
  755:     if (doc == NULL) {
  756:         xmlGenericError(xmlGenericErrorContext,
  757: 		"Could not parse %s\n", filename);
  758:     }
  759: 
  760: #ifdef LIBXML_TREE_ENABLED
  761:     /*
  762:      * test intermediate copy if needed.
  763:      */
  764:     if (copy) {
  765:         htmlDocPtr tmp;
  766: 
  767:         tmp = doc;
  768: 	doc = xmlCopyDoc(doc, 1);
  769: 	xmlFreeDoc(tmp);
  770:     }
  771: #endif
  772: 
  773: #ifdef LIBXML_OUTPUT_ENABLED
  774:     /*
  775:      * print it.
  776:      */
  777:     if (!noout) { 
  778: #ifdef LIBXML_DEBUG_ENABLED
  779: 	if (!debug) {
  780: 	    if (encoding)
  781: 		htmlSaveFileEnc("-", doc, encoding);
  782: 	    else
  783: 		htmlDocDump(stdout, doc);
  784: 	} else
  785: 	    xmlDebugDumpDocument(stdout, doc);
  786: #else
  787: 	if (encoding)
  788: 	    htmlSaveFileEnc("-", doc, encoding);
  789: 	else
  790: 	    htmlDocDump(stdout, doc);
  791: #endif
  792:     }	
  793: #endif /* LIBXML_OUTPUT_ENABLED */
  794: 
  795:     /*
  796:      * free it.
  797:      */
  798:     xmlFreeDoc(doc);
  799: }
  800: 
  801: int main(int argc, char **argv) {
  802:     int i, count;
  803:     int files = 0;
  804: 
  805:     for (i = 1; i < argc ; i++) {
  806: #ifdef LIBXML_DEBUG_ENABLED
  807: 	if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
  808: 	    debug++;
  809: 	else
  810: #endif
  811: 	    if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
  812: 	    copy++;
  813: #ifdef LIBXML_PUSH_ENABLED
  814: 	else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
  815: 	    push++;
  816: #endif /* LIBXML_PUSH_ENABLED */
  817: 	else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
  818: 	    sax++;
  819: 	else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
  820: 	    noout++;
  821: 	else if ((!strcmp(argv[i], "-repeat")) ||
  822: 	         (!strcmp(argv[i], "--repeat")))
  823: 	    repeat++;
  824: 	else if ((!strcmp(argv[i], "-encode")) ||
  825: 	         (!strcmp(argv[i], "--encode"))) {
  826: 	    i++;
  827: 	    encoding = argv[i];
  828:         }
  829:     }
  830:     for (i = 1; i < argc ; i++) {
  831: 	if ((!strcmp(argv[i], "-encode")) ||
  832: 	         (!strcmp(argv[i], "--encode"))) {
  833: 	    i++;
  834: 	    continue;
  835:         }
  836: 	if (argv[i][0] != '-') {
  837: 	    if (repeat) {
  838: 		for (count = 0;count < 100 * repeat;count++) {
  839: 		    if (sax)
  840: 			parseSAXFile(argv[i]);
  841: 		    else   
  842: 			parseAndPrintFile(argv[i]);
  843: 		}    
  844: 	    } else {
  845: 		if (sax)
  846: 		    parseSAXFile(argv[i]);
  847: 		else   
  848: 		    parseAndPrintFile(argv[i]);
  849: 	    }
  850: 	    files ++;
  851: 	}
  852:     }
  853:     if (files == 0) {
  854: 	printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
  855: 	       argv[0]);
  856: 	printf("\tParse the HTML files and output the result of the parsing\n");
  857: #ifdef LIBXML_DEBUG_ENABLED
  858: 	printf("\t--debug : dump a debug tree of the in-memory document\n");
  859: #endif
  860: 	printf("\t--copy : used to test the internal copy implementation\n");
  861: 	printf("\t--sax : debug the sequence of SAX callbacks\n");
  862: 	printf("\t--repeat : parse the file 100 times, for timing\n");
  863: 	printf("\t--noout : do not print the result\n");
  864: #ifdef LIBXML_PUSH_ENABLED
  865: 	printf("\t--push : use the push mode parser\n");
  866: #endif /* LIBXML_PUSH_ENABLED */
  867: 	printf("\t--encode encoding : output in the given encoding\n");
  868:     }
  869:     xmlCleanupParser();
  870:     xmlMemoryDump();
  871: 
  872:     return(0);
  873: }
  874: #else /* !LIBXML_HTML_ENABLED */
  875: #include <stdio.h>
  876: int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
  877:     printf("%s : HTML support not compiled in\n", argv[0]);
  878:     return(0);
  879: }
  880: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>