Annotation of embedaddon/libxml2/testHTML.c, revision 1.1
1.1 ! misho 1: /*
! 2: * testHTML.c : a small tester program for HTML input.
! 3: *
! 4: * See Copyright for the status of this software.
! 5: *
! 6: * daniel@veillard.com
! 7: */
! 8:
! 9: #include "libxml.h"
! 10:
! 11: #ifdef LIBXML_HTML_ENABLED
! 12:
! 13: #include <string.h>
! 14: #include <stdarg.h>
! 15:
! 16:
! 17: #ifdef HAVE_SYS_TYPES_H
! 18: #include <sys/types.h>
! 19: #endif
! 20: #ifdef HAVE_SYS_STAT_H
! 21: #include <sys/stat.h>
! 22: #endif
! 23: #ifdef HAVE_FCNTL_H
! 24: #include <fcntl.h>
! 25: #endif
! 26: #ifdef HAVE_UNISTD_H
! 27: #include <unistd.h>
! 28: #endif
! 29: #ifdef HAVE_STDLIB_H
! 30: #include <stdlib.h>
! 31: #endif
! 32:
! 33: #include <libxml/xmlmemory.h>
! 34: #include <libxml/HTMLparser.h>
! 35: #include <libxml/HTMLtree.h>
! 36: #include <libxml/debugXML.h>
! 37: #include <libxml/xmlerror.h>
! 38: #include <libxml/globals.h>
! 39:
/*
 * Command-line switches. Each counter is bumped by main() when the
 * matching option is seen on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug: dump the tree with xmlDebugDumpDocument */
#endif
static int copy = 0;            /* --copy: run the document through xmlCopyDoc */
static int sax = 0;             /* --sax: trace SAX callbacks instead of building a tree */
static int repeat = 0;          /* --repeat: parse each file 100 * repeat times */
static int noout = 0;           /* --noout: suppress the output pass */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;            /* --push: feed the file through the push parser */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;   /* --encode argument, handed to htmlSaveFileEnc */
static int options = 0;         /* parser options handed to htmlReadFile */
! 52:
! 53: static xmlSAXHandler emptySAXHandlerStruct = {
! 54: NULL, /* internalSubset */
! 55: NULL, /* isStandalone */
! 56: NULL, /* hasInternalSubset */
! 57: NULL, /* hasExternalSubset */
! 58: NULL, /* resolveEntity */
! 59: NULL, /* getEntity */
! 60: NULL, /* entityDecl */
! 61: NULL, /* notationDecl */
! 62: NULL, /* attributeDecl */
! 63: NULL, /* elementDecl */
! 64: NULL, /* unparsedEntityDecl */
! 65: NULL, /* setDocumentLocator */
! 66: NULL, /* startDocument */
! 67: NULL, /* endDocument */
! 68: NULL, /* startElement */
! 69: NULL, /* endElement */
! 70: NULL, /* reference */
! 71: NULL, /* characters */
! 72: NULL, /* ignorableWhitespace */
! 73: NULL, /* processingInstruction */
! 74: NULL, /* comment */
! 75: NULL, /* xmlParserWarning */
! 76: NULL, /* xmlParserError */
! 77: NULL, /* xmlParserError */
! 78: NULL, /* getParameterEntity */
! 79: NULL, /* cdataBlock */
! 80: NULL, /* externalSubset */
! 81: 1, /* initialized */
! 82: NULL, /* private */
! 83: NULL, /* startElementNsSAX2Func */
! 84: NULL, /* endElementNsSAX2Func */
! 85: NULL /* xmlStructuredErrorFunc */
! 86: };
! 87:
! 88: static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
! 89: extern xmlSAXHandlerPtr debugSAXHandler;
! 90:
! 91: /************************************************************************
! 92: * *
! 93: * Debug Handlers *
! 94: * *
! 95: ************************************************************************/
! 96:
! 97: /**
! 98: * isStandaloneDebug:
! 99: * @ctxt: An XML parser context
! 100: *
! 101: * Is this document tagged standalone ?
! 102: *
! 103: * Returns 1 if true
! 104: */
! 105: static int
! 106: isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
! 107: {
! 108: fprintf(stdout, "SAX.isStandalone()\n");
! 109: return(0);
! 110: }
! 111:
! 112: /**
! 113: * hasInternalSubsetDebug:
! 114: * @ctxt: An XML parser context
! 115: *
! 116: * Does this document has an internal subset
! 117: *
! 118: * Returns 1 if true
! 119: */
! 120: static int
! 121: hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
! 122: {
! 123: fprintf(stdout, "SAX.hasInternalSubset()\n");
! 124: return(0);
! 125: }
! 126:
! 127: /**
! 128: * hasExternalSubsetDebug:
! 129: * @ctxt: An XML parser context
! 130: *
! 131: * Does this document has an external subset
! 132: *
! 133: * Returns 1 if true
! 134: */
! 135: static int
! 136: hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
! 137: {
! 138: fprintf(stdout, "SAX.hasExternalSubset()\n");
! 139: return(0);
! 140: }
! 141:
! 142: /**
! 143: * hasInternalSubsetDebug:
! 144: * @ctxt: An XML parser context
! 145: *
! 146: * Does this document has an internal subset
! 147: */
! 148: static void
! 149: internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
! 150: const xmlChar *ExternalID, const xmlChar *SystemID)
! 151: {
! 152: fprintf(stdout, "SAX.internalSubset(%s,", name);
! 153: if (ExternalID == NULL)
! 154: fprintf(stdout, " ,");
! 155: else
! 156: fprintf(stdout, " %s,", ExternalID);
! 157: if (SystemID == NULL)
! 158: fprintf(stdout, " )\n");
! 159: else
! 160: fprintf(stdout, " %s)\n", SystemID);
! 161: }
! 162:
! 163: /**
! 164: * resolveEntityDebug:
! 165: * @ctxt: An XML parser context
! 166: * @publicId: The public ID of the entity
! 167: * @systemId: The system ID of the entity
! 168: *
! 169: * Special entity resolver, better left to the parser, it has
! 170: * more context than the application layer.
! 171: * The default behaviour is to NOT resolve the entities, in that case
! 172: * the ENTITY_REF nodes are built in the structure (and the parameter
! 173: * values).
! 174: *
! 175: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
! 176: */
! 177: static xmlParserInputPtr
! 178: resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
! 179: {
! 180: /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
! 181:
! 182:
! 183: fprintf(stdout, "SAX.resolveEntity(");
! 184: if (publicId != NULL)
! 185: fprintf(stdout, "%s", (char *)publicId);
! 186: else
! 187: fprintf(stdout, " ");
! 188: if (systemId != NULL)
! 189: fprintf(stdout, ", %s)\n", (char *)systemId);
! 190: else
! 191: fprintf(stdout, ", )\n");
! 192: /*********
! 193: if (systemId != NULL) {
! 194: return(xmlNewInputFromFile(ctxt, (char *) systemId));
! 195: }
! 196: *********/
! 197: return(NULL);
! 198: }
! 199:
! 200: /**
! 201: * getEntityDebug:
! 202: * @ctxt: An XML parser context
! 203: * @name: The entity name
! 204: *
! 205: * Get an entity by name
! 206: *
! 207: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
! 208: */
! 209: static xmlEntityPtr
! 210: getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
! 211: {
! 212: fprintf(stdout, "SAX.getEntity(%s)\n", name);
! 213: return(NULL);
! 214: }
! 215:
! 216: /**
! 217: * getParameterEntityDebug:
! 218: * @ctxt: An XML parser context
! 219: * @name: The entity name
! 220: *
! 221: * Get a parameter entity by name
! 222: *
! 223: * Returns the xmlParserInputPtr
! 224: */
! 225: static xmlEntityPtr
! 226: getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
! 227: {
! 228: fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
! 229: return(NULL);
! 230: }
! 231:
! 232:
! 233: /**
! 234: * entityDeclDebug:
! 235: * @ctxt: An XML parser context
! 236: * @name: the entity name
! 237: * @type: the entity type
! 238: * @publicId: The public ID of the entity
! 239: * @systemId: The system ID of the entity
! 240: * @content: the entity value (without processing).
! 241: *
! 242: * An entity definition has been parsed
! 243: */
! 244: static void
! 245: entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
! 246: const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
! 247: {
! 248: fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
! 249: name, type, publicId, systemId, content);
! 250: }
! 251:
! 252: /**
! 253: * attributeDeclDebug:
! 254: * @ctxt: An XML parser context
! 255: * @name: the attribute name
! 256: * @type: the attribute type
! 257: *
! 258: * An attribute definition has been parsed
! 259: */
! 260: static void
! 261: attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
! 262: int type, int def, const xmlChar *defaultValue,
! 263: xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
! 264: {
! 265: fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
! 266: elem, name, type, def, defaultValue);
! 267: }
! 268:
! 269: /**
! 270: * elementDeclDebug:
! 271: * @ctxt: An XML parser context
! 272: * @name: the element name
! 273: * @type: the element type
! 274: * @content: the element value (without processing).
! 275: *
! 276: * An element definition has been parsed
! 277: */
! 278: static void
! 279: elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
! 280: xmlElementContentPtr content ATTRIBUTE_UNUSED)
! 281: {
! 282: fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
! 283: name, type);
! 284: }
! 285:
! 286: /**
! 287: * notationDeclDebug:
! 288: * @ctxt: An XML parser context
! 289: * @name: The name of the notation
! 290: * @publicId: The public ID of the entity
! 291: * @systemId: The system ID of the entity
! 292: *
! 293: * What to do when a notation declaration has been parsed.
! 294: */
! 295: static void
! 296: notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
! 297: const xmlChar *publicId, const xmlChar *systemId)
! 298: {
! 299: fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
! 300: (char *) name, (char *) publicId, (char *) systemId);
! 301: }
! 302:
! 303: /**
! 304: * unparsedEntityDeclDebug:
! 305: * @ctxt: An XML parser context
! 306: * @name: The name of the entity
! 307: * @publicId: The public ID of the entity
! 308: * @systemId: The system ID of the entity
! 309: * @notationName: the name of the notation
! 310: *
! 311: * What to do when an unparsed entity declaration is parsed
! 312: */
! 313: static void
! 314: unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
! 315: const xmlChar *publicId, const xmlChar *systemId,
! 316: const xmlChar *notationName)
! 317: {
! 318: fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
! 319: (char *) name, (char *) publicId, (char *) systemId,
! 320: (char *) notationName);
! 321: }
! 322:
! 323: /**
! 324: * setDocumentLocatorDebug:
! 325: * @ctxt: An XML parser context
! 326: * @loc: A SAX Locator
! 327: *
! 328: * Receive the document locator at startup, actually xmlDefaultSAXLocator
! 329: * Everything is available on the context, so this is useless in our case.
! 330: */
! 331: static void
! 332: setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
! 333: {
! 334: fprintf(stdout, "SAX.setDocumentLocator()\n");
! 335: }
! 336:
! 337: /**
! 338: * startDocumentDebug:
! 339: * @ctxt: An XML parser context
! 340: *
! 341: * called when the document start being processed.
! 342: */
! 343: static void
! 344: startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
! 345: {
! 346: fprintf(stdout, "SAX.startDocument()\n");
! 347: }
! 348:
! 349: /**
! 350: * endDocumentDebug:
! 351: * @ctxt: An XML parser context
! 352: *
! 353: * called when the document end has been detected.
! 354: */
! 355: static void
! 356: endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
! 357: {
! 358: fprintf(stdout, "SAX.endDocument()\n");
! 359: }
! 360:
! 361: /**
! 362: * startElementDebug:
! 363: * @ctxt: An XML parser context
! 364: * @name: The element name
! 365: *
! 366: * called when an opening tag has been processed.
! 367: */
! 368: static void
! 369: startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
! 370: {
! 371: int i;
! 372:
! 373: fprintf(stdout, "SAX.startElement(%s", (char *) name);
! 374: if (atts != NULL) {
! 375: for (i = 0;(atts[i] != NULL);i++) {
! 376: fprintf(stdout, ", %s", atts[i++]);
! 377: if (atts[i] != NULL) {
! 378: unsigned char output[40];
! 379: const unsigned char *att = atts[i];
! 380: int outlen, attlen;
! 381: fprintf(stdout, "='");
! 382: while ((attlen = strlen((char*)att)) > 0) {
! 383: outlen = sizeof output - 1;
! 384: htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
! 385: output[outlen] = 0;
! 386: fprintf(stdout, "%s", (char *) output);
! 387: att += attlen;
! 388: }
! 389: fprintf(stdout, "'");
! 390: }
! 391: }
! 392: }
! 393: fprintf(stdout, ")\n");
! 394: }
! 395:
! 396: /**
! 397: * endElementDebug:
! 398: * @ctxt: An XML parser context
! 399: * @name: The element name
! 400: *
! 401: * called when the end of an element has been detected.
! 402: */
! 403: static void
! 404: endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
! 405: {
! 406: fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
! 407: }
! 408:
! 409: /**
! 410: * charactersDebug:
! 411: * @ctxt: An XML parser context
! 412: * @ch: a xmlChar string
! 413: * @len: the number of xmlChar
! 414: *
! 415: * receiving some chars from the parser.
! 416: * Question: how much at a time ???
! 417: */
! 418: static void
! 419: charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
! 420: {
! 421: unsigned char output[40];
! 422: int inlen = len, outlen = 30;
! 423:
! 424: htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
! 425: output[outlen] = 0;
! 426:
! 427: fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
! 428: }
! 429:
! 430: /**
! 431: * cdataDebug:
! 432: * @ctxt: An XML parser context
! 433: * @ch: a xmlChar string
! 434: * @len: the number of xmlChar
! 435: *
! 436: * receiving some cdata chars from the parser.
! 437: * Question: how much at a time ???
! 438: */
! 439: static void
! 440: cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
! 441: {
! 442: unsigned char output[40];
! 443: int inlen = len, outlen = 30;
! 444:
! 445: htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
! 446: output[outlen] = 0;
! 447:
! 448: fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
! 449: }
! 450:
! 451: /**
! 452: * referenceDebug:
! 453: * @ctxt: An XML parser context
! 454: * @name: The entity name
! 455: *
! 456: * called when an entity reference is detected.
! 457: */
! 458: static void
! 459: referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
! 460: {
! 461: fprintf(stdout, "SAX.reference(%s)\n", name);
! 462: }
! 463:
! 464: /**
! 465: * ignorableWhitespaceDebug:
! 466: * @ctxt: An XML parser context
! 467: * @ch: a xmlChar string
! 468: * @start: the first char in the string
! 469: * @len: the number of xmlChar
! 470: *
! 471: * receiving some ignorable whitespaces from the parser.
! 472: * Question: how much at a time ???
! 473: */
! 474: static void
! 475: ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
! 476: {
! 477: char output[40];
! 478: int i;
! 479:
! 480: for (i = 0;(i<len) && (i < 30);i++)
! 481: output[i] = ch[i];
! 482: output[i] = 0;
! 483:
! 484: fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
! 485: }
! 486:
! 487: /**
! 488: * processingInstructionDebug:
! 489: * @ctxt: An XML parser context
! 490: * @target: the target name
! 491: * @data: the PI data's
! 492: * @len: the number of xmlChar
! 493: *
! 494: * A processing instruction has been parsed.
! 495: */
! 496: static void
! 497: processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
! 498: const xmlChar *data)
! 499: {
! 500: fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
! 501: (char *) target, (char *) data);
! 502: }
! 503:
! 504: /**
! 505: * commentDebug:
! 506: * @ctxt: An XML parser context
! 507: * @value: the comment content
! 508: *
! 509: * A comment has been parsed.
! 510: */
! 511: static void
! 512: commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
! 513: {
! 514: fprintf(stdout, "SAX.comment(%s)\n", value);
! 515: }
! 516:
! 517: /**
! 518: * warningDebug:
! 519: * @ctxt: An XML parser context
! 520: * @msg: the message to display/transmit
! 521: * @...: extra parameters for the message display
! 522: *
! 523: * Display and format a warning messages, gives file, line, position and
! 524: * extra parameters.
! 525: */
! 526: static void XMLCDECL
! 527: warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
! 528: {
! 529: va_list args;
! 530:
! 531: va_start(args, msg);
! 532: fprintf(stdout, "SAX.warning: ");
! 533: vfprintf(stdout, msg, args);
! 534: va_end(args);
! 535: }
! 536:
! 537: /**
! 538: * errorDebug:
! 539: * @ctxt: An XML parser context
! 540: * @msg: the message to display/transmit
! 541: * @...: extra parameters for the message display
! 542: *
! 543: * Display and format a error messages, gives file, line, position and
! 544: * extra parameters.
! 545: */
! 546: static void XMLCDECL
! 547: errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
! 548: {
! 549: va_list args;
! 550:
! 551: va_start(args, msg);
! 552: fprintf(stdout, "SAX.error: ");
! 553: vfprintf(stdout, msg, args);
! 554: va_end(args);
! 555: }
! 556:
! 557: /**
! 558: * fatalErrorDebug:
! 559: * @ctxt: An XML parser context
! 560: * @msg: the message to display/transmit
! 561: * @...: extra parameters for the message display
! 562: *
! 563: * Display and format a fatalError messages, gives file, line, position and
! 564: * extra parameters.
! 565: */
! 566: static void XMLCDECL
! 567: fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
! 568: {
! 569: va_list args;
! 570:
! 571: va_start(args, msg);
! 572: fprintf(stdout, "SAX.fatalError: ");
! 573: vfprintf(stdout, msg, args);
! 574: va_end(args);
! 575: }
! 576:
! 577: static xmlSAXHandler debugSAXHandlerStruct = {
! 578: internalSubsetDebug,
! 579: isStandaloneDebug,
! 580: hasInternalSubsetDebug,
! 581: hasExternalSubsetDebug,
! 582: resolveEntityDebug,
! 583: getEntityDebug,
! 584: entityDeclDebug,
! 585: notationDeclDebug,
! 586: attributeDeclDebug,
! 587: elementDeclDebug,
! 588: unparsedEntityDeclDebug,
! 589: setDocumentLocatorDebug,
! 590: startDocumentDebug,
! 591: endDocumentDebug,
! 592: startElementDebug,
! 593: endElementDebug,
! 594: referenceDebug,
! 595: charactersDebug,
! 596: ignorableWhitespaceDebug,
! 597: processingInstructionDebug,
! 598: commentDebug,
! 599: warningDebug,
! 600: errorDebug,
! 601: fatalErrorDebug,
! 602: getParameterEntityDebug,
! 603: cdataDebug,
! 604: NULL,
! 605: 1,
! 606: NULL,
! 607: NULL,
! 608: NULL,
! 609: NULL
! 610: };
! 611:
! 612: xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
! 613: /************************************************************************
! 614: * *
! 615: * Debug *
! 616: * *
! 617: ************************************************************************/
! 618:
! 619: static void
! 620: parseSAXFile(char *filename) {
! 621: htmlDocPtr doc = NULL;
! 622:
! 623: /*
! 624: * Empty callbacks for checking
! 625: */
! 626: #ifdef LIBXML_PUSH_ENABLED
! 627: if (push) {
! 628: FILE *f;
! 629:
! 630: #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
! 631: f = fopen(filename, "rb");
! 632: #else
! 633: f = fopen(filename, "r");
! 634: #endif
! 635: if (f != NULL) {
! 636: int res, size = 3;
! 637: char chars[4096];
! 638: htmlParserCtxtPtr ctxt;
! 639:
! 640: /* if (repeat) */
! 641: size = 4096;
! 642: res = fread(chars, 1, 4, f);
! 643: if (res > 0) {
! 644: ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
! 645: chars, res, filename, XML_CHAR_ENCODING_NONE);
! 646: while ((res = fread(chars, 1, size, f)) > 0) {
! 647: htmlParseChunk(ctxt, chars, res, 0);
! 648: }
! 649: htmlParseChunk(ctxt, chars, 0, 1);
! 650: doc = ctxt->myDoc;
! 651: htmlFreeParserCtxt(ctxt);
! 652: }
! 653: if (doc != NULL) {
! 654: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
! 655: xmlFreeDoc(doc);
! 656: }
! 657: fclose(f);
! 658: }
! 659: if (!noout) {
! 660: #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
! 661: f = fopen(filename, "rb");
! 662: #else
! 663: f = fopen(filename, "r");
! 664: #endif
! 665: if (f != NULL) {
! 666: int res, size = 3;
! 667: char chars[4096];
! 668: htmlParserCtxtPtr ctxt;
! 669:
! 670: /* if (repeat) */
! 671: size = 4096;
! 672: res = fread(chars, 1, 4, f);
! 673: if (res > 0) {
! 674: ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
! 675: chars, res, filename, XML_CHAR_ENCODING_NONE);
! 676: while ((res = fread(chars, 1, size, f)) > 0) {
! 677: htmlParseChunk(ctxt, chars, res, 0);
! 678: }
! 679: htmlParseChunk(ctxt, chars, 0, 1);
! 680: doc = ctxt->myDoc;
! 681: htmlFreeParserCtxt(ctxt);
! 682: }
! 683: if (doc != NULL) {
! 684: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
! 685: xmlFreeDoc(doc);
! 686: }
! 687: fclose(f);
! 688: }
! 689: }
! 690: } else {
! 691: #endif /* LIBXML_PUSH_ENABLED */
! 692: doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
! 693: if (doc != NULL) {
! 694: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
! 695: xmlFreeDoc(doc);
! 696: }
! 697:
! 698: if (!noout) {
! 699: /*
! 700: * Debug callback
! 701: */
! 702: doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
! 703: if (doc != NULL) {
! 704: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
! 705: xmlFreeDoc(doc);
! 706: }
! 707: }
! 708: #ifdef LIBXML_PUSH_ENABLED
! 709: }
! 710: #endif /* LIBXML_PUSH_ENABLED */
! 711: }
! 712:
! 713: static void
! 714: parseAndPrintFile(char *filename) {
! 715: htmlDocPtr doc = NULL;
! 716:
! 717: /*
! 718: * build an HTML tree from a string;
! 719: */
! 720: #ifdef LIBXML_PUSH_ENABLED
! 721: if (push) {
! 722: FILE *f;
! 723:
! 724: #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
! 725: f = fopen(filename, "rb");
! 726: #else
! 727: f = fopen(filename, "r");
! 728: #endif
! 729: if (f != NULL) {
! 730: int res, size = 3;
! 731: char chars[4096];
! 732: htmlParserCtxtPtr ctxt;
! 733:
! 734: /* if (repeat) */
! 735: size = 4096;
! 736: res = fread(chars, 1, 4, f);
! 737: if (res > 0) {
! 738: ctxt = htmlCreatePushParserCtxt(NULL, NULL,
! 739: chars, res, filename, XML_CHAR_ENCODING_NONE);
! 740: while ((res = fread(chars, 1, size, f)) > 0) {
! 741: htmlParseChunk(ctxt, chars, res, 0);
! 742: }
! 743: htmlParseChunk(ctxt, chars, 0, 1);
! 744: doc = ctxt->myDoc;
! 745: htmlFreeParserCtxt(ctxt);
! 746: }
! 747: fclose(f);
! 748: }
! 749: } else {
! 750: doc = htmlReadFile(filename, NULL, options);
! 751: }
! 752: #else
! 753: doc = htmlReadFile(filename,NULL,options);
! 754: #endif
! 755: if (doc == NULL) {
! 756: xmlGenericError(xmlGenericErrorContext,
! 757: "Could not parse %s\n", filename);
! 758: }
! 759:
! 760: #ifdef LIBXML_TREE_ENABLED
! 761: /*
! 762: * test intermediate copy if needed.
! 763: */
! 764: if (copy) {
! 765: htmlDocPtr tmp;
! 766:
! 767: tmp = doc;
! 768: doc = xmlCopyDoc(doc, 1);
! 769: xmlFreeDoc(tmp);
! 770: }
! 771: #endif
! 772:
! 773: #ifdef LIBXML_OUTPUT_ENABLED
! 774: /*
! 775: * print it.
! 776: */
! 777: if (!noout) {
! 778: #ifdef LIBXML_DEBUG_ENABLED
! 779: if (!debug) {
! 780: if (encoding)
! 781: htmlSaveFileEnc("-", doc, encoding);
! 782: else
! 783: htmlDocDump(stdout, doc);
! 784: } else
! 785: xmlDebugDumpDocument(stdout, doc);
! 786: #else
! 787: if (encoding)
! 788: htmlSaveFileEnc("-", doc, encoding);
! 789: else
! 790: htmlDocDump(stdout, doc);
! 791: #endif
! 792: }
! 793: #endif /* LIBXML_OUTPUT_ENABLED */
! 794:
! 795: /*
! 796: * free it.
! 797: */
! 798: xmlFreeDoc(doc);
! 799: }
! 800:
! 801: int main(int argc, char **argv) {
! 802: int i, count;
! 803: int files = 0;
! 804:
! 805: for (i = 1; i < argc ; i++) {
! 806: #ifdef LIBXML_DEBUG_ENABLED
! 807: if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
! 808: debug++;
! 809: else
! 810: #endif
! 811: if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
! 812: copy++;
! 813: #ifdef LIBXML_PUSH_ENABLED
! 814: else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
! 815: push++;
! 816: #endif /* LIBXML_PUSH_ENABLED */
! 817: else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
! 818: sax++;
! 819: else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
! 820: noout++;
! 821: else if ((!strcmp(argv[i], "-repeat")) ||
! 822: (!strcmp(argv[i], "--repeat")))
! 823: repeat++;
! 824: else if ((!strcmp(argv[i], "-encode")) ||
! 825: (!strcmp(argv[i], "--encode"))) {
! 826: i++;
! 827: encoding = argv[i];
! 828: }
! 829: }
! 830: for (i = 1; i < argc ; i++) {
! 831: if ((!strcmp(argv[i], "-encode")) ||
! 832: (!strcmp(argv[i], "--encode"))) {
! 833: i++;
! 834: continue;
! 835: }
! 836: if (argv[i][0] != '-') {
! 837: if (repeat) {
! 838: for (count = 0;count < 100 * repeat;count++) {
! 839: if (sax)
! 840: parseSAXFile(argv[i]);
! 841: else
! 842: parseAndPrintFile(argv[i]);
! 843: }
! 844: } else {
! 845: if (sax)
! 846: parseSAXFile(argv[i]);
! 847: else
! 848: parseAndPrintFile(argv[i]);
! 849: }
! 850: files ++;
! 851: }
! 852: }
! 853: if (files == 0) {
! 854: printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
! 855: argv[0]);
! 856: printf("\tParse the HTML files and output the result of the parsing\n");
! 857: #ifdef LIBXML_DEBUG_ENABLED
! 858: printf("\t--debug : dump a debug tree of the in-memory document\n");
! 859: #endif
! 860: printf("\t--copy : used to test the internal copy implementation\n");
! 861: printf("\t--sax : debug the sequence of SAX callbacks\n");
! 862: printf("\t--repeat : parse the file 100 times, for timing\n");
! 863: printf("\t--noout : do not print the result\n");
! 864: #ifdef LIBXML_PUSH_ENABLED
! 865: printf("\t--push : use the push mode parser\n");
! 866: #endif /* LIBXML_PUSH_ENABLED */
! 867: printf("\t--encode encoding : output in the given encoding\n");
! 868: }
! 869: xmlCleanupParser();
! 870: xmlMemoryDump();
! 871:
! 872: return(0);
! 873: }
! 874: #else /* !LIBXML_HTML_ENABLED */
! 875: #include <stdio.h>
! 876: int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
! 877: printf("%s : HTML support not compiled in\n", argv[0]);
! 878: return(0);
! 879: }
! 880: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>