blob: 63ba2375916a242bd8b1def9ebf2ee5823765ac6 [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillardc2def842000-11-07 14:21:01 +000011#undef LIBXML_DLL_IMPORT
Daniel Veillardbe70ff71999-07-05 16:50:46 +000012#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000014#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#include <stdio.h>
20#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000021#include <stdarg.h>
22
Daniel Veillard7f7d1111999-09-22 09:46:25 +000023
24#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000025#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000026#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000027#ifdef HAVE_SYS_STAT_H
28#include <sys/stat.h>
29#endif
30#ifdef HAVE_FCNTL_H
31#include <fcntl.h>
32#endif
33#ifdef HAVE_UNISTD_H
34#include <unistd.h>
35#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000036#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000037#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000038#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
Daniel Veillard361d8452000-04-03 19:48:13 +000040#include <libxml/xmlmemory.h>
41#include <libxml/HTMLparser.h>
42#include <libxml/HTMLtree.h>
43#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000044#include <libxml/xmlerror.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000045
/*
 * Command line switches. main() increments the matching counter each time
 * a flag is seen; the parsing routines only test them for non-zero.
 */
static int push = 0;            /* --push   : use the push mode parser */
static int sax = 0;             /* --sax    : trace the SAX callbacks */
static int copy = 0;            /* --copy   : exercise xmlCopyDoc() */
static int noout = 0;           /* --noout  : do not print the result */
static int repeat = 0;          /* --repeat : parse each file 100 times */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug  : dump the debug tree */
#endif
static char *encoding = NULL;   /* --encode : requested output encoding */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000055
Daniel Veillard7c1206f1999-10-14 09:10:25 +000056xmlSAXHandler emptySAXHandlerStruct = {
57 NULL, /* internalSubset */
58 NULL, /* isStandalone */
59 NULL, /* hasInternalSubset */
60 NULL, /* hasExternalSubset */
61 NULL, /* resolveEntity */
62 NULL, /* getEntity */
63 NULL, /* entityDecl */
64 NULL, /* notationDecl */
65 NULL, /* attributeDecl */
66 NULL, /* elementDecl */
67 NULL, /* unparsedEntityDecl */
68 NULL, /* setDocumentLocator */
69 NULL, /* startDocument */
70 NULL, /* endDocument */
71 NULL, /* startElement */
72 NULL, /* endElement */
73 NULL, /* reference */
74 NULL, /* characters */
75 NULL, /* ignorableWhitespace */
76 NULL, /* processingInstruction */
77 NULL, /* comment */
78 NULL, /* xmlParserWarning */
79 NULL, /* xmlParserError */
80 NULL, /* xmlParserError */
81 NULL, /* getParameterEntity */
82};
83
84xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
85extern xmlSAXHandlerPtr debugSAXHandler;
86
87/************************************************************************
88 * *
89 * Debug Handlers *
90 * *
91 ************************************************************************/
92
/**
 * isStandaloneDebug:
 * @ctx:  user data passed to the callback (unused)
 *
 * Debug version of the "is this document standalone ?" query: it only
 * logs the call on stdout.
 *
 * Returns 0 in all cases
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
107
/**
 * hasInternalSubsetDebug:
 * @ctx:  user data passed to the callback (unused)
 *
 * Debug version of the "does this document have an internal subset ?"
 * query: it only logs the call on stdout.
 *
 * Returns 0 in all cases
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
122
/**
 * hasExternalSubsetDebug:
 * @ctx:  user data passed to the callback (unused)
 *
 * Debug version of the "does this document have an external subset ?"
 * query: it only logs the call on stdout.
 *
 * Returns 0 in all cases
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
137
138/**
139 * hasInternalSubsetDebug:
140 * @ctxt: An XML parser context
141 *
142 * Does this document has an internal subset
143 */
144void
145internalSubsetDebug(void *ctx, const xmlChar *name,
146 const xmlChar *ExternalID, const xmlChar *SystemID)
147{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000148 fprintf(stdout, "SAX.internalSubset(%s,", name);
149 if (ExternalID == NULL)
150 fprintf(stdout, " ,");
151 else
152 fprintf(stdout, " %s,", ExternalID);
153 if (SystemID == NULL)
154 fprintf(stdout, " )\n");
155 else
156 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000157}
158
159/**
160 * resolveEntityDebug:
161 * @ctxt: An XML parser context
162 * @publicId: The public ID of the entity
163 * @systemId: The system ID of the entity
164 *
165 * Special entity resolver, better left to the parser, it has
166 * more context than the application layer.
167 * The default behaviour is to NOT resolve the entities, in that case
168 * the ENTITY_REF nodes are built in the structure (and the parameter
169 * values).
170 *
171 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
172 */
173xmlParserInputPtr
174resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
175{
176 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
177
178
179 fprintf(stdout, "SAX.resolveEntity(");
180 if (publicId != NULL)
181 fprintf(stdout, "%s", (char *)publicId);
182 else
183 fprintf(stdout, " ");
184 if (systemId != NULL)
185 fprintf(stdout, ", %s)\n", (char *)systemId);
186 else
187 fprintf(stdout, ", )\n");
188/*********
189 if (systemId != NULL) {
190 return(xmlNewInputFromFile(ctxt, (char *) systemId));
191 }
192 *********/
193 return(NULL);
194}
195
196/**
197 * getEntityDebug:
198 * @ctxt: An XML parser context
199 * @name: The entity name
200 *
201 * Get an entity by name
202 *
203 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
204 */
205xmlEntityPtr
206getEntityDebug(void *ctx, const xmlChar *name)
207{
208 fprintf(stdout, "SAX.getEntity(%s)\n", name);
209 return(NULL);
210}
211
212/**
213 * getParameterEntityDebug:
214 * @ctxt: An XML parser context
215 * @name: The entity name
216 *
217 * Get a parameter entity by name
218 *
219 * Returns the xmlParserInputPtr
220 */
221xmlEntityPtr
222getParameterEntityDebug(void *ctx, const xmlChar *name)
223{
224 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
225 return(NULL);
226}
227
228
229/**
230 * entityDeclDebug:
231 * @ctxt: An XML parser context
232 * @name: the entity name
233 * @type: the entity type
234 * @publicId: The public ID of the entity
235 * @systemId: The system ID of the entity
236 * @content: the entity value (without processing).
237 *
238 * An entity definition has been parsed
239 */
240void
241entityDeclDebug(void *ctx, const xmlChar *name, int type,
242 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
243{
244 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
245 name, type, publicId, systemId, content);
246}
247
248/**
249 * attributeDeclDebug:
250 * @ctxt: An XML parser context
251 * @name: the attribute name
252 * @type: the attribute type
253 *
254 * An attribute definition has been parsed
255 */
256void
257attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
258 int type, int def, const xmlChar *defaultValue,
259 xmlEnumerationPtr tree)
260{
261 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
262 elem, name, type, def, defaultValue);
263}
264
265/**
266 * elementDeclDebug:
267 * @ctxt: An XML parser context
268 * @name: the element name
269 * @type: the element type
270 * @content: the element value (without processing).
271 *
272 * An element definition has been parsed
273 */
274void
275elementDeclDebug(void *ctx, const xmlChar *name, int type,
276 xmlElementContentPtr content)
277{
278 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
279 name, type);
280}
281
282/**
283 * notationDeclDebug:
284 * @ctxt: An XML parser context
285 * @name: The name of the notation
286 * @publicId: The public ID of the entity
287 * @systemId: The system ID of the entity
288 *
289 * What to do when a notation declaration has been parsed.
290 */
291void
292notationDeclDebug(void *ctx, const xmlChar *name,
293 const xmlChar *publicId, const xmlChar *systemId)
294{
295 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
296 (char *) name, (char *) publicId, (char *) systemId);
297}
298
299/**
300 * unparsedEntityDeclDebug:
301 * @ctxt: An XML parser context
302 * @name: The name of the entity
303 * @publicId: The public ID of the entity
304 * @systemId: The system ID of the entity
305 * @notationName: the name of the notation
306 *
307 * What to do when an unparsed entity declaration is parsed
308 */
309void
310unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
311 const xmlChar *publicId, const xmlChar *systemId,
312 const xmlChar *notationName)
313{
314 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
315 (char *) name, (char *) publicId, (char *) systemId,
316 (char *) notationName);
317}
318
319/**
320 * setDocumentLocatorDebug:
321 * @ctxt: An XML parser context
322 * @loc: A SAX Locator
323 *
324 * Receive the document locator at startup, actually xmlDefaultSAXLocator
325 * Everything is available on the context, so this is useless in our case.
326 */
327void
328setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
329{
330 fprintf(stdout, "SAX.setDocumentLocator()\n");
331}
332
/**
 * startDocumentDebug:
 * @ctx:  user data passed to the callback (unused)
 *
 * Logs on stdout that document processing has started.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.startDocument()\n", stdout);
}
344
/**
 * endDocumentDebug:
 * @ctx:  user data passed to the callback (unused)
 *
 * Logs on stdout that the end of the document has been detected.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.endDocument()\n", stdout);
}
356
357/**
358 * startElementDebug:
359 * @ctxt: An XML parser context
360 * @name: The element name
361 *
362 * called when an opening tag has been processed.
363 */
364void
365startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
366{
367 int i;
368
369 fprintf(stdout, "SAX.startElement(%s", (char *) name);
370 if (atts != NULL) {
371 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000372 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000373 if (atts[i] != NULL) {
374 unsigned char output[40];
375 const unsigned char *att = atts[i];
376 int outlen, attlen;
377 fprintf(stdout, "='");
378 while ((attlen = strlen((char*)att)) > 0) {
379 outlen = sizeof output - 1;
380 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
381 fprintf(stdout, "%.*s", outlen, output);
382 att += attlen;
383 }
384 fprintf(stdout, "'");
385 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000386 }
387 }
388 fprintf(stdout, ")\n");
389}
390
391/**
392 * endElementDebug:
393 * @ctxt: An XML parser context
394 * @name: The element name
395 *
396 * called when the end of an element has been detected.
397 */
398void
399endElementDebug(void *ctx, const xmlChar *name)
400{
401 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
402}
403
404/**
405 * charactersDebug:
406 * @ctxt: An XML parser context
407 * @ch: a xmlChar string
408 * @len: the number of xmlChar
409 *
410 * receiving some chars from the parser.
411 * Question: how much at a time ???
412 */
413void
414charactersDebug(void *ctx, const xmlChar *ch, int len)
415{
Daniel Veillarde010c172000-08-28 10:04:51 +0000416 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000417 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000418
Daniel Veillard4948eb42000-08-29 09:41:15 +0000419 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000420 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000421
422 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000423}
424
425/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000426 * cdataDebug:
427 * @ctxt: An XML parser context
428 * @ch: a xmlChar string
429 * @len: the number of xmlChar
430 *
431 * receiving some cdata chars from the parser.
432 * Question: how much at a time ???
433 */
434void
435cdataDebug(void *ctx, const xmlChar *ch, int len)
436{
437 unsigned char output[40];
438 int inlen = len, outlen = 30;
439
440 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
441 output[outlen] = 0;
442
443 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
444}
445
446/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000447 * referenceDebug:
448 * @ctxt: An XML parser context
449 * @name: The entity name
450 *
451 * called when an entity reference is detected.
452 */
453void
454referenceDebug(void *ctx, const xmlChar *name)
455{
456 fprintf(stdout, "SAX.reference(%s)\n", name);
457}
458
459/**
460 * ignorableWhitespaceDebug:
461 * @ctxt: An XML parser context
462 * @ch: a xmlChar string
463 * @start: the first char in the string
464 * @len: the number of xmlChar
465 *
466 * receiving some ignorable whitespaces from the parser.
467 * Question: how much at a time ???
468 */
469void
470ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
471{
Daniel Veillard87b95392000-08-12 21:12:04 +0000472 char output[40];
473 int i;
474
475 for (i = 0;(i<len) && (i < 30);i++)
476 output[i] = ch[i];
477 output[i] = 0;
478
479 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000480}
481
482/**
483 * processingInstructionDebug:
484 * @ctxt: An XML parser context
485 * @target: the target name
486 * @data: the PI data's
487 * @len: the number of xmlChar
488 *
489 * A processing instruction has been parsed.
490 */
491void
492processingInstructionDebug(void *ctx, const xmlChar *target,
493 const xmlChar *data)
494{
495 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
496 (char *) target, (char *) data);
497}
498
499/**
500 * commentDebug:
501 * @ctxt: An XML parser context
502 * @value: the comment content
503 *
504 * A comment has been parsed.
505 */
506void
507commentDebug(void *ctx, const xmlChar *value)
508{
509 fprintf(stdout, "SAX.comment(%s)\n", value);
510}
511
/**
 * warningDebug:
 * @ctx:  user data passed to the callback (unused)
 * @msg:  printf-style format of the message
 * @...:  extra parameters consumed by @msg
 *
 * Prints a parser warning on stdout, prefixed with "SAX.warning: ".
 * No newline is added; the message is expected to carry its own.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.warning: ", stdout);
    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
531
/**
 * errorDebug:
 * @ctx:  user data passed to the callback (unused)
 * @msg:  printf-style format of the message
 * @...:  extra parameters consumed by @msg
 *
 * Prints a parser error on stdout, prefixed with "SAX.error: ".
 * No newline is added; the message is expected to carry its own.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.error: ", stdout);
    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
551
/**
 * fatalErrorDebug:
 * @ctx:  user data passed to the callback (unused)
 * @msg:  printf-style format of the message
 * @...:  extra parameters consumed by @msg
 *
 * Prints a fatal parser error on stdout, prefixed with "SAX.fatalError: ".
 * No newline is added; the message is expected to carry its own.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.fatalError: ", stdout);
    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
571
/*
 * SAX handler block wiring every callback to its *Debug tracer defined
 * above. The table is positional: entries must stay in the field order of
 * xmlSAXHandler (same order as the slot comments in emptySAXHandlerStruct).
 */
xmlSAXHandler debugSAXHandlerStruct = {
    internalSubsetDebug,
    isStandaloneDebug,
    hasInternalSubsetDebug,
    hasExternalSubsetDebug,
    resolveEntityDebug,
    getEntityDebug,
    entityDeclDebug,
    notationDeclDebug,
    attributeDeclDebug,
    elementDeclDebug,
    unparsedEntityDeclDebug,
    setDocumentLocatorDebug,
    startDocumentDebug,
    endDocumentDebug,
    startElementDebug,
    endElementDebug,
    referenceDebug,
    charactersDebug,
    ignorableWhitespaceDebug,
    processingInstructionDebug,
    commentDebug,
    warningDebug,
    errorDebug,
    fatalErrorDebug,
    getParameterEntityDebug,
    cdataDebug, /* CDATA callback slot */
    NULL /* last slot left unset; NOTE(review): presumably externalSubset - confirm against the xmlSAXHandler declaration */
};

/* Handler pointer used by parseSAXFile() for the traced parsing pass. */
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000603/************************************************************************
604 * *
605 * Debug *
606 * *
607 ************************************************************************/
608
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000609void parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000610 htmlDocPtr doc = NULL;
611
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000612 /*
613 * Empty callbacks for checking
614 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000615 if (push) {
616 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000617
Daniel Veillard87b95392000-08-12 21:12:04 +0000618 f = fopen(filename, "r");
619 if (f != NULL) {
620 int res, size = 3;
621 char chars[4096];
622 htmlParserCtxtPtr ctxt;
623
624 /* if (repeat) */
625 size = 4096;
626 res = fread(chars, 1, 4, f);
627 if (res > 0) {
628 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
629 chars, res, filename, 0);
630 while ((res = fread(chars, 1, size, f)) > 0) {
631 htmlParseChunk(ctxt, chars, res, 0);
632 }
633 htmlParseChunk(ctxt, chars, 0, 1);
634 doc = ctxt->myDoc;
635 htmlFreeParserCtxt(ctxt);
636 }
637 if (doc != NULL) {
638 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
639 xmlFreeDoc(doc);
640 }
641 fclose(f);
642 }
643 if (!noout) {
644 f = fopen(filename, "r");
645 if (f != NULL) {
646 int res, size = 3;
647 char chars[4096];
648 htmlParserCtxtPtr ctxt;
649
650 /* if (repeat) */
651 size = 4096;
652 res = fread(chars, 1, 4, f);
653 if (res > 0) {
654 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
655 chars, res, filename, 0);
656 while ((res = fread(chars, 1, size, f)) > 0) {
657 htmlParseChunk(ctxt, chars, res, 0);
658 }
659 htmlParseChunk(ctxt, chars, 0, 1);
660 doc = ctxt->myDoc;
661 htmlFreeParserCtxt(ctxt);
662 }
663 if (doc != NULL) {
664 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
665 xmlFreeDoc(doc);
666 }
667 fclose(f);
668 }
669 }
670 } else {
671 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000672 if (doc != NULL) {
673 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
674 xmlFreeDoc(doc);
675 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000676
677 if (!noout) {
678 /*
679 * Debug callback
680 */
681 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
682 if (doc != NULL) {
683 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
684 xmlFreeDoc(doc);
685 }
686 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000687 }
688}
689
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000690void parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000691 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000692
693 /*
694 * build an HTML tree from a string;
695 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000696 if (push) {
697 FILE *f;
698
699 f = fopen(filename, "r");
700 if (f != NULL) {
701 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000702 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000703 htmlParserCtxtPtr ctxt;
704
Daniel Veillard87b95392000-08-12 21:12:04 +0000705 /* if (repeat) */
706 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000707 res = fread(chars, 1, 4, f);
708 if (res > 0) {
709 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
710 chars, res, filename, 0);
711 while ((res = fread(chars, 1, size, f)) > 0) {
712 htmlParseChunk(ctxt, chars, res, 0);
713 }
714 htmlParseChunk(ctxt, chars, 0, 1);
715 doc = ctxt->myDoc;
716 htmlFreeParserCtxt(ctxt);
717 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000718 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000719 }
720 } else {
721 doc = htmlParseFile(filename, NULL);
722 }
723 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000724 xmlGenericError(xmlGenericErrorContext,
725 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000726 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000727
728 /*
729 * test intermediate copy if needed.
730 */
731 if (copy) {
732 tmp = doc;
733 doc = xmlCopyDoc(doc, 1);
734 xmlFreeDoc(tmp);
735 }
736
737 /*
738 * print it.
739 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000740 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000741#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000742 if (!debug) {
743 if (encoding)
744 htmlSaveFileEnc("-", doc, encoding);
745 else
746 htmlDocDump(stdout, doc);
747 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000748 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000749#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000750 if (encoding)
751 htmlSaveFileEnc("-", doc, encoding);
752 else
753 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000754#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000755 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000756
757 /*
758 * free it.
759 */
760 xmlFreeDoc(doc);
761}
762
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000763int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000764 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000765 int files = 0;
766
767 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000768#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000769 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
770 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000771 else
772#endif
773 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000774 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000775 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
776 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000777 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
778 sax++;
779 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
780 noout++;
781 else if ((!strcmp(argv[i], "-repeat")) ||
782 (!strcmp(argv[i], "--repeat")))
783 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000784 else if ((!strcmp(argv[i], "-encode")) ||
785 (!strcmp(argv[i], "--encode"))) {
786 i++;
787 encoding = argv[i];
788 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000789 }
790 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000791 if ((!strcmp(argv[i], "-encode")) ||
792 (!strcmp(argv[i], "--encode"))) {
793 i++;
794 continue;
795 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000796 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000797 if (repeat) {
798 for (count = 0;count < 100 * repeat;count++) {
799 if (sax)
800 parseSAXFile(argv[i]);
801 else
802 parseAndPrintFile(argv[i]);
803 }
804 } else {
805 if (sax)
806 parseSAXFile(argv[i]);
807 else
808 parseAndPrintFile(argv[i]);
809 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000810 files ++;
811 }
812 }
813 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000814 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000815 argv[0]);
816 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000817#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000818 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000819#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000820 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000821 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000822 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000823 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000824 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000825 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000826 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000827 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000828 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000829
830 return(0);
831}
Daniel Veillard361d8452000-04-03 19:48:13 +0000832#else /* !LIBXML_HTML_ENABLED */
833#include <stdio.h>
/*
 * Fallback entry point used when the library was built without HTML
 * support: report it and exit successfully.
 */
int main(int argc, char **argv) {
    const char *prog = argv[0];

    (void) argc;
    fprintf(stdout, "%s : HTML support not compiled in\n", prog);
    return 0;
}
838#endif