blob: 541620e344c5252b2630c043b5c4eedcce4615cf [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000011#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000013#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014
Daniel Veillardb71379b2000-10-09 12:30:39 +000015#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000016#ifdef LIBXML_HTML_ENABLED
17
Daniel Veillard7f7d1111999-09-22 09:46:25 +000018#include <stdio.h>
19#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000020#include <stdarg.h>
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022
23#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000024#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000025#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000026#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000035#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000036#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000037#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000038
Daniel Veillard361d8452000-04-03 19:48:13 +000039#include <libxml/xmlmemory.h>
40#include <libxml/HTMLparser.h>
41#include <libxml/HTMLtree.h>
42#include <libxml/debugXML.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000043
/*
 * Command line switches.  main() bumps (or, for encoding, assigns) them
 * while scanning argv; the parse routines below only read them.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* dump the tree with xmlDebugDumpDocument() */
#endif
static int copy = 0;		/* run the document through xmlCopyDoc() first */
static int sax = 0;		/* use parseSAXFile() instead of building a tree */
static int repeat = 0;		/* parse each file 100 * repeat times */
static int noout = 0;		/* suppress the output step */
static int push = 0;		/* feed the file through the push parser */
static char *encoding = NULL;	/* output encoding for htmlSaveFileEnc() */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000053
Daniel Veillard7c1206f1999-10-14 09:10:25 +000054xmlSAXHandler emptySAXHandlerStruct = {
55 NULL, /* internalSubset */
56 NULL, /* isStandalone */
57 NULL, /* hasInternalSubset */
58 NULL, /* hasExternalSubset */
59 NULL, /* resolveEntity */
60 NULL, /* getEntity */
61 NULL, /* entityDecl */
62 NULL, /* notationDecl */
63 NULL, /* attributeDecl */
64 NULL, /* elementDecl */
65 NULL, /* unparsedEntityDecl */
66 NULL, /* setDocumentLocator */
67 NULL, /* startDocument */
68 NULL, /* endDocument */
69 NULL, /* startElement */
70 NULL, /* endElement */
71 NULL, /* reference */
72 NULL, /* characters */
73 NULL, /* ignorableWhitespace */
74 NULL, /* processingInstruction */
75 NULL, /* comment */
76 NULL, /* xmlParserWarning */
77 NULL, /* xmlParserError */
78 NULL, /* xmlParserError */
79 NULL, /* getParameterEntity */
80};
81
82xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
83extern xmlSAXHandlerPtr debugSAXHandler;
84
85/************************************************************************
86 * *
87 * Debug Handlers *
88 * *
89 ************************************************************************/
90
/**
 * isStandaloneDebug:
 * @ctx:  user data handed over by the parser (unused)
 *
 * Debug callback for the "is this document tagged standalone ?" query.
 * It only traces the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
105
/**
 * hasInternalSubsetDebug:
 * @ctx:  user data handed over by the parser (unused)
 *
 * Debug callback for the "does this document have an internal subset ?"
 * query.  It only traces the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
120
/**
 * hasExternalSubsetDebug:
 * @ctx:  user data handed over by the parser (unused)
 *
 * Debug callback for the "does this document have an external subset ?"
 * query.  It only traces the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
135
136/**
137 * hasInternalSubsetDebug:
138 * @ctxt: An XML parser context
139 *
140 * Does this document has an internal subset
141 */
142void
143internalSubsetDebug(void *ctx, const xmlChar *name,
144 const xmlChar *ExternalID, const xmlChar *SystemID)
145{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000146 fprintf(stdout, "SAX.internalSubset(%s,", name);
147 if (ExternalID == NULL)
148 fprintf(stdout, " ,");
149 else
150 fprintf(stdout, " %s,", ExternalID);
151 if (SystemID == NULL)
152 fprintf(stdout, " )\n");
153 else
154 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000155}
156
157/**
158 * resolveEntityDebug:
159 * @ctxt: An XML parser context
160 * @publicId: The public ID of the entity
161 * @systemId: The system ID of the entity
162 *
163 * Special entity resolver, better left to the parser, it has
164 * more context than the application layer.
165 * The default behaviour is to NOT resolve the entities, in that case
166 * the ENTITY_REF nodes are built in the structure (and the parameter
167 * values).
168 *
169 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
170 */
171xmlParserInputPtr
172resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
173{
174 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
175
176
177 fprintf(stdout, "SAX.resolveEntity(");
178 if (publicId != NULL)
179 fprintf(stdout, "%s", (char *)publicId);
180 else
181 fprintf(stdout, " ");
182 if (systemId != NULL)
183 fprintf(stdout, ", %s)\n", (char *)systemId);
184 else
185 fprintf(stdout, ", )\n");
186/*********
187 if (systemId != NULL) {
188 return(xmlNewInputFromFile(ctxt, (char *) systemId));
189 }
190 *********/
191 return(NULL);
192}
193
194/**
195 * getEntityDebug:
196 * @ctxt: An XML parser context
197 * @name: The entity name
198 *
199 * Get an entity by name
200 *
201 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
202 */
203xmlEntityPtr
204getEntityDebug(void *ctx, const xmlChar *name)
205{
206 fprintf(stdout, "SAX.getEntity(%s)\n", name);
207 return(NULL);
208}
209
210/**
211 * getParameterEntityDebug:
212 * @ctxt: An XML parser context
213 * @name: The entity name
214 *
215 * Get a parameter entity by name
216 *
217 * Returns the xmlParserInputPtr
218 */
219xmlEntityPtr
220getParameterEntityDebug(void *ctx, const xmlChar *name)
221{
222 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
223 return(NULL);
224}
225
226
227/**
228 * entityDeclDebug:
229 * @ctxt: An XML parser context
230 * @name: the entity name
231 * @type: the entity type
232 * @publicId: The public ID of the entity
233 * @systemId: The system ID of the entity
234 * @content: the entity value (without processing).
235 *
236 * An entity definition has been parsed
237 */
238void
239entityDeclDebug(void *ctx, const xmlChar *name, int type,
240 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
241{
242 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
243 name, type, publicId, systemId, content);
244}
245
246/**
247 * attributeDeclDebug:
248 * @ctxt: An XML parser context
249 * @name: the attribute name
250 * @type: the attribute type
251 *
252 * An attribute definition has been parsed
253 */
254void
255attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
256 int type, int def, const xmlChar *defaultValue,
257 xmlEnumerationPtr tree)
258{
259 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
260 elem, name, type, def, defaultValue);
261}
262
263/**
264 * elementDeclDebug:
265 * @ctxt: An XML parser context
266 * @name: the element name
267 * @type: the element type
268 * @content: the element value (without processing).
269 *
270 * An element definition has been parsed
271 */
272void
273elementDeclDebug(void *ctx, const xmlChar *name, int type,
274 xmlElementContentPtr content)
275{
276 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
277 name, type);
278}
279
280/**
281 * notationDeclDebug:
282 * @ctxt: An XML parser context
283 * @name: The name of the notation
284 * @publicId: The public ID of the entity
285 * @systemId: The system ID of the entity
286 *
287 * What to do when a notation declaration has been parsed.
288 */
289void
290notationDeclDebug(void *ctx, const xmlChar *name,
291 const xmlChar *publicId, const xmlChar *systemId)
292{
293 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
294 (char *) name, (char *) publicId, (char *) systemId);
295}
296
297/**
298 * unparsedEntityDeclDebug:
299 * @ctxt: An XML parser context
300 * @name: The name of the entity
301 * @publicId: The public ID of the entity
302 * @systemId: The system ID of the entity
303 * @notationName: the name of the notation
304 *
305 * What to do when an unparsed entity declaration is parsed
306 */
307void
308unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
309 const xmlChar *publicId, const xmlChar *systemId,
310 const xmlChar *notationName)
311{
312 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
313 (char *) name, (char *) publicId, (char *) systemId,
314 (char *) notationName);
315}
316
317/**
318 * setDocumentLocatorDebug:
319 * @ctxt: An XML parser context
320 * @loc: A SAX Locator
321 *
322 * Receive the document locator at startup, actually xmlDefaultSAXLocator
323 * Everything is available on the context, so this is useless in our case.
324 */
325void
326setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
327{
328 fprintf(stdout, "SAX.setDocumentLocator()\n");
329}
330
/**
 * startDocumentDebug:
 * @ctx:  user data handed over by the parser (unused)
 *
 * Debug callback invoked when the document starts being processed;
 * traces the call on stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.startDocument()\n", stdout);
}
342
/**
 * endDocumentDebug:
 * @ctx:  user data handed over by the parser (unused)
 *
 * Debug callback invoked when the end of the document has been
 * detected; traces the call on stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.endDocument()\n", stdout);
}
354
355/**
356 * startElementDebug:
357 * @ctxt: An XML parser context
358 * @name: The element name
359 *
360 * called when an opening tag has been processed.
361 */
362void
363startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
364{
365 int i;
366
367 fprintf(stdout, "SAX.startElement(%s", (char *) name);
368 if (atts != NULL) {
369 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000370 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000371 if (atts[i] != NULL) {
372 unsigned char output[40];
373 const unsigned char *att = atts[i];
374 int outlen, attlen;
375 fprintf(stdout, "='");
376 while ((attlen = strlen((char*)att)) > 0) {
377 outlen = sizeof output - 1;
378 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
379 fprintf(stdout, "%.*s", outlen, output);
380 att += attlen;
381 }
382 fprintf(stdout, "'");
383 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000384 }
385 }
386 fprintf(stdout, ")\n");
387}
388
389/**
390 * endElementDebug:
391 * @ctxt: An XML parser context
392 * @name: The element name
393 *
394 * called when the end of an element has been detected.
395 */
396void
397endElementDebug(void *ctx, const xmlChar *name)
398{
399 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
400}
401
402/**
403 * charactersDebug:
404 * @ctxt: An XML parser context
405 * @ch: a xmlChar string
406 * @len: the number of xmlChar
407 *
408 * receiving some chars from the parser.
409 * Question: how much at a time ???
410 */
411void
412charactersDebug(void *ctx, const xmlChar *ch, int len)
413{
Daniel Veillarde010c172000-08-28 10:04:51 +0000414 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000415 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000416
Daniel Veillard4948eb42000-08-29 09:41:15 +0000417 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000418 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000419
420 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000421}
422
423/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000424 * cdataDebug:
425 * @ctxt: An XML parser context
426 * @ch: a xmlChar string
427 * @len: the number of xmlChar
428 *
429 * receiving some cdata chars from the parser.
430 * Question: how much at a time ???
431 */
432void
433cdataDebug(void *ctx, const xmlChar *ch, int len)
434{
435 unsigned char output[40];
436 int inlen = len, outlen = 30;
437
438 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
439 output[outlen] = 0;
440
441 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
442}
443
444/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000445 * referenceDebug:
446 * @ctxt: An XML parser context
447 * @name: The entity name
448 *
449 * called when an entity reference is detected.
450 */
451void
452referenceDebug(void *ctx, const xmlChar *name)
453{
454 fprintf(stdout, "SAX.reference(%s)\n", name);
455}
456
457/**
458 * ignorableWhitespaceDebug:
459 * @ctxt: An XML parser context
460 * @ch: a xmlChar string
461 * @start: the first char in the string
462 * @len: the number of xmlChar
463 *
464 * receiving some ignorable whitespaces from the parser.
465 * Question: how much at a time ???
466 */
467void
468ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
469{
Daniel Veillard87b95392000-08-12 21:12:04 +0000470 char output[40];
471 int i;
472
473 for (i = 0;(i<len) && (i < 30);i++)
474 output[i] = ch[i];
475 output[i] = 0;
476
477 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000478}
479
480/**
481 * processingInstructionDebug:
482 * @ctxt: An XML parser context
483 * @target: the target name
484 * @data: the PI data's
485 * @len: the number of xmlChar
486 *
487 * A processing instruction has been parsed.
488 */
489void
490processingInstructionDebug(void *ctx, const xmlChar *target,
491 const xmlChar *data)
492{
493 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
494 (char *) target, (char *) data);
495}
496
497/**
498 * commentDebug:
499 * @ctxt: An XML parser context
500 * @value: the comment content
501 *
502 * A comment has been parsed.
503 */
504void
505commentDebug(void *ctx, const xmlChar *value)
506{
507 fprintf(stdout, "SAX.comment(%s)\n", value);
508}
509
/**
 * warningDebug:
 * @ctx:  user data handed over by the parser (unused)
 * @msg:  printf-like format of the message to display
 * @...:  extra parameters for the message display
 *
 * Debug callback for warnings: writes "SAX.warning: " followed by the
 * formatted message to stdout.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.warning: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
529
/**
 * errorDebug:
 * @ctx:  user data handed over by the parser (unused)
 * @msg:  printf-like format of the message to display
 * @...:  extra parameters for the message display
 *
 * Debug callback for errors: writes "SAX.error: " followed by the
 * formatted message to stdout.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.error: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
549
/**
 * fatalErrorDebug:
 * @ctx:  user data handed over by the parser (unused)
 * @msg:  printf-like format of the message to display
 * @...:  extra parameters for the message display
 *
 * Debug callback for fatal errors: writes "SAX.fatalError: " followed
 * by the formatted message to stdout.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.fatalError: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
569
570xmlSAXHandler debugSAXHandlerStruct = {
571 internalSubsetDebug,
572 isStandaloneDebug,
573 hasInternalSubsetDebug,
574 hasExternalSubsetDebug,
575 resolveEntityDebug,
576 getEntityDebug,
577 entityDeclDebug,
578 notationDeclDebug,
579 attributeDeclDebug,
580 elementDeclDebug,
581 unparsedEntityDeclDebug,
582 setDocumentLocatorDebug,
583 startDocumentDebug,
584 endDocumentDebug,
585 startElementDebug,
586 endElementDebug,
587 referenceDebug,
588 charactersDebug,
589 ignorableWhitespaceDebug,
590 processingInstructionDebug,
591 commentDebug,
592 warningDebug,
593 errorDebug,
594 fatalErrorDebug,
595 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000596 cdataDebug,
597 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000598};
599
600xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000601/************************************************************************
602 * *
603 * Debug *
604 * *
605 ************************************************************************/
606
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000607void parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000608 htmlDocPtr doc = NULL;
609
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000610 /*
611 * Empty callbacks for checking
612 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000613 if (push) {
614 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000615
Daniel Veillard87b95392000-08-12 21:12:04 +0000616 f = fopen(filename, "r");
617 if (f != NULL) {
618 int res, size = 3;
619 char chars[4096];
620 htmlParserCtxtPtr ctxt;
621
622 /* if (repeat) */
623 size = 4096;
624 res = fread(chars, 1, 4, f);
625 if (res > 0) {
626 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
627 chars, res, filename, 0);
628 while ((res = fread(chars, 1, size, f)) > 0) {
629 htmlParseChunk(ctxt, chars, res, 0);
630 }
631 htmlParseChunk(ctxt, chars, 0, 1);
632 doc = ctxt->myDoc;
633 htmlFreeParserCtxt(ctxt);
634 }
635 if (doc != NULL) {
636 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
637 xmlFreeDoc(doc);
638 }
639 fclose(f);
640 }
641 if (!noout) {
642 f = fopen(filename, "r");
643 if (f != NULL) {
644 int res, size = 3;
645 char chars[4096];
646 htmlParserCtxtPtr ctxt;
647
648 /* if (repeat) */
649 size = 4096;
650 res = fread(chars, 1, 4, f);
651 if (res > 0) {
652 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
653 chars, res, filename, 0);
654 while ((res = fread(chars, 1, size, f)) > 0) {
655 htmlParseChunk(ctxt, chars, res, 0);
656 }
657 htmlParseChunk(ctxt, chars, 0, 1);
658 doc = ctxt->myDoc;
659 htmlFreeParserCtxt(ctxt);
660 }
661 if (doc != NULL) {
662 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
663 xmlFreeDoc(doc);
664 }
665 fclose(f);
666 }
667 }
668 } else {
669 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000670 if (doc != NULL) {
671 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
672 xmlFreeDoc(doc);
673 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000674
675 if (!noout) {
676 /*
677 * Debug callback
678 */
679 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
680 if (doc != NULL) {
681 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
682 xmlFreeDoc(doc);
683 }
684 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000685 }
686}
687
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000688void parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000689 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000690
691 /*
692 * build an HTML tree from a string;
693 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000694 if (push) {
695 FILE *f;
696
697 f = fopen(filename, "r");
698 if (f != NULL) {
699 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000700 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000701 htmlParserCtxtPtr ctxt;
702
Daniel Veillard87b95392000-08-12 21:12:04 +0000703 /* if (repeat) */
704 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000705 res = fread(chars, 1, 4, f);
706 if (res > 0) {
707 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
708 chars, res, filename, 0);
709 while ((res = fread(chars, 1, size, f)) > 0) {
710 htmlParseChunk(ctxt, chars, res, 0);
711 }
712 htmlParseChunk(ctxt, chars, 0, 1);
713 doc = ctxt->myDoc;
714 htmlFreeParserCtxt(ctxt);
715 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000716 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000717 }
718 } else {
719 doc = htmlParseFile(filename, NULL);
720 }
721 if (doc == NULL) {
722 fprintf(stderr, "Could not parse %s\n", filename);
723 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000724
725 /*
726 * test intermediate copy if needed.
727 */
728 if (copy) {
729 tmp = doc;
730 doc = xmlCopyDoc(doc, 1);
731 xmlFreeDoc(tmp);
732 }
733
734 /*
735 * print it.
736 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000737 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000738#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000739 if (!debug) {
740 if (encoding)
741 htmlSaveFileEnc("-", doc, encoding);
742 else
743 htmlDocDump(stdout, doc);
744 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000745 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000746#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000747 if (encoding)
748 htmlSaveFileEnc("-", doc, encoding);
749 else
750 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000751#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000752 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000753
754 /*
755 * free it.
756 */
757 xmlFreeDoc(doc);
758}
759
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000760int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000761 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000762 int files = 0;
763
764 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000765#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000766 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
767 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000768 else
769#endif
770 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000771 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000772 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
773 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000774 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
775 sax++;
776 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
777 noout++;
778 else if ((!strcmp(argv[i], "-repeat")) ||
779 (!strcmp(argv[i], "--repeat")))
780 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000781 else if ((!strcmp(argv[i], "-encode")) ||
782 (!strcmp(argv[i], "--encode"))) {
783 i++;
784 encoding = argv[i];
785 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000786 }
787 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000788 if ((!strcmp(argv[i], "-encode")) ||
789 (!strcmp(argv[i], "--encode"))) {
790 i++;
791 continue;
792 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000793 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000794 if (repeat) {
795 for (count = 0;count < 100 * repeat;count++) {
796 if (sax)
797 parseSAXFile(argv[i]);
798 else
799 parseAndPrintFile(argv[i]);
800 }
801 } else {
802 if (sax)
803 parseSAXFile(argv[i]);
804 else
805 parseAndPrintFile(argv[i]);
806 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000807 files ++;
808 }
809 }
810 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000811 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000812 argv[0]);
813 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000814#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000815 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000816#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000817 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000818 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000819 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000820 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000821 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000822 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000823 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000824 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000825 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000826
827 return(0);
828}
Daniel Veillard361d8452000-04-03 19:48:13 +0000829#else /* !LIBXML_HTML_ENABLED */
830#include <stdio.h>
/*
 * Stand-in entry point used when the library was built without HTML
 * support: report that fact on stdout and exit successfully.
 */
int main(int argc, char **argv) {
    (void) argc;

    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
835#endif