blob: 61fc95e7619d2d9ef45cba0d18d81b8caef69bef [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000011#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000013#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014
15#include <stdio.h>
16#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000017#include <stdarg.h>
18
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019
20#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000021#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000023#ifdef HAVE_SYS_STAT_H
24#include <sys/stat.h>
25#endif
26#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000032#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000033#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000034#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000035
Daniel Veillard7c1206f1999-10-14 09:10:25 +000036#include "xmlmemory.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000037#include "HTMLparser.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000038#include "HTMLtree.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039#include "debugXML.h"
40
/*
 * Command-line switches.  Each one counts how many times the matching
 * option was seen by main(); zero means the option was not given.
 */
static int debug = 0;	/* dump the tree with xmlDebugDumpDocument() */
static int copy = 0;	/* run the parsed tree through xmlCopyDoc() first */
static int sax = 0;	/* run the SAX callback test instead of building output */
static int repeat = 0;	/* parse every file 100 * repeat times */
static int noout = 0;	/* suppress the result output */
static int push = 0;	/* feed the file through the push parser */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000047
Daniel Veillard7c1206f1999-10-14 09:10:25 +000048xmlSAXHandler emptySAXHandlerStruct = {
49 NULL, /* internalSubset */
50 NULL, /* isStandalone */
51 NULL, /* hasInternalSubset */
52 NULL, /* hasExternalSubset */
53 NULL, /* resolveEntity */
54 NULL, /* getEntity */
55 NULL, /* entityDecl */
56 NULL, /* notationDecl */
57 NULL, /* attributeDecl */
58 NULL, /* elementDecl */
59 NULL, /* unparsedEntityDecl */
60 NULL, /* setDocumentLocator */
61 NULL, /* startDocument */
62 NULL, /* endDocument */
63 NULL, /* startElement */
64 NULL, /* endElement */
65 NULL, /* reference */
66 NULL, /* characters */
67 NULL, /* ignorableWhitespace */
68 NULL, /* processingInstruction */
69 NULL, /* comment */
70 NULL, /* xmlParserWarning */
71 NULL, /* xmlParserError */
72 NULL, /* xmlParserError */
73 NULL, /* getParameterEntity */
74};
75
76xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
77extern xmlSAXHandlerPtr debugSAXHandler;
78
79/************************************************************************
80 * *
81 * Debug Handlers *
82 * *
83 ************************************************************************/
84
/**
 * isStandaloneDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 *
 * Debug version of the "is this document standalone ?" callback:
 * it only traces the call on stdout.
 *
 * Returns 0 in all cases
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
99
/**
 * hasInternalSubsetDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 *
 * Debug version of the "does this document have an internal subset ?"
 * callback: it only traces the call on stdout.
 *
 * Returns 0 in all cases
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
114
/**
 * hasExternalSubsetDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 *
 * Debug version of the "does this document have an external subset ?"
 * callback: it only traces the call on stdout.
 *
 * Returns 0 in all cases
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
129
130/**
131 * hasInternalSubsetDebug:
132 * @ctxt: An XML parser context
133 *
134 * Does this document has an internal subset
135 */
136void
137internalSubsetDebug(void *ctx, const xmlChar *name,
138 const xmlChar *ExternalID, const xmlChar *SystemID)
139{
140 /* xmlDtdPtr externalSubset; */
141
142 fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
143 name, ExternalID, SystemID);
144
145/***********
146 if ((ExternalID != NULL) || (SystemID != NULL)) {
147 externalSubset = xmlParseDTD(ExternalID, SystemID);
148 if (externalSubset != NULL) {
149 xmlFreeDtd(externalSubset);
150 }
151 }
152 ***********/
153}
154
155/**
156 * resolveEntityDebug:
157 * @ctxt: An XML parser context
158 * @publicId: The public ID of the entity
159 * @systemId: The system ID of the entity
160 *
161 * Special entity resolver, better left to the parser, it has
162 * more context than the application layer.
163 * The default behaviour is to NOT resolve the entities, in that case
164 * the ENTITY_REF nodes are built in the structure (and the parameter
165 * values).
166 *
167 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
168 */
169xmlParserInputPtr
170resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
171{
172 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
173
174
175 fprintf(stdout, "SAX.resolveEntity(");
176 if (publicId != NULL)
177 fprintf(stdout, "%s", (char *)publicId);
178 else
179 fprintf(stdout, " ");
180 if (systemId != NULL)
181 fprintf(stdout, ", %s)\n", (char *)systemId);
182 else
183 fprintf(stdout, ", )\n");
184/*********
185 if (systemId != NULL) {
186 return(xmlNewInputFromFile(ctxt, (char *) systemId));
187 }
188 *********/
189 return(NULL);
190}
191
192/**
193 * getEntityDebug:
194 * @ctxt: An XML parser context
195 * @name: The entity name
196 *
197 * Get an entity by name
198 *
199 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
200 */
201xmlEntityPtr
202getEntityDebug(void *ctx, const xmlChar *name)
203{
204 fprintf(stdout, "SAX.getEntity(%s)\n", name);
205 return(NULL);
206}
207
208/**
209 * getParameterEntityDebug:
210 * @ctxt: An XML parser context
211 * @name: The entity name
212 *
213 * Get a parameter entity by name
214 *
215 * Returns the xmlParserInputPtr
216 */
217xmlEntityPtr
218getParameterEntityDebug(void *ctx, const xmlChar *name)
219{
220 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
221 return(NULL);
222}
223
224
225/**
226 * entityDeclDebug:
227 * @ctxt: An XML parser context
228 * @name: the entity name
229 * @type: the entity type
230 * @publicId: The public ID of the entity
231 * @systemId: The system ID of the entity
232 * @content: the entity value (without processing).
233 *
234 * An entity definition has been parsed
235 */
236void
237entityDeclDebug(void *ctx, const xmlChar *name, int type,
238 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
239{
240 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
241 name, type, publicId, systemId, content);
242}
243
244/**
245 * attributeDeclDebug:
246 * @ctxt: An XML parser context
247 * @name: the attribute name
248 * @type: the attribute type
249 *
250 * An attribute definition has been parsed
251 */
252void
253attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
254 int type, int def, const xmlChar *defaultValue,
255 xmlEnumerationPtr tree)
256{
257 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
258 elem, name, type, def, defaultValue);
259}
260
261/**
262 * elementDeclDebug:
263 * @ctxt: An XML parser context
264 * @name: the element name
265 * @type: the element type
266 * @content: the element value (without processing).
267 *
268 * An element definition has been parsed
269 */
270void
271elementDeclDebug(void *ctx, const xmlChar *name, int type,
272 xmlElementContentPtr content)
273{
274 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
275 name, type);
276}
277
278/**
279 * notationDeclDebug:
280 * @ctxt: An XML parser context
281 * @name: The name of the notation
282 * @publicId: The public ID of the entity
283 * @systemId: The system ID of the entity
284 *
285 * What to do when a notation declaration has been parsed.
286 */
287void
288notationDeclDebug(void *ctx, const xmlChar *name,
289 const xmlChar *publicId, const xmlChar *systemId)
290{
291 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
292 (char *) name, (char *) publicId, (char *) systemId);
293}
294
295/**
296 * unparsedEntityDeclDebug:
297 * @ctxt: An XML parser context
298 * @name: The name of the entity
299 * @publicId: The public ID of the entity
300 * @systemId: The system ID of the entity
301 * @notationName: the name of the notation
302 *
303 * What to do when an unparsed entity declaration is parsed
304 */
305void
306unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
307 const xmlChar *publicId, const xmlChar *systemId,
308 const xmlChar *notationName)
309{
310 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
311 (char *) name, (char *) publicId, (char *) systemId,
312 (char *) notationName);
313}
314
315/**
316 * setDocumentLocatorDebug:
317 * @ctxt: An XML parser context
318 * @loc: A SAX Locator
319 *
320 * Receive the document locator at startup, actually xmlDefaultSAXLocator
321 * Everything is available on the context, so this is useless in our case.
322 */
323void
324setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
325{
326 fprintf(stdout, "SAX.setDocumentLocator()\n");
327}
328
/**
 * startDocumentDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 *
 * Debug version of the start-of-document callback: it only traces the
 * call on stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.startDocument()\n", stdout);
}
340
/**
 * endDocumentDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 *
 * Debug version of the end-of-document callback: it only traces the
 * call on stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.endDocument()\n", stdout);
}
352
353/**
354 * startElementDebug:
355 * @ctxt: An XML parser context
356 * @name: The element name
357 *
358 * called when an opening tag has been processed.
359 */
360void
361startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
362{
363 int i;
364
365 fprintf(stdout, "SAX.startElement(%s", (char *) name);
366 if (atts != NULL) {
367 for (i = 0;(atts[i] != NULL);i++) {
368 fprintf(stdout, ", %s='", atts[i++]);
369 fprintf(stdout, "%s'", atts[i]);
370 }
371 }
372 fprintf(stdout, ")\n");
373}
374
375/**
376 * endElementDebug:
377 * @ctxt: An XML parser context
378 * @name: The element name
379 *
380 * called when the end of an element has been detected.
381 */
382void
383endElementDebug(void *ctx, const xmlChar *name)
384{
385 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
386}
387
388/**
389 * charactersDebug:
390 * @ctxt: An XML parser context
391 * @ch: a xmlChar string
392 * @len: the number of xmlChar
393 *
394 * receiving some chars from the parser.
395 * Question: how much at a time ???
396 */
397void
398charactersDebug(void *ctx, const xmlChar *ch, int len)
399{
400 int i;
401
402 fprintf(stdout, "SAX.characters(");
403 for (i = 0;(i < len) && (i < 30);i++)
404 fprintf(stdout, "%c", ch[i]);
405 fprintf(stdout, ", %d)\n", len);
406}
407
408/**
409 * referenceDebug:
410 * @ctxt: An XML parser context
411 * @name: The entity name
412 *
413 * called when an entity reference is detected.
414 */
415void
416referenceDebug(void *ctx, const xmlChar *name)
417{
418 fprintf(stdout, "SAX.reference(%s)\n", name);
419}
420
421/**
422 * ignorableWhitespaceDebug:
423 * @ctxt: An XML parser context
424 * @ch: a xmlChar string
425 * @start: the first char in the string
426 * @len: the number of xmlChar
427 *
428 * receiving some ignorable whitespaces from the parser.
429 * Question: how much at a time ???
430 */
431void
432ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
433{
434 fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
435 (char *) ch, len);
436}
437
438/**
439 * processingInstructionDebug:
440 * @ctxt: An XML parser context
441 * @target: the target name
442 * @data: the PI data's
443 * @len: the number of xmlChar
444 *
445 * A processing instruction has been parsed.
446 */
447void
448processingInstructionDebug(void *ctx, const xmlChar *target,
449 const xmlChar *data)
450{
451 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
452 (char *) target, (char *) data);
453}
454
455/**
456 * commentDebug:
457 * @ctxt: An XML parser context
458 * @value: the comment content
459 *
460 * A comment has been parsed.
461 */
462void
463commentDebug(void *ctx, const xmlChar *value)
464{
465 fprintf(stdout, "SAX.comment(%s)\n", value);
466}
467
/**
 * warningDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug version of the warning callback: it writes "SAX.warning: "
 * followed by the formatted message on stdout.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.warning: ", stdout);

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
487
/**
 * errorDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug version of the error callback: it writes "SAX.error: "
 * followed by the formatted message on stdout.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.error: ", stdout);

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
507
/**
 * fatalErrorDebug:
 * @ctx:  the context pointer handed to the callback (unused here)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug version of the fatal error callback: it writes
 * "SAX.fatalError: " followed by the formatted message on stdout.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.fatalError: ", stdout);

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
527
528xmlSAXHandler debugSAXHandlerStruct = {
529 internalSubsetDebug,
530 isStandaloneDebug,
531 hasInternalSubsetDebug,
532 hasExternalSubsetDebug,
533 resolveEntityDebug,
534 getEntityDebug,
535 entityDeclDebug,
536 notationDeclDebug,
537 attributeDeclDebug,
538 elementDeclDebug,
539 unparsedEntityDeclDebug,
540 setDocumentLocatorDebug,
541 startDocumentDebug,
542 endDocumentDebug,
543 startElementDebug,
544 endElementDebug,
545 referenceDebug,
546 charactersDebug,
547 ignorableWhitespaceDebug,
548 processingInstructionDebug,
549 commentDebug,
550 warningDebug,
551 errorDebug,
552 fatalErrorDebug,
553 getParameterEntityDebug,
554};
555
556xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000557/************************************************************************
558 * *
559 * Debug *
560 * *
561 ************************************************************************/
562
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000563void parseSAXFile(char *filename) {
564 htmlDocPtr doc;
565 /*
566 * Empty callbacks for checking
567 */
568 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
569 if (doc != NULL) {
570 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
571 xmlFreeDoc(doc);
572 }
573
574 if (!noout) {
575 /*
576 * Debug callback
577 */
578 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
579 if (doc != NULL) {
580 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
581 xmlFreeDoc(doc);
582 }
583 }
584}
585
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000586void parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000587 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000588
589 /*
590 * build an HTML tree from a string;
591 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000592 if (push) {
593 FILE *f;
594
595 f = fopen(filename, "r");
596 if (f != NULL) {
597 int res, size = 3;
598 char chars[1024];
599 htmlParserCtxtPtr ctxt;
600
601 if (repeat)
602 size = 1024;
603 res = fread(chars, 1, 4, f);
604 if (res > 0) {
605 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
606 chars, res, filename, 0);
607 while ((res = fread(chars, 1, size, f)) > 0) {
608 htmlParseChunk(ctxt, chars, res, 0);
609 }
610 htmlParseChunk(ctxt, chars, 0, 1);
611 doc = ctxt->myDoc;
612 htmlFreeParserCtxt(ctxt);
613 }
614 }
615 } else {
616 doc = htmlParseFile(filename, NULL);
617 }
618 if (doc == NULL) {
619 fprintf(stderr, "Could not parse %s\n", filename);
620 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000621
622 /*
623 * test intermediate copy if needed.
624 */
625 if (copy) {
626 tmp = doc;
627 doc = xmlCopyDoc(doc, 1);
628 xmlFreeDoc(tmp);
629 }
630
631 /*
632 * print it.
633 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000634 if (!noout) {
635 if (!debug)
636 htmlDocDump(stdout, doc);
637 else
638 xmlDebugDumpDocument(stdout, doc);
639 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000640
641 /*
642 * free it.
643 */
644 xmlFreeDoc(doc);
645}
646
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000647int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000648 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000649 int files = 0;
650
651 for (i = 1; i < argc ; i++) {
652 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
653 debug++;
654 else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
655 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000656 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
657 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000658 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
659 sax++;
660 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
661 noout++;
662 else if ((!strcmp(argv[i], "-repeat")) ||
663 (!strcmp(argv[i], "--repeat")))
664 repeat++;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000665 }
666 for (i = 1; i < argc ; i++) {
667 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000668 if (repeat) {
669 for (count = 0;count < 100 * repeat;count++) {
670 if (sax)
671 parseSAXFile(argv[i]);
672 else
673 parseAndPrintFile(argv[i]);
674 }
675 } else {
676 if (sax)
677 parseSAXFile(argv[i]);
678 else
679 parseAndPrintFile(argv[i]);
680 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000681 files ++;
682 }
683 }
684 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000685 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000686 argv[0]);
687 printf("\tParse the HTML files and output the result of the parsing\n");
688 printf("\t--debug : dump a debug tree of the in-memory document\n");
689 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000690 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000691 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000692 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000693 printf("\t--push : use the push mode parser\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000694 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000695 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000696 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000697
698 return(0);
699}