blob: 41ebea32722ac7f2c9abd30458200761e9559bc1 [file] [log] [blame]
/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000011#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000013#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014
15#include <stdio.h>
16#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000017#include <stdarg.h>
18
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019
20#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000021#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000023#ifdef HAVE_SYS_STAT_H
24#include <sys/stat.h>
25#endif
26#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000032#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000033#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000034#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000035
Daniel Veillard7c1206f1999-10-14 09:10:25 +000036#include "xmlmemory.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000037#include "HTMLparser.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000038#include "HTMLtree.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039#include "debugXML.h"
40
/*
 * Command-line switches.  main() increments each one every time the
 * matching option appears on the command line.
 */
static int debug = 0;   /* dump with xmlDebugDumpDocument() instead of htmlDocDump() */
static int copy = 0;    /* pass the parsed tree through xmlCopyDoc() before dumping */
static int sax = 0;     /* run parseSAXFile() instead of parseAndPrintFile() */
static int repeat = 0;  /* process each file 100 * repeat times */
static int noout = 0;   /* suppress the tree dump / the debug SAX pass */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000046
/*
 * Note: this is perfectly clean HTML, i.e. not a useful test.
static xmlChar buffer[] =
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
                      \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
<html>\n\
<head>\n\
  <title>This service is temporary down</title>\n\
</head>\n\
\n\
<body bgcolor=\"#FFFFFF\">\n\
<h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
We are doing our best to get it back on-line,\n\
\n\
<p>The W3C system administrators</p>\n\
</body>\n\
</html>\n\
";
 */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000066
Daniel Veillard7c1206f1999-10-14 09:10:25 +000067xmlSAXHandler emptySAXHandlerStruct = {
68 NULL, /* internalSubset */
69 NULL, /* isStandalone */
70 NULL, /* hasInternalSubset */
71 NULL, /* hasExternalSubset */
72 NULL, /* resolveEntity */
73 NULL, /* getEntity */
74 NULL, /* entityDecl */
75 NULL, /* notationDecl */
76 NULL, /* attributeDecl */
77 NULL, /* elementDecl */
78 NULL, /* unparsedEntityDecl */
79 NULL, /* setDocumentLocator */
80 NULL, /* startDocument */
81 NULL, /* endDocument */
82 NULL, /* startElement */
83 NULL, /* endElement */
84 NULL, /* reference */
85 NULL, /* characters */
86 NULL, /* ignorableWhitespace */
87 NULL, /* processingInstruction */
88 NULL, /* comment */
89 NULL, /* xmlParserWarning */
90 NULL, /* xmlParserError */
91 NULL, /* xmlParserError */
92 NULL, /* getParameterEntity */
93};
94
95xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
96extern xmlSAXHandlerPtr debugSAXHandler;
97
/************************************************************************
 *                                                                      *
 *                            Debug Handlers                            *
 *                                                                      *
 ************************************************************************/
103
/**
 * isStandaloneDebug:
 * @ctx:  the callback user data (unused)
 *
 * Debug version of the "is this document tagged standalone ?" callback.
 * Traces the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
118
/**
 * hasInternalSubsetDebug:
 * @ctx:  the callback user data (unused)
 *
 * Debug version of the "does this document have an internal subset ?"
 * callback.  Traces the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
133
/**
 * hasExternalSubsetDebug:
 * @ctx:  the callback user data (unused)
 *
 * Debug version of the "does this document have an external subset ?"
 * callback.  Traces the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
148
149/**
150 * hasInternalSubsetDebug:
151 * @ctxt: An XML parser context
152 *
153 * Does this document has an internal subset
154 */
155void
156internalSubsetDebug(void *ctx, const xmlChar *name,
157 const xmlChar *ExternalID, const xmlChar *SystemID)
158{
159 /* xmlDtdPtr externalSubset; */
160
161 fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
162 name, ExternalID, SystemID);
163
164/***********
165 if ((ExternalID != NULL) || (SystemID != NULL)) {
166 externalSubset = xmlParseDTD(ExternalID, SystemID);
167 if (externalSubset != NULL) {
168 xmlFreeDtd(externalSubset);
169 }
170 }
171 ***********/
172}
173
174/**
175 * resolveEntityDebug:
176 * @ctxt: An XML parser context
177 * @publicId: The public ID of the entity
178 * @systemId: The system ID of the entity
179 *
180 * Special entity resolver, better left to the parser, it has
181 * more context than the application layer.
182 * The default behaviour is to NOT resolve the entities, in that case
183 * the ENTITY_REF nodes are built in the structure (and the parameter
184 * values).
185 *
186 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
187 */
188xmlParserInputPtr
189resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
190{
191 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
192
193
194 fprintf(stdout, "SAX.resolveEntity(");
195 if (publicId != NULL)
196 fprintf(stdout, "%s", (char *)publicId);
197 else
198 fprintf(stdout, " ");
199 if (systemId != NULL)
200 fprintf(stdout, ", %s)\n", (char *)systemId);
201 else
202 fprintf(stdout, ", )\n");
203/*********
204 if (systemId != NULL) {
205 return(xmlNewInputFromFile(ctxt, (char *) systemId));
206 }
207 *********/
208 return(NULL);
209}
210
211/**
212 * getEntityDebug:
213 * @ctxt: An XML parser context
214 * @name: The entity name
215 *
216 * Get an entity by name
217 *
218 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
219 */
220xmlEntityPtr
221getEntityDebug(void *ctx, const xmlChar *name)
222{
223 fprintf(stdout, "SAX.getEntity(%s)\n", name);
224 return(NULL);
225}
226
227/**
228 * getParameterEntityDebug:
229 * @ctxt: An XML parser context
230 * @name: The entity name
231 *
232 * Get a parameter entity by name
233 *
234 * Returns the xmlParserInputPtr
235 */
236xmlEntityPtr
237getParameterEntityDebug(void *ctx, const xmlChar *name)
238{
239 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
240 return(NULL);
241}
242
243
244/**
245 * entityDeclDebug:
246 * @ctxt: An XML parser context
247 * @name: the entity name
248 * @type: the entity type
249 * @publicId: The public ID of the entity
250 * @systemId: The system ID of the entity
251 * @content: the entity value (without processing).
252 *
253 * An entity definition has been parsed
254 */
255void
256entityDeclDebug(void *ctx, const xmlChar *name, int type,
257 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
258{
259 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
260 name, type, publicId, systemId, content);
261}
262
263/**
264 * attributeDeclDebug:
265 * @ctxt: An XML parser context
266 * @name: the attribute name
267 * @type: the attribute type
268 *
269 * An attribute definition has been parsed
270 */
271void
272attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
273 int type, int def, const xmlChar *defaultValue,
274 xmlEnumerationPtr tree)
275{
276 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
277 elem, name, type, def, defaultValue);
278}
279
280/**
281 * elementDeclDebug:
282 * @ctxt: An XML parser context
283 * @name: the element name
284 * @type: the element type
285 * @content: the element value (without processing).
286 *
287 * An element definition has been parsed
288 */
289void
290elementDeclDebug(void *ctx, const xmlChar *name, int type,
291 xmlElementContentPtr content)
292{
293 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
294 name, type);
295}
296
297/**
298 * notationDeclDebug:
299 * @ctxt: An XML parser context
300 * @name: The name of the notation
301 * @publicId: The public ID of the entity
302 * @systemId: The system ID of the entity
303 *
304 * What to do when a notation declaration has been parsed.
305 */
306void
307notationDeclDebug(void *ctx, const xmlChar *name,
308 const xmlChar *publicId, const xmlChar *systemId)
309{
310 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
311 (char *) name, (char *) publicId, (char *) systemId);
312}
313
314/**
315 * unparsedEntityDeclDebug:
316 * @ctxt: An XML parser context
317 * @name: The name of the entity
318 * @publicId: The public ID of the entity
319 * @systemId: The system ID of the entity
320 * @notationName: the name of the notation
321 *
322 * What to do when an unparsed entity declaration is parsed
323 */
324void
325unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
326 const xmlChar *publicId, const xmlChar *systemId,
327 const xmlChar *notationName)
328{
329 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
330 (char *) name, (char *) publicId, (char *) systemId,
331 (char *) notationName);
332}
333
334/**
335 * setDocumentLocatorDebug:
336 * @ctxt: An XML parser context
337 * @loc: A SAX Locator
338 *
339 * Receive the document locator at startup, actually xmlDefaultSAXLocator
340 * Everything is available on the context, so this is useless in our case.
341 */
342void
343setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
344{
345 fprintf(stdout, "SAX.setDocumentLocator()\n");
346}
347
/**
 * startDocumentDebug:
 * @ctx:  the callback user data (unused)
 *
 * Debug version of the callback fired when the document starts being
 * processed.  Traces the call on stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.startDocument()\n", stdout);
}
359
/**
 * endDocumentDebug:
 * @ctx:  the callback user data (unused)
 *
 * Debug version of the callback fired when the end of the document has
 * been detected.  Traces the call on stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.endDocument()\n", stdout);
}
371
372/**
373 * startElementDebug:
374 * @ctxt: An XML parser context
375 * @name: The element name
376 *
377 * called when an opening tag has been processed.
378 */
379void
380startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
381{
382 int i;
383
384 fprintf(stdout, "SAX.startElement(%s", (char *) name);
385 if (atts != NULL) {
386 for (i = 0;(atts[i] != NULL);i++) {
387 fprintf(stdout, ", %s='", atts[i++]);
388 fprintf(stdout, "%s'", atts[i]);
389 }
390 }
391 fprintf(stdout, ")\n");
392}
393
394/**
395 * endElementDebug:
396 * @ctxt: An XML parser context
397 * @name: The element name
398 *
399 * called when the end of an element has been detected.
400 */
401void
402endElementDebug(void *ctx, const xmlChar *name)
403{
404 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
405}
406
407/**
408 * charactersDebug:
409 * @ctxt: An XML parser context
410 * @ch: a xmlChar string
411 * @len: the number of xmlChar
412 *
413 * receiving some chars from the parser.
414 * Question: how much at a time ???
415 */
416void
417charactersDebug(void *ctx, const xmlChar *ch, int len)
418{
419 int i;
420
421 fprintf(stdout, "SAX.characters(");
422 for (i = 0;(i < len) && (i < 30);i++)
423 fprintf(stdout, "%c", ch[i]);
424 fprintf(stdout, ", %d)\n", len);
425}
426
427/**
428 * referenceDebug:
429 * @ctxt: An XML parser context
430 * @name: The entity name
431 *
432 * called when an entity reference is detected.
433 */
434void
435referenceDebug(void *ctx, const xmlChar *name)
436{
437 fprintf(stdout, "SAX.reference(%s)\n", name);
438}
439
440/**
441 * ignorableWhitespaceDebug:
442 * @ctxt: An XML parser context
443 * @ch: a xmlChar string
444 * @start: the first char in the string
445 * @len: the number of xmlChar
446 *
447 * receiving some ignorable whitespaces from the parser.
448 * Question: how much at a time ???
449 */
450void
451ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
452{
453 fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
454 (char *) ch, len);
455}
456
457/**
458 * processingInstructionDebug:
459 * @ctxt: An XML parser context
460 * @target: the target name
461 * @data: the PI data's
462 * @len: the number of xmlChar
463 *
464 * A processing instruction has been parsed.
465 */
466void
467processingInstructionDebug(void *ctx, const xmlChar *target,
468 const xmlChar *data)
469{
470 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
471 (char *) target, (char *) data);
472}
473
474/**
475 * commentDebug:
476 * @ctxt: An XML parser context
477 * @value: the comment content
478 *
479 * A comment has been parsed.
480 */
481void
482commentDebug(void *ctx, const xmlChar *value)
483{
484 fprintf(stdout, "SAX.comment(%s)\n", value);
485}
486
/**
 * warningDebug:
 * @ctx:  the callback user data (unused)
 * @msg:  printf-style format of the message to display
 * @...:  extra parameters consumed by @msg
 *
 * Debug version of the warning callback: writes "SAX.warning: " and then
 * the formatted message to stdout.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.warning: ", stdout);
    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
506
/**
 * errorDebug:
 * @ctx:  the callback user data (unused)
 * @msg:  printf-style format of the message to display
 * @...:  extra parameters consumed by @msg
 *
 * Debug version of the error callback: writes "SAX.error: " and then the
 * formatted message to stdout.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.error: ", stdout);
    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
526
/**
 * fatalErrorDebug:
 * @ctx:  the callback user data (unused)
 * @msg:  printf-style format of the message to display
 * @...:  extra parameters consumed by @msg
 *
 * Debug version of the fatalError callback: writes "SAX.fatalError: "
 * and then the formatted message to stdout.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.fatalError: ", stdout);
    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
546
547xmlSAXHandler debugSAXHandlerStruct = {
548 internalSubsetDebug,
549 isStandaloneDebug,
550 hasInternalSubsetDebug,
551 hasExternalSubsetDebug,
552 resolveEntityDebug,
553 getEntityDebug,
554 entityDeclDebug,
555 notationDeclDebug,
556 attributeDeclDebug,
557 elementDeclDebug,
558 unparsedEntityDeclDebug,
559 setDocumentLocatorDebug,
560 startDocumentDebug,
561 endDocumentDebug,
562 startElementDebug,
563 endElementDebug,
564 referenceDebug,
565 charactersDebug,
566 ignorableWhitespaceDebug,
567 processingInstructionDebug,
568 commentDebug,
569 warningDebug,
570 errorDebug,
571 fatalErrorDebug,
572 getParameterEntityDebug,
573};
574
575xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
/************************************************************************
 *                                                                      *
 *                                Debug                                 *
 *                                                                      *
 ************************************************************************/
581
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000582void parseSAXFile(char *filename) {
583 htmlDocPtr doc;
584 /*
585 * Empty callbacks for checking
586 */
587 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
588 if (doc != NULL) {
589 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
590 xmlFreeDoc(doc);
591 }
592
593 if (!noout) {
594 /*
595 * Debug callback
596 */
597 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
598 if (doc != NULL) {
599 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
600 xmlFreeDoc(doc);
601 }
602 }
603}
604
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000605void parseAndPrintFile(char *filename) {
606 htmlDocPtr doc, tmp;
607
608 /*
609 * build an HTML tree from a string;
610 */
611 doc = htmlParseFile(filename, NULL);
612
613 /*
614 * test intermediate copy if needed.
615 */
616 if (copy) {
617 tmp = doc;
618 doc = xmlCopyDoc(doc, 1);
619 xmlFreeDoc(tmp);
620 }
621
622 /*
623 * print it.
624 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000625 if (!noout) {
626 if (!debug)
627 htmlDocDump(stdout, doc);
628 else
629 xmlDebugDumpDocument(stdout, doc);
630 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000631
632 /*
633 * free it.
634 */
635 xmlFreeDoc(doc);
636}
637
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000638void parseAndPrintBuffer(xmlChar *buf) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000639 htmlDocPtr doc, tmp;
640
641 /*
642 * build an HTML tree from a string;
643 */
644 doc = htmlParseDoc(buf, NULL);
645
646 /*
647 * test intermediate copy if needed.
648 */
649 if (copy) {
650 tmp = doc;
651 doc = xmlCopyDoc(doc, 1);
652 xmlFreeDoc(tmp);
653 }
654
655 /*
656 * print it.
657 */
658 if (!debug)
Daniel Veillard82150d81999-07-07 07:32:15 +0000659 htmlDocDump(stdout, doc);
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000660 else
661 xmlDebugDumpDocument(stdout, doc);
662
663 /*
664 * free it.
665 */
666 xmlFreeDoc(doc);
667}
668
669int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000670 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000671 int files = 0;
672
673 for (i = 1; i < argc ; i++) {
674 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
675 debug++;
676 else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
677 copy++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000678 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
679 sax++;
680 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
681 noout++;
682 else if ((!strcmp(argv[i], "-repeat")) ||
683 (!strcmp(argv[i], "--repeat")))
684 repeat++;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000685 }
686 for (i = 1; i < argc ; i++) {
687 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000688 if (repeat) {
689 for (count = 0;count < 100 * repeat;count++) {
690 if (sax)
691 parseSAXFile(argv[i]);
692 else
693 parseAndPrintFile(argv[i]);
694 }
695 } else {
696 if (sax)
697 parseSAXFile(argv[i]);
698 else
699 parseAndPrintFile(argv[i]);
700 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000701 files ++;
702 }
703 }
704 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000705 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000706 argv[0]);
707 printf("\tParse the HTML files and output the result of the parsing\n");
708 printf("\t--debug : dump a debug tree of the in-memory document\n");
709 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000710 printf("\t--sax : debug the sequence of SAX callbacks\n");
711 printf("\t--repeat : parse the file 100 times, for timing or profiling\n");
712 printf("\t--noout : do not print the result\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000713 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000714 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000715 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000716
717 return(0);
718}