blob: 8bced6cbfa588a24e8265263b59f7451dabe70ea [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000014#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000015
16#include <stdio.h>
17#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000018#include <stdarg.h>
19
Daniel Veillard7f7d1111999-09-22 09:46:25 +000020
21#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000022#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000023#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000024#ifdef HAVE_SYS_STAT_H
25#include <sys/stat.h>
26#endif
27#ifdef HAVE_FCNTL_H
28#include <fcntl.h>
29#endif
30#ifdef HAVE_UNISTD_H
31#include <unistd.h>
32#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000033#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000034#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000035#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000036
Daniel Veillard7c1206f1999-10-14 09:10:25 +000037#include "xmlmemory.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000038#include "HTMLparser.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000039#include "HTMLtree.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000040#include "debugXML.h"
41
/*
 * Command-line switches. Each one is incremented by main() every time the
 * matching option appears; objects with static storage start at zero.
 */
static int debug;   /* --debug  : dump the tree with xmlDebugDumpDocument() */
static int copy;    /* --copy   : run the document through xmlCopyDoc() */
static int sax;     /* --sax    : use the SAX callbacks instead of a tree */
static int repeat;  /* --repeat : parse each file 100 * repeat times */
static int noout;   /* --noout  : suppress the printed result */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000047
48/*
49 * Note: this is perfectly clean HTML, i.e. not a useful test.
Daniel Veillarddd6b3671999-09-23 22:19:22 +000050static xmlChar buffer[] =
Daniel Veillardbe70ff71999-07-05 16:50:46 +000051"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
52 \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
53<html>\n\
54<head>\n\
55 <title>This service is temporary down</title>\n\
56</head>\n\
57\n\
58<body bgcolor=\"#FFFFFF\">\n\
59<h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
60We are doing our best to get it back on-line,\n\
61\n\
62<p>The W3C system administrators</p>\n\
63</body>\n\
64</html>\n\
65";
Daniel Veillardb96e6431999-08-29 21:02:19 +000066 */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000067
Daniel Veillard7c1206f1999-10-14 09:10:25 +000068xmlSAXHandler emptySAXHandlerStruct = {
69 NULL, /* internalSubset */
70 NULL, /* isStandalone */
71 NULL, /* hasInternalSubset */
72 NULL, /* hasExternalSubset */
73 NULL, /* resolveEntity */
74 NULL, /* getEntity */
75 NULL, /* entityDecl */
76 NULL, /* notationDecl */
77 NULL, /* attributeDecl */
78 NULL, /* elementDecl */
79 NULL, /* unparsedEntityDecl */
80 NULL, /* setDocumentLocator */
81 NULL, /* startDocument */
82 NULL, /* endDocument */
83 NULL, /* startElement */
84 NULL, /* endElement */
85 NULL, /* reference */
86 NULL, /* characters */
87 NULL, /* ignorableWhitespace */
88 NULL, /* processingInstruction */
89 NULL, /* comment */
90 NULL, /* xmlParserWarning */
91 NULL, /* xmlParserError */
92 NULL, /* xmlParserError */
93 NULL, /* getParameterEntity */
94};
95
96xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
97extern xmlSAXHandlerPtr debugSAXHandler;
98
99/************************************************************************
100 * *
101 * Debug Handlers *
102 * *
103 ************************************************************************/
104
/**
 * isStandaloneDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace the isStandalone SAX query on stdout.
 *
 * Returns 0 always.
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
119
/**
 * hasInternalSubsetDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace the hasInternalSubset SAX query on stdout.
 *
 * Returns 0 always.
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
134
/**
 * hasExternalSubsetDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace the hasExternalSubset SAX query on stdout.
 *
 * Returns 0 always.
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
149
150/**
151 * hasInternalSubsetDebug:
152 * @ctxt: An XML parser context
153 *
154 * Does this document has an internal subset
155 */
156void
157internalSubsetDebug(void *ctx, const xmlChar *name,
158 const xmlChar *ExternalID, const xmlChar *SystemID)
159{
160 /* xmlDtdPtr externalSubset; */
161
162 fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
163 name, ExternalID, SystemID);
164
165/***********
166 if ((ExternalID != NULL) || (SystemID != NULL)) {
167 externalSubset = xmlParseDTD(ExternalID, SystemID);
168 if (externalSubset != NULL) {
169 xmlFreeDtd(externalSubset);
170 }
171 }
172 ***********/
173}
174
175/**
176 * resolveEntityDebug:
177 * @ctxt: An XML parser context
178 * @publicId: The public ID of the entity
179 * @systemId: The system ID of the entity
180 *
181 * Special entity resolver, better left to the parser, it has
182 * more context than the application layer.
183 * The default behaviour is to NOT resolve the entities, in that case
184 * the ENTITY_REF nodes are built in the structure (and the parameter
185 * values).
186 *
187 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
188 */
189xmlParserInputPtr
190resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
191{
192 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
193
194
195 fprintf(stdout, "SAX.resolveEntity(");
196 if (publicId != NULL)
197 fprintf(stdout, "%s", (char *)publicId);
198 else
199 fprintf(stdout, " ");
200 if (systemId != NULL)
201 fprintf(stdout, ", %s)\n", (char *)systemId);
202 else
203 fprintf(stdout, ", )\n");
204/*********
205 if (systemId != NULL) {
206 return(xmlNewInputFromFile(ctxt, (char *) systemId));
207 }
208 *********/
209 return(NULL);
210}
211
212/**
213 * getEntityDebug:
214 * @ctxt: An XML parser context
215 * @name: The entity name
216 *
217 * Get an entity by name
218 *
219 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
220 */
221xmlEntityPtr
222getEntityDebug(void *ctx, const xmlChar *name)
223{
224 fprintf(stdout, "SAX.getEntity(%s)\n", name);
225 return(NULL);
226}
227
228/**
229 * getParameterEntityDebug:
230 * @ctxt: An XML parser context
231 * @name: The entity name
232 *
233 * Get a parameter entity by name
234 *
235 * Returns the xmlParserInputPtr
236 */
237xmlEntityPtr
238getParameterEntityDebug(void *ctx, const xmlChar *name)
239{
240 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
241 return(NULL);
242}
243
244
245/**
246 * entityDeclDebug:
247 * @ctxt: An XML parser context
248 * @name: the entity name
249 * @type: the entity type
250 * @publicId: The public ID of the entity
251 * @systemId: The system ID of the entity
252 * @content: the entity value (without processing).
253 *
254 * An entity definition has been parsed
255 */
256void
257entityDeclDebug(void *ctx, const xmlChar *name, int type,
258 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
259{
260 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
261 name, type, publicId, systemId, content);
262}
263
264/**
265 * attributeDeclDebug:
266 * @ctxt: An XML parser context
267 * @name: the attribute name
268 * @type: the attribute type
269 *
270 * An attribute definition has been parsed
271 */
272void
273attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
274 int type, int def, const xmlChar *defaultValue,
275 xmlEnumerationPtr tree)
276{
277 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
278 elem, name, type, def, defaultValue);
279}
280
281/**
282 * elementDeclDebug:
283 * @ctxt: An XML parser context
284 * @name: the element name
285 * @type: the element type
286 * @content: the element value (without processing).
287 *
288 * An element definition has been parsed
289 */
290void
291elementDeclDebug(void *ctx, const xmlChar *name, int type,
292 xmlElementContentPtr content)
293{
294 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
295 name, type);
296}
297
298/**
299 * notationDeclDebug:
300 * @ctxt: An XML parser context
301 * @name: The name of the notation
302 * @publicId: The public ID of the entity
303 * @systemId: The system ID of the entity
304 *
305 * What to do when a notation declaration has been parsed.
306 */
307void
308notationDeclDebug(void *ctx, const xmlChar *name,
309 const xmlChar *publicId, const xmlChar *systemId)
310{
311 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
312 (char *) name, (char *) publicId, (char *) systemId);
313}
314
315/**
316 * unparsedEntityDeclDebug:
317 * @ctxt: An XML parser context
318 * @name: The name of the entity
319 * @publicId: The public ID of the entity
320 * @systemId: The system ID of the entity
321 * @notationName: the name of the notation
322 *
323 * What to do when an unparsed entity declaration is parsed
324 */
325void
326unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
327 const xmlChar *publicId, const xmlChar *systemId,
328 const xmlChar *notationName)
329{
330 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
331 (char *) name, (char *) publicId, (char *) systemId,
332 (char *) notationName);
333}
334
335/**
336 * setDocumentLocatorDebug:
337 * @ctxt: An XML parser context
338 * @loc: A SAX Locator
339 *
340 * Receive the document locator at startup, actually xmlDefaultSAXLocator
341 * Everything is available on the context, so this is useless in our case.
342 */
343void
344setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
345{
346 fprintf(stdout, "SAX.setDocumentLocator()\n");
347}
348
/**
 * startDocumentDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace the startDocument SAX callback on stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.startDocument()\n", stdout);
}
360
/**
 * endDocumentDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace the endDocument SAX callback on stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.endDocument()\n", stdout);
}
372
373/**
374 * startElementDebug:
375 * @ctxt: An XML parser context
376 * @name: The element name
377 *
378 * called when an opening tag has been processed.
379 */
380void
381startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
382{
383 int i;
384
385 fprintf(stdout, "SAX.startElement(%s", (char *) name);
386 if (atts != NULL) {
387 for (i = 0;(atts[i] != NULL);i++) {
388 fprintf(stdout, ", %s='", atts[i++]);
389 fprintf(stdout, "%s'", atts[i]);
390 }
391 }
392 fprintf(stdout, ")\n");
393}
394
395/**
396 * endElementDebug:
397 * @ctxt: An XML parser context
398 * @name: The element name
399 *
400 * called when the end of an element has been detected.
401 */
402void
403endElementDebug(void *ctx, const xmlChar *name)
404{
405 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
406}
407
408/**
409 * charactersDebug:
410 * @ctxt: An XML parser context
411 * @ch: a xmlChar string
412 * @len: the number of xmlChar
413 *
414 * receiving some chars from the parser.
415 * Question: how much at a time ???
416 */
417void
418charactersDebug(void *ctx, const xmlChar *ch, int len)
419{
420 int i;
421
422 fprintf(stdout, "SAX.characters(");
423 for (i = 0;(i < len) && (i < 30);i++)
424 fprintf(stdout, "%c", ch[i]);
425 fprintf(stdout, ", %d)\n", len);
426}
427
428/**
429 * referenceDebug:
430 * @ctxt: An XML parser context
431 * @name: The entity name
432 *
433 * called when an entity reference is detected.
434 */
435void
436referenceDebug(void *ctx, const xmlChar *name)
437{
438 fprintf(stdout, "SAX.reference(%s)\n", name);
439}
440
441/**
442 * ignorableWhitespaceDebug:
443 * @ctxt: An XML parser context
444 * @ch: a xmlChar string
445 * @start: the first char in the string
446 * @len: the number of xmlChar
447 *
448 * receiving some ignorable whitespaces from the parser.
449 * Question: how much at a time ???
450 */
451void
452ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
453{
454 fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
455 (char *) ch, len);
456}
457
458/**
459 * processingInstructionDebug:
460 * @ctxt: An XML parser context
461 * @target: the target name
462 * @data: the PI data's
463 * @len: the number of xmlChar
464 *
465 * A processing instruction has been parsed.
466 */
467void
468processingInstructionDebug(void *ctx, const xmlChar *target,
469 const xmlChar *data)
470{
471 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
472 (char *) target, (char *) data);
473}
474
475/**
476 * commentDebug:
477 * @ctxt: An XML parser context
478 * @value: the comment content
479 *
480 * A comment has been parsed.
481 */
482void
483commentDebug(void *ctx, const xmlChar *value)
484{
485 fprintf(stdout, "SAX.comment(%s)\n", value);
486}
487
/**
 * warningDebug:
 * @ctx:  the user data passed by the parser (unused)
 * @msg:  printf-style format of the message
 * @...:  the arguments consumed by @msg
 *
 * Print the formatted warning on stdout, prefixed with "SAX.warning: ".
 * No newline is appended; the message is emitted as given.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;
    fputs("SAX.warning: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
507
/**
 * errorDebug:
 * @ctx:  the user data passed by the parser (unused)
 * @msg:  printf-style format of the message
 * @...:  the arguments consumed by @msg
 *
 * Print the formatted error on stdout, prefixed with "SAX.error: ".
 * No newline is appended; the message is emitted as given.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;
    fputs("SAX.error: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
527
/**
 * fatalErrorDebug:
 * @ctx:  the user data passed by the parser (unused)
 * @msg:  printf-style format of the message
 * @...:  the arguments consumed by @msg
 *
 * Print the formatted fatal error on stdout, prefixed with
 * "SAX.fatalError: ". No newline is appended; the message is emitted as
 * given.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;
    fputs("SAX.fatalError: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
547
548xmlSAXHandler debugSAXHandlerStruct = {
549 internalSubsetDebug,
550 isStandaloneDebug,
551 hasInternalSubsetDebug,
552 hasExternalSubsetDebug,
553 resolveEntityDebug,
554 getEntityDebug,
555 entityDeclDebug,
556 notationDeclDebug,
557 attributeDeclDebug,
558 elementDeclDebug,
559 unparsedEntityDeclDebug,
560 setDocumentLocatorDebug,
561 startDocumentDebug,
562 endDocumentDebug,
563 startElementDebug,
564 endElementDebug,
565 referenceDebug,
566 charactersDebug,
567 ignorableWhitespaceDebug,
568 processingInstructionDebug,
569 commentDebug,
570 warningDebug,
571 errorDebug,
572 fatalErrorDebug,
573 getParameterEntityDebug,
574};
575
576xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000577/************************************************************************
578 * *
579 * Debug *
580 * *
581 ************************************************************************/
582
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000583void parseSAXFile(char *filename) {
584 htmlDocPtr doc;
585 /*
586 * Empty callbacks for checking
587 */
588 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
589 if (doc != NULL) {
590 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
591 xmlFreeDoc(doc);
592 }
593
594 if (!noout) {
595 /*
596 * Debug callback
597 */
598 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
599 if (doc != NULL) {
600 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
601 xmlFreeDoc(doc);
602 }
603 }
604}
605
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000606void parseAndPrintFile(char *filename) {
607 htmlDocPtr doc, tmp;
608
609 /*
610 * build an HTML tree from a string;
611 */
612 doc = htmlParseFile(filename, NULL);
613
614 /*
615 * test intermediate copy if needed.
616 */
617 if (copy) {
618 tmp = doc;
619 doc = xmlCopyDoc(doc, 1);
620 xmlFreeDoc(tmp);
621 }
622
623 /*
624 * print it.
625 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000626 if (!noout) {
627 if (!debug)
628 htmlDocDump(stdout, doc);
629 else
630 xmlDebugDumpDocument(stdout, doc);
631 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000632
633 /*
634 * free it.
635 */
636 xmlFreeDoc(doc);
637}
638
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000639void parseAndPrintBuffer(xmlChar *buf) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000640 htmlDocPtr doc, tmp;
641
642 /*
643 * build an HTML tree from a string;
644 */
645 doc = htmlParseDoc(buf, NULL);
646
647 /*
648 * test intermediate copy if needed.
649 */
650 if (copy) {
651 tmp = doc;
652 doc = xmlCopyDoc(doc, 1);
653 xmlFreeDoc(tmp);
654 }
655
656 /*
657 * print it.
658 */
659 if (!debug)
Daniel Veillard82150d81999-07-07 07:32:15 +0000660 htmlDocDump(stdout, doc);
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000661 else
662 xmlDebugDumpDocument(stdout, doc);
663
664 /*
665 * free it.
666 */
667 xmlFreeDoc(doc);
668}
669
670int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000671 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000672 int files = 0;
673
674 for (i = 1; i < argc ; i++) {
675 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
676 debug++;
677 else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
678 copy++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000679 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
680 sax++;
681 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
682 noout++;
683 else if ((!strcmp(argv[i], "-repeat")) ||
684 (!strcmp(argv[i], "--repeat")))
685 repeat++;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000686 }
687 for (i = 1; i < argc ; i++) {
688 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000689 if (repeat) {
690 for (count = 0;count < 100 * repeat;count++) {
691 if (sax)
692 parseSAXFile(argv[i]);
693 else
694 parseAndPrintFile(argv[i]);
695 }
696 } else {
697 if (sax)
698 parseSAXFile(argv[i]);
699 else
700 parseAndPrintFile(argv[i]);
701 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000702 files ++;
703 }
704 }
705 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000706 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000707 argv[0]);
708 printf("\tParse the HTML files and output the result of the parsing\n");
709 printf("\t--debug : dump a debug tree of the in-memory document\n");
710 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000711 printf("\t--sax : debug the sequence of SAX callbacks\n");
712 printf("\t--repeat : parse the file 100 times, for timing or profiling\n");
713 printf("\t--noout : do not print the result\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000714 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000715 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000716
717 return(0);
718}