blob: dcea309d06079bd3a8d628714945ad11fe620509 [file] [log] [blame]
/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillardc2def842000-11-07 14:21:01 +000011#undef LIBXML_DLL_IMPORT
Daniel Veillardbe70ff71999-07-05 16:50:46 +000012#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000014#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#include <stdio.h>
20#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000021#include <stdarg.h>
22
Daniel Veillard7f7d1111999-09-22 09:46:25 +000023
24#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000025#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000026#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000027#ifdef HAVE_SYS_STAT_H
28#include <sys/stat.h>
29#endif
30#ifdef HAVE_FCNTL_H
31#include <fcntl.h>
32#endif
33#ifdef HAVE_UNISTD_H
34#include <unistd.h>
35#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000036#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000037#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000038#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
Daniel Veillard361d8452000-04-03 19:48:13 +000040#include <libxml/xmlmemory.h>
41#include <libxml/HTMLparser.h>
42#include <libxml/HTMLtree.h>
43#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000044#include <libxml/xmlerror.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000045
/************************************************************************
 *									*
 *		When running GCC in vacuum cleaner mode			*
 *									*
 ************************************************************************/

/*
 * UNUSED marks a parameter that a function deliberately ignores.  It
 * expands to GCC's "unused" attribute when __GNUC__ is defined and to
 * nothing otherwise.
 */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
57
/*
 * Command-line state.  main() increments the counters each time the
 * matching switch is seen and stores the argument of --encode in
 * `encoding'.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug  */
#endif
static int copy = 0;		/* --copy   */
static int sax = 0;		/* --sax    */
static int repeat = 0;		/* --repeat */
static int noout = 0;		/* --noout  */
static int push = 0;		/* --push   */
static char *encoding = NULL;	/* --encode <encoding> */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000067
Daniel Veillard7c1206f1999-10-14 09:10:25 +000068xmlSAXHandler emptySAXHandlerStruct = {
69 NULL, /* internalSubset */
70 NULL, /* isStandalone */
71 NULL, /* hasInternalSubset */
72 NULL, /* hasExternalSubset */
73 NULL, /* resolveEntity */
74 NULL, /* getEntity */
75 NULL, /* entityDecl */
76 NULL, /* notationDecl */
77 NULL, /* attributeDecl */
78 NULL, /* elementDecl */
79 NULL, /* unparsedEntityDecl */
80 NULL, /* setDocumentLocator */
81 NULL, /* startDocument */
82 NULL, /* endDocument */
83 NULL, /* startElement */
84 NULL, /* endElement */
85 NULL, /* reference */
86 NULL, /* characters */
87 NULL, /* ignorableWhitespace */
88 NULL, /* processingInstruction */
89 NULL, /* comment */
90 NULL, /* xmlParserWarning */
91 NULL, /* xmlParserError */
92 NULL, /* xmlParserError */
93 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000094 NULL, /* cdataBlock */
95 NULL /* externalSubset */
Daniel Veillard7c1206f1999-10-14 09:10:25 +000096};
97
98xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
99extern xmlSAXHandlerPtr debugSAXHandler;
100
101/************************************************************************
102 * *
103 * Debug Handlers *
104 * *
105 ************************************************************************/
106
107/**
108 * isStandaloneDebug:
109 * @ctxt: An XML parser context
110 *
111 * Is this document tagged standalone ?
112 *
113 * Returns 1 if true
114 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000115static int
116isStandaloneDebug(void *ctx UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000117{
118 fprintf(stdout, "SAX.isStandalone()\n");
119 return(0);
120}
121
122/**
123 * hasInternalSubsetDebug:
124 * @ctxt: An XML parser context
125 *
126 * Does this document has an internal subset
127 *
128 * Returns 1 if true
129 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000130static int
131hasInternalSubsetDebug(void *ctx UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000132{
133 fprintf(stdout, "SAX.hasInternalSubset()\n");
134 return(0);
135}
136
137/**
138 * hasExternalSubsetDebug:
139 * @ctxt: An XML parser context
140 *
141 * Does this document has an external subset
142 *
143 * Returns 1 if true
144 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000145static int
146hasExternalSubsetDebug(void *ctx UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000147{
148 fprintf(stdout, "SAX.hasExternalSubset()\n");
149 return(0);
150}
151
152/**
153 * hasInternalSubsetDebug:
154 * @ctxt: An XML parser context
155 *
156 * Does this document has an internal subset
157 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000158static void
159internalSubsetDebug(void *ctx UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000160 const xmlChar *ExternalID, const xmlChar *SystemID)
161{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000162 fprintf(stdout, "SAX.internalSubset(%s,", name);
163 if (ExternalID == NULL)
164 fprintf(stdout, " ,");
165 else
166 fprintf(stdout, " %s,", ExternalID);
167 if (SystemID == NULL)
168 fprintf(stdout, " )\n");
169 else
170 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000171}
172
173/**
174 * resolveEntityDebug:
175 * @ctxt: An XML parser context
176 * @publicId: The public ID of the entity
177 * @systemId: The system ID of the entity
178 *
179 * Special entity resolver, better left to the parser, it has
180 * more context than the application layer.
181 * The default behaviour is to NOT resolve the entities, in that case
182 * the ENTITY_REF nodes are built in the structure (and the parameter
183 * values).
184 *
185 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
186 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000187static xmlParserInputPtr
188resolveEntityDebug(void *ctx UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000189{
190 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
191
192
193 fprintf(stdout, "SAX.resolveEntity(");
194 if (publicId != NULL)
195 fprintf(stdout, "%s", (char *)publicId);
196 else
197 fprintf(stdout, " ");
198 if (systemId != NULL)
199 fprintf(stdout, ", %s)\n", (char *)systemId);
200 else
201 fprintf(stdout, ", )\n");
202/*********
203 if (systemId != NULL) {
204 return(xmlNewInputFromFile(ctxt, (char *) systemId));
205 }
206 *********/
207 return(NULL);
208}
209
210/**
211 * getEntityDebug:
212 * @ctxt: An XML parser context
213 * @name: The entity name
214 *
215 * Get an entity by name
216 *
217 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
218 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000219static xmlEntityPtr
220getEntityDebug(void *ctx UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000221{
222 fprintf(stdout, "SAX.getEntity(%s)\n", name);
223 return(NULL);
224}
225
226/**
227 * getParameterEntityDebug:
228 * @ctxt: An XML parser context
229 * @name: The entity name
230 *
231 * Get a parameter entity by name
232 *
233 * Returns the xmlParserInputPtr
234 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000235static xmlEntityPtr
236getParameterEntityDebug(void *ctx UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000237{
238 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
239 return(NULL);
240}
241
242
243/**
244 * entityDeclDebug:
245 * @ctxt: An XML parser context
246 * @name: the entity name
247 * @type: the entity type
248 * @publicId: The public ID of the entity
249 * @systemId: The system ID of the entity
250 * @content: the entity value (without processing).
251 *
252 * An entity definition has been parsed
253 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000254static void
255entityDeclDebug(void *ctx UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000256 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
257{
258 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
259 name, type, publicId, systemId, content);
260}
261
262/**
263 * attributeDeclDebug:
264 * @ctxt: An XML parser context
265 * @name: the attribute name
266 * @type: the attribute type
267 *
268 * An attribute definition has been parsed
269 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000270static void
271attributeDeclDebug(void *ctx UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000272 int type, int def, const xmlChar *defaultValue,
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000273 xmlEnumerationPtr tree UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000274{
275 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
276 elem, name, type, def, defaultValue);
277}
278
279/**
280 * elementDeclDebug:
281 * @ctxt: An XML parser context
282 * @name: the element name
283 * @type: the element type
284 * @content: the element value (without processing).
285 *
286 * An element definition has been parsed
287 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000288static void
289elementDeclDebug(void *ctx UNUSED, const xmlChar *name, int type,
290 xmlElementContentPtr content UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000291{
292 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
293 name, type);
294}
295
296/**
297 * notationDeclDebug:
298 * @ctxt: An XML parser context
299 * @name: The name of the notation
300 * @publicId: The public ID of the entity
301 * @systemId: The system ID of the entity
302 *
303 * What to do when a notation declaration has been parsed.
304 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000305static void
306notationDeclDebug(void *ctx UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000307 const xmlChar *publicId, const xmlChar *systemId)
308{
309 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
310 (char *) name, (char *) publicId, (char *) systemId);
311}
312
313/**
314 * unparsedEntityDeclDebug:
315 * @ctxt: An XML parser context
316 * @name: The name of the entity
317 * @publicId: The public ID of the entity
318 * @systemId: The system ID of the entity
319 * @notationName: the name of the notation
320 *
321 * What to do when an unparsed entity declaration is parsed
322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000323static void
324unparsedEntityDeclDebug(void *ctx UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000325 const xmlChar *publicId, const xmlChar *systemId,
326 const xmlChar *notationName)
327{
328 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
329 (char *) name, (char *) publicId, (char *) systemId,
330 (char *) notationName);
331}
332
333/**
334 * setDocumentLocatorDebug:
335 * @ctxt: An XML parser context
336 * @loc: A SAX Locator
337 *
338 * Receive the document locator at startup, actually xmlDefaultSAXLocator
339 * Everything is available on the context, so this is useless in our case.
340 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000341static void
342setDocumentLocatorDebug(void *ctx UNUSED, xmlSAXLocatorPtr loc UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000343{
344 fprintf(stdout, "SAX.setDocumentLocator()\n");
345}
346
347/**
348 * startDocumentDebug:
349 * @ctxt: An XML parser context
350 *
351 * called when the document start being processed.
352 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000353static void
354startDocumentDebug(void *ctx UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000355{
356 fprintf(stdout, "SAX.startDocument()\n");
357}
358
359/**
360 * endDocumentDebug:
361 * @ctxt: An XML parser context
362 *
363 * called when the document end has been detected.
364 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000365static void
366endDocumentDebug(void *ctx UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000367{
368 fprintf(stdout, "SAX.endDocument()\n");
369}
370
371/**
372 * startElementDebug:
373 * @ctxt: An XML parser context
374 * @name: The element name
375 *
376 * called when an opening tag has been processed.
377 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000378static void
379startElementDebug(void *ctx UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000380{
381 int i;
382
383 fprintf(stdout, "SAX.startElement(%s", (char *) name);
384 if (atts != NULL) {
385 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000386 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000387 if (atts[i] != NULL) {
388 unsigned char output[40];
389 const unsigned char *att = atts[i];
390 int outlen, attlen;
391 fprintf(stdout, "='");
392 while ((attlen = strlen((char*)att)) > 0) {
393 outlen = sizeof output - 1;
394 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
395 fprintf(stdout, "%.*s", outlen, output);
396 att += attlen;
397 }
398 fprintf(stdout, "'");
399 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000400 }
401 }
402 fprintf(stdout, ")\n");
403}
404
405/**
406 * endElementDebug:
407 * @ctxt: An XML parser context
408 * @name: The element name
409 *
410 * called when the end of an element has been detected.
411 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000412static void
413endElementDebug(void *ctx UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000414{
415 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
416}
417
418/**
419 * charactersDebug:
420 * @ctxt: An XML parser context
421 * @ch: a xmlChar string
422 * @len: the number of xmlChar
423 *
424 * receiving some chars from the parser.
425 * Question: how much at a time ???
426 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000427static void
428charactersDebug(void *ctx UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000429{
Daniel Veillarde010c172000-08-28 10:04:51 +0000430 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000431 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000432
Daniel Veillard4948eb42000-08-29 09:41:15 +0000433 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000434 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000435
436 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000437}
438
439/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000440 * cdataDebug:
441 * @ctxt: An XML parser context
442 * @ch: a xmlChar string
443 * @len: the number of xmlChar
444 *
445 * receiving some cdata chars from the parser.
446 * Question: how much at a time ???
447 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000448static void
449cdataDebug(void *ctx UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000450{
451 unsigned char output[40];
452 int inlen = len, outlen = 30;
453
454 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
455 output[outlen] = 0;
456
457 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
458}
459
460/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000461 * referenceDebug:
462 * @ctxt: An XML parser context
463 * @name: The entity name
464 *
465 * called when an entity reference is detected.
466 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000467static void
468referenceDebug(void *ctx UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000469{
470 fprintf(stdout, "SAX.reference(%s)\n", name);
471}
472
473/**
474 * ignorableWhitespaceDebug:
475 * @ctxt: An XML parser context
476 * @ch: a xmlChar string
477 * @start: the first char in the string
478 * @len: the number of xmlChar
479 *
480 * receiving some ignorable whitespaces from the parser.
481 * Question: how much at a time ???
482 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000483static void
484ignorableWhitespaceDebug(void *ctx UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000485{
Daniel Veillard87b95392000-08-12 21:12:04 +0000486 char output[40];
487 int i;
488
489 for (i = 0;(i<len) && (i < 30);i++)
490 output[i] = ch[i];
491 output[i] = 0;
492
493 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000494}
495
496/**
497 * processingInstructionDebug:
498 * @ctxt: An XML parser context
499 * @target: the target name
500 * @data: the PI data's
501 * @len: the number of xmlChar
502 *
503 * A processing instruction has been parsed.
504 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000505static void
506processingInstructionDebug(void *ctx UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000507 const xmlChar *data)
508{
509 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
510 (char *) target, (char *) data);
511}
512
513/**
514 * commentDebug:
515 * @ctxt: An XML parser context
516 * @value: the comment content
517 *
518 * A comment has been parsed.
519 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000520static void
521commentDebug(void *ctx UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000522{
523 fprintf(stdout, "SAX.comment(%s)\n", value);
524}
525
526/**
527 * warningDebug:
528 * @ctxt: An XML parser context
529 * @msg: the message to display/transmit
530 * @...: extra parameters for the message display
531 *
532 * Display and format a warning messages, gives file, line, position and
533 * extra parameters.
534 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000535static void
536warningDebug(void *ctx UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000537{
538 va_list args;
539
540 va_start(args, msg);
541 fprintf(stdout, "SAX.warning: ");
542 vfprintf(stdout, msg, args);
543 va_end(args);
544}
545
546/**
547 * errorDebug:
548 * @ctxt: An XML parser context
549 * @msg: the message to display/transmit
550 * @...: extra parameters for the message display
551 *
552 * Display and format a error messages, gives file, line, position and
553 * extra parameters.
554 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000555static void
556errorDebug(void *ctx UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000557{
558 va_list args;
559
560 va_start(args, msg);
561 fprintf(stdout, "SAX.error: ");
562 vfprintf(stdout, msg, args);
563 va_end(args);
564}
565
566/**
567 * fatalErrorDebug:
568 * @ctxt: An XML parser context
569 * @msg: the message to display/transmit
570 * @...: extra parameters for the message display
571 *
572 * Display and format a fatalError messages, gives file, line, position and
573 * extra parameters.
574 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000575static void
576fatalErrorDebug(void *ctx UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000577{
578 va_list args;
579
580 va_start(args, msg);
581 fprintf(stdout, "SAX.fatalError: ");
582 vfprintf(stdout, msg, args);
583 va_end(args);
584}
585
586xmlSAXHandler debugSAXHandlerStruct = {
587 internalSubsetDebug,
588 isStandaloneDebug,
589 hasInternalSubsetDebug,
590 hasExternalSubsetDebug,
591 resolveEntityDebug,
592 getEntityDebug,
593 entityDeclDebug,
594 notationDeclDebug,
595 attributeDeclDebug,
596 elementDeclDebug,
597 unparsedEntityDeclDebug,
598 setDocumentLocatorDebug,
599 startDocumentDebug,
600 endDocumentDebug,
601 startElementDebug,
602 endElementDebug,
603 referenceDebug,
604 charactersDebug,
605 ignorableWhitespaceDebug,
606 processingInstructionDebug,
607 commentDebug,
608 warningDebug,
609 errorDebug,
610 fatalErrorDebug,
611 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000612 cdataDebug,
613 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000614};
615
616xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000617/************************************************************************
618 * *
619 * Debug *
620 * *
621 ************************************************************************/
622
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000623static void
624parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000625 htmlDocPtr doc = NULL;
626
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000627 /*
628 * Empty callbacks for checking
629 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000630 if (push) {
631 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000632
Daniel Veillard87b95392000-08-12 21:12:04 +0000633 f = fopen(filename, "r");
634 if (f != NULL) {
635 int res, size = 3;
636 char chars[4096];
637 htmlParserCtxtPtr ctxt;
638
639 /* if (repeat) */
640 size = 4096;
641 res = fread(chars, 1, 4, f);
642 if (res > 0) {
643 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
644 chars, res, filename, 0);
645 while ((res = fread(chars, 1, size, f)) > 0) {
646 htmlParseChunk(ctxt, chars, res, 0);
647 }
648 htmlParseChunk(ctxt, chars, 0, 1);
649 doc = ctxt->myDoc;
650 htmlFreeParserCtxt(ctxt);
651 }
652 if (doc != NULL) {
653 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
654 xmlFreeDoc(doc);
655 }
656 fclose(f);
657 }
658 if (!noout) {
659 f = fopen(filename, "r");
660 if (f != NULL) {
661 int res, size = 3;
662 char chars[4096];
663 htmlParserCtxtPtr ctxt;
664
665 /* if (repeat) */
666 size = 4096;
667 res = fread(chars, 1, 4, f);
668 if (res > 0) {
669 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
670 chars, res, filename, 0);
671 while ((res = fread(chars, 1, size, f)) > 0) {
672 htmlParseChunk(ctxt, chars, res, 0);
673 }
674 htmlParseChunk(ctxt, chars, 0, 1);
675 doc = ctxt->myDoc;
676 htmlFreeParserCtxt(ctxt);
677 }
678 if (doc != NULL) {
679 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
680 xmlFreeDoc(doc);
681 }
682 fclose(f);
683 }
684 }
685 } else {
686 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000687 if (doc != NULL) {
688 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
689 xmlFreeDoc(doc);
690 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000691
692 if (!noout) {
693 /*
694 * Debug callback
695 */
696 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
697 if (doc != NULL) {
698 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
699 xmlFreeDoc(doc);
700 }
701 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000702 }
703}
704
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000705static void
706parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000707 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000708
709 /*
710 * build an HTML tree from a string;
711 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000712 if (push) {
713 FILE *f;
714
715 f = fopen(filename, "r");
716 if (f != NULL) {
717 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000718 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000719 htmlParserCtxtPtr ctxt;
720
Daniel Veillard87b95392000-08-12 21:12:04 +0000721 /* if (repeat) */
722 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000723 res = fread(chars, 1, 4, f);
724 if (res > 0) {
725 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
726 chars, res, filename, 0);
727 while ((res = fread(chars, 1, size, f)) > 0) {
728 htmlParseChunk(ctxt, chars, res, 0);
729 }
730 htmlParseChunk(ctxt, chars, 0, 1);
731 doc = ctxt->myDoc;
732 htmlFreeParserCtxt(ctxt);
733 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000734 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000735 }
736 } else {
737 doc = htmlParseFile(filename, NULL);
738 }
739 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000740 xmlGenericError(xmlGenericErrorContext,
741 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000742 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000743
744 /*
745 * test intermediate copy if needed.
746 */
747 if (copy) {
748 tmp = doc;
749 doc = xmlCopyDoc(doc, 1);
750 xmlFreeDoc(tmp);
751 }
752
753 /*
754 * print it.
755 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000756 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000757#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000758 if (!debug) {
759 if (encoding)
760 htmlSaveFileEnc("-", doc, encoding);
761 else
762 htmlDocDump(stdout, doc);
763 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000764 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000765#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000766 if (encoding)
767 htmlSaveFileEnc("-", doc, encoding);
768 else
769 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000770#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000771 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000772
773 /*
774 * free it.
775 */
776 xmlFreeDoc(doc);
777}
778
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000779int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000780 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000781 int files = 0;
782
783 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000784#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000785 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
786 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000787 else
788#endif
789 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000790 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000791 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
792 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000793 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
794 sax++;
795 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
796 noout++;
797 else if ((!strcmp(argv[i], "-repeat")) ||
798 (!strcmp(argv[i], "--repeat")))
799 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000800 else if ((!strcmp(argv[i], "-encode")) ||
801 (!strcmp(argv[i], "--encode"))) {
802 i++;
803 encoding = argv[i];
804 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000805 }
806 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000807 if ((!strcmp(argv[i], "-encode")) ||
808 (!strcmp(argv[i], "--encode"))) {
809 i++;
810 continue;
811 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000812 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000813 if (repeat) {
814 for (count = 0;count < 100 * repeat;count++) {
815 if (sax)
816 parseSAXFile(argv[i]);
817 else
818 parseAndPrintFile(argv[i]);
819 }
820 } else {
821 if (sax)
822 parseSAXFile(argv[i]);
823 else
824 parseAndPrintFile(argv[i]);
825 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000826 files ++;
827 }
828 }
829 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000830 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000831 argv[0]);
832 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000833#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000834 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000835#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000836 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000837 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000838 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000839 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000840 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000841 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000842 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000843 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000844 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000845
846 return(0);
847}
Daniel Veillard361d8452000-04-03 19:48:13 +0000848#else /* !LIBXML_HTML_ENABLED */
849#include <stdio.h>
/*
 * Fallback entry point used when LIBXML_HTML_ENABLED is not defined:
 * it only reports that HTML support is missing and returns 0.
 */
int main(int argc, char **argv) {
    (void) argc;	/* only argv[0] is needed */
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
854#endif