blob: ce780ce48b60ff9f8f3117de91f630f7967c43fa [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000038#include <libxml/globals.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
#ifdef LIBXML_DEBUG_ENABLED
/* --debug: dump the debug tree instead of serializing the document. */
static int debug = 0;
#endif
/* --copy: pass the parsed document through xmlCopyDoc() before printing. */
static int copy = 0;
/* --sax: exercise the SAX callbacks instead of building and printing a tree. */
static int sax = 0;
/* --repeat: each occurrence adds 100 parsing passes per input file. */
static int repeat = 0;
/* --noout: do not print the result (also skips the debug SAX pass). */
static int noout = 0;
/* --push: feed the input through the push (chunked) parser interface. */
static int push = 0;
/* --encode <name>: encoding handed to htmlSaveFileEnc(); NULL when unset. */
static char *encoding = NULL;
Daniel Veillardbe70ff71999-07-05 16:50:46 +000049
Daniel Veillard7c1206f1999-10-14 09:10:25 +000050xmlSAXHandler emptySAXHandlerStruct = {
51 NULL, /* internalSubset */
52 NULL, /* isStandalone */
53 NULL, /* hasInternalSubset */
54 NULL, /* hasExternalSubset */
55 NULL, /* resolveEntity */
56 NULL, /* getEntity */
57 NULL, /* entityDecl */
58 NULL, /* notationDecl */
59 NULL, /* attributeDecl */
60 NULL, /* elementDecl */
61 NULL, /* unparsedEntityDecl */
62 NULL, /* setDocumentLocator */
63 NULL, /* startDocument */
64 NULL, /* endDocument */
65 NULL, /* startElement */
66 NULL, /* endElement */
67 NULL, /* reference */
68 NULL, /* characters */
69 NULL, /* ignorableWhitespace */
70 NULL, /* processingInstruction */
71 NULL, /* comment */
72 NULL, /* xmlParserWarning */
73 NULL, /* xmlParserError */
74 NULL, /* xmlParserError */
75 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000076 NULL, /* cdataBlock */
Daniel Veillardd0463562001-10-13 09:15:48 +000077 NULL, /* externalSubset */
78 1
Daniel Veillard7c1206f1999-10-14 09:10:25 +000079};
80
81xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
82extern xmlSAXHandlerPtr debugSAXHandler;
83
84/************************************************************************
85 * *
86 * Debug Handlers *
87 * *
88 ************************************************************************/
89
90/**
91 * isStandaloneDebug:
92 * @ctxt: An XML parser context
93 *
94 * Is this document tagged standalone ?
95 *
96 * Returns 1 if true
97 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000098static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +000099isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000100{
101 fprintf(stdout, "SAX.isStandalone()\n");
102 return(0);
103}
104
105/**
106 * hasInternalSubsetDebug:
107 * @ctxt: An XML parser context
108 *
109 * Does this document has an internal subset
110 *
111 * Returns 1 if true
112 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000113static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000114hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000115{
116 fprintf(stdout, "SAX.hasInternalSubset()\n");
117 return(0);
118}
119
120/**
121 * hasExternalSubsetDebug:
122 * @ctxt: An XML parser context
123 *
124 * Does this document has an external subset
125 *
126 * Returns 1 if true
127 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000128static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000129hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000130{
131 fprintf(stdout, "SAX.hasExternalSubset()\n");
132 return(0);
133}
134
135/**
136 * hasInternalSubsetDebug:
137 * @ctxt: An XML parser context
138 *
139 * Does this document has an internal subset
140 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000141static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000142internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000143 const xmlChar *ExternalID, const xmlChar *SystemID)
144{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000145 fprintf(stdout, "SAX.internalSubset(%s,", name);
146 if (ExternalID == NULL)
147 fprintf(stdout, " ,");
148 else
149 fprintf(stdout, " %s,", ExternalID);
150 if (SystemID == NULL)
151 fprintf(stdout, " )\n");
152 else
153 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000154}
155
156/**
157 * resolveEntityDebug:
158 * @ctxt: An XML parser context
159 * @publicId: The public ID of the entity
160 * @systemId: The system ID of the entity
161 *
162 * Special entity resolver, better left to the parser, it has
163 * more context than the application layer.
164 * The default behaviour is to NOT resolve the entities, in that case
165 * the ENTITY_REF nodes are built in the structure (and the parameter
166 * values).
167 *
168 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
169 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000170static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000171resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000172{
173 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
174
175
176 fprintf(stdout, "SAX.resolveEntity(");
177 if (publicId != NULL)
178 fprintf(stdout, "%s", (char *)publicId);
179 else
180 fprintf(stdout, " ");
181 if (systemId != NULL)
182 fprintf(stdout, ", %s)\n", (char *)systemId);
183 else
184 fprintf(stdout, ", )\n");
185/*********
186 if (systemId != NULL) {
187 return(xmlNewInputFromFile(ctxt, (char *) systemId));
188 }
189 *********/
190 return(NULL);
191}
192
193/**
194 * getEntityDebug:
195 * @ctxt: An XML parser context
196 * @name: The entity name
197 *
198 * Get an entity by name
199 *
200 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
201 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000202static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000203getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000204{
205 fprintf(stdout, "SAX.getEntity(%s)\n", name);
206 return(NULL);
207}
208
209/**
210 * getParameterEntityDebug:
211 * @ctxt: An XML parser context
212 * @name: The entity name
213 *
214 * Get a parameter entity by name
215 *
216 * Returns the xmlParserInputPtr
217 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000218static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000219getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000220{
221 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
222 return(NULL);
223}
224
225
226/**
227 * entityDeclDebug:
228 * @ctxt: An XML parser context
229 * @name: the entity name
230 * @type: the entity type
231 * @publicId: The public ID of the entity
232 * @systemId: The system ID of the entity
233 * @content: the entity value (without processing).
234 *
235 * An entity definition has been parsed
236 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000237static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000238entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000239 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
240{
241 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
242 name, type, publicId, systemId, content);
243}
244
245/**
246 * attributeDeclDebug:
247 * @ctxt: An XML parser context
248 * @name: the attribute name
249 * @type: the attribute type
250 *
251 * An attribute definition has been parsed
252 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000253static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000254attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000255 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000256 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000257{
258 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
259 elem, name, type, def, defaultValue);
260}
261
262/**
263 * elementDeclDebug:
264 * @ctxt: An XML parser context
265 * @name: the element name
266 * @type: the element type
267 * @content: the element value (without processing).
268 *
269 * An element definition has been parsed
270 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000271static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000272elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
273 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000274{
275 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
276 name, type);
277}
278
279/**
280 * notationDeclDebug:
281 * @ctxt: An XML parser context
282 * @name: The name of the notation
283 * @publicId: The public ID of the entity
284 * @systemId: The system ID of the entity
285 *
286 * What to do when a notation declaration has been parsed.
287 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000288static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000289notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000290 const xmlChar *publicId, const xmlChar *systemId)
291{
292 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
293 (char *) name, (char *) publicId, (char *) systemId);
294}
295
296/**
297 * unparsedEntityDeclDebug:
298 * @ctxt: An XML parser context
299 * @name: The name of the entity
300 * @publicId: The public ID of the entity
301 * @systemId: The system ID of the entity
302 * @notationName: the name of the notation
303 *
304 * What to do when an unparsed entity declaration is parsed
305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000306static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000307unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000308 const xmlChar *publicId, const xmlChar *systemId,
309 const xmlChar *notationName)
310{
311 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
312 (char *) name, (char *) publicId, (char *) systemId,
313 (char *) notationName);
314}
315
316/**
317 * setDocumentLocatorDebug:
318 * @ctxt: An XML parser context
319 * @loc: A SAX Locator
320 *
321 * Receive the document locator at startup, actually xmlDefaultSAXLocator
322 * Everything is available on the context, so this is useless in our case.
323 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000324static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000325setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000326{
327 fprintf(stdout, "SAX.setDocumentLocator()\n");
328}
329
330/**
331 * startDocumentDebug:
332 * @ctxt: An XML parser context
333 *
334 * called when the document start being processed.
335 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000336static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000337startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000338{
339 fprintf(stdout, "SAX.startDocument()\n");
340}
341
342/**
343 * endDocumentDebug:
344 * @ctxt: An XML parser context
345 *
346 * called when the document end has been detected.
347 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000348static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000349endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000350{
351 fprintf(stdout, "SAX.endDocument()\n");
352}
353
354/**
355 * startElementDebug:
356 * @ctxt: An XML parser context
357 * @name: The element name
358 *
359 * called when an opening tag has been processed.
360 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000361static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000362startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000363{
364 int i;
365
366 fprintf(stdout, "SAX.startElement(%s", (char *) name);
367 if (atts != NULL) {
368 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000369 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000370 if (atts[i] != NULL) {
371 unsigned char output[40];
372 const unsigned char *att = atts[i];
373 int outlen, attlen;
374 fprintf(stdout, "='");
375 while ((attlen = strlen((char*)att)) > 0) {
376 outlen = sizeof output - 1;
377 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
Daniel Veillard5f704af2003-03-05 10:01:43 +0000378 output[outlen] = 0;
379 fprintf(stdout, "%s", output);
Daniel Veillarde010c172000-08-28 10:04:51 +0000380 att += attlen;
381 }
382 fprintf(stdout, "'");
383 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000384 }
385 }
386 fprintf(stdout, ")\n");
387}
388
389/**
390 * endElementDebug:
391 * @ctxt: An XML parser context
392 * @name: The element name
393 *
394 * called when the end of an element has been detected.
395 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000396static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000397endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000398{
399 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
400}
401
402/**
403 * charactersDebug:
404 * @ctxt: An XML parser context
405 * @ch: a xmlChar string
406 * @len: the number of xmlChar
407 *
408 * receiving some chars from the parser.
409 * Question: how much at a time ???
410 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000411static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000412charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000413{
Daniel Veillarde010c172000-08-28 10:04:51 +0000414 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000415 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000416
Daniel Veillard4948eb42000-08-29 09:41:15 +0000417 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000418 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000419
420 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000421}
422
423/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000424 * cdataDebug:
425 * @ctxt: An XML parser context
426 * @ch: a xmlChar string
427 * @len: the number of xmlChar
428 *
429 * receiving some cdata chars from the parser.
430 * Question: how much at a time ???
431 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000432static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000433cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000434{
435 unsigned char output[40];
436 int inlen = len, outlen = 30;
437
438 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
439 output[outlen] = 0;
440
441 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
442}
443
444/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000445 * referenceDebug:
446 * @ctxt: An XML parser context
447 * @name: The entity name
448 *
449 * called when an entity reference is detected.
450 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000451static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000452referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000453{
454 fprintf(stdout, "SAX.reference(%s)\n", name);
455}
456
457/**
458 * ignorableWhitespaceDebug:
459 * @ctxt: An XML parser context
460 * @ch: a xmlChar string
461 * @start: the first char in the string
462 * @len: the number of xmlChar
463 *
464 * receiving some ignorable whitespaces from the parser.
465 * Question: how much at a time ???
466 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000467static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000468ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000469{
Daniel Veillard87b95392000-08-12 21:12:04 +0000470 char output[40];
471 int i;
472
473 for (i = 0;(i<len) && (i < 30);i++)
474 output[i] = ch[i];
475 output[i] = 0;
476
477 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000478}
479
480/**
481 * processingInstructionDebug:
482 * @ctxt: An XML parser context
483 * @target: the target name
484 * @data: the PI data's
485 * @len: the number of xmlChar
486 *
487 * A processing instruction has been parsed.
488 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000489static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000490processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000491 const xmlChar *data)
492{
493 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
494 (char *) target, (char *) data);
495}
496
497/**
498 * commentDebug:
499 * @ctxt: An XML parser context
500 * @value: the comment content
501 *
502 * A comment has been parsed.
503 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000504static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000505commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000506{
507 fprintf(stdout, "SAX.comment(%s)\n", value);
508}
509
510/**
511 * warningDebug:
512 * @ctxt: An XML parser context
513 * @msg: the message to display/transmit
514 * @...: extra parameters for the message display
515 *
516 * Display and format a warning messages, gives file, line, position and
517 * extra parameters.
518 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000519static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000520warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000521{
522 va_list args;
523
524 va_start(args, msg);
525 fprintf(stdout, "SAX.warning: ");
526 vfprintf(stdout, msg, args);
527 va_end(args);
528}
529
530/**
531 * errorDebug:
532 * @ctxt: An XML parser context
533 * @msg: the message to display/transmit
534 * @...: extra parameters for the message display
535 *
536 * Display and format a error messages, gives file, line, position and
537 * extra parameters.
538 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000539static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000540errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000541{
542 va_list args;
543
544 va_start(args, msg);
545 fprintf(stdout, "SAX.error: ");
546 vfprintf(stdout, msg, args);
547 va_end(args);
548}
549
550/**
551 * fatalErrorDebug:
552 * @ctxt: An XML parser context
553 * @msg: the message to display/transmit
554 * @...: extra parameters for the message display
555 *
556 * Display and format a fatalError messages, gives file, line, position and
557 * extra parameters.
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000560fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000561{
562 va_list args;
563
564 va_start(args, msg);
565 fprintf(stdout, "SAX.fatalError: ");
566 vfprintf(stdout, msg, args);
567 va_end(args);
568}
569
570xmlSAXHandler debugSAXHandlerStruct = {
571 internalSubsetDebug,
572 isStandaloneDebug,
573 hasInternalSubsetDebug,
574 hasExternalSubsetDebug,
575 resolveEntityDebug,
576 getEntityDebug,
577 entityDeclDebug,
578 notationDeclDebug,
579 attributeDeclDebug,
580 elementDeclDebug,
581 unparsedEntityDeclDebug,
582 setDocumentLocatorDebug,
583 startDocumentDebug,
584 endDocumentDebug,
585 startElementDebug,
586 endElementDebug,
587 referenceDebug,
588 charactersDebug,
589 ignorableWhitespaceDebug,
590 processingInstructionDebug,
591 commentDebug,
592 warningDebug,
593 errorDebug,
594 fatalErrorDebug,
595 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000596 cdataDebug,
Daniel Veillardd0463562001-10-13 09:15:48 +0000597 NULL,
598 1
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000599};
600
601xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000602/************************************************************************
603 * *
604 * Debug *
605 * *
606 ************************************************************************/
607
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000608static void
609parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000610 htmlDocPtr doc = NULL;
611
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000612 /*
613 * Empty callbacks for checking
614 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000615 if (push) {
616 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000617
Daniel Veillard87b95392000-08-12 21:12:04 +0000618 f = fopen(filename, "r");
619 if (f != NULL) {
620 int res, size = 3;
621 char chars[4096];
622 htmlParserCtxtPtr ctxt;
623
624 /* if (repeat) */
625 size = 4096;
626 res = fread(chars, 1, 4, f);
627 if (res > 0) {
628 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000629 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000630 while ((res = fread(chars, 1, size, f)) > 0) {
631 htmlParseChunk(ctxt, chars, res, 0);
632 }
633 htmlParseChunk(ctxt, chars, 0, 1);
634 doc = ctxt->myDoc;
635 htmlFreeParserCtxt(ctxt);
636 }
637 if (doc != NULL) {
638 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
639 xmlFreeDoc(doc);
640 }
641 fclose(f);
642 }
643 if (!noout) {
644 f = fopen(filename, "r");
645 if (f != NULL) {
646 int res, size = 3;
647 char chars[4096];
648 htmlParserCtxtPtr ctxt;
649
650 /* if (repeat) */
651 size = 4096;
652 res = fread(chars, 1, 4, f);
653 if (res > 0) {
654 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000655 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000656 while ((res = fread(chars, 1, size, f)) > 0) {
657 htmlParseChunk(ctxt, chars, res, 0);
658 }
659 htmlParseChunk(ctxt, chars, 0, 1);
660 doc = ctxt->myDoc;
661 htmlFreeParserCtxt(ctxt);
662 }
663 if (doc != NULL) {
664 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
665 xmlFreeDoc(doc);
666 }
667 fclose(f);
668 }
669 }
670 } else {
671 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000672 if (doc != NULL) {
673 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
674 xmlFreeDoc(doc);
675 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000676
677 if (!noout) {
678 /*
679 * Debug callback
680 */
681 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
682 if (doc != NULL) {
683 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
684 xmlFreeDoc(doc);
685 }
686 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000687 }
688}
689
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000690static void
691parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000692 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000693
694 /*
695 * build an HTML tree from a string;
696 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000697 if (push) {
698 FILE *f;
699
700 f = fopen(filename, "r");
701 if (f != NULL) {
702 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000703 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000704 htmlParserCtxtPtr ctxt;
705
Daniel Veillard87b95392000-08-12 21:12:04 +0000706 /* if (repeat) */
707 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000708 res = fread(chars, 1, 4, f);
709 if (res > 0) {
710 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000711 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000712 while ((res = fread(chars, 1, size, f)) > 0) {
713 htmlParseChunk(ctxt, chars, res, 0);
714 }
715 htmlParseChunk(ctxt, chars, 0, 1);
716 doc = ctxt->myDoc;
717 htmlFreeParserCtxt(ctxt);
718 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000719 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000720 }
721 } else {
722 doc = htmlParseFile(filename, NULL);
723 }
724 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000725 xmlGenericError(xmlGenericErrorContext,
726 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000727 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000728
729 /*
730 * test intermediate copy if needed.
731 */
732 if (copy) {
733 tmp = doc;
734 doc = xmlCopyDoc(doc, 1);
735 xmlFreeDoc(tmp);
736 }
737
738 /*
739 * print it.
740 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000741 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000742#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000743 if (!debug) {
744 if (encoding)
745 htmlSaveFileEnc("-", doc, encoding);
746 else
747 htmlDocDump(stdout, doc);
748 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000749 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000750#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000751 if (encoding)
752 htmlSaveFileEnc("-", doc, encoding);
753 else
754 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000755#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000756 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000757
758 /*
759 * free it.
760 */
761 xmlFreeDoc(doc);
762}
763
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000764int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000765 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000766 int files = 0;
767
768 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000769#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000770 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
771 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000772 else
773#endif
774 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000775 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000776 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
777 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000778 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
779 sax++;
780 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
781 noout++;
782 else if ((!strcmp(argv[i], "-repeat")) ||
783 (!strcmp(argv[i], "--repeat")))
784 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000785 else if ((!strcmp(argv[i], "-encode")) ||
786 (!strcmp(argv[i], "--encode"))) {
787 i++;
788 encoding = argv[i];
789 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000790 }
791 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000792 if ((!strcmp(argv[i], "-encode")) ||
793 (!strcmp(argv[i], "--encode"))) {
794 i++;
795 continue;
796 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000797 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000798 if (repeat) {
799 for (count = 0;count < 100 * repeat;count++) {
800 if (sax)
801 parseSAXFile(argv[i]);
802 else
803 parseAndPrintFile(argv[i]);
804 }
805 } else {
806 if (sax)
807 parseSAXFile(argv[i]);
808 else
809 parseAndPrintFile(argv[i]);
810 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000811 files ++;
812 }
813 }
814 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000815 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000816 argv[0]);
817 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000818#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000819 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000820#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000821 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000822 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000823 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000824 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000825 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000826 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000827 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000828 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000829 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000830
831 return(0);
832}
Daniel Veillard361d8452000-04-03 19:48:13 +0000833#else /* !LIBXML_HTML_ENABLED */
834#include <stdio.h>
/* Stub used when the library was built without HTML support. */
int main(int argc, char **argv) {
    (void) argc;    /* only the program name is needed */
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
839#endif