blob: f48612d8a5fd9cf20698bcf1738968388bda94e2 [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000038#include <libxml/globals.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
/*
 * Command-line switches.  Each one starts cleared and is bumped (or, for
 * the encoding, assigned) while main() scans argv.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug  : dump with xmlDebugDumpDocument() */
#endif
static int copy = 0;            /* --copy   : pass the tree through xmlCopyDoc() */
static int sax = 0;             /* --sax    : run parseSAXFile() instead of parseAndPrintFile() */
static int repeat = 0;          /* --repeat : handle each file 100 * repeat times */
static int noout = 0;           /* --noout  : skip the output / debug-handler pass */
static int push = 0;            /* --push   : feed the parser through the push API */
static char *encoding = NULL;   /* --encode : encoding name given to htmlSaveFileEnc() */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000049
Daniel Veillard7c1206f1999-10-14 09:10:25 +000050xmlSAXHandler emptySAXHandlerStruct = {
51 NULL, /* internalSubset */
52 NULL, /* isStandalone */
53 NULL, /* hasInternalSubset */
54 NULL, /* hasExternalSubset */
55 NULL, /* resolveEntity */
56 NULL, /* getEntity */
57 NULL, /* entityDecl */
58 NULL, /* notationDecl */
59 NULL, /* attributeDecl */
60 NULL, /* elementDecl */
61 NULL, /* unparsedEntityDecl */
62 NULL, /* setDocumentLocator */
63 NULL, /* startDocument */
64 NULL, /* endDocument */
65 NULL, /* startElement */
66 NULL, /* endElement */
67 NULL, /* reference */
68 NULL, /* characters */
69 NULL, /* ignorableWhitespace */
70 NULL, /* processingInstruction */
71 NULL, /* comment */
72 NULL, /* xmlParserWarning */
73 NULL, /* xmlParserError */
74 NULL, /* xmlParserError */
75 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000076 NULL, /* cdataBlock */
Daniel Veillardd0463562001-10-13 09:15:48 +000077 NULL, /* externalSubset */
Daniel Veillard092643b2003-09-25 14:29:29 +000078 1,
79 NULL,
80 NULL,
81 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +000082};
83
84xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
85extern xmlSAXHandlerPtr debugSAXHandler;
86
87/************************************************************************
88 * *
89 * Debug Handlers *
90 * *
91 ************************************************************************/
92
93/**
94 * isStandaloneDebug:
95 * @ctxt: An XML parser context
96 *
97 * Is this document tagged standalone ?
98 *
99 * Returns 1 if true
100 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000101static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000102isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000103{
104 fprintf(stdout, "SAX.isStandalone()\n");
105 return(0);
106}
107
108/**
109 * hasInternalSubsetDebug:
110 * @ctxt: An XML parser context
111 *
112 * Does this document has an internal subset
113 *
114 * Returns 1 if true
115 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000116static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000117hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000118{
119 fprintf(stdout, "SAX.hasInternalSubset()\n");
120 return(0);
121}
122
123/**
124 * hasExternalSubsetDebug:
125 * @ctxt: An XML parser context
126 *
127 * Does this document has an external subset
128 *
129 * Returns 1 if true
130 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000131static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000132hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000133{
134 fprintf(stdout, "SAX.hasExternalSubset()\n");
135 return(0);
136}
137
138/**
139 * hasInternalSubsetDebug:
140 * @ctxt: An XML parser context
141 *
142 * Does this document has an internal subset
143 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000144static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000145internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000146 const xmlChar *ExternalID, const xmlChar *SystemID)
147{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000148 fprintf(stdout, "SAX.internalSubset(%s,", name);
149 if (ExternalID == NULL)
150 fprintf(stdout, " ,");
151 else
152 fprintf(stdout, " %s,", ExternalID);
153 if (SystemID == NULL)
154 fprintf(stdout, " )\n");
155 else
156 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000157}
158
159/**
160 * resolveEntityDebug:
161 * @ctxt: An XML parser context
162 * @publicId: The public ID of the entity
163 * @systemId: The system ID of the entity
164 *
165 * Special entity resolver, better left to the parser, it has
166 * more context than the application layer.
167 * The default behaviour is to NOT resolve the entities, in that case
168 * the ENTITY_REF nodes are built in the structure (and the parameter
169 * values).
170 *
171 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
172 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000173static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000174resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000175{
176 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
177
178
179 fprintf(stdout, "SAX.resolveEntity(");
180 if (publicId != NULL)
181 fprintf(stdout, "%s", (char *)publicId);
182 else
183 fprintf(stdout, " ");
184 if (systemId != NULL)
185 fprintf(stdout, ", %s)\n", (char *)systemId);
186 else
187 fprintf(stdout, ", )\n");
188/*********
189 if (systemId != NULL) {
190 return(xmlNewInputFromFile(ctxt, (char *) systemId));
191 }
192 *********/
193 return(NULL);
194}
195
196/**
197 * getEntityDebug:
198 * @ctxt: An XML parser context
199 * @name: The entity name
200 *
201 * Get an entity by name
202 *
203 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
204 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000206getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000207{
208 fprintf(stdout, "SAX.getEntity(%s)\n", name);
209 return(NULL);
210}
211
212/**
213 * getParameterEntityDebug:
214 * @ctxt: An XML parser context
215 * @name: The entity name
216 *
217 * Get a parameter entity by name
218 *
219 * Returns the xmlParserInputPtr
220 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000222getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000223{
224 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
225 return(NULL);
226}
227
228
229/**
230 * entityDeclDebug:
231 * @ctxt: An XML parser context
232 * @name: the entity name
233 * @type: the entity type
234 * @publicId: The public ID of the entity
235 * @systemId: The system ID of the entity
236 * @content: the entity value (without processing).
237 *
238 * An entity definition has been parsed
239 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000240static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000241entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000242 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
243{
244 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
245 name, type, publicId, systemId, content);
246}
247
248/**
249 * attributeDeclDebug:
250 * @ctxt: An XML parser context
251 * @name: the attribute name
252 * @type: the attribute type
253 *
254 * An attribute definition has been parsed
255 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000256static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000257attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000258 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000259 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000260{
261 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
262 elem, name, type, def, defaultValue);
263}
264
265/**
266 * elementDeclDebug:
267 * @ctxt: An XML parser context
268 * @name: the element name
269 * @type: the element type
270 * @content: the element value (without processing).
271 *
272 * An element definition has been parsed
273 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000275elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
276 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000277{
278 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
279 name, type);
280}
281
282/**
283 * notationDeclDebug:
284 * @ctxt: An XML parser context
285 * @name: The name of the notation
286 * @publicId: The public ID of the entity
287 * @systemId: The system ID of the entity
288 *
289 * What to do when a notation declaration has been parsed.
290 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000291static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000292notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000293 const xmlChar *publicId, const xmlChar *systemId)
294{
295 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
296 (char *) name, (char *) publicId, (char *) systemId);
297}
298
299/**
300 * unparsedEntityDeclDebug:
301 * @ctxt: An XML parser context
302 * @name: The name of the entity
303 * @publicId: The public ID of the entity
304 * @systemId: The system ID of the entity
305 * @notationName: the name of the notation
306 *
307 * What to do when an unparsed entity declaration is parsed
308 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000309static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000310unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000311 const xmlChar *publicId, const xmlChar *systemId,
312 const xmlChar *notationName)
313{
314 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
315 (char *) name, (char *) publicId, (char *) systemId,
316 (char *) notationName);
317}
318
319/**
320 * setDocumentLocatorDebug:
321 * @ctxt: An XML parser context
322 * @loc: A SAX Locator
323 *
324 * Receive the document locator at startup, actually xmlDefaultSAXLocator
325 * Everything is available on the context, so this is useless in our case.
326 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000327static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000328setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000329{
330 fprintf(stdout, "SAX.setDocumentLocator()\n");
331}
332
333/**
334 * startDocumentDebug:
335 * @ctxt: An XML parser context
336 *
337 * called when the document start being processed.
338 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000339static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000340startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000341{
342 fprintf(stdout, "SAX.startDocument()\n");
343}
344
345/**
346 * endDocumentDebug:
347 * @ctxt: An XML parser context
348 *
349 * called when the document end has been detected.
350 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000351static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000352endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000353{
354 fprintf(stdout, "SAX.endDocument()\n");
355}
356
357/**
358 * startElementDebug:
359 * @ctxt: An XML parser context
360 * @name: The element name
361 *
362 * called when an opening tag has been processed.
363 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000364static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000365startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000366{
367 int i;
368
369 fprintf(stdout, "SAX.startElement(%s", (char *) name);
370 if (atts != NULL) {
371 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000372 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000373 if (atts[i] != NULL) {
374 unsigned char output[40];
375 const unsigned char *att = atts[i];
376 int outlen, attlen;
377 fprintf(stdout, "='");
378 while ((attlen = strlen((char*)att)) > 0) {
379 outlen = sizeof output - 1;
380 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
Daniel Veillard5f704af2003-03-05 10:01:43 +0000381 output[outlen] = 0;
William M. Brackc1939562003-08-05 15:52:22 +0000382 fprintf(stdout, "%s", (char *) output);
Daniel Veillarde010c172000-08-28 10:04:51 +0000383 att += attlen;
384 }
385 fprintf(stdout, "'");
386 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000387 }
388 }
389 fprintf(stdout, ")\n");
390}
391
392/**
393 * endElementDebug:
394 * @ctxt: An XML parser context
395 * @name: The element name
396 *
397 * called when the end of an element has been detected.
398 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000399static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000400endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000401{
402 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
403}
404
405/**
406 * charactersDebug:
407 * @ctxt: An XML parser context
408 * @ch: a xmlChar string
409 * @len: the number of xmlChar
410 *
411 * receiving some chars from the parser.
412 * Question: how much at a time ???
413 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000414static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000415charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000416{
Daniel Veillarde010c172000-08-28 10:04:51 +0000417 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000418 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000419
Daniel Veillard4948eb42000-08-29 09:41:15 +0000420 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000421 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000422
423 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000424}
425
426/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000427 * cdataDebug:
428 * @ctxt: An XML parser context
429 * @ch: a xmlChar string
430 * @len: the number of xmlChar
431 *
432 * receiving some cdata chars from the parser.
433 * Question: how much at a time ???
434 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000435static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000436cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000437{
438 unsigned char output[40];
439 int inlen = len, outlen = 30;
440
441 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
442 output[outlen] = 0;
443
444 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
445}
446
447/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000448 * referenceDebug:
449 * @ctxt: An XML parser context
450 * @name: The entity name
451 *
452 * called when an entity reference is detected.
453 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000454static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000455referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000456{
457 fprintf(stdout, "SAX.reference(%s)\n", name);
458}
459
460/**
461 * ignorableWhitespaceDebug:
462 * @ctxt: An XML parser context
463 * @ch: a xmlChar string
464 * @start: the first char in the string
465 * @len: the number of xmlChar
466 *
467 * receiving some ignorable whitespaces from the parser.
468 * Question: how much at a time ???
469 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000470static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000471ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000472{
Daniel Veillard87b95392000-08-12 21:12:04 +0000473 char output[40];
474 int i;
475
476 for (i = 0;(i<len) && (i < 30);i++)
477 output[i] = ch[i];
478 output[i] = 0;
479
480 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000481}
482
483/**
484 * processingInstructionDebug:
485 * @ctxt: An XML parser context
486 * @target: the target name
487 * @data: the PI data's
488 * @len: the number of xmlChar
489 *
490 * A processing instruction has been parsed.
491 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000492static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000493processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000494 const xmlChar *data)
495{
496 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
497 (char *) target, (char *) data);
498}
499
500/**
501 * commentDebug:
502 * @ctxt: An XML parser context
503 * @value: the comment content
504 *
505 * A comment has been parsed.
506 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000507static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000508commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000509{
510 fprintf(stdout, "SAX.comment(%s)\n", value);
511}
512
513/**
514 * warningDebug:
515 * @ctxt: An XML parser context
516 * @msg: the message to display/transmit
517 * @...: extra parameters for the message display
518 *
519 * Display and format a warning messages, gives file, line, position and
520 * extra parameters.
521 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000522static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000523warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000524{
525 va_list args;
526
527 va_start(args, msg);
528 fprintf(stdout, "SAX.warning: ");
529 vfprintf(stdout, msg, args);
530 va_end(args);
531}
532
533/**
534 * errorDebug:
535 * @ctxt: An XML parser context
536 * @msg: the message to display/transmit
537 * @...: extra parameters for the message display
538 *
539 * Display and format a error messages, gives file, line, position and
540 * extra parameters.
541 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000542static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000543errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000544{
545 va_list args;
546
547 va_start(args, msg);
548 fprintf(stdout, "SAX.error: ");
549 vfprintf(stdout, msg, args);
550 va_end(args);
551}
552
553/**
554 * fatalErrorDebug:
555 * @ctxt: An XML parser context
556 * @msg: the message to display/transmit
557 * @...: extra parameters for the message display
558 *
559 * Display and format a fatalError messages, gives file, line, position and
560 * extra parameters.
561 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000562static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000563fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000564{
565 va_list args;
566
567 va_start(args, msg);
568 fprintf(stdout, "SAX.fatalError: ");
569 vfprintf(stdout, msg, args);
570 va_end(args);
571}
572
573xmlSAXHandler debugSAXHandlerStruct = {
574 internalSubsetDebug,
575 isStandaloneDebug,
576 hasInternalSubsetDebug,
577 hasExternalSubsetDebug,
578 resolveEntityDebug,
579 getEntityDebug,
580 entityDeclDebug,
581 notationDeclDebug,
582 attributeDeclDebug,
583 elementDeclDebug,
584 unparsedEntityDeclDebug,
585 setDocumentLocatorDebug,
586 startDocumentDebug,
587 endDocumentDebug,
588 startElementDebug,
589 endElementDebug,
590 referenceDebug,
591 charactersDebug,
592 ignorableWhitespaceDebug,
593 processingInstructionDebug,
594 commentDebug,
595 warningDebug,
596 errorDebug,
597 fatalErrorDebug,
598 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000599 cdataDebug,
Daniel Veillardd0463562001-10-13 09:15:48 +0000600 NULL,
Daniel Veillard092643b2003-09-25 14:29:29 +0000601 1,
602 NULL,
603 NULL,
604 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000605};
606
607xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000608/************************************************************************
609 * *
610 * Debug *
611 * *
612 ************************************************************************/
613
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000614static void
615parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000616 htmlDocPtr doc = NULL;
617
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000618 /*
619 * Empty callbacks for checking
620 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000621 if (push) {
622 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000623
Daniel Veillard87b95392000-08-12 21:12:04 +0000624 f = fopen(filename, "r");
625 if (f != NULL) {
626 int res, size = 3;
627 char chars[4096];
628 htmlParserCtxtPtr ctxt;
629
630 /* if (repeat) */
631 size = 4096;
632 res = fread(chars, 1, 4, f);
633 if (res > 0) {
634 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000635 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000636 while ((res = fread(chars, 1, size, f)) > 0) {
637 htmlParseChunk(ctxt, chars, res, 0);
638 }
639 htmlParseChunk(ctxt, chars, 0, 1);
640 doc = ctxt->myDoc;
641 htmlFreeParserCtxt(ctxt);
642 }
643 if (doc != NULL) {
644 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
645 xmlFreeDoc(doc);
646 }
647 fclose(f);
648 }
649 if (!noout) {
650 f = fopen(filename, "r");
651 if (f != NULL) {
652 int res, size = 3;
653 char chars[4096];
654 htmlParserCtxtPtr ctxt;
655
656 /* if (repeat) */
657 size = 4096;
658 res = fread(chars, 1, 4, f);
659 if (res > 0) {
660 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000661 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000662 while ((res = fread(chars, 1, size, f)) > 0) {
663 htmlParseChunk(ctxt, chars, res, 0);
664 }
665 htmlParseChunk(ctxt, chars, 0, 1);
666 doc = ctxt->myDoc;
667 htmlFreeParserCtxt(ctxt);
668 }
669 if (doc != NULL) {
670 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
671 xmlFreeDoc(doc);
672 }
673 fclose(f);
674 }
675 }
676 } else {
677 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000678 if (doc != NULL) {
679 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
680 xmlFreeDoc(doc);
681 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000682
683 if (!noout) {
684 /*
685 * Debug callback
686 */
687 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
688 if (doc != NULL) {
689 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
690 xmlFreeDoc(doc);
691 }
692 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000693 }
694}
695
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000696static void
697parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000698 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000699
700 /*
701 * build an HTML tree from a string;
702 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000703 if (push) {
704 FILE *f;
705
706 f = fopen(filename, "r");
707 if (f != NULL) {
708 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000709 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000710 htmlParserCtxtPtr ctxt;
711
Daniel Veillard87b95392000-08-12 21:12:04 +0000712 /* if (repeat) */
713 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000714 res = fread(chars, 1, 4, f);
715 if (res > 0) {
716 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000717 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000718 while ((res = fread(chars, 1, size, f)) > 0) {
719 htmlParseChunk(ctxt, chars, res, 0);
720 }
721 htmlParseChunk(ctxt, chars, 0, 1);
722 doc = ctxt->myDoc;
723 htmlFreeParserCtxt(ctxt);
724 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000725 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000726 }
727 } else {
728 doc = htmlParseFile(filename, NULL);
729 }
730 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000731 xmlGenericError(xmlGenericErrorContext,
732 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000733 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000734
735 /*
736 * test intermediate copy if needed.
737 */
738 if (copy) {
739 tmp = doc;
740 doc = xmlCopyDoc(doc, 1);
741 xmlFreeDoc(tmp);
742 }
743
744 /*
745 * print it.
746 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000747 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000748#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000749 if (!debug) {
750 if (encoding)
751 htmlSaveFileEnc("-", doc, encoding);
752 else
753 htmlDocDump(stdout, doc);
754 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000755 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000756#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000757 if (encoding)
758 htmlSaveFileEnc("-", doc, encoding);
759 else
760 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000761#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000762 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000763
764 /*
765 * free it.
766 */
767 xmlFreeDoc(doc);
768}
769
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000770int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000771 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000772 int files = 0;
773
774 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000775#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000776 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
777 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000778 else
779#endif
780 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000781 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000782 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
783 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000784 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
785 sax++;
786 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
787 noout++;
788 else if ((!strcmp(argv[i], "-repeat")) ||
789 (!strcmp(argv[i], "--repeat")))
790 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000791 else if ((!strcmp(argv[i], "-encode")) ||
792 (!strcmp(argv[i], "--encode"))) {
793 i++;
794 encoding = argv[i];
795 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000796 }
797 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000798 if ((!strcmp(argv[i], "-encode")) ||
799 (!strcmp(argv[i], "--encode"))) {
800 i++;
801 continue;
802 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000803 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000804 if (repeat) {
805 for (count = 0;count < 100 * repeat;count++) {
806 if (sax)
807 parseSAXFile(argv[i]);
808 else
809 parseAndPrintFile(argv[i]);
810 }
811 } else {
812 if (sax)
813 parseSAXFile(argv[i]);
814 else
815 parseAndPrintFile(argv[i]);
816 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000817 files ++;
818 }
819 }
820 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000821 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000822 argv[0]);
823 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000824#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000825 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000826#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000827 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000828 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000829 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000830 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000831 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000832 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000833 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000834 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000835 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000836
837 return(0);
838}
Daniel Veillard361d8452000-04-03 19:48:13 +0000839#else /* !LIBXML_HTML_ENABLED */
840#include <stdio.h>
/*
 * Fallback entry point used when the library was built without HTML
 * support: reports that fact on stdout and exits successfully.
 */
int main(int argc, char **argv) {
    (void) argc;    /* only argv[0] is needed */
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
845#endif