blob: 102e3c65b74bd3971ada64b9a2d8e8d4f7ed4ca5 [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000038
/*
 * Command-line switches; each one is incremented or assigned by main().
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;          /* --debug : dump the tree with xmlDebugDumpDocument() */
#endif
static int copy = 0;           /* --copy  : pass the document through xmlCopyDoc() first */
static int sax = 0;            /* --sax   : run the SAX callbacks instead of building/printing */
static int repeat = 0;         /* --repeat: process each file 100 * repeat times */
static int noout = 0;          /* --noout : suppress printing of the result */
static int push = 0;           /* --push  : feed the file through the push parser */
static char *encoding = NULL;  /* --encode: encoding handed to htmlSaveFileEnc() */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000048
Daniel Veillard7c1206f1999-10-14 09:10:25 +000049xmlSAXHandler emptySAXHandlerStruct = {
50 NULL, /* internalSubset */
51 NULL, /* isStandalone */
52 NULL, /* hasInternalSubset */
53 NULL, /* hasExternalSubset */
54 NULL, /* resolveEntity */
55 NULL, /* getEntity */
56 NULL, /* entityDecl */
57 NULL, /* notationDecl */
58 NULL, /* attributeDecl */
59 NULL, /* elementDecl */
60 NULL, /* unparsedEntityDecl */
61 NULL, /* setDocumentLocator */
62 NULL, /* startDocument */
63 NULL, /* endDocument */
64 NULL, /* startElement */
65 NULL, /* endElement */
66 NULL, /* reference */
67 NULL, /* characters */
68 NULL, /* ignorableWhitespace */
69 NULL, /* processingInstruction */
70 NULL, /* comment */
71 NULL, /* xmlParserWarning */
72 NULL, /* xmlParserError */
73 NULL, /* xmlParserError */
74 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000075 NULL, /* cdataBlock */
76 NULL /* externalSubset */
Daniel Veillard7c1206f1999-10-14 09:10:25 +000077};
78
79xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
80extern xmlSAXHandlerPtr debugSAXHandler;
81
82/************************************************************************
83 * *
84 * Debug Handlers *
85 * *
86 ************************************************************************/
87
88/**
89 * isStandaloneDebug:
90 * @ctxt: An XML parser context
91 *
92 * Is this document tagged standalone ?
93 *
94 * Returns 1 if true
95 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000096static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +000097isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +000098{
99 fprintf(stdout, "SAX.isStandalone()\n");
100 return(0);
101}
102
103/**
104 * hasInternalSubsetDebug:
105 * @ctxt: An XML parser context
106 *
107 * Does this document has an internal subset
108 *
109 * Returns 1 if true
110 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000111static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000112hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000113{
114 fprintf(stdout, "SAX.hasInternalSubset()\n");
115 return(0);
116}
117
118/**
119 * hasExternalSubsetDebug:
120 * @ctxt: An XML parser context
121 *
122 * Does this document has an external subset
123 *
124 * Returns 1 if true
125 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000126static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000127hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000128{
129 fprintf(stdout, "SAX.hasExternalSubset()\n");
130 return(0);
131}
132
133/**
134 * hasInternalSubsetDebug:
135 * @ctxt: An XML parser context
136 *
137 * Does this document has an internal subset
138 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000139static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000140internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000141 const xmlChar *ExternalID, const xmlChar *SystemID)
142{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000143 fprintf(stdout, "SAX.internalSubset(%s,", name);
144 if (ExternalID == NULL)
145 fprintf(stdout, " ,");
146 else
147 fprintf(stdout, " %s,", ExternalID);
148 if (SystemID == NULL)
149 fprintf(stdout, " )\n");
150 else
151 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000152}
153
154/**
155 * resolveEntityDebug:
156 * @ctxt: An XML parser context
157 * @publicId: The public ID of the entity
158 * @systemId: The system ID of the entity
159 *
160 * Special entity resolver, better left to the parser, it has
161 * more context than the application layer.
162 * The default behaviour is to NOT resolve the entities, in that case
163 * the ENTITY_REF nodes are built in the structure (and the parameter
164 * values).
165 *
166 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
167 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000168static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000169resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000170{
171 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
172
173
174 fprintf(stdout, "SAX.resolveEntity(");
175 if (publicId != NULL)
176 fprintf(stdout, "%s", (char *)publicId);
177 else
178 fprintf(stdout, " ");
179 if (systemId != NULL)
180 fprintf(stdout, ", %s)\n", (char *)systemId);
181 else
182 fprintf(stdout, ", )\n");
183/*********
184 if (systemId != NULL) {
185 return(xmlNewInputFromFile(ctxt, (char *) systemId));
186 }
187 *********/
188 return(NULL);
189}
190
191/**
192 * getEntityDebug:
193 * @ctxt: An XML parser context
194 * @name: The entity name
195 *
196 * Get an entity by name
197 *
198 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
199 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000200static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000201getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000202{
203 fprintf(stdout, "SAX.getEntity(%s)\n", name);
204 return(NULL);
205}
206
207/**
208 * getParameterEntityDebug:
209 * @ctxt: An XML parser context
210 * @name: The entity name
211 *
212 * Get a parameter entity by name
213 *
214 * Returns the xmlParserInputPtr
215 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000216static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000217getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000218{
219 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
220 return(NULL);
221}
222
223
224/**
225 * entityDeclDebug:
226 * @ctxt: An XML parser context
227 * @name: the entity name
228 * @type: the entity type
229 * @publicId: The public ID of the entity
230 * @systemId: The system ID of the entity
231 * @content: the entity value (without processing).
232 *
233 * An entity definition has been parsed
234 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000235static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000236entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000237 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
238{
239 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
240 name, type, publicId, systemId, content);
241}
242
243/**
244 * attributeDeclDebug:
245 * @ctxt: An XML parser context
246 * @name: the attribute name
247 * @type: the attribute type
248 *
249 * An attribute definition has been parsed
250 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000251static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000252attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000253 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000254 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000255{
256 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
257 elem, name, type, def, defaultValue);
258}
259
260/**
261 * elementDeclDebug:
262 * @ctxt: An XML parser context
263 * @name: the element name
264 * @type: the element type
265 * @content: the element value (without processing).
266 *
267 * An element definition has been parsed
268 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000269static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000270elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
271 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000272{
273 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
274 name, type);
275}
276
277/**
278 * notationDeclDebug:
279 * @ctxt: An XML parser context
280 * @name: The name of the notation
281 * @publicId: The public ID of the entity
282 * @systemId: The system ID of the entity
283 *
284 * What to do when a notation declaration has been parsed.
285 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000286static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000287notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000288 const xmlChar *publicId, const xmlChar *systemId)
289{
290 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
291 (char *) name, (char *) publicId, (char *) systemId);
292}
293
294/**
295 * unparsedEntityDeclDebug:
296 * @ctxt: An XML parser context
297 * @name: The name of the entity
298 * @publicId: The public ID of the entity
299 * @systemId: The system ID of the entity
300 * @notationName: the name of the notation
301 *
302 * What to do when an unparsed entity declaration is parsed
303 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000304static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000305unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000306 const xmlChar *publicId, const xmlChar *systemId,
307 const xmlChar *notationName)
308{
309 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
310 (char *) name, (char *) publicId, (char *) systemId,
311 (char *) notationName);
312}
313
314/**
315 * setDocumentLocatorDebug:
316 * @ctxt: An XML parser context
317 * @loc: A SAX Locator
318 *
319 * Receive the document locator at startup, actually xmlDefaultSAXLocator
320 * Everything is available on the context, so this is useless in our case.
321 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000322static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000323setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000324{
325 fprintf(stdout, "SAX.setDocumentLocator()\n");
326}
327
328/**
329 * startDocumentDebug:
330 * @ctxt: An XML parser context
331 *
332 * called when the document start being processed.
333 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000334static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000335startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000336{
337 fprintf(stdout, "SAX.startDocument()\n");
338}
339
340/**
341 * endDocumentDebug:
342 * @ctxt: An XML parser context
343 *
344 * called when the document end has been detected.
345 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000346static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000347endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000348{
349 fprintf(stdout, "SAX.endDocument()\n");
350}
351
352/**
353 * startElementDebug:
354 * @ctxt: An XML parser context
355 * @name: The element name
356 *
357 * called when an opening tag has been processed.
358 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000359static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000360startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000361{
362 int i;
363
364 fprintf(stdout, "SAX.startElement(%s", (char *) name);
365 if (atts != NULL) {
366 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000367 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000368 if (atts[i] != NULL) {
369 unsigned char output[40];
370 const unsigned char *att = atts[i];
371 int outlen, attlen;
372 fprintf(stdout, "='");
373 while ((attlen = strlen((char*)att)) > 0) {
374 outlen = sizeof output - 1;
375 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
376 fprintf(stdout, "%.*s", outlen, output);
377 att += attlen;
378 }
379 fprintf(stdout, "'");
380 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000381 }
382 }
383 fprintf(stdout, ")\n");
384}
385
386/**
387 * endElementDebug:
388 * @ctxt: An XML parser context
389 * @name: The element name
390 *
391 * called when the end of an element has been detected.
392 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000393static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000394endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000395{
396 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
397}
398
399/**
400 * charactersDebug:
401 * @ctxt: An XML parser context
402 * @ch: a xmlChar string
403 * @len: the number of xmlChar
404 *
405 * receiving some chars from the parser.
406 * Question: how much at a time ???
407 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000408static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000409charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000410{
Daniel Veillarde010c172000-08-28 10:04:51 +0000411 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000412 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000413
Daniel Veillard4948eb42000-08-29 09:41:15 +0000414 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000415 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000416
417 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000418}
419
420/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000421 * cdataDebug:
422 * @ctxt: An XML parser context
423 * @ch: a xmlChar string
424 * @len: the number of xmlChar
425 *
426 * receiving some cdata chars from the parser.
427 * Question: how much at a time ???
428 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000429static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000430cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000431{
432 unsigned char output[40];
433 int inlen = len, outlen = 30;
434
435 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
436 output[outlen] = 0;
437
438 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
439}
440
441/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000442 * referenceDebug:
443 * @ctxt: An XML parser context
444 * @name: The entity name
445 *
446 * called when an entity reference is detected.
447 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000448static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000449referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000450{
451 fprintf(stdout, "SAX.reference(%s)\n", name);
452}
453
454/**
455 * ignorableWhitespaceDebug:
456 * @ctxt: An XML parser context
457 * @ch: a xmlChar string
458 * @start: the first char in the string
459 * @len: the number of xmlChar
460 *
461 * receiving some ignorable whitespaces from the parser.
462 * Question: how much at a time ???
463 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000464static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000465ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000466{
Daniel Veillard87b95392000-08-12 21:12:04 +0000467 char output[40];
468 int i;
469
470 for (i = 0;(i<len) && (i < 30);i++)
471 output[i] = ch[i];
472 output[i] = 0;
473
474 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000475}
476
477/**
478 * processingInstructionDebug:
479 * @ctxt: An XML parser context
480 * @target: the target name
481 * @data: the PI data's
482 * @len: the number of xmlChar
483 *
484 * A processing instruction has been parsed.
485 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000486static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000487processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000488 const xmlChar *data)
489{
490 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
491 (char *) target, (char *) data);
492}
493
494/**
495 * commentDebug:
496 * @ctxt: An XML parser context
497 * @value: the comment content
498 *
499 * A comment has been parsed.
500 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000501static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000502commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000503{
504 fprintf(stdout, "SAX.comment(%s)\n", value);
505}
506
507/**
508 * warningDebug:
509 * @ctxt: An XML parser context
510 * @msg: the message to display/transmit
511 * @...: extra parameters for the message display
512 *
513 * Display and format a warning messages, gives file, line, position and
514 * extra parameters.
515 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000516static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000517warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000518{
519 va_list args;
520
521 va_start(args, msg);
522 fprintf(stdout, "SAX.warning: ");
523 vfprintf(stdout, msg, args);
524 va_end(args);
525}
526
527/**
528 * errorDebug:
529 * @ctxt: An XML parser context
530 * @msg: the message to display/transmit
531 * @...: extra parameters for the message display
532 *
533 * Display and format a error messages, gives file, line, position and
534 * extra parameters.
535 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000536static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000537errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000538{
539 va_list args;
540
541 va_start(args, msg);
542 fprintf(stdout, "SAX.error: ");
543 vfprintf(stdout, msg, args);
544 va_end(args);
545}
546
547/**
548 * fatalErrorDebug:
549 * @ctxt: An XML parser context
550 * @msg: the message to display/transmit
551 * @...: extra parameters for the message display
552 *
553 * Display and format a fatalError messages, gives file, line, position and
554 * extra parameters.
555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000556static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000557fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000558{
559 va_list args;
560
561 va_start(args, msg);
562 fprintf(stdout, "SAX.fatalError: ");
563 vfprintf(stdout, msg, args);
564 va_end(args);
565}
566
567xmlSAXHandler debugSAXHandlerStruct = {
568 internalSubsetDebug,
569 isStandaloneDebug,
570 hasInternalSubsetDebug,
571 hasExternalSubsetDebug,
572 resolveEntityDebug,
573 getEntityDebug,
574 entityDeclDebug,
575 notationDeclDebug,
576 attributeDeclDebug,
577 elementDeclDebug,
578 unparsedEntityDeclDebug,
579 setDocumentLocatorDebug,
580 startDocumentDebug,
581 endDocumentDebug,
582 startElementDebug,
583 endElementDebug,
584 referenceDebug,
585 charactersDebug,
586 ignorableWhitespaceDebug,
587 processingInstructionDebug,
588 commentDebug,
589 warningDebug,
590 errorDebug,
591 fatalErrorDebug,
592 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000593 cdataDebug,
594 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000595};
596
597xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000598/************************************************************************
599 * *
600 * Debug *
601 * *
602 ************************************************************************/
603
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000604static void
605parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000606 htmlDocPtr doc = NULL;
607
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000608 /*
609 * Empty callbacks for checking
610 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000611 if (push) {
612 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000613
Daniel Veillard87b95392000-08-12 21:12:04 +0000614 f = fopen(filename, "r");
615 if (f != NULL) {
616 int res, size = 3;
617 char chars[4096];
618 htmlParserCtxtPtr ctxt;
619
620 /* if (repeat) */
621 size = 4096;
622 res = fread(chars, 1, 4, f);
623 if (res > 0) {
624 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
625 chars, res, filename, 0);
626 while ((res = fread(chars, 1, size, f)) > 0) {
627 htmlParseChunk(ctxt, chars, res, 0);
628 }
629 htmlParseChunk(ctxt, chars, 0, 1);
630 doc = ctxt->myDoc;
631 htmlFreeParserCtxt(ctxt);
632 }
633 if (doc != NULL) {
634 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
635 xmlFreeDoc(doc);
636 }
637 fclose(f);
638 }
639 if (!noout) {
640 f = fopen(filename, "r");
641 if (f != NULL) {
642 int res, size = 3;
643 char chars[4096];
644 htmlParserCtxtPtr ctxt;
645
646 /* if (repeat) */
647 size = 4096;
648 res = fread(chars, 1, 4, f);
649 if (res > 0) {
650 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
651 chars, res, filename, 0);
652 while ((res = fread(chars, 1, size, f)) > 0) {
653 htmlParseChunk(ctxt, chars, res, 0);
654 }
655 htmlParseChunk(ctxt, chars, 0, 1);
656 doc = ctxt->myDoc;
657 htmlFreeParserCtxt(ctxt);
658 }
659 if (doc != NULL) {
660 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
661 xmlFreeDoc(doc);
662 }
663 fclose(f);
664 }
665 }
666 } else {
667 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000668 if (doc != NULL) {
669 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
670 xmlFreeDoc(doc);
671 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000672
673 if (!noout) {
674 /*
675 * Debug callback
676 */
677 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
678 if (doc != NULL) {
679 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
680 xmlFreeDoc(doc);
681 }
682 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000683 }
684}
685
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000686static void
687parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000688 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000689
690 /*
691 * build an HTML tree from a string;
692 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000693 if (push) {
694 FILE *f;
695
696 f = fopen(filename, "r");
697 if (f != NULL) {
698 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000699 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000700 htmlParserCtxtPtr ctxt;
701
Daniel Veillard87b95392000-08-12 21:12:04 +0000702 /* if (repeat) */
703 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000704 res = fread(chars, 1, 4, f);
705 if (res > 0) {
706 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
707 chars, res, filename, 0);
708 while ((res = fread(chars, 1, size, f)) > 0) {
709 htmlParseChunk(ctxt, chars, res, 0);
710 }
711 htmlParseChunk(ctxt, chars, 0, 1);
712 doc = ctxt->myDoc;
713 htmlFreeParserCtxt(ctxt);
714 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000715 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000716 }
717 } else {
718 doc = htmlParseFile(filename, NULL);
719 }
720 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000721 xmlGenericError(xmlGenericErrorContext,
722 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000723 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000724
725 /*
726 * test intermediate copy if needed.
727 */
728 if (copy) {
729 tmp = doc;
730 doc = xmlCopyDoc(doc, 1);
731 xmlFreeDoc(tmp);
732 }
733
734 /*
735 * print it.
736 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000737 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000738#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000739 if (!debug) {
740 if (encoding)
741 htmlSaveFileEnc("-", doc, encoding);
742 else
743 htmlDocDump(stdout, doc);
744 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000745 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000746#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000747 if (encoding)
748 htmlSaveFileEnc("-", doc, encoding);
749 else
750 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000751#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000752 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000753
754 /*
755 * free it.
756 */
757 xmlFreeDoc(doc);
758}
759
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000760int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000761 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000762 int files = 0;
763
764 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000765#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000766 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
767 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000768 else
769#endif
770 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000771 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000772 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
773 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000774 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
775 sax++;
776 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
777 noout++;
778 else if ((!strcmp(argv[i], "-repeat")) ||
779 (!strcmp(argv[i], "--repeat")))
780 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000781 else if ((!strcmp(argv[i], "-encode")) ||
782 (!strcmp(argv[i], "--encode"))) {
783 i++;
784 encoding = argv[i];
785 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000786 }
787 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000788 if ((!strcmp(argv[i], "-encode")) ||
789 (!strcmp(argv[i], "--encode"))) {
790 i++;
791 continue;
792 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000793 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000794 if (repeat) {
795 for (count = 0;count < 100 * repeat;count++) {
796 if (sax)
797 parseSAXFile(argv[i]);
798 else
799 parseAndPrintFile(argv[i]);
800 }
801 } else {
802 if (sax)
803 parseSAXFile(argv[i]);
804 else
805 parseAndPrintFile(argv[i]);
806 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000807 files ++;
808 }
809 }
810 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000811 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000812 argv[0]);
813 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000814#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000815 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000816#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000817 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000818 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000819 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000820 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000821 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000822 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000823 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000824 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000825 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000826
827 return(0);
828}
Daniel Veillard361d8452000-04-03 19:48:13 +0000829#else /* !LIBXML_HTML_ENABLED */
830#include <stdio.h>
/*
 * Fallback entry point used when HTML support is not compiled in:
 * reports that fact on stdout and exits successfully.
 */
int main(int argc, char **argv) {
    (void) argc;

    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
835#endif