blob: dbfccc39997f874cbeff090456aa9bdedda74cba [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000038#include <libxml/globals.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
/*
 * Command-line switches. Each counter is bumped by main() every time the
 * matching option is seen on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug: dump with xmlDebugDumpDocument() */
#endif
static int copy = 0;		/* --copy: duplicate the tree before printing */
static int sax = 0;		/* --sax: run the SAX callback tests instead */
static int repeat = 0;		/* --repeat: parse each file 100 * repeat times */
static int noout = 0;		/* --noout: suppress the printed result */
static int push = 0;		/* --push: feed the file through the push parser */
static char *encoding = NULL;	/* --encode: output encoding, NULL if not given */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000049
/*
 * SAX handler with every callback left NULL; parseSAXFile() uses it for a
 * first pass over each file before the tracing handler is run.
 */
xmlSAXHandler emptySAXHandlerStruct = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    NULL, /* startElement */
    NULL, /* endElement */
    NULL, /* reference */
    NULL, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning */
    NULL, /* error */
    NULL, /* fatalError */
    NULL, /* getParameterEntity */
    NULL, /* cdataBlock */
    NULL, /* externalSubset */
    1     /* presumably the "initialized" field - confirm against xmlSAXHandler */
};

xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
extern xmlSAXHandlerPtr debugSAXHandler;
83
84/************************************************************************
85 * *
86 * Debug Handlers *
87 * *
88 ************************************************************************/
89
90/**
91 * isStandaloneDebug:
92 * @ctxt: An XML parser context
93 *
94 * Is this document tagged standalone ?
95 *
96 * Returns 1 if true
97 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000098static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +000099isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000100{
101 fprintf(stdout, "SAX.isStandalone()\n");
102 return(0);
103}
104
105/**
106 * hasInternalSubsetDebug:
107 * @ctxt: An XML parser context
108 *
109 * Does this document has an internal subset
110 *
111 * Returns 1 if true
112 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000113static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000114hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000115{
116 fprintf(stdout, "SAX.hasInternalSubset()\n");
117 return(0);
118}
119
120/**
121 * hasExternalSubsetDebug:
122 * @ctxt: An XML parser context
123 *
124 * Does this document has an external subset
125 *
126 * Returns 1 if true
127 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000128static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000129hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000130{
131 fprintf(stdout, "SAX.hasExternalSubset()\n");
132 return(0);
133}
134
135/**
136 * hasInternalSubsetDebug:
137 * @ctxt: An XML parser context
138 *
139 * Does this document has an internal subset
140 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000141static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000142internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000143 const xmlChar *ExternalID, const xmlChar *SystemID)
144{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000145 fprintf(stdout, "SAX.internalSubset(%s,", name);
146 if (ExternalID == NULL)
147 fprintf(stdout, " ,");
148 else
149 fprintf(stdout, " %s,", ExternalID);
150 if (SystemID == NULL)
151 fprintf(stdout, " )\n");
152 else
153 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000154}
155
156/**
157 * resolveEntityDebug:
158 * @ctxt: An XML parser context
159 * @publicId: The public ID of the entity
160 * @systemId: The system ID of the entity
161 *
162 * Special entity resolver, better left to the parser, it has
163 * more context than the application layer.
164 * The default behaviour is to NOT resolve the entities, in that case
165 * the ENTITY_REF nodes are built in the structure (and the parameter
166 * values).
167 *
168 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
169 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000170static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000171resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000172{
173 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
174
175
176 fprintf(stdout, "SAX.resolveEntity(");
177 if (publicId != NULL)
178 fprintf(stdout, "%s", (char *)publicId);
179 else
180 fprintf(stdout, " ");
181 if (systemId != NULL)
182 fprintf(stdout, ", %s)\n", (char *)systemId);
183 else
184 fprintf(stdout, ", )\n");
185/*********
186 if (systemId != NULL) {
187 return(xmlNewInputFromFile(ctxt, (char *) systemId));
188 }
189 *********/
190 return(NULL);
191}
192
193/**
194 * getEntityDebug:
195 * @ctxt: An XML parser context
196 * @name: The entity name
197 *
198 * Get an entity by name
199 *
200 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
201 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000202static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000203getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000204{
205 fprintf(stdout, "SAX.getEntity(%s)\n", name);
206 return(NULL);
207}
208
209/**
210 * getParameterEntityDebug:
211 * @ctxt: An XML parser context
212 * @name: The entity name
213 *
214 * Get a parameter entity by name
215 *
216 * Returns the xmlParserInputPtr
217 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000218static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000219getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000220{
221 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
222 return(NULL);
223}
224
225
226/**
227 * entityDeclDebug:
228 * @ctxt: An XML parser context
229 * @name: the entity name
230 * @type: the entity type
231 * @publicId: The public ID of the entity
232 * @systemId: The system ID of the entity
233 * @content: the entity value (without processing).
234 *
235 * An entity definition has been parsed
236 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000237static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000238entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000239 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
240{
241 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
242 name, type, publicId, systemId, content);
243}
244
245/**
246 * attributeDeclDebug:
247 * @ctxt: An XML parser context
248 * @name: the attribute name
249 * @type: the attribute type
250 *
251 * An attribute definition has been parsed
252 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000253static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000254attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000255 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000256 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000257{
258 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
259 elem, name, type, def, defaultValue);
260}
261
262/**
263 * elementDeclDebug:
264 * @ctxt: An XML parser context
265 * @name: the element name
266 * @type: the element type
267 * @content: the element value (without processing).
268 *
269 * An element definition has been parsed
270 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000271static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000272elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
273 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000274{
275 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
276 name, type);
277}
278
279/**
280 * notationDeclDebug:
281 * @ctxt: An XML parser context
282 * @name: The name of the notation
283 * @publicId: The public ID of the entity
284 * @systemId: The system ID of the entity
285 *
286 * What to do when a notation declaration has been parsed.
287 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000288static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000289notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000290 const xmlChar *publicId, const xmlChar *systemId)
291{
292 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
293 (char *) name, (char *) publicId, (char *) systemId);
294}
295
296/**
297 * unparsedEntityDeclDebug:
298 * @ctxt: An XML parser context
299 * @name: The name of the entity
300 * @publicId: The public ID of the entity
301 * @systemId: The system ID of the entity
302 * @notationName: the name of the notation
303 *
304 * What to do when an unparsed entity declaration is parsed
305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000306static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000307unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000308 const xmlChar *publicId, const xmlChar *systemId,
309 const xmlChar *notationName)
310{
311 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
312 (char *) name, (char *) publicId, (char *) systemId,
313 (char *) notationName);
314}
315
316/**
317 * setDocumentLocatorDebug:
318 * @ctxt: An XML parser context
319 * @loc: A SAX Locator
320 *
321 * Receive the document locator at startup, actually xmlDefaultSAXLocator
322 * Everything is available on the context, so this is useless in our case.
323 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000324static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000325setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000326{
327 fprintf(stdout, "SAX.setDocumentLocator()\n");
328}
329
330/**
331 * startDocumentDebug:
332 * @ctxt: An XML parser context
333 *
334 * called when the document start being processed.
335 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000336static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000337startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000338{
339 fprintf(stdout, "SAX.startDocument()\n");
340}
341
342/**
343 * endDocumentDebug:
344 * @ctxt: An XML parser context
345 *
346 * called when the document end has been detected.
347 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000348static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000349endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000350{
351 fprintf(stdout, "SAX.endDocument()\n");
352}
353
354/**
355 * startElementDebug:
356 * @ctxt: An XML parser context
357 * @name: The element name
358 *
359 * called when an opening tag has been processed.
360 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000361static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000362startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000363{
364 int i;
365
366 fprintf(stdout, "SAX.startElement(%s", (char *) name);
367 if (atts != NULL) {
368 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000369 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000370 if (atts[i] != NULL) {
371 unsigned char output[40];
372 const unsigned char *att = atts[i];
373 int outlen, attlen;
374 fprintf(stdout, "='");
375 while ((attlen = strlen((char*)att)) > 0) {
376 outlen = sizeof output - 1;
377 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
378 fprintf(stdout, "%.*s", outlen, output);
379 att += attlen;
380 }
381 fprintf(stdout, "'");
382 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000383 }
384 }
385 fprintf(stdout, ")\n");
386}
387
388/**
389 * endElementDebug:
390 * @ctxt: An XML parser context
391 * @name: The element name
392 *
393 * called when the end of an element has been detected.
394 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000395static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000396endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000397{
398 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
399}
400
401/**
402 * charactersDebug:
403 * @ctxt: An XML parser context
404 * @ch: a xmlChar string
405 * @len: the number of xmlChar
406 *
407 * receiving some chars from the parser.
408 * Question: how much at a time ???
409 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000410static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000411charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000412{
Daniel Veillarde010c172000-08-28 10:04:51 +0000413 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000414 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000415
Daniel Veillard4948eb42000-08-29 09:41:15 +0000416 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000417 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000418
419 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000420}
421
422/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000423 * cdataDebug:
424 * @ctxt: An XML parser context
425 * @ch: a xmlChar string
426 * @len: the number of xmlChar
427 *
428 * receiving some cdata chars from the parser.
429 * Question: how much at a time ???
430 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000431static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000432cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000433{
434 unsigned char output[40];
435 int inlen = len, outlen = 30;
436
437 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
438 output[outlen] = 0;
439
440 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
441}
442
443/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000444 * referenceDebug:
445 * @ctxt: An XML parser context
446 * @name: The entity name
447 *
448 * called when an entity reference is detected.
449 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000450static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000451referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000452{
453 fprintf(stdout, "SAX.reference(%s)\n", name);
454}
455
456/**
457 * ignorableWhitespaceDebug:
458 * @ctxt: An XML parser context
459 * @ch: a xmlChar string
460 * @start: the first char in the string
461 * @len: the number of xmlChar
462 *
463 * receiving some ignorable whitespaces from the parser.
464 * Question: how much at a time ???
465 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000466static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000467ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000468{
Daniel Veillard87b95392000-08-12 21:12:04 +0000469 char output[40];
470 int i;
471
472 for (i = 0;(i<len) && (i < 30);i++)
473 output[i] = ch[i];
474 output[i] = 0;
475
476 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000477}
478
479/**
480 * processingInstructionDebug:
481 * @ctxt: An XML parser context
482 * @target: the target name
483 * @data: the PI data's
484 * @len: the number of xmlChar
485 *
486 * A processing instruction has been parsed.
487 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000488static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000489processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000490 const xmlChar *data)
491{
492 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
493 (char *) target, (char *) data);
494}
495
496/**
497 * commentDebug:
498 * @ctxt: An XML parser context
499 * @value: the comment content
500 *
501 * A comment has been parsed.
502 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000503static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000504commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000505{
506 fprintf(stdout, "SAX.comment(%s)\n", value);
507}
508
509/**
510 * warningDebug:
511 * @ctxt: An XML parser context
512 * @msg: the message to display/transmit
513 * @...: extra parameters for the message display
514 *
515 * Display and format a warning messages, gives file, line, position and
516 * extra parameters.
517 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000518static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000519warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000520{
521 va_list args;
522
523 va_start(args, msg);
524 fprintf(stdout, "SAX.warning: ");
525 vfprintf(stdout, msg, args);
526 va_end(args);
527}
528
529/**
530 * errorDebug:
531 * @ctxt: An XML parser context
532 * @msg: the message to display/transmit
533 * @...: extra parameters for the message display
534 *
535 * Display and format a error messages, gives file, line, position and
536 * extra parameters.
537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000538static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000539errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000540{
541 va_list args;
542
543 va_start(args, msg);
544 fprintf(stdout, "SAX.error: ");
545 vfprintf(stdout, msg, args);
546 va_end(args);
547}
548
549/**
550 * fatalErrorDebug:
551 * @ctxt: An XML parser context
552 * @msg: the message to display/transmit
553 * @...: extra parameters for the message display
554 *
555 * Display and format a fatalError messages, gives file, line, position and
556 * extra parameters.
557 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000558static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000559fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000560{
561 va_list args;
562
563 va_start(args, msg);
564 fprintf(stdout, "SAX.fatalError: ");
565 vfprintf(stdout, msg, args);
566 va_end(args);
567}
568
/*
 * SAX handler wiring every callback to the tracing function of the same
 * name defined in this file; each of them prints a "SAX.xxx(...)" line on
 * stdout.
 */
xmlSAXHandler debugSAXHandlerStruct = {
    internalSubsetDebug,	/* internalSubset */
    isStandaloneDebug,		/* isStandalone */
    hasInternalSubsetDebug,	/* hasInternalSubset */
    hasExternalSubsetDebug,	/* hasExternalSubset */
    resolveEntityDebug,		/* resolveEntity */
    getEntityDebug,		/* getEntity */
    entityDeclDebug,		/* entityDecl */
    notationDeclDebug,		/* notationDecl */
    attributeDeclDebug,		/* attributeDecl */
    elementDeclDebug,		/* elementDecl */
    unparsedEntityDeclDebug,	/* unparsedEntityDecl */
    setDocumentLocatorDebug,	/* setDocumentLocator */
    startDocumentDebug,		/* startDocument */
    endDocumentDebug,		/* endDocument */
    startElementDebug,		/* startElement */
    endElementDebug,		/* endElement */
    referenceDebug,		/* reference */
    charactersDebug,		/* characters */
    ignorableWhitespaceDebug,	/* ignorableWhitespace */
    processingInstructionDebug,	/* processingInstruction */
    commentDebug,		/* comment */
    warningDebug,		/* warning */
    errorDebug,			/* error */
    fatalErrorDebug,		/* fatalError */
    getParameterEntityDebug,	/* getParameterEntity */
    cdataDebug,			/* cdataBlock */
    NULL,			/* externalSubset: not traced */
    1				/* presumably the "initialized" field - confirm against xmlSAXHandler */
};

xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000601/************************************************************************
602 * *
603 * Debug *
604 * *
605 ************************************************************************/
606
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000607static void
608parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000609 htmlDocPtr doc = NULL;
610
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000611 /*
612 * Empty callbacks for checking
613 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000614 if (push) {
615 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000616
Daniel Veillard87b95392000-08-12 21:12:04 +0000617 f = fopen(filename, "r");
618 if (f != NULL) {
619 int res, size = 3;
620 char chars[4096];
621 htmlParserCtxtPtr ctxt;
622
623 /* if (repeat) */
624 size = 4096;
625 res = fread(chars, 1, 4, f);
626 if (res > 0) {
627 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
628 chars, res, filename, 0);
629 while ((res = fread(chars, 1, size, f)) > 0) {
630 htmlParseChunk(ctxt, chars, res, 0);
631 }
632 htmlParseChunk(ctxt, chars, 0, 1);
633 doc = ctxt->myDoc;
634 htmlFreeParserCtxt(ctxt);
635 }
636 if (doc != NULL) {
637 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
638 xmlFreeDoc(doc);
639 }
640 fclose(f);
641 }
642 if (!noout) {
643 f = fopen(filename, "r");
644 if (f != NULL) {
645 int res, size = 3;
646 char chars[4096];
647 htmlParserCtxtPtr ctxt;
648
649 /* if (repeat) */
650 size = 4096;
651 res = fread(chars, 1, 4, f);
652 if (res > 0) {
653 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
654 chars, res, filename, 0);
655 while ((res = fread(chars, 1, size, f)) > 0) {
656 htmlParseChunk(ctxt, chars, res, 0);
657 }
658 htmlParseChunk(ctxt, chars, 0, 1);
659 doc = ctxt->myDoc;
660 htmlFreeParserCtxt(ctxt);
661 }
662 if (doc != NULL) {
663 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
664 xmlFreeDoc(doc);
665 }
666 fclose(f);
667 }
668 }
669 } else {
670 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000671 if (doc != NULL) {
672 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
673 xmlFreeDoc(doc);
674 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000675
676 if (!noout) {
677 /*
678 * Debug callback
679 */
680 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
681 if (doc != NULL) {
682 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
683 xmlFreeDoc(doc);
684 }
685 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000686 }
687}
688
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000689static void
690parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000691 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000692
693 /*
694 * build an HTML tree from a string;
695 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000696 if (push) {
697 FILE *f;
698
699 f = fopen(filename, "r");
700 if (f != NULL) {
701 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000702 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000703 htmlParserCtxtPtr ctxt;
704
Daniel Veillard87b95392000-08-12 21:12:04 +0000705 /* if (repeat) */
706 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000707 res = fread(chars, 1, 4, f);
708 if (res > 0) {
709 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
710 chars, res, filename, 0);
711 while ((res = fread(chars, 1, size, f)) > 0) {
712 htmlParseChunk(ctxt, chars, res, 0);
713 }
714 htmlParseChunk(ctxt, chars, 0, 1);
715 doc = ctxt->myDoc;
716 htmlFreeParserCtxt(ctxt);
717 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000718 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000719 }
720 } else {
721 doc = htmlParseFile(filename, NULL);
722 }
723 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000724 xmlGenericError(xmlGenericErrorContext,
725 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000726 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000727
728 /*
729 * test intermediate copy if needed.
730 */
731 if (copy) {
732 tmp = doc;
733 doc = xmlCopyDoc(doc, 1);
734 xmlFreeDoc(tmp);
735 }
736
737 /*
738 * print it.
739 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000740 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000741#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000742 if (!debug) {
743 if (encoding)
744 htmlSaveFileEnc("-", doc, encoding);
745 else
746 htmlDocDump(stdout, doc);
747 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000748 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000749#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000750 if (encoding)
751 htmlSaveFileEnc("-", doc, encoding);
752 else
753 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000754#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000755 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000756
757 /*
758 * free it.
759 */
760 xmlFreeDoc(doc);
761}
762
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000763int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000764 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000765 int files = 0;
766
767 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000768#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000769 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
770 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000771 else
772#endif
773 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000774 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000775 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
776 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000777 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
778 sax++;
779 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
780 noout++;
781 else if ((!strcmp(argv[i], "-repeat")) ||
782 (!strcmp(argv[i], "--repeat")))
783 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000784 else if ((!strcmp(argv[i], "-encode")) ||
785 (!strcmp(argv[i], "--encode"))) {
786 i++;
787 encoding = argv[i];
788 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000789 }
790 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000791 if ((!strcmp(argv[i], "-encode")) ||
792 (!strcmp(argv[i], "--encode"))) {
793 i++;
794 continue;
795 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000796 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000797 if (repeat) {
798 for (count = 0;count < 100 * repeat;count++) {
799 if (sax)
800 parseSAXFile(argv[i]);
801 else
802 parseAndPrintFile(argv[i]);
803 }
804 } else {
805 if (sax)
806 parseSAXFile(argv[i]);
807 else
808 parseAndPrintFile(argv[i]);
809 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000810 files ++;
811 }
812 }
813 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000814 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000815 argv[0]);
816 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000817#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000818 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000819#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000820 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000821 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000822 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000823 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000824 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000825 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000826 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000827 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000828 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000829
830 return(0);
831}
Daniel Veillard361d8452000-04-03 19:48:13 +0000832#else /* !LIBXML_HTML_ENABLED */
833#include <stdio.h>
/*
 * Fallback entry point used when the library was built without HTML
 * support: print a notice naming the program and exit with status 0.
 */
int main(int argc, char **argv) {
    (void) argc;	/* only argv[0] is needed */

    printf("%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
838#endif